Skip to content

Commit 8079d26

Browse files
adam900710kdave
authored andcommitted
btrfs: submit read time repair only for each corrupted sector
Currently btrfs_submit_read_repair() has some extra check on whether the failed bio needs extra validation for repair. But we can avoid all these extra mechanisms if we submit the repair for each sector. By this, each read repair can be easily handled without the need to verify which sector is corrupted. This will also benefit subpage, as one subpage bvec can contain several sectors, making the extra verification more complex. So this patch will: - Introduce repair_one_sector() The main code submitting repair, which is more or less the same as old btrfs_submit_read_repair(). But this time, it only repairs one sector. - Make btrfs_submit_read_repair() to handle sectors differently There are 3 different cases: * Good sector We need to release the page and extent, set the range uptodate. * Bad sector and failed to submit repair bio We need to release the page and extent, but not set the range uptodate. * Bad sector but repair bio submitted The page and extent release will be handled by the submitted repair bio. Nothing needs to be done. Since btrfs_submit_read_repair() will handle the page and extent release now, we need to skip to next bvec even we hit some error. - Change the lifespan of @uptodate in end_bio_extent_readpage() Since now btrfs_submit_read_repair() will handle the full bvec which contains any corruption, we don't need to bother updating @uptodate bit anymore. Just let @uptodate to be local variable inside the main loop, so that any error from one bvec won't affect later bvec. - Only export btrfs_repair_one_sector(), unexport btrfs_submit_read_repair() The only outside caller for read repair is DIO, which already submits its repair for just one sector. Only export btrfs_repair_one_sector() for DIO. This patch will focus on the change on the repair path, the extra validation code is still kept as is, and will be cleaned up later. Signed-off-by: Qu Wenruo <[email protected]> Signed-off-by: David Sterba <[email protected]>
1 parent fd35b13 commit 8079d26

File tree

3 files changed

+168
-80
lines changed

3 files changed

+168
-80
lines changed

fs/btrfs/extent_io.c

Lines changed: 154 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -2449,14 +2449,15 @@ void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end)
24492449
}
24502450

24512451
static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode,
2452-
u64 start, u64 end)
2452+
u64 start)
24532453
{
24542454
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
24552455
struct io_failure_record *failrec;
24562456
struct extent_map *em;
24572457
struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
24582458
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
24592459
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2460+
const u32 sectorsize = fs_info->sectorsize;
24602461
int ret;
24612462
u64 logical;
24622463

@@ -2480,7 +2481,7 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
24802481
return ERR_PTR(-ENOMEM);
24812482

24822483
failrec->start = start;
2483-
failrec->len = end - start + 1;
2484+
failrec->len = sectorsize;
24842485
failrec->this_mirror = 0;
24852486
failrec->bio_flags = 0;
24862487
failrec->in_validation = 0;
@@ -2519,12 +2520,13 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
25192520
free_extent_map(em);
25202521

25212522
/* Set the bits in the private failure tree */
2522-
ret = set_extent_bits(failure_tree, start, end,
2523+
ret = set_extent_bits(failure_tree, start, start + sectorsize - 1,
25232524
EXTENT_LOCKED | EXTENT_DIRTY);
25242525
if (ret >= 0) {
25252526
ret = set_state_failrec(failure_tree, start, failrec);
25262527
/* Set the bits in the inode's tree */
2527-
ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED);
2528+
ret = set_extent_bits(tree, start, start + sectorsize - 1,
2529+
EXTENT_DAMAGED);
25282530
} else if (ret < 0) {
25292531
kfree(failrec);
25302532
return ERR_PTR(ret);
@@ -2639,11 +2641,11 @@ static bool btrfs_io_needs_validation(struct inode *inode, struct bio *bio)
26392641
return false;
26402642
}
26412643

2642-
blk_status_t btrfs_submit_read_repair(struct inode *inode,
2643-
struct bio *failed_bio, u32 bio_offset,
2644-
struct page *page, unsigned int pgoff,
2645-
u64 start, u64 end, int failed_mirror,
2646-
submit_bio_hook_t *submit_bio_hook)
2644+
int btrfs_repair_one_sector(struct inode *inode,
2645+
struct bio *failed_bio, u32 bio_offset,
2646+
struct page *page, unsigned int pgoff,
2647+
u64 start, int failed_mirror,
2648+
submit_bio_hook_t *submit_bio_hook)
26472649
{
26482650
struct io_failure_record *failrec;
26492651
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -2661,16 +2663,22 @@ blk_status_t btrfs_submit_read_repair(struct inode *inode,
26612663

26622664
BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
26632665

2664-
failrec = btrfs_get_io_failure_record(inode, start, end);
2666+
failrec = btrfs_get_io_failure_record(inode, start);
26652667
if (IS_ERR(failrec))
2666-
return errno_to_blk_status(PTR_ERR(failrec));
2667-
2668-
need_validation = btrfs_io_needs_validation(inode, failed_bio);
2668+
return PTR_ERR(failrec);
26692669

2670+
/*
2671+
* We will only submit repair for one sector, thus we don't need
2672+
* extra validation anymore.
2673+
*
2674+
* TODO: All those extra validation related code will be cleaned up
2675+
* later.
2676+
*/
2677+
need_validation = false;
26702678
if (!btrfs_check_repairable(inode, need_validation, failrec,
26712679
failed_mirror)) {
26722680
free_io_failure(failure_tree, tree, failrec);
2673-
return BLK_STS_IOERR;
2681+
return -EIO;
26742682
}
26752683

26762684
repair_bio = btrfs_io_bio_alloc(1);
@@ -2704,7 +2712,120 @@ blk_status_t btrfs_submit_read_repair(struct inode *inode,
27042712
free_io_failure(failure_tree, tree, failrec);
27052713
bio_put(repair_bio);
27062714
}
2707-
return status;
2715+
return blk_status_to_errno(status);
2716+
}
2717+
2718+
static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
2719+
{
2720+
struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
2721+
2722+
ASSERT(page_offset(page) <= start &&
2723+
start + len <= page_offset(page) + PAGE_SIZE);
2724+
2725+
/*
2726+
* For subapge metadata case, all btrfs_page_* helpers need page to
2727+
* have page::private populated.
2728+
* But we can have rare case where the last eb in the page is only
2729+
* referred by the IO, and it gets released immedately after it's
2730+
* read and verified.
2731+
*
2732+
* This can detach the page private completely.
2733+
* In that case, we can just skip the page status update completely,
2734+
* as the page has no eb anymore.
2735+
*/
2736+
if (fs_info->sectorsize < PAGE_SIZE && unlikely(!PagePrivate(page))) {
2737+
ASSERT(!is_data_inode(page->mapping->host));
2738+
return;
2739+
}
2740+
if (uptodate) {
2741+
btrfs_page_set_uptodate(fs_info, page, start, len);
2742+
} else {
2743+
btrfs_page_clear_uptodate(fs_info, page, start, len);
2744+
btrfs_page_set_error(fs_info, page, start, len);
2745+
}
2746+
2747+
if (fs_info->sectorsize == PAGE_SIZE)
2748+
unlock_page(page);
2749+
else if (is_data_inode(page->mapping->host))
2750+
/*
2751+
* For subpage data, unlock the page if we're the last reader.
2752+
* For subpage metadata, page lock is not utilized for read.
2753+
*/
2754+
btrfs_subpage_end_reader(fs_info, page, start, len);
2755+
}
2756+
2757+
static blk_status_t submit_read_repair(struct inode *inode,
2758+
struct bio *failed_bio, u32 bio_offset,
2759+
struct page *page, unsigned int pgoff,
2760+
u64 start, u64 end, int failed_mirror,
2761+
unsigned int error_bitmap,
2762+
submit_bio_hook_t *submit_bio_hook)
2763+
{
2764+
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2765+
const u32 sectorsize = fs_info->sectorsize;
2766+
const int nr_bits = (end + 1 - start) >> fs_info->sectorsize_bits;
2767+
int error = 0;
2768+
int i;
2769+
2770+
BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
2771+
2772+
/* We're here because we had some read errors or csum mismatch */
2773+
ASSERT(error_bitmap);
2774+
2775+
/*
2776+
* We only get called on buffered IO, thus page must be mapped and bio
2777+
* must not be cloned.
2778+
*/
2779+
ASSERT(page->mapping && !bio_flagged(failed_bio, BIO_CLONED));
2780+
2781+
/* Iterate through all the sectors in the range */
2782+
for (i = 0; i < nr_bits; i++) {
2783+
const unsigned int offset = i * sectorsize;
2784+
struct extent_state *cached = NULL;
2785+
bool uptodate = false;
2786+
int ret;
2787+
2788+
if (!(error_bitmap & (1U << i))) {
2789+
/*
2790+
* This sector has no error, just end the page read
2791+
* and unlock the range.
2792+
*/
2793+
uptodate = true;
2794+
goto next;
2795+
}
2796+
2797+
ret = btrfs_repair_one_sector(inode, failed_bio,
2798+
bio_offset + offset,
2799+
page, pgoff + offset, start + offset,
2800+
failed_mirror, submit_bio_hook);
2801+
if (!ret) {
2802+
/*
2803+
* We have submitted the read repair, the page release
2804+
* will be handled by the endio function of the
2805+
* submitted repair bio.
2806+
* Thus we don't need to do any thing here.
2807+
*/
2808+
continue;
2809+
}
2810+
/*
2811+
* Repair failed, just record the error but still continue.
2812+
* Or the remaining sectors will not be properly unlocked.
2813+
*/
2814+
if (!error)
2815+
error = ret;
2816+
next:
2817+
end_page_read(page, uptodate, start + offset, sectorsize);
2818+
if (uptodate)
2819+
set_extent_uptodate(&BTRFS_I(inode)->io_tree,
2820+
start + offset,
2821+
start + offset + sectorsize - 1,
2822+
&cached, GFP_ATOMIC);
2823+
unlock_extent_cached_atomic(&BTRFS_I(inode)->io_tree,
2824+
start + offset,
2825+
start + offset + sectorsize - 1,
2826+
&cached);
2827+
}
2828+
return errno_to_blk_status(error);
27082829
}
27092830

27102831
/* lots and lots of room for performance fixes in the end_bio funcs */
@@ -2862,30 +2983,6 @@ static void begin_page_read(struct btrfs_fs_info *fs_info, struct page *page)
28622983
btrfs_subpage_start_reader(fs_info, page, page_offset(page), PAGE_SIZE);
28632984
}
28642985

2865-
static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
2866-
{
2867-
struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
2868-
2869-
ASSERT(page_offset(page) <= start &&
2870-
start + len <= page_offset(page) + PAGE_SIZE);
2871-
2872-
if (uptodate) {
2873-
btrfs_page_set_uptodate(fs_info, page, start, len);
2874-
} else {
2875-
btrfs_page_clear_uptodate(fs_info, page, start, len);
2876-
btrfs_page_set_error(fs_info, page, start, len);
2877-
}
2878-
2879-
if (fs_info->sectorsize == PAGE_SIZE)
2880-
unlock_page(page);
2881-
else if (is_data_inode(page->mapping->host))
2882-
/*
2883-
* For subpage data, unlock the page if we're the last reader.
2884-
* For subpage metadata, page lock is not utilized for read.
2885-
*/
2886-
btrfs_subpage_end_reader(fs_info, page, start, len);
2887-
}
2888-
28892986
/*
28902987
* Find extent buffer for a givne bytenr.
28912988
*
@@ -2929,7 +3026,6 @@ static struct extent_buffer *find_extent_buffer_readpage(
29293026
static void end_bio_extent_readpage(struct bio *bio)
29303027
{
29313028
struct bio_vec *bvec;
2932-
int uptodate = !bio->bi_status;
29333029
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
29343030
struct extent_io_tree *tree, *failure_tree;
29353031
struct processed_extent processed = { 0 };
@@ -2944,10 +3040,12 @@ static void end_bio_extent_readpage(struct bio *bio)
29443040

29453041
ASSERT(!bio_flagged(bio, BIO_CLONED));
29463042
bio_for_each_segment_all(bvec, bio, iter_all) {
3043+
bool uptodate = !bio->bi_status;
29473044
struct page *page = bvec->bv_page;
29483045
struct inode *inode = page->mapping->host;
29493046
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
29503047
const u32 sectorsize = fs_info->sectorsize;
3048+
unsigned int error_bitmap = (unsigned int)-1;
29513049
u64 start;
29523050
u64 end;
29533051
u32 len;
@@ -2982,14 +3080,16 @@ static void end_bio_extent_readpage(struct bio *bio)
29823080

29833081
mirror = io_bio->mirror_num;
29843082
if (likely(uptodate)) {
2985-
if (is_data_inode(inode))
2986-
ret = btrfs_verify_data_csum(io_bio,
3083+
if (is_data_inode(inode)) {
3084+
error_bitmap = btrfs_verify_data_csum(io_bio,
29873085
bio_offset, page, start, end);
2988-
else
3086+
ret = error_bitmap;
3087+
} else {
29893088
ret = btrfs_validate_metadata_buffer(io_bio,
29903089
page, start, end, mirror);
3090+
}
29913091
if (ret)
2992-
uptodate = 0;
3092+
uptodate = false;
29933093
else
29943094
clean_io_failure(BTRFS_I(inode)->root->fs_info,
29953095
failure_tree, tree, start,
@@ -3001,27 +3101,18 @@ static void end_bio_extent_readpage(struct bio *bio)
30013101
goto readpage_ok;
30023102

30033103
if (is_data_inode(inode)) {
3004-
30053104
/*
3006-
* The generic bio_readpage_error handles errors the
3007-
* following way: If possible, new read requests are
3008-
* created and submitted and will end up in
3009-
* end_bio_extent_readpage as well (if we're lucky,
3010-
* not in the !uptodate case). In that case it returns
3011-
* 0 and we just go on with the next page in our bio.
3012-
* If it can't handle the error it will return -EIO and
3013-
* we remain responsible for that page.
3105+
* btrfs_submit_read_repair() will handle all the good
3106+
* and bad sectors, we just continue to the next bvec.
30143107
*/
3015-
if (!btrfs_submit_read_repair(inode, bio, bio_offset,
3016-
page,
3017-
start - page_offset(page),
3018-
start, end, mirror,
3019-
btrfs_submit_data_bio)) {
3020-
uptodate = !bio->bi_status;
3021-
ASSERT(bio_offset + len > bio_offset);
3022-
bio_offset += len;
3023-
continue;
3024-
}
3108+
submit_read_repair(inode, bio, bio_offset, page,
3109+
start - page_offset(page), start,
3110+
end, mirror, error_bitmap,
3111+
btrfs_submit_data_bio);
3112+
3113+
ASSERT(bio_offset + len > bio_offset);
3114+
bio_offset += len;
3115+
continue;
30253116
} else {
30263117
struct extent_buffer *eb;
30273118

fs/btrfs/extent_io.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -296,12 +296,11 @@ struct io_failure_record {
296296
int in_validation;
297297
};
298298

299-
300-
blk_status_t btrfs_submit_read_repair(struct inode *inode,
301-
struct bio *failed_bio, u32 bio_offset,
302-
struct page *page, unsigned int pgoff,
303-
u64 start, u64 end, int failed_mirror,
304-
submit_bio_hook_t *submit_bio_hook);
299+
int btrfs_repair_one_sector(struct inode *inode,
300+
struct bio *failed_bio, u32 bio_offset,
301+
struct page *page, unsigned int pgoff,
302+
u64 start, int failed_mirror,
303+
submit_bio_hook_t *submit_bio_hook);
305304

306305
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
307306
bool find_lock_delalloc_range(struct inode *inode,

fs/btrfs/inode.c

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7933,19 +7933,17 @@ static blk_status_t btrfs_check_read_dio_bio(struct inode *inode,
79337933
btrfs_ino(BTRFS_I(inode)),
79347934
pgoff);
79357935
} else {
7936-
blk_status_t status;
7936+
int ret;
79377937

79387938
ASSERT((start - io_bio->logical) < UINT_MAX);
7939-
status = btrfs_submit_read_repair(inode,
7940-
&io_bio->bio,
7941-
start - io_bio->logical,
7942-
bvec.bv_page, pgoff,
7943-
start,
7944-
start + sectorsize - 1,
7945-
io_bio->mirror_num,
7946-
submit_dio_repair_bio);
7947-
if (status)
7948-
err = status;
7939+
ret = btrfs_repair_one_sector(inode,
7940+
&io_bio->bio,
7941+
start - io_bio->logical,
7942+
bvec.bv_page, pgoff,
7943+
start, io_bio->mirror_num,
7944+
submit_dio_repair_bio);
7945+
if (ret)
7946+
err = errno_to_blk_status(ret);
79497947
}
79507948
start += sectorsize;
79517949
ASSERT(bio_offset + sectorsize > bio_offset);

0 commit comments

Comments
 (0)