@@ -2449,14 +2449,15 @@ void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end)
2449
2449
}
2450
2450
2451
2451
static struct io_failure_record * btrfs_get_io_failure_record (struct inode * inode ,
2452
- u64 start , u64 end )
2452
+ u64 start )
2453
2453
{
2454
2454
struct btrfs_fs_info * fs_info = btrfs_sb (inode -> i_sb );
2455
2455
struct io_failure_record * failrec ;
2456
2456
struct extent_map * em ;
2457
2457
struct extent_io_tree * failure_tree = & BTRFS_I (inode )-> io_failure_tree ;
2458
2458
struct extent_io_tree * tree = & BTRFS_I (inode )-> io_tree ;
2459
2459
struct extent_map_tree * em_tree = & BTRFS_I (inode )-> extent_tree ;
2460
+ const u32 sectorsize = fs_info -> sectorsize ;
2460
2461
int ret ;
2461
2462
u64 logical ;
2462
2463
@@ -2480,7 +2481,7 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
2480
2481
return ERR_PTR (- ENOMEM );
2481
2482
2482
2483
failrec -> start = start ;
2483
- failrec -> len = end - start + 1 ;
2484
+ failrec -> len = sectorsize ;
2484
2485
failrec -> this_mirror = 0 ;
2485
2486
failrec -> bio_flags = 0 ;
2486
2487
failrec -> in_validation = 0 ;
@@ -2519,12 +2520,13 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
2519
2520
free_extent_map (em );
2520
2521
2521
2522
/* Set the bits in the private failure tree */
2522
- ret = set_extent_bits (failure_tree , start , end ,
2523
+ ret = set_extent_bits (failure_tree , start , start + sectorsize - 1 ,
2523
2524
EXTENT_LOCKED | EXTENT_DIRTY );
2524
2525
if (ret >= 0 ) {
2525
2526
ret = set_state_failrec (failure_tree , start , failrec );
2526
2527
/* Set the bits in the inode's tree */
2527
- ret = set_extent_bits (tree , start , end , EXTENT_DAMAGED );
2528
+ ret = set_extent_bits (tree , start , start + sectorsize - 1 ,
2529
+ EXTENT_DAMAGED );
2528
2530
} else if (ret < 0 ) {
2529
2531
kfree (failrec );
2530
2532
return ERR_PTR (ret );
@@ -2639,11 +2641,11 @@ static bool btrfs_io_needs_validation(struct inode *inode, struct bio *bio)
2639
2641
return false;
2640
2642
}
2641
2643
2642
- blk_status_t btrfs_submit_read_repair (struct inode * inode ,
2643
- struct bio * failed_bio , u32 bio_offset ,
2644
- struct page * page , unsigned int pgoff ,
2645
- u64 start , u64 end , int failed_mirror ,
2646
- submit_bio_hook_t * submit_bio_hook )
2644
+ int btrfs_repair_one_sector (struct inode * inode ,
2645
+ struct bio * failed_bio , u32 bio_offset ,
2646
+ struct page * page , unsigned int pgoff ,
2647
+ u64 start , int failed_mirror ,
2648
+ submit_bio_hook_t * submit_bio_hook )
2647
2649
{
2648
2650
struct io_failure_record * failrec ;
2649
2651
struct btrfs_fs_info * fs_info = btrfs_sb (inode -> i_sb );
@@ -2661,16 +2663,22 @@ blk_status_t btrfs_submit_read_repair(struct inode *inode,
2661
2663
2662
2664
BUG_ON (bio_op (failed_bio ) == REQ_OP_WRITE );
2663
2665
2664
- failrec = btrfs_get_io_failure_record (inode , start , end );
2666
+ failrec = btrfs_get_io_failure_record (inode , start );
2665
2667
if (IS_ERR (failrec ))
2666
- return errno_to_blk_status (PTR_ERR (failrec ));
2667
-
2668
- need_validation = btrfs_io_needs_validation (inode , failed_bio );
2668
+ return PTR_ERR (failrec );
2669
2669
2670
+ /*
2671
+ * We will only submit repair for one sector, thus we don't need
2672
+ * extra validation anymore.
2673
+ *
2674
+ * TODO: All of that extra validation related code will be cleaned up
2675
+ * later.
2676
+ */
2677
+ need_validation = false;
2670
2678
if (!btrfs_check_repairable (inode , need_validation , failrec ,
2671
2679
failed_mirror )) {
2672
2680
free_io_failure (failure_tree , tree , failrec );
2673
- return BLK_STS_IOERR ;
2681
+ return - EIO ;
2674
2682
}
2675
2683
2676
2684
repair_bio = btrfs_io_bio_alloc (1 );
@@ -2704,7 +2712,120 @@ blk_status_t btrfs_submit_read_repair(struct inode *inode,
2704
2712
free_io_failure (failure_tree , tree , failrec );
2705
2713
bio_put (repair_bio );
2706
2714
}
2707
- return status ;
2715
+ return blk_status_to_errno (status );
2716
+ }
2717
+
2718
+ static void end_page_read (struct page * page , bool uptodate , u64 start , u32 len )
2719
+ {
2720
+ struct btrfs_fs_info * fs_info = btrfs_sb (page -> mapping -> host -> i_sb );
2721
+
2722
+ ASSERT (page_offset (page ) <= start &&
2723
+ start + len <= page_offset (page ) + PAGE_SIZE );
2724
+
2725
+ /*
2726
+ * For subpage metadata case, all btrfs_page_* helpers need page to
2727
+ * have page::private populated.
2728
+ * But we can have rare case where the last eb in the page is only
2729
+ * referred by the IO, and it gets released immediately after it's
2730
+ * read and verified.
2731
+ *
2732
+ * This can detach the page private completely.
2733
+ * In that case, we can just skip the page status update completely,
2734
+ * as the page has no eb anymore.
2735
+ */
2736
+ if (fs_info -> sectorsize < PAGE_SIZE && unlikely (!PagePrivate (page ))) {
2737
+ ASSERT (!is_data_inode (page -> mapping -> host ));
2738
+ return ;
2739
+ }
2740
+ if (uptodate ) {
2741
+ btrfs_page_set_uptodate (fs_info , page , start , len );
2742
+ } else {
2743
+ btrfs_page_clear_uptodate (fs_info , page , start , len );
2744
+ btrfs_page_set_error (fs_info , page , start , len );
2745
+ }
2746
+
2747
+ if (fs_info -> sectorsize == PAGE_SIZE )
2748
+ unlock_page (page );
2749
+ else if (is_data_inode (page -> mapping -> host ))
2750
+ /*
2751
+ * For subpage data, unlock the page if we're the last reader.
2752
+ * For subpage metadata, page lock is not utilized for read.
2753
+ */
2754
+ btrfs_subpage_end_reader (fs_info , page , start , len );
2755
+ }
2756
+
2757
+ static blk_status_t submit_read_repair (struct inode * inode ,
2758
+ struct bio * failed_bio , u32 bio_offset ,
2759
+ struct page * page , unsigned int pgoff ,
2760
+ u64 start , u64 end , int failed_mirror ,
2761
+ unsigned int error_bitmap ,
2762
+ submit_bio_hook_t * submit_bio_hook )
2763
+ {
2764
+ struct btrfs_fs_info * fs_info = btrfs_sb (inode -> i_sb );
2765
+ const u32 sectorsize = fs_info -> sectorsize ;
2766
+ const int nr_bits = (end + 1 - start ) >> fs_info -> sectorsize_bits ;
2767
+ int error = 0 ;
2768
+ int i ;
2769
+
2770
+ BUG_ON (bio_op (failed_bio ) == REQ_OP_WRITE );
2771
+
2772
+ /* We're here because we had some read errors or csum mismatch */
2773
+ ASSERT (error_bitmap );
2774
+
2775
+ /*
2776
+ * We only get called on buffered IO, thus page must be mapped and bio
2777
+ * must not be cloned.
2778
+ */
2779
+ ASSERT (page -> mapping && !bio_flagged (failed_bio , BIO_CLONED ));
2780
+
2781
+ /* Iterate through all the sectors in the range */
2782
+ for (i = 0 ; i < nr_bits ; i ++ ) {
2783
+ const unsigned int offset = i * sectorsize ;
2784
+ struct extent_state * cached = NULL ;
2785
+ bool uptodate = false;
2786
+ int ret ;
2787
+
2788
+ if (!(error_bitmap & (1U << i ))) {
2789
+ /*
2790
+ * This sector has no error, just end the page read
2791
+ * and unlock the range.
2792
+ */
2793
+ uptodate = true;
2794
+ goto next ;
2795
+ }
2796
+
2797
+ ret = btrfs_repair_one_sector (inode , failed_bio ,
2798
+ bio_offset + offset ,
2799
+ page , pgoff + offset , start + offset ,
2800
+ failed_mirror , submit_bio_hook );
2801
+ if (!ret ) {
2802
+ /*
2803
+ * We have submitted the read repair, the page release
2804
+ * will be handled by the endio function of the
2805
+ * submitted repair bio.
2806
+ * Thus we don't need to do anything here.
2807
+ */
2808
+ continue ;
2809
+ }
2810
+ /*
2811
+ * Repair failed, just record the error but still continue.
2812
+ * Otherwise the remaining sectors will not be properly unlocked.
2813
+ */
2814
+ if (!error )
2815
+ error = ret ;
2816
+ next :
2817
+ end_page_read (page , uptodate , start + offset , sectorsize );
2818
+ if (uptodate )
2819
+ set_extent_uptodate (& BTRFS_I (inode )-> io_tree ,
2820
+ start + offset ,
2821
+ start + offset + sectorsize - 1 ,
2822
+ & cached , GFP_ATOMIC );
2823
+ unlock_extent_cached_atomic (& BTRFS_I (inode )-> io_tree ,
2824
+ start + offset ,
2825
+ start + offset + sectorsize - 1 ,
2826
+ & cached );
2827
+ }
2828
+ return errno_to_blk_status (error );
2708
2829
}
2709
2830
2710
2831
/* lots and lots of room for performance fixes in the end_bio funcs */
@@ -2862,30 +2983,6 @@ static void begin_page_read(struct btrfs_fs_info *fs_info, struct page *page)
2862
2983
btrfs_subpage_start_reader (fs_info , page , page_offset (page ), PAGE_SIZE );
2863
2984
}
2864
2985
2865
- static void end_page_read (struct page * page , bool uptodate , u64 start , u32 len )
2866
- {
2867
- struct btrfs_fs_info * fs_info = btrfs_sb (page -> mapping -> host -> i_sb );
2868
-
2869
- ASSERT (page_offset (page ) <= start &&
2870
- start + len <= page_offset (page ) + PAGE_SIZE );
2871
-
2872
- if (uptodate ) {
2873
- btrfs_page_set_uptodate (fs_info , page , start , len );
2874
- } else {
2875
- btrfs_page_clear_uptodate (fs_info , page , start , len );
2876
- btrfs_page_set_error (fs_info , page , start , len );
2877
- }
2878
-
2879
- if (fs_info -> sectorsize == PAGE_SIZE )
2880
- unlock_page (page );
2881
- else if (is_data_inode (page -> mapping -> host ))
2882
- /*
2883
- * For subpage data, unlock the page if we're the last reader.
2884
- * For subpage metadata, page lock is not utilized for read.
2885
- */
2886
- btrfs_subpage_end_reader (fs_info , page , start , len );
2887
- }
2888
-
2889
2986
/*
2890
2987
* Find extent buffer for a given bytenr.
2891
2988
*
@@ -2929,7 +3026,6 @@ static struct extent_buffer *find_extent_buffer_readpage(
2929
3026
static void end_bio_extent_readpage (struct bio * bio )
2930
3027
{
2931
3028
struct bio_vec * bvec ;
2932
- int uptodate = !bio -> bi_status ;
2933
3029
struct btrfs_io_bio * io_bio = btrfs_io_bio (bio );
2934
3030
struct extent_io_tree * tree , * failure_tree ;
2935
3031
struct processed_extent processed = { 0 };
@@ -2944,10 +3040,12 @@ static void end_bio_extent_readpage(struct bio *bio)
2944
3040
2945
3041
ASSERT (!bio_flagged (bio , BIO_CLONED ));
2946
3042
bio_for_each_segment_all (bvec , bio , iter_all ) {
3043
+ bool uptodate = !bio -> bi_status ;
2947
3044
struct page * page = bvec -> bv_page ;
2948
3045
struct inode * inode = page -> mapping -> host ;
2949
3046
struct btrfs_fs_info * fs_info = btrfs_sb (inode -> i_sb );
2950
3047
const u32 sectorsize = fs_info -> sectorsize ;
3048
+ unsigned int error_bitmap = (unsigned int )-1 ;
2951
3049
u64 start ;
2952
3050
u64 end ;
2953
3051
u32 len ;
@@ -2982,14 +3080,16 @@ static void end_bio_extent_readpage(struct bio *bio)
2982
3080
2983
3081
mirror = io_bio -> mirror_num ;
2984
3082
if (likely (uptodate )) {
2985
- if (is_data_inode (inode ))
2986
- ret = btrfs_verify_data_csum (io_bio ,
3083
+ if (is_data_inode (inode )) {
3084
+ error_bitmap = btrfs_verify_data_csum (io_bio ,
2987
3085
bio_offset , page , start , end );
2988
- else
3086
+ ret = error_bitmap ;
3087
+ } else {
2989
3088
ret = btrfs_validate_metadata_buffer (io_bio ,
2990
3089
page , start , end , mirror );
3090
+ }
2991
3091
if (ret )
2992
- uptodate = 0 ;
3092
+ uptodate = false ;
2993
3093
else
2994
3094
clean_io_failure (BTRFS_I (inode )-> root -> fs_info ,
2995
3095
failure_tree , tree , start ,
@@ -3001,27 +3101,18 @@ static void end_bio_extent_readpage(struct bio *bio)
3001
3101
goto readpage_ok ;
3002
3102
3003
3103
if (is_data_inode (inode )) {
3004
-
3005
3104
/*
3006
- * The generic bio_readpage_error handles errors the
3007
- * following way: If possible, new read requests are
3008
- * created and submitted and will end up in
3009
- * end_bio_extent_readpage as well (if we're lucky,
3010
- * not in the !uptodate case). In that case it returns
3011
- * 0 and we just go on with the next page in our bio.
3012
- * If it can't handle the error it will return -EIO and
3013
- * we remain responsible for that page.
3105
+ * submit_read_repair() will handle all the good
3106
+ * and bad sectors, we just continue to the next bvec.
3014
3107
*/
3015
- if (!btrfs_submit_read_repair (inode , bio , bio_offset ,
3016
- page ,
3017
- start - page_offset (page ),
3018
- start , end , mirror ,
3019
- btrfs_submit_data_bio )) {
3020
- uptodate = !bio -> bi_status ;
3021
- ASSERT (bio_offset + len > bio_offset );
3022
- bio_offset += len ;
3023
- continue ;
3024
- }
3108
+ submit_read_repair (inode , bio , bio_offset , page ,
3109
+ start - page_offset (page ), start ,
3110
+ end , mirror , error_bitmap ,
3111
+ btrfs_submit_data_bio );
3112
+
3113
+ ASSERT (bio_offset + len > bio_offset );
3114
+ bio_offset += len ;
3115
+ continue ;
3025
3116
} else {
3026
3117
struct extent_buffer * eb ;
3027
3118
0 commit comments