@@ -4065,7 +4065,7 @@ arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,
4065
4065
4066
4066
ASSERT3P (marker , != , NULL );
4067
4067
4068
- mls = multilist_sublist_lock (ml , idx );
4068
+ mls = multilist_sublist_lock_idx (ml , idx );
4069
4069
4070
4070
for (hdr = multilist_sublist_prev (mls , marker ); likely (hdr != NULL );
4071
4071
hdr = multilist_sublist_prev (mls , marker )) {
@@ -4178,6 +4178,26 @@ arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,
4178
4178
return (bytes_evicted );
4179
4179
}
4180
4180
4181
/*
 * Allocate one buffer header from hdr_full_cache to serve as a list
 * marker. The allocation is made with KM_SLEEP, and b_spa is set to 0
 * so the header can be recognized as a marker. The returned header is
 * released with arc_state_free_marker().
 */
+ static arc_buf_hdr_t *
4182
+ arc_state_alloc_marker (void )
4183
+ {
4184
+ arc_buf_hdr_t * marker = kmem_cache_alloc (hdr_full_cache , KM_SLEEP );
4185
+
4186
+ /*
4187
+ * A b_spa of 0 is used to indicate that this header is
4188
+ * a marker. This fact is used in arc_evict_state_impl().
4189
+ */
4190
+ marker -> b_spa = 0 ;
4191
+
4192
+ return (marker );
4193
+ }
4194
+
4195
/*
 * Return a marker header to hdr_full_cache, the same cache that
 * arc_state_alloc_marker() allocates from.
 */
+ static void
4196
+ arc_state_free_marker (arc_buf_hdr_t * marker )
4197
+ {
4198
+ kmem_cache_free (hdr_full_cache , marker );
4199
+ }
4200
+
4181
4201
/*
4182
4202
* Allocate an array of buffer headers used as placeholders during arc state
4183
4203
* eviction.
@@ -4188,25 +4208,16 @@ arc_state_alloc_markers(int count)
4188
4208
arc_buf_hdr_t * * markers ;
4189
4209
4190
4210
markers = kmem_zalloc (sizeof (* markers ) * count , KM_SLEEP );
4191
- for (int i = 0 ; i < count ; i ++ ) {
4192
- markers [i ] = kmem_cache_alloc (hdr_full_cache , KM_SLEEP );
4193
-
4194
- /*
4195
- * A b_spa of 0 is used to indicate that this header is
4196
- * a marker. This fact is used in arc_evict_type() and
4197
- * arc_evict_state_impl().
4198
- */
4199
- markers [i ]-> b_spa = 0 ;
4200
-
4201
- }
4211
+ for (int i = 0 ; i < count ; i ++ )
4212
+ markers [i ] = arc_state_alloc_marker ();
4202
4213
return (markers );
4203
4214
}
4204
4215
4205
4216
/*
 * Free an array of marker headers: each of the `count` entries is
 * released individually, then the pointer array itself is freed using
 * the size sizeof (*markers) * count.
 */
static void
4206
4217
arc_state_free_markers (arc_buf_hdr_t * * markers , int count )
4207
4218
{
4208
4219
for (int i = 0 ; i < count ; i ++ )
4209
- kmem_cache_free ( hdr_full_cache , markers [i ]);
4220
+ arc_state_free_marker ( markers [i ]);
4210
4221
kmem_free (markers , sizeof (* markers ) * count );
4211
4222
}
4212
4223
@@ -4250,7 +4261,7 @@ arc_evict_state(arc_state_t *state, uint64_t spa, uint64_t bytes,
4250
4261
for (int i = 0 ; i < num_sublists ; i ++ ) {
4251
4262
multilist_sublist_t * mls ;
4252
4263
4253
- mls = multilist_sublist_lock (ml , i );
4264
+ mls = multilist_sublist_lock_idx (ml , i );
4254
4265
multilist_sublist_insert_tail (mls , markers [i ]);
4255
4266
multilist_sublist_unlock (mls );
4256
4267
}
@@ -4328,7 +4339,7 @@ arc_evict_state(arc_state_t *state, uint64_t spa, uint64_t bytes,
4328
4339
}
4329
4340
4330
4341
for (int i = 0 ; i < num_sublists ; i ++ ) {
4331
- multilist_sublist_t * mls = multilist_sublist_lock (ml , i );
4342
+ multilist_sublist_t * mls = multilist_sublist_lock_idx (ml , i );
4332
4343
multilist_sublist_remove (mls , markers [i ]);
4333
4344
multilist_sublist_unlock (mls );
4334
4345
}
@@ -4568,8 +4579,8 @@ arc_evict_type(arc_state_t *state)
4568
4579
* We keep the sublist lock until we're finished, to prevent
4569
4580
* the headers from being destroyed via arc_evict_state().
4570
4581
*/
4571
- data_mls = multilist_sublist_lock (data_ml , data_idx );
4572
- meta_mls = multilist_sublist_lock (meta_ml , meta_idx );
4582
+ data_mls = multilist_sublist_lock_idx (data_ml , data_idx );
4583
+ meta_mls = multilist_sublist_lock_idx (meta_ml , meta_idx );
4573
4584
4574
4585
/*
4575
4586
* These two loops are to ensure we skip any markers that
@@ -9139,7 +9150,7 @@ l2arc_sublist_lock(int list_num)
9139
9150
* sublists being selected.
9140
9151
*/
9141
9152
idx = multilist_get_random_index (ml );
9142
- return (multilist_sublist_lock (ml , idx ));
9153
+ return (multilist_sublist_lock_idx (ml , idx ));
9143
9154
}
9144
9155
9145
9156
/*
@@ -9569,9 +9580,9 @@ l2arc_blk_fetch_done(zio_t *zio)
9569
9580
static uint64_t
9570
9581
l2arc_write_buffers (spa_t * spa , l2arc_dev_t * dev , uint64_t target_sz )
9571
9582
{
9572
- arc_buf_hdr_t * hdr , * hdr_prev , * head ;
9573
- uint64_t write_asize , write_psize , write_lsize , headroom ;
9574
- boolean_t full ;
9583
+ arc_buf_hdr_t * hdr , * head , * marker ;
9584
+ uint64_t write_asize , write_psize , headroom ;
9585
+ boolean_t full , from_head = ! arc_warm ;
9575
9586
l2arc_write_callback_t * cb = NULL ;
9576
9587
zio_t * pio , * wzio ;
9577
9588
uint64_t guid = spa_load_guid (spa );
@@ -9580,10 +9591,11 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
9580
9591
ASSERT3P (dev -> l2ad_vdev , != , NULL );
9581
9592
9582
9593
pio = NULL ;
9583
- write_lsize = write_asize = write_psize = 0 ;
9594
+ write_asize = write_psize = 0 ;
9584
9595
full = B_FALSE ;
9585
9596
head = kmem_cache_alloc (hdr_l2only_cache , KM_PUSHPAGE );
9586
9597
arc_hdr_set_flags (head , ARC_FLAG_L2_WRITE_HEAD | ARC_FLAG_HAS_L2HDR );
9598
+ marker = arc_state_alloc_marker ();
9587
9599
9588
9600
/*
9589
9601
* Copy buffers for L2ARC writing.
@@ -9598,40 +9610,34 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
9598
9610
continue ;
9599
9611
}
9600
9612
9601
- multilist_sublist_t * mls = l2arc_sublist_lock (pass );
9602
9613
uint64_t passed_sz = 0 ;
9603
-
9604
- VERIFY3P (mls , != , NULL );
9614
+ headroom = target_sz * l2arc_headroom ;
9615
+ if (zfs_compressed_arc_enabled )
9616
+ headroom = (headroom * l2arc_headroom_boost ) / 100 ;
9605
9617
9606
9618
/*
9607
- * L2ARC fast warmup.
9608
- *
9609
9619
* Until the ARC is warm and starts to evict, read from the
9610
9620
* head of the ARC lists rather than the tail.
9611
9621
*/
9612
- if (arc_warm == B_FALSE )
9622
+ multilist_sublist_t * mls = l2arc_sublist_lock (pass );
9623
+ ASSERT3P (mls , != , NULL );
9624
+ if (from_head )
9613
9625
hdr = multilist_sublist_head (mls );
9614
9626
else
9615
9627
hdr = multilist_sublist_tail (mls );
9616
9628
9617
- headroom = target_sz * l2arc_headroom ;
9618
- if (zfs_compressed_arc_enabled )
9619
- headroom = (headroom * l2arc_headroom_boost ) / 100 ;
9620
-
9621
- for (; hdr ; hdr = hdr_prev ) {
9629
+ while (hdr != NULL ) {
9622
9630
kmutex_t * hash_lock ;
9623
9631
abd_t * to_write = NULL ;
9624
9632
9625
- if (arc_warm == B_FALSE )
9626
- hdr_prev = multilist_sublist_next (mls , hdr );
9627
- else
9628
- hdr_prev = multilist_sublist_prev (mls , hdr );
9629
-
9630
9633
hash_lock = HDR_LOCK (hdr );
9631
9634
if (!mutex_tryenter (hash_lock )) {
9632
- /*
9633
- * Skip this buffer rather than waiting.
9634
- */
9635
+ skip :
9636
+ /* Skip this buffer rather than waiting. */
9637
+ if (from_head )
9638
+ hdr = multilist_sublist_next (mls , hdr );
9639
+ else
9640
+ hdr = multilist_sublist_prev (mls , hdr );
9635
9641
continue ;
9636
9642
}
9637
9643
@@ -9646,7 +9652,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
9646
9652
9647
9653
if (!l2arc_write_eligible (guid , hdr )) {
9648
9654
mutex_exit (hash_lock );
9649
- continue ;
9655
+ goto skip ;
9650
9656
}
9651
9657
9652
9658
/*
@@ -9656,7 +9662,6 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
9656
9662
* ARC_FLAG_L2_WRITING bit ensures this won't happen.
9657
9663
*/
9658
9664
ASSERT (HDR_HAS_L1HDR (hdr ));
9659
-
9660
9665
ASSERT3U (HDR_GET_PSIZE (hdr ), > , 0 );
9661
9666
ASSERT3U (arc_hdr_size (hdr ), > , 0 );
9662
9667
ASSERT (hdr -> b_l1hdr .b_pabd != NULL ||
@@ -9672,18 +9677,18 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
9672
9677
}
9673
9678
9674
9679
/*
9675
- * We rely on the L1 portion of the header below, so
9676
- * it's invalid for this header to have been evicted out
9677
- * of the ghost cache, prior to being written out. The
9678
- * ARC_FLAG_L2_WRITING bit ensures this won't happen.
9680
+ * We should not sleep with sublist lock held or it
9681
+ * may block ARC eviction. Insert a marker to save
9682
+ * the position and drop the lock.
9679
9683
*/
9680
- arc_hdr_set_flags (hdr , ARC_FLAG_L2_WRITING );
9681
- ASSERT (HDR_HAS_L1HDR (hdr ));
9682
-
9683
- ASSERT3U (HDR_GET_PSIZE (hdr ), > , 0 );
9684
- ASSERT (hdr -> b_l1hdr .b_pabd != NULL ||
9685
- HDR_HAS_RABD (hdr ));
9686
- ASSERT3U (arc_hdr_size (hdr ), > , 0 );
9684
+ if (from_head ) {
9685
+ multilist_sublist_insert_after (mls , hdr ,
9686
+ marker );
9687
+ } else {
9688
+ multilist_sublist_insert_before (mls , hdr ,
9689
+ marker );
9690
+ }
9691
+ multilist_sublist_unlock (mls );
9687
9692
9688
9693
/*
9689
9694
* If this header has b_rabd, we can use this since it
@@ -9714,9 +9719,9 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
9714
9719
& to_write );
9715
9720
if (ret != 0 ) {
9716
9721
arc_hdr_clear_flags (hdr ,
9717
- ARC_FLAG_L2_WRITING );
9722
+ ARC_FLAG_L2CACHE );
9718
9723
mutex_exit (hash_lock );
9719
- continue ;
9724
+ goto next ;
9720
9725
}
9721
9726
9722
9727
l2arc_free_abd_on_write (to_write , asize , type );
@@ -9725,73 +9730,70 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
9725
9730
/* l2arc_hdr_arcstats_update() expects a valid asize */
9726
9731
HDR_SET_L2SIZE (hdr , asize );
9727
9732
9733
+ hdr -> b_l2hdr .b_dev = dev ;
9734
+ hdr -> b_l2hdr .b_daddr = dev -> l2ad_hand ;
9735
+ hdr -> b_l2hdr .b_hits = 0 ;
9736
+ hdr -> b_l2hdr .b_arcs_state =
9737
+ hdr -> b_l1hdr .b_state -> arcs_state ;
9738
+ mutex_enter (& dev -> l2ad_mtx );
9728
9739
if (pio == NULL ) {
9729
9740
/*
9730
9741
* Insert a dummy header on the buflist so
9731
9742
* l2arc_write_done() can find where the
9732
9743
* write buffers begin without searching.
9733
9744
*/
9734
- mutex_enter (& dev -> l2ad_mtx );
9735
9745
list_insert_head (& dev -> l2ad_buflist , head );
9736
- mutex_exit (& dev -> l2ad_mtx );
9746
+ }
9747
+ list_insert_head (& dev -> l2ad_buflist , hdr );
9748
+ mutex_exit (& dev -> l2ad_mtx );
9749
+ arc_hdr_set_flags (hdr , ARC_FLAG_HAS_L2HDR |
9750
+ ARC_FLAG_L2_WRITING );
9737
9751
9752
+ (void ) zfs_refcount_add_many (& dev -> l2ad_alloc ,
9753
+ arc_hdr_size (hdr ), hdr );
9754
+ l2arc_hdr_arcstats_increment (hdr );
9755
+
9756
+ boolean_t commit = l2arc_log_blk_insert (dev , hdr );
9757
+ mutex_exit (hash_lock );
9758
+
9759
+ if (pio == NULL ) {
9738
9760
cb = kmem_alloc (
9739
9761
sizeof (l2arc_write_callback_t ), KM_SLEEP );
9740
9762
cb -> l2wcb_dev = dev ;
9741
9763
cb -> l2wcb_head = head ;
9742
- /*
9743
- * Create a list to save allocated abd buffers
9744
- * for l2arc_log_blk_commit().
9745
- */
9746
9764
list_create (& cb -> l2wcb_abd_list ,
9747
9765
sizeof (l2arc_lb_abd_buf_t ),
9748
9766
offsetof(l2arc_lb_abd_buf_t , node ));
9749
9767
pio = zio_root (spa , l2arc_write_done , cb ,
9750
9768
ZIO_FLAG_CANFAIL );
9751
9769
}
9752
9770
9753
- hdr -> b_l2hdr .b_dev = dev ;
9754
- hdr -> b_l2hdr .b_hits = 0 ;
9755
-
9756
- hdr -> b_l2hdr .b_daddr = dev -> l2ad_hand ;
9757
- hdr -> b_l2hdr .b_arcs_state =
9758
- hdr -> b_l1hdr .b_state -> arcs_state ;
9759
- arc_hdr_set_flags (hdr , ARC_FLAG_HAS_L2HDR );
9760
-
9761
- mutex_enter (& dev -> l2ad_mtx );
9762
- list_insert_head (& dev -> l2ad_buflist , hdr );
9763
- mutex_exit (& dev -> l2ad_mtx );
9764
-
9765
- (void ) zfs_refcount_add_many (& dev -> l2ad_alloc ,
9766
- arc_hdr_size (hdr ), hdr );
9767
-
9768
9771
wzio = zio_write_phys (pio , dev -> l2ad_vdev ,
9769
- hdr -> b_l2hdr . b_daddr , asize , to_write ,
9772
+ dev -> l2ad_hand , asize , to_write ,
9770
9773
ZIO_CHECKSUM_OFF , NULL , hdr ,
9771
9774
ZIO_PRIORITY_ASYNC_WRITE ,
9772
9775
ZIO_FLAG_CANFAIL , B_FALSE );
9773
9776
9774
- write_lsize += HDR_GET_LSIZE (hdr );
9775
9777
DTRACE_PROBE2 (l2arc__write , vdev_t * , dev -> l2ad_vdev ,
9776
9778
zio_t * , wzio );
9779
+ zio_nowait (wzio );
9777
9780
9778
9781
write_psize += psize ;
9779
9782
write_asize += asize ;
9780
9783
dev -> l2ad_hand += asize ;
9781
- l2arc_hdr_arcstats_increment (hdr );
9782
9784
vdev_space_update (dev -> l2ad_vdev , asize , 0 , 0 );
9783
9785
9784
- mutex_exit (hash_lock );
9785
-
9786
- /*
9787
- * Append buf info to current log and commit if full.
9788
- * arcstat_l2_{size,asize} kstats are updated
9789
- * internally.
9790
- */
9791
- if (l2arc_log_blk_insert (dev , hdr ))
9786
+ if (commit ) {
9792
9787
l2arc_log_blk_commit (dev , pio , cb );
9788
+ }
9793
9789
9794
- zio_nowait (wzio );
9790
+ next :
9791
+ multilist_sublist_lock (mls );
9792
+ if (from_head )
9793
+ hdr = multilist_sublist_next (mls , marker );
9794
+ else
9795
+ hdr = multilist_sublist_prev (mls , marker );
9796
+ multilist_sublist_remove (mls , marker );
9795
9797
}
9796
9798
9797
9799
multilist_sublist_unlock (mls );
@@ -9800,9 +9802,11 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
9800
9802
break ;
9801
9803
}
9802
9804
9805
+ arc_state_free_marker (marker );
9806
+
9803
9807
/* No buffers selected for writing? */
9804
9808
if (pio == NULL ) {
9805
- ASSERT0 (write_lsize );
9809
+ ASSERT0 (write_psize );
9806
9810
ASSERT (!HDR_HAS_L1HDR (head ));
9807
9811
kmem_cache_free (hdr_l2only_cache , head );
9808
9812
@@ -11229,7 +11233,7 @@ l2arc_log_blk_insert(l2arc_dev_t *dev, const arc_buf_hdr_t *hdr)
11229
11233
L2BLK_SET_TYPE ((le )-> le_prop , hdr -> b_type );
11230
11234
L2BLK_SET_PROTECTED ((le )-> le_prop , !!(HDR_PROTECTED (hdr )));
11231
11235
L2BLK_SET_PREFETCH ((le )-> le_prop , !!(HDR_PREFETCH (hdr )));
11232
- L2BLK_SET_STATE ((le )-> le_prop , hdr -> b_l1hdr . b_state -> arcs_state );
11236
+ L2BLK_SET_STATE ((le )-> le_prop , hdr -> b_l2hdr . b_arcs_state );
11233
11237
11234
11238
dev -> l2ad_log_blk_payload_asize += vdev_psize_to_asize (dev -> l2ad_vdev ,
11235
11239
HDR_GET_PSIZE (hdr ));
0 commit comments