Skip to content

Commit f5fbe6b

Browse files
rgushchin authored and torvalds committed
writeback, cgroup: support switching multiple inodes at once
Currently only a single inode can be switched to another writeback structure at once. That means that to switch an inode, a separate inode_switch_wbs_context structure must be allocated, and a separate RCU callback and work must be scheduled. It's fine for the existing ad-hoc switching, which is not happening that often, but sub-optimal for the massive switching required in order to release a writeback structure. To prepare for it, let's add support for switching multiple inodes at once. Instead of containing a single inode pointer, inode_switch_wbs_context will contain a NULL-terminated array of inode pointers. inode_do_switch_wbs() will be called for each inode. To optimize the locking, bdi->wb_switch_rwsem and old_wb's and new_wb's list_locks will be acquired and released only once for all inodes. wb_wakeup() will also be called only once. Instead of calling wb_put(old_wb) after each successful switch, wb_put_many() is introduced and used. Link: https://lkml.kernel.org/r/[email protected] Signed-off-by: Roman Gushchin <[email protected]> Acked-by: Tejun Heo <[email protected]> Reviewed-by: Jan Kara <[email protected]> Acked-by: Dennis Zhou <[email protected]> Cc: Alexander Viro <[email protected]> Cc: Dave Chinner <[email protected]> Cc: Jan Kara <[email protected]> Cc: Jens Axboe <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent 72d4512 commit f5fbe6b

File tree

2 files changed

+80
-44
lines changed

2 files changed

+80
-44
lines changed

fs/fs-writeback.c

Lines changed: 64 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -335,10 +335,18 @@ static struct bdi_writeback *inode_to_wb_and_lock_list(struct inode *inode)
335335
}
336336

337337
struct inode_switch_wbs_context {
338-
struct inode *inode;
339-
struct bdi_writeback *new_wb;
340-
341338
struct rcu_work work;
339+
340+
/*
341+
* Multiple inodes can be switched at once. The switching procedure
342+
* consists of two parts, separated by a RCU grace period. To make
343+
* sure that the second part is executed for each inode gone through
344+
* the first part, all inode pointers are placed into a NULL-terminated
345+
* array embedded into struct inode_switch_wbs_context. Otherwise
346+
* an inode could be left in a non-consistent state.
347+
*/
348+
struct bdi_writeback *new_wb;
349+
struct inode *inodes[];
342350
};
343351

344352
static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi)
@@ -351,39 +359,15 @@ static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi)
351359
up_write(&bdi->wb_switch_rwsem);
352360
}
353361

354-
static void inode_do_switch_wbs(struct inode *inode,
362+
static bool inode_do_switch_wbs(struct inode *inode,
363+
struct bdi_writeback *old_wb,
355364
struct bdi_writeback *new_wb)
356365
{
357-
struct backing_dev_info *bdi = inode_to_bdi(inode);
358366
struct address_space *mapping = inode->i_mapping;
359-
struct bdi_writeback *old_wb = inode->i_wb;
360367
XA_STATE(xas, &mapping->i_pages, 0);
361368
struct page *page;
362369
bool switched = false;
363370

364-
/*
365-
* If @inode switches cgwb membership while sync_inodes_sb() is
366-
* being issued, sync_inodes_sb() might miss it. Synchronize.
367-
*/
368-
down_read(&bdi->wb_switch_rwsem);
369-
370-
/*
371-
* By the time control reaches here, RCU grace period has passed
372-
* since I_WB_SWITCH assertion and all wb stat update transactions
373-
* between unlocked_inode_to_wb_begin/end() are guaranteed to be
374-
* synchronizing against the i_pages lock.
375-
*
376-
* Grabbing old_wb->list_lock, inode->i_lock and the i_pages lock
377-
* gives us exclusion against all wb related operations on @inode
378-
* including IO list manipulations and stat updates.
379-
*/
380-
if (old_wb < new_wb) {
381-
spin_lock(&old_wb->list_lock);
382-
spin_lock_nested(&new_wb->list_lock, SINGLE_DEPTH_NESTING);
383-
} else {
384-
spin_lock(&new_wb->list_lock);
385-
spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING);
386-
}
387371
spin_lock(&inode->i_lock);
388372
xa_lock_irq(&mapping->i_pages);
389373

@@ -458,25 +442,63 @@ static void inode_do_switch_wbs(struct inode *inode,
458442

459443
xa_unlock_irq(&mapping->i_pages);
460444
spin_unlock(&inode->i_lock);
461-
spin_unlock(&new_wb->list_lock);
462-
spin_unlock(&old_wb->list_lock);
463-
464-
up_read(&bdi->wb_switch_rwsem);
465445

466-
if (switched) {
467-
wb_wakeup(new_wb);
468-
wb_put(old_wb);
469-
}
446+
return switched;
470447
}
471448

472449
static void inode_switch_wbs_work_fn(struct work_struct *work)
473450
{
474451
struct inode_switch_wbs_context *isw =
475452
container_of(to_rcu_work(work), struct inode_switch_wbs_context, work);
453+
struct backing_dev_info *bdi = inode_to_bdi(isw->inodes[0]);
454+
struct bdi_writeback *old_wb = isw->inodes[0]->i_wb;
455+
struct bdi_writeback *new_wb = isw->new_wb;
456+
unsigned long nr_switched = 0;
457+
struct inode **inodep;
458+
459+
/*
460+
* If @inode switches cgwb membership while sync_inodes_sb() is
461+
* being issued, sync_inodes_sb() might miss it. Synchronize.
462+
*/
463+
down_read(&bdi->wb_switch_rwsem);
464+
465+
/*
466+
* By the time control reaches here, RCU grace period has passed
467+
* since I_WB_SWITCH assertion and all wb stat update transactions
468+
* between unlocked_inode_to_wb_begin/end() are guaranteed to be
469+
* synchronizing against the i_pages lock.
470+
*
471+
* Grabbing old_wb->list_lock, inode->i_lock and the i_pages lock
472+
* gives us exclusion against all wb related operations on @inode
473+
* including IO list manipulations and stat updates.
474+
*/
475+
if (old_wb < new_wb) {
476+
spin_lock(&old_wb->list_lock);
477+
spin_lock_nested(&new_wb->list_lock, SINGLE_DEPTH_NESTING);
478+
} else {
479+
spin_lock(&new_wb->list_lock);
480+
spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING);
481+
}
482+
483+
for (inodep = isw->inodes; *inodep; inodep++) {
484+
WARN_ON_ONCE((*inodep)->i_wb != old_wb);
485+
if (inode_do_switch_wbs(*inodep, old_wb, new_wb))
486+
nr_switched++;
487+
}
488+
489+
spin_unlock(&new_wb->list_lock);
490+
spin_unlock(&old_wb->list_lock);
491+
492+
up_read(&bdi->wb_switch_rwsem);
493+
494+
if (nr_switched) {
495+
wb_wakeup(new_wb);
496+
wb_put_many(old_wb, nr_switched);
497+
}
476498

477-
inode_do_switch_wbs(isw->inode, isw->new_wb);
478-
wb_put(isw->new_wb);
479-
iput(isw->inode);
499+
for (inodep = isw->inodes; *inodep; inodep++)
500+
iput(*inodep);
501+
wb_put(new_wb);
480502
kfree(isw);
481503
atomic_dec(&isw_nr_in_flight);
482504
}
@@ -503,7 +525,7 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
503525
if (atomic_read(&isw_nr_in_flight) > WB_FRN_MAX_IN_FLIGHT)
504526
return;
505527

506-
isw = kzalloc(sizeof(*isw), GFP_ATOMIC);
528+
isw = kzalloc(sizeof(*isw) + 2 * sizeof(struct inode *), GFP_ATOMIC);
507529
if (!isw)
508530
return;
509531

@@ -530,7 +552,7 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
530552
__iget(inode);
531553
spin_unlock(&inode->i_lock);
532554

533-
isw->inode = inode;
555+
isw->inodes[0] = inode;
534556

535557
/*
536558
* In addition to synchronizing among switchers, I_WB_SWITCH tells

include/linux/backing-dev-defs.h

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -240,8 +240,9 @@ static inline void wb_get(struct bdi_writeback *wb)
240240
/**
241241
* wb_put_many - decrement a wb's refcount
242242
* @wb: bdi_writeback to put
243+
* @nr: number of references to put
243244
*/
244-
static inline void wb_put(struct bdi_writeback *wb)
245+
static inline void wb_put_many(struct bdi_writeback *wb, unsigned long nr)
245246
{
246247
if (WARN_ON_ONCE(!wb->bdi)) {
247248
/*
@@ -252,7 +253,16 @@ static inline void wb_put(struct bdi_writeback *wb)
252253
}
253254

254255
if (wb != &wb->bdi->wb)
255-
percpu_ref_put(&wb->refcnt);
256+
percpu_ref_put_many(&wb->refcnt, nr);
257+
}
258+
259+
/**
260+
* wb_put - decrement a wb's refcount
261+
* @wb: bdi_writeback to put
262+
*/
263+
static inline void wb_put(struct bdi_writeback *wb)
264+
{
265+
wb_put_many(wb, 1);
256266
}
257267

258268
/**
@@ -281,6 +291,10 @@ static inline void wb_put(struct bdi_writeback *wb)
281291
{
282292
}
283293

294+
static inline void wb_put_many(struct bdi_writeback *wb, unsigned long nr)
295+
{
296+
}
297+
284298
static inline bool wb_dying(struct bdi_writeback *wb)
285299
{
286300
return false;

0 commit comments

Comments
 (0)