Skip to content

Commit eb85dac

Browse files
konis authored and akpm00 committed
nilfs2: fix potential hang in nilfs_detach_log_writer()
Syzbot has reported a potential hang in nilfs_detach_log_writer() called during nilfs2 unmount.

Analysis revealed that this is because nilfs_segctor_sync(), which synchronizes with the log writer thread, can be called after nilfs_segctor_destroy() terminates that thread, as shown in the call trace below:

nilfs_detach_log_writer
  nilfs_segctor_destroy
    nilfs_segctor_kill_thread  --> Shut down log writer thread
    flush_work
      nilfs_iput_work_func
        nilfs_dispose_list
          iput
            nilfs_evict_inode
              nilfs_transaction_commit
                nilfs_construct_segment (if inode needs sync)
                  nilfs_segctor_sync  --> Attempt to synchronize with log writer thread
                           *** DEADLOCK ***

Fix this issue by changing nilfs_segctor_sync() so that it returns normally without synchronizing once the log writer thread has terminated, and by forcing tasks that are already waiting to complete once after the thread terminates.

The skipped inode metadata flushout will then be processed together in the subsequent cleanup work in nilfs_segctor_destroy().

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Ryusuke Konishi <[email protected]>
Reported-by: [email protected]
Closes: https://syzkaller.appspot.com/bug?extid=e3973c409251e136fdd0
Tested-by: Ryusuke Konishi <[email protected]>
Cc: <[email protected]>
Cc: "Bai, Shuangpeng" <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
1 parent 936184e commit eb85dac

File tree

1 file changed

+18
-3
lines changed

1 file changed

+18
-3
lines changed

fs/nilfs2/segment.c

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2190,6 +2190,14 @@ static int nilfs_segctor_sync(struct nilfs_sc_info *sci)
21902190
for (;;) {
21912191
set_current_state(TASK_INTERRUPTIBLE);
21922192

2193+
/*
2194+
* Synchronize only while the log writer thread is alive.
2195+
* Leave flushing out after the log writer thread exits to
2196+
* the cleanup work in nilfs_segctor_destroy().
2197+
*/
2198+
if (!sci->sc_task)
2199+
break;
2200+
21932201
if (atomic_read(&wait_req.done)) {
21942202
err = wait_req.err;
21952203
break;
@@ -2205,15 +2213,15 @@ static int nilfs_segctor_sync(struct nilfs_sc_info *sci)
22052213
return err;
22062214
}
22072215

2208-
static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err)
2216+
static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err, bool force)
22092217
{
22102218
struct nilfs_segctor_wait_request *wrq, *n;
22112219
unsigned long flags;
22122220

22132221
spin_lock_irqsave(&sci->sc_wait_request.lock, flags);
22142222
list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.head, wq.entry) {
22152223
if (!atomic_read(&wrq->done) &&
2216-
nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq)) {
2224+
(force || nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq))) {
22172225
wrq->err = err;
22182226
atomic_set(&wrq->done, 1);
22192227
}
@@ -2362,7 +2370,7 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err)
23622370
if (mode == SC_LSEG_SR) {
23632371
sci->sc_state &= ~NILFS_SEGCTOR_COMMIT;
23642372
sci->sc_seq_done = sci->sc_seq_accepted;
2365-
nilfs_segctor_wakeup(sci, err);
2373+
nilfs_segctor_wakeup(sci, err, false);
23662374
sci->sc_flush_request = 0;
23672375
} else {
23682376
if (mode == SC_FLUSH_FILE)
@@ -2746,6 +2754,13 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
27462754
|| sci->sc_seq_request != sci->sc_seq_done);
27472755
spin_unlock(&sci->sc_state_lock);
27482756

2757+
/*
2758+
* Forcibly wake up tasks waiting in nilfs_segctor_sync(), which can
2759+
* be called from delayed iput() via nilfs_evict_inode() and can race
2760+
* with the above log writer thread termination.
2761+
*/
2762+
nilfs_segctor_wakeup(sci, 0, true);
2763+
27492764
if (flush_work(&sci->sc_iput_work))
27502765
flag = true;
27512766

0 commit comments

Comments
 (0)