Skip to content

Commit cafe91c

Browse files
ebiedermBrian Maly
authored and
Brian Maly
committed
proc/sysctl: Don't grab i_lock under sysctl_lock.
Konstantin Khlebnikov <[email protected]> writes: > This patch has locking problem. I've got lockdep splat under LTP. > > [ 6633.115456] ====================================================== > [ 6633.115502] [ INFO: possible circular locking dependency detected ] > [ 6633.115553] 4.9.10-debug+ #9 Tainted: G L > [ 6633.115584] ------------------------------------------------------- > [ 6633.115627] ksm02/284980 is trying to acquire lock: > [ 6633.115659] (&sb->s_type->i_lock_key#4){+.+...}, at: [<ffffffff816bc1ce>] igrab+0x1e/0x80 > [ 6633.115834] but task is already holding lock: > [ 6633.115882] (sysctl_lock){+.+...}, at: [<ffffffff817e379b>] unregister_sysctl_table+0x6b/0x110 > [ 6633.116026] which lock already depends on the new lock. > [ 6633.116026] > [ 6633.116080] > [ 6633.116080] the existing dependency chain (in reverse order) is: > [ 6633.116117] > -> #2 (sysctl_lock){+.+...}: > -> #1 (&(&dentry->d_lockref.lock)->rlock){+.+...}: > -> #0 (&sb->s_type->i_lock_key#4){+.+...}: > > d_lock nests inside i_lock > sysctl_lock nests inside d_lock in d_compare > > This patch adds i_lock nesting inside sysctl_lock. Al Viro <[email protected]> replied: > Once ->unregistering is set, you can drop sysctl_lock just fine. So I'd > try something like this - use rcu_read_lock() in proc_sys_prune_dcache(), > drop sysctl_lock() before it and regain after. Make sure that no inodes > are added to the list ones ->unregistering has been set and use RCU list > primitives for modifying the inode list, with sysctl_lock still used to > serialize its modifications. > > Freeing struct inode is RCU-delayed (see proc_destroy_inode()), so doing > igrab() is safe there. Since we don't drop inode reference until after we'd > passed beyond it in the list, list_for_each_entry_rcu() should be fine. I agree with Al Viro's analsysis of the situtation. Fixes: d6cffbb ("proc/sysctl: prune stale dentries during unregistering") Reported-by: Konstantin Khlebnikov <[email protected]> Tested-by: Konstantin Khlebnikov <[email protected]> Suggested-by: Al Viro <[email protected]> Signed-off-by: "Eric W. Biederman" <[email protected]> (cherry picked from commit ace0c79) Orabug: 29689925 Signed-off-by: Shuning Zhang <[email protected]> Reviewed-by: Junxiao Bi <[email protected]> Signed-off-by: Brian Maly <[email protected]>
1 parent b743c09 commit cafe91c

File tree

1 file changed

+18
-13
lines changed

1 file changed

+18
-13
lines changed

fs/proc/proc_sysctl.c

+18-13
Original file line numberDiff line numberDiff line change
@@ -266,21 +266,19 @@ static void proc_sys_prune_dcache(struct ctl_table_header *head)
266266
struct inode *inode, *prev = NULL;
267267
struct proc_inode *ei;
268268

269-
list_for_each_entry(ei, &head->inodes, sysctl_inodes) {
269+
rcu_read_lock();
270+
list_for_each_entry_rcu(ei, &head->inodes, sysctl_inodes) {
270271
inode = igrab(&ei->vfs_inode);
271272
if (inode) {
272-
spin_unlock(&sysctl_lock);
273+
rcu_read_unlock();
273274
iput(prev);
274275
prev = inode;
275276
d_prune_aliases(inode);
276-
spin_lock(&sysctl_lock);
277+
rcu_read_lock();
277278
}
278279
}
279-
if (prev) {
280-
spin_unlock(&sysctl_lock);
281-
iput(prev);
282-
spin_lock(&sysctl_lock);
283-
}
280+
rcu_read_unlock();
281+
iput(prev);
284282
}
285283

286284
/* called under sysctl_lock, will reacquire if has to wait */
@@ -296,10 +294,10 @@ static void start_unregistering(struct ctl_table_header *p)
296294
p->unregistering = &wait;
297295
spin_unlock(&sysctl_lock);
298296
wait_for_completion(&wait);
299-
spin_lock(&sysctl_lock);
300297
} else {
301298
/* anything non-NULL; we'll never dereference it */
302299
p->unregistering = ERR_PTR(-EINVAL);
300+
spin_unlock(&sysctl_lock);
303301
}
304302
/*
305303
* Prune dentries for unregistered sysctls: namespaced sysctls
@@ -310,6 +308,7 @@ static void start_unregistering(struct ctl_table_header *p)
310308
* do not remove from the list until nobody holds it; walking the
311309
* list in do_sysctl() relies on that.
312310
*/
311+
spin_lock(&sysctl_lock);
313312
erase_header(p);
314313
}
315314

@@ -454,12 +453,18 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
454453
inode->i_ino = get_next_ino();
455454

456455
ei = PROC_I(inode);
457-
ei->sysctl = head;
458-
ei->sysctl_entry = table;
459456

460457

461458
spin_lock(&sysctl_lock);
462-
list_add(&ei->sysctl_inodes, &head->inodes);
459+
if (unlikely(head->unregistering)) {
460+
spin_unlock(&sysctl_lock);
461+
iput(inode);
462+
inode = NULL;
463+
goto out;
464+
}
465+
ei->sysctl = head;
466+
ei->sysctl_entry = table;
467+
list_add_rcu(&ei->sysctl_inodes, &head->inodes);
463468
head->count++;
464469
spin_unlock(&sysctl_lock);
465470

@@ -483,7 +488,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
483488
void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head)
484489
{
485490
spin_lock(&sysctl_lock);
486-
list_del(&PROC_I(inode)->sysctl_inodes);
491+
list_del_rcu(&PROC_I(inode)->sysctl_inodes);
487492
if (!--head->count)
488493
kfree_rcu(head, rcu);
489494
spin_unlock(&sysctl_lock);

0 commit comments

Comments
 (0)