Skip to content

Commit b743c09

Browse files
koct9iBrian Maly
authored and
Brian Maly
committed
proc/sysctl: prune stale dentries during unregistering
Currently, unregistering a sysctl table does not prune its dentries. Stale dentries can slow down sysctl operations significantly.

For example, the command:

 # for i in {1..100000} ; do unshare -n -- sysctl -a &> /dev/null ; done

creates millions of stale dentries around the sysctls of the loopback interface:

 # sysctl fs.dentry-state
 fs.dentry-state = 25812579 24724135 45 0 0 0

All of them have matching names, thus lookup has to scan through the whole hash chain and call d_compare (proc_sys_compare), which checks them under a system-wide spinlock (sysctl_lock).

 # time sysctl -a > /dev/null
 real 1m12.806s
 user 0m0.016s
 sys 1m12.400s

Currently only the memory reclaimer can remove this garbage, but without significant memory pressure this never happens.

This patch collects sysctl inodes into a list on the sysctl table header and prunes all their dentries once that table is unregistered.

Konstantin Khlebnikov <[email protected]> writes:
> On 10.02.2017 10:47, Al Viro wrote:
>> how about
>> the matching stats *after* that patch?
>
> dcache size doesn't grow endlessly, so stats are fine
>
> # sysctl fs.dentry-state
> fs.dentry-state = 92712 58376 45 0 0 0
>
> # time sysctl -a &>/dev/null
>
> real 0m0.013s
> user 0m0.004s
> sys 0m0.008s

Signed-off-by: Konstantin Khlebnikov <[email protected]>
Suggested-by: Al Viro <[email protected]>
Signed-off-by: Eric W. Biederman <[email protected]>
(cherry picked from commit d6cffbb)
Signed-off-by: Brian Maly <[email protected]>

Conflicts:
 fs/proc/proc_sysctl.c
Context has changed

Orabug: 29689925
Signed-off-by: Shuning Zhang <[email protected]>
Reviewed-by: Junxiao Bi <[email protected]>
Signed-off-by: Brian Maly <[email protected]>
1 parent 2549879 commit b743c09

File tree

4 files changed

+52
-19
lines changed

4 files changed

+52
-19
lines changed

fs/proc/inode.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,11 @@ static void proc_evict_inode(struct inode *inode)
4444
de = PDE(inode);
4545
if (de)
4646
pde_put(de);
47+
4748
head = PROC_I(inode)->sysctl;
4849
if (head) {
4950
RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL);
50-
sysctl_head_put(head);
51+
proc_sys_evict_inode(inode, head);
5152
}
5253
}
5354

fs/proc/internal.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ struct proc_inode {
8787
struct proc_dir_entry *pde;
8888
struct ctl_table_header *sysctl;
8989
struct ctl_table *sysctl_entry;
90+
struct list_head sysctl_inodes;
9091
const struct proc_ns_operations *ns_ops;
9192
struct inode vfs_inode;
9293
};
@@ -286,10 +287,12 @@ extern void proc_thread_self_init(void);
286287
*/
287288
#ifdef CONFIG_PROC_SYSCTL
288289
extern int proc_sys_init(void);
289-
extern void sysctl_head_put(struct ctl_table_header *);
290+
extern void proc_sys_evict_inode(struct inode *inode,
291+
struct ctl_table_header *head);
290292
#else
291293
static inline void proc_sys_init(void) { }
292-
static inline void sysctl_head_put(struct ctl_table_header *head) { }
294+
static inline void proc_sys_evict_inode(struct inode *inode,
295+
struct ctl_table_header *head) { }
293296
#endif
294297

295298
/*

fs/proc/proc_sysctl.c

Lines changed: 44 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,7 @@ static void init_header(struct ctl_table_header *head,
190190
head->set = set;
191191
head->parent = NULL;
192192
head->node = node;
193+
INIT_LIST_HEAD(&head->inodes);
193194
if (node) {
194195
struct ctl_table *entry;
195196
for (entry = table; entry->procname; entry++, node++)
@@ -259,6 +260,29 @@ static void unuse_table(struct ctl_table_header *p)
259260
complete(p->unregistering);
260261
}
261262

263+
/* called under sysctl_lock */
264+
static void proc_sys_prune_dcache(struct ctl_table_header *head)
265+
{
266+
struct inode *inode, *prev = NULL;
267+
struct proc_inode *ei;
268+
269+
list_for_each_entry(ei, &head->inodes, sysctl_inodes) {
270+
inode = igrab(&ei->vfs_inode);
271+
if (inode) {
272+
spin_unlock(&sysctl_lock);
273+
iput(prev);
274+
prev = inode;
275+
d_prune_aliases(inode);
276+
spin_lock(&sysctl_lock);
277+
}
278+
}
279+
if (prev) {
280+
spin_unlock(&sysctl_lock);
281+
iput(prev);
282+
spin_lock(&sysctl_lock);
283+
}
284+
}
285+
262286
/* called under sysctl_lock, will reacquire if has to wait */
263287
static void start_unregistering(struct ctl_table_header *p)
264288
{
@@ -277,28 +301,18 @@ static void start_unregistering(struct ctl_table_header *p)
277301
/* anything non-NULL; we'll never dereference it */
278302
p->unregistering = ERR_PTR(-EINVAL);
279303
}
304+
/*
305+
* Prune dentries for unregistered sysctls: namespaced sysctls
306+
* can have duplicate names and contaminate dcache very badly.
307+
*/
308+
proc_sys_prune_dcache(p);
280309
/*
281310
* do not remove from the list until nobody holds it; walking the
282311
* list in do_sysctl() relies on that.
283312
*/
284313
erase_header(p);
285314
}
286315

287-
static void sysctl_head_get(struct ctl_table_header *head)
288-
{
289-
spin_lock(&sysctl_lock);
290-
head->count++;
291-
spin_unlock(&sysctl_lock);
292-
}
293-
294-
void sysctl_head_put(struct ctl_table_header *head)
295-
{
296-
spin_lock(&sysctl_lock);
297-
if (!--head->count)
298-
kfree_rcu(head, rcu);
299-
spin_unlock(&sysctl_lock);
300-
}
301-
302316
static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
303317
{
304318
BUG_ON(!head);
@@ -439,11 +453,16 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
439453

440454
inode->i_ino = get_next_ino();
441455

442-
sysctl_head_get(head);
443456
ei = PROC_I(inode);
444457
ei->sysctl = head;
445458
ei->sysctl_entry = table;
446459

460+
461+
spin_lock(&sysctl_lock);
462+
list_add(&ei->sysctl_inodes, &head->inodes);
463+
head->count++;
464+
spin_unlock(&sysctl_lock);
465+
447466
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
448467
inode->i_mode = table->mode;
449468
if (!S_ISDIR(table->mode)) {
@@ -461,6 +480,15 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
461480
return inode;
462481
}
463482

483+
void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head)
484+
{
485+
spin_lock(&sysctl_lock);
486+
list_del(&PROC_I(inode)->sysctl_inodes);
487+
if (!--head->count)
488+
kfree_rcu(head, rcu);
489+
spin_unlock(&sysctl_lock);
490+
}
491+
464492
static struct ctl_table_header *grab_header(struct inode *inode)
465493
{
466494
struct ctl_table_header *head = PROC_I(inode)->sysctl;

include/linux/sysctl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ struct ctl_table_header
139139
struct ctl_table_set *set;
140140
struct ctl_dir *parent;
141141
struct ctl_node *node;
142+
struct list_head inodes; /* head for proc_inode->sysctl_inodes */
142143
};
143144

144145
struct ctl_dir {

0 commit comments

Comments
 (0)