Commit 6caa257

melversfX-bot authored and committed
kcsan: Turn report_filterlist_lock into a raw_spinlock
[ Upstream commit 59458fa4ddb47e7891c61b4a928d13d5f5b00aa0 ]

Ran Xiaokai reports that with a KCSAN-enabled PREEMPT_RT kernel, we can
see splats like:

| BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48
| in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 0, name: swapper/1
| preempt_count: 10002, expected: 0
| RCU nest depth: 0, expected: 0
| no locks held by swapper/1/0.
| irq event stamp: 156674
| hardirqs last enabled at (156673): [<ffffffff81130bd9>] do_idle+0x1f9/0x240
| hardirqs last disabled at (156674): [<ffffffff82254f84>] sysvec_apic_timer_interrupt+0x14/0xc0
| softirqs last enabled at (0): [<ffffffff81099f47>] copy_process+0xfc7/0x4b60
| softirqs last disabled at (0): [<0000000000000000>] 0x0
| Preemption disabled at:
| [<ffffffff814a3e2a>] paint_ptr+0x2a/0x90
| CPU: 1 UID: 0 PID: 0 Comm: swapper/1 Not tainted 6.11.0+ #3
| Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014
| Call Trace:
|  <IRQ>
|  dump_stack_lvl+0x7e/0xc0
|  dump_stack+0x1d/0x30
|  __might_resched+0x1a2/0x270
|  rt_spin_lock+0x68/0x170
|  kcsan_skip_report_debugfs+0x43/0xe0
|  print_report+0xb5/0x590
|  kcsan_report_known_origin+0x1b1/0x1d0
|  kcsan_setup_watchpoint+0x348/0x650
|  __tsan_unaligned_write1+0x16d/0x1d0
|  hrtimer_interrupt+0x3d6/0x430
|  __sysvec_apic_timer_interrupt+0xe8/0x3a0
|  sysvec_apic_timer_interrupt+0x97/0xc0
|  </IRQ>

On a detected data race, KCSAN's reporting logic checks if it should
filter the report. That list is protected by the report_filterlist_lock
*non-raw* spinlock, which may sleep on RT kernels.

Since KCSAN may report data races in any context, convert it to a
raw_spinlock.

This requires being careful about when to allocate memory for the
filter list itself, which can be modified via KCSAN's debugfs
interface. Concurrent modification of the filter list via debugfs
should be rare: the chosen strategy is to optimistically pre-allocate
memory before the critical section and discard if unused.

Link: https://lore.kernel.org/all/[email protected]/
Reported-by: Ran Xiaokai <[email protected]>
Tested-by: Ran Xiaokai <[email protected]>
Signed-off-by: Marco Elver <[email protected]>
Signed-off-by: Sasha Levin <[email protected]>
1 parent e14f092 commit 6caa257
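The "optimistically pre-allocate, discard if unused" strategy is worth spelling out, since it is a common pattern for growing a raw_spinlock-protected array: GFP_KERNEL allocations can sleep, and on PREEMPT_RT even GFP_ATOMIC allocations are not safe under a raw spinlock (the allocator itself may take sleeping locks there), so every kmalloc()/kfree() has to happen outside the critical section. Below is a minimal kernel-C sketch of that pattern, not the patched function itself; the names (my_lock, buf, used, size, append_entry) are hypothetical stand-ins:

	#include <linux/compiler.h>	/* data_race() */
	#include <linux/slab.h>
	#include <linux/spinlock.h>
	#include <linux/string.h>

	static DEFINE_RAW_SPINLOCK(my_lock);
	static unsigned long *buf;	/* grows on demand, like report_filterlist.addrs */
	static size_t used, size;

	static int append_entry(unsigned long val)
	{
		unsigned long *delay_free = NULL, *new_buf = NULL;
		unsigned long flags;
		size_t new_size = 0;

	retry_alloc:
		/* Optimistic, unlocked check: allocate before taking the raw lock. */
		if (data_race(used == size)) {
			new_size = (size ?: 4) * 2;
			delay_free = new_buf = kmalloc_array(new_size, sizeof(*buf), GFP_KERNEL);
			if (!new_buf)
				return -ENOMEM;
		}

		raw_spin_lock_irqsave(&my_lock, flags);
		if (used == size) {
			/* A concurrent writer may have grown past our pre-allocation. */
			if (used >= new_size) {
				raw_spin_unlock_irqrestore(&my_lock, flags);
				kfree(new_buf);	/* kfree(NULL) is safe */
				delay_free = new_buf = NULL;
				goto retry_alloc;
			}
			if (used)
				memcpy(new_buf, buf, used * sizeof(*buf));
			delay_free = buf;	/* defer freeing the old array */
			buf = new_buf;
			size = new_size;
		}
		buf[used++] = val;
		raw_spin_unlock_irqrestore(&my_lock, flags);

		/* Frees either the replaced array or an unused pre-allocation. */
		kfree(delay_free);
		return 0;
	}

If another writer grows the array between the unlocked check and the lock acquisition, the retry loop re-allocates at the larger size; if the pre-allocation merely turns out to be unneeded, delay_free still points at it and the final kfree() discards it. In KCSAN's case this path only runs on writes to the debugfs file (e.g. echo '!func_name' > /sys/kernel/debug/kcsan), which is why an occasional wasted allocation is an acceptable trade.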

1 file changed: +36 -38 lines changed


kernel/kcsan/debugfs.c

Lines changed: 36 additions & 38 deletions
@@ -41,14 +41,8 @@ static struct {
 	int used;	/* number of elements used */
 	bool sorted;	/* if elements are sorted */
 	bool whitelist;	/* if list is a blacklist or whitelist */
-} report_filterlist = {
-	.addrs		= NULL,
-	.size		= 8,	/* small initial size */
-	.used		= 0,
-	.sorted		= false,
-	.whitelist	= false,	/* default is blacklist */
-};
-static DEFINE_SPINLOCK(report_filterlist_lock);
+} report_filterlist;
+static DEFINE_RAW_SPINLOCK(report_filterlist_lock);

 /*
  * The microbenchmark allows benchmarking KCSAN core runtime only. To run
@@ -105,7 +99,7 @@ bool kcsan_skip_report_debugfs(unsigned long func_addr)
 		return false;
 	func_addr -= offset;	/* Get function start */

-	spin_lock_irqsave(&report_filterlist_lock, flags);
+	raw_spin_lock_irqsave(&report_filterlist_lock, flags);
 	if (report_filterlist.used == 0)
 		goto out;

@@ -122,67 +116,71 @@ bool kcsan_skip_report_debugfs(unsigned long func_addr)
122116
ret = !ret;
123117

124118
out:
125-
spin_unlock_irqrestore(&report_filterlist_lock, flags);
119+
raw_spin_unlock_irqrestore(&report_filterlist_lock, flags);
126120
return ret;
127121
}
128122

129123
static void set_report_filterlist_whitelist(bool whitelist)
130124
{
131125
unsigned long flags;
132126

133-
spin_lock_irqsave(&report_filterlist_lock, flags);
127+
raw_spin_lock_irqsave(&report_filterlist_lock, flags);
134128
report_filterlist.whitelist = whitelist;
135-
spin_unlock_irqrestore(&report_filterlist_lock, flags);
129+
raw_spin_unlock_irqrestore(&report_filterlist_lock, flags);
136130
}
137131

138132
/* Returns 0 on success, error-code otherwise. */
139133
static ssize_t insert_report_filterlist(const char *func)
140134
{
141135
unsigned long flags;
142136
unsigned long addr = kallsyms_lookup_name(func);
137+
unsigned long *delay_free = NULL;
138+
unsigned long *new_addrs = NULL;
139+
size_t new_size = 0;
143140
ssize_t ret = 0;
144141

145142
if (!addr) {
146143
pr_err("could not find function: '%s'\n", func);
147144
return -ENOENT;
148145
}
149146

150-
spin_lock_irqsave(&report_filterlist_lock, flags);
147+
retry_alloc:
148+
/*
149+
* Check if we need an allocation, and re-validate under the lock. Since
150+
* the report_filterlist_lock is a raw, cannot allocate under the lock.
151+
*/
152+
if (data_race(report_filterlist.used == report_filterlist.size)) {
153+
new_size = (report_filterlist.size ?: 4) * 2;
154+
delay_free = new_addrs = kmalloc_array(new_size, sizeof(unsigned long), GFP_KERNEL);
155+
if (!new_addrs)
156+
return -ENOMEM;
157+
}
151158

152-
if (report_filterlist.addrs == NULL) {
153-
/* initial allocation */
154-
report_filterlist.addrs =
155-
kmalloc_array(report_filterlist.size,
156-
sizeof(unsigned long), GFP_ATOMIC);
157-
if (report_filterlist.addrs == NULL) {
158-
ret = -ENOMEM;
159-
goto out;
160-
}
161-
} else if (report_filterlist.used == report_filterlist.size) {
162-
/* resize filterlist */
163-
size_t new_size = report_filterlist.size * 2;
164-
unsigned long *new_addrs =
165-
krealloc(report_filterlist.addrs,
166-
new_size * sizeof(unsigned long), GFP_ATOMIC);
167-
168-
if (new_addrs == NULL) {
169-
/* leave filterlist itself untouched */
170-
ret = -ENOMEM;
171-
goto out;
159+
raw_spin_lock_irqsave(&report_filterlist_lock, flags);
160+
if (report_filterlist.used == report_filterlist.size) {
161+
/* Check we pre-allocated enough, and retry if not. */
162+
if (report_filterlist.used >= new_size) {
163+
raw_spin_unlock_irqrestore(&report_filterlist_lock, flags);
164+
kfree(new_addrs); /* kfree(NULL) is safe */
165+
delay_free = new_addrs = NULL;
166+
goto retry_alloc;
172167
}
173168

169+
if (report_filterlist.used)
170+
memcpy(new_addrs, report_filterlist.addrs, report_filterlist.used * sizeof(unsigned long));
171+
delay_free = report_filterlist.addrs; /* free the old list */
172+
report_filterlist.addrs = new_addrs; /* switch to the new list */
174173
report_filterlist.size = new_size;
175-
report_filterlist.addrs = new_addrs;
176174
}
177175

178176
/* Note: deduplicating should be done in userspace. */
179177
report_filterlist.addrs[report_filterlist.used++] =
180178
kallsyms_lookup_name(func);
181179
report_filterlist.sorted = false;
182180

183-
out:
184-
spin_unlock_irqrestore(&report_filterlist_lock, flags);
181+
raw_spin_unlock_irqrestore(&report_filterlist_lock, flags);
185182

183+
kfree(delay_free);
186184
return ret;
187185
}
188186

@@ -199,13 +197,13 @@ static int show_info(struct seq_file *file, void *v)
 	}

 	/* show filter functions, and filter type */
-	spin_lock_irqsave(&report_filterlist_lock, flags);
+	raw_spin_lock_irqsave(&report_filterlist_lock, flags);
 	seq_printf(file, "\n%s functions: %s\n",
 		   report_filterlist.whitelist ? "whitelisted" : "blacklisted",
 		   report_filterlist.used == 0 ? "none" : "");
 	for (i = 0; i < report_filterlist.used; ++i)
 		seq_printf(file, " %ps\n", (void *)report_filterlist.addrs[i]);
-	spin_unlock_irqrestore(&report_filterlist_lock, flags);
+	raw_spin_unlock_irqrestore(&report_filterlist_lock, flags);

 	return 0;
 }
