Skip to content

Commit dcd0b62

Browse files
spandruvada authored and rafaeljw committed
powercap: intel_rapl: Fix possible recursive lock warning
With the RAPL PMU addition, there is recursive locking when the CPU online callback function calls rapl_package_add_pmu(). Here, cpu_hotplug_lock is already acquired by cpuhp_thread_fun() and rapl_package_add_pmu() tries to acquire it again.

<4>[ 8.197433] ============================================
<4>[ 8.197437] WARNING: possible recursive locking detected
<4>[ 8.197440] 6.19.0-rc1-lgci-xe-xe-4242-05b7c58b3367dca84+ #1 Not tainted
<4>[ 8.197444] --------------------------------------------
<4>[ 8.197447] cpuhp/0/20 is trying to acquire lock:
<4>[ 8.197450] ffffffff83487870 (cpu_hotplug_lock){++++}-{0:0}, at: rapl_package_add_pmu+0x37/0x370 [intel_rapl_common]
<4>[ 8.197463] but task is already holding lock:
<4>[ 8.197466] ffffffff83487870 (cpu_hotplug_lock){++++}-{0:0}, at: cpuhp_thread_fun+0x6d/0x290
<4>[ 8.197477] other info that might help us debug this:
<4>[ 8.197480] Possible unsafe locking scenario:
<4>[ 8.197483] CPU0
<4>[ 8.197485] ----
<4>[ 8.197487] lock(cpu_hotplug_lock);
<4>[ 8.197490] lock(cpu_hotplug_lock);
<4>[ 8.197493] *** DEADLOCK ***
..
..
<4>[ 8.197542] __lock_acquire+0x146e/0x2790
<4>[ 8.197548] lock_acquire+0xc4/0x2c0
<4>[ 8.197550] ? rapl_package_add_pmu+0x37/0x370 [intel_rapl_common]
<4>[ 8.197556] cpus_read_lock+0x41/0x110
<4>[ 8.197558] ? rapl_package_add_pmu+0x37/0x370 [intel_rapl_common]
<4>[ 8.197561] rapl_package_add_pmu+0x37/0x370 [intel_rapl_common]
<4>[ 8.197565] rapl_cpu_online+0x85/0x87 [intel_rapl_msr]
<4>[ 8.197568] ? __pfx_rapl_cpu_online+0x10/0x10 [intel_rapl_msr]
<4>[ 8.197570] cpuhp_invoke_callback+0x41f/0x6c0
<4>[ 8.197573] ? cpuhp_thread_fun+0x6d/0x290
<4>[ 8.197575] cpuhp_thread_fun+0x1e2/0x290
<4>[ 8.197578] ? smpboot_thread_fn+0x26/0x290
<4>[ 8.197581] smpboot_thread_fn+0x12f/0x290
<4>[ 8.197584] ? __pfx_smpboot_thread_fn+0x10/0x10
<4>[ 8.197586] kthread+0x11f/0x250
<4>[ 8.197589] ? __pfx_kthread+0x10/0x10
<4>[ 8.197592] ret_from_fork+0x344/0x3a0
<4>[ 8.197595] ? __pfx_kthread+0x10/0x10
<4>[ 8.197597] ret_from_fork_asm+0x1a/0x30
<4>[ 8.197604] </TASK>

Fix this issue in the same way the RAPL powercap package domain is added from the same CPU online callback: by introducing another interface which doesn't call cpus_read_lock(). Add rapl_package_add_pmu_locked() and rapl_package_remove_pmu_locked(), which don't call cpus_read_lock().

Fixes: 748d6ba ("powercap: intel_rapl: Enable MSR-based RAPL PMU support")
Reported-by: Borah, Chaitanya Kumar <[email protected]>
Closes: https://lore.kernel.org/linux-pm/[email protected]/T/#u
Tested-by: Kuppuswamy Sathyanarayanan <[email protected]>
Tested-by: RavitejaX Veesam <[email protected]>
Signed-off-by: Srinivas Pandruvada <[email protected]>
Link: https://patch.msgid.link/[email protected]
Signed-off-by: Rafael J. Wysocki <[email protected]>
1 parent efc4c35 commit dcd0b62

File tree

3 files changed

+24
-8
lines changed

3 files changed

+24
-8
lines changed

drivers/powercap/intel_rapl_common.c

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2032,16 +2032,14 @@ static int rapl_pmu_update(struct rapl_package *rp)
20322032
return ret;
20332033
}
20342034

2035-
int rapl_package_add_pmu(struct rapl_package *rp)
2035+
int rapl_package_add_pmu_locked(struct rapl_package *rp)
20362036
{
20372037
struct rapl_package_pmu_data *data = &rp->pmu_data;
20382038
int idx;
20392039

20402040
if (rp->has_pmu)
20412041
return -EEXIST;
20422042

2043-
guard(cpus_read_lock)();
2044-
20452043
for (idx = 0; idx < rp->nr_domains; idx++) {
20462044
struct rapl_domain *rd = &rp->domains[idx];
20472045
int domain = rd->id;
@@ -2091,17 +2089,23 @@ int rapl_package_add_pmu(struct rapl_package *rp)
20912089

20922090
return rapl_pmu_update(rp);
20932091
}
2092+
EXPORT_SYMBOL_GPL(rapl_package_add_pmu_locked);
2093+
2094+
int rapl_package_add_pmu(struct rapl_package *rp)
2095+
{
2096+
guard(cpus_read_lock)();
2097+
2098+
return rapl_package_add_pmu_locked(rp);
2099+
}
20942100
EXPORT_SYMBOL_GPL(rapl_package_add_pmu);
20952101

2096-
void rapl_package_remove_pmu(struct rapl_package *rp)
2102+
void rapl_package_remove_pmu_locked(struct rapl_package *rp)
20972103
{
20982104
struct rapl_package *pos;
20992105

21002106
if (!rp->has_pmu)
21012107
return;
21022108

2103-
guard(cpus_read_lock)();
2104-
21052109
list_for_each_entry(pos, &rapl_packages, plist) {
21062110
/* PMU is still needed */
21072111
if (pos->has_pmu && pos != rp)
@@ -2111,6 +2115,14 @@ void rapl_package_remove_pmu(struct rapl_package *rp)
21112115
perf_pmu_unregister(&rapl_pmu.pmu);
21122116
memset(&rapl_pmu, 0, sizeof(struct rapl_pmu));
21132117
}
2118+
EXPORT_SYMBOL_GPL(rapl_package_remove_pmu_locked);
2119+
2120+
void rapl_package_remove_pmu(struct rapl_package *rp)
2121+
{
2122+
guard(cpus_read_lock)();
2123+
2124+
rapl_package_remove_pmu_locked(rp);
2125+
}
21142126
EXPORT_SYMBOL_GPL(rapl_package_remove_pmu);
21152127
#endif
21162128

drivers/powercap/intel_rapl_msr.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ static int rapl_cpu_online(unsigned int cpu)
8282
if (IS_ERR(rp))
8383
return PTR_ERR(rp);
8484
if (rapl_msr_pmu)
85-
rapl_package_add_pmu(rp);
85+
rapl_package_add_pmu_locked(rp);
8686
}
8787
cpumask_set_cpu(cpu, &rp->cpumask);
8888
return 0;
@@ -101,7 +101,7 @@ static int rapl_cpu_down_prep(unsigned int cpu)
101101
lead_cpu = cpumask_first(&rp->cpumask);
102102
if (lead_cpu >= nr_cpu_ids) {
103103
if (rapl_msr_pmu)
104-
rapl_package_remove_pmu(rp);
104+
rapl_package_remove_pmu_locked(rp);
105105
rapl_remove_package_cpuslocked(rp);
106106
} else if (rp->lead_cpu == cpu) {
107107
rp->lead_cpu = lead_cpu;

include/linux/intel_rapl.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,10 +214,14 @@ void rapl_remove_package(struct rapl_package *rp);
214214

215215
#ifdef CONFIG_PERF_EVENTS
216216
int rapl_package_add_pmu(struct rapl_package *rp);
217+
int rapl_package_add_pmu_locked(struct rapl_package *rp);
217218
void rapl_package_remove_pmu(struct rapl_package *rp);
219+
void rapl_package_remove_pmu_locked(struct rapl_package *rp);
218220
#else
219221
static inline int rapl_package_add_pmu(struct rapl_package *rp) { return 0; }
222+
static inline int rapl_package_add_pmu_locked(struct rapl_package *rp) { return 0; }
220223
static inline void rapl_package_remove_pmu(struct rapl_package *rp) { }
224+
static inline void rapl_package_remove_pmu_locked(struct rapl_package *rp) { }
221225
#endif
222226

223227
#endif /* __INTEL_RAPL_H__ */

0 commit comments

Comments
 (0)