Skip to content

Commit 91f75eb

Browse files
yghannam authored and suryasaimadhu committed
x86/MCE/AMD, EDAC/mce_amd: Support non-uniform MCA bank type enumeration
AMD systems currently lay out MCA bank types such that the type of bank number "i" is either the same across all CPUs or is Reserved/Read-as-Zero.

For example:

  Bank # | CPUx | CPUy
    0    | LS   | LS
    1    | RAZ  | UMC
    2    | CS   | CS
    3    | SMU  | RAZ

Future AMD systems will lay out MCA bank types such that the type of bank number "i" may be different across CPUs.

For example:

  Bank # | CPUx | CPUy
    0    | LS   | LS
    1    | RAZ  | UMC
    2    | CS   | NBIO
    3    | SMU  | RAZ

Change the structures that cache MCA bank types to be per-CPU and update smca_get_bank_type() to handle this change.

Move some SMCA-specific structures to amd.c from mce.h, since they no longer need to be global.

Break out the "count" for bank types from struct smca_hwid, since this should provide a per-CPU count rather than a system-wide count.

Apply the "const" qualifier to the struct smca_hwid_mcatypes array. The values in this array should not change at runtime.

Signed-off-by: Yazen Ghannam <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
1 parent 5176a93 commit 91f75eb

File tree

4 files changed

+39
-51
lines changed

4 files changed

+39
-51
lines changed

arch/x86/include/asm/mce.h

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -329,30 +329,14 @@ enum smca_bank_types {
329329
N_SMCA_BANK_TYPES
330330
};
331331

332-
#define HWID_MCATYPE(hwid, mcatype) (((hwid) << 16) | (mcatype))
333-
334-
struct smca_hwid {
335-
unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */
336-
u32 hwid_mcatype; /* (hwid,mcatype) tuple */
337-
u8 count; /* Number of instances. */
338-
};
339-
340-
struct smca_bank {
341-
struct smca_hwid *hwid;
342-
u32 id; /* Value of MCA_IPID[InstanceId]. */
343-
u8 sysfs_id; /* Value used for sysfs name. */
344-
};
345-
346-
extern struct smca_bank smca_banks[MAX_NR_BANKS];
347-
348332
extern const char *smca_get_long_name(enum smca_bank_types t);
349333
extern bool amd_mce_is_memory_error(struct mce *m);
350334

351335
extern int mce_threshold_create_device(unsigned int cpu);
352336
extern int mce_threshold_remove_device(unsigned int cpu);
353337

354338
void mce_amd_feature_init(struct cpuinfo_x86 *c);
355-
enum smca_bank_types smca_get_bank_type(unsigned int bank);
339+
enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank);
356340
#else
357341

358342
static inline int mce_threshold_create_device(unsigned int cpu) { return 0; };

arch/x86/kernel/cpu/mce/amd.c

Lines changed: 35 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,22 @@ static const char * const smca_umc_block_names[] = {
7171
"misc_umc"
7272
};
7373

74+
#define HWID_MCATYPE(hwid, mcatype) (((hwid) << 16) | (mcatype))
75+
76+
struct smca_hwid {
77+
unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */
78+
u32 hwid_mcatype; /* (hwid,mcatype) tuple */
79+
};
80+
81+
struct smca_bank {
82+
const struct smca_hwid *hwid;
83+
u32 id; /* Value of MCA_IPID[InstanceId]. */
84+
u8 sysfs_id; /* Value used for sysfs name. */
85+
};
86+
87+
static DEFINE_PER_CPU_READ_MOSTLY(struct smca_bank[MAX_NR_BANKS], smca_banks);
88+
static DEFINE_PER_CPU_READ_MOSTLY(u8[N_SMCA_BANK_TYPES], smca_bank_counts);
89+
7490
struct smca_bank_name {
7591
const char *name; /* Short name for sysfs */
7692
const char *long_name; /* Long name for pretty-printing */
@@ -126,22 +142,22 @@ const char *smca_get_long_name(enum smca_bank_types t)
126142
}
127143
EXPORT_SYMBOL_GPL(smca_get_long_name);
128144

129-
enum smca_bank_types smca_get_bank_type(unsigned int bank)
145+
enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank)
130146
{
131147
struct smca_bank *b;
132148

133149
if (bank >= MAX_NR_BANKS)
134150
return N_SMCA_BANK_TYPES;
135151

136-
b = &smca_banks[bank];
152+
b = &per_cpu(smca_banks, cpu)[bank];
137153
if (!b->hwid)
138154
return N_SMCA_BANK_TYPES;
139155

140156
return b->hwid->bank_type;
141157
}
142158
EXPORT_SYMBOL_GPL(smca_get_bank_type);
143159

144-
static struct smca_hwid smca_hwid_mcatypes[] = {
160+
static const struct smca_hwid smca_hwid_mcatypes[] = {
145161
/* { bank_type, hwid_mcatype } */
146162

147163
/* Reserved type */
@@ -202,9 +218,6 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
202218
{ SMCA_GMI_PHY, HWID_MCATYPE(0x269, 0x0) },
203219
};
204220

205-
struct smca_bank smca_banks[MAX_NR_BANKS];
206-
EXPORT_SYMBOL_GPL(smca_banks);
207-
208221
/*
209222
* In SMCA enabled processors, we can have multiple banks for a given IP type.
210223
* So to define a unique name for each bank, we use a temp c-string to append
@@ -260,8 +273,9 @@ static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu)
260273

261274
static void smca_configure(unsigned int bank, unsigned int cpu)
262275
{
276+
u8 *bank_counts = this_cpu_ptr(smca_bank_counts);
277+
const struct smca_hwid *s_hwid;
263278
unsigned int i, hwid_mcatype;
264-
struct smca_hwid *s_hwid;
265279
u32 high, low;
266280
u32 smca_config = MSR_AMD64_SMCA_MCx_CONFIG(bank);
267281

@@ -297,10 +311,6 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
297311

298312
smca_set_misc_banks_map(bank, cpu);
299313

300-
/* Return early if this bank was already initialized. */
301-
if (smca_banks[bank].hwid && smca_banks[bank].hwid->hwid_mcatype != 0)
302-
return;
303-
304314
if (rdmsr_safe(MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) {
305315
pr_warn("Failed to read MCA_IPID for bank %d\n", bank);
306316
return;
@@ -311,10 +321,11 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
311321

312322
for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) {
313323
s_hwid = &smca_hwid_mcatypes[i];
324+
314325
if (hwid_mcatype == s_hwid->hwid_mcatype) {
315-
smca_banks[bank].hwid = s_hwid;
316-
smca_banks[bank].id = low;
317-
smca_banks[bank].sysfs_id = s_hwid->count++;
326+
this_cpu_ptr(smca_banks)[bank].hwid = s_hwid;
327+
this_cpu_ptr(smca_banks)[bank].id = low;
328+
this_cpu_ptr(smca_banks)[bank].sysfs_id = bank_counts[s_hwid->bank_type]++;
318329
break;
319330
}
320331
}
@@ -600,7 +611,7 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
600611

601612
bool amd_filter_mce(struct mce *m)
602613
{
603-
enum smca_bank_types bank_type = smca_get_bank_type(m->bank);
614+
enum smca_bank_types bank_type = smca_get_bank_type(m->extcpu, m->bank);
604615
struct cpuinfo_x86 *c = &boot_cpu_data;
605616

606617
/* See Family 17h Models 10h-2Fh Erratum #1114. */
@@ -638,7 +649,7 @@ static void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
638649
} else if (c->x86 == 0x17 &&
639650
(c->x86_model >= 0x10 && c->x86_model <= 0x2F)) {
640651

641-
if (smca_get_bank_type(bank) != SMCA_IF)
652+
if (smca_get_bank_type(smp_processor_id(), bank) != SMCA_IF)
642653
return;
643654

644655
msrs[0] = MSR_AMD64_SMCA_MCx_MISC(bank);
@@ -706,7 +717,7 @@ bool amd_mce_is_memory_error(struct mce *m)
706717
u8 xec = (m->status >> 16) & 0x1f;
707718

708719
if (mce_flags.smca)
709-
return smca_get_bank_type(m->bank) == SMCA_UMC && xec == 0x0;
720+
return smca_get_bank_type(m->extcpu, m->bank) == SMCA_UMC && xec == 0x0;
710721

711722
return m->bank == 4 && xec == 0x8;
712723
}
@@ -1022,7 +1033,7 @@ static struct kobj_type threshold_ktype = {
10221033
.release = threshold_block_release,
10231034
};
10241035

1025-
static const char *get_name(unsigned int bank, struct threshold_block *b)
1036+
static const char *get_name(unsigned int cpu, unsigned int bank, struct threshold_block *b)
10261037
{
10271038
enum smca_bank_types bank_type;
10281039

@@ -1033,7 +1044,7 @@ static const char *get_name(unsigned int bank, struct threshold_block *b)
10331044
return th_names[bank];
10341045
}
10351046

1036-
bank_type = smca_get_bank_type(bank);
1047+
bank_type = smca_get_bank_type(cpu, bank);
10371048
if (bank_type >= N_SMCA_BANK_TYPES)
10381049
return NULL;
10391050

@@ -1043,12 +1054,12 @@ static const char *get_name(unsigned int bank, struct threshold_block *b)
10431054
return NULL;
10441055
}
10451056

1046-
if (smca_banks[bank].hwid->count == 1)
1057+
if (per_cpu(smca_bank_counts, cpu)[bank_type] == 1)
10471058
return smca_get_name(bank_type);
10481059

10491060
snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN,
1050-
"%s_%x", smca_get_name(bank_type),
1051-
smca_banks[bank].sysfs_id);
1061+
"%s_%u", smca_get_name(bank_type),
1062+
per_cpu(smca_banks, cpu)[bank].sysfs_id);
10521063
return buf_mcatype;
10531064
}
10541065

@@ -1104,7 +1115,7 @@ static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb
11041115
else
11051116
tb->blocks = b;
11061117

1107-
err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(bank, b));
1118+
err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(cpu, bank, b));
11081119
if (err)
11091120
goto out_free;
11101121
recurse:
@@ -1159,7 +1170,7 @@ static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu,
11591170
struct device *dev = this_cpu_read(mce_device);
11601171
struct amd_northbridge *nb = NULL;
11611172
struct threshold_bank *b = NULL;
1162-
const char *name = get_name(bank, NULL);
1173+
const char *name = get_name(cpu, bank, NULL);
11631174
int err = 0;
11641175

11651176
if (!dev)

drivers/edac/mce_amd.c

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1166,20 +1166,13 @@ static void decode_mc6_mce(struct mce *m)
11661166
/* Decode errors according to Scalable MCA specification */
11671167
static void decode_smca_error(struct mce *m)
11681168
{
1169-
struct smca_hwid *hwid;
1170-
enum smca_bank_types bank_type;
1169+
enum smca_bank_types bank_type = smca_get_bank_type(m->extcpu, m->bank);
11711170
const char *ip_name;
11721171
u8 xec = XEC(m->status, xec_mask);
11731172

1174-
if (m->bank >= ARRAY_SIZE(smca_banks))
1173+
if (bank_type >= N_SMCA_BANK_TYPES)
11751174
return;
11761175

1177-
hwid = smca_banks[m->bank].hwid;
1178-
if (!hwid)
1179-
return;
1180-
1181-
bank_type = hwid->bank_type;
1182-
11831176
if (bank_type == SMCA_RESERVED) {
11841177
pr_emerg(HW_ERR "Bank %d is reserved.\n", m->bank);
11851178
return;

drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2647,7 +2647,7 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,
26472647
* and error occurred in DramECC (Extended error code = 0) then only
26482648
* process the error, else bail out.
26492649
*/
2650-
if (!m || !((smca_get_bank_type(m->bank) == SMCA_UMC_V2) &&
2650+
if (!m || !((smca_get_bank_type(m->extcpu, m->bank) == SMCA_UMC_V2) &&
26512651
(XEC(m->status, 0x3f) == 0x0)))
26522652
return NOTIFY_DONE;
26532653

0 commit comments

Comments
 (0)