Skip to content

Commit 17fae12

Browse files
aegl authored and KAGA-KOKO committed
x86/{mce,mm}: Unmap the entire page if the whole page is affected and poisoned
An interesting thing happened when a guest Linux instance took a machine check. The VMM unmapped the bad page from guest physical space and passed the machine check to the guest. Linux took all the normal actions to offline the page from the process that was using it. But then guest Linux crashed because it said there was a second machine check inside the kernel with this stack trace: do_memory_failure set_mce_nospec set_memory_uc _set_memory_uc change_page_attr_set_clr cpa_flush clflush_cache_range_opt This was odd, because a CLFLUSH instruction shouldn't raise a machine check (it isn't consuming the data). Further investigation showed that the VMM had passed in another machine check because is appeared that the guest was accessing the bad page. Fix is to check the scope of the poison by checking the MCi_MISC register. If the entire page is affected, then unmap the page. If only part of the page is affected, then mark the page as uncacheable. This assumes that VMMs will do the logical thing and pass in the "whole page scope" via the MCi_MISC register (since they unmapped the entire page). [ bp: Adjust to x86/entry changes. ] Fixes: 284ce40 ("x86/memory_failure: Introduce {set, clear}_mce_nospec()") Reported-by: Jue Wang <[email protected]> Signed-off-by: Tony Luck <[email protected]> Signed-off-by: Borislav Petkov <[email protected]> Signed-off-by: Thomas Gleixner <[email protected]> Tested-by: Jue Wang <[email protected]> Cc: <[email protected]> Link: https://lkml.kernel.org/r/[email protected]
1 parent f77d26a commit 17fae12

File tree

4 files changed

+31
-12
lines changed

4 files changed

+31
-12
lines changed

arch/x86/include/asm/set_memory.h

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -86,28 +86,35 @@ int set_direct_map_default_noflush(struct page *page);
8686
extern int kernel_set_to_readonly;
8787

8888
#ifdef CONFIG_X86_64
89-
static inline int set_mce_nospec(unsigned long pfn)
89+
/*
90+
* Prevent speculative access to the page by either unmapping
91+
* it (if we do not require access to any part of the page) or
92+
* marking it uncacheable (if we want to try to retrieve data
93+
* from non-poisoned lines in the page).
94+
*/
95+
static inline int set_mce_nospec(unsigned long pfn, bool unmap)
9096
{
9197
unsigned long decoy_addr;
9298
int rc;
9399

94100
/*
95-
* Mark the linear address as UC to make sure we don't log more
96-
* errors because of speculative access to the page.
97101
* We would like to just call:
98-
* set_memory_uc((unsigned long)pfn_to_kaddr(pfn), 1);
102+
* set_memory_XX((unsigned long)pfn_to_kaddr(pfn), 1);
99103
* but doing that would radically increase the odds of a
100104
* speculative access to the poison page because we'd have
101105
* the virtual address of the kernel 1:1 mapping sitting
102106
* around in registers.
103107
* Instead we get tricky. We create a non-canonical address
104108
* that looks just like the one we want, but has bit 63 flipped.
105-
* This relies on set_memory_uc() properly sanitizing any __pa()
109+
* This relies on set_memory_XX() properly sanitizing any __pa()
106110
* results with __PHYSICAL_MASK or PTE_PFN_MASK.
107111
*/
108112
decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
109113

110-
rc = set_memory_uc(decoy_addr, 1);
114+
if (unmap)
115+
rc = set_memory_np(decoy_addr, 1);
116+
else
117+
rc = set_memory_uc(decoy_addr, 1);
111118
if (rc)
112119
pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
113120
return rc;

arch/x86/kernel/cpu/mce/core.c

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -520,6 +520,14 @@ bool mce_is_memory_error(struct mce *m)
520520
}
521521
EXPORT_SYMBOL_GPL(mce_is_memory_error);
522522

523+
static bool whole_page(struct mce *m)
524+
{
525+
if (!mca_cfg.ser || !(m->status & MCI_STATUS_MISCV))
526+
return true;
527+
528+
return MCI_MISC_ADDR_LSB(m->misc) >= PAGE_SHIFT;
529+
}
530+
523531
bool mce_is_correctable(struct mce *m)
524532
{
525533
if (m->cpuvendor == X86_VENDOR_AMD && m->status & MCI_STATUS_DEFERRED)
@@ -573,7 +581,7 @@ static int uc_decode_notifier(struct notifier_block *nb, unsigned long val,
573581

574582
pfn = mce->addr >> PAGE_SHIFT;
575583
if (!memory_failure(pfn, 0)) {
576-
set_mce_nospec(pfn);
584+
set_mce_nospec(pfn, whole_page(mce));
577585
mce->kflags |= MCE_HANDLED_UC;
578586
}
579587

@@ -1173,11 +1181,12 @@ static void kill_me_maybe(struct callback_head *cb)
11731181
int flags = MF_ACTION_REQUIRED;
11741182

11751183
pr_err("Uncorrected hardware memory error in user-access at %llx", p->mce_addr);
1176-
if (!(p->mce_status & MCG_STATUS_RIPV))
1184+
1185+
if (!p->mce_ripv)
11771186
flags |= MF_MUST_KILL;
11781187

11791188
if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags)) {
1180-
set_mce_nospec(p->mce_addr >> PAGE_SHIFT);
1189+
set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
11811190
return;
11821191
}
11831192

@@ -1331,7 +1340,8 @@ void noinstr do_machine_check(struct pt_regs *regs)
13311340
BUG_ON(!on_thread_stack() || !user_mode(regs));
13321341

13331342
current->mce_addr = m.addr;
1334-
current->mce_status = m.mcgstatus;
1343+
current->mce_ripv = !!(m.mcgstatus & MCG_STATUS_RIPV);
1344+
current->mce_whole_page = whole_page(&m);
13351345
current->mce_kill_me.func = kill_me_maybe;
13361346
if (kill_it)
13371347
current->mce_kill_me.func = kill_me_now;

include/linux/sched.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1304,7 +1304,9 @@ struct task_struct {
13041304

13051305
#ifdef CONFIG_X86_MCE
13061306
u64 mce_addr;
1307-
u64 mce_status;
1307+
__u64 mce_ripv : 1,
1308+
mce_whole_page : 1,
1309+
__mce_reserved : 62;
13081310
struct callback_head mce_kill_me;
13091311
#endif
13101312

include/linux/set_memory.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ static inline int set_direct_map_default_noflush(struct page *page)
2626
#endif
2727

2828
#ifndef set_mce_nospec
29-
static inline int set_mce_nospec(unsigned long pfn)
29+
static inline int set_mce_nospec(unsigned long pfn, bool unmap)
3030
{
3131
return 0;
3232
}

0 commit comments

Comments
 (0)