Skip to content

Commit db8268d

Browse files
ChangSeokBaesuryasaimadhu
authored andcommitted
x86/arch_prctl: Add controls for dynamic XSTATE components
Dynamically enabled XSTATE features are by default disabled for all processes. A process has to request permission to use such a feature. To support this, implement an architecture-specific prctl() with the options: - ARCH_GET_XCOMP_SUPP Copies the supported feature bitmap into the user space provided u64 storage. The pointer is handed in via arg2. - ARCH_GET_XCOMP_PERM Copies the process-wide permitted feature bitmap into the user space provided u64 storage. The pointer is handed in via arg2. - ARCH_REQ_XCOMP_PERM Request permission for a feature set. A feature set can be mapped to a facility, e.g. AMX, and can require one or more XSTATE components to be enabled. The feature argument is the number of the highest XSTATE component which is required for a facility to work. The request argument is not a user-supplied bitmap because that makes filtering harder (think seccomp) and even impossible, because to support 32-bit tasks the argument would have to be a pointer. The permission mechanism works this way: A task asks for permission for a facility and the kernel checks whether that's supported. If supported it does: 1) Check whether permission has already been granted. 2) Compute the size of the required kernel buffer and user space (sigframe) buffer. 3) Validate that no task has a sigaltstack installed which is smaller than the resulting sigframe size. 4) Add the requested feature bit(s) to the permission bitmap of current->group_leader->fpu and store the sizes in the group leader's fpu struct as well. If that is successful then the feature is still not enabled for any of the tasks. The first usage of a related instruction will result in a #NM trap. The trap handler validates the permission bit of the task's group leader and, if permitted, it installs a larger kernel buffer and transfers the permission and size info to the new fpstate container, which makes all the FPU functions which require per-task information aware of the extended feature set. 
[ tglx: Adopted to new base code, added missing serialization, massaged namings, comments and changelog ] Signed-off-by: Chang S. Bae <[email protected]> Signed-off-by: Thomas Gleixner <[email protected]> Signed-off-by: Chang S. Bae <[email protected]> Signed-off-by: Borislav Petkov <[email protected]> Link: https://lkml.kernel.org/r/[email protected]
1 parent c33f0a8 commit db8268d

File tree

6 files changed

+178
-3
lines changed

6 files changed

+178
-3
lines changed

arch/x86/include/asm/fpu/api.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,4 +151,8 @@ static inline bool fpstate_is_confidential(struct fpu_guest *gfpu)
151151
return gfpu->fpstate->is_confidential;
152152
}
153153

154+
/* prctl */
155+
struct task_struct;
156+
extern long fpu_xstate_prctl(struct task_struct *tsk, int option, unsigned long arg2);
157+
154158
#endif /* _ASM_X86_FPU_API_H */

arch/x86/include/asm/proto.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,6 @@ void x86_report_nx(void);
4040
extern int reboot_force;
4141

4242
long do_arch_prctl_common(struct task_struct *task, int option,
43-
unsigned long cpuid_enabled);
43+
unsigned long arg2);
4444

4545
#endif /* _ASM_X86_PROTO_H */

arch/x86/include/uapi/asm/prctl.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@
1010
#define ARCH_GET_CPUID 0x1011
1111
#define ARCH_SET_CPUID 0x1012
1212

13+
#define ARCH_GET_XCOMP_SUPP 0x1021
14+
#define ARCH_GET_XCOMP_PERM 0x1022
15+
#define ARCH_REQ_XCOMP_PERM 0x1023
16+
1317
#define ARCH_MAP_VDSO_X32 0x2001
1418
#define ARCH_MAP_VDSO_32 0x2002
1519
#define ARCH_MAP_VDSO_64 0x2003

arch/x86/kernel/fpu/xstate.c

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <linux/compat.h>
99
#include <linux/cpu.h>
1010
#include <linux/mman.h>
11+
#include <linux/nospec.h>
1112
#include <linux/pkeys.h>
1213
#include <linux/seq_file.h>
1314
#include <linux/proc_fs.h>
@@ -18,6 +19,8 @@
1819
#include <asm/fpu/xcr.h>
1920

2021
#include <asm/tlbflush.h>
22+
#include <asm/prctl.h>
23+
#include <asm/elf.h>
2124

2225
#include "internal.h"
2326
#include "legacy.h"
@@ -1298,6 +1301,159 @@ void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature)
12981301
EXPORT_SYMBOL_GPL(fpstate_clear_xstate_component);
12991302
#endif
13001303

1304+
#ifdef CONFIG_X86_64
1305+
static int validate_sigaltstack(unsigned int usize)
1306+
{
1307+
struct task_struct *thread, *leader = current->group_leader;
1308+
unsigned long framesize = get_sigframe_size();
1309+
1310+
lockdep_assert_held(&current->sighand->siglock);
1311+
1312+
/* get_sigframe_size() is based on fpu_user_cfg.max_size */
1313+
framesize -= fpu_user_cfg.max_size;
1314+
framesize += usize;
1315+
for_each_thread(leader, thread) {
1316+
if (thread->sas_ss_size && thread->sas_ss_size < framesize)
1317+
return -ENOSPC;
1318+
}
1319+
return 0;
1320+
}
1321+
1322+
/*
 * Add @requested to the permission bitmap of the current thread group.
 *
 * @permitted:	Snapshot of the group's currently permitted features
 * @requested:	Feature mask which should become permitted in addition
 *
 * Computes the kernel and user state sizes for the union of both masks,
 * validates the installed sigaltstacks against the resulting user state
 * size and, on success, records the union and the sizes in the group
 * leader's fpu struct.
 *
 * Must be called with current->sighand->siglock held.
 *
 * Returns 0 on success or when @requested is already fully permitted,
 * otherwise the error code of validate_sigaltstack().
 */
static int __xstate_request_perm(u64 permitted, u64 requested)
{
	/*
	 * This deliberately does not exclude !XSAVES as we still might
	 * decide to optionally context switch XCR0 or talk the silicon
	 * vendors into extending XFD for the pre AMX states.
	 */
	bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES);
	struct fpu *fpu = &current->group_leader->thread.fpu;
	unsigned int ksize, usize;
	u64 mask;
	int ret;

	/* Check whether fully enabled */
	if ((permitted & requested) == requested)
		return 0;

	/* Calculate the resulting kernel state size */
	mask = permitted | requested;
	ksize = xstate_calculate_size(mask, compacted);

	/* Calculate the resulting user state size */
	usize = xstate_calculate_size(mask & XFEATURE_MASK_USER_SUPPORTED, false);

	ret = validate_sigaltstack(usize);
	if (ret)
		return ret;

	/*
	 * Store the union, not just @requested: the request is added to the
	 * already permitted features and the sizes recorded below were
	 * computed for that union. Writing only @requested would revoke
	 * everything which was permitted before.
	 *
	 * Pairs with the READ_ONCE() in xstate_get_host_group_perm()
	 */
	WRITE_ONCE(fpu->perm.__state_perm, mask);
	/* Protected by sighand lock */
	fpu->perm.__state_size = ksize;
	fpu->perm.__user_state_size = usize;
	return 0;
}
1358+
1359+
/*
1360+
* Permissions array to map facilities with more than one component
1361+
*/
1362+
static const u64 xstate_prctl_req[XFEATURE_MAX] = {
1363+
/* [XFEATURE_XTILE_DATA] = XFEATURE_MASK_XTILE, */
1364+
};
1365+
1366+
static int xstate_request_perm(unsigned long idx)
1367+
{
1368+
u64 permitted, requested;
1369+
int ret;
1370+
1371+
if (idx >= XFEATURE_MAX)
1372+
return -EINVAL;
1373+
1374+
/*
1375+
* Look up the facility mask which can require more than
1376+
* one xstate component.
1377+
*/
1378+
idx = array_index_nospec(idx, ARRAY_SIZE(xstate_prctl_req));
1379+
requested = xstate_prctl_req[idx];
1380+
if (!requested)
1381+
return -EOPNOTSUPP;
1382+
1383+
if ((fpu_user_cfg.max_features & requested) != requested)
1384+
return -EOPNOTSUPP;
1385+
1386+
/* Lockless quick check */
1387+
permitted = xstate_get_host_group_perm();
1388+
if ((permitted & requested) == requested)
1389+
return 0;
1390+
1391+
/* Protect against concurrent modifications */
1392+
spin_lock_irq(&current->sighand->siglock);
1393+
permitted = xstate_get_host_group_perm();
1394+
ret = __xstate_request_perm(permitted, requested);
1395+
spin_unlock_irq(&current->sighand->siglock);
1396+
return ret;
1397+
}
1398+
#else /* CONFIG_X86_64 */
1399+
static inline int xstate_request_perm(unsigned long idx)
1400+
{
1401+
return -EPERM;
1402+
}
1403+
#endif /* !CONFIG_X86_64 */
1404+
1405+
/**
1406+
* fpu_xstate_prctl - xstate permission operations
1407+
* @tsk: Redundant pointer to current
1408+
* @option: A subfunction of arch_prctl()
1409+
* @arg2: option argument
1410+
* Return: 0 if successful; otherwise, an error code
1411+
*
1412+
* Option arguments:
1413+
*
1414+
* ARCH_GET_XCOMP_SUPP: Pointer to user space u64 to store the info
1415+
* ARCH_GET_XCOMP_PERM: Pointer to user space u64 to store the info
1416+
* ARCH_REQ_XCOMP_PERM: Facility number requested
1417+
*
1418+
* For facilities which require more than one XSTATE component, the request
1419+
* must be the highest state component number related to that facility,
1420+
* e.g. for AMX which requires XFEATURE_XTILE_CFG(17) and
1421+
* XFEATURE_XTILE_DATA(18) this would be XFEATURE_XTILE_DATA(18).
1422+
*/
1423+
long fpu_xstate_prctl(struct task_struct *tsk, int option, unsigned long arg2)
1424+
{
1425+
u64 __user *uptr = (u64 __user *)arg2;
1426+
u64 permitted, supported;
1427+
unsigned long idx = arg2;
1428+
1429+
if (tsk != current)
1430+
return -EPERM;
1431+
1432+
switch (option) {
1433+
case ARCH_GET_XCOMP_SUPP:
1434+
supported = fpu_user_cfg.max_features | fpu_user_cfg.legacy_features;
1435+
return put_user(supported, uptr);
1436+
1437+
case ARCH_GET_XCOMP_PERM:
1438+
/*
1439+
* Lockless snapshot as it can also change right after the
1440+
* dropping the lock.
1441+
*/
1442+
permitted = xstate_get_host_group_perm();
1443+
permitted &= XFEATURE_MASK_USER_SUPPORTED;
1444+
return put_user(permitted, uptr);
1445+
1446+
case ARCH_REQ_XCOMP_PERM:
1447+
if (!IS_ENABLED(CONFIG_X86_64))
1448+
return -EOPNOTSUPP;
1449+
1450+
return xstate_request_perm(idx);
1451+
1452+
default:
1453+
return -EINVAL;
1454+
}
1455+
}
1456+
13011457
#ifdef CONFIG_PROC_PID_ARCH_STATUS
13021458
/*
13031459
* Report the amount of time elapsed in millisecond since last AVX512

arch/x86/kernel/fpu/xstate.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,12 @@ static inline void xstate_init_xcomp_bv(struct xregs_state *xsave, u64 mask)
1515
xsave->header.xcomp_bv = mask | XCOMP_BV_COMPACTED_FORMAT;
1616
}
1717

18+
static inline u64 xstate_get_host_group_perm(void)
19+
{
20+
/* Pairs with WRITE_ONCE() in xstate_request_perm() */
21+
return READ_ONCE(current->group_leader->thread.fpu.perm.__state_perm);
22+
}
23+
1824
enum xstate_copy_mode {
1925
XSTATE_COPY_FP,
2026
XSTATE_COPY_FX,

arch/x86/kernel/process.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include <asm/apic.h>
3131
#include <linux/uaccess.h>
3232
#include <asm/mwait.h>
33+
#include <asm/fpu/api.h>
3334
#include <asm/fpu/sched.h>
3435
#include <asm/debugreg.h>
3536
#include <asm/nmi.h>
@@ -1003,13 +1004,17 @@ unsigned long get_wchan(struct task_struct *p)
10031004
}
10041005

10051006
long do_arch_prctl_common(struct task_struct *task, int option,
1006-
unsigned long cpuid_enabled)
1007+
unsigned long arg2)
10071008
{
10081009
switch (option) {
10091010
case ARCH_GET_CPUID:
10101011
return get_cpuid_mode();
10111012
case ARCH_SET_CPUID:
1012-
return set_cpuid_mode(task, cpuid_enabled);
1013+
return set_cpuid_mode(task, arg2);
1014+
case ARCH_GET_XCOMP_SUPP:
1015+
case ARCH_GET_XCOMP_PERM:
1016+
case ARCH_REQ_XCOMP_PERM:
1017+
return fpu_xstate_prctl(task, option, arg2);
10131018
}
10141019

10151020
return -EINVAL;

0 commit comments

Comments
 (0)