1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
|
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __X86_KERNEL_FPU_XSTATE_H
#define __X86_KERNEL_FPU_XSTATE_H
#include <asm/cpufeature.h>
#include <asm/fpu/xstate.h>
#include <asm/fpu/xcr.h>
#ifdef CONFIG_X86_64
DECLARE_PER_CPU(u64, xfd_state);
#endif
static inline void xstate_init_xcomp_bv(struct xregs_state *xsave, u64 mask)
{
/*
* XRSTORS requires these bits set in xcomp_bv, or it will
* trigger #GP:
*/
if (cpu_feature_enabled(X86_FEATURE_XSAVES))
xsave->header.xcomp_bv = mask | XCOMP_BV_COMPACTED_FORMAT;
}
static inline u64 xstate_get_group_perm(bool guest)
{
struct fpu *fpu = ¤t->group_leader->thread.fpu;
struct fpu_state_perm *perm;
/* Pairs with WRITE_ONCE() in xstate_request_perm() */
perm = guest ? &fpu->guest_perm : &fpu->perm;
return READ_ONCE(perm->__state_perm);
}
static inline u64 xstate_get_host_group_perm(void)
{
return xstate_get_group_perm(false);
}
enum xstate_copy_mode {
XSTATE_COPY_FP,
XSTATE_COPY_FX,
XSTATE_COPY_XSAVE,
};
struct membuf;
extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
u32 pkru_val, enum xstate_copy_mode copy_mode);
extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
enum xstate_copy_mode mode);
extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf);
extern int copy_sigframe_from_user_to_xstate(struct fpstate *fpstate, const void __user *ubuf);
extern void fpu__init_cpu_xstate(void);
extern void fpu__init_system_xstate(unsigned int legacy_size);
extern void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
static inline u64 xfeatures_mask_supervisor(void)
{
return fpu_kernel_cfg.max_features & XFEATURE_MASK_SUPERVISOR_SUPPORTED;
}
static inline u64 xfeatures_mask_independent(void)
{
if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR))
return XFEATURE_MASK_INDEPENDENT & ~XFEATURE_MASK_LBR;
return XFEATURE_MASK_INDEPENDENT;
}
/* XSAVE/XRSTOR wrapper functions */
#ifdef CONFIG_X86_64
#define REX_PREFIX "0x48, "
#else
#define REX_PREFIX
#endif
/* These macros all use (%edi)/(%rdi) as the single memory argument. */
#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27"
#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37"
#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f"
#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f"
#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f"
/*
* After this @err contains 0 on success or the trap number when the
* operation raises an exception.
*/
#define XSTATE_OP(op, st, lmask, hmask, err) \
asm volatile("1:" op "\n\t" \
"xor %[err], %[err]\n" \
"2:\n\t" \
_ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT_MCE_SAFE) \
: [err] "=a" (err) \
: "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
: "memory")
/*
* If XSAVES is enabled, it replaces XSAVEOPT because it supports a compact
* format and supervisor states in addition to modified optimization in
* XSAVEOPT.
*
* Otherwise, if XSAVEOPT is enabled, XSAVEOPT replaces XSAVE because XSAVEOPT
* supports modified optimization which is not supported by XSAVE.
*
* We use XSAVE as a fallback.
*
* The 661 label is defined in the ALTERNATIVE* macros as the address of the
* original instruction which gets replaced. We need to use it here as the
* address of the instruction where we might get an exception at.
*/
#define XSTATE_XSAVE(st, lmask, hmask, err) \
asm volatile(ALTERNATIVE_2(XSAVE, \
XSAVEOPT, X86_FEATURE_XSAVEOPT, \
XSAVES, X86_FEATURE_XSAVES) \
"\n" \
"xor %[err], %[err]\n" \
"3:\n" \
_ASM_EXTABLE_TYPE_REG(661b, 3b, EX_TYPE_EFAULT_REG, %[err]) \
: [err] "=r" (err) \
: "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
: "memory")
/*
* Use XRSTORS to restore context if it is enabled. XRSTORS supports compact
* XSAVE area format.
*/
#define XSTATE_XRESTORE(st, lmask, hmask) \
asm volatile(ALTERNATIVE(XRSTOR, \
XRSTORS, X86_FEATURE_XSAVES) \
"\n" \
"3:\n" \
_ASM_EXTABLE_TYPE(661b, 3b, EX_TYPE_FPU_RESTORE) \
: \
: "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
: "memory")
#if defined(CONFIG_X86_64) && defined(CONFIG_X86_DEBUG_FPU)
extern void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor);
#else
static inline void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor) { }
#endif
#ifdef CONFIG_X86_64
static inline void xfd_update_state(struct fpstate *fpstate)
{
if (fpu_state_size_dynamic()) {
u64 xfd = fpstate->xfd;
if (__this_cpu_read(xfd_state) != xfd) {
wrmsrl(MSR_IA32_XFD, xfd);
__this_cpu_write(xfd_state, xfd);
}
}
}
extern int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu);
#else
static inline void xfd_update_state(struct fpstate *fpstate) { }
static inline int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu) {
return -EPERM;
}
#endif
/*
* Save processor xstate to xsave area.
*
* Uses either XSAVE or XSAVEOPT or XSAVES depending on the CPU features
* and command line options. The choice is permanent until the next reboot.
*/
static inline void os_xsave(struct fpstate *fpstate)
{
u64 mask = fpstate->xfeatures;
u32 lmask = mask;
u32 hmask = mask >> 32;
int err;
WARN_ON_FPU(!alternatives_patched);
xfd_validate_state(fpstate, mask, false);
XSTATE_XSAVE(&fpstate->regs.xsave, lmask, hmask, err);
/* We should never fault when copying to a kernel buffer: */
WARN_ON_FPU(err);
}
/*
* Restore processor xstate from xsave area.
*
* Uses XRSTORS when XSAVES is used, XRSTOR otherwise.
*/
static inline void os_xrstor(struct fpstate *fpstate, u64 mask)
{
u32 lmask = mask;
u32 hmask = mask >> 32;
xfd_validate_state(fpstate, mask, true);
XSTATE_XRESTORE(&fpstate->regs.xsave, lmask, hmask);
}
/* Restore of supervisor state. Does not require XFD */
static inline void os_xrstor_supervisor(struct fpstate *fpstate)
{
u64 mask = xfeatures_mask_supervisor();
u32 lmask = mask;
u32 hmask = mask >> 32;
XSTATE_XRESTORE(&fpstate->regs.xsave, lmask, hmask);
}
/*
* XSAVE itself always writes all requested xfeatures. Removing features
* from the request bitmap reduces the features which are written.
* Generate a mask of features which must be written to a sigframe. The
* unset features can be optimized away and not written.
*
* This optimization is user-visible. Only use for states where
* uninitialized sigframe contents are tolerable, like dynamic features.
*
* Users of buffers produced with this optimization must check XSTATE_BV
* to determine which features have been optimized out.
*/
static inline u64 xfeatures_need_sigframe_write(void)
{
u64 xfeaures_to_write;
/* In-use features must be written: */
xfeaures_to_write = xfeatures_in_use();
/* Also write all non-optimizable sigframe features: */
xfeaures_to_write |= XFEATURE_MASK_USER_SUPPORTED &
~XFEATURE_MASK_SIGFRAME_INITOPT;
return xfeaures_to_write;
}
/*
* Save xstate to user space xsave area.
*
* We don't use modified optimization because xrstor/xrstors might track
* a different application.
*
* We don't use compacted format xsave area for backward compatibility for
* old applications which don't understand the compacted format of the
* xsave area.
*
* The caller has to zero buf::header before calling this because XSAVE*
* does not touch the reserved fields in the header.
*/
static inline int xsave_to_user_sigframe(struct xregs_state __user *buf)
{
/*
* Include the features which are not xsaved/rstored by the kernel
* internally, e.g. PKRU. That's user space ABI and also required
* to allow the signal handler to modify PKRU.
*/
struct fpstate *fpstate = current->thread.fpu.fpstate;
u64 mask = fpstate->user_xfeatures;
u32 lmask;
u32 hmask;
int err;
/* Optimize away writing unnecessary xfeatures: */
if (fpu_state_size_dynamic())
mask &= xfeatures_need_sigframe_write();
lmask = mask;
hmask = mask >> 32;
xfd_validate_state(fpstate, mask, false);
stac();
XSTATE_OP(XSAVE, buf, lmask, hmask, err);
clac();
return err;
}
/*
* Restore xstate from user space xsave area.
*/
static inline int xrstor_from_user_sigframe(struct xregs_state __user *buf, u64 mask)
{
struct xregs_state *xstate = ((__force struct xregs_state *)buf);
u32 lmask = mask;
u32 hmask = mask >> 32;
int err;
xfd_validate_state(current->thread.fpu.fpstate, mask, true);
stac();
XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
clac();
return err;
}
/*
* Restore xstate from kernel space xsave area, return an error code instead of
* an exception.
*/
static inline int os_xrstor_safe(struct fpstate *fpstate, u64 mask)
{
struct xregs_state *xstate = &fpstate->regs.xsave;
u32 lmask = mask;
u32 hmask = mask >> 32;
int err;
/* Ensure that XFD is up to date */
xfd_update_state(fpstate);
if (cpu_feature_enabled(X86_FEATURE_XSAVES))
XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
else
XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
return err;
}
#endif
|