summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/fpu/xstate.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-03-21 03:08:56 +0100
committerLinus Torvalds <torvalds@linux-foundation.org>2016-03-21 03:08:56 +0100
commit643ad15d47410d37d43daf3ef1c8ac52c281efa5 (patch)
treea864860cfe04c994c03d7946e12b3351e38a168b /arch/x86/kernel/fpu/xstate.c
parentMerge branch 'efi-core-for-linus' of git://git.kernel.org/pub/scm/linux/kerne... (diff)
parentx86/mm/pkeys: Fix mismerge of protection keys CPUID bits (diff)
downloadlinux-643ad15d47410d37d43daf3ef1c8ac52c281efa5.tar.xz
linux-643ad15d47410d37d43daf3ef1c8ac52c281efa5.zip
Merge branch 'mm-pkeys-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 protection key support from Ingo Molnar: "This tree adds support for a new memory protection hardware feature that is available in upcoming Intel CPUs: 'protection keys' (pkeys). There's a background article at LWN.net: https://lwn.net/Articles/643797/ The gist is that protection keys allow the encoding of user-controllable permission masks in the pte. So instead of having a fixed protection mask in the pte (which needs a system call to change and works on a per page basis), the user can map a (handful of) protection mask variants and can change the masks runtime relatively cheaply, without having to change every single page in the affected virtual memory range. This allows the dynamic switching of the protection bits of large amounts of virtual memory, via user-space instructions. It also allows more precise control of MMU permission bits: for example the executable bit is separate from the read bit (see more about that below). This tree adds the MM infrastructure and low level x86 glue needed for that, plus it adds a high level API to make use of protection keys - if a user-space application calls: mmap(..., PROT_EXEC); or mprotect(ptr, sz, PROT_EXEC); (note PROT_EXEC-only, without PROT_READ/WRITE), the kernel will notice this special case, and will set a special protection key on this memory range. It also sets the appropriate bits in the Protection Keys User Rights (PKRU) register so that the memory becomes unreadable and unwritable. So using protection keys the kernel is able to implement 'true' PROT_EXEC on x86 CPUs: without protection keys PROT_EXEC implies PROT_READ as well. Unreadable executable mappings have security advantages: they cannot be read via information leaks to figure out ASLR details, nor can they be scanned for ROP gadgets - and they cannot be used by exploits for data purposes either. We know about no user-space code that relies on pure PROT_EXEC mappings today, but binary loaders could start making use of this new feature to map binaries and libraries in a more secure fashion. There is other pending pkeys work that offers more high level system call APIs to manage protection keys - but those are not part of this pull request. Right now there's a Kconfig that controls this feature (CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) that is default enabled (like most x86 CPU feature enablement code that has no runtime overhead), but it's not user-configurable at the moment. If there's any serious problem with this then we can make it configurable and/or flip the default" * 'mm-pkeys-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (38 commits) x86/mm/pkeys: Fix mismerge of protection keys CPUID bits mm/pkeys: Fix siginfo ABI breakage caused by new u64 field x86/mm/pkeys: Fix access_error() denial of writes to write-only VMA mm/core, x86/mm/pkeys: Add execute-only protection keys support x86/mm/pkeys: Create an x86 arch_calc_vm_prot_bits() for VMA flags x86/mm/pkeys: Allow kernel to modify user pkey rights register x86/fpu: Allow setting of XSAVE state x86/mm: Factor out LDT init from context init mm/core, x86/mm/pkeys: Add arch_validate_pkey() mm/core, arch, powerpc: Pass a protection key in to calc_vm_flag_bits() x86/mm/pkeys: Actually enable Memory Protection Keys in the CPU x86/mm/pkeys: Add Kconfig prompt to existing config option x86/mm/pkeys: Dump pkey from VMA in /proc/pid/smaps x86/mm/pkeys: Dump PKRU with other kernel registers mm/core, x86/mm/pkeys: Differentiate instruction fetches x86/mm/pkeys: Optimize fault handling in access_error() mm/core: Do not enforce PKEY permissions on remote mm access um, pkeys: Add UML arch_*_access_permitted() methods mm/gup, x86/mm/pkeys: Check VMAs and PTEs for protection keys x86/mm/gup: Simplify get_user_pages() PTE bit handling ...
Diffstat (limited to 'arch/x86/kernel/fpu/xstate.c')
-rw-r--r--arch/x86/kernel/fpu/xstate.c185
1 files changed, 181 insertions, 4 deletions
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 6e8354f5a593..b48ef35b28d4 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -5,6 +5,7 @@
*/
#include <linux/compat.h>
#include <linux/cpu.h>
+#include <linux/pkeys.h>
#include <asm/fpu/api.h>
#include <asm/fpu/internal.h>
@@ -13,6 +14,11 @@
#include <asm/tlbflush.h>
+/*
+ * Although we spell it out in here, the Processor Trace
+ * xfeature is completely unused. We use other mechanisms
+ * to save/restore PT state in Linux.
+ */
static const char *xfeature_names[] =
{
"x87 floating point registers" ,
@@ -23,6 +29,8 @@ static const char *xfeature_names[] =
"AVX-512 opmask" ,
"AVX-512 Hi256" ,
"AVX-512 ZMM_Hi256" ,
+ "Processor Trace (unused)" ,
+ "Protection Keys User registers",
"unknown xstate feature" ,
};
@@ -56,6 +64,7 @@ void fpu__xstate_clear_all_cpu_caps(void)
setup_clear_cpu_cap(X86_FEATURE_AVX512VL);
setup_clear_cpu_cap(X86_FEATURE_MPX);
setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
+ setup_clear_cpu_cap(X86_FEATURE_PKU);
}
/*
@@ -234,7 +243,7 @@ static void __init print_xstate_feature(u64 xstate_mask)
const char *feature_name;
if (cpu_has_xfeatures(xstate_mask, &feature_name))
- pr_info("x86/fpu: Supporting XSAVE feature 0x%02Lx: '%s'\n", xstate_mask, feature_name);
+ pr_info("x86/fpu: Supporting XSAVE feature 0x%03Lx: '%s'\n", xstate_mask, feature_name);
}
/*
@@ -250,6 +259,7 @@ static void __init print_xstate_features(void)
print_xstate_feature(XFEATURE_MASK_OPMASK);
print_xstate_feature(XFEATURE_MASK_ZMM_Hi256);
print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
+ print_xstate_feature(XFEATURE_MASK_PKRU);
}
/*
@@ -466,6 +476,7 @@ static void check_xstate_against_struct(int nr)
XCHECK_SZ(sz, nr, XFEATURE_OPMASK, struct avx_512_opmask_state);
XCHECK_SZ(sz, nr, XFEATURE_ZMM_Hi256, struct avx_512_zmm_uppers_state);
XCHECK_SZ(sz, nr, XFEATURE_Hi16_ZMM, struct avx_512_hi16_state);
+ XCHECK_SZ(sz, nr, XFEATURE_PKRU, struct pkru_state);
/*
* Make *SURE* to add any feature numbers in below if
@@ -473,7 +484,8 @@ static void check_xstate_against_struct(int nr)
* numbers.
*/
if ((nr < XFEATURE_YMM) ||
- (nr >= XFEATURE_MAX)) {
+ (nr >= XFEATURE_MAX) ||
+ (nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR)) {
WARN_ONCE(1, "no structure for xstate: %d\n", nr);
XSTATE_WARN_ON(1);
}
@@ -671,6 +683,19 @@ void fpu__resume_cpu(void)
}
/*
+ * Given an xstate feature mask, calculate where in the xsave
+ * buffer the state is. Callers should ensure that the buffer
+ * is valid.
+ *
+ * Note: does not work for compacted buffers.
+ */
+void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask)
+{
+ int feature_nr = fls64(xstate_feature_mask) - 1;
+
+ return (void *)xsave + xstate_comp_offsets[feature_nr];
+}
+/*
* Given the xsave area and a state inside, this function returns the
* address of the state.
*
@@ -690,7 +715,6 @@ void fpu__resume_cpu(void)
*/
void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature)
{
- int feature_nr = fls64(xstate_feature) - 1;
/*
* Do we even *have* xsave state?
*/
@@ -718,7 +742,7 @@ void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature)
if (!(xsave->header.xfeatures & xstate_feature))
return NULL;
- return (void *)xsave + xstate_comp_offsets[feature_nr];
+ return __raw_xsave_addr(xsave, xstate_feature);
}
EXPORT_SYMBOL_GPL(get_xsave_addr);
@@ -753,3 +777,156 @@ const void *get_xsave_field_ptr(int xsave_state)
return get_xsave_addr(&fpu->state.xsave, xsave_state);
}
+
+
+/*
+ * Set xfeatures (aka XSTATE_BV) bit for a feature that we want
+ * to take out of its "init state". This will ensure that an
+ * XRSTOR actually restores the state.
+ */
+static void fpu__xfeature_set_non_init(struct xregs_state *xsave,
+ int xstate_feature_mask)
+{
+ xsave->header.xfeatures |= xstate_feature_mask;
+}
+
+/*
+ * This function is safe to call whether the FPU is in use or not.
+ *
+ * Note that this only works on the current task.
+ *
+ * Inputs:
+ * @xsave_state: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP,
+ * XFEATURE_MASK_SSE, etc...)
+ * @xsave_state_ptr: a pointer to a copy of the state that you would
+ * like written in to the current task's FPU xsave state. This pointer
+ * must not be located in the current tasks's xsave area.
+ * Output:
+ * address of the state in the xsave area or NULL if the state
+ * is not present or is in its 'init state'.
+ */
+static void fpu__xfeature_set_state(int xstate_feature_mask,
+ void *xstate_feature_src, size_t len)
+{
+ struct xregs_state *xsave = &current->thread.fpu.state.xsave;
+ struct fpu *fpu = &current->thread.fpu;
+ void *dst;
+
+ if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
+ WARN_ONCE(1, "%s() attempted with no xsave support", __func__);
+ return;
+ }
+
+ /*
+ * Tell the FPU code that we need the FPU state to be in
+ * 'fpu' (not in the registers), and that we need it to
+ * be stable while we write to it.
+ */
+ fpu__current_fpstate_write_begin();
+
+ /*
+ * This method *WILL* *NOT* work for compact-format
+ * buffers. If the 'xstate_feature_mask' is unset in
+ * xcomp_bv then we may need to move other feature state
+ * "up" in the buffer.
+ */
+ if (xsave->header.xcomp_bv & xstate_feature_mask) {
+ WARN_ON_ONCE(1);
+ goto out;
+ }
+
+ /* find the location in the xsave buffer of the desired state */
+ dst = __raw_xsave_addr(&fpu->state.xsave, xstate_feature_mask);
+
+ /*
+ * Make sure that the pointer being passed in did not
+ * come from the xsave buffer itself.
+ */
+ WARN_ONCE(xstate_feature_src == dst, "set from xsave buffer itself");
+
+ /* put the caller-provided data in the location */
+ memcpy(dst, xstate_feature_src, len);
+
+ /*
+ * Mark the xfeature so that the CPU knows there is state
+ * in the buffer now.
+ */
+ fpu__xfeature_set_non_init(xsave, xstate_feature_mask);
+out:
+ /*
+ * We are done writing to the 'fpu'. Reenable preeption
+ * and (possibly) move the fpstate back in to the fpregs.
+ */
+ fpu__current_fpstate_write_end();
+}
+
+#define NR_VALID_PKRU_BITS (CONFIG_NR_PROTECTION_KEYS * 2)
+#define PKRU_VALID_MASK (NR_VALID_PKRU_BITS - 1)
+
+/*
+ * This will go out and modify the XSAVE buffer so that PKRU is
+ * set to a particular state for access to 'pkey'.
+ *
+ * PKRU state does affect kernel access to user memory. We do
+ * not modfiy PKRU *itself* here, only the XSAVE state that will
+ * be restored in to PKRU when we return back to userspace.
+ */
+int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
+ unsigned long init_val)
+{
+ struct xregs_state *xsave = &tsk->thread.fpu.state.xsave;
+ struct pkru_state *old_pkru_state;
+ struct pkru_state new_pkru_state;
+ int pkey_shift = (pkey * PKRU_BITS_PER_PKEY);
+ u32 new_pkru_bits = 0;
+
+ /*
+ * This check implies XSAVE support. OSPKE only gets
+ * set if we enable XSAVE and we enable PKU in XCR0.
+ */
+ if (!boot_cpu_has(X86_FEATURE_OSPKE))
+ return -EINVAL;
+
+ /* Set the bits we need in PKRU */
+ if (init_val & PKEY_DISABLE_ACCESS)
+ new_pkru_bits |= PKRU_AD_BIT;
+ if (init_val & PKEY_DISABLE_WRITE)
+ new_pkru_bits |= PKRU_WD_BIT;
+
+ /* Shift the bits in to the correct place in PKRU for pkey. */
+ new_pkru_bits <<= pkey_shift;
+
+ /* Locate old copy of the state in the xsave buffer */
+ old_pkru_state = get_xsave_addr(xsave, XFEATURE_MASK_PKRU);
+
+ /*
+ * When state is not in the buffer, it is in the init
+ * state, set it manually. Otherwise, copy out the old
+ * state.
+ */
+ if (!old_pkru_state)
+ new_pkru_state.pkru = 0;
+ else
+ new_pkru_state.pkru = old_pkru_state->pkru;
+
+ /* mask off any old bits in place */
+ new_pkru_state.pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
+ /* Set the newly-requested bits */
+ new_pkru_state.pkru |= new_pkru_bits;
+
+ /*
+ * We could theoretically live without zeroing pkru.pad.
+ * The current XSAVE feature state definition says that
+ * only bytes 0->3 are used. But we do not want to
+ * chance leaking kernel stack out to userspace in case a
+ * memcpy() of the whole xsave buffer was done.
+ *
+ * They're in the same cacheline anyway.
+ */
+ new_pkru_state.pad = 0;
+
+ fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state,
+ sizeof(new_pkru_state));
+
+ return 0;
+}