diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-21 03:08:56 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-21 03:08:56 +0100 |
commit | 643ad15d47410d37d43daf3ef1c8ac52c281efa5 (patch) | |
tree | a864860cfe04c994c03d7946e12b3351e38a168b /include | |
parent | Merge branch 'efi-core-for-linus' of git://git.kernel.org/pub/scm/linux/kerne... (diff) | |
parent | x86/mm/pkeys: Fix mismerge of protection keys CPUID bits (diff) | |
download | linux-643ad15d47410d37d43daf3ef1c8ac52c281efa5.tar.xz linux-643ad15d47410d37d43daf3ef1c8ac52c281efa5.zip |
Merge branch 'mm-pkeys-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 protection key support from Ingo Molnar:
"This tree adds support for a new memory protection hardware feature
that is available in upcoming Intel CPUs: 'protection keys' (pkeys).
There's a background article at LWN.net:
https://lwn.net/Articles/643797/
The gist is that protection keys allow the encoding of
user-controllable permission masks in the pte. So instead of having a
fixed protection mask in the pte (which needs a system call to change
and works on a per page basis), the user can map a (handful of)
protection mask variants and can change the masks runtime relatively
cheaply, without having to change every single page in the affected
virtual memory range.
This allows the dynamic switching of the protection bits of large
amounts of virtual memory, via user-space instructions. It also
allows more precise control of MMU permission bits: for example the
executable bit is separate from the read bit (see more about that
below).
This tree adds the MM infrastructure and low level x86 glue needed for
that, plus it adds a high level API to make use of protection keys -
if a user-space application calls:
mmap(..., PROT_EXEC);
or
mprotect(ptr, sz, PROT_EXEC);
(note PROT_EXEC-only, without PROT_READ/WRITE), the kernel will notice
this special case, and will set a special protection key on this
memory range. It also sets the appropriate bits in the Protection
Keys User Rights (PKRU) register so that the memory becomes unreadable
and unwritable.
So using protection keys the kernel is able to implement 'true'
PROT_EXEC on x86 CPUs: without protection keys PROT_EXEC implies
PROT_READ as well. Unreadable executable mappings have security
advantages: they cannot be read via information leaks to figure out
ASLR details, nor can they be scanned for ROP gadgets - and they
cannot be used by exploits for data purposes either.
We know about no user-space code that relies on pure PROT_EXEC
mappings today, but binary loaders could start making use of this new
feature to map binaries and libraries in a more secure fashion.
There is other pending pkeys work that offers more high level system
call APIs to manage protection keys - but those are not part of this
pull request.
Right now there's a Kconfig that controls this feature
(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) that is default enabled
(like most x86 CPU feature enablement code that has no runtime
overhead), but it's not user-configurable at the moment. If there's
any serious problem with this then we can make it configurable and/or
flip the default"
* 'mm-pkeys-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (38 commits)
x86/mm/pkeys: Fix mismerge of protection keys CPUID bits
mm/pkeys: Fix siginfo ABI breakage caused by new u64 field
x86/mm/pkeys: Fix access_error() denial of writes to write-only VMA
mm/core, x86/mm/pkeys: Add execute-only protection keys support
x86/mm/pkeys: Create an x86 arch_calc_vm_prot_bits() for VMA flags
x86/mm/pkeys: Allow kernel to modify user pkey rights register
x86/fpu: Allow setting of XSAVE state
x86/mm: Factor out LDT init from context init
mm/core, x86/mm/pkeys: Add arch_validate_pkey()
mm/core, arch, powerpc: Pass a protection key in to calc_vm_flag_bits()
x86/mm/pkeys: Actually enable Memory Protection Keys in the CPU
x86/mm/pkeys: Add Kconfig prompt to existing config option
x86/mm/pkeys: Dump pkey from VMA in /proc/pid/smaps
x86/mm/pkeys: Dump PKRU with other kernel registers
mm/core, x86/mm/pkeys: Differentiate instruction fetches
x86/mm/pkeys: Optimize fault handling in access_error()
mm/core: Do not enforce PKEY permissions on remote mm access
um, pkeys: Add UML arch_*_access_permitted() methods
mm/gup, x86/mm/pkeys: Check VMAs and PTEs for protection keys
x86/mm/gup: Simplify get_user_pages() PTE bit handling
...
Diffstat (limited to 'include')
-rw-r--r-- | include/asm-generic/mm_hooks.h | 12 | ||||
-rw-r--r-- | include/linux/mm.h | 99 | ||||
-rw-r--r-- | include/linux/mman.h | 6 | ||||
-rw-r--r-- | include/linux/pkeys.h | 33 | ||||
-rw-r--r-- | include/uapi/asm-generic/siginfo.h | 17 |
5 files changed, 149 insertions, 18 deletions
diff --git a/include/asm-generic/mm_hooks.h b/include/asm-generic/mm_hooks.h index 866aa461efa5..cc5d9a1405df 100644 --- a/include/asm-generic/mm_hooks.h +++ b/include/asm-generic/mm_hooks.h @@ -26,4 +26,16 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm, { } +static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, + bool write, bool execute, bool foreign) +{ + /* by default, allow everything */ + return true; +} + +static inline bool arch_pte_access_permitted(pte_t pte, bool write) +{ + /* by default, allow everything */ + return true; +} #endif /* _ASM_GENERIC_MM_HOOKS_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 7d42501c8bb4..450fc977ed02 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -193,8 +193,26 @@ extern unsigned int kobjsize(const void *objp); #define VM_NOHUGEPAGE 0x40000000 /* MADV_NOHUGEPAGE marked this vma */ #define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */ +#ifdef CONFIG_ARCH_USES_HIGH_VMA_FLAGS +#define VM_HIGH_ARCH_BIT_0 32 /* bit only usable on 64-bit architectures */ +#define VM_HIGH_ARCH_BIT_1 33 /* bit only usable on 64-bit architectures */ +#define VM_HIGH_ARCH_BIT_2 34 /* bit only usable on 64-bit architectures */ +#define VM_HIGH_ARCH_BIT_3 35 /* bit only usable on 64-bit architectures */ +#define VM_HIGH_ARCH_0 BIT(VM_HIGH_ARCH_BIT_0) +#define VM_HIGH_ARCH_1 BIT(VM_HIGH_ARCH_BIT_1) +#define VM_HIGH_ARCH_2 BIT(VM_HIGH_ARCH_BIT_2) +#define VM_HIGH_ARCH_3 BIT(VM_HIGH_ARCH_BIT_3) +#endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */ + #if defined(CONFIG_X86) # define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */ +#if defined (CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) +# define VM_PKEY_SHIFT VM_HIGH_ARCH_BIT_0 +# define VM_PKEY_BIT0 VM_HIGH_ARCH_0 /* A protection key is a 4-bit value */ +# define VM_PKEY_BIT1 VM_HIGH_ARCH_1 +# define VM_PKEY_BIT2 VM_HIGH_ARCH_2 +# define VM_PKEY_BIT3 VM_HIGH_ARCH_3 +#endif #elif defined(CONFIG_PPC) # define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */ #elif defined(CONFIG_PARISC) @@ -256,6 +274,8 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_KILLABLE 0x10 /* The fault task is in SIGKILL killable region */ #define FAULT_FLAG_TRIED 0x20 /* Second try */ #define FAULT_FLAG_USER 0x40 /* The fault originated in userspace */ +#define FAULT_FLAG_REMOTE 0x80 /* faulting for non current tsk/mm */ +#define FAULT_FLAG_INSTRUCTION 0x100 /* The fault was during an instruction fetch */ /* * vm_fault is filled by the the pagefault handler and passed to the vma's @@ -1224,24 +1244,82 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int foll_flags, struct page **pages, struct vm_area_struct **vmas, int *nonblocking); -long get_user_pages(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - struct vm_area_struct **vmas); -long get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - int *locked); +long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages, + struct vm_area_struct **vmas); +long get_user_pages6(unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages, + struct vm_area_struct **vmas); +long get_user_pages_locked6(unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages, int *locked); long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, unsigned int gup_flags); -long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, +long get_user_pages_unlocked5(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages); int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); +/* suppress warnings from use in EXPORT_SYMBOL() */ +#ifndef __DISABLE_GUP_DEPRECATED +#define __gup_deprecated __deprecated +#else +#define __gup_deprecated +#endif +/* + * These macros provide backward-compatibility with the old + * get_user_pages() variants which took tsk/mm. These + * functions/macros provide both compile-time __deprecated so we + * can catch old-style use and not break the build. The actual + * functions also have WARN_ON()s to let us know at runtime if + * the get_user_pages() should have been the "remote" variant. + * + * These are hideous, but temporary. + * + * If you run into one of these __deprecated warnings, look + * at how you are calling get_user_pages(). If you are calling + * it with current/current->mm as the first two arguments, + * simply remove those arguments. The behavior will be the same + * as it is now. If you are calling it on another task, use + * get_user_pages_remote() instead. + * + * Any questions? Ask Dave Hansen <dave@sr71.net> + */ +long +__gup_deprecated +get_user_pages8(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages, + struct vm_area_struct **vmas); +#define GUP_MACRO(_1, _2, _3, _4, _5, _6, _7, _8, get_user_pages, ...) \ + get_user_pages +#define get_user_pages(...) GUP_MACRO(__VA_ARGS__, \ + get_user_pages8, x, \ + get_user_pages6, x, x, x, x, x)(__VA_ARGS__) + +__gup_deprecated +long get_user_pages_locked8(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages, + int *locked); +#define GUPL_MACRO(_1, _2, _3, _4, _5, _6, _7, _8, get_user_pages_locked, ...) \ + get_user_pages_locked +#define get_user_pages_locked(...) GUPL_MACRO(__VA_ARGS__, \ + get_user_pages_locked8, x, \ + get_user_pages_locked6, x, x, x, x)(__VA_ARGS__) + +__gup_deprecated +long get_user_pages_unlocked7(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages); +#define GUPU_MACRO(_1, _2, _3, _4, _5, _6, _7, get_user_pages_unlocked, ...) \ + get_user_pages_unlocked +#define get_user_pages_unlocked(...) GUPU_MACRO(__VA_ARGS__, \ + get_user_pages_unlocked7, x, \ + get_user_pages_unlocked5, x, x, x, x)(__VA_ARGS__) + /* Container for pinned pfns / pages */ struct frame_vector { unsigned int nr_allocated; /* Number of frames we have space for */ @@ -2169,6 +2247,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma, #define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ #define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ #define FOLL_MLOCK 0x1000 /* lock present pages */ +#define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); diff --git a/include/linux/mman.h b/include/linux/mman.h index 16373c8f5f57..33e17f6a327a 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -35,7 +35,7 @@ static inline void vm_unacct_memory(long pages) */ #ifndef arch_calc_vm_prot_bits -#define arch_calc_vm_prot_bits(prot) 0 +#define arch_calc_vm_prot_bits(prot, pkey) 0 #endif #ifndef arch_vm_get_page_prot @@ -70,12 +70,12 @@ static inline int arch_validate_prot(unsigned long prot) * Combine the mmap "prot" argument into "vm_flags" used internally. */ static inline unsigned long -calc_vm_prot_bits(unsigned long prot) +calc_vm_prot_bits(unsigned long prot, unsigned long pkey) { return _calc_vm_trans(prot, PROT_READ, VM_READ ) | _calc_vm_trans(prot, PROT_WRITE, VM_WRITE) | _calc_vm_trans(prot, PROT_EXEC, VM_EXEC) | - arch_calc_vm_prot_bits(prot); + arch_calc_vm_prot_bits(prot, pkey); } /* diff --git a/include/linux/pkeys.h b/include/linux/pkeys.h new file mode 100644 index 000000000000..1d405a2b7272 --- /dev/null +++ b/include/linux/pkeys.h @@ -0,0 +1,33 @@ +#ifndef _LINUX_PKEYS_H +#define _LINUX_PKEYS_H + +#include <linux/mm_types.h> +#include <asm/mmu_context.h> + +#define PKEY_DISABLE_ACCESS 0x1 +#define PKEY_DISABLE_WRITE 0x2 +#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ + PKEY_DISABLE_WRITE) + +#ifdef CONFIG_ARCH_HAS_PKEYS +#include <asm/pkeys.h> +#else /* ! CONFIG_ARCH_HAS_PKEYS */ +#define arch_max_pkey() (1) +#define execute_only_pkey(mm) (0) +#define arch_override_mprotect_pkey(vma, prot, pkey) (0) +#define PKEY_DEDICATED_EXECUTE_ONLY 0 +#endif /* ! CONFIG_ARCH_HAS_PKEYS */ + +/* + * This is called from mprotect_pkey(). + * + * Returns true if the protection keys is valid. + */ +static inline bool validate_pkey(int pkey) +{ + if (pkey < 0) + return false; + return (pkey < arch_max_pkey()); +} + +#endif /* _LINUX_PKEYS_H */ diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h index 1e3552037a5a..1abaf62c86fc 100644 --- a/include/uapi/asm-generic/siginfo.h +++ b/include/uapi/asm-generic/siginfo.h @@ -91,10 +91,15 @@ typedef struct siginfo { int _trapno; /* TRAP # which caused the signal */ #endif short _addr_lsb; /* LSB of the reported address */ - struct { - void __user *_lower; - void __user *_upper; - } _addr_bnd; + union { + /* used when si_code=SEGV_BNDERR */ + struct { + void __user *_lower; + void __user *_upper; + } _addr_bnd; + /* used when si_code=SEGV_PKUERR */ + __u32 _pkey; + }; } _sigfault; /* SIGPOLL */ @@ -137,6 +142,7 @@ typedef struct siginfo { #define si_addr_lsb _sifields._sigfault._addr_lsb #define si_lower _sifields._sigfault._addr_bnd._lower #define si_upper _sifields._sigfault._addr_bnd._upper +#define si_pkey _sifields._sigfault._pkey #define si_band _sifields._sigpoll._band #define si_fd _sifields._sigpoll._fd #ifdef __ARCH_SIGSYS @@ -206,7 +212,8 @@ typedef struct siginfo { #define SEGV_MAPERR (__SI_FAULT|1) /* address not mapped to object */ #define SEGV_ACCERR (__SI_FAULT|2) /* invalid permissions for mapped object */ #define SEGV_BNDERR (__SI_FAULT|3) /* failed address bound checks */ -#define NSIGSEGV 3 +#define SEGV_PKUERR (__SI_FAULT|4) /* failed protection key checks */ +#define NSIGSEGV 4 /* * SIGBUS si_codes |