From f7d83c1cf3c77ae45876792aee5285ae970413ac Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 16 Aug 2017 13:26:03 -0700 Subject: x86: Implement thread_struct whitelist for hardened usercopy This whitelists the FPU register state portion of the thread_struct for copying to userspace, instead of the default entire struct. This is needed because FPU register state is dynamically sized, so it doesn't bypass the hardened usercopy checks. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: x86@kernel.org Cc: Borislav Petkov Cc: Andy Lutomirski Cc: Mathias Krause Signed-off-by: Kees Cook Acked-by: Rik van Riel --- arch/x86/include/asm/processor.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index cc16fa882e3e..2b037b7fe0eb 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -492,6 +492,14 @@ struct thread_struct { */ }; +/* Whitelist the FPU state from the task_struct for hardened usercopy. */ +static inline void arch_thread_struct_whitelist(unsigned long *offset, + unsigned long *size) +{ + *offset = offsetof(struct thread_struct, fpu.state); + *size = fpu_kernel_xstate_size; +} + /* * Thread-synchronous status. * -- cgit v1.2.3 From 55f49fcb879fbeebf2a8c1ac7c9e6d90df55f798 Mon Sep 17 00:00:00 2001 From: William Grant Date: Tue, 30 Jan 2018 22:22:55 +1100 Subject: x86/mm: Fix overlap of i386 CPU_ENTRY_AREA with FIX_BTMAP Since commit 92a0f81d8957 ("x86/cpu_entry_area: Move it out of the fixmap"), i386's CPU_ENTRY_AREA has been mapped to the memory area just below FIXADDR_START. But already immediately before FIXADDR_START is the FIX_BTMAP area, which means that early_ioremap can collide with the entry area. It's especially bad on PAE where FIX_BTMAP_BEGIN gets aligned to exactly match CPU_ENTRY_AREA_BASE, so the first early_ioremap slot clobbers the IDT and causes interrupts during early boot to reset the system. The overlap wasn't a problem before the CPU entry area was introduced, as the fixmap has classically been preceded by the pkmap or vmalloc areas, neither of which is used until early_ioremap is out of the picture. Relocate CPU_ENTRY_AREA to below FIX_BTMAP, not just below the permanent fixmap area. Fixes: commit 92a0f81d8957 ("x86/cpu_entry_area: Move it out of the fixmap") Signed-off-by: William Grant Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/7041d181-a019-e8b9-4e4e-48215f841e2c@canonical.com --- arch/x86/include/asm/fixmap.h | 6 ++++-- arch/x86/include/asm/pgtable_32_types.h | 5 +++-- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 64c4a30e0d39..e203169931c7 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -137,8 +137,10 @@ enum fixed_addresses { extern void reserve_top_address(unsigned long reserve); -#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) -#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) +#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) +#define FIXADDR_TOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_TOT_START (FIXADDR_TOP - FIXADDR_TOT_SIZE) extern int fixmaps_set; diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index ce245b0cdfca..0777e18a1d23 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -44,8 +44,9 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */ */ #define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40) -#define CPU_ENTRY_AREA_BASE \ - ((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK) +#define CPU_ENTRY_AREA_BASE \ + ((FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) \ + & PMD_MASK) #define PKMAP_BASE \ ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) -- cgit v1.2.3 From 37a8f7c38339b22b69876d6f5a0ab851565284e3 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 28 Jan 2018 10:38:50 -0800 Subject: x86/asm: Move 'status' from thread_struct to thread_info The TS_COMPAT bit is very hot and is accessed from code paths that mostly also touch thread_info::flags. Move it into struct thread_info to improve cache locality. The only reason it was in thread_struct is that there was a brief period during which arch-specific fields were not allowed in struct thread_info. Linus suggested further changing: ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); to: if (unlikely(ti->status & (TS_COMPAT|TS_I386_REGS_POKED))) ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); on the theory that frequently dirtying the cacheline even in pure 64-bit code that never needs to modify status hurts performance. That could be a reasonable followup patch, but I suspect it matters less on top of this patch. Suggested-by: Linus Torvalds Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Linus Torvalds Cc: Borislav Petkov Cc: Kernel Hardening Link: https://lkml.kernel.org/r/03148bcc1b217100e6e8ecf6a5468c45cf4304b6.1517164461.git.luto@kernel.org --- arch/x86/entry/common.c | 4 ++-- arch/x86/include/asm/processor.h | 2 -- arch/x86/include/asm/syscall.h | 6 +++--- arch/x86/include/asm/thread_info.h | 3 ++- arch/x86/kernel/process_64.c | 4 ++-- arch/x86/kernel/ptrace.c | 2 +- arch/x86/kernel/signal.c | 2 +- 7 files changed, 11 insertions(+), 12 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index d7d3cc24baf4..99081340d19a 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -206,7 +206,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) * special case only applies after poking regs and before the * very next return to user mode. */ - current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED); + ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); #endif user_enter_irqoff(); @@ -304,7 +304,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) unsigned int nr = (unsigned int)regs->orig_ax; #ifdef CONFIG_IA32_EMULATION - current->thread.status |= TS_COMPAT; + ti->status |= TS_COMPAT; #endif if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) { diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index efbde088a718..513f9604c192 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -460,8 +460,6 @@ struct thread_struct { unsigned short gsindex; #endif - u32 status; /* thread synchronous flags */ - #ifdef CONFIG_X86_64 unsigned long fsbase; unsigned long gsbase; diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index e3c95e8e61c5..03eedc21246d 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task, * TS_COMPAT is set for 32-bit syscall entries and then * remains set until we return to user mode. */ - if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) + if (task->thread_info.status & (TS_COMPAT|TS_I386_REGS_POKED)) /* * Sign-extend the value so (int)-EFOO becomes (long)-EFOO * and will match correctly in comparisons. @@ -116,7 +116,7 @@ static inline void syscall_get_arguments(struct task_struct *task, unsigned long *args) { # ifdef CONFIG_IA32_EMULATION - if (task->thread.status & TS_COMPAT) + if (task->thread_info.status & TS_COMPAT) switch (i) { case 0: if (!n--) break; @@ -177,7 +177,7 @@ static inline void syscall_set_arguments(struct task_struct *task, const unsigned long *args) { # ifdef CONFIG_IA32_EMULATION - if (task->thread.status & TS_COMPAT) + if (task->thread_info.status & TS_COMPAT) switch (i) { case 0: if (!n--) break; diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 00223333821a..eda3b6823ca4 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -55,6 +55,7 @@ struct task_struct; struct thread_info { unsigned long flags; /* low level flags */ + u32 status; /* thread synchronous flags */ }; #define INIT_THREAD_INFO(tsk) \ @@ -221,7 +222,7 @@ static inline int arch_within_stack_frames(const void * const stack, #define in_ia32_syscall() true #else #define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \ - current->thread.status & TS_COMPAT) + current_thread_info()->status & TS_COMPAT) #endif /* diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index c75466232016..9eb448c7859d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -557,7 +557,7 @@ static void __set_personality_x32(void) * Pretend to come from a x32 execve. */ task_pt_regs(current)->orig_ax = __NR_x32_execve | __X32_SYSCALL_BIT; - current->thread.status &= ~TS_COMPAT; + current_thread_info()->status &= ~TS_COMPAT; #endif } @@ -571,7 +571,7 @@ static void __set_personality_ia32(void) current->personality |= force_personality32; /* Prepare the first "return" to user space */ task_pt_regs(current)->orig_ax = __NR_ia32_execve; - current->thread.status |= TS_COMPAT; + current_thread_info()->status |= TS_COMPAT; #endif } diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index f37d18124648..ed5c4cdf0a34 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -935,7 +935,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value) */ regs->orig_ax = value; if (syscall_get_nr(child, regs) >= 0) - child->thread.status |= TS_I386_REGS_POKED; + child->thread_info.status |= TS_I386_REGS_POKED; break; case offsetof(struct user32, regs.eflags): diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index b9e00e8f1c9b..4cdc0b27ec82 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -787,7 +787,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) * than the tracee. */ #ifdef CONFIG_IA32_EMULATION - if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) + if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED)) return __NR_ia32_restart_syscall; #endif #ifdef CONFIG_X86_X32_ABI -- cgit v1.2.3 From babdde2698d482b6c0de1eab4f697cf5856c5859 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:02:28 -0800 Subject: x86: Implement array_index_mask_nospec array_index_nospec() uses a mask to sanitize user controllable array indexes, i.e. generate a 0 mask if 'index' >= 'size', and a ~0 mask otherwise. While the default array_index_mask_nospec() handles the carry-bit from the (index - size) result in software. The x86 array_index_mask_nospec() does the same, but the carry-bit is handled in the processor CF flag without conditional instructions in the control flow. Suggested-by: Linus Torvalds Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727414808.33451.1873237130672785331.stgit@dwillia2-desk3.amr.corp.intel.com --- arch/x86/include/asm/barrier.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 7fb336210e1b..173b38f5fe88 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -24,6 +24,30 @@ #define wmb() asm volatile("sfence" ::: "memory") #endif +/** + * array_index_mask_nospec() - generate a mask that is ~0UL when the + * bounds check succeeds and 0 otherwise + * @index: array element index + * @size: number of elements in array + * + * Returns: + * 0 - (index < size) + */ +static inline unsigned long array_index_mask_nospec(unsigned long index, + unsigned long size) +{ + unsigned long mask; + + asm ("cmp %1,%2; sbb %0,%0;" + :"=r" (mask) + :"r"(size),"r" (index) + :"cc"); + return mask; +} + +/* Override the default implementation from linux/nospec.h. */ +#define array_index_mask_nospec array_index_mask_nospec + #ifdef CONFIG_X86_PPRO_FENCE #define dma_rmb() rmb() #else -- cgit v1.2.3 From b3d7ad85b80bbc404635dca80f5b129f6242bc7a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:02:33 -0800 Subject: x86: Introduce barrier_nospec Rename the open coded form of this instruction sequence from rdtsc_ordered() into a generic barrier primitive, barrier_nospec(). One of the mitigations for Spectre variant1 vulnerabilities is to fence speculative execution after successfully validating a bounds check. I.e. force the result of a bounds check to resolve in the instruction pipeline to ensure speculative execution honors that result before potentially operating on out-of-bounds data. No functional changes. Suggested-by: Linus Torvalds Suggested-by: Andi Kleen Suggested-by: Ingo Molnar Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Tom Lendacky Cc: Kees Cook Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: Al Viro Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727415361.33451.9049453007262764675.stgit@dwillia2-desk3.amr.corp.intel.com --- arch/x86/include/asm/barrier.h | 4 ++++ arch/x86/include/asm/msr.h | 3 +-- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 173b38f5fe88..30d406146016 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -48,6 +48,10 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, /* Override the default implementation from linux/nospec.h. */ #define array_index_mask_nospec array_index_mask_nospec +/* Prevent speculative execution past this barrier. */ +#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \ + "lfence", X86_FEATURE_LFENCE_RDTSC) + #ifdef CONFIG_X86_PPRO_FENCE #define dma_rmb() rmb() #else diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 07962f5f6fba..30df295f6d94 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -214,8 +214,7 @@ static __always_inline unsigned long long rdtsc_ordered(void) * that some other imaginary CPU is updating continuously with a * time stamp. */ - alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, - "lfence", X86_FEATURE_LFENCE_RDTSC); + barrier_nospec(); return rdtsc(); } -- cgit v1.2.3 From b3bbfb3fb5d25776b8e3f361d2eedaabb0b496cd Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:02:39 -0800 Subject: x86: Introduce __uaccess_begin_nospec() and uaccess_try_nospec For __get_user() paths, do not allow the kernel to speculate on the value of a user controlled pointer. In addition to the 'stac' instruction for Supervisor Mode Access Protection (SMAP), a barrier_nospec() causes the access_ok() result to resolve in the pipeline before the CPU might take any speculative action on the pointer value. Given the cost of 'stac' the speculation barrier is placed after 'stac' to hopefully overlap the cost of disabling SMAP with the cost of flushing the instruction pipeline. Since __get_user is a major kernel interface that deals with user controlled pointers, the __uaccess_begin_nospec() mechanism will prevent speculative execution past an access_ok() permission check. While speculative execution past access_ok() is not enough to lead to a kernel memory leak, it is a necessary precondition. To be clear, __uaccess_begin_nospec() is addressing a class of potential problems near __get_user() usages. Note, that while the barrier_nospec() in __uaccess_begin_nospec() is used to protect __get_user(), pointer masking similar to array_index_nospec() will be used for get_user() since it incorporates a bounds check near the usage. uaccess_try_nospec provides the same mechanism for get_user_try. No functional changes. Suggested-by: Linus Torvalds Suggested-by: Andi Kleen Suggested-by: Ingo Molnar Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Tom Lendacky Cc: Kees Cook Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: Al Viro Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727415922.33451.5796614273104346583.stgit@dwillia2-desk3.amr.corp.intel.com --- arch/x86/include/asm/uaccess.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 574dff4d2913..663e9bde9fc9 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -124,6 +124,11 @@ extern int __get_user_bad(void); #define __uaccess_begin() stac() #define __uaccess_end() clac() +#define __uaccess_begin_nospec() \ +({ \ + stac(); \ + barrier_nospec(); \ +}) /* * This is a type: either unsigned long, if the argument fits into @@ -487,6 +492,10 @@ struct __large_struct { unsigned long buf[100]; }; __uaccess_begin(); \ barrier(); +#define uaccess_try_nospec do { \ + current->thread.uaccess_err = 0; \ + __uaccess_begin_nospec(); \ + #define uaccess_catch(err) \ __uaccess_end(); \ (err) |= (current->thread.uaccess_err ? -EFAULT : 0); \ -- cgit v1.2.3 From 304ec1b050310548db33063e567123fae8fd0301 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:02:49 -0800 Subject: x86/uaccess: Use __uaccess_begin_nospec() and uaccess_try_nospec Quoting Linus: I do think that it would be a good idea to very expressly document the fact that it's not that the user access itself is unsafe. I do agree that things like "get_user()" want to be protected, but not because of any direct bugs or problems with get_user() and friends, but simply because get_user() is an excellent source of a pointer that is obviously controlled from a potentially attacking user space. So it's a prime candidate for then finding _subsequent_ accesses that can then be used to perturb the cache. __uaccess_begin_nospec() covers __get_user() and copy_from_iter() where the limit check is far away from the user pointer de-reference. In those cases a barrier_nospec() prevents speculation with a potential pointer to privileged memory. uaccess_try_nospec covers get_user_try. Suggested-by: Linus Torvalds Suggested-by: Andi Kleen Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Kees Cook Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: Al Viro Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727416953.33451.10508284228526170604.stgit@dwillia2-desk3.amr.corp.intel.com --- arch/x86/include/asm/uaccess.h | 6 +++--- arch/x86/include/asm/uaccess_32.h | 6 +++--- arch/x86/include/asm/uaccess_64.h | 12 ++++++------ arch/x86/lib/usercopy_32.c | 4 ++-- 4 files changed, 14 insertions(+), 14 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 663e9bde9fc9..aae77eb8491c 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -450,7 +450,7 @@ do { \ ({ \ int __gu_err; \ __inttype(*(ptr)) __gu_val; \ - __uaccess_begin(); \ + __uaccess_begin_nospec(); \ __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ __uaccess_end(); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ @@ -557,7 +557,7 @@ struct __large_struct { unsigned long buf[100]; }; * get_user_ex(...); * } get_user_catch(err) */ -#define get_user_try uaccess_try +#define get_user_try uaccess_try_nospec #define get_user_catch(err) uaccess_catch(err) #define get_user_ex(x, ptr) do { \ @@ -591,7 +591,7 @@ extern void __cmpxchg_wrong_size(void) __typeof__(ptr) __uval = (uval); \ __typeof__(*(ptr)) __old = (old); \ __typeof__(*(ptr)) __new = (new); \ - __uaccess_begin(); \ + __uaccess_begin_nospec(); \ switch (size) { \ case 1: \ { \ diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 72950401b223..ba2dc1930630 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -29,21 +29,21 @@ raw_copy_from_user(void *to, const void __user *from, unsigned long n) switch (n) { case 1: ret = 0; - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm_nozero(*(u8 *)to, from, ret, "b", "b", "=q", 1); __uaccess_end(); return ret; case 2: ret = 0; - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm_nozero(*(u16 *)to, from, ret, "w", "w", "=r", 2); __uaccess_end(); return ret; case 4: ret = 0; - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm_nozero(*(u32 *)to, from, ret, "l", "k", "=r", 4); __uaccess_end(); diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index f07ef3c575db..62546b3a398e 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -55,31 +55,31 @@ raw_copy_from_user(void *dst, const void __user *src, unsigned long size) return copy_user_generic(dst, (__force void *)src, size); switch (size) { case 1: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm_nozero(*(u8 *)dst, (u8 __user *)src, ret, "b", "b", "=q", 1); __uaccess_end(); return ret; case 2: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm_nozero(*(u16 *)dst, (u16 __user *)src, ret, "w", "w", "=r", 2); __uaccess_end(); return ret; case 4: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm_nozero(*(u32 *)dst, (u32 __user *)src, ret, "l", "k", "=r", 4); __uaccess_end(); return ret; case 8: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, ret, "q", "", "=r", 8); __uaccess_end(); return ret; case 10: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, ret, "q", "", "=r", 10); if (likely(!ret)) @@ -89,7 +89,7 @@ raw_copy_from_user(void *dst, const void __user *src, unsigned long size) __uaccess_end(); return ret; case 16: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, ret, "q", "", "=r", 16); if (likely(!ret)) diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index de3436719e26..7add8ba06887 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -331,7 +331,7 @@ do { \ unsigned long __copy_user_ll(void *to, const void *from, unsigned long n) { - __uaccess_begin(); + __uaccess_begin_nospec(); if (movsl_is_ok(to, from, n)) __copy_user(to, from, n); else @@ -344,7 +344,7 @@ EXPORT_SYMBOL(__copy_user_ll); unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, unsigned long n) { - __uaccess_begin(); + __uaccess_begin_nospec(); #ifdef CONFIG_X86_INTEL_USERCOPY if (n > 64 && static_cpu_has(X86_FEATURE_XMM2)) n = __copy_user_intel_nocache(to, from, n); -- cgit v1.2.3 From 18bf3c3ea8ece8f03b6fc58508f2dfd23c7711c7 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Mon, 29 Jan 2018 22:04:47 +0000 Subject: x86/speculation: Use Indirect Branch Prediction Barrier in context switch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flush indirect branches when switching into a process that marked itself non dumpable. This protects high value processes like gpg better, without having too high performance overhead. If done naïvely, we could switch to a kernel idle thread and then back to the original process, such as: process A -> idle -> process A In such scenario, we do not have to do IBPB here even though the process is non-dumpable, as we are switching back to the same process after a hiatus. To avoid the redundant IBPB, which is expensive, we track the last mm user context ID. The cost is to have an extra u64 mm context id to track the last mm we were using before switching to the init_mm used by idle. Avoiding the extra IBPB is probably worth the extra memory for this common scenario. For those cases where tlb_defer_switch_to_init_mm() returns true (non PCID), lazy tlb will defer switch to init_mm, so we will not be changing the mm for the process A -> idle -> process A switch. So IBPB will be skipped for this case. Thanks to the reviewers and Andy Lutomirski for the suggestion of using ctx_id which got rid of the problem of mm pointer recycling. Signed-off-by: Tim Chen Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: ak@linux.intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: linux@dominikbrodowski.net Cc: peterz@infradead.org Cc: bp@alien8.de Cc: luto@kernel.org Cc: pbonzini@redhat.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1517263487-3708-1-git-send-email-dwmw@amazon.co.uk --- arch/x86/include/asm/tlbflush.h | 2 ++ arch/x86/mm/tlb.c | 33 ++++++++++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index d33e4a26dc7e..2b8f18ca5874 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -174,6 +174,8 @@ struct tlb_state { struct mm_struct *loaded_mm; u16 loaded_mm_asid; u16 next_asid; + /* last user mm's ctx id */ + u64 last_ctx_id; /* * We can be in one of several states: diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 5bfe61a5e8e3..012d02624848 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -6,13 +6,14 @@ #include #include #include +#include #include #include +#include #include #include #include -#include /* * TLB flushing, formerly SMP-only @@ -247,6 +248,27 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, } else { u16 new_asid; bool need_flush; + u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id); + + /* + * Avoid user/user BTB poisoning by flushing the branch + * predictor when switching between processes. This stops + * one process from doing Spectre-v2 attacks on another. + * + * As an optimization, flush indirect branches only when + * switching into processes that disable dumping. This + * protects high value processes like gpg, without having + * too high performance overhead. IBPB is *expensive*! + * + * This will not flush branches when switching into kernel + * threads. It will also not flush if we switch to idle + * thread and back to the same process. It will flush if we + * switch to a different non-dumpable process. + */ + if (tsk && tsk->mm && + tsk->mm->context.ctx_id != last_ctx_id && + get_dumpable(tsk->mm) != SUID_DUMP_USER) + indirect_branch_prediction_barrier(); if (IS_ENABLED(CONFIG_VMAP_STACK)) { /* @@ -292,6 +314,14 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0); } + /* + * Record last user mm's context id, so we can avoid + * flushing branch buffer with IBPB if we switch back + * to the same user. + */ + if (next != &init_mm) + this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id); + this_cpu_write(cpu_tlbstate.loaded_mm, next); this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid); } @@ -369,6 +399,7 @@ void initialize_tlbstate_and_flush(void) write_cr3(build_cr3(mm->pgd, 0)); /* Reinitialize tlbstate. */ + this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id); this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0); this_cpu_write(cpu_tlbstate.next_asid, 1); this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id); -- cgit v1.2.3 From af189c95a371b59f493dbe0f50c0a09724868881 Mon Sep 17 00:00:00 2001 From: Darren Kenny Date: Fri, 2 Feb 2018 19:12:20 +0000 Subject: x86/speculation: Fix typo IBRS_ATT, which should be IBRS_ALL Fixes: 117cc7a908c83 ("x86/retpoline: Fill return stack buffer on vmexit") Signed-off-by: Darren Kenny Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Cc: Tom Lendacky Cc: Andi Kleen Cc: Borislav Petkov Cc: Masami Hiramatsu Cc: Arjan van de Ven Cc: David Woodhouse Link: https://lkml.kernel.org/r/20180202191220.blvgkgutojecxr3b@starbug-vm.ie.oracle.com --- arch/x86/include/asm/nospec-branch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index d15d471348b8..4d57894635f2 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -150,7 +150,7 @@ extern char __indirect_thunk_end[]; * On VMEXIT we must ensure that no RSB predictions learned in the guest * can be followed in the host, by overwriting the RSB completely. Both * retpoline and IBRS mitigations for Spectre v2 need this; only on future - * CPUs with IBRS_ATT *might* it be avoided. + * CPUs with IBRS_ALL *might* it be avoided. */ static inline void vmexit_fill_RSB(void) { -- cgit v1.2.3