diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-10-23 19:43:04 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-10-23 19:43:04 +0200 |
commit | d82924c3b8d0607094b94fab290a33c5ad7d586c (patch) | |
tree | 2afbc9066262b5987c8568a09f80db5198025689 /arch/x86/mm | |
parent | Merge branch 'x86-platform-for-linus' of git://git.kernel.org/pub/scm/linux/k... (diff) | |
parent | x86/speculation: Propagate information about RSB filling mitigation to sysfs (diff) | |
download | linux-d82924c3b8d0607094b94fab290a33c5ad7d586c.tar.xz linux-d82924c3b8d0607094b94fab290a33c5ad7d586c.zip |
Merge branch 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 pti updates from Ingo Molnar:
"The main changes:
- Make the IBPB barrier more strict and add STIBP support (Jiri
Kosina)
- Micro-optimize and clean up the entry code (Andy Lutomirski)
- ... plus misc other fixes"
* 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/speculation: Propagate information about RSB filling mitigation to sysfs
x86/speculation: Enable cross-hyperthread spectre v2 STIBP mitigation
x86/speculation: Apply IBPB more strictly to avoid cross-process data leak
x86/speculation: Add RETPOLINE_AMD support to the inline asm CALL_NOSPEC variant
x86/CPU: Fix unused variable warning when !CONFIG_IA32_EMULATION
x86/pti/64: Remove the SYSCALL64 entry trampoline
x86/entry/64: Use the TSS sp2 slot for SYSCALL/SYSRET scratch space
x86/entry/64: Document idtentry
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/cpu_entry_area.c | 36 | ||||
-rw-r--r-- | arch/x86/mm/pti.c | 33 | ||||
-rw-r--r-- | arch/x86/mm/tlb.c | 31 |
3 files changed, 52 insertions, 48 deletions
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index 076ebdce9bd4..12d7e7fb4efd 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -15,7 +15,6 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage) #ifdef CONFIG_X86_64 static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); -static DEFINE_PER_CPU(struct kcore_list, kcore_entry_trampoline); #endif struct cpu_entry_area *get_cpu_entry_area(int cpu) @@ -83,8 +82,6 @@ static void percpu_setup_debug_store(int cpu) static void __init setup_cpu_entry_area(int cpu) { #ifdef CONFIG_X86_64 - extern char _entry_trampoline[]; - /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */ pgprot_t gdt_prot = PAGE_KERNEL_RO; pgprot_t tss_prot = PAGE_KERNEL_RO; @@ -146,43 +143,10 @@ static void __init setup_cpu_entry_area(int cpu) cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks, &per_cpu(exception_stacks, cpu), sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL); - - cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline, - __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX); - /* - * The cpu_entry_area alias addresses are not in the kernel binary - * so they do not show up in /proc/kcore normally. This adds entries - * for them manually. 
- */ - kclist_add_remap(&per_cpu(kcore_entry_trampoline, cpu), - _entry_trampoline, - &get_cpu_entry_area(cpu)->entry_trampoline, PAGE_SIZE); #endif percpu_setup_debug_store(cpu); } -#ifdef CONFIG_X86_64 -int arch_get_kallsym(unsigned int symnum, unsigned long *value, char *type, - char *name) -{ - unsigned int cpu, ncpu = 0; - - if (symnum >= num_possible_cpus()) - return -EINVAL; - - for_each_possible_cpu(cpu) { - if (ncpu++ >= symnum) - break; - } - - *value = (unsigned long)&get_cpu_entry_area(cpu)->entry_trampoline; - *type = 't'; - strlcpy(name, "__entry_SYSCALL_64_trampoline", KSYM_NAME_LEN); - - return 0; -} -#endif - static __init void setup_cpu_entry_area_ptes(void) { #ifdef CONFIG_X86_32 diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index c1fc1ae6b429..4fee5c3003ed 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -434,11 +434,42 @@ static void __init pti_clone_p4d(unsigned long addr) } /* - * Clone the CPU_ENTRY_AREA into the user space visible page table. + * Clone the CPU_ENTRY_AREA and associated data into the user space visible + * page table. */ static void __init pti_clone_user_shared(void) { + unsigned int cpu; + pti_clone_p4d(CPU_ENTRY_AREA_BASE); + + for_each_possible_cpu(cpu) { + /* + * The SYSCALL64 entry code needs to be able to find the + * thread stack and needs one word of scratch space in which + * to spill a register. All of this lives in the TSS, in + * the sp1 and sp2 slots. + * + * This is done for all possible CPUs during boot to ensure + * that it's propagated to all mms. If we were to add one of + * these mappings during CPU hotplug, we would need to take + * some measure to make sure that every mm that subsequently + * ran on that CPU would have the relevant PGD entry in its + * pagetables. The usual vmalloc_fault() mechanism would not + * work for page faults taken in entry_SYSCALL_64 before RSP + * is set up. 
+ */ + + unsigned long va = (unsigned long)&per_cpu(cpu_tss_rw, cpu); + phys_addr_t pa = per_cpu_ptr_to_phys((void *)va); + pte_t *target_pte; + + target_pte = pti_user_pagetable_walk_pte(va); + if (WARN_ON(!target_pte)) + return; + + *target_pte = pfn_pte(pa >> PAGE_SHIFT, PAGE_KERNEL); + } } #else /* CONFIG_X86_64 */ diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 7d68489cfdb1..bddd6b3cee1d 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -7,6 +7,7 @@ #include <linux/export.h> #include <linux/cpu.h> #include <linux/debugfs.h> +#include <linux/ptrace.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> @@ -180,6 +181,19 @@ static void sync_current_stack_to_mm(struct mm_struct *mm) } } +static bool ibpb_needed(struct task_struct *tsk, u64 last_ctx_id) +{ + /* + * Check if the current (previous) task has access to the memory + * of the @tsk (next) task. If access is denied, make sure to + * issue a IBPB to stop user->user Spectre-v2 attacks. + * + * Note: __ptrace_may_access() returns 0 or -ERRNO. + */ + return (tsk && tsk->mm && tsk->mm->context.ctx_id != last_ctx_id && + ptrace_may_access_sched(tsk, PTRACE_MODE_SPEC_IBPB)); +} + void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { @@ -286,18 +300,13 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * one process from doing Spectre-v2 attacks on another. * * As an optimization, flush indirect branches only when - * switching into processes that disable dumping. This - * protects high value processes like gpg, without having - * too high performance overhead. IBPB is *expensive*! - * - * This will not flush branches when switching into kernel - * threads. It will also not flush if we switch to idle - * thread and back to the same process. It will flush if we - * switch to a different non-dumpable process. 
+ * switching into a process that can't be ptraced by the + * current one (as in such a case, the attacker has a much more + * convenient way to tamper with the next process than + * branch buffer poisoning). */ - if (tsk && tsk->mm && - tsk->mm->context.ctx_id != last_ctx_id && - get_dumpable(tsk->mm) != SUID_DUMP_USER) + if (static_cpu_has(X86_FEATURE_USE_IBPB) && + ibpb_needed(tsk, last_ctx_id)) indirect_branch_prediction_barrier(); if (IS_ENABLED(CONFIG_VMAP_STACK)) { |