diff options
Diffstat (limited to 'arch')
48 files changed, 492 insertions, 340 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 24862d15f3a3..7eab9d0a4b43 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -759,6 +759,12 @@ config HAVE_IRQ_EXIT_ON_IRQ_STACK This spares a stack switch and improves cache usage on softirq processing. +config HAVE_SOFTIRQ_ON_OWN_STACK + bool + help + Architecture provides a function to run __do_softirq() on a + seperate stack. + config PGTABLE_LEVELS int default 2 diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 278462186ac4..0840b7e5be65 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -63,6 +63,7 @@ config PARISC select HAVE_FTRACE_MCOUNT_RECORD if HAVE_DYNAMIC_FTRACE select HAVE_KPROBES_ON_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS + select HAVE_SOFTIRQ_ON_OWN_STACK if IRQSTACKS select SET_FS help diff --git a/arch/parisc/include/asm/hardirq.h b/arch/parisc/include/asm/hardirq.h index fad29aa6f45f..1e4fbd0fd944 100644 --- a/arch/parisc/include/asm/hardirq.h +++ b/arch/parisc/include/asm/hardirq.h @@ -12,10 +12,6 @@ #include <linux/threads.h> #include <linux/irq.h> -#ifdef CONFIG_IRQSTACKS -#define __ARCH_HAS_DO_SOFTIRQ -#endif - typedef struct { unsigned int __softirq_pending; unsigned int kernel_stack_usage; diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index 49cd6d2caefb..1632d525ad64 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -17,6 +17,7 @@ #include <linux/types.h> #include <asm/io.h> +#include <asm/softirq_stack.h> #include <asm/smp.h> #include <asm/ldcw.h> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 107bb4319e0e..888d1ad436ce 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -237,6 +237,7 @@ config PPC select MMU_GATHER_PAGE_SIZE select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN + select HAVE_SOFTIRQ_ON_OWN_STACK select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING select HAVE_IRQ_TIME_ACCOUNTING diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index 4f983ca4030a..f3f264e441a7 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -37,8 +37,6 @@ extern int distribute_irqs; struct pt_regs; -#define __ARCH_HAS_DO_SOFTIRQ - #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) /* * Per-cpu stacks for handling critical, debug and machine check diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index cc7a6271b6b4..4009a7b429d7 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -65,6 +65,7 @@ #include <asm/livepatch.h> #include <asm/asm-prototypes.h> #include <asm/hw_irq.h> +#include <asm/softirq_stack.h> #ifdef CONFIG_PPC64 #include <asm/paca.h> diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index c72874f09741..e00c02ab5706 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -182,6 +182,7 @@ config S390 select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE select HAVE_RSEQ + select HAVE_SOFTIRQ_ON_OWN_STACK select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING select HAVE_VIRT_CPU_ACCOUNTING_IDLE diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h index dfbc3c6c0674..58668ffb5488 100644 --- a/arch/s390/include/asm/hardirq.h +++ b/arch/s390/include/asm/hardirq.h @@ -18,7 +18,6 @@ #define or_softirq_pending(x) (S390_lowcore.softirq_pending |= (x)) #define __ARCH_IRQ_STAT -#define __ARCH_HAS_DO_SOFTIRQ #define __ARCH_IRQ_EXIT_IRQS_DISABLED static inline void ack_bad_irq(unsigned int irq) diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index f8a8b9428ae2..a1a2f75f3c09 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -27,6 +27,7 @@ #include <asm/irq.h> #include <asm/hw_irq.h> #include <asm/stacktrace.h> +#include <asm/softirq_stack.h> #include "entry.h" DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat); diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 52646f52f130..bccd0da08128 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -55,6 +55,7 @@ config SUPERH select HAVE_PERF_EVENTS select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_UID16 + select HAVE_SOFTIRQ_ON_OWN_STACK if IRQSTACKS select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS select IRQ_FORCED_THREADING diff --git a/arch/sh/include/asm/irq.h b/arch/sh/include/asm/irq.h index 6d44c32ef047..839551ce398c 100644 --- a/arch/sh/include/asm/irq.h +++ b/arch/sh/include/asm/irq.h @@ -51,7 +51,6 @@ asmlinkage int do_IRQ(unsigned int irq, struct pt_regs *regs); #ifdef CONFIG_IRQSTACKS extern void irq_ctx_init(int cpu); extern void irq_ctx_exit(int cpu); -# define __ARCH_HAS_DO_SOFTIRQ #else # define irq_ctx_init(cpu) do { } while (0) # define irq_ctx_exit(cpu) do { } while (0) diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c index ab5f790b0cd2..ef0f0827cf57 100644 --- a/arch/sh/kernel/irq.c +++ b/arch/sh/kernel/irq.c @@ -20,6 +20,7 @@ #include <linux/uaccess.h> #include <asm/thread_info.h> #include <cpu/mmu_context.h> +#include <asm/softirq_stack.h> atomic_t irq_err_count; diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index c9c34dc52b7d..f0d22d530645 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -97,6 +97,7 @@ config SPARC64 select ARCH_HAS_PTE_SPECIAL select PCI_DOMAINS if PCI select ARCH_HAS_GIGANTIC_PAGE + select HAVE_SOFTIRQ_ON_OWN_STACK config ARCH_PROC_KCORE_TEXT def_bool y diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h index 4d748e93b974..154df2cf19f4 100644 --- a/arch/sparc/include/asm/irq_64.h +++ b/arch/sparc/include/asm/irq_64.h @@ -93,7 +93,6 @@ void arch_trigger_cpumask_backtrace(const struct cpumask *mask, extern void *hardirq_stack[NR_CPUS]; extern void *softirq_stack[NR_CPUS]; -#define __ARCH_HAS_DO_SOFTIRQ #define NO_IRQ 0xffffffff diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index 3ec9f1402aad..c8848bb681a1 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -42,6 +42,7 @@ #include <asm/head.h> #include <asm/hypervisor.h> #include <asm/cacheflush.h> +#include <asm/softirq_stack.h> #include "entry.h" #include "cpumap.h" diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 21f851179ff0..019e0e1a5ee0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -187,6 +187,7 @@ config X86 select HAVE_HW_BREAKPOINT select HAVE_IDE select HAVE_IOREMAP_PROT + select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64 select HAVE_IRQ_TIME_ACCOUNTING select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP @@ -220,6 +221,7 @@ config X86 select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION select HAVE_FUNCTION_ARG_ACCESS_API + select HAVE_SOFTIRQ_ON_OWN_STACK select HAVE_STACKPROTECTOR if CC_HAS_SANE_STACKPROTECTOR select HAVE_STACK_VALIDATION if X86_64 select HAVE_STATIC_CALL diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 0904f5676e4d..a2433ae8a65e 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -249,30 +249,23 @@ static __always_inline bool get_and_clear_inhcall(void) { return false; } static __always_inline void restore_inhcall(bool inhcall) { } #endif -static void __xen_pv_evtchn_do_upcall(void) +static void __xen_pv_evtchn_do_upcall(struct pt_regs *regs) { - irq_enter_rcu(); + struct pt_regs *old_regs = set_irq_regs(regs); + inc_irq_stat(irq_hv_callback_count); xen_hvm_evtchn_do_upcall(); - irq_exit_rcu(); + set_irq_regs(old_regs); } __visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs) { - struct pt_regs *old_regs; + irqentry_state_t state = irqentry_enter(regs); bool inhcall; - irqentry_state_t state; - state = irqentry_enter(regs); - old_regs = set_irq_regs(regs); - - instrumentation_begin(); - run_on_irqstack_cond(__xen_pv_evtchn_do_upcall, regs); - instrumentation_begin(); - - set_irq_regs(old_regs); + run_sysvec_on_irqstack_cond(__xen_pv_evtchn_do_upcall, regs); inhcall = get_and_clear_inhcall(); if (inhcall && !WARN_ON_ONCE(state.exit_rcu)) { diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index ce0464d630a2..400908dff42e 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -754,47 +754,6 @@ SYM_CODE_START_LOCAL_NOALIGN(.Lbad_gs) SYM_CODE_END(.Lbad_gs) .previous -/* - * rdi: New stack pointer points to the top word of the stack - * rsi: Function pointer - * rdx: Function argument (can be NULL if none) - */ -SYM_FUNC_START(asm_call_on_stack) -SYM_INNER_LABEL(asm_call_sysvec_on_stack, SYM_L_GLOBAL) -SYM_INNER_LABEL(asm_call_irq_on_stack, SYM_L_GLOBAL) - /* - * Save the frame pointer unconditionally. This allows the ORC - * unwinder to handle the stack switch. - */ - pushq %rbp - mov %rsp, %rbp - - /* - * The unwinder relies on the word at the top of the new stack - * page linking back to the previous RSP. - */ - mov %rsp, (%rdi) - mov %rdi, %rsp - /* Move the argument to the right place */ - mov %rdx, %rdi - -1: - .pushsection .discard.instr_begin - .long 1b - . - .popsection - - CALL_NOSPEC rsi - -2: - .pushsection .discard.instr_end - .long 2b - . - .popsection - - /* Restore the previous stack pointer from RBP. */ - leaveq - ret -SYM_FUNC_END(asm_call_on_stack) - #ifdef CONFIG_XEN_PV /* * A note on the "critical region" in our callback handler. diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 41e2e2e1b439..5eb3bdf36a41 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -187,23 +187,22 @@ __visible noinstr void func(struct pt_regs *regs, unsigned long error_code) * has to be done in the function body if necessary. */ #define DEFINE_IDTENTRY_IRQ(func) \ -static __always_inline void __##func(struct pt_regs *regs, u8 vector); \ +static void __##func(struct pt_regs *regs, u32 vector); \ \ __visible noinstr void func(struct pt_regs *regs, \ unsigned long error_code) \ { \ irqentry_state_t state = irqentry_enter(regs); \ + u32 vector = (u32)(u8)error_code; \ \ instrumentation_begin(); \ - irq_enter_rcu(); \ kvm_set_cpu_l1tf_flush_l1d(); \ - __##func (regs, (u8)error_code); \ - irq_exit_rcu(); \ + run_irq_on_irqstack_cond(__##func, regs, vector); \ instrumentation_end(); \ irqentry_exit(regs, state); \ } \ \ -static __always_inline void __##func(struct pt_regs *regs, u8 vector) +static noinline void __##func(struct pt_regs *regs, u32 vector) /** * DECLARE_IDTENTRY_SYSVEC - Declare functions for system vector entry points @@ -237,10 +236,8 @@ __visible noinstr void func(struct pt_regs *regs) \ irqentry_state_t state = irqentry_enter(regs); \ \ instrumentation_begin(); \ - irq_enter_rcu(); \ kvm_set_cpu_l1tf_flush_l1d(); \ run_sysvec_on_irqstack_cond(__##func, regs); \ - irq_exit_rcu(); \ instrumentation_end(); \ irqentry_exit(regs, state); \ } \ diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index a8c3d284fa46..95a448fbb44c 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -7,9 +7,12 @@ * Copyright (C) IBM Corporation, 2009 */ +#include <asm/byteorder.h> /* insn_attr_t is defined in inat.h */ #include <asm/inat.h> +#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN) + struct insn_field { union { insn_value_t value; @@ -20,6 +23,48 @@ struct insn_field { unsigned char nbytes; }; +static inline void insn_field_set(struct insn_field *p, insn_value_t v, + unsigned char n) +{ + p->value = v; + p->nbytes = n; +} + +static inline void insn_set_byte(struct insn_field *p, unsigned char n, + insn_byte_t v) +{ + p->bytes[n] = v; +} + +#else + +struct insn_field { + insn_value_t value; + union { + insn_value_t little; + insn_byte_t bytes[4]; + }; + /* !0 if we've run insn_get_xxx() for this field */ + unsigned char got; + unsigned char nbytes; +}; + +static inline void insn_field_set(struct insn_field *p, insn_value_t v, + unsigned char n) +{ + p->value = v; + p->little = __cpu_to_le32(v); + p->nbytes = n; +} + +static inline void insn_set_byte(struct insn_field *p, unsigned char n, + insn_byte_t v) +{ + p->bytes[n] = v; + p->value = __le32_to_cpu(p->little); +} +#endif + struct insn { struct insn_field prefixes; /* * Prefixes diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 528c8a71fe7f..f70d2ca3887e 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -25,8 +25,6 @@ static inline int irq_canonicalize(int irq) extern int irq_init_percpu_irqstack(unsigned int cpu); -#define __ARCH_HAS_DO_SOFTIRQ - struct irq_desc; extern void fixup_irqs(void); diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h index 775816965c6a..9b2a0ff76c73 100644 --- a/arch/x86/include/asm/irq_stack.h +++ b/arch/x86/include/asm/irq_stack.h @@ -7,100 +7,217 @@ #include <asm/processor.h> #ifdef CONFIG_X86_64 -static __always_inline bool irqstack_active(void) -{ - return __this_cpu_read(irq_count) != -1; -} - -void asm_call_on_stack(void *sp, void (*func)(void), void *arg); -void asm_call_sysvec_on_stack(void *sp, void (*func)(struct pt_regs *regs), - struct pt_regs *regs); -void asm_call_irq_on_stack(void *sp, void (*func)(struct irq_desc *desc), - struct irq_desc *desc); -static __always_inline void __run_on_irqstack(void (*func)(void)) -{ - void *tos = __this_cpu_read(hardirq_stack_ptr); - - __this_cpu_add(irq_count, 1); - asm_call_on_stack(tos - 8, func, NULL); - __this_cpu_sub(irq_count, 1); +/* + * Macro to inline switching to an interrupt stack and invoking function + * calls from there. The following rules apply: + * + * - Ordering: + * + * 1. Write the stack pointer into the top most place of the irq + * stack. This ensures that the various unwinders can link back to the + * original stack. + * + * 2. Switch the stack pointer to the top of the irq stack. + * + * 3. Invoke whatever needs to be done (@asm_call argument) + * + * 4. Pop the original stack pointer from the top of the irq stack + * which brings it back to the original stack where it left off. + * + * - Function invocation: + * + * To allow flexible usage of the macro, the actual function code including + * the store of the arguments in the call ABI registers is handed in via + * the @asm_call argument. + * + * - Local variables: + * + * @tos: + * The @tos variable holds a pointer to the top of the irq stack and + * _must_ be allocated in a non-callee saved register as this is a + * restriction coming from objtool. + * + * Note, that (tos) is both in input and output constraints to ensure + * that the compiler does not assume that R11 is left untouched in + * case this macro is used in some place where the per cpu interrupt + * stack pointer is used again afterwards + * + * - Function arguments: + * The function argument(s), if any, have to be defined in register + * variables at the place where this is invoked. Storing the + * argument(s) in the proper register(s) is part of the @asm_call + * + * - Constraints: + * + * The constraints have to be done very carefully because the compiler + * does not know about the assembly call. + * + * output: + * As documented already above the @tos variable is required to be in + * the output constraints to make the compiler aware that R11 cannot be + * reused after the asm() statement. + * + * For builds with CONFIG_UNWIND_FRAME_POINTER ASM_CALL_CONSTRAINT is + * required as well as this prevents certain creative GCC variants from + * misplacing the ASM code. + * + * input: + * - func: + * Immediate, which tells the compiler that the function is referenced. + * + * - tos: + * Register. The actual register is defined by the variable declaration. + * + * - function arguments: + * The constraints are handed in via the 'argconstr' argument list. They + * describe the register arguments which are used in @asm_call. + * + * clobbers: + * Function calls can clobber anything except the callee-saved + * registers. Tell the compiler. + */ +#define call_on_irqstack(func, asm_call, argconstr...) \ +{ \ + register void *tos asm("r11"); \ + \ + tos = ((void *)__this_cpu_read(hardirq_stack_ptr)); \ + \ + asm_inline volatile( \ + "movq %%rsp, (%[tos]) \n" \ + "movq %[tos], %%rsp \n" \ + \ + asm_call \ + \ + "popq %%rsp \n" \ + \ + : "+r" (tos), ASM_CALL_CONSTRAINT \ + : [__func] "i" (func), [tos] "r" (tos) argconstr \ + : "cc", "rax", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", \ + "memory" \ + ); \ } -static __always_inline void -__run_sysvec_on_irqstack(void (*func)(struct pt_regs *regs), - struct pt_regs *regs) -{ - void *tos = __this_cpu_read(hardirq_stack_ptr); - - __this_cpu_add(irq_count, 1); - asm_call_sysvec_on_stack(tos - 8, func, regs); - __this_cpu_sub(irq_count, 1); +/* Macros to assert type correctness for run_*_on_irqstack macros */ +#define assert_function_type(func, proto) \ + static_assert(__builtin_types_compatible_p(typeof(&func), proto)) + +#define assert_arg_type(arg, proto) \ + static_assert(__builtin_types_compatible_p(typeof(arg), proto)) + +/* + * Macro to invoke system vector and device interrupt C handlers. + */ +#define call_on_irqstack_cond(func, regs, asm_call, constr, c_args...) \ +{ \ + /* \ + * User mode entry and interrupt on the irq stack do not \ + * switch stacks. If from user mode the task stack is empty. \ + */ \ + if (user_mode(regs) || __this_cpu_read(hardirq_stack_inuse)) { \ + irq_enter_rcu(); \ + func(c_args); \ + irq_exit_rcu(); \ + } else { \ + /* \ + * Mark the irq stack inuse _before_ and unmark _after_ \ + * switching stacks. Interrupts are disabled in both \ + * places. Invoke the stack switch macro with the call \ + * sequence which matches the above direct invocation. \ + */ \ + __this_cpu_write(hardirq_stack_inuse, true); \ + call_on_irqstack(func, asm_call, constr); \ + __this_cpu_write(hardirq_stack_inuse, false); \ + } \ } -static __always_inline void -__run_irq_on_irqstack(void (*func)(struct irq_desc *desc), - struct irq_desc *desc) -{ - void *tos = __this_cpu_read(hardirq_stack_ptr); - - __this_cpu_add(irq_count, 1); - asm_call_irq_on_stack(tos - 8, func, desc); - __this_cpu_sub(irq_count, 1); +/* + * Function call sequence for __call_on_irqstack() for system vectors. + * + * Note that irq_enter_rcu() and irq_exit_rcu() do not use the input + * mechanism because these functions are global and cannot be optimized out + * when compiling a particular source file which uses one of these macros. + * + * The argument (regs) does not need to be pushed or stashed in a callee + * saved register to be safe vs. the irq_enter_rcu() call because the + * clobbers already prevent the compiler from storing it in a callee + * clobbered register. As the compiler has to preserve @regs for the final + * call to idtentry_exit() anyway, it's likely that it does not cause extra + * effort for this asm magic. + */ +#define ASM_CALL_SYSVEC \ + "call irq_enter_rcu \n" \ + "movq %[arg1], %%rdi \n" \ + "call %P[__func] \n" \ + "call irq_exit_rcu \n" + +#define SYSVEC_CONSTRAINTS , [arg1] "r" (regs) + +#define run_sysvec_on_irqstack_cond(func, regs) \ +{ \ + assert_function_type(func, void (*)(struct pt_regs *)); \ + assert_arg_type(regs, struct pt_regs *); \ + \ + call_on_irqstack_cond(func, regs, ASM_CALL_SYSVEC, \ + SYSVEC_CONSTRAINTS, regs); \ } -#else /* CONFIG_X86_64 */ -static inline bool irqstack_active(void) { return false; } -static inline void __run_on_irqstack(void (*func)(void)) { } -static inline void __run_sysvec_on_irqstack(void (*func)(struct pt_regs *regs), - struct pt_regs *regs) { } -static inline void __run_irq_on_irqstack(void (*func)(struct irq_desc *desc), - struct irq_desc *desc) { } -#endif /* !CONFIG_X86_64 */ - -static __always_inline bool irq_needs_irq_stack(struct pt_regs *regs) -{ - if (IS_ENABLED(CONFIG_X86_32)) - return false; - if (!regs) - return !irqstack_active(); - return !user_mode(regs) && !irqstack_active(); +/* + * As in ASM_CALL_SYSVEC above the clobbers force the compiler to store + * @regs and @vector in callee saved registers. + */ +#define ASM_CALL_IRQ \ + "call irq_enter_rcu \n" \ + "movq %[arg1], %%rdi \n" \ + "movl %[arg2], %%esi \n" \ + "call %P[__func] \n" \ + "call irq_exit_rcu \n" + +#define IRQ_CONSTRAINTS , [arg1] "r" (regs), [arg2] "r" (vector) + +#define run_irq_on_irqstack_cond(func, regs, vector) \ +{ \ + assert_function_type(func, void (*)(struct pt_regs *, u32)); \ + assert_arg_type(regs, struct pt_regs *); \ + assert_arg_type(vector, u32); \ + \ + call_on_irqstack_cond(func, regs, ASM_CALL_IRQ, \ + IRQ_CONSTRAINTS, regs, vector); \ } - -static __always_inline void run_on_irqstack_cond(void (*func)(void), - struct pt_regs *regs) -{ - lockdep_assert_irqs_disabled(); - - if (irq_needs_irq_stack(regs)) - __run_on_irqstack(func); - else - func(); +#define ASM_CALL_SOFTIRQ \ + "call %P[__func] \n" + +/* + * Macro to invoke __do_softirq on the irq stack. This is only called from + * task context when bottom halfs are about to be reenabled and soft + * interrupts are pending to be processed. The interrupt stack cannot be in + * use here. + */ +#define do_softirq_own_stack() \ +{ \ + __this_cpu_write(hardirq_stack_inuse, true); \ + call_on_irqstack(__do_softirq, ASM_CALL_SOFTIRQ); \ + __this_cpu_write(hardirq_stack_inuse, false); \ } -static __always_inline void -run_sysvec_on_irqstack_cond(void (*func)(struct pt_regs *regs), - struct pt_regs *regs) -{ - lockdep_assert_irqs_disabled(); - - if (irq_needs_irq_stack(regs)) - __run_sysvec_on_irqstack(func, regs); - else - func(regs); +#else /* CONFIG_X86_64 */ +/* System vector handlers always run on the stack they interrupted. */ +#define run_sysvec_on_irqstack_cond(func, regs) \ +{ \ + irq_enter_rcu(); \ + func(regs); \ + irq_exit_rcu(); \ } -static __always_inline void -run_irq_on_irqstack_cond(void (*func)(struct irq_desc *desc), struct irq_desc *desc, - struct pt_regs *regs) -{ - lockdep_assert_irqs_disabled(); - - if (irq_needs_irq_stack(regs)) - __run_irq_on_irqstack(func, desc); - else - func(desc); +/* Switches to the irq stack within func() */ +#define run_irq_on_irqstack_cond(func, regs, vector) \ +{ \ + irq_enter_rcu(); \ + func(regs, vector); \ + irq_exit_rcu(); \ } +#endif /* !CONFIG_X86_64 */ + #endif diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h index fdbffec4cfde..5a2baf28a1dc 100644 --- a/arch/x86/include/asm/orc_types.h +++ b/arch/x86/include/asm/orc_types.h @@ -40,6 +40,8 @@ #define ORC_REG_MAX 15 #ifndef __ASSEMBLY__ +#include <asm/byteorder.h> + /* * This struct is more or less a vastly simplified version of the DWARF Call * Frame Information standard. It contains only the necessary parts of DWARF @@ -51,10 +53,18 @@ struct orc_entry { s16 sp_offset; s16 bp_offset; +#if defined(__LITTLE_ENDIAN_BITFIELD) unsigned sp_reg:4; unsigned bp_reg:4; unsigned type:2; unsigned end:1; +#elif defined(__BIG_ENDIAN_BITFIELD) + unsigned bp_reg:4; + unsigned sp_reg:4; + unsigned unused:5; + unsigned end:1; + unsigned type:2; +#endif } __packed; #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c20a52b5534b..dc6d149bf851 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -426,8 +426,6 @@ struct irq_stack { char stack[IRQ_STACK_SIZE]; } __aligned(IRQ_STACK_SIZE); -DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr); - #ifdef CONFIG_X86_32 DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); #else @@ -454,7 +452,8 @@ static inline unsigned long cpu_kernelmode_gs_base(int cpu) return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu); } -DECLARE_PER_CPU(unsigned int, irq_count); +DECLARE_PER_CPU(void *, hardirq_stack_ptr); +DECLARE_PER_CPU(bool, hardirq_stack_inuse); extern asmlinkage void ignore_sysret(void); /* Save actual FS/GS selectors and bases to current->thread */ @@ -473,9 +472,9 @@ struct stack_canary { }; DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); #endif -/* Per CPU softirq stack pointer */ +DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr); DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr); -#endif /* X86_64 */ +#endif /* !X86_64 */ extern unsigned int fpu_kernel_xstate_size; extern unsigned int fpu_user_xstate_size; diff --git a/arch/x86/include/asm/softirq_stack.h b/arch/x86/include/asm/softirq_stack.h new file mode 100644 index 000000000000..889d53d6a0e1 --- /dev/null +++ b/arch/x86/include/asm/softirq_stack.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SOFTIRQ_STACK_H +#define _ASM_X86_SOFTIRQ_STACK_H + +#ifdef CONFIG_X86_64 +# include <asm/irq_stack.h> +#else +# include <asm-generic/softirq_stack.h> +#endif + +#endif diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h index 664d4610d700..8e574c0afef8 100644 --- a/arch/x86/include/asm/unwind_hints.h +++ b/arch/x86/include/asm/unwind_hints.h @@ -48,17 +48,8 @@ UNWIND_HINT_REGS base=\base offset=\offset partial=1 .endm -.macro UNWIND_HINT_FUNC sp_offset=8 - UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=\sp_offset type=UNWIND_HINT_TYPE_CALL -.endm - -/* - * RET_OFFSET: Used on instructions that terminate a function; mostly RETURN - * and sibling calls. On these, sp_offset denotes the expected offset from - * initial_func_cfi. - */ -.macro UNWIND_HINT_RET_OFFSET sp_offset=8 - UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_RET_OFFSET sp_offset=\sp_offset +.macro UNWIND_HINT_FUNC + UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_FUNC .endm #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile index f1bb57b0e41e..cf340d85946a 100644 --- a/arch/x86/kernel/acpi/Makefile +++ b/arch/x86/kernel/acpi/Makefile @@ -1,5 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -OBJECT_FILES_NON_STANDARD_wakeup_$(BITS).o := y obj-$(CONFIG_ACPI) += boot.o obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_$(BITS).o diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index 5d3a0b8fd379..56b6865afb2a 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -1,12 +1,14 @@ /* SPDX-License-Identifier: GPL-2.0-only */ .text #include <linux/linkage.h> +#include <linux/objtool.h> #include <asm/segment.h> #include <asm/pgtable_types.h> #include <asm/page_types.h> #include <asm/msr.h> #include <asm/asm-offsets.h> #include <asm/frame.h> +#include <asm/nospec-branch.h> # Copyright 2003 Pavel Machek <pavel@suse.cz @@ -39,6 +41,7 @@ SYM_FUNC_START(wakeup_long64) movq saved_rbp, %rbp movq saved_rip, %rax + ANNOTATE_RETPOLINE_SAFE jmp *%rax SYM_FUNC_END(wakeup_long64) @@ -126,6 +129,7 @@ SYM_FUNC_START(do_suspend_lowlevel) FRAME_END jmp restore_processor_state SYM_FUNC_END(do_suspend_lowlevel) +STACK_FRAME_NON_STANDARD do_suspend_lowlevel .data saved_rbp: .quad 0 diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 7f4c081f59f0..138e1435ac00 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2137,18 +2137,11 @@ void __init register_lapic_address(unsigned long address) * Local APIC interrupts */ -/** - * spurious_interrupt - Catch all for interrupts raised on unused vectors - * @regs: Pointer to pt_regs on stack - * @vector: The vector number - * - * This is invoked from ASM entry code to catch all interrupts which - * trigger on an entry which is routed to the common_spurious idtentry - * point. - * - * Also called from sysvec_spurious_apic_interrupt(). +/* + * Common handling code for spurious_interrupt and spurious_vector entry + * points below. No point in allowing the compiler to inline it twice. */ -DEFINE_IDTENTRY_IRQ(spurious_interrupt) +static noinline void handle_spurious_interrupt(u8 vector) { u32 v; @@ -2183,9 +2176,23 @@ out: trace_spurious_apic_exit(vector); } +/** + * spurious_interrupt - Catch all for interrupts raised on unused vectors + * @regs: Pointer to pt_regs on stack + * @vector: The vector number + * + * This is invoked from ASM entry code to catch all interrupts which + * trigger on an entry which is routed to the common_spurious idtentry + * point. + */ +DEFINE_IDTENTRY_IRQ(spurious_interrupt) +{ + handle_spurious_interrupt(vector); +} + DEFINE_IDTENTRY_SYSVEC(sysvec_spurious_apic_interrupt) { - __spurious_interrupt(regs, SPURIOUS_APIC_VECTOR); + handle_spurious_interrupt(SPURIOUS_APIC_VECTOR); } /* diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 35ad8480c464..c5e23f351f67 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1739,8 +1739,8 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); -DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr); -DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1; +DEFINE_PER_CPU(void *, hardirq_stack_ptr); +DEFINE_PER_CPU(bool, hardirq_stack_inuse); DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; EXPORT_PER_CPU_SYMBOL(__preempt_count); diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 1dd851397bd9..5601b95944fa 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -128,12 +128,21 @@ static __always_inline bool in_exception_stack(unsigned long *stack, struct stac static __always_inline bool in_irq_stack(unsigned long *stack, struct stack_info *info) { - unsigned long *end = (unsigned long *)this_cpu_read(hardirq_stack_ptr); - unsigned long *begin = end - (IRQ_STACK_SIZE / sizeof(long)); + unsigned long *end = (unsigned long *)this_cpu_read(hardirq_stack_ptr); + unsigned long *begin; /* - * This is a software stack, so 'end' can be a valid stack pointer. - * It just means the stack is empty. + * @end points directly to the top most stack entry to avoid a -8 + * adjustment in the stack switch hotpath. Adjust it back before + * calculating @begin. + */ + end++; + begin = end - (IRQ_STACK_SIZE / sizeof(long)); + + /* + * Due to the switching logic RSP can never be == @end because the + * final operation is 'popq %rsp' which means after that RSP points + * to the original stack and not to @end. */ if (stack < begin || stack >= end) return false; @@ -143,8 +152,9 @@ static __always_inline bool in_irq_stack(unsigned long *stack, struct stack_info info->end = end; /* - * The next stack pointer is the first thing pushed by the entry code - * after switching to the irq stack. + * The next stack pointer is stored at the top of the irq stack + * before switching to the irq stack. Actual stack entries are all + * below that. */ info->next_sp = (unsigned long *)*(end - 1); diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 0d54099c2a3a..7c273846c687 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -184,6 +184,7 @@ SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) * It is also used to copy the retq for trampolines. */ SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK) + UNWIND_HINT_FUNC retq SYM_FUNC_END(ftrace_epilogue) @@ -276,7 +277,7 @@ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL) restore_mcount_regs 8 /* Restore flags */ popfq - UNWIND_HINT_RET_OFFSET + UNWIND_HINT_FUNC jmp ftrace_epilogue SYM_FUNC_END(ftrace_regs_caller) @@ -333,8 +334,7 @@ SYM_FUNC_START(ftrace_graph_caller) retq SYM_FUNC_END(ftrace_graph_caller) -SYM_CODE_START(return_to_handler) - UNWIND_HINT_EMPTY +SYM_FUNC_START(return_to_handler) subq $24, %rsp /* Save the return values */ @@ -349,5 +349,5 @@ SYM_CODE_START(return_to_handler) movq (%rsp), %rax addq $24, %rsp JMP_NOSPEC rdi -SYM_CODE_END(return_to_handler) +SYM_FUNC_END(return_to_handler) #endif diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index c5dd50369e2f..1507b983cd8d 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -227,7 +227,7 @@ static __always_inline void handle_irq(struct irq_desc *desc, struct pt_regs *regs) { if (IS_ENABLED(CONFIG_X86_64)) - run_irq_on_irqstack_cond(desc->handle_irq, desc, regs); + generic_handle_irq_desc(desc); else __handle_irq(desc, regs); } diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 0b79efc87be5..044902d5a3c4 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -22,6 +22,7 @@ #include <asm/apic.h> #include <asm/nospec-branch.h> +#include <asm/softirq_stack.h> #ifdef CONFIG_DEBUG_STACKOVERFLOW diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 440eed558558..1c0fb96b9e39 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -20,6 +20,7 @@ #include <linux/sched/task_stack.h> #include <asm/cpu_entry_area.h> +#include <asm/softirq_stack.h> #include <asm/irq_stack.h> #include <asm/io_apic.h> #include <asm/apic.h> @@ -48,7 +49,8 @@ static int map_irq_stack(unsigned int cpu) if (!va) return -ENOMEM; - per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE; + /* Store actual TOS to avoid adjustment in the hotpath */ + per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8; return 0; } #else @@ -60,7 +62,8 @@ static int map_irq_stack(unsigned int cpu) { void *va = per_cpu_ptr(&irq_stack_backing_store, cpu); - per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE; + /* Store actual TOS to avoid adjustment in the hotpath */ + per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8; return 0; } #endif @@ -71,8 +74,3 @@ int irq_init_percpu_irqstack(unsigned int cpu) return 0; return map_irq_stack(cpu); } - -void do_softirq_own_stack(void) -{ - run_on_irqstack_cond(__do_softirq, NULL); -} diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ad582f9ac5a6..d08307df69ad 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -539,7 +539,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) int cpu = smp_processor_id(); WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && - this_cpu_read(irq_count) != -1); + this_cpu_read(hardirq_stack_inuse)); if (!test_thread_flag(TIF_NEED_FPU_LOAD)) switch_fpu_prepare(prev_fpu, cpu); diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 73f800100066..2a1d47f47eee 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -471,7 +471,7 @@ bool unwind_next_frame(struct unwind_state *state) break; case ORC_REG_SP_INDIRECT: - sp = state->sp + orc->sp_offset; + sp = state->sp; indirect = true; break; @@ -521,6 +521,9 @@ bool unwind_next_frame(struct unwind_state *state) if (indirect) { if (!deref_stack_reg(state, sp, &sp)) goto err; + + if (orc->sp_reg == ORC_REG_SP_INDIRECT) + sp += orc->sp_offset; } /* Find IP, SP and possibly regs: */ diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 404279563891..435630a6ec97 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -5,6 +5,7 @@ * Copyright (C) IBM Corporation, 2002, 2004, 2009 */ +#include <linux/kernel.h> #ifdef __KERNEL__ #include <linux/string.h> #else @@ -15,15 +16,28 @@ #include <asm/emulate_prefix.h> +#define leXX_to_cpu(t, r) \ +({ \ + __typeof__(t) v; \ + switch (sizeof(t)) { \ + case 4: v = le32_to_cpu(r); break; \ + case 2: v = le16_to_cpu(r); break; \ + case 1: v = r; break; \ + default: \ + BUILD_BUG(); break; \ + } \ + v; \ +}) + /* Verify next sizeof(t) bytes can be on the same instruction */ #define validate_next(t, insn, n) \ ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) #define __get_next(t, insn) \ - ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) + ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); leXX_to_cpu(t, r); }) #define __peek_nbyte_next(t, insn, n) \ - ({ t r = *(t*)((insn)->next_byte + n); r; }) + ({ t r = *(t*)((insn)->next_byte + n); leXX_to_cpu(t, r); }) #define get_next(t, insn) \ ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); }) @@ -147,9 +161,9 @@ found: b = insn->prefixes.bytes[3]; for (i = 0; i < nb; i++) if (prefixes->bytes[i] == lb) - prefixes->bytes[i] = b; + insn_set_byte(prefixes, i, b); } - insn->prefixes.bytes[3] = lb; + insn_set_byte(&insn->prefixes, 3, lb); } /* Decode REX prefix */ @@ -157,8 +171,7 @@ found: b = peek_next(insn_byte_t, insn); attr = inat_get_opcode_attribute(b); if (inat_is_rex_prefix(attr)) { - insn->rex_prefix.value = b; - insn->rex_prefix.nbytes = 1; + insn_field_set(&insn->rex_prefix, b, 1); insn->next_byte++; if (X86_REX_W(b)) /* REX.W overrides opnd_size */ @@ -181,13 +194,13 @@ found: if (X86_MODRM_MOD(b2) != 3) goto vex_end; } - insn->vex_prefix.bytes[0] = b; - insn->vex_prefix.bytes[1] = b2; + insn_set_byte(&insn->vex_prefix, 0, b); + insn_set_byte(&insn->vex_prefix, 1, b2); if (inat_is_evex_prefix(attr)) { b2 = peek_nbyte_next(insn_byte_t, insn, 2); - insn->vex_prefix.bytes[2] = b2; + insn_set_byte(&insn->vex_prefix, 2, b2); b2 = peek_nbyte_next(insn_byte_t, insn, 3); - insn->vex_prefix.bytes[3] = b2; + insn_set_byte(&insn->vex_prefix, 3, b2); insn->vex_prefix.nbytes = 4; insn->next_byte += 4; if (insn->x86_64 && X86_VEX_W(b2)) @@ -195,7 +208,7 @@ found: insn->opnd_bytes = 8; } else if (inat_is_vex3_prefix(attr)) { b2 = peek_nbyte_next(insn_byte_t, insn, 2); - insn->vex_prefix.bytes[2] = b2; + insn_set_byte(&insn->vex_prefix, 2, b2); insn->vex_prefix.nbytes = 3; insn->next_byte += 3; if (insn->x86_64 && X86_VEX_W(b2)) @@ -207,7 +220,7 @@ found: * Makes it easier to decode vex.W, vex.vvvv, * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0. */ - insn->vex_prefix.bytes[2] = b2 & 0x7f; + insn_set_byte(&insn->vex_prefix, 2, b2 & 0x7f); insn->vex_prefix.nbytes = 2; insn->next_byte += 2; } @@ -243,7 +256,7 @@ void insn_get_opcode(struct insn *insn) /* Get first opcode */ op = get_next(insn_byte_t, insn); - opcode->bytes[0] = op; + insn_set_byte(opcode, 0, op); opcode->nbytes = 1; /* Check if there is VEX prefix or not */ @@ -295,8 +308,7 @@ void insn_get_modrm(struct insn *insn) if (inat_has_modrm(insn->attr)) { mod = get_next(insn_byte_t, insn); - modrm->value = mod; - modrm->nbytes = 1; + insn_field_set(modrm, mod, 1); if (inat_is_group(insn->attr)) { pfx_id = insn_last_prefix_id(insn); insn->attr = inat_get_group_attribute(mod, pfx_id, @@ -334,7 +346,7 @@ int insn_rip_relative(struct insn *insn) * For rip-relative instructions, the mod field (top 2 bits) * is zero and the r/m field (bottom 3 bits) is 0x5. */ - return (modrm->nbytes && (modrm->value & 0xc7) == 0x5); + return (modrm->nbytes && (modrm->bytes[0] & 0xc7) == 0x5); } /** @@ -353,11 +365,11 @@ void insn_get_sib(struct insn *insn) if (!insn->modrm.got) insn_get_modrm(insn); if (insn->modrm.nbytes) { - modrm = (insn_byte_t)insn->modrm.value; + modrm = insn->modrm.bytes[0]; if (insn->addr_bytes != 2 && X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { - insn->sib.value = get_next(insn_byte_t, insn); - insn->sib.nbytes = 1; + insn_field_set(&insn->sib, + get_next(insn_byte_t, insn), 1); } } insn->sib.got = 1; @@ -407,19 +419,18 @@ void insn_get_displacement(struct insn *insn) if (mod == 3) goto out; if (mod == 1) { - insn->displacement.value = get_next(signed char, insn); - insn->displacement.nbytes = 1; + insn_field_set(&insn->displacement, + get_next(signed char, insn), 1); } else if (insn->addr_bytes == 2) { if ((mod == 0 && rm == 6) || mod == 2) { - insn->displacement.value = - get_next(short, insn); - insn->displacement.nbytes = 2; + insn_field_set(&insn->displacement, + get_next(short, insn), 2); } } else { if ((mod == 0 && rm == 5) || mod == 2 || (mod == 0 && base == 5)) { - insn->displacement.value = get_next(int, insn); - insn->displacement.nbytes = 4; + insn_field_set(&insn->displacement, + get_next(int, insn), 4); } } } @@ -435,18 +446,14 @@ static int __get_moffset(struct insn *insn) { switch (insn->addr_bytes) { case 2: - insn->moffset1.value = get_next(short, insn); - insn->moffset1.nbytes = 2; + insn_field_set(&insn->moffset1, get_next(short, insn), 2); break; case 4: - insn->moffset1.value = get_next(int, insn); - insn->moffset1.nbytes = 4; + insn_field_set(&insn->moffset1, get_next(int, insn), 4); break; case 8: - insn->moffset1.value = get_next(int, insn); - insn->moffset1.nbytes = 4; - insn->moffset2.value = get_next(int, insn); - insn->moffset2.nbytes = 4; + insn_field_set(&insn->moffset1, get_next(int, insn), 4); + insn_field_set(&insn->moffset2, get_next(int, insn), 4); break; default: /* opnd_bytes must be modified manually */ goto err_out; @@ -464,13 +471,11 @@ static int __get_immv32(struct insn *insn) { switch (insn->opnd_bytes) { case 2: - insn->immediate.value = get_next(short, insn); - insn->immediate.nbytes = 2; + insn_field_set(&insn->immediate, get_next(short, insn), 2); break; case 4: case 8: - insn->immediate.value = get_next(int, insn); - insn->immediate.nbytes = 4; + insn_field_set(&insn->immediate, get_next(int, insn), 4); break; default: /* opnd_bytes must be modified manually */ goto err_out; @@ -487,18 +492,15 @@ static int __get_immv(struct insn *insn) { switch (insn->opnd_bytes) { case 2: - insn->immediate1.value = get_next(short, insn); - insn->immediate1.nbytes = 2; + insn_field_set(&insn->immediate1, get_next(short, insn), 2); break; case 4: - insn->immediate1.value = get_next(int, insn); + insn_field_set(&insn->immediate1, get_next(int, insn), 4); insn->immediate1.nbytes = 4; break; case 8: - insn->immediate1.value = get_next(int, insn); - insn->immediate1.nbytes = 4; - insn->immediate2.value = get_next(int, insn); - insn->immediate2.nbytes = 4; + insn_field_set(&insn->immediate1, get_next(int, insn), 4); + insn_field_set(&insn->immediate2, get_next(int, insn), 4); break; default: /* opnd_bytes must be modified manually */ goto err_out; @@ -515,12 +517,10 @@ static int __get_immptr(struct insn *insn) { switch (insn->opnd_bytes) { case 2: - insn->immediate1.value = get_next(short, insn); - insn->immediate1.nbytes = 2; + insn_field_set(&insn->immediate1, get_next(short, insn), 2); break; case 4: - insn->immediate1.value = get_next(int, insn); - insn->immediate1.nbytes = 4; + insn_field_set(&insn->immediate1, get_next(int, insn), 4); break; case 8: /* ptr16:64 is not exist (no segment) */ @@ -528,8 +528,7 @@ static int __get_immptr(struct insn *insn) default: /* opnd_bytes must be modified manually */ goto err_out; } - insn->immediate2.value = get_next(unsigned short, insn); - insn->immediate2.nbytes = 2; + insn_field_set(&insn->immediate2, get_next(unsigned short, insn), 2); insn->immediate1.got = insn->immediate2.got = 1; return 1; @@ -565,22 +564,17 @@ void insn_get_immediate(struct insn *insn) switch (inat_immediate_size(insn->attr)) { case INAT_IMM_BYTE: - insn->immediate.value = get_next(signed char, insn); - insn->immediate.nbytes = 1; + insn_field_set(&insn->immediate, get_next(signed char, insn), 1); break; case INAT_IMM_WORD: - insn->immediate.value = get_next(short, insn); - insn->immediate.nbytes = 2; + insn_field_set(&insn->immediate, get_next(short, insn), 2); break; case INAT_IMM_DWORD: - insn->immediate.value = get_next(int, insn); - insn->immediate.nbytes = 4; + insn_field_set(&insn->immediate, get_next(int, insn), 4); break; case INAT_IMM_QWORD: - insn->immediate1.value = get_next(int, insn); - insn->immediate1.nbytes = 4; - insn->immediate2.value = get_next(int, insn); - insn->immediate2.nbytes = 4; + insn_field_set(&insn->immediate1, get_next(int, insn), 4); + insn_field_set(&insn->immediate2, get_next(int, insn), 4); break; case INAT_IMM_PTR: if (!__get_immptr(insn)) @@ -599,8 +593,7 @@ void insn_get_immediate(struct insn *insn) goto err_out; } if (inat_has_second_immediate(insn->attr)) { - insn->immediate2.value = get_next(signed char, insn); - insn->immediate2.nbytes = 1; + insn_field_set(&insn->immediate2, get_next(signed char, insn), 1); } done: insn->immediate.got = 1; diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index b4c43a9b1483..f6fb1d218dcc 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -28,7 +28,7 @@ SYM_FUNC_START_NOALIGN(__x86_retpoline_\reg) jmp .Lspec_trap_\@ .Ldo_rop_\@: mov %\reg, (%_ASM_SP) - UNWIND_HINT_RET_OFFSET + UNWIND_HINT_FUNC ret SYM_FUNC_END(__x86_retpoline_\reg) diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S index 43b4d864817e..d2ccadc247e6 100644 --- a/arch/x86/platform/pvh/head.S +++ b/arch/x86/platform/pvh/head.S @@ -16,6 +16,7 @@ #include <asm/boot.h> #include <asm/processor-flags.h> #include <asm/msr.h> +#include <asm/nospec-branch.h> #include <xen/interface/elfnote.h> __HEAD @@ -105,6 +106,7 @@ SYM_CODE_START_LOCAL(pvh_start_xen) /* startup_64 expects boot_params in %rsi. */ mov $_pa(pvh_bootparams), %rsi mov $_pa(startup_64), %rax + ANNOTATE_RETPOLINE_SAFE jmp *%rax #else /* CONFIG_X86_64 */ diff --git a/arch/x86/power/Makefile b/arch/x86/power/Makefile index 6907b523e856..3ff80156f21a 100644 --- a/arch/x86/power/Makefile +++ b/arch/x86/power/Makefile @@ -1,5 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -OBJECT_FILES_NON_STANDARD_hibernate_asm_$(BITS).o := y # __restore_processor_state() restores %gs after S3 resume and so should not # itself be stack-protected diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S index 7918b8415f13..d9bed596d849 100644 --- a/arch/x86/power/hibernate_asm_64.S +++ b/arch/x86/power/hibernate_asm_64.S @@ -21,6 +21,53 @@ #include <asm/asm-offsets.h> #include <asm/processor-flags.h> #include <asm/frame.h> +#include <asm/nospec-branch.h> + + /* code below belongs to the image kernel */ + .align PAGE_SIZE +SYM_FUNC_START(restore_registers) + /* go back to the original page tables */ + movq %r9, %cr3 + + /* Flush TLB, including "global" things (vmalloc) */ + movq mmu_cr4_features(%rip), %rax + movq %rax, %rdx + andq $~(X86_CR4_PGE), %rdx + movq %rdx, %cr4; # turn off PGE + movq %cr3, %rcx; # flush TLB + movq %rcx, %cr3 + movq %rax, %cr4; # turn PGE back on + + /* We don't restore %rax, it must be 0 anyway */ + movq $saved_context, %rax + movq pt_regs_sp(%rax), %rsp + movq pt_regs_bp(%rax), %rbp + movq pt_regs_si(%rax), %rsi + movq pt_regs_di(%rax), %rdi + movq pt_regs_bx(%rax), %rbx + movq pt_regs_cx(%rax), %rcx + movq pt_regs_dx(%rax), %rdx + movq pt_regs_r8(%rax), %r8 + movq pt_regs_r9(%rax), %r9 + movq pt_regs_r10(%rax), %r10 + movq pt_regs_r11(%rax), %r11 + movq pt_regs_r12(%rax), %r12 + movq pt_regs_r13(%rax), %r13 + movq pt_regs_r14(%rax), %r14 + movq pt_regs_r15(%rax), %r15 + pushq pt_regs_flags(%rax) + popfq + + /* Saved in save_processor_state. */ + lgdt saved_context_gdt_desc(%rax) + + xorl %eax, %eax + + /* tell the hibernation core that we've just restored the memory */ + movq %rax, in_suspend(%rip) + + ret +SYM_FUNC_END(restore_registers) SYM_FUNC_START(swsusp_arch_suspend) movq $saved_context, %rax @@ -52,7 +99,7 @@ SYM_FUNC_START(swsusp_arch_suspend) ret SYM_FUNC_END(swsusp_arch_suspend) -SYM_CODE_START(restore_image) +SYM_FUNC_START(restore_image) /* prepare to jump to the image kernel */ movq restore_jump_address(%rip), %r8 movq restore_cr3(%rip), %r9 @@ -66,11 +113,12 @@ SYM_CODE_START(restore_image) /* jump to relocated restore code */ movq relocated_restore_code(%rip), %rcx + ANNOTATE_RETPOLINE_SAFE jmpq *%rcx -SYM_CODE_END(restore_image) +SYM_FUNC_END(restore_image) /* code below has been relocated to a safe page */ -SYM_CODE_START(core_restore_code) +SYM_FUNC_START(core_restore_code) /* switch to temporary page tables */ movq %rax, %cr3 /* flush TLB */ @@ -97,51 +145,6 @@ SYM_CODE_START(core_restore_code) .Ldone: /* jump to the restore_registers address from the image header */ + ANNOTATE_RETPOLINE_SAFE jmpq *%r8 -SYM_CODE_END(core_restore_code) - - /* code below belongs to the image kernel */ - .align PAGE_SIZE -SYM_FUNC_START(restore_registers) - /* go back to the original page tables */ - movq %r9, %cr3 - - /* Flush TLB, including "global" things (vmalloc) */ - movq mmu_cr4_features(%rip), %rax - movq %rax, %rdx - andq $~(X86_CR4_PGE), %rdx - movq %rdx, %cr4; # turn off PGE - movq %cr3, %rcx; # flush TLB - movq %rcx, %cr3 - movq %rax, %cr4; # turn PGE back on - - /* We don't restore %rax, it must be 0 anyway */ - movq $saved_context, %rax - movq pt_regs_sp(%rax), %rsp - movq pt_regs_bp(%rax), %rbp - movq pt_regs_si(%rax), %rsi - movq pt_regs_di(%rax), %rdi - movq pt_regs_bx(%rax), %rbx - movq pt_regs_cx(%rax), %rcx - movq pt_regs_dx(%rax), %rdx - movq pt_regs_r8(%rax), %r8 - movq pt_regs_r9(%rax), %r9 - movq pt_regs_r10(%rax), %r10 - movq pt_regs_r11(%rax), %r11 - movq pt_regs_r12(%rax), %r12 - movq pt_regs_r13(%rax), %r13 - movq pt_regs_r14(%rax), %r14 - movq pt_regs_r15(%rax), %r15 - pushq pt_regs_flags(%rax) - popfq - - /* Saved in save_processor_state. */ - lgdt saved_context_gdt_desc(%rax) - - xorl %eax, %eax - - /* tell the hibernation core that we've just restored the memory */ - movq %rax, in_suspend(%rip) - - ret -SYM_FUNC_END(restore_registers) +SYM_FUNC_END(core_restore_code) diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile index 55b1ab378974..bddfc9a46645 100644 --- a/arch/x86/tools/Makefile +++ b/arch/x86/tools/Makefile @@ -29,14 +29,14 @@ posttest: $(obj)/insn_decoder_test vmlinux $(obj)/insn_sanity hostprogs += insn_decoder_test insn_sanity # -I needed for generated C source and C source which in the kernel tree. -HOSTCFLAGS_insn_decoder_test.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/uapi/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/uapi/ +HOSTCFLAGS_insn_decoder_test.o := -Wall -I$(srctree)/tools/arch/x86/lib/ -I$(srctree)/tools/arch/x86/include/ -I$(objtree)/arch/x86/lib/ -HOSTCFLAGS_insn_sanity.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ +HOSTCFLAGS_insn_sanity.o := -Wall -I$(srctree)/tools/arch/x86/lib/ -I$(srctree)/tools/arch/x86/include/ -I$(objtree)/arch/x86/lib/ # Dependencies are also needed. -$(obj)/insn_decoder_test.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c +$(obj)/insn_decoder_test.o: $(srctree)/tools/arch/x86/lib/insn.c $(srctree)/tools/arch/x86/lib/inat.c $(srctree)/tools/arch/x86/include/asm/inat_types.h $(srctree)/tools/arch/x86/include/asm/inat.h $(srctree)/tools/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c -$(obj)/insn_sanity.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c +$(obj)/insn_sanity.o: $(srctree)/tools/arch/x86/lib/insn.c $(srctree)/tools/arch/x86/lib/inat.c $(srctree)/tools/arch/x86/include/asm/inat_types.h $(srctree)/tools/arch/x86/include/asm/inat.h $(srctree)/tools/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c HOST_EXTRACFLAGS += -I$(srctree)/tools/include hostprogs += relocs diff --git a/arch/x86/tools/insn_sanity.c b/arch/x86/tools/insn_sanity.c index 185ceba9d289..c6a0000ae635 100644 --- a/arch/x86/tools/insn_sanity.c +++ b/arch/x86/tools/insn_sanity.c @@ -14,10 +14,6 @@ #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> - -#define unlikely(cond) (cond) -#define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0])) - #include <asm/insn.h> #include <inat.c> #include <insn.c> diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index fc5c5ba4aacb..40b5779fce21 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -1,5 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -OBJECT_FILES_NON_STANDARD_xen-asm.o := y ifdef CONFIG_FUNCTION_TRACER # Do not profile debug and lowlevel utilities diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 02f31341e435..1e626444712b 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -14,6 +14,7 @@ #include <asm/thread_info.h> #include <asm/asm.h> #include <asm/frame.h> +#include <asm/unwind_hints.h> #include <xen/interface/xen.h> @@ -118,6 +119,7 @@ SYM_FUNC_END(xen_read_cr2_direct); .macro xen_pv_trap name SYM_CODE_START(xen_\name) + UNWIND_HINT_EMPTY pop %rcx pop %r11 jmp \name @@ -157,6 +159,7 @@ xen_pv_trap asm_exc_xen_hypervisor_callback SYM_CODE_START(xen_early_idt_handler_array) i = 0 .rept NUM_EXCEPTION_VECTORS + UNWIND_HINT_EMPTY pop %rcx pop %r11 jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE @@ -183,6 +186,7 @@ hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 * rsp->rax } */ SYM_CODE_START(xen_iret) + UNWIND_HINT_EMPTY pushq $0 jmp hypercall_iret SYM_CODE_END(xen_iret) @@ -203,7 +207,8 @@ SYM_CODE_END(xen_iret) */ /* Normal 64-bit system call target */ -SYM_FUNC_START(xen_syscall_target) +SYM_CODE_START(xen_syscall_target) + UNWIND_HINT_EMPTY popq %rcx popq %r11 @@ -216,12 +221,13 @@ SYM_FUNC_START(xen_syscall_target) movq $__USER_CS, 1*8(%rsp) jmp entry_SYSCALL_64_after_hwframe -SYM_FUNC_END(xen_syscall_target) +SYM_CODE_END(xen_syscall_target) #ifdef CONFIG_IA32_EMULATION /* 32-bit compat syscall target */ -SYM_FUNC_START(xen_syscall32_target) +SYM_CODE_START(xen_syscall32_target) + UNWIND_HINT_EMPTY popq %rcx popq %r11 @@ -234,10 +240,11 @@ SYM_FUNC_START(xen_syscall32_target) movq $__USER32_CS, 1*8(%rsp) jmp entry_SYSCALL_compat_after_hwframe -SYM_FUNC_END(xen_syscall32_target) +SYM_CODE_END(xen_syscall32_target) /* 32-bit compat sysenter target */ -SYM_FUNC_START(xen_sysenter_target) +SYM_CODE_START(xen_sysenter_target) + UNWIND_HINT_EMPTY /* * NB: Xen is polite and clears TF from EFLAGS for us. This means * that we don't need to guard against single step exceptions here. @@ -254,17 +261,18 @@ SYM_FUNC_START(xen_sysenter_target) movq $__USER32_CS, 1*8(%rsp) jmp entry_SYSENTER_compat_after_hwframe -SYM_FUNC_END(xen_sysenter_target) +SYM_CODE_END(xen_sysenter_target) #else /* !CONFIG_IA32_EMULATION */ -SYM_FUNC_START_ALIAS(xen_syscall32_target) -SYM_FUNC_START(xen_sysenter_target) +SYM_CODE_START(xen_syscall32_target) +SYM_CODE_START(xen_sysenter_target) + UNWIND_HINT_EMPTY lea 16(%rsp), %rsp /* strip %rcx, %r11 */ mov $-ENOSYS, %rax pushq $0 jmp hypercall_iret -SYM_FUNC_END(xen_sysenter_target) -SYM_FUNC_END_ALIAS(xen_syscall32_target) +SYM_CODE_END(xen_sysenter_target) +SYM_CODE_END(xen_syscall32_target) #endif /* CONFIG_IA32_EMULATION */ diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 2d7c8f34f56c..cb6538ae2fe0 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -68,8 +68,9 @@ SYM_CODE_END(asm_cpu_bringup_and_idle) .balign PAGE_SIZE SYM_CODE_START(hypercall_page) .rept (PAGE_SIZE / 32) - UNWIND_HINT_EMPTY - .skip 32 + UNWIND_HINT_FUNC + .skip 31, 0x90 + ret .endr #define HYPERCALL(n) \ |