diff options
Diffstat (limited to 'arch/powerpc/kernel/interrupt.c')
-rw-r--r-- | arch/powerpc/kernel/interrupt.c | 393 |
1 files changed, 261 insertions, 132 deletions
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 05fa3ae56e25..acd6a3f5ec9d 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -26,6 +26,83 @@ unsigned long global_dbcr0[NR_CPUS]; typedef long (*syscall_fn)(long, long, long, long, long, long); +#ifdef CONFIG_PPC_BOOK3S_64 +DEFINE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant); +static inline bool exit_must_hard_disable(void) +{ + return static_branch_unlikely(&interrupt_exit_not_reentrant); +} +#elif defined(CONFIG_PPC64) +static inline bool exit_must_hard_disable(void) +{ + return false; +} +#endif + +/* + * local irqs must be disabled. Returns false if the caller must re-enable + * them, check for new work, and try again. + * + * This should be called with local irqs disabled, but if they were previously + * enabled when the interrupt handler returns (indicating a process-context / + * synchronous interrupt) then irqs_enabled should be true. + */ +static notrace __always_inline bool prep_irq_for_user_exit(void) +{ + user_enter_irqoff(); + /* This must be done with RI=1 because tracing may touch vmaps */ + trace_hardirqs_on(); + +#ifdef CONFIG_PPC32 + __hard_EE_RI_disable(); +#else + if (exit_must_hard_disable()) + __hard_EE_RI_disable(); + + /* This pattern matches prep_irq_for_idle */ + if (unlikely(lazy_irq_pending_nocheck())) { + if (exit_must_hard_disable()) { + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + __hard_RI_enable(); + } + trace_hardirqs_off(); + user_exit_irqoff(); + + return false; + } +#endif + return true; +} + +/* + * restartable is true then EE/RI can be left on because interrupts are handled + * with a restart sequence. + */ +static notrace __always_inline bool prep_irq_for_kernel_enabled_exit(bool restartable) +{ + /* This must be done with RI=1 because tracing may touch vmaps */ + trace_hardirqs_on(); + +#ifdef CONFIG_PPC32 + __hard_EE_RI_disable(); +#else + if (exit_must_hard_disable() || !restartable) + __hard_EE_RI_disable(); + + /* This pattern matches prep_irq_for_idle */ + if (unlikely(lazy_irq_pending_nocheck())) { + if (exit_must_hard_disable() || !restartable) { + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + __hard_RI_enable(); + } + trace_hardirqs_off(); + + return false; + } +#endif + return true; +} + /* Has to run notrace because it is entered not completely "reconciled" */ notrace long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, @@ -144,71 +221,6 @@ notrace long system_call_exception(long r3, long r4, long r5, return f(r3, r4, r5, r6, r7, r8); } -/* - * local irqs must be disabled. Returns false if the caller must re-enable - * them, check for new work, and try again. - * - * This should be called with local irqs disabled, but if they were previously - * enabled when the interrupt handler returns (indicating a process-context / - * synchronous interrupt) then irqs_enabled should be true. - */ -static notrace __always_inline bool __prep_irq_for_enabled_exit(bool clear_ri) -{ - /* This must be done with RI=1 because tracing may touch vmaps */ - trace_hardirqs_on(); - - /* This pattern matches prep_irq_for_idle */ - if (clear_ri) - __hard_EE_RI_disable(); - else - __hard_irq_disable(); -#ifdef CONFIG_PPC64 - if (unlikely(lazy_irq_pending_nocheck())) { - /* Took an interrupt, may have more exit work to do. */ - if (clear_ri) - __hard_RI_enable(); - trace_hardirqs_off(); - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; - - return false; - } - local_paca->irq_happened = 0; - irq_soft_mask_set(IRQS_ENABLED); -#endif - return true; -} - -static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri, bool irqs_enabled) -{ - if (__prep_irq_for_enabled_exit(clear_ri)) - return true; - - /* - * Must replay pending soft-masked interrupts now. Don't just - * local_irq_enabe(); local_irq_disable(); because if we are - * returning from an asynchronous interrupt here, another one - * might hit after irqs are enabled, and it would exit via this - * same path allowing another to fire, and so on unbounded. - * - * If interrupts were enabled when this interrupt exited, - * indicating a process context (synchronous) interrupt, - * local_irq_enable/disable can be used, which will enable - * interrupts rather than keeping them masked (unclear how - * much benefit this is over just replaying for all cases, - * because we immediately disable again, so all we're really - * doing is allowing hard interrupts to execute directly for - * a very small time, rather than being masked and replayed). - */ - if (irqs_enabled) { - local_irq_enable(); - local_irq_disable(); - } else { - replay_soft_interrupts(); - } - - return false; -} - static notrace void booke_load_dbcr0(void) { #ifdef CONFIG_PPC_ADV_DEBUG_REGS @@ -231,57 +243,11 @@ static notrace void booke_load_dbcr0(void) #endif } -/* - * This should be called after a syscall returns, with r3 the return value - * from the syscall. If this function returns non-zero, the system call - * exit assembly should additionally load all GPR registers and CTR and XER - * from the interrupt frame. - * - * The function graph tracer can not trace the return side of this function, - * because RI=0 and soft mask state is "unreconciled", so it is marked notrace. - */ -notrace unsigned long syscall_exit_prepare(unsigned long r3, - struct pt_regs *regs, - long scv) +static notrace unsigned long syscall_exit_prepare_main(unsigned long r3, + struct pt_regs *regs) { unsigned long ti_flags; unsigned long ret = 0; - bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv; - - CT_WARN_ON(ct_state() == CONTEXT_USER); - - kuap_assert_locked(); - - regs->result = r3; - - /* Check whether the syscall is issued inside a restartable sequence */ - rseq_syscall(regs); - - ti_flags = current_thread_info()->flags; - - if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) { - if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) { - r3 = -r3; - regs->ccr |= 0x10000000; /* Set SO bit in CR */ - } - } - - if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) { - if (ti_flags & _TIF_RESTOREALL) - ret = _TIF_RESTOREALL; - else - regs->gpr[3] = r3; - clear_bits(_TIF_PERSYSCALL_MASK, ¤t_thread_info()->flags); - } else { - regs->gpr[3] = r3; - } - - if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) { - do_syscall_trace_leave(regs); - ret |= _TIF_RESTOREALL; - } - - local_irq_disable(); again: ti_flags = READ_ONCE(current_thread_info()->flags); @@ -327,11 +293,7 @@ again: } } - user_enter_irqoff(); - - /* scv need not set RI=0 because SRRs are not used */ - if (unlikely(!__prep_irq_for_enabled_exit(is_not_scv))) { - user_exit_irqoff(); + if (!prep_irq_for_user_exit()) { local_irq_enable(); local_irq_disable(); goto again; @@ -352,6 +314,95 @@ again: return ret; } +/* + * This should be called after a syscall returns, with r3 the return value + * from the syscall. If this function returns non-zero, the system call + * exit assembly should additionally load all GPR registers and CTR and XER + * from the interrupt frame. + * + * The function graph tracer can not trace the return side of this function, + * because RI=0 and soft mask state is "unreconciled", so it is marked notrace. + */ +notrace unsigned long syscall_exit_prepare(unsigned long r3, + struct pt_regs *regs, + long scv) +{ + unsigned long ti_flags; + unsigned long ret = 0; + bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv; + + CT_WARN_ON(ct_state() == CONTEXT_USER); + + kuap_assert_locked(); + + regs->result = r3; + + /* Check whether the syscall is issued inside a restartable sequence */ + rseq_syscall(regs); + + ti_flags = current_thread_info()->flags; + + if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) { + if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) { + r3 = -r3; + regs->ccr |= 0x10000000; /* Set SO bit in CR */ + } + } + + if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) { + if (ti_flags & _TIF_RESTOREALL) + ret = _TIF_RESTOREALL; + else + regs->gpr[3] = r3; + clear_bits(_TIF_PERSYSCALL_MASK, ¤t_thread_info()->flags); + } else { + regs->gpr[3] = r3; + } + + if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) { + do_syscall_trace_leave(regs); + ret |= _TIF_RESTOREALL; + } + + local_irq_disable(); + ret |= syscall_exit_prepare_main(r3, regs); + +#ifdef CONFIG_PPC64 + regs->exit_result = ret; +#endif + + return ret; +} + +#ifdef CONFIG_PPC64 +notrace unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs) +{ + /* + * This is called when detecting a soft-pending interrupt as well as + * an alternate-return interrupt. So we can't just have the alternate + * return path clear SRR1[MSR] and set PACA_IRQ_HARD_DIS (unless + * the soft-pending case were to fix things up as well). RI might be + * disabled, in which case it gets re-enabled by __hard_irq_disable(). + */ + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + +#ifdef CONFIG_PPC_BOOK3S_64 + set_kuap(AMR_KUAP_BLOCKED); +#endif + + trace_hardirqs_off(); + user_exit_irqoff(); + account_cpu_user_entry(); + + BUG_ON(!user_mode(regs)); + + regs->exit_result |= syscall_exit_prepare_main(r3, regs); + + return regs->exit_result; +} +#endif + notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs) { unsigned long ti_flags; @@ -405,10 +456,7 @@ again: } } - user_enter_irqoff(); - - if (unlikely(!__prep_irq_for_enabled_exit(true))) { - user_exit_irqoff(); + if (!prep_irq_for_user_exit()) { local_irq_enable(); local_irq_disable(); goto again; @@ -422,6 +470,10 @@ again: account_cpu_user_exit(); +#ifdef CONFIG_PPC64 + regs->exit_result = ret; +#endif + /* Restore user access locks last */ kuap_user_restore(regs); kuep_unlock(); @@ -436,6 +488,8 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs) unsigned long flags; unsigned long ret = 0; unsigned long kuap; + bool stack_store = current_thread_info()->flags & + _TIF_EMULATE_STACK_STORE; if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) && unlikely(!(regs->msr & MSR_RI))) @@ -450,11 +504,6 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs) kuap = kuap_get_and_assert_locked(); - if (unlikely(current_thread_info()->flags & _TIF_EMULATE_STACK_STORE)) { - clear_bits(_TIF_EMULATE_STACK_STORE, ¤t_thread_info()->flags); - ret = 1; - } - local_irq_save(flags); if (!arch_irq_disabled_regs(regs)) { @@ -469,17 +518,54 @@ again: } } - if (unlikely(!prep_irq_for_enabled_exit(true, !irqs_disabled_flags(flags)))) + /* + * Stack store exit can't be restarted because the interrupt + * stack frame might have been clobbered. + */ + if (!prep_irq_for_kernel_enabled_exit(unlikely(stack_store))) { + /* + * Replay pending soft-masked interrupts now. Don't + * just local_irq_enabe(); local_irq_disable(); because + * if we are returning from an asynchronous interrupt + * here, another one might hit after irqs are enabled, + * and it would exit via this same path allowing + * another to fire, and so on unbounded. + */ + hard_irq_disable(); + replay_soft_interrupts(); + /* Took an interrupt, may have more exit work to do. */ goto again; - } else { - /* Returning to a kernel context with local irqs disabled. */ - __hard_EE_RI_disable(); + } #ifdef CONFIG_PPC64 + /* + * An interrupt may clear MSR[EE] and set this concurrently, + * but it will be marked pending and the exit will be retried. + * This leaves a racy window where MSR[EE]=0 and HARD_DIS is + * clear, until interrupt_exit_kernel_restart() calls + * hard_irq_disable(), which will set HARD_DIS again. + */ + local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; + + } else { + if (unlikely(stack_store)) + __hard_EE_RI_disable(); + /* + * Returning to a kernel context with local irqs disabled. + * Here, if EE was enabled in the interrupted context, enable + * it on return as well. A problem exists here where a soft + * masked interrupt may have cleared MSR[EE] and set HARD_DIS + * here, and it will still exist on return to the caller. This + * will be resolved by the masked interrupt firing again. + */ if (regs->msr & MSR_EE) local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; -#endif +#endif /* CONFIG_PPC64 */ } + if (unlikely(stack_store)) { + clear_bits(_TIF_EMULATE_STACK_STORE, ¤t_thread_info()->flags); + ret = 1; + } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM local_paca->tm_scratch = regs->msr; @@ -494,3 +580,46 @@ again: return ret; } + +#ifdef CONFIG_PPC64 +notrace unsigned long interrupt_exit_user_restart(struct pt_regs *regs) +{ + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + +#ifdef CONFIG_PPC_BOOK3S_64 + set_kuap(AMR_KUAP_BLOCKED); +#endif + + trace_hardirqs_off(); + user_exit_irqoff(); + account_cpu_user_entry(); + + BUG_ON(!user_mode(regs)); + + regs->exit_result |= interrupt_exit_user_prepare(regs); + + return regs->exit_result; +} + +/* + * No real need to return a value here because the stack store case does not + * get restarted. + */ +notrace unsigned long interrupt_exit_kernel_restart(struct pt_regs *regs) +{ + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + +#ifdef CONFIG_PPC_BOOK3S_64 + set_kuap(AMR_KUAP_BLOCKED); +#endif + + if (regs->softe == IRQS_ENABLED) + trace_hardirqs_off(); + + BUG_ON(user_mode(regs)); + + return interrupt_exit_kernel_prepare(regs); +} +#endif |