diff options
Diffstat (limited to 'arch/powerpc/kernel')
42 files changed, 1527 insertions, 1889 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index cddadccf551d..0ea6c4aa3a20 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -31,6 +31,18 @@ CFLAGS_REMOVE_btext.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_prom.o = $(CC_FLAGS_FTRACE) endif +KASAN_SANITIZE_early_32.o := n +KASAN_SANITIZE_cputable.o := n +KASAN_SANITIZE_prom_init.o := n +KASAN_SANITIZE_btext.o := n + +ifdef CONFIG_KASAN +CFLAGS_early_32.o += -DDISABLE_BRANCH_PROFILING +CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING +CFLAGS_prom_init.o += -DDISABLE_BRANCH_PROFILING +CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING +endif + obj-y := cputable.o ptrace.o syscalls.o \ irq.o align.o signal_32.o pmc.o vdso.o \ process.o systbl.o idle.o \ @@ -93,7 +105,7 @@ extra-y += vmlinux.lds obj-$(CONFIG_RELOCATABLE) += reloc_$(BITS).o -obj-$(CONFIG_PPC32) += entry_32.o setup_32.o +obj-$(CONFIG_PPC32) += entry_32.o setup_32.o early_32.o obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_BOOTX_TEXT) += btext.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 86a61e5f8285..8e02444e9d3d 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -147,6 +147,9 @@ int main(void) #if defined(CONFIG_KVM) && defined(CONFIG_BOOKE) OFFSET(THREAD_KVM_VCPU, thread_struct, kvm_vcpu); #endif +#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) + OFFSET(KUAP, thread_struct, kuap); +#endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM OFFSET(PACATMSCRATCH, paca_struct, tm_scratch); @@ -268,7 +271,6 @@ int main(void) OFFSET(ACCOUNT_USER_TIME, paca_struct, accounting.utime); OFFSET(ACCOUNT_SYSTEM_TIME, paca_struct, accounting.stime); OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save); - OFFSET(PACA_NAPSTATELOST, paca_struct, nap_state_lost); OFFSET(PACA_SPRG_VDSO, paca_struct, sprg_vdso); #else /* CONFIG_PPC64 */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE @@ -332,6 +334,10 @@ int main(void) STACK_PT_REGS_OFFSET(_PPR, ppr); #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_PPC_KUAP + STACK_PT_REGS_OFFSET(STACK_REGS_KUAP, kuap); +#endif + #if defined(CONFIG_PPC32) #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) DEFINE(EXC_LVL_SIZE, STACK_EXC_LVL_FRAME_SIZE); @@ -766,23 +772,6 @@ int main(void) OFFSET(VCPU_TIMING_LAST_ENTER_TBL, kvm_vcpu, arch.timing_last_enter.tv32.tbl); #endif -#ifdef CONFIG_PPC_POWERNV - OFFSET(PACA_CORE_IDLE_STATE_PTR, paca_struct, core_idle_state_ptr); - OFFSET(PACA_THREAD_IDLE_STATE, paca_struct, thread_idle_state); - OFFSET(PACA_THREAD_MASK, paca_struct, thread_mask); - OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask); - OFFSET(PACA_REQ_PSSCR, paca_struct, requested_psscr); - OFFSET(PACA_DONT_STOP, paca_struct, dont_stop); -#define STOP_SPR(x, f) OFFSET(x, paca_struct, stop_sprs.f) - STOP_SPR(STOP_PID, pid); - STOP_SPR(STOP_LDBAR, ldbar); - STOP_SPR(STOP_FSCR, fscr); - STOP_SPR(STOP_HFSCR, hfscr); - STOP_SPR(STOP_MMCR1, mmcr1); - STOP_SPR(STOP_MMCR2, mmcr2); - STOP_SPR(STOP_MMCRA, mmcra); -#endif - DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER); DEFINE(PPC_DBELL_MSGTYPE, PPC_DBELL_MSGTYPE); diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 53102764fd2f..f2ed3ef4b129 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -759,23 +759,22 @@ static void cacheinfo_create_index_dir(struct cache *cache, int index, index_dir = kzalloc(sizeof(*index_dir), GFP_KERNEL); if (!index_dir) - goto err; + return; index_dir->cache = cache; rc = kobject_init_and_add(&index_dir->kobj, &cache_index_type, cache_dir->kobj, "index%d", index); - if (rc) - goto err; + if (rc) { + kobject_put(&index_dir->kobj); + kfree(index_dir); + return; + } index_dir->next = cache_dir->index; cache_dir->index = index_dir; cacheinfo_create_index_opt_attrs(index_dir); - - return; -err: - kfree(index_dir); } static void cacheinfo_sysfs_populate(unsigned int cpu_id, diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 1eab54bc6ee9..cd12f362b61f 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -2147,7 +2147,11 @@ void __init set_cur_cpu_spec(struct cpu_spec *s) struct cpu_spec *t = &the_cpu_spec; t = PTRRELOC(t); - *t = *s; + /* + * use memcpy() instead of *t = *s so that GCC replaces it + * by __memcpy() when KASAN is active + */ + memcpy(t, s, sizeof(*t)); *PTRRELOC(&cur_cpu_spec) = &the_cpu_spec; } @@ -2161,8 +2165,11 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset, t = PTRRELOC(t); old = *t; - /* Copy everything, then do fixups */ - *t = *s; + /* + * Copy everything, then do fixups. Use memcpy() instead of *t = *s + * so that GCC replaces it by __memcpy() when KASAN is active + */ + memcpy(t, s, sizeof(*t)); /* * If we are overriding a previous value derived from the real diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index b6fe883b1016..5ec3b3835925 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -18,6 +18,7 @@ #include <asm/dbell.h> #include <asm/irq_regs.h> #include <asm/kvm_ppc.h> +#include <asm/trace.h> #ifdef CONFIG_SMP @@ -81,6 +82,7 @@ void doorbell_exception(struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); irq_enter(); + trace_doorbell_entry(regs); ppc_msgsync(); @@ -91,6 +93,7 @@ void doorbell_exception(struct pt_regs *regs) smp_ipi_demux_relaxed(); /* already performed the barrier */ + trace_doorbell_exit(regs); irq_exit(); set_irq_regs(old_regs); } diff --git a/arch/powerpc/kernel/early_32.c b/arch/powerpc/kernel/early_32.c new file mode 100644 index 000000000000..3482118ffe76 --- /dev/null +++ b/arch/powerpc/kernel/early_32.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Early init before relocation + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <asm/setup.h> +#include <asm/sections.h> +#include <asm/asm-prototypes.h> + +/* + * We're called here very early in the boot. + * + * Note that the kernel may be running at an address which is different + * from the address that it was linked at, so we must use RELOC/PTRRELOC + * to access static data (including strings). -- paulus + */ +notrace unsigned long __init early_init(unsigned long dt_ptr) +{ + unsigned long offset = reloc_offset(); + + /* First zero the BSS */ + memset(PTRRELOC(&__bss_start), 0, __bss_stop - __bss_start); + + /* + * Identify the CPU type and fix up code sections + * that depend on which cpu we have. + */ + identify_cpu(offset, mfspr(SPRN_PVR)); + + apply_feature_fixups(); + + return KERNELBASE + offset; +} diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index b61cfd29c76f..c18f3490a77e 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -36,15 +36,10 @@ #include <asm/asm-405.h> #include <asm/feature-fixups.h> #include <asm/barrier.h> +#include <asm/kup.h> +#include <asm/bug.h> -/* - * MSR_KERNEL is > 0x10000 on 4xx/Book-E since it include MSR_CE. - */ -#if MSR_KERNEL >= 0x10000 -#define LOAD_MSR_KERNEL(r, x) lis r,(x)@h; ori r,r,(x)@l -#else -#define LOAD_MSR_KERNEL(r, x) li r,(x) -#endif +#include "head_32.h" /* * Align to 4k in order to ensure that all functions modyfing srr0/srr1 @@ -150,8 +145,8 @@ transfer_to_handler: stw r12,_CTR(r11) stw r2,_XER(r11) mfspr r12,SPRN_SPRG_THREAD - addi r2,r12,-THREAD beq 2f /* if from user, fix up THREAD.regs */ + addi r2, r12, -THREAD addi r11,r1,STACK_FRAME_OVERHEAD stw r11,PT_REGS(r12) #if defined(CONFIG_40x) || defined(CONFIG_BOOKE) @@ -161,6 +156,9 @@ transfer_to_handler: andis. r12,r12,DBCR0_IDM@h #endif ACCOUNT_CPU_USER_ENTRY(r2, r11, r12) +#ifdef CONFIG_PPC_BOOK3S_32 + kuep_lock r11, r12 +#endif #if defined(CONFIG_40x) || defined(CONFIG_BOOKE) beq+ 3f /* From user and task is ptraced - load up global dbcr0 */ @@ -186,6 +184,8 @@ transfer_to_handler: 2: /* if from kernel, check interrupted DOZE/NAP mode and * check for stack overflow */ + kuap_save_and_lock r11, r12, r9, r2, r0 + addi r2, r12, -THREAD lwz r9,KSP_LIMIT(r12) cmplw r1,r9 /* if r1 <= ksp_limit */ ble- stack_ovf /* then the kernel stack overflowed */ @@ -207,26 +207,43 @@ transfer_to_handler_cont: mtspr SPRN_NRI, r0 #endif #ifdef CONFIG_TRACE_IRQFLAGS + /* + * When tracing IRQ state (lockdep) we enable the MMU before we call + * the IRQ tracing functions as they might access vmalloc space or + * perform IOs for console output. + * + * To speed up the syscall path where interrupts stay on, let's check + * first if we are changing the MSR value at all. + */ + tophys(r12, r1) + lwz r12,_MSR(r12) + andi. r12,r12,MSR_EE + bne 1f + + /* MSR isn't changing, just transition directly */ +#endif + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r10 + mtlr r9 + SYNC + RFI /* jump to handler, enable MMU */ + +#ifdef CONFIG_TRACE_IRQFLAGS +1: /* MSR is changing, re-enable MMU so we can notify lockdep. We need to + * keep interrupts disabled at this point otherwise we might risk + * taking an interrupt before we tell lockdep they are enabled. + */ lis r12,reenable_mmu@h ori r12,r12,reenable_mmu@l + LOAD_MSR_KERNEL(r0, MSR_KERNEL) mtspr SPRN_SRR0,r12 - mtspr SPRN_SRR1,r10 + mtspr SPRN_SRR1,r0 SYNC RFI -reenable_mmu: /* re-enable mmu so we can */ - mfmsr r10 - lwz r12,_MSR(r1) - xor r10,r10,r12 - andi. r10,r10,MSR_EE /* Did EE change? */ - beq 1f +reenable_mmu: /* - * The trace_hardirqs_off will use CALLER_ADDR0 and CALLER_ADDR1. - * If from user mode there is only one stack frame on the stack, and - * accessing CALLER_ADDR1 will cause oops. So we need create a dummy - * stack frame to make trace_hardirqs_off happy. - * - * This is handy because we also need to save a bunch of GPRs, + * We save a bunch of GPRs, * r3 can be different from GPR3(r1) at this point, r9 and r11 * contains the old MSR and handler address respectively, * r4 & r5 can contain page fault arguments that need to be passed @@ -234,14 +251,19 @@ reenable_mmu: /* re-enable mmu so we can */ * they aren't useful past this point (aren't syscall arguments), * the rest is restored from the exception frame. */ + stwu r1,-32(r1) stw r9,8(r1) stw r11,12(r1) stw r3,16(r1) stw r4,20(r1) stw r5,24(r1) - bl trace_hardirqs_off - lwz r5,24(r1) + + /* If we are disabling interrupts (normal case), simply log it with + * lockdep + */ +1: bl trace_hardirqs_off +2: lwz r5,24(r1) lwz r4,20(r1) lwz r3,16(r1) lwz r11,12(r1) @@ -251,15 +273,9 @@ reenable_mmu: /* re-enable mmu so we can */ lwz r6,GPR6(r1) lwz r7,GPR7(r1) lwz r8,GPR8(r1) -1: mtctr r11 + mtctr r11 mtlr r9 bctr /* jump to handler */ -#else /* CONFIG_TRACE_IRQFLAGS */ - mtspr SPRN_SRR0,r11 - mtspr SPRN_SRR1,r10 - mtlr r9 - SYNC - RFI /* jump to handler, enable MMU */ #endif /* CONFIG_TRACE_IRQFLAGS */ #if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) @@ -272,6 +288,7 @@ reenable_mmu: /* re-enable mmu so we can */ lwz r9,_MSR(r11) /* if sleeping, clear MSR.EE */ rlwinm r9,r9,0,~MSR_EE lwz r12,_LINK(r11) /* and return to address in LR */ + kuap_restore r11, r2, r3, r4, r5 b fast_exception_return #endif @@ -301,6 +318,33 @@ stack_ovf: SYNC RFI +#ifdef CONFIG_TRACE_IRQFLAGS +trace_syscall_entry_irq_off: + /* + * Syscall shouldn't happen while interrupts are disabled, + * so let's do a warning here. + */ +0: trap + EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING + bl trace_hardirqs_on + + /* Now enable for real */ + LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE) + mtmsr r10 + + REST_GPR(0, r1) + REST_4GPRS(3, r1) + REST_2GPRS(7, r1) + b DoSyscall +#endif /* CONFIG_TRACE_IRQFLAGS */ + + .globl transfer_to_syscall +transfer_to_syscall: +#ifdef CONFIG_TRACE_IRQFLAGS + andi. r12,r9,MSR_EE + beq- trace_syscall_entry_irq_off +#endif /* CONFIG_TRACE_IRQFLAGS */ + /* * Handle a system call. */ @@ -312,33 +356,14 @@ _GLOBAL(DoSyscall) stw r3,ORIG_GPR3(r1) li r12,0 stw r12,RESULT(r1) - lwz r11,_CCR(r1) /* Clear SO bit in CR */ - rlwinm r11,r11,0,4,2 - stw r11,_CCR(r1) #ifdef CONFIG_TRACE_IRQFLAGS - /* Return from syscalls can (and generally will) hard enable - * interrupts. You aren't supposed to call a syscall with - * interrupts disabled in the first place. However, to ensure - * that we get it right vs. lockdep if it happens, we force - * that hard enable here with appropriate tracing if we see - * that we have been called with interrupts off - */ + /* Make sure interrupts are enabled */ mfmsr r11 andi. r12,r11,MSR_EE - bne+ 1f - /* We came in with interrupts disabled, we enable them now */ - bl trace_hardirqs_on - mfmsr r11 - lwz r0,GPR0(r1) - lwz r3,GPR3(r1) - lwz r4,GPR4(r1) - ori r11,r11,MSR_EE - lwz r5,GPR5(r1) - lwz r6,GPR6(r1) - lwz r7,GPR7(r1) - lwz r8,GPR8(r1) - mtmsr r11 -1: + /* We came in with interrupts disabled, we WARN and mark them enabled + * for lockdep now */ +0: tweqi r12, 0 + EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING #endif /* CONFIG_TRACE_IRQFLAGS */ lwz r11,TI_FLAGS(r2) andi. r11,r11,_TIF_SYSCALL_DOTRACE @@ -392,8 +417,7 @@ syscall_exit_cont: lwz r8,_MSR(r1) #ifdef CONFIG_TRACE_IRQFLAGS /* If we are going to return from the syscall with interrupts - * off, we trace that here. It shouldn't happen though but we - * want to catch the bugger if it does right ? + * off, we trace that here. It shouldn't normally happen. */ andi. r10,r8,MSR_EE bne+ 1f @@ -422,12 +446,11 @@ BEGIN_FTR_SECTION lwarx r7,0,r1 END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) stwcx. r0,0,r1 /* to clear the reservation */ -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - andi. r4,r8,MSR_PR - beq 3f ACCOUNT_CPU_USER_EXIT(r2, r5, r7) -3: +#ifdef CONFIG_PPC_BOOK3S_32 + kuep_unlock r5, r7 #endif + kuap_check r2, r4 lwz r4,_LINK(r1) lwz r5,_CCR(r1) mtlr r4 @@ -678,6 +701,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE) stw r10,_CCR(r1) stw r1,KSP(r3) /* Set old stack pointer */ + kuap_check r2, r4 #ifdef CONFIG_SMP /* We need a sync somewhere here to make sure that if the * previous task gets rescheduled on another CPU, it sees all @@ -820,6 +844,9 @@ restore_user: bnel- load_dbcr0 #endif ACCOUNT_CPU_USER_EXIT(r2, r10, r11) +#ifdef CONFIG_PPC_BOOK3S_32 + kuep_unlock r10, r11 +#endif b restore @@ -866,12 +893,12 @@ resume_kernel: /* check current_thread_info->preempt_count */ lwz r0,TI_PREEMPT(r2) cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ - bne restore + bne restore_kuap andi. r8,r8,_TIF_NEED_RESCHED - beq+ restore + beq+ restore_kuap lwz r3,_MSR(r1) andi. r0,r3,MSR_EE /* interrupts off? */ - beq restore /* don't schedule if so */ + beq restore_kuap /* don't schedule if so */ #ifdef CONFIG_TRACE_IRQFLAGS /* Lockdep thinks irqs are enabled, we need to call * preempt_schedule_irq with IRQs off, so we inform lockdep @@ -879,10 +906,7 @@ resume_kernel: */ bl trace_hardirqs_off #endif -1: bl preempt_schedule_irq - lwz r3,TI_FLAGS(r2) - andi. r0,r3,_TIF_NEED_RESCHED - bne- 1b + bl preempt_schedule_irq #ifdef CONFIG_TRACE_IRQFLAGS /* And now, to properly rebalance the above, we tell lockdep they * are being turned back on, which will happen when we return @@ -890,6 +914,8 @@ resume_kernel: bl trace_hardirqs_on #endif #endif /* CONFIG_PREEMPT */ +restore_kuap: + kuap_restore r1, r2, r9, r10, r0 /* interrupts are hard-disabled at this point */ restore: @@ -913,28 +939,14 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) * off in this assembly code while peeking at TI_FLAGS() and such. However * we need to inform it if the exception turned interrupts off, and we * are about to trun them back on. - * - * The problem here sadly is that we don't know whether the exceptions was - * one that turned interrupts off or not. So we always tell lockdep about - * turning them on here when we go back to wherever we came from with EE - * on, even if that may meen some redudant calls being tracked. Maybe later - * we could encode what the exception did somewhere or test the exception - * type in the pt_regs but that sounds overkill */ andi. r10,r9,MSR_EE beq 1f - /* - * Since the ftrace irqsoff latency trace checks CALLER_ADDR1, - * which is the stack frame here, we need to force a stack frame - * in case we came from user space. - */ stwu r1,-32(r1) mflr r0 stw r0,4(r1) - stwu r1,-32(r1) bl trace_hardirqs_on - lwz r1,0(r1) - lwz r1,0(r1) + addi r1, r1, 32 lwz r9,_MSR(r1) 1: #endif /* CONFIG_TRACE_IRQFLAGS */ @@ -1197,6 +1209,7 @@ load_dbcr0: .section .bss .align 4 + .global global_dbcr0 global_dbcr0: .space 8*NR_CPUS .previous @@ -1207,9 +1220,10 @@ do_work: /* r10 contains MSR_KERNEL here */ beq do_user_signal do_resched: /* r10 contains MSR_KERNEL here */ - /* Note: We don't need to inform lockdep that we are enabling - * interrupts here. As far as it knows, they are already enabled - */ +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_on + mfmsr r10 +#endif ori r10,r10,MSR_EE SYNC MTMSRD(r10) /* hard-enable interrupts */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 15c67d2c0534..d978af78bf2a 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -46,6 +46,7 @@ #include <asm/exception-64e.h> #endif #include <asm/feature-fixups.h> +#include <asm/kup.h> /* * System calls. @@ -120,6 +121,9 @@ END_BTB_FLUSH_SECTION addi r9,r1,STACK_FRAME_OVERHEAD ld r11,exception_marker@toc(r2) std r11,-16(r9) /* "regshere" marker */ + + kuap_check_amr r10, r11 + #if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR) BEGIN_FW_FTR_SECTION beq 33f @@ -275,6 +279,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) andi. r6,r8,MSR_PR ld r4,_LINK(r1) + kuap_check_amr r10, r11 + #ifdef CONFIG_PPC_BOOK3S /* * Clear MSR_RI, MSR_EE is already and remains disabled. We could do @@ -296,6 +302,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) std r8, PACATMSCRATCH(r13) #endif + /* + * We don't need to restore AMR on the way back to userspace for KUAP. + * The value of AMR only matters while we're in the kernel. + */ ld r13,GPR13(r1) /* only restore r13 if returning to usermode */ ld r2,GPR2(r1) ld r1,GPR1(r1) @@ -306,8 +316,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) RFI_TO_USER b . /* prevent speculative execution */ - /* exit to kernel */ -1: ld r2,GPR2(r1) +1: /* exit to kernel */ + kuap_restore_amr r2 + + ld r2,GPR2(r1) ld r1,GPR1(r1) mtlr r4 mtcr r5 @@ -594,6 +606,8 @@ _GLOBAL(_switch) std r23,_CCR(r1) std r1,KSP(r3) /* Set old stack pointer */ + kuap_check_amr r9, r10 + FLUSH_COUNT_CACHE /* @@ -851,13 +865,7 @@ resume_kernel: * sure we are soft-disabled first and reconcile irq state. */ RECONCILE_IRQ_STATE(r3,r4) -1: bl preempt_schedule_irq - - /* Re-test flags and eventually loop */ - ld r9, PACA_THREAD_INFO(r13) - ld r4,TI_FLAGS(r9) - andi. r0,r4,_TIF_NEED_RESCHED - bne 1b + bl preempt_schedule_irq /* * arch_local_irq_restore() from preempt_schedule_irq above may @@ -942,6 +950,8 @@ fast_exception_return: ld r4,_XER(r1) mtspr SPRN_XER,r4 + kuap_check_amr r5, r6 + REST_8GPRS(5, r1) andi. r0,r3,MSR_RI @@ -974,6 +984,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ACCOUNT_CPU_USER_EXIT(r13, r2, r4) REST_GPR(13, r1) + /* + * We don't need to restore AMR on the way back to userspace for KUAP. + * The value of AMR only matters while we're in the kernel. + */ mtspr SPRN_SRR1,r3 ld r2,_CCR(r1) @@ -1006,6 +1020,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ld r0,GPR0(r1) ld r2,GPR2(r1) ld r3,GPR3(r1) + + kuap_restore_amr r4 + ld r4,GPR4(r1) ld r1,GPR1(r1) RFI_TO_KERNEL diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 9481a117e242..6b86055e5251 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -19,6 +19,7 @@ #include <asm/cpuidle.h> #include <asm/head-64.h> #include <asm/feature-fixups.h> +#include <asm/kup.h> /* * There are a few constraints to be concerned with. @@ -120,7 +121,9 @@ EXC_VIRT_NONE(0x4000, 0x100) mfspr r10,SPRN_SRR1 ; \ rlwinm. r10,r10,47-31,30,31 ; \ beq- 1f ; \ - cmpwi cr3,r10,2 ; \ + cmpwi cr1,r10,2 ; \ + mfspr r3,SPRN_SRR1 ; \ + bltlr cr1 ; /* no state loss, return to idle caller */ \ BRANCH_TO_C000(r10, system_reset_idle_common) ; \ 1: \ KVMTEST_PR(n) ; \ @@ -144,8 +147,11 @@ TRAMP_KVM(PACA_EXNMI, 0x100) #ifdef CONFIG_PPC_P7_NAP EXC_COMMON_BEGIN(system_reset_idle_common) - mfspr r12,SPRN_SRR1 - b pnv_powersave_wakeup + /* + * This must be a direct branch (without linker branch stub) because + * we can not use TOC at this point as r2 may not be restored yet. + */ + b idle_return_gpr_loss #endif /* @@ -309,6 +315,7 @@ TRAMP_REAL_BEGIN(machine_check_common_early) mfspr r11,SPRN_DSISR /* Save DSISR */ std r11,_DSISR(r1) std r9,_CCR(r1) /* Save CR in stackframe */ + kuap_save_amr_and_lock r9, r10, cr1 /* Save r9 through r13 from EXMC save area to stack frame. */ EXCEPTION_PROLOG_COMMON_2(PACA_EXMC) mfmsr r11 /* get MSR value */ @@ -427,17 +434,17 @@ EXC_COMMON_BEGIN(machine_check_idle_common) * Then decrement MCE nesting after finishing with the stack. */ ld r3,_MSR(r1) + ld r4,_LINK(r1) lhz r11,PACA_IN_MCE(r13) subi r11,r11,1 sth r11,PACA_IN_MCE(r13) - /* Turn off the RI bit because SRR1 is used by idle wakeup code. */ - /* Recoverability could be improved by reducing the use of SRR1. */ - li r11,0 - mtmsrd r11,1 - - b pnv_powersave_wakeup_mce + mtlr r4 + rlwinm r10,r3,47-31,30,31 + cmpwi cr1,r10,2 + bltlr cr1 /* no state loss, return to idle caller */ + b idle_return_gpr_loss #endif /* * Handle machine check early in real mode. We come here with @@ -1109,6 +1116,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early) mfspr r11,SPRN_HSRR0 /* Save HSRR0 */ mfspr r12,SPRN_HSRR1 /* Save HSRR1 */ EXCEPTION_PROLOG_COMMON_1() + /* We don't touch AMR here, we never go to virtual mode */ EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN) EXCEPTION_PROLOG_COMMON_3(0xe60) addi r3,r1,STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 45a8d0be1c96..25f063f56ec5 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -36,6 +36,7 @@ #include <linux/sysfs.h> #include <linux/slab.h> #include <linux/cma.h> +#include <linux/hugetlb.h> #include <asm/debugfs.h> #include <asm/page.h> diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 529dcc21c3f9..cecd57e1d046 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -63,6 +63,7 @@ _GLOBAL(load_fp_state) REST_32FPVSRS(0, R4, R3) blr EXPORT_SYMBOL(load_fp_state) +_ASM_NOKPROBE_SYMBOL(load_fp_state); /* used by restore_math */ /* * Store FP state into memory, including FPSCR diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index e25b615e9f9e..755fab9641d6 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -37,6 +37,8 @@ #include <asm/export.h> #include <asm/feature-fixups.h> +#include "head_32.h" + /* 601 only have IBAT; cr0.eq is set on 601 when using this macro */ #define LOAD_BAT(n, reg, RA, RB) \ /* see the comment for clear_bats() -- Cort */ \ @@ -160,6 +162,10 @@ __after_mmu_off: bl flush_tlbs bl initial_bats + bl load_segment_registers +#ifdef CONFIG_KASAN + bl early_hash_table +#endif #if defined(CONFIG_BOOTX_TEXT) bl setup_disp_bat #endif @@ -205,7 +211,7 @@ __after_mmu_off: */ turn_on_mmu: mfmsr r0 - ori r0,r0,MSR_DR|MSR_IR + ori r0,r0,MSR_DR|MSR_IR|MSR_RI mtspr SPRN_SRR1,r0 lis r0,start_here@h ori r0,r0,start_here@l @@ -242,103 +248,6 @@ __secondary_hold_spinloop: __secondary_hold_acknowledge: .long -1 -/* - * Exception entry code. This code runs with address translation - * turned off, i.e. using physical addresses. - * We assume sprg3 has the physical address of the current - * task's thread_struct. - */ -#define EXCEPTION_PROLOG \ - mtspr SPRN_SPRG_SCRATCH0,r10; \ - mtspr SPRN_SPRG_SCRATCH1,r11; \ - mfcr r10; \ - EXCEPTION_PROLOG_1; \ - EXCEPTION_PROLOG_2 - -#define EXCEPTION_PROLOG_1 \ - mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \ - andi. r11,r11,MSR_PR; \ - tophys(r11,r1); /* use tophys(r1) if kernel */ \ - beq 1f; \ - mfspr r11,SPRN_SPRG_THREAD; \ - lwz r11,TASK_STACK-THREAD(r11); \ - addi r11,r11,THREAD_SIZE; \ - tophys(r11,r11); \ -1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */ - - -#define EXCEPTION_PROLOG_2 \ - stw r10,_CCR(r11); /* save registers */ \ - stw r12,GPR12(r11); \ - stw r9,GPR9(r11); \ - mfspr r10,SPRN_SPRG_SCRATCH0; \ - stw r10,GPR10(r11); \ - mfspr r12,SPRN_SPRG_SCRATCH1; \ - stw r12,GPR11(r11); \ - mflr r10; \ - stw r10,_LINK(r11); \ - mfspr r12,SPRN_SRR0; \ - mfspr r9,SPRN_SRR1; \ - stw r1,GPR1(r11); \ - stw r1,0(r11); \ - tovirt(r1,r11); /* set new kernel sp */ \ - li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \ - MTMSRD(r10); /* (except for mach check in rtas) */ \ - stw r0,GPR0(r11); \ - lis r10,STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */ \ - addi r10,r10,STACK_FRAME_REGS_MARKER@l; \ - stw r10,8(r11); \ - SAVE_4GPRS(3, r11); \ - SAVE_2GPRS(7, r11) - -/* - * Note: code which follows this uses cr0.eq (set if from kernel), - * r11, r12 (SRR0), and r9 (SRR1). - * - * Note2: once we have set r1 we are in a position to take exceptions - * again, and we could thus set MSR:RI at that point. - */ - -/* - * Exception vectors. - */ -#define EXCEPTION(n, label, hdlr, xfer) \ - . = n; \ - DO_KVM n; \ -label: \ - EXCEPTION_PROLOG; \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - xfer(n, hdlr) - -#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret) \ - li r10,trap; \ - stw r10,_TRAP(r11); \ - li r10,MSR_KERNEL; \ - copyee(r10, r9); \ - bl tfer; \ -i##n: \ - .long hdlr; \ - .long ret - -#define COPY_EE(d, s) rlwimi d,s,0,16,16 -#define NOCOPY(d, s) - -#define EXC_XFER_STD(n, hdlr) \ - EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full, \ - ret_from_except_full) - -#define EXC_XFER_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \ - ret_from_except) - -#define EXC_XFER_EE(n, hdlr) \ - EXC_XFER_TEMPLATE(n, hdlr, n, COPY_EE, transfer_to_handler_full, \ - ret_from_except_full) - -#define EXC_XFER_EE_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(n, hdlr, n+1, COPY_EE, transfer_to_handler, \ - ret_from_except) - /* System reset */ /* core99 pmac starts the seconary here by changing the vector, and putting it back to what it was (unknown_exception) when done. */ @@ -387,7 +296,11 @@ DataAccess: EXCEPTION_PROLOG mfspr r10,SPRN_DSISR stw r10,_DSISR(r11) +#ifdef CONFIG_PPC_KUAP + andis. r0,r10,(DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h +#else andis. r0,r10,(DSISR_BAD_FAULT_32S|DSISR_DABRMATCH)@h +#endif bne 1f /* if not, try to put a PTE */ mfspr r4,SPRN_DAR /* into the hash table */ rlwinm r3,r10,32-15,21,21 /* DSISR_STORE -> _PAGE_RW */ @@ -428,7 +341,7 @@ Alignment: mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE(0x600, alignment_exception) + EXC_XFER_STD(0x600, alignment_exception) /* Program check exception */ EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) @@ -449,24 +362,23 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) bl load_up_fpu /* if from user, just load it up */ b fast_exception_return 1: addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception) + EXC_XFER_LITE(0x800, kernel_fp_unavailable_exception) /* Decrementer */ EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) - EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE) - EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_STD) + EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_STD) /* System call */ . = 0xc00 DO_KVM 0xc00 SystemCall: - EXCEPTION_PROLOG - EXC_XFER_EE_LITE(0xc00, DoSyscall) + SYSCALL_ENTRY 0xc00 /* Single step - not used on 601 */ EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) - EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD) /* * The Altivec unavailable trap is at 0x0f20. Foo. @@ -522,9 +434,9 @@ InstructionTLBMiss: andc. r1,r1,r0 /* check access & ~permission */ bne- InstructionAddressInvalid /* return if access not permitted */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ - ori r1, r1, 0xe05 /* clear out reserved bits */ - andc r1, r0, r1 /* PP = user? 2 : 0 */ + rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */ + ori r1, r1, 0xe06 /* clear out reserved bits */ + andc r1, r0, r1 /* PP = user? 1 : 0 */ BEGIN_FTR_SECTION rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) @@ -590,11 +502,11 @@ DataLoadTLBMiss: * we would need to update the pte atomically with lwarx/stwcx. */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwinm r1,r0,32-10,31,31 /* _PAGE_RW -> PP lsb */ + rlwinm r1,r0,32-9,30,30 /* _PAGE_RW -> PP msb */ rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */ ori r1,r1,0xe04 /* clear out reserved bits */ - andc r1,r0,r1 /* PP = user? rw? 2: 3: 0 */ + andc r1,r0,r1 /* PP = user? rw? 1: 3: 0 */ BEGIN_FTR_SECTION rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) @@ -670,9 +582,9 @@ DataStoreTLBMiss: * we would need to update the pte atomically with lwarx/stwcx. */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ - li r1,0xe05 /* clear out reserved bits & PP lsb */ - andc r1,r0,r1 /* PP = user? 2: 0 */ + rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */ + li r1,0xe06 /* clear out reserved bits & PP msb */ + andc r1,r0,r1 /* PP = user? 1: 0 */ BEGIN_FTR_SECTION rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) @@ -698,35 +610,35 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) #define altivec_assist_exception unknown_exception #endif - EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_EE) - EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_EE) - EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_EE) + EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_STD) + EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_STD) + EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_STD) EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD) - EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_EE) - EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_STD) + EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_STD) . = 0x3000 @@ -738,7 +650,7 @@ AltiVecUnavailable: b fast_exception_return #endif /* CONFIG_ALTIVEC */ 1: addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE_LITE(0xf20, altivec_unavailable_exception) + EXC_XFER_LITE(0xf20, altivec_unavailable_exception) PerformanceMonitor: EXCEPTION_PROLOG @@ -880,11 +792,24 @@ _ENTRY(__restore_cpu_setup) blr #endif /* !defined(CONFIG_PPC_BOOK3S_32) */ - /* * Load stuff into the MMU. Intended to be called with * IR=0 and DR=0. */ +#ifdef CONFIG_KASAN +early_hash_table: + sync /* Force all PTE updates to finish */ + isync + tlbia /* Clear all TLB entries */ + sync /* wait for tlbia/tlbie to finish */ + TLBSYNC /* ... on all CPUs */ + /* Load the SDR1 register (hash table base & size) */ + lis r6, early_hash - PAGE_OFFSET@h + ori r6, r6, 3 /* 256kB table */ + mtspr SPRN_SDR1, r6 + blr +#endif + load_up_mmu: sync /* Force all PTE updates to finish */ isync @@ -896,14 +821,6 @@ load_up_mmu: tophys(r6,r6) lwz r6,_SDR1@l(r6) mtspr SPRN_SDR1,r6 - li r0,16 /* load up segment register values */ - mtctr r0 /* for context 0 */ - lis r3,0x2000 /* Ku = 1, VSID = 0 */ - li r4,0 -3: mtsrin r3,r4 - addi r3,r3,0x111 /* increment VSID */ - addis r4,r4,0x1000 /* address of next segment */ - bdnz 3b /* Load the BAT registers with the values set up by MMU_init. MMU_init takes care of whether we're on a 601 or not. */ @@ -925,6 +842,32 @@ BEGIN_MMU_FTR_SECTION END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) blr +load_segment_registers: + li r0, NUM_USER_SEGMENTS /* load up user segment register values */ + mtctr r0 /* for context 0 */ + li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */ +#ifdef CONFIG_PPC_KUEP + oris r3, r3, SR_NX@h /* Set Nx */ +#endif +#ifdef CONFIG_PPC_KUAP + oris r3, r3, SR_KS@h /* Set Ks */ +#endif + li r4, 0 +3: mtsrin r3, r4 + addi r3, r3, 0x111 /* increment VSID */ + addis r4, r4, 0x1000 /* address of next segment */ + bdnz 3b + li r0, 16 - NUM_USER_SEGMENTS /* load up kernel segment registers */ + mtctr r0 /* for context 0 */ + rlwinm r3, r3, 0, ~SR_NX /* Nx = 0 */ + rlwinm r3, r3, 0, ~SR_KS /* Ks = 0 */ + oris r3, r3, SR_KP@h /* Kp = 1 */ +3: mtsrin r3, r4 + addi r3, r3, 0x111 /* increment VSID */ + addis r4, r4, 0x1000 /* address of next segment */ + bdnz 3b + blr + /* * This is where the main kernel code starts. */ @@ -950,11 +893,17 @@ start_here: * Do early platform-specific initialization, * and set up the MMU. */ +#ifdef CONFIG_KASAN + bl kasan_early_init +#endif li r3,0 mr r4,r31 bl machine_init bl __save_cpu_setup bl MMU_init +BEGIN_MMU_FTR_SECTION + bl MMU_init_hw_patch +END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) /* * Go back to running unmapped so we can load up new values @@ -1006,7 +955,12 @@ _ENTRY(switch_mmu_context) blt- 4f mulli r3,r3,897 /* multiply context by skew factor */ rlwinm r3,r3,4,8,27 /* VSID = (context & 0xfffff) << 4 */ - addis r3,r3,0x6000 /* Set Ks, Ku bits */ +#ifdef CONFIG_PPC_KUEP + oris r3, r3, SR_NX@h /* Set Nx */ +#endif +#ifdef CONFIG_PPC_KUAP + oris r3, r3, SR_KS@h /* Set Ks */ +#endif li r0,NUM_USER_SEGMENTS mtctr r0 diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h new file mode 100644 index 000000000000..4a692553651f --- /dev/null +++ b/arch/powerpc/kernel/head_32.h @@ -0,0 +1,203 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __HEAD_32_H__ +#define __HEAD_32_H__ + +#include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */ + +/* + * MSR_KERNEL is > 0x8000 on 4xx/Book-E since it include MSR_CE. + */ +.macro __LOAD_MSR_KERNEL r, x +.if \x >= 0x8000 + lis \r, (\x)@h + ori \r, \r, (\x)@l +.else + li \r, (\x) +.endif +.endm +#define LOAD_MSR_KERNEL(r, x) __LOAD_MSR_KERNEL r, x + +/* + * Exception entry code. This code runs with address translation + * turned off, i.e. using physical addresses. + * We assume sprg3 has the physical address of the current + * task's thread_struct. + */ + +.macro EXCEPTION_PROLOG + mtspr SPRN_SPRG_SCRATCH0,r10 + mtspr SPRN_SPRG_SCRATCH1,r11 + mfcr r10 + EXCEPTION_PROLOG_1 + EXCEPTION_PROLOG_2 +.endm + +.macro EXCEPTION_PROLOG_1 + mfspr r11,SPRN_SRR1 /* check whether user or kernel */ + andi. r11,r11,MSR_PR + tophys(r11,r1) /* use tophys(r1) if kernel */ + beq 1f + mfspr r11,SPRN_SPRG_THREAD + lwz r11,TASK_STACK-THREAD(r11) + addi r11,r11,THREAD_SIZE + tophys(r11,r11) +1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */ +.endm + +.macro EXCEPTION_PROLOG_2 + stw r10,_CCR(r11) /* save registers */ + stw r12,GPR12(r11) + stw r9,GPR9(r11) + mfspr r10,SPRN_SPRG_SCRATCH0 + stw r10,GPR10(r11) + mfspr r12,SPRN_SPRG_SCRATCH1 + stw r12,GPR11(r11) + mflr r10 + stw r10,_LINK(r11) + mfspr r12,SPRN_SRR0 + mfspr r9,SPRN_SRR1 + stw r1,GPR1(r11) + stw r1,0(r11) + tovirt(r1,r11) /* set new kernel sp */ +#ifdef CONFIG_40x + rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ +#else + li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR) /* can take exceptions */ + MTMSRD(r10) /* (except for mach check in rtas) */ +#endif + stw r0,GPR0(r11) + lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ + addi r10,r10,STACK_FRAME_REGS_MARKER@l + stw r10,8(r11) + SAVE_4GPRS(3, r11) + SAVE_2GPRS(7, r11) +.endm + +.macro SYSCALL_ENTRY trapno + mfspr r12,SPRN_SPRG_THREAD + mfcr r10 + lwz r11,TASK_STACK-THREAD(r12) + mflr r9 + addi r11,r11,THREAD_SIZE - INT_FRAME_SIZE + rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */ + tophys(r11,r11) + stw r10,_CCR(r11) /* save registers */ + mfspr r10,SPRN_SRR0 + stw r9,_LINK(r11) + mfspr r9,SPRN_SRR1 + stw r1,GPR1(r11) + stw r1,0(r11) + tovirt(r1,r11) /* set new kernel sp */ + stw r10,_NIP(r11) +#ifdef CONFIG_40x + rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ +#else + LOAD_MSR_KERNEL(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */ + MTMSRD(r10) /* (except for mach check in rtas) */ +#endif + lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ + stw r2,GPR2(r11) + addi r10,r10,STACK_FRAME_REGS_MARKER@l + stw r9,_MSR(r11) + li r2, \trapno + 1 + stw r10,8(r11) + stw r2,_TRAP(r11) + SAVE_GPR(0, r11) + SAVE_4GPRS(3, r11) + SAVE_2GPRS(7, r11) + addi r11,r1,STACK_FRAME_OVERHEAD + addi r2,r12,-THREAD + stw r11,PT_REGS(r12) +#if defined(CONFIG_40x) + /* Check to see if the dbcr0 register is set up to debug. Use the + internal debug mode bit to do this. */ + lwz r12,THREAD_DBCR0(r12) + andis. r12,r12,DBCR0_IDM@h +#endif + ACCOUNT_CPU_USER_ENTRY(r2, r11, r12) +#if defined(CONFIG_40x) + beq+ 3f + /* From user and task is ptraced - load up global dbcr0 */ + li r12,-1 /* clear all pending debug events */ + mtspr SPRN_DBSR,r12 + lis r11,global_dbcr0@ha + tophys(r11,r11) + addi r11,r11,global_dbcr0@l + lwz r12,0(r11) + mtspr SPRN_DBCR0,r12 + lwz r12,4(r11) + addi r12,r12,-1 + stw r12,4(r11) +#endif + +3: + tovirt(r2, r2) /* set r2 to current */ + lis r11, transfer_to_syscall@h + ori r11, r11, transfer_to_syscall@l +#ifdef CONFIG_TRACE_IRQFLAGS + /* + * If MSR is changing we need to keep interrupts disabled at this point + * otherwise we might risk taking an interrupt before we tell lockdep + * they are enabled. + */ + LOAD_MSR_KERNEL(r10, MSR_KERNEL) + rlwimi r10, r9, 0, MSR_EE +#else + LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE) +#endif +#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) + mtspr SPRN_NRI, r0 +#endif + mtspr SPRN_SRR1,r10 + mtspr SPRN_SRR0,r11 + SYNC + RFI /* jump to handler, enable MMU */ +.endm + +/* + * Note: code which follows this uses cr0.eq (set if from kernel), + * r11, r12 (SRR0), and r9 (SRR1). + * + * Note2: once we have set r1 we are in a position to take exceptions + * again, and we could thus set MSR:RI at that point. + */ + +/* + * Exception vectors. + */ +#ifdef CONFIG_PPC_BOOK3S +#define START_EXCEPTION(n, label) \ + . = n; \ + DO_KVM n; \ +label: + +#else +#define START_EXCEPTION(n, label) \ + . = n; \ +label: + +#endif + +#define EXCEPTION(n, label, hdlr, xfer) \ + START_EXCEPTION(n, label) \ + EXCEPTION_PROLOG; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + xfer(n, hdlr) + +#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \ + li r10,trap; \ + stw r10,_TRAP(r11); \ + LOAD_MSR_KERNEL(r10, msr); \ + bl tfer; \ + .long hdlr; \ + .long ret + +#define EXC_XFER_STD(n, hdlr) \ + EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full, \ + ret_from_except_full) + +#define EXC_XFER_LITE(n, hdlr) \ + EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \ + ret_from_except) + +#endif /* __HEAD_32_H__ */ diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index a9c934f2319b..cf54b784100d 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -44,6 +44,8 @@ #include <asm/export.h> #include <asm/asm-405.h> +#include "head_32.h" + /* As with the other PowerPC ports, it is expected that when code * execution begins here, the following registers contain valid, yet * optional, information: @@ -99,46 +101,6 @@ _ENTRY(saved_ksp_limit) .space 4 /* - * Exception vector entry code. This code runs with address translation - * turned off (i.e. using physical addresses). We assume SPRG_THREAD has - * the physical address of the current task thread_struct. - * Note that we have to have decremented r1 before we write to any fields - * of the exception frame, since a critical interrupt could occur at any - * time, and it will write to the area immediately below the current r1. - */ -#define NORMAL_EXCEPTION_PROLOG \ - mtspr SPRN_SPRG_SCRATCH0,r10; /* save two registers to work with */\ - mtspr SPRN_SPRG_SCRATCH1,r11; \ - mtspr SPRN_SPRG_SCRATCH2,r1; \ - mfcr r10; /* save CR in r10 for now */\ - mfspr r11,SPRN_SRR1; /* check whether user or kernel */\ - andi. r11,r11,MSR_PR; \ - beq 1f; \ - mfspr r1,SPRN_SPRG_THREAD; /* if from user, start at top of */\ - lwz r1,TASK_STACK-THREAD(r1); /* this thread's kernel stack */\ - addi r1,r1,THREAD_SIZE; \ -1: subi r1,r1,INT_FRAME_SIZE; /* Allocate an exception frame */\ - tophys(r11,r1); \ - stw r10,_CCR(r11); /* save various registers */\ - stw r12,GPR12(r11); \ - stw r9,GPR9(r11); \ - mfspr r10,SPRN_SPRG_SCRATCH0; \ - stw r10,GPR10(r11); \ - mfspr r12,SPRN_SPRG_SCRATCH1; \ - stw r12,GPR11(r11); \ - mflr r10; \ - stw r10,_LINK(r11); \ - mfspr r10,SPRN_SPRG_SCRATCH2; \ - mfspr r12,SPRN_SRR0; \ - stw r10,GPR1(r11); \ - mfspr r9,SPRN_SRR1; \ - stw r10,0(r11); \ - rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ - stw r0,GPR0(r11); \ - SAVE_4GPRS(3, r11); \ - SAVE_2GPRS(7, r11) - -/* * Exception prolog for critical exceptions. This is a little different * from the normal exception prolog above since a critical exception * can potentially occur at any point during normal exception processing. @@ -177,6 +139,9 @@ _ENTRY(saved_ksp_limit) tovirt(r1,r11); \ rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ stw r0,GPR0(r11); \ + lis r10, STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */\ + addi r10, r10, STACK_FRAME_REGS_MARKER@l; \ + stw r10, 8(r11); \ SAVE_4GPRS(3, r11); \ SAVE_2GPRS(7, r11) @@ -196,53 +161,12 @@ _ENTRY(saved_ksp_limit) /* * Exception vectors. */ -#define START_EXCEPTION(n, label) \ - . = n; \ -label: - -#define EXCEPTION(n, label, hdlr, xfer) \ - START_EXCEPTION(n, label); \ - NORMAL_EXCEPTION_PROLOG; \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - xfer(n, hdlr) - #define CRITICAL_EXCEPTION(n, label, hdlr) \ START_EXCEPTION(n, label); \ CRITICAL_EXCEPTION_PROLOG; \ addi r3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ - NOCOPY, crit_transfer_to_handler, \ - ret_from_crit_exc) - -#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret) \ - li r10,trap; \ - stw r10,_TRAP(r11); \ - lis r10,msr@h; \ - ori r10,r10,msr@l; \ - copyee(r10, r9); \ - bl tfer; \ - .long hdlr; \ - .long ret - -#define COPY_EE(d, s) rlwimi d,s,0,16,16 -#define NOCOPY(d, s) - -#define EXC_XFER_STD(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full, \ - ret_from_except_full) - -#define EXC_XFER_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \ - ret_from_except) - -#define EXC_XFER_EE(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \ - ret_from_except_full) - -#define EXC_XFER_EE_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \ - ret_from_except) - + crit_transfer_to_handler, ret_from_crit_exc) /* * 0x0100 - Critical Interrupt Exception @@ -393,7 +317,7 @@ label: * This is caused by a fetch from non-execute or guarded pages. */ START_EXCEPTION(0x0400, InstructionAccess) - NORMAL_EXCEPTION_PROLOG + EXCEPTION_PROLOG mr r4,r12 /* Pass SRR0 as arg2 */ li r5,0 /* Pass zero as arg3 */ EXC_XFER_LITE(0x400, handle_page_fault) @@ -403,33 +327,32 @@ label: /* 0x0600 - Alignment Exception */ START_EXCEPTION(0x0600, Alignment) - NORMAL_EXCEPTION_PROLOG + EXCEPTION_PROLOG mfspr r4,SPRN_DEAR /* Grab the DEAR and save it */ stw r4,_DEAR(r11) addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE(0x600, alignment_exception) + EXC_XFER_STD(0x600, alignment_exception) /* 0x0700 - Program Exception */ START_EXCEPTION(0x0700, ProgramCheck) - NORMAL_EXCEPTION_PROLOG + EXCEPTION_PROLOG mfspr r4,SPRN_ESR /* Grab the ESR and save it */ stw r4,_ESR(r11) addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_STD(0x700, program_check_exception) - EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x0A00, Trap_0A, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x0A00, Trap_0A, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_STD) /* 0x0C00 - System Call Exception */ START_EXCEPTION(0x0C00, SystemCall) - NORMAL_EXCEPTION_PROLOG - EXC_XFER_EE_LITE(0xc00, DoSyscall) + SYSCALL_ENTRY 0xc00 - EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_STD) /* 0x1000 - Programmable Interval Timer (PIT) Exception */ . = 0x1000 @@ -646,25 +569,25 @@ label: mfspr r10, SPRN_SPRG_SCRATCH0 b InstructionAccess - EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_STD) #ifdef CONFIG_IBM405_ERR51 /* 405GP errata 51 */ START_EXCEPTION(0x1700, Trap_17) b DTLBMiss #else - EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_STD) #endif - EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_STD) /* Check for a single step debug exception while in an exception * handler before state has been saved. This is to catch the case @@ -726,11 +649,11 @@ label: addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_TEMPLATE(DebugException, 0x2002, \ (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ - NOCOPY, crit_transfer_to_handler, ret_from_crit_exc) + crit_transfer_to_handler, ret_from_crit_exc) /* Programmable Interval Timer (PIT) Exception. (from 0x1000) */ Decrementer: - NORMAL_EXCEPTION_PROLOG + EXCEPTION_PROLOG lis r0,TSR_PIS@h mtspr SPRN_TSR,r0 /* Clear the PIT exception */ addi r3,r1,STACK_FRAME_OVERHEAD @@ -738,9 +661,9 @@ Decrementer: /* Fixed Interval Timer (FIT) Exception. (from 0x1010) */ FITException: - NORMAL_EXCEPTION_PROLOG + EXCEPTION_PROLOG addi r3,r1,STACK_FRAME_OVERHEAD; - EXC_XFER_EE(0x1010, unknown_exception) + EXC_XFER_STD(0x1010, unknown_exception) /* Watchdog Timer (WDT) Exception. (from 0x1020) */ WDTException: @@ -748,15 +671,14 @@ WDTException: addi r3,r1,STACK_FRAME_OVERHEAD; EXC_XFER_TEMPLATE(WatchdogException, 0x1020+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), - NOCOPY, crit_transfer_to_handler, - ret_from_crit_exc) + crit_transfer_to_handler, ret_from_crit_exc) /* * The other Data TLB exceptions bail out to this point * if they can't resolve the lightweight TLB fault. */ DataAccess: - NORMAL_EXCEPTION_PROLOG + EXCEPTION_PROLOG mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ stw r5,_ESR(r11) mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ @@ -848,6 +770,9 @@ start_here: /* * Decide what sort of machine this is and initialize the MMU. */ +#ifdef CONFIG_KASAN + bl kasan_early_init +#endif li r3,0 mr r4,r31 bl machine_init diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 37117ab11584..f15fba58c744 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -203,6 +203,9 @@ _ENTRY(_start); /* * Decide what sort of machine this is and initialize the MMU. */ +#ifdef CONFIG_KASAN + bl kasan_early_init +#endif li r3,0 mr r4,r31 bl machine_init @@ -278,16 +281,15 @@ interrupt_base: FP_UNAVAILABLE_EXCEPTION #else EXCEPTION(0x2010, BOOKE_INTERRUPT_FP_UNAVAIL, \ - FloatingPointUnavailable, unknown_exception, EXC_XFER_EE) + FloatingPointUnavailable, unknown_exception, EXC_XFER_STD) #endif /* System Call Interrupt */ START_EXCEPTION(SystemCall) - NORMAL_EXCEPTION_PROLOG(BOOKE_INTERRUPT_SYSCALL) - EXC_XFER_EE_LITE(0x0c00, DoSyscall) + SYSCALL_ENTRY 0xc00 BOOKE_INTERRUPT_SYSCALL /* Auxiliary Processor Unavailable Interrupt */ EXCEPTION(0x2020, BOOKE_INTERRUPT_AP_UNAVAIL, \ - AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE) + AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_STD) /* Decrementer Interrupt */ DECREMENTER_EXCEPTION @@ -295,7 +297,7 @@ interrupt_base: /* Fixed Internal Timer Interrupt */ /* TODO: Add FIT support */ EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, \ - unknown_exception, EXC_XFER_EE) + unknown_exception, EXC_XFER_STD) /* Watchdog Timer Interrupt */ /* TODO: Add watchdog support */ diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 3fad8d499767..5321a11c2835 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -968,7 +968,9 @@ start_here_multiplatform: /* Restore parameters passed from prom_init/kexec */ mr r3,r31 - bl early_setup /* also sets r13 and SPRG_PACA */ + LOAD_REG_ADDR(r12, DOTSYM(early_setup)) + mtctr r12 + bctrl /* also sets r13 and SPRG_PACA */ LOAD_REG_ADDR(r3, start_here_common) ld r4,PACAKMSR(r13) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 03c73b4c6435..885be7f3d29a 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -33,6 +33,8 @@ #include <asm/export.h> #include <asm/code-patching-asm.h> +#include "head_32.h" + #if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000 /* By simply checking Address >= 0x80000000, we know if its a kernel address */ #define SIMPLE_KERNEL_ADDRESS 1 @@ -123,102 +125,6 @@ instruction_counter: .space 4 #endif -/* - * Exception entry code. This code runs with address translation - * turned off, i.e. using physical addresses. - * We assume sprg3 has the physical address of the current - * task's thread_struct. - */ -#define EXCEPTION_PROLOG \ - mtspr SPRN_SPRG_SCRATCH0, r10; \ - mtspr SPRN_SPRG_SCRATCH1, r11; \ - mfcr r10; \ - EXCEPTION_PROLOG_1; \ - EXCEPTION_PROLOG_2 - -#define EXCEPTION_PROLOG_1 \ - mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \ - andi. r11,r11,MSR_PR; \ - tophys(r11,r1); /* use tophys(r1) if kernel */ \ - beq 1f; \ - mfspr r11,SPRN_SPRG_THREAD; \ - lwz r11,TASK_STACK-THREAD(r11); \ - addi r11,r11,THREAD_SIZE; \ - tophys(r11,r11); \ -1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */ - - -#define EXCEPTION_PROLOG_2 \ - stw r10,_CCR(r11); /* save registers */ \ - stw r12,GPR12(r11); \ - stw r9,GPR9(r11); \ - mfspr r10,SPRN_SPRG_SCRATCH0; \ - stw r10,GPR10(r11); \ - mfspr r12,SPRN_SPRG_SCRATCH1; \ - stw r12,GPR11(r11); \ - mflr r10; \ - stw r10,_LINK(r11); \ - mfspr r12,SPRN_SRR0; \ - mfspr r9,SPRN_SRR1; \ - stw r1,GPR1(r11); \ - stw r1,0(r11); \ - tovirt(r1,r11); /* set new kernel sp */ \ - li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \ - mtmsr r10; \ - stw r0,GPR0(r11); \ - lis r10, STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */ \ - addi r10, r10, STACK_FRAME_REGS_MARKER@l; \ - stw r10, 8(r11); \ - SAVE_4GPRS(3, r11); \ - SAVE_2GPRS(7, r11) - -/* - * Note: code which follows this uses cr0.eq (set if from kernel), - * r11, r12 (SRR0), and r9 (SRR1). - * - * Note2: once we have set r1 we are in a position to take exceptions - * again, and we could thus set MSR:RI at that point. - */ - -/* - * Exception vectors. - */ -#define EXCEPTION(n, label, hdlr, xfer) \ - . = n; \ -label: \ - EXCEPTION_PROLOG; \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - xfer(n, hdlr) - -#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret) \ - li r10,trap; \ - stw r10,_TRAP(r11); \ - li r10,MSR_KERNEL; \ - copyee(r10, r9); \ - bl tfer; \ -i##n: \ - .long hdlr; \ - .long ret - -#define COPY_EE(d, s) rlwimi d,s,0,16,16 -#define NOCOPY(d, s) - -#define EXC_XFER_STD(n, hdlr) \ - EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full, \ - ret_from_except_full) - -#define EXC_XFER_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \ - ret_from_except) - -#define EXC_XFER_EE(n, hdlr) \ - EXC_XFER_TEMPLATE(n, hdlr, n, COPY_EE, transfer_to_handler_full, \ - ret_from_except_full) - -#define EXC_XFER_EE_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(n, hdlr, n+1, COPY_EE, transfer_to_handler, \ - ret_from_except) - /* System reset */ EXCEPTION(0x100, Reset, system_reset_exception, EXC_XFER_STD) @@ -261,7 +167,7 @@ Alignment: mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE(0x600, alignment_exception) + EXC_XFER_STD(0x600, alignment_exception) /* Program check exception */ EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) @@ -273,19 +179,18 @@ Alignment: /* Decrementer */ EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) - EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE) - EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_STD) + EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_STD) /* System call */ . = 0xc00 SystemCall: - EXCEPTION_PROLOG - EXC_XFER_EE_LITE(0xc00, DoSyscall) + SYSCALL_ENTRY 0xc00 /* Single step - not used on 601 */ EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) - EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE) - EXCEPTION(0xf00, Trap_0f, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD) + EXCEPTION(0xf00, Trap_0f, unknown_exception, EXC_XFER_STD) /* On the MPC8xx, this is a software emulation interrupt. It occurs * for all unimplemented and illegal instructions. @@ -615,13 +520,13 @@ DARFixed:/* Return from dcbx instruction bug workaround */ /* 0x300 is DataAccess exception, needed by bad_page_fault() */ EXC_XFER_LITE(0x300, handle_page_fault) - EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_STD) /* On the MPC8xx, these next four traps are used for development * support of breakpoints and such. Someday I will get around to @@ -643,7 +548,7 @@ DataBreakpoint: mfspr r4,SPRN_BAR stw r4,_DAR(r11) mfspr r5,SPRN_DSISR - EXC_XFER_EE(0x1c00, do_break) + EXC_XFER_STD(0x1c00, do_break) 11: mtcr r10 mfspr r10, SPRN_SPRG_SCRATCH0 @@ -663,10 +568,10 @@ InstructionBreakpoint: mfspr r10, SPRN_SPRG_SCRATCH0 rfi #else - EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_STD) #endif - EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD) . = 0x2000 @@ -853,6 +758,9 @@ start_here: /* * Decide what sort of machine this is and initialize the MMU. */ +#ifdef CONFIG_KASAN + bl kasan_early_init +#endif li r3,0 mr r4,r31 bl machine_init diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 1b22a8dea399..bfeb469e8106 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -6,6 +6,8 @@ #include <asm/kvm_asm.h> #include <asm/kvm_booke_hv_asm.h> +#ifdef __ASSEMBLY__ + /* * Macros used for common Book-e exception handling */ @@ -81,6 +83,101 @@ END_BTB_FLUSH_SECTION SAVE_4GPRS(3, r11); \ SAVE_2GPRS(7, r11) +.macro SYSCALL_ENTRY trapno intno + mfspr r10, SPRN_SPRG_THREAD +#ifdef CONFIG_KVM_BOOKE_HV +BEGIN_FTR_SECTION + mtspr SPRN_SPRG_WSCRATCH0, r10 + stw r11, THREAD_NORMSAVE(0)(r10) + stw r13, THREAD_NORMSAVE(2)(r10) + mfcr r13 /* save CR in r13 for now */ + mfspr r11, SPRN_SRR1 + mtocrf 0x80, r11 /* check MSR[GS] without clobbering reg */ + bf 3, 1975f + b kvmppc_handler_BOOKE_INTERRUPT_\intno\()_SPRN_SRR1 +1975: + mr r12, r13 + lwz r13, THREAD_NORMSAVE(2)(r10) +FTR_SECTION_ELSE +#endif + mfcr r12 +#ifdef CONFIG_KVM_BOOKE_HV +ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) +#endif + BOOKE_CLEAR_BTB(r11) + lwz r11, TASK_STACK - THREAD(r10) + rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */ + ALLOC_STACK_FRAME(r11, THREAD_SIZE - INT_FRAME_SIZE) + stw r12, _CCR(r11) /* save various registers */ + mflr r12 + stw r12,_LINK(r11) + mfspr r12,SPRN_SRR0 + stw r1, GPR1(r11) + mfspr r9,SPRN_SRR1 + stw r1, 0(r11) + mr r1, r11 + stw r12,_NIP(r11) + rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ + lis r12, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ + stw r2,GPR2(r11) + addi r12, r12, STACK_FRAME_REGS_MARKER@l + stw r9,_MSR(r11) + li r2, \trapno + 1 + stw r12, 8(r11) + stw r2,_TRAP(r11) + SAVE_GPR(0, r11) + SAVE_4GPRS(3, r11) + SAVE_2GPRS(7, r11) + + addi r11,r1,STACK_FRAME_OVERHEAD + addi r2,r10,-THREAD + stw r11,PT_REGS(r10) + /* Check to see if the dbcr0 register is set up to debug. Use the + internal debug mode bit to do this. */ + lwz r12,THREAD_DBCR0(r10) + andis. r12,r12,DBCR0_IDM@h + ACCOUNT_CPU_USER_ENTRY(r2, r11, r12) + beq+ 3f + /* From user and task is ptraced - load up global dbcr0 */ + li r12,-1 /* clear all pending debug events */ + mtspr SPRN_DBSR,r12 + lis r11,global_dbcr0@ha + tophys(r11,r11) + addi r11,r11,global_dbcr0@l +#ifdef CONFIG_SMP + lwz r9,TASK_CPU(r2) + slwi r9,r9,3 + add r11,r11,r9 +#endif + lwz r12,0(r11) + mtspr SPRN_DBCR0,r12 + lwz r12,4(r11) + addi r12,r12,-1 + stw r12,4(r11) + +3: + tovirt(r2, r2) /* set r2 to current */ + lis r11, transfer_to_syscall@h + ori r11, r11, transfer_to_syscall@l +#ifdef CONFIG_TRACE_IRQFLAGS + /* + * If MSR is changing we need to keep interrupts disabled at this point + * otherwise we might risk taking an interrupt before we tell lockdep + * they are enabled. + */ + lis r10, MSR_KERNEL@h + ori r10, r10, MSR_KERNEL@l + rlwimi r10, r9, 0, MSR_EE +#else + lis r10, (MSR_KERNEL | MSR_EE)@h + ori r10, r10, (MSR_KERNEL | MSR_EE)@l +#endif + mtspr SPRN_SRR1,r10 + mtspr SPRN_SRR0,r11 + SYNC + RFI /* jump to handler, enable MMU */ +.endm + /* To handle the additional exception priority levels on 40x and Book-E * processors we allocate a stack per additional priority level. * @@ -217,8 +314,7 @@ label: CRITICAL_EXCEPTION_PROLOG(intno); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ - NOCOPY, crit_transfer_to_handler, \ - ret_from_crit_exc) + crit_transfer_to_handler, ret_from_crit_exc) #define MCHECK_EXCEPTION(n, label, hdlr) \ START_EXCEPTION(label); \ @@ -227,36 +323,23 @@ label: stw r5,_ESR(r11); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_TEMPLATE(hdlr, n+4, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ - NOCOPY, mcheck_transfer_to_handler, \ - ret_from_mcheck_exc) + mcheck_transfer_to_handler, ret_from_mcheck_exc) -#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret) \ +#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \ li r10,trap; \ stw r10,_TRAP(r11); \ lis r10,msr@h; \ ori r10,r10,msr@l; \ - copyee(r10, r9); \ bl tfer; \ .long hdlr; \ .long ret -#define COPY_EE(d, s) rlwimi d,s,0,16,16 -#define NOCOPY(d, s) - #define EXC_XFER_STD(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full, \ + EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full, \ ret_from_except_full) #define EXC_XFER_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \ - ret_from_except) - -#define EXC_XFER_EE(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \ - ret_from_except_full) - -#define EXC_XFER_EE_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \ + EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \ ret_from_except) /* Check for a single step debug exception while in an exception @@ -323,7 +406,7 @@ label: /* continue normal handling for a debug exception... */ \ 2: mfspr r4,SPRN_DBSR; \ addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_TEMPLATE(DebugException, 0x2008, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, debug_transfer_to_handler, ret_from_debug_exc) + EXC_XFER_TEMPLATE(DebugException, 0x2008, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), debug_transfer_to_handler, ret_from_debug_exc) #define DEBUG_CRIT_EXCEPTION \ START_EXCEPTION(DebugCrit); \ @@ -376,7 +459,7 @@ label: /* continue normal handling for a critical exception... */ \ 2: mfspr r4,SPRN_DBSR; \ addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, crit_transfer_to_handler, ret_from_crit_exc) + EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), crit_transfer_to_handler, ret_from_crit_exc) #define DATA_STORAGE_EXCEPTION \ START_EXCEPTION(DataStorage) \ @@ -401,7 +484,7 @@ label: mfspr r4,SPRN_DEAR; /* Grab the DEAR and save it */ \ stw r4,_DEAR(r11); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_EE(0x0600, alignment_exception) + EXC_XFER_STD(0x0600, alignment_exception) #define PROGRAM_EXCEPTION \ START_EXCEPTION(Program) \ @@ -426,9 +509,9 @@ label: bl load_up_fpu; /* if from user, just load it up */ \ b fast_exception_return; \ 1: addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception) + EXC_XFER_STD(0x800, kernel_fp_unavailable_exception) -#ifndef __ASSEMBLY__ +#else /* __ASSEMBLY__ */ struct exception_regs { unsigned long mas0; unsigned long mas1; diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 32332e24e421..6621f230cc37 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -268,6 +268,9 @@ set_ivor: /* * Decide what sort of machine this is and initialize the MMU. */ +#ifdef CONFIG_KASAN + bl kasan_early_init +#endif mr r3,r30 mr r4,r31 bl machine_init @@ -380,7 +383,7 @@ interrupt_base: EXC_XFER_LITE(0x0300, handle_page_fault) 1: addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE_LITE(0x0300, CacheLockingException) + EXC_XFER_LITE(0x0300, CacheLockingException) /* Instruction Storage Interrupt */ INSTRUCTION_STORAGE_EXCEPTION @@ -401,21 +404,20 @@ interrupt_base: #ifdef CONFIG_E200 /* E200 treats 'normal' floating point instructions as FP Unavail exception */ EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \ - program_check_exception, EXC_XFER_EE) + program_check_exception, EXC_XFER_STD) #else EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \ - unknown_exception, EXC_XFER_EE) + unknown_exception, EXC_XFER_STD) #endif #endif /* System Call Interrupt */ START_EXCEPTION(SystemCall) - NORMAL_EXCEPTION_PROLOG(SYSCALL) - EXC_XFER_EE_LITE(0x0c00, DoSyscall) + SYSCALL_ENTRY 0xc00 SYSCALL /* Auxiliary Processor Unavailable Interrupt */ EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \ - unknown_exception, EXC_XFER_EE) + unknown_exception, EXC_XFER_STD) /* Decrementer Interrupt */ DECREMENTER_EXCEPTION @@ -423,7 +425,7 @@ interrupt_base: /* Fixed Internal Timer Interrupt */ /* TODO: Add FIT support */ EXCEPTION(0x3100, FIT, FixedIntervalTimer, \ - unknown_exception, EXC_XFER_EE) + unknown_exception, EXC_XFER_STD) /* Watchdog Timer Interrupt */ #ifdef CONFIG_BOOKE_WDT @@ -633,25 +635,25 @@ END_BTB_FLUSH_SECTION bl load_up_spe b fast_exception_return 1: addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE_LITE(0x2010, KernelSPE) + EXC_XFER_LITE(0x2010, KernelSPE) #elif defined(CONFIG_SPE_POSSIBLE) EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \ - unknown_exception, EXC_XFER_EE) + unknown_exception, EXC_XFER_STD) #endif /* CONFIG_SPE_POSSIBLE */ /* SPE Floating Point Data */ #ifdef CONFIG_SPE EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData, - SPEFloatingPointException, EXC_XFER_EE) + SPEFloatingPointException, EXC_XFER_STD) /* SPE Floating Point Round */ EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ - SPEFloatingPointRoundException, EXC_XFER_EE) + SPEFloatingPointRoundException, EXC_XFER_STD) #elif defined(CONFIG_SPE_POSSIBLE) EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData, - unknown_exception, EXC_XFER_EE) + unknown_exception, EXC_XFER_STD) EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ - unknown_exception, EXC_XFER_EE) + unknown_exception, EXC_XFER_STD) #endif /* CONFIG_SPE_POSSIBLE */ @@ -674,10 +676,10 @@ END_BTB_FLUSH_SECTION unknown_exception) /* Hypercall */ - EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception, EXC_XFER_EE) + EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception, EXC_XFER_STD) /* Embedded Hypervisor Privilege */ - EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception, EXC_XFER_EE) + EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception, EXC_XFER_STD) interrupt_end: diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index fec8a6773119..da307dd93ee3 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -29,11 +29,15 @@ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/smp.h> +#include <linux/debugfs.h> +#include <linux/init.h> #include <asm/hw_breakpoint.h> #include <asm/processor.h> #include <asm/sstep.h> #include <asm/debug.h> +#include <asm/debugfs.h> +#include <asm/hvcall.h> #include <linux/uaccess.h> /* @@ -174,7 +178,7 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, if (!ppc_breakpoint_available()) return -ENODEV; length_max = 8; /* DABR */ - if (cpu_has_feature(CPU_FTR_DAWR)) { + if (dawr_enabled()) { length_max = 512 ; /* 64 doublewords */ /* DAWR region can't cross 512 boundary */ if ((attr->bp_addr >> 9) != @@ -376,3 +380,59 @@ void hw_breakpoint_pmu_read(struct perf_event *bp) { /* TODO */ } + +bool dawr_force_enable; +EXPORT_SYMBOL_GPL(dawr_force_enable); + +static ssize_t dawr_write_file_bool(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct arch_hw_breakpoint null_brk = {0, 0, 0}; + size_t rc; + + /* Send error to user if they hypervisor won't allow us to write DAWR */ + if ((!dawr_force_enable) && + (firmware_has_feature(FW_FEATURE_LPAR)) && + (set_dawr(&null_brk) != H_SUCCESS)) + return -1; + + rc = debugfs_write_file_bool(file, user_buf, count, ppos); + if (rc) + return rc; + + /* If we are clearing, make sure all CPUs have the DAWR cleared */ + if (!dawr_force_enable) + smp_call_function((smp_call_func_t)set_dawr, &null_brk, 0); + + return rc; +} + +static const struct file_operations dawr_enable_fops = { + .read = debugfs_read_file_bool, + .write = dawr_write_file_bool, + .open = simple_open, + .llseek = default_llseek, +}; + +static int __init dawr_force_setup(void) +{ + dawr_force_enable = false; + + if (cpu_has_feature(CPU_FTR_DAWR)) { + /* Don't setup sysfs file for user control on P8 */ + dawr_force_enable = true; + return 0; + } + + if (PVR_VER(mfspr(SPRN_PVR)) == PVR_POWER9) { + /* Turn DAWR off by default, but allow admin to turn it on */ + dawr_force_enable = false; + debugfs_create_file_unsafe("dawr_enable_dangerous", 0600, + powerpc_debugfs_root, + &dawr_force_enable, + &dawr_enable_fops); + } + return 0; +} +arch_initcall(dawr_force_setup); diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 7f5ac2e8581b..2dfbd5d5b932 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -1,956 +1,188 @@ /* - * This file contains idle entry/exit functions for POWER7, - * POWER8 and POWER9 CPUs. + * Copyright 2018, IBM Corporation. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. + * + * This file contains general idle entry/exit functions to save + * and restore stack and NVGPRs which allows C code to call idle + * states that lose GPRs, and it will return transparently with + * SRR1 wakeup reason return value. + * + * The platform / CPU caller must ensure SPRs and any other non-GPR + * state is saved and restored correctly, handle KVM, interrupts, etc. */ -#include <linux/threads.h> -#include <asm/processor.h> -#include <asm/page.h> -#include <asm/cputable.h> -#include <asm/thread_info.h> #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> #include <asm/ppc-opcode.h> -#include <asm/hw_irq.h> -#include <asm/kvm_book3s_asm.h> -#include <asm/opal.h> #include <asm/cpuidle.h> -#include <asm/exception-64s.h> -#include <asm/book3s/64/mmu-hash.h> -#include <asm/mmu.h> -#include <asm/asm-compat.h> -#include <asm/feature-fixups.h> - -#undef DEBUG - -/* - * Use unused space in the interrupt stack to save and restore - * registers for winkle support. - */ -#define _MMCR0 GPR0 -#define _SDR1 GPR3 -#define _PTCR GPR3 -#define _RPR GPR4 -#define _SPURR GPR5 -#define _PURR GPR6 -#define _TSCR GPR7 -#define _DSCR GPR8 -#define _AMOR GPR9 -#define _WORT GPR10 -#define _WORC GPR11 -#define _LPCR GPR12 - -#define PSSCR_EC_ESL_MASK_SHIFTED (PSSCR_EC | PSSCR_ESL) >> 16 - .text - -/* - * Used by threads before entering deep idle states. Saves SPRs - * in interrupt stack frame - */ -save_sprs_to_stack: - /* - * Note all register i.e per-core, per-subcore or per-thread is saved - * here since any thread in the core might wake up first - */ -BEGIN_FTR_SECTION - /* - * Note - SDR1 is dropped in Power ISA v3. Hence not restoring - * SDR1 here - */ - mfspr r3,SPRN_PTCR - std r3,_PTCR(r1) - mfspr r3,SPRN_LPCR - std r3,_LPCR(r1) -FTR_SECTION_ELSE - mfspr r3,SPRN_SDR1 - std r3,_SDR1(r1) -ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) - mfspr r3,SPRN_RPR - std r3,_RPR(r1) - mfspr r3,SPRN_SPURR - std r3,_SPURR(r1) - mfspr r3,SPRN_PURR - std r3,_PURR(r1) - mfspr r3,SPRN_TSCR - std r3,_TSCR(r1) - mfspr r3,SPRN_DSCR - std r3,_DSCR(r1) - mfspr r3,SPRN_AMOR - std r3,_AMOR(r1) - mfspr r3,SPRN_WORT - std r3,_WORT(r1) - mfspr r3,SPRN_WORC - std r3,_WORC(r1) /* - * On POWER9, there are idle states such as stop4, invoked via cpuidle, - * that lose hypervisor resources. In such cases, we need to save - * additional SPRs before entering those idle states so that they can - * be restored to their older values on wakeup from the idle state. + * Desired PSSCR in r3 * - * On POWER8, the only such deep idle state is winkle which is used - * only in the context of CPU-Hotplug, where these additional SPRs are - * reinitiazed to a sane value. Hence there is no need to save/restore - * these SPRs. + * No state will be lost regardless of wakeup mechanism (interrupt or NIA). + * + * An EC=0 type wakeup will return with a value of 0. SRESET wakeup (which can + * happen with xscom SRESET and possibly MCE) may clobber volatiles except LR, + * and must blr, to return to caller with r3 set according to caller's expected + * return code (for Book3S/64 that is SRR1). */ -BEGIN_FTR_SECTION - blr -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) - -power9_save_additional_sprs: - mfspr r3, SPRN_PID - mfspr r4, SPRN_LDBAR - std r3, STOP_PID(r13) - std r4, STOP_LDBAR(r13) - - mfspr r3, SPRN_FSCR - mfspr r4, SPRN_HFSCR - std r3, STOP_FSCR(r13) - std r4, STOP_HFSCR(r13) - - mfspr r3, SPRN_MMCRA - mfspr r4, SPRN_MMCR0 - std r3, STOP_MMCRA(r13) - std r4, _MMCR0(r1) - - mfspr r3, SPRN_MMCR1 - mfspr r4, SPRN_MMCR2 - std r3, STOP_MMCR1(r13) - std r4, STOP_MMCR2(r13) - blr - -power9_restore_additional_sprs: - ld r3,_LPCR(r1) - ld r4, STOP_PID(r13) - mtspr SPRN_LPCR,r3 - mtspr SPRN_PID, r4 - - ld r3, STOP_LDBAR(r13) - ld r4, STOP_FSCR(r13) - mtspr SPRN_LDBAR, r3 - mtspr SPRN_FSCR, r4 - - ld r3, STOP_HFSCR(r13) - ld r4, STOP_MMCRA(r13) - mtspr SPRN_HFSCR, r3 - mtspr SPRN_MMCRA, r4 - - ld r3, _MMCR0(r1) - ld r4, STOP_MMCR1(r13) - mtspr SPRN_MMCR0, r3 - mtspr SPRN_MMCR1, r4 - - ld r3, STOP_MMCR2(r13) - ld r4, PACA_SPRG_VDSO(r13) - mtspr SPRN_MMCR2, r3 - mtspr SPRN_SPRG3, r4 +_GLOBAL(isa300_idle_stop_noloss) + mtspr SPRN_PSSCR,r3 + PPC_STOP + li r3,0 blr /* - * Used by threads when the lock bit of core_idle_state is set. - * Threads will spin in HMT_LOW until the lock bit is cleared. - * r14 - pointer to core_idle_state - * r15 - used to load contents of core_idle_state - * r9 - used as a temporary variable + * Desired PSSCR in r3 + * + * GPRs may be lost, so they are saved here. Wakeup is by interrupt only. + * The SRESET wakeup returns to this function's caller by calling + * idle_return_gpr_loss with r3 set to desired return value. + * + * A wakeup without GPR loss may alteratively be handled as in + * isa300_idle_stop_noloss and blr directly, as an optimisation. + * + * The caller is responsible for saving/restoring SPRs, MSR, timebase, + * etc. */ - -core_idle_lock_held: - HMT_LOW -3: lwz r15,0(r14) - andis. r15,r15,PNV_CORE_IDLE_LOCK_BIT@h - bne 3b - HMT_MEDIUM - lwarx r15,0,r14 - andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h - bne- core_idle_lock_held - blr +_GLOBAL(isa300_idle_stop_mayloss) + mtspr SPRN_PSSCR,r3 + std r1,PACAR1(r13) + mflr r4 + mfcr r5 + /* use stack red zone rather than a new frame for saving regs */ + std r2,-8*0(r1) + std r14,-8*1(r1) + std r15,-8*2(r1) + std r16,-8*3(r1) + std r17,-8*4(r1) + std r18,-8*5(r1) + std r19,-8*6(r1) + std r20,-8*7(r1) + std r21,-8*8(r1) + std r22,-8*9(r1) + std r23,-8*10(r1) + std r24,-8*11(r1) + std r25,-8*12(r1) + std r26,-8*13(r1) + std r27,-8*14(r1) + std r28,-8*15(r1) + std r29,-8*16(r1) + std r30,-8*17(r1) + std r31,-8*18(r1) + std r4,-8*19(r1) + std r5,-8*20(r1) + /* 168 bytes */ + PPC_STOP + b . /* catch bugs */ /* - * Pass requested state in r3: - * r3 - PNV_THREAD_NAP/SLEEP/WINKLE in POWER8 - * - Requested PSSCR value in POWER9 + * Desired return value in r3 + * + * The idle wakeup SRESET interrupt can call this after calling + * to return to the idle sleep function caller with r3 as the return code. * - * Address of idle handler to branch to in realmode in r4 + * This must not be used if idle was entered via a _noloss function (use + * a simple blr instead). */ -pnv_powersave_common: - /* Use r3 to pass state nap/sleep/winkle */ - /* NAP is a state loss, we create a regs frame on the - * stack, fill it up with the state we care about and - * stick a pointer to it in PACAR1. We really only - * need to save PC, some CR bits and the NV GPRs, - * but for now an interrupt frame will do. - */ - mtctr r4 - - mflr r0 - std r0,16(r1) - stdu r1,-INT_FRAME_SIZE(r1) - std r0,_LINK(r1) - std r0,_NIP(r1) - - /* We haven't lost state ... yet */ - li r0,0 - stb r0,PACA_NAPSTATELOST(r13) - - /* Continue saving state */ - SAVE_GPR(2, r1) - SAVE_NVGPRS(r1) - mfcr r5 - std r5,_CCR(r1) - std r1,PACAR1(r13) - -BEGIN_FTR_SECTION - /* - * POWER9 does not require real mode to stop, and presently does not - * set hwthread_state for KVM (threads don't share MMU context), so - * we can remain in virtual mode for this. - */ - bctr -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) - /* - * POWER8 - * Go to real mode to do the nap, as required by the architecture. - * Also, we need to be in real mode before setting hwthread_state, - * because as soon as we do that, another thread can switch - * the MMU context to the guest. - */ - LOAD_REG_IMMEDIATE(r7, MSR_IDLE) - mtmsrd r7,0 - bctr +_GLOBAL(idle_return_gpr_loss) + ld r1,PACAR1(r13) + ld r4,-8*19(r1) + ld r5,-8*20(r1) + mtlr r4 + mtcr r5 + /* + * KVM nap requires r2 to be saved, rather than just restoring it + * from PACATOC. This could be avoided for that less common case + * if KVM saved its r2. + */ + ld r2,-8*0(r1) + ld r14,-8*1(r1) + ld r15,-8*2(r1) + ld r16,-8*3(r1) + ld r17,-8*4(r1) + ld r18,-8*5(r1) + ld r19,-8*6(r1) + ld r20,-8*7(r1) + ld r21,-8*8(r1) + ld r22,-8*9(r1) + ld r23,-8*10(r1) + ld r24,-8*11(r1) + ld r25,-8*12(r1) + ld r26,-8*13(r1) + ld r27,-8*14(r1) + ld r28,-8*15(r1) + ld r29,-8*16(r1) + ld r30,-8*17(r1) + ld r31,-8*18(r1) + blr /* * This is the sequence required to execute idle instructions, as * specified in ISA v2.07 (and earlier). MSR[IR] and MSR[DR] must be 0. + * + * The 0(r1) slot is used to save r2 in isa206, so use that here. */ #define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \ /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \ - std r0,0(r1); \ + std r2,0(r1); \ ptesync; \ - ld r0,0(r1); \ -236: cmpd cr0,r0,r0; \ + ld r2,0(r1); \ +236: cmpd cr0,r2,r2; \ bne 236b; \ - IDLE_INST; - - - .globl pnv_enter_arch207_idle_mode -pnv_enter_arch207_idle_mode: -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - /* Tell KVM we're entering idle */ - li r4,KVM_HWTHREAD_IN_IDLE - /******************************************************/ - /* N O T E W E L L ! ! ! N O T E W E L L */ - /* The following store to HSTATE_HWTHREAD_STATE(r13) */ - /* MUST occur in real mode, i.e. with the MMU off, */ - /* and the MMU must stay off until we clear this flag */ - /* and test HSTATE_HWTHREAD_REQ(r13) in */ - /* pnv_powersave_wakeup in this file. */ - /* The reason is that another thread can switch the */ - /* MMU to a guest context whenever this flag is set */ - /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on, */ - /* that would potentially cause this thread to start */ - /* executing instructions from guest memory in */ - /* hypervisor mode, leading to a host crash or data */ - /* corruption, or worse. */ - /******************************************************/ - stb r4,HSTATE_HWTHREAD_STATE(r13) -#endif - stb r3,PACA_THREAD_IDLE_STATE(r13) - cmpwi cr3,r3,PNV_THREAD_SLEEP - bge cr3,2f - IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP) - /* No return */ -2: - /* Sleep or winkle */ - lbz r7,PACA_THREAD_MASK(r13) - ld r14,PACA_CORE_IDLE_STATE_PTR(r13) - li r5,0 - beq cr3,3f - lis r5,PNV_CORE_IDLE_WINKLE_COUNT@h -3: -lwarx_loop1: - lwarx r15,0,r14 - - andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h - bnel- core_idle_lock_held - - add r15,r15,r5 /* Add if winkle */ - andc r15,r15,r7 /* Clear thread bit */ - - andi. r9,r15,PNV_CORE_IDLE_THREAD_BITS - -/* - * If cr0 = 0, then current thread is the last thread of the core entering - * sleep. Last thread needs to execute the hardware bug workaround code if - * required by the platform. - * Make the workaround call unconditionally here. The below branch call is - * patched out when the idle states are discovered if the platform does not - * require it. - */ -.global pnv_fastsleep_workaround_at_entry -pnv_fastsleep_workaround_at_entry: - beq fastsleep_workaround_at_entry - - stwcx. r15,0,r14 - bne- lwarx_loop1 - isync - -common_enter: /* common code for all the threads entering sleep or winkle */ - bgt cr3,enter_winkle - IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP) - -fastsleep_workaround_at_entry: - oris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h - stwcx. r15,0,r14 - bne- lwarx_loop1 - isync - - /* Fast sleep workaround */ - li r3,1 - li r4,1 - bl opal_config_cpu_idle_state - - /* Unlock */ - xoris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h - lwsync - stw r15,0(r14) - b common_enter - -enter_winkle: - bl save_sprs_to_stack - - IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE) - -/* - * r3 - PSSCR value corresponding to the requested stop state. - */ -power_enter_stop: -/* - * Check if we are executing the lite variant with ESL=EC=0 - */ - andis. r4,r3,PSSCR_EC_ESL_MASK_SHIFTED - clrldi r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */ - bne .Lhandle_esl_ec_set - PPC_STOP - li r3,0 /* Since we didn't lose state, return 0 */ - std r3, PACA_REQ_PSSCR(r13) - - /* - * pnv_wakeup_noloss() expects r12 to contain the SRR1 value so - * it can determine if the wakeup reason is an HMI in - * CHECK_HMI_INTERRUPT. - * - * However, when we wakeup with ESL=0, SRR1 will not contain the wakeup - * reason, so there is no point setting r12 to SRR1. - * - * Further, we clear r12 here, so that we don't accidentally enter the - * HMI in pnv_wakeup_noloss() if the value of r12[42:45] == WAKE_HMI. - */ - li r12, 0 - b pnv_wakeup_noloss - -.Lhandle_esl_ec_set: -BEGIN_FTR_SECTION - /* - * POWER9 DD2.0 or earlier can incorrectly set PMAO when waking up after - * a state-loss idle. Saving and restoring MMCR0 over idle is a - * workaround. - */ - mfspr r4,SPRN_MMCR0 - std r4,_MMCR0(r1) -END_FTR_SECTION_IFCLR(CPU_FTR_POWER9_DD2_1) + IDLE_INST; \ + b . /* catch bugs */ /* - * Check if the requested state is a deep idle state. - */ - LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) - ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) - cmpd r3,r4 - bge .Lhandle_deep_stop - PPC_STOP /* Does not return (system reset interrupt) */ - -.Lhandle_deep_stop: -/* - * Entering deep idle state. - * Clear thread bit in PACA_CORE_IDLE_STATE, save SPRs to - * stack and enter stop - */ - lbz r7,PACA_THREAD_MASK(r13) - ld r14,PACA_CORE_IDLE_STATE_PTR(r13) - -lwarx_loop_stop: - lwarx r15,0,r14 - andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h - bnel- core_idle_lock_held - andc r15,r15,r7 /* Clear thread bit */ - - stwcx. r15,0,r14 - bne- lwarx_loop_stop - isync - - bl save_sprs_to_stack - - PPC_STOP /* Does not return (system reset interrupt) */ - -/* - * Entered with MSR[EE]=0 and no soft-masked interrupts pending. - * r3 contains desired idle state (PNV_THREAD_NAP/SLEEP/WINKLE). - */ -_GLOBAL(power7_idle_insn) - /* Now check if user or arch enabled NAP mode */ - LOAD_REG_ADDR(r4, pnv_enter_arch207_idle_mode) - b pnv_powersave_common - -#define CHECK_HMI_INTERRUPT \ -BEGIN_FTR_SECTION_NESTED(66); \ - rlwinm r0,r12,45-31,0xf; /* extract wake reason field (P8) */ \ -FTR_SECTION_ELSE_NESTED(66); \ - rlwinm r0,r12,45-31,0xe; /* P7 wake reason field is 3 bits */ \ -ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ - cmpwi r0,0xa; /* Hypervisor maintenance ? */ \ - bne+ 20f; \ - /* Invoke opal call to handle hmi */ \ - ld r2,PACATOC(r13); \ - ld r1,PACAR1(r13); \ - std r3,ORIG_GPR3(r1); /* Save original r3 */ \ - li r3,0; /* NULL argument */ \ - bl hmi_exception_realmode; \ - nop; \ - ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \ -20: nop; - -/* - * Entered with MSR[EE]=0 and no soft-masked interrupts pending. - * r3 contains desired PSSCR register value. + * Desired instruction type in r3 * - * Offline (CPU unplug) case also must notify KVM that the CPU is - * idle. - */ -_GLOBAL(power9_offline_stop) -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - /* - * Tell KVM we're entering idle. - * This does not have to be done in real mode because the P9 MMU - * is independent per-thread. Some steppings share radix/hash mode - * between threads, but in that case KVM has a barrier sync in real - * mode before and after switching between radix and hash. - */ - li r4,KVM_HWTHREAD_IN_IDLE - stb r4,HSTATE_HWTHREAD_STATE(r13) -#endif - /* fall through */ - -_GLOBAL(power9_idle_stop) - std r3, PACA_REQ_PSSCR(r13) -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE -BEGIN_FTR_SECTION - sync - lwz r5, PACA_DONT_STOP(r13) - cmpwi r5, 0 - bne 1f -END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG) -#endif - mtspr SPRN_PSSCR,r3 - LOAD_REG_ADDR(r4,power_enter_stop) - b pnv_powersave_common - /* No return */ -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE -1: - /* - * We get here when TM / thread reconfiguration bug workaround - * code wants to get the CPU into SMT4 mode, and therefore - * we are being asked not to stop. - */ - li r3, 0 - std r3, PACA_REQ_PSSCR(r13) - blr /* return 0 for wakeup cause / SRR1 value */ -#endif - -/* - * Called from machine check handler for powersave wakeups. - * Low level machine check processing has already been done. Now just - * go through the wake up path to get everything in order. + * GPRs may be lost, so they are saved here. Wakeup is by interrupt only. + * The SRESET wakeup returns to this function's caller by calling + * idle_return_gpr_loss with r3 set to desired return value. * - * r3 - The original SRR1 value. - * Original SRR[01] have been clobbered. - * MSR_RI is clear. - */ -.global pnv_powersave_wakeup_mce -pnv_powersave_wakeup_mce: - /* Set cr3 for pnv_powersave_wakeup */ - rlwinm r11,r3,47-31,30,31 - cmpwi cr3,r11,2 - - /* - * Now put the original SRR1 with SRR1_WAKEMCE_RESVD as the wake - * reason into r12, which allows reuse of the system reset wakeup - * code without being mistaken for another type of wakeup. - */ - oris r12,r3,SRR1_WAKEMCE_RESVD@h - - b pnv_powersave_wakeup - -/* - * Called from reset vector for powersave wakeups. - * cr3 - set to gt if waking up with partial/complete hypervisor state loss - * r12 - SRR1 - */ -.global pnv_powersave_wakeup -pnv_powersave_wakeup: - ld r2, PACATOC(r13) - -BEGIN_FTR_SECTION - bl pnv_restore_hyp_resource_arch300 -FTR_SECTION_ELSE - bl pnv_restore_hyp_resource_arch207 -ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) - - li r0,PNV_THREAD_RUNNING - stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */ - - mr r3,r12 - -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - lbz r0,HSTATE_HWTHREAD_STATE(r13) - cmpwi r0,KVM_HWTHREAD_IN_KERNEL - beq 0f - li r0,KVM_HWTHREAD_IN_KERNEL - stb r0,HSTATE_HWTHREAD_STATE(r13) - /* Order setting hwthread_state vs. testing hwthread_req */ - sync -0: lbz r0,HSTATE_HWTHREAD_REQ(r13) - cmpwi r0,0 - beq 1f - b kvm_start_guest -1: -#endif - - /* Return SRR1 from power7_nap() */ - blt cr3,pnv_wakeup_noloss - b pnv_wakeup_loss - -/* - * Check whether we have woken up with hypervisor state loss. - * If yes, restore hypervisor state and return back to link. + * A wakeup without GPR loss may alteratively be handled as in + * isa300_idle_stop_noloss and blr directly, as an optimisation. * - * cr3 - set to gt if waking up with partial/complete hypervisor state loss - */ -pnv_restore_hyp_resource_arch300: - /* - * Workaround for POWER9, if we lost resources, the ERAT - * might have been mixed up and needs flushing. We also need - * to reload MMCR0 (see comment above). We also need to set - * then clear bit 60 in MMCRA to ensure the PMU starts running. - */ - blt cr3,1f -BEGIN_FTR_SECTION - PPC_INVALIDATE_ERAT - ld r1,PACAR1(r13) - ld r4,_MMCR0(r1) - mtspr SPRN_MMCR0,r4 -END_FTR_SECTION_IFCLR(CPU_FTR_POWER9_DD2_1) - mfspr r4,SPRN_MMCRA - ori r4,r4,(1 << (63-60)) - mtspr SPRN_MMCRA,r4 - xori r4,r4,(1 << (63-60)) - mtspr SPRN_MMCRA,r4 -1: - /* - * POWER ISA 3. Use PSSCR to determine if we - * are waking up from deep idle state - */ - LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) - ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) - - /* - * 0-3 bits correspond to Power-Saving Level Status - * which indicates the idle state we are waking up from - */ - mfspr r5, SPRN_PSSCR - rldicl r5,r5,4,60 - li r0, 0 /* clear requested_psscr to say we're awake */ - std r0, PACA_REQ_PSSCR(r13) - cmpd cr4,r5,r4 - bge cr4,pnv_wakeup_tb_loss /* returns to caller */ - - blr /* Waking up without hypervisor state loss. */ - -/* Same calling convention as arch300 */ -pnv_restore_hyp_resource_arch207: - /* - * POWER ISA 2.07 or less. - * Check if we slept with sleep or winkle. - */ - lbz r4,PACA_THREAD_IDLE_STATE(r13) - cmpwi cr2,r4,PNV_THREAD_NAP - bgt cr2,pnv_wakeup_tb_loss /* Either sleep or Winkle */ - - /* - * We fall through here if PACA_THREAD_IDLE_STATE shows we are waking - * up from nap. At this stage CR3 shouldn't contains 'gt' since that - * indicates we are waking with hypervisor state loss from nap. - */ - bgt cr3,. - - blr /* Waking up without hypervisor state loss */ - -/* - * Called if waking up from idle state which can cause either partial or - * complete hyp state loss. - * In POWER8, called if waking up from fastsleep or winkle - * In POWER9, called if waking up from stop state >= pnv_first_deep_stop_state - * - * r13 - PACA - * cr3 - gt if waking up with partial/complete hypervisor state loss - * - * If ISA300: - * cr4 - gt or eq if waking up from complete hypervisor state loss. + * The caller is responsible for saving/restoring SPRs, MSR, timebase, + * etc. * - * If ISA207: - * r4 - PACA_THREAD_IDLE_STATE + * This must be called in real-mode (MSR_IDLE). */ -pnv_wakeup_tb_loss: - ld r1,PACAR1(r13) - /* - * Before entering any idle state, the NVGPRs are saved in the stack. - * If there was a state loss, or PACA_NAPSTATELOST was set, then the - * NVGPRs are restored. If we are here, it is likely that state is lost, - * but not guaranteed -- neither ISA207 nor ISA300 tests to reach - * here are the same as the test to restore NVGPRS: - * PACA_THREAD_IDLE_STATE test for ISA207, PSSCR test for ISA300, - * and SRR1 test for restoring NVGPRs. - * - * We are about to clobber NVGPRs now, so set NAPSTATELOST to - * guarantee they will always be restored. This might be tightened - * with careful reading of specs (particularly for ISA300) but this - * is already a slow wakeup path and it's simpler to be safe. - */ - li r0,1 - stb r0,PACA_NAPSTATELOST(r13) - - /* - * - * Save SRR1 and LR in NVGPRs as they might be clobbered in - * opal_call() (called in CHECK_HMI_INTERRUPT). SRR1 is required - * to determine the wakeup reason if we branch to kvm_start_guest. LR - * is required to return back to reset vector after hypervisor state - * restore is complete. - */ - mr r19,r12 - mr r18,r4 - mflr r17 -BEGIN_FTR_SECTION - CHECK_HMI_INTERRUPT -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) - - ld r14,PACA_CORE_IDLE_STATE_PTR(r13) - lbz r7,PACA_THREAD_MASK(r13) - - /* - * Take the core lock to synchronize against other threads. - * - * Lock bit is set in one of the 2 cases- - * a. In the sleep/winkle enter path, the last thread is executing - * fastsleep workaround code. - * b. In the wake up path, another thread is executing fastsleep - * workaround undo code or resyncing timebase or restoring context - * In either case loop until the lock bit is cleared. - */ -1: - lwarx r15,0,r14 - andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h - bnel- core_idle_lock_held - oris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h - stwcx. r15,0,r14 - bne- 1b - isync - - andi. r9,r15,PNV_CORE_IDLE_THREAD_BITS - cmpwi cr2,r9,0 - - /* - * At this stage - * cr2 - eq if first thread to wakeup in core - * cr3- gt if waking up with partial/complete hypervisor state loss - * ISA300: - * cr4 - gt or eq if waking up from complete hypervisor state loss. - */ - -BEGIN_FTR_SECTION - /* - * Were we in winkle? - * If yes, check if all threads were in winkle, decrement our - * winkle count, set all thread winkle bits if all were in winkle. - * Check if our thread has a winkle bit set, and set cr4 accordingly - * (to match ISA300, above). Pseudo-code for core idle state - * transitions for ISA207 is as follows (everything happens atomically - * due to store conditional and/or lock bit): - * - * nap_idle() { } - * nap_wake() { } - * - * sleep_idle() - * { - * core_idle_state &= ~thread_in_core - * } - * - * sleep_wake() - * { - * bool first_in_core, first_in_subcore; - * - * first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0; - * first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0; - * - * core_idle_state |= thread_in_core; - * } - * - * winkle_idle() - * { - * core_idle_state &= ~thread_in_core; - * core_idle_state += 1 << WINKLE_COUNT_SHIFT; - * } - * - * winkle_wake() - * { - * bool first_in_core, first_in_subcore, winkle_state_lost; - * - * first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0; - * first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0; - * - * core_idle_state |= thread_in_core; - * - * if ((core_idle_state & WINKLE_MASK) == (8 << WINKLE_COUNT_SIHFT)) - * core_idle_state |= THREAD_WINKLE_BITS; - * core_idle_state -= 1 << WINKLE_COUNT_SHIFT; - * - * winkle_state_lost = core_idle_state & - * (thread_in_core << WINKLE_THREAD_SHIFT); - * core_idle_state &= ~(thread_in_core << WINKLE_THREAD_SHIFT); - * } - * - */ - cmpwi r18,PNV_THREAD_WINKLE +_GLOBAL(isa206_idle_insn_mayloss) + std r1,PACAR1(r13) + mflr r4 + mfcr r5 + /* use stack red zone rather than a new frame for saving regs */ + std r2,-8*0(r1) + std r14,-8*1(r1) + std r15,-8*2(r1) + std r16,-8*3(r1) + std r17,-8*4(r1) + std r18,-8*5(r1) + std r19,-8*6(r1) + std r20,-8*7(r1) + std r21,-8*8(r1) + std r22,-8*9(r1) + std r23,-8*10(r1) + std r24,-8*11(r1) + std r25,-8*12(r1) + std r26,-8*13(r1) + std r27,-8*14(r1) + std r28,-8*15(r1) + std r29,-8*16(r1) + std r30,-8*17(r1) + std r31,-8*18(r1) + std r4,-8*19(r1) + std r5,-8*20(r1) + cmpwi r3,PNV_THREAD_NAP + bne 1f + IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP) +1: cmpwi r3,PNV_THREAD_SLEEP bne 2f - andis. r9,r15,PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT@h - subis r15,r15,PNV_CORE_IDLE_WINKLE_COUNT@h - beq 2f - ori r15,r15,PNV_CORE_IDLE_THREAD_WINKLE_BITS /* all were winkle */ -2: - /* Shift thread bit to winkle mask, then test if this thread is set, - * and remove it from the winkle bits */ - slwi r8,r7,8 - and r8,r8,r15 - andc r15,r15,r8 - cmpwi cr4,r8,1 /* cr4 will be gt if our bit is set, lt if not */ - - lbz r4,PACA_SUBCORE_SIBLING_MASK(r13) - and r4,r4,r15 - cmpwi r4,0 /* Check if first in subcore */ - - or r15,r15,r7 /* Set thread bit */ - beq first_thread_in_subcore -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) - - or r15,r15,r7 /* Set thread bit */ - beq cr2,first_thread_in_core - - /* Not first thread in core or subcore to wake up */ - b clear_lock - -first_thread_in_subcore: - /* - * If waking up from sleep, subcore state is not lost. Hence - * skip subcore state restore - */ - blt cr4,subcore_state_restored - - /* Restore per-subcore state */ - ld r4,_SDR1(r1) - mtspr SPRN_SDR1,r4 - - ld r4,_RPR(r1) - mtspr SPRN_RPR,r4 - ld r4,_AMOR(r1) - mtspr SPRN_AMOR,r4 - -subcore_state_restored: - /* - * Check if the thread is also the first thread in the core. If not, - * skip to clear_lock. - */ - bne cr2,clear_lock - -first_thread_in_core: - - /* - * First thread in the core waking up from any state which can cause - * partial or complete hypervisor state loss. It needs to - * call the fastsleep workaround code if the platform requires it. - * Call it unconditionally here. The below branch instruction will - * be patched out if the platform does not have fastsleep or does not - * require the workaround. Patching will be performed during the - * discovery of idle-states. - */ -.global pnv_fastsleep_workaround_at_exit -pnv_fastsleep_workaround_at_exit: - b fastsleep_workaround_at_exit - -timebase_resync: - /* - * Use cr3 which indicates that we are waking up with atleast partial - * hypervisor state loss to determine if TIMEBASE RESYNC is needed. - */ - ble cr3,.Ltb_resynced - /* Time base re-sync */ - bl opal_resync_timebase; - /* - * If waking up from sleep (POWER8), per core state - * is not lost, skip to clear_lock. - */ -.Ltb_resynced: - blt cr4,clear_lock - - /* - * First thread in the core to wake up and its waking up with - * complete hypervisor state loss. Restore per core hypervisor - * state. - */ -BEGIN_FTR_SECTION - ld r4,_PTCR(r1) - mtspr SPRN_PTCR,r4 - ld r4,_RPR(r1) - mtspr SPRN_RPR,r4 - ld r4,_AMOR(r1) - mtspr SPRN_AMOR,r4 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) - - ld r4,_TSCR(r1) - mtspr SPRN_TSCR,r4 - ld r4,_WORC(r1) - mtspr SPRN_WORC,r4 - -clear_lock: - xoris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h - lwsync - stw r15,0(r14) - -common_exit: - /* - * Common to all threads. - * - * If waking up from sleep, hypervisor state is not lost. Hence - * skip hypervisor state restore. - */ - blt cr4,hypervisor_state_restored - - /* Waking up from winkle */ - -BEGIN_MMU_FTR_SECTION - b no_segments -END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) - /* Restore SLB from PACA */ - ld r8,PACA_SLBSHADOWPTR(r13) - - .rept SLB_NUM_BOLTED - li r3, SLBSHADOW_SAVEAREA - LDX_BE r5, r8, r3 - addi r3, r3, 8 - LDX_BE r6, r8, r3 - andis. r7,r5,SLB_ESID_V@h - beq 1f - slbmte r6,r5 -1: addi r8,r8,16 - .endr -no_segments: - - /* Restore per thread state */ - - ld r4,_SPURR(r1) - mtspr SPRN_SPURR,r4 - ld r4,_PURR(r1) - mtspr SPRN_PURR,r4 - ld r4,_DSCR(r1) - mtspr SPRN_DSCR,r4 - ld r4,_WORT(r1) - mtspr SPRN_WORT,r4 - - /* Call cur_cpu_spec->cpu_restore() */ - LOAD_REG_ADDR(r4, cur_cpu_spec) - ld r4,0(r4) - ld r12,CPU_SPEC_RESTORE(r4) -#ifdef PPC64_ELF_ABI_v1 - ld r12,0(r12) -#endif - mtctr r12 - bctrl - -/* - * On POWER9, we can come here on wakeup from a cpuidle stop state. - * Hence restore the additional SPRs to the saved value. - * - * On POWER8, we come here only on winkle. Since winkle is used - * only in the case of CPU-Hotplug, we don't need to restore - * the additional SPRs. - */ -BEGIN_FTR_SECTION - bl power9_restore_additional_sprs -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) -hypervisor_state_restored: - - mr r12,r19 - mtlr r17 - blr /* return to pnv_powersave_wakeup */ - -fastsleep_workaround_at_exit: - li r3,1 - li r4,0 - bl opal_config_cpu_idle_state - b timebase_resync - -/* - * R3 here contains the value that will be returned to the caller - * of power7_nap. - * R12 contains SRR1 for CHECK_HMI_INTERRUPT. - */ -.global pnv_wakeup_loss -pnv_wakeup_loss: - ld r1,PACAR1(r13) -BEGIN_FTR_SECTION - CHECK_HMI_INTERRUPT -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) - REST_NVGPRS(r1) - REST_GPR(2, r1) - ld r4,PACAKMSR(r13) - ld r5,_LINK(r1) - ld r6,_CCR(r1) - addi r1,r1,INT_FRAME_SIZE - mtlr r5 - mtcr r6 - mtmsrd r4 - blr + IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP) +2: IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE) -/* - * R3 here contains the value that will be returned to the caller - * of power7_nap. - * R12 contains SRR1 for CHECK_HMI_INTERRUPT. - */ -pnv_wakeup_noloss: - lbz r0,PACA_NAPSTATELOST(r13) - cmpwi r0,0 - bne pnv_wakeup_loss - ld r1,PACAR1(r13) -BEGIN_FTR_SECTION - CHECK_HMI_INTERRUPT -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) - ld r4,PACAKMSR(r13) - ld r5,_NIP(r1) - ld r6,_CCR(r1) - addi r1,r1,INT_FRAME_SIZE - mtlr r5 - mtcr r6 - mtmsrd r4 - blr diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 8a936723c791..ada901af4950 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -81,10 +81,7 @@ DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); EXPORT_PER_CPU_SYMBOL(irq_stat); -int __irq_offset_value; - #ifdef CONFIG_PPC32 -EXPORT_SYMBOL(__irq_offset_value); atomic_t ppc_n_lost_interrupts; #ifdef CONFIG_TAU_INT @@ -261,16 +258,9 @@ notrace void arch_local_irq_restore(unsigned long mask) */ irq_happened = get_irq_happened(); if (!irq_happened) { - /* - * FIXME. Here we'd like to be able to do: - * - * #ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG - * WARN_ON(!(mfmsr() & MSR_EE)); - * #endif - * - * But currently it hits in a few paths, we should fix those and - * enable the warning. - */ +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG + WARN_ON(!(mfmsr() & MSR_EE)); +#endif return; } diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index b5fec1f9751a..4581377cfc98 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -112,6 +112,7 @@ void save_mce_event(struct pt_regs *regs, long handled, mce->srr1 = regs->msr; mce->gpr3 = regs->gpr[3]; mce->in_use = 1; + mce->cpu = get_paca()->paca_index; /* Mark it recovered if we have handled it and MSR(RI=1). */ if (handled && (regs->msr & MSR_RI)) @@ -121,6 +122,8 @@ void save_mce_event(struct pt_regs *regs, long handled, mce->initiator = mce_err->initiator; mce->severity = mce_err->severity; + mce->sync_error = mce_err->sync_error; + mce->error_class = mce_err->error_class; /* * Populate the mce error_type and type-specific error_type. @@ -310,7 +313,11 @@ static void machine_check_process_queued_event(struct irq_work *work) void machine_check_print_event_info(struct machine_check_event *evt, bool user_mode, bool in_guest) { - const char *level, *sevstr, *subtype; + const char *level, *sevstr, *subtype, *err_type; + uint64_t ea = 0, pa = 0; + int n = 0; + char dar_str[50]; + char pa_str[50]; static const char *mc_ue_types[] = { "Indeterminate", "Instruction fetch", @@ -357,6 +364,13 @@ void machine_check_print_event_info(struct machine_check_event *evt, "Store (timeout)", "Page table walk Load/Store (timeout)", }; + static const char *mc_error_class[] = { + "Unknown", + "Hardware error", + "Probable Hardware error (some chance of software cause)", + "Software error", + "Probable Software error (some chance of hardware cause)", + }; /* Print things out */ if (evt->version != MCE_V1) { @@ -371,9 +385,9 @@ void machine_check_print_event_info(struct machine_check_event *evt, break; case MCE_SEV_WARNING: level = KERN_WARNING; - sevstr = ""; + sevstr = "Warning"; break; - case MCE_SEV_ERROR_SYNC: + case MCE_SEV_SEVERE: level = KERN_ERR; sevstr = "Severe"; break; @@ -384,101 +398,107 @@ void machine_check_print_event_info(struct machine_check_event *evt, break; } - printk("%s%s Machine check interrupt [%s]\n", level, sevstr, - evt->disposition == MCE_DISPOSITION_RECOVERED ? - "Recovered" : "Not recovered"); - - if (in_guest) { - printk("%s Guest NIP: %016llx\n", level, evt->srr0); - } else if (user_mode) { - printk("%s NIP: [%016llx] PID: %d Comm: %s\n", level, - evt->srr0, current->pid, current->comm); - } else { - printk("%s NIP [%016llx]: %pS\n", level, evt->srr0, - (void *)evt->srr0); - } - - printk("%s Initiator: %s\n", level, - evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown"); switch (evt->error_type) { case MCE_ERROR_TYPE_UE: + err_type = "UE"; subtype = evt->u.ue_error.ue_error_type < ARRAY_SIZE(mc_ue_types) ? mc_ue_types[evt->u.ue_error.ue_error_type] : "Unknown"; - printk("%s Error type: UE [%s]\n", level, subtype); if (evt->u.ue_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt->u.ue_error.effective_address); + ea = evt->u.ue_error.effective_address; if (evt->u.ue_error.physical_address_provided) - printk("%s Physical address: %016llx\n", - level, evt->u.ue_error.physical_address); + pa = evt->u.ue_error.physical_address; break; case MCE_ERROR_TYPE_SLB: + err_type = "SLB"; subtype = evt->u.slb_error.slb_error_type < ARRAY_SIZE(mc_slb_types) ? mc_slb_types[evt->u.slb_error.slb_error_type] : "Unknown"; - printk("%s Error type: SLB [%s]\n", level, subtype); if (evt->u.slb_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt->u.slb_error.effective_address); + ea = evt->u.slb_error.effective_address; break; case MCE_ERROR_TYPE_ERAT: + err_type = "ERAT"; subtype = evt->u.erat_error.erat_error_type < ARRAY_SIZE(mc_erat_types) ? mc_erat_types[evt->u.erat_error.erat_error_type] : "Unknown"; - printk("%s Error type: ERAT [%s]\n", level, subtype); if (evt->u.erat_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt->u.erat_error.effective_address); + ea = evt->u.erat_error.effective_address; break; case MCE_ERROR_TYPE_TLB: + err_type = "TLB"; subtype = evt->u.tlb_error.tlb_error_type < ARRAY_SIZE(mc_tlb_types) ? mc_tlb_types[evt->u.tlb_error.tlb_error_type] : "Unknown"; - printk("%s Error type: TLB [%s]\n", level, subtype); if (evt->u.tlb_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt->u.tlb_error.effective_address); + ea = evt->u.tlb_error.effective_address; break; case MCE_ERROR_TYPE_USER: + err_type = "User"; subtype = evt->u.user_error.user_error_type < ARRAY_SIZE(mc_user_types) ? mc_user_types[evt->u.user_error.user_error_type] : "Unknown"; - printk("%s Error type: User [%s]\n", level, subtype); if (evt->u.user_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt->u.user_error.effective_address); + ea = evt->u.user_error.effective_address; break; case MCE_ERROR_TYPE_RA: + err_type = "Real address"; subtype = evt->u.ra_error.ra_error_type < ARRAY_SIZE(mc_ra_types) ? mc_ra_types[evt->u.ra_error.ra_error_type] : "Unknown"; - printk("%s Error type: Real address [%s]\n", level, subtype); if (evt->u.ra_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt->u.ra_error.effective_address); + ea = evt->u.ra_error.effective_address; break; case MCE_ERROR_TYPE_LINK: + err_type = "Link"; subtype = evt->u.link_error.link_error_type < ARRAY_SIZE(mc_link_types) ? mc_link_types[evt->u.link_error.link_error_type] : "Unknown"; - printk("%s Error type: Link [%s]\n", level, subtype); if (evt->u.link_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt->u.link_error.effective_address); + ea = evt->u.link_error.effective_address; break; default: case MCE_ERROR_TYPE_UNKNOWN: - printk("%s Error type: Unknown\n", level); + err_type = "Unknown"; + subtype = ""; break; } + + dar_str[0] = pa_str[0] = '\0'; + if (ea && evt->srr0 != ea) { + /* Load/Store address */ + n = sprintf(dar_str, "DAR: %016llx ", ea); + if (pa) + sprintf(dar_str + n, "paddr: %016llx ", pa); + } else if (pa) { + sprintf(pa_str, " paddr: %016llx", pa); + } + + printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n", + level, evt->cpu, sevstr, in_guest ? "Guest" : "Host", + err_type, subtype, dar_str, + evt->disposition == MCE_DISPOSITION_RECOVERED ? + "Recovered" : "Not recovered"); + + if (in_guest || user_mode) { + printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n", + level, evt->cpu, current->pid, current->comm, + in_guest ? "Guest " : "", evt->srr0, pa_str); + } else { + printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n", + level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str); + } + + subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ? + mc_error_class[evt->error_class] : "Unknown"; + printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype); } EXPORT_SYMBOL_GPL(machine_check_print_event_info); diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 6b800eec31f2..b5e876efe864 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -36,7 +36,7 @@ * Convert an address related to an mm to a PFN. NOTE: we are in real * mode, we could potentially race with page table updates. */ -static unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr) +unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr) { pte_t *ptep; unsigned long flags; @@ -131,213 +131,232 @@ struct mce_ierror_table { bool nip_valid; /* nip is a valid indicator of faulting address */ unsigned int error_type; unsigned int error_subtype; + unsigned int error_class; unsigned int initiator; unsigned int severity; + bool sync_error; }; static const struct mce_ierror_table mce_p7_ierror_table[] = { { 0x00000000001c0000, 0x0000000000040000, true, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000001c0000, 0x0000000000080000, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000001c0000, 0x00000000000c0000, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000001c0000, 0x0000000000100000, true, MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */ - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000001c0000, 0x0000000000140000, true, - MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000001c0000, 0x0000000000180000, true, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000001c0000, 0x00000000001c0000, true, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, -{ 0, 0, 0, 0, 0, 0 } }; + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0, 0, 0, 0, 0, 0, 0 } }; static const struct mce_ierror_table mce_p8_ierror_table[] = { { 0x00000000081c0000, 0x0000000000040000, true, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x0000000000080000, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x00000000000c0000, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000081c0000, 0x0000000000100000, true, - MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000081c0000, 0x0000000000140000, true, - MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000081c0000, 0x0000000000180000, true, MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x00000000001c0000, true, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x0000000008000000, true, - MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_IFETCH_TIMEOUT, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x0000000008040000, true, MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, -{ 0, 0, 0, 0, 0, 0 } }; + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0, 0, 0, 0, 0, 0, 0 } }; static const struct mce_ierror_table mce_p9_ierror_table[] = { { 0x00000000081c0000, 0x0000000000040000, true, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x0000000000080000, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x00000000000c0000, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000081c0000, 0x0000000000100000, true, - MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000081c0000, 0x0000000000140000, true, - MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000081c0000, 0x0000000000180000, true, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x00000000001c0000, true, - MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH_FOREIGN, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH_FOREIGN, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x0000000008000000, true, - MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_IFETCH_TIMEOUT, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x0000000008040000, true, MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x00000000080c0000, true, - MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x0000000008100000, true, - MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x0000000008140000, false, - MCE_ERROR_TYPE_RA, MCE_RA_ERROR_STORE, - MCE_INITIATOR_CPU, MCE_SEV_FATAL, }, /* ASYNC is fatal */ + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_STORE, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_FATAL, false }, /* ASYNC is fatal */ { 0x00000000081c0000, 0x0000000008180000, false, MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_STORE_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_FATAL, }, /* ASYNC is fatal */ -{ 0x00000000081c0000, 0x00000000081c0000, true, + MCE_INITIATOR_CPU, MCE_SEV_FATAL, false }, /* ASYNC is fatal */ +{ 0x00000000081c0000, 0x00000000081c0000, true, MCE_ECLASS_HARDWARE, MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, -{ 0, 0, 0, 0, 0, 0 } }; + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0, 0, 0, 0, 0, 0, 0 } }; struct mce_derror_table { unsigned long dsisr_value; bool dar_valid; /* dar is a valid indicator of faulting address */ unsigned int error_type; unsigned int error_subtype; + unsigned int error_class; unsigned int initiator; unsigned int severity; + bool sync_error; }; static const struct mce_derror_table mce_p7_derror_table[] = { { 0x00008000, false, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00004000, true, MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000800, true, - MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000400, true, - MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000080, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */ - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000100, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000040, true, MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */ - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, -{ 0, false, 0, 0, 0, 0 } }; + MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0, false, 0, 0, 0, 0, 0 } }; static const struct mce_derror_table mce_p8_derror_table[] = { { 0x00008000, false, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00004000, true, MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00002000, true, - MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00001000, true, MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000800, true, - MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000400, true, - MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000200, true, MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, /* SECONDARY ERAT */ - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000080, true, MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */ - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000100, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, -{ 0, false, 0, 0, 0, 0 } }; + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0, false, 0, 0, 0, 0, 0 } }; static const struct mce_derror_table mce_p9_derror_table[] = { { 0x00008000, false, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00004000, true, MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00002000, true, - MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00001000, true, MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000800, true, - MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000400, true, - MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000200, false, - MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000080, true, MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */ - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000100, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000040, true, - MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000020, false, MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000010, false, MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000008, false, - MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, -{ 0, false, 0, 0, 0, 0 } }; + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0, false, 0, 0, 0, 0, 0 } }; static int mce_find_instr_ea_and_pfn(struct pt_regs *regs, uint64_t *addr, uint64_t *phys_addr) @@ -404,6 +423,7 @@ static int mce_handle_ierror(struct pt_regs *regs, /* now fill in mce_error_info */ mce_err->error_type = table[i].error_type; + mce_err->error_class = table[i].error_class; switch (table[i].error_type) { case MCE_ERROR_TYPE_UE: mce_err->u.ue_error_type = table[i].error_subtype; @@ -427,11 +447,12 @@ static int mce_handle_ierror(struct pt_regs *regs, mce_err->u.link_error_type = table[i].error_subtype; break; } + mce_err->sync_error = table[i].sync_error; mce_err->severity = table[i].severity; mce_err->initiator = table[i].initiator; if (table[i].nip_valid) { *addr = regs->nip; - if (mce_err->severity == MCE_SEV_ERROR_SYNC && + if (mce_err->sync_error && table[i].error_type == MCE_ERROR_TYPE_UE) { unsigned long pfn; @@ -448,8 +469,10 @@ static int mce_handle_ierror(struct pt_regs *regs, } mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN; - mce_err->severity = MCE_SEV_ERROR_SYNC; + mce_err->error_class = MCE_ECLASS_UNKNOWN; + mce_err->severity = MCE_SEV_SEVERE; mce_err->initiator = MCE_INITIATOR_CPU; + mce_err->sync_error = true; return 0; } @@ -496,6 +519,7 @@ static int mce_handle_derror(struct pt_regs *regs, /* now fill in mce_error_info */ mce_err->error_type = table[i].error_type; + mce_err->error_class = table[i].error_class; switch (table[i].error_type) { case MCE_ERROR_TYPE_UE: mce_err->u.ue_error_type = table[i].error_subtype; @@ -519,11 +543,12 @@ static int mce_handle_derror(struct pt_regs *regs, mce_err->u.link_error_type = table[i].error_subtype; break; } + mce_err->sync_error = table[i].sync_error; mce_err->severity = table[i].severity; mce_err->initiator = table[i].initiator; if (table[i].dar_valid) *addr = regs->dar; - else if (mce_err->severity == MCE_SEV_ERROR_SYNC && + else if (mce_err->sync_error && table[i].error_type == MCE_ERROR_TYPE_UE) { /* * We do a maximum of 4 nested MCE calls, see @@ -539,8 +564,10 @@ static int mce_handle_derror(struct pt_regs *regs, return handled; mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN; - mce_err->severity = MCE_SEV_ERROR_SYNC; + mce_err->error_class = MCE_ECLASS_UNKNOWN; + mce_err->severity = MCE_SEV_SEVERE; mce_err->initiator = MCE_INITIATOR_CPU; + mce_err->sync_error = true; return 0; } diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index e7382abee868..9cc91d03ab62 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -267,12 +267,12 @@ void copy_mm_to_paca(struct mm_struct *mm) get_paca()->mm_ctx_id = context->id; #ifdef CONFIG_PPC_MM_SLICES - VM_BUG_ON(!mm->context.slb_addr_limit); - get_paca()->mm_ctx_slb_addr_limit = mm->context.slb_addr_limit; - memcpy(&get_paca()->mm_ctx_low_slices_psize, - &context->low_slices_psize, sizeof(context->low_slices_psize)); - memcpy(&get_paca()->mm_ctx_high_slices_psize, - &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm)); + VM_BUG_ON(!mm_ctx_slb_addr_limit(context)); + get_paca()->mm_ctx_slb_addr_limit = mm_ctx_slb_addr_limit(context); + memcpy(&get_paca()->mm_ctx_low_slices_psize, mm_ctx_low_slices(context), + LOW_SLICE_ARRAY_SZ); + memcpy(&get_paca()->mm_ctx_high_slices_psize, mm_ctx_high_slices(context), + TASK_SLICE_ARRAY_SZ(context)); #else /* CONFIG_PPC_MM_SLICES */ get_paca()->mm_ctx_user_psize = context->user_psize; get_paca()->mm_ctx_sllp = context->sllp; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index dd9e0d5386ee..87da40129927 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -67,6 +67,7 @@ #include <asm/cpu_has_feature.h> #include <asm/asm-prototypes.h> #include <asm/stacktrace.h> +#include <asm/hw_breakpoint.h> #include <linux/kprobes.h> #include <linux/kdebug.h> @@ -133,7 +134,8 @@ static int __init enable_strict_msr_control(char *str) } early_param("ppc_strict_facility_enable", enable_strict_msr_control); -unsigned long msr_check_and_set(unsigned long bits) +/* notrace because it's called by restore_math */ +unsigned long notrace msr_check_and_set(unsigned long bits) { unsigned long oldmsr = mfmsr(); unsigned long newmsr; @@ -152,7 +154,8 @@ unsigned long msr_check_and_set(unsigned long bits) } EXPORT_SYMBOL_GPL(msr_check_and_set); -void __msr_check_and_clear(unsigned long bits) +/* notrace because it's called by restore_math */ +void notrace __msr_check_and_clear(unsigned long bits) { unsigned long oldmsr = mfmsr(); unsigned long newmsr; @@ -525,7 +528,17 @@ void giveup_all(struct task_struct *tsk) } EXPORT_SYMBOL(giveup_all); -void restore_math(struct pt_regs *regs) +/* + * The exception exit path calls restore_math() with interrupts hard disabled + * but the soft irq state not "reconciled". ftrace code that calls + * local_irq_save/restore causes warnings. + * + * Rather than complicate the exit path, just don't trace restore_math. This + * could be done by having ftrace entry code check for this un-reconciled + * condition where MSR[EE]=0 and PACA_IRQ_HARD_DIS is not set, and + * temporarily fix it up for the duration of the ftrace call. + */ +void notrace restore_math(struct pt_regs *regs) { unsigned long msr; @@ -784,7 +797,7 @@ static inline int set_dabr(struct arch_hw_breakpoint *brk) return __set_dabr(dabr, dabrx); } -static inline int set_dawr(struct arch_hw_breakpoint *brk) +int set_dawr(struct arch_hw_breakpoint *brk) { unsigned long dawr, dawrx, mrd; @@ -816,7 +829,7 @@ void __set_breakpoint(struct arch_hw_breakpoint *brk) { memcpy(this_cpu_ptr(¤t_brk), brk, sizeof(*brk)); - if (cpu_has_feature(CPU_FTR_DAWR)) + if (dawr_enabled()) // Power8 or later set_dawr(brk); else if (!cpu_has_feature(CPU_FTR_ARCH_207S)) @@ -830,8 +843,8 @@ void __set_breakpoint(struct arch_hw_breakpoint *brk) /* Check if we have DAWR or DABR hardware */ bool ppc_breakpoint_available(void) { - if (cpu_has_feature(CPU_FTR_DAWR)) - return true; /* POWER8 DAWR */ + if (dawr_enabled()) + return true; /* POWER8 DAWR or POWER9 forced DAWR */ if (cpu_has_feature(CPU_FTR_ARCH_207S)) return false; /* POWER9 with DAWR disabled */ /* DABR: Everything but POWER8 and POWER9 */ @@ -1151,11 +1164,6 @@ static inline void restore_sprs(struct thread_struct *old_thread, thread_pkey_regs_restore(new_thread, old_thread); } -#ifdef CONFIG_PPC_BOOK3S_64 -#define CP_SIZE 128 -static const u8 dummy_copy_buffer[CP_SIZE] __attribute__((aligned(CP_SIZE))); -#endif - struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *new) { @@ -1729,7 +1737,8 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) unsigned long load_addr = regs->gpr[2]; /* saved by ELF_PLAT_INIT */ #ifdef CONFIG_PPC_BOOK3S_64 - preload_new_slb_context(start, sp); + if (!radix_enabled()) + preload_new_slb_context(start, sp); #endif #endif diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index f33ff4163a51..00682b8df330 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -154,10 +154,8 @@ static struct prom_t __prombss prom; static unsigned long __prombss prom_entry; -#define PROM_SCRATCH_SIZE 256 - static char __prombss of_stdout_device[256]; -static char __prombss prom_scratch[PROM_SCRATCH_SIZE]; +static char __prombss prom_scratch[256]; static unsigned long __prombss dt_header_start; static unsigned long __prombss dt_struct_start, dt_struct_end; @@ -224,6 +222,135 @@ static bool __prombss rtas_has_query_cpu_stopped; #define PHANDLE_VALID(p) ((p) != 0 && (p) != PROM_ERROR) #define IHANDLE_VALID(i) ((i) != 0 && (i) != PROM_ERROR) +/* Copied from lib/string.c and lib/kstrtox.c */ + +static int __init prom_strcmp(const char *cs, const char *ct) +{ + unsigned char c1, c2; + + while (1) { + c1 = *cs++; + c2 = *ct++; + if (c1 != c2) + return c1 < c2 ? -1 : 1; + if (!c1) + break; + } + return 0; +} + +static char __init *prom_strcpy(char *dest, const char *src) +{ + char *tmp = dest; + + while ((*dest++ = *src++) != '\0') + /* nothing */; + return tmp; +} + +static int __init prom_strncmp(const char *cs, const char *ct, size_t count) +{ + unsigned char c1, c2; + + while (count) { + c1 = *cs++; + c2 = *ct++; + if (c1 != c2) + return c1 < c2 ? -1 : 1; + if (!c1) + break; + count--; + } + return 0; +} + +static size_t __init prom_strlen(const char *s) +{ + const char *sc; + + for (sc = s; *sc != '\0'; ++sc) + /* nothing */; + return sc - s; +} + +static int __init prom_memcmp(const void *cs, const void *ct, size_t count) +{ + const unsigned char *su1, *su2; + int res = 0; + + for (su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--) + if ((res = *su1 - *su2) != 0) + break; + return res; +} + +static char __init *prom_strstr(const char *s1, const char *s2) +{ + size_t l1, l2; + + l2 = prom_strlen(s2); + if (!l2) + return (char *)s1; + l1 = prom_strlen(s1); + while (l1 >= l2) { + l1--; + if (!prom_memcmp(s1, s2, l2)) + return (char *)s1; + s1++; + } + return NULL; +} + +static size_t __init prom_strlcpy(char *dest, const char *src, size_t size) +{ + size_t ret = prom_strlen(src); + + if (size) { + size_t len = (ret >= size) ? size - 1 : ret; + memcpy(dest, src, len); + dest[len] = '\0'; + } + return ret; +} + +#ifdef CONFIG_PPC_PSERIES +static int __init prom_strtobool(const char *s, bool *res) +{ + if (!s) + return -EINVAL; + + switch (s[0]) { + case 'y': + case 'Y': + case '1': + *res = true; + return 0; + case 'n': + case 'N': + case '0': + *res = false; + return 0; + case 'o': + case 'O': + switch (s[1]) { + case 'n': + case 'N': + *res = true; + return 0; + case 'f': + case 'F': + *res = false; + return 0; + default: + break; + } + default: + break; + } + + return -EINVAL; +} +#endif /* This is the one and *ONLY* place where we actually call open * firmware. @@ -501,14 +628,14 @@ static int __init prom_next_node(phandle *nodep) } } -static inline int prom_getprop(phandle node, const char *pname, - void *value, size_t valuelen) +static inline int __init prom_getprop(phandle node, const char *pname, + void *value, size_t valuelen) { return call_prom("getprop", 4, 1, node, ADDR(pname), (u32)(unsigned long) value, (u32) valuelen); } -static inline int prom_getproplen(phandle node, const char *pname) +static inline int __init prom_getproplen(phandle node, const char *pname) { return call_prom("getproplen", 2, 1, node, ADDR(pname)); } @@ -555,7 +682,7 @@ static int __init prom_setprop(phandle node, const char *nodename, add_string(&p, tohex((u32)(unsigned long) value)); add_string(&p, tohex(valuelen)); add_string(&p, tohex(ADDR(pname))); - add_string(&p, tohex(strlen(pname))); + add_string(&p, tohex(prom_strlen(pname))); add_string(&p, "property"); *p = 0; return call_prom("interpret", 1, 1, (u32)(unsigned long) cmd); @@ -631,33 +758,30 @@ static void __init early_cmdline_parse(void) const char *opt; char *p; - int l __maybe_unused = 0; + int l = 0; prom_cmd_line[0] = 0; p = prom_cmd_line; if ((long)prom.chosen > 0) l = prom_getprop(prom.chosen, "bootargs", p, COMMAND_LINE_SIZE-1); -#ifdef CONFIG_CMDLINE - if (l <= 0 || p[0] == '\0') /* dbl check */ - strlcpy(prom_cmd_line, - CONFIG_CMDLINE, sizeof(prom_cmd_line)); -#endif /* CONFIG_CMDLINE */ + if (IS_ENABLED(CONFIG_CMDLINE_BOOL) && (l <= 0 || p[0] == '\0')) /* dbl check */ + prom_strlcpy(prom_cmd_line, CONFIG_CMDLINE, sizeof(prom_cmd_line)); prom_printf("command line: %s\n", prom_cmd_line); #ifdef CONFIG_PPC64 - opt = strstr(prom_cmd_line, "iommu="); + opt = prom_strstr(prom_cmd_line, "iommu="); if (opt) { prom_printf("iommu opt is: %s\n", opt); opt += 6; while (*opt && *opt == ' ') opt++; - if (!strncmp(opt, "off", 3)) + if (!prom_strncmp(opt, "off", 3)) prom_iommu_off = 1; - else if (!strncmp(opt, "force", 5)) + else if (!prom_strncmp(opt, "force", 5)) prom_iommu_force_on = 1; } #endif - opt = strstr(prom_cmd_line, "mem="); + opt = prom_strstr(prom_cmd_line, "mem="); if (opt) { opt += 4; prom_memory_limit = prom_memparse(opt, (const char **)&opt); @@ -669,13 +793,13 @@ static void __init early_cmdline_parse(void) #ifdef CONFIG_PPC_PSERIES prom_radix_disable = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT); - opt = strstr(prom_cmd_line, "disable_radix"); + opt = prom_strstr(prom_cmd_line, "disable_radix"); if (opt) { opt += 13; if (*opt && *opt == '=') { bool val; - if (kstrtobool(++opt, &val)) + if (prom_strtobool(++opt, &val)) prom_radix_disable = false; else prom_radix_disable = val; @@ -1028,7 +1152,7 @@ static int __init prom_count_smt_threads(void) type[0] = 0; prom_getprop(node, "device_type", type, sizeof(type)); - if (strcmp(type, "cpu")) + if (prom_strcmp(type, "cpu")) continue; /* * There is an entry for each smt thread, each entry being @@ -1138,8 +1262,14 @@ static void __init prom_check_platform_support(void) int prop_len = prom_getproplen(prom.chosen, "ibm,arch-vec-5-platform-support"); - /* First copy the architecture vec template */ - ibm_architecture_vec = ibm_architecture_vec_template; + /* + * First copy the architecture vec template + * + * use memcpy() instead of *vec = *vec_template so that GCC replaces it + * by __memcpy() when KASAN is active + */ + memcpy(&ibm_architecture_vec, &ibm_architecture_vec_template, + sizeof(ibm_architecture_vec)); if (prop_len > 1) { int i; @@ -1475,7 +1605,7 @@ static void __init prom_init_mem(void) */ prom_getprop(node, "name", type, sizeof(type)); } - if (strcmp(type, "memory")) + if (prom_strcmp(type, "memory")) continue; plen = prom_getprop(node, "reg", regbuf, sizeof(regbuf)); @@ -1487,8 +1617,8 @@ static void __init prom_init_mem(void) endp = p + (plen / sizeof(cell_t)); #ifdef DEBUG_PROM - memset(path, 0, PROM_SCRATCH_SIZE); - call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1); + memset(path, 0, sizeof(prom_scratch)); + call_prom("package-to-path", 3, 1, node, path, sizeof(prom_scratch) - 1); prom_debug(" node %s :\n", path); #endif /* DEBUG_PROM */ @@ -1756,19 +1886,19 @@ static void __init prom_initialize_tce_table(void) prom_getprop(node, "device_type", type, sizeof(type)); prom_getprop(node, "model", model, sizeof(model)); - if ((type[0] == 0) || (strstr(type, "pci") == NULL)) + if ((type[0] == 0) || (prom_strstr(type, "pci") == NULL)) continue; /* Keep the old logic intact to avoid regression. */ if (compatible[0] != 0) { - if ((strstr(compatible, "python") == NULL) && - (strstr(compatible, "Speedwagon") == NULL) && - (strstr(compatible, "Winnipeg") == NULL)) + if ((prom_strstr(compatible, "python") == NULL) && + (prom_strstr(compatible, "Speedwagon") == NULL) && + (prom_strstr(compatible, "Winnipeg") == NULL)) continue; } else if (model[0] != 0) { - if ((strstr(model, "ython") == NULL) && - (strstr(model, "peedwagon") == NULL) && - (strstr(model, "innipeg") == NULL)) + if ((prom_strstr(model, "ython") == NULL) && + (prom_strstr(model, "peedwagon") == NULL) && + (prom_strstr(model, "innipeg") == NULL)) continue; } @@ -1796,10 +1926,10 @@ static void __init prom_initialize_tce_table(void) local_alloc_bottom = base; /* It seems OF doesn't null-terminate the path :-( */ - memset(path, 0, PROM_SCRATCH_SIZE); + memset(path, 0, sizeof(prom_scratch)); /* Call OF to setup the TCE hardware */ if (call_prom("package-to-path", 3, 1, node, - path, PROM_SCRATCH_SIZE-1) == PROM_ERROR) { + path, sizeof(prom_scratch) - 1) == PROM_ERROR) { prom_printf("package-to-path failed\n"); } @@ -1917,12 +2047,12 @@ static void __init prom_hold_cpus(void) type[0] = 0; prom_getprop(node, "device_type", type, sizeof(type)); - if (strcmp(type, "cpu") != 0) + if (prom_strcmp(type, "cpu") != 0) continue; /* Skip non-configured cpus. */ if (prom_getprop(node, "status", type, sizeof(type)) > 0) - if (strcmp(type, "okay") != 0) + if (prom_strcmp(type, "okay") != 0) continue; reg = cpu_to_be32(-1); /* make sparse happy */ @@ -1998,9 +2128,9 @@ static void __init prom_find_mmu(void) return; version[sizeof(version) - 1] = 0; /* XXX might need to add other versions here */ - if (strcmp(version, "Open Firmware, 1.0.5") == 0) + if (prom_strcmp(version, "Open Firmware, 1.0.5") == 0) of_workarounds = OF_WA_CLAIM; - else if (strncmp(version, "FirmWorks,3.", 12) == 0) { + else if (prom_strncmp(version, "FirmWorks,3.", 12) == 0) { of_workarounds = OF_WA_CLAIM | OF_WA_LONGTRAIL; call_prom("interpret", 1, 1, "dev /memory 0 to allow-reclaim"); } else @@ -2033,7 +2163,7 @@ static void __init prom_init_stdout(void) call_prom("instance-to-path", 3, 1, prom.stdout, path, 255); prom_printf("OF stdout device is: %s\n", of_stdout_device); prom_setprop(prom.chosen, "/chosen", "linux,stdout-path", - path, strlen(path) + 1); + path, prom_strlen(path) + 1); /* instance-to-package fails on PA-Semi */ stdout_node = call_prom("instance-to-package", 1, 1, prom.stdout); @@ -2043,7 +2173,7 @@ static void __init prom_init_stdout(void) /* If it's a display, note it */ memset(type, 0, sizeof(type)); prom_getprop(stdout_node, "device_type", type, sizeof(type)); - if (strcmp(type, "display") == 0) + if (prom_strcmp(type, "display") == 0) prom_setprop(stdout_node, path, "linux,boot-display", NULL, 0); } } @@ -2064,19 +2194,19 @@ static int __init prom_find_machine_type(void) compat[len] = 0; while (i < len) { char *p = &compat[i]; - int sl = strlen(p); + int sl = prom_strlen(p); if (sl == 0) break; - if (strstr(p, "Power Macintosh") || - strstr(p, "MacRISC")) + if (prom_strstr(p, "Power Macintosh") || + prom_strstr(p, "MacRISC")) return PLATFORM_POWERMAC; #ifdef CONFIG_PPC64 /* We must make sure we don't detect the IBM Cell * blades as pSeries due to some firmware issues, * so we do it here. */ - if (strstr(p, "IBM,CBEA") || - strstr(p, "IBM,CPBW-1.0")) + if (prom_strstr(p, "IBM,CBEA") || + prom_strstr(p, "IBM,CPBW-1.0")) return PLATFORM_GENERIC; #endif /* CONFIG_PPC64 */ i += sl + 1; @@ -2093,7 +2223,7 @@ static int __init prom_find_machine_type(void) compat, sizeof(compat)-1); if (len <= 0) return PLATFORM_GENERIC; - if (strcmp(compat, "chrp")) + if (prom_strcmp(compat, "chrp")) return PLATFORM_GENERIC; /* Default to pSeries. We need to know if we are running LPAR */ @@ -2155,19 +2285,19 @@ static void __init prom_check_displays(void) for (node = 0; prom_next_node(&node); ) { memset(type, 0, sizeof(type)); prom_getprop(node, "device_type", type, sizeof(type)); - if (strcmp(type, "display") != 0) + if (prom_strcmp(type, "display") != 0) continue; /* It seems OF doesn't null-terminate the path :-( */ path = prom_scratch; - memset(path, 0, PROM_SCRATCH_SIZE); + memset(path, 0, sizeof(prom_scratch)); /* * leave some room at the end of the path for appending extra * arguments */ if (call_prom("package-to-path", 3, 1, node, path, - PROM_SCRATCH_SIZE-10) == PROM_ERROR) + sizeof(prom_scratch) - 10) == PROM_ERROR) continue; prom_printf("found display : %s, opening... ", path); @@ -2259,9 +2389,9 @@ static unsigned long __init dt_find_string(char *str) s = os = (char *)dt_string_start; s += 4; while (s < (char *)dt_string_end) { - if (strcmp(s, str) == 0) + if (prom_strcmp(s, str) == 0) return s - os; - s += strlen(s) + 1; + s += prom_strlen(s) + 1; } return 0; } @@ -2294,7 +2424,7 @@ static void __init scan_dt_build_strings(phandle node, } /* skip "name" */ - if (strcmp(namep, "name") == 0) { + if (prom_strcmp(namep, "name") == 0) { *mem_start = (unsigned long)namep; prev_name = "name"; continue; @@ -2306,7 +2436,7 @@ static void __init scan_dt_build_strings(phandle node, namep = sstart + soff; } else { /* Trim off some if we can */ - *mem_start = (unsigned long)namep + strlen(namep) + 1; + *mem_start = (unsigned long)namep + prom_strlen(namep) + 1; dt_string_end = *mem_start; } prev_name = namep; @@ -2363,8 +2493,8 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, /* get it again for debugging */ path = prom_scratch; - memset(path, 0, PROM_SCRATCH_SIZE); - call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1); + memset(path, 0, sizeof(prom_scratch)); + call_prom("package-to-path", 3, 1, node, path, sizeof(prom_scratch) - 1); /* get and store all properties */ prev_name = ""; @@ -2375,7 +2505,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, break; /* skip "name" */ - if (strcmp(pname, "name") == 0) { + if (prom_strcmp(pname, "name") == 0) { prev_name = "name"; continue; } @@ -2406,7 +2536,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, call_prom("getprop", 4, 1, node, pname, valp, l); *mem_start = _ALIGN(*mem_start, 4); - if (!strcmp(pname, "phandle")) + if (!prom_strcmp(pname, "phandle")) has_phandle = 1; } @@ -2476,8 +2606,8 @@ static void __init flatten_device_tree(void) /* Add "phandle" in there, we'll need it */ namep = make_room(&mem_start, &mem_end, 16, 1); - strcpy(namep, "phandle"); - mem_start = (unsigned long)namep + strlen(namep) + 1; + prom_strcpy(namep, "phandle"); + mem_start = (unsigned long)namep + prom_strlen(namep) + 1; /* Build string array */ prom_printf("Building dt strings...\n"); @@ -2799,7 +2929,7 @@ static void __init fixup_device_tree_efika(void) rv = prom_getprop(node, "model", prop, sizeof(prop)); if (rv == PROM_ERROR) return; - if (strcmp(prop, "EFIKA5K2")) + if (prom_strcmp(prop, "EFIKA5K2")) return; prom_printf("Applying EFIKA device tree fixups\n"); @@ -2807,13 +2937,13 @@ static void __init fixup_device_tree_efika(void) /* Claiming to be 'chrp' is death */ node = call_prom("finddevice", 1, 1, ADDR("/")); rv = prom_getprop(node, "device_type", prop, sizeof(prop)); - if (rv != PROM_ERROR && (strcmp(prop, "chrp") == 0)) + if (rv != PROM_ERROR && (prom_strcmp(prop, "chrp") == 0)) prom_setprop(node, "/", "device_type", "efika", sizeof("efika")); /* CODEGEN,description is exposed in /proc/cpuinfo so fix that too */ rv = prom_getprop(node, "CODEGEN,description", prop, sizeof(prop)); - if (rv != PROM_ERROR && (strstr(prop, "CHRP"))) + if (rv != PROM_ERROR && (prom_strstr(prop, "CHRP"))) prom_setprop(node, "/", "CODEGEN,description", "Efika 5200B PowerPC System", sizeof("Efika 5200B PowerPC System")); diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index 667df97d2595..4cac45cb5de5 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -16,10 +16,18 @@ # If you really need to reference something from prom_init.o add # it to the list below: +grep "^CONFIG_KASAN=y$" .config >/dev/null +if [ $? -eq 0 ] +then + MEM_FUNCS="__memcpy __memset" +else + MEM_FUNCS="memcpy memset" +fi + WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush -_end enter_prom memcpy memset reloc_offset __secondary_hold +_end enter_prom $MEM_FUNCS reloc_offset __secondary_hold __secondary_hold_acknowledge __secondary_hold_spinloop __start -strcmp strcpy strlcpy strlen strncmp strstr kstrtobool logo_linux_clut224 +logo_linux_clut224 reloc_got2 kernstart_addr memstart_addr linux_banner _stext __prom_init_toc_start __prom_init_toc_end btext_setup_display TOC." diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index d9ac7d94656e..684b0b315c32 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -43,6 +43,7 @@ #include <asm/tm.h> #include <asm/asm-prototypes.h> #include <asm/debug.h> +#include <asm/hw_breakpoint.h> #define CREATE_TRACE_POINTS #include <trace/events/syscalls.h> @@ -3088,7 +3089,7 @@ long arch_ptrace(struct task_struct *child, long request, dbginfo.sizeof_condition = 0; #ifdef CONFIG_HAVE_HW_BREAKPOINT dbginfo.features = PPC_DEBUG_FEATURE_DATA_BP_RANGE; - if (cpu_has_feature(CPU_FTR_DAWR)) + if (dawr_enabled()) dbginfo.features |= PPC_DEBUG_FEATURE_DATA_BP_DAWR; #else dbginfo.features = 0; diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index b33bafb8fcea..e1c9cf079503 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -57,7 +57,7 @@ void setup_barrier_nospec(void) enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR); - if (!no_nospec) + if (!no_nospec && !cpu_mitigations_off()) enable_barrier_nospec(enable); } @@ -104,6 +104,14 @@ static __init int barrier_nospec_debugfs_init(void) return 0; } device_initcall(barrier_nospec_debugfs_init); + +static __init int security_feature_debugfs_init(void) +{ + debugfs_create_x64("security_features", 0400, powerpc_debugfs_root, + (u64 *)&powerpc_security_features); + return 0; +} +device_initcall(security_feature_debugfs_init); #endif /* CONFIG_DEBUG_FS */ #ifdef CONFIG_PPC_FSL_BOOK3E @@ -116,7 +124,7 @@ static int __init handle_nospectre_v2(char *p) early_param("nospectre_v2", handle_nospectre_v2); void setup_spectre_v2(void) { - if (no_spectrev2) + if (no_spectrev2 || cpu_mitigations_off()) do_btb_flush_fixups(); else btb_flush_enabled = true; @@ -300,7 +308,7 @@ void setup_stf_barrier(void) stf_enabled_flush_types = type; - if (!no_stf_barrier) + if (!no_stf_barrier && !cpu_mitigations_off()) stf_barrier_enable(enable); } diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 2e5dfb6e0823..aad9f5df6ab6 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -67,6 +67,7 @@ #include <asm/livepatch.h> #include <asm/mmu_context.h> #include <asm/cpu_has_feature.h> +#include <asm/kasan.h> #include "setup.h" @@ -133,13 +134,11 @@ int crashing_cpu = -1; /* also used by kexec */ void machine_shutdown(void) { -#ifdef CONFIG_FA_DUMP /* * if fadump is active, cleanup the fadump registration before we * shutdown. */ fadump_cleanup(); -#endif if (ppc_md.machine_shutdown) ppc_md.machine_shutdown(); @@ -200,14 +199,15 @@ static void show_cpuinfo_summary(struct seq_file *m) { struct device_node *root; const char *model = NULL; -#if defined(CONFIG_SMP) && defined(CONFIG_PPC32) unsigned long bogosum = 0; int i; - for_each_online_cpu(i) - bogosum += loops_per_jiffy; - seq_printf(m, "total bogomips\t: %lu.%02lu\n", - bogosum/(500000/HZ), bogosum/(5000/HZ) % 100); -#endif /* CONFIG_SMP && CONFIG_PPC32 */ + + if (IS_ENABLED(CONFIG_SMP) && IS_ENABLED(CONFIG_PPC32)) { + for_each_online_cpu(i) + bogosum += loops_per_jiffy; + seq_printf(m, "total bogomips\t: %lu.%02lu\n", + bogosum / (500000 / HZ), bogosum / (5000 / HZ) % 100); + } seq_printf(m, "timebase\t: %lu\n", ppc_tb_freq); if (ppc_md.name) seq_printf(m, "platform\t: %s\n", ppc_md.name); @@ -221,11 +221,10 @@ static void show_cpuinfo_summary(struct seq_file *m) if (ppc_md.show_cpuinfo != NULL) ppc_md.show_cpuinfo(m); -#ifdef CONFIG_PPC32 /* Display the amount of memory */ - seq_printf(m, "Memory\t\t: %d MB\n", - (unsigned int)(total_memory / (1024 * 1024))); -#endif + if (IS_ENABLED(CONFIG_PPC32)) + seq_printf(m, "Memory\t\t: %d MB\n", + (unsigned int)(total_memory / (1024 * 1024))); } static int show_cpuinfo(struct seq_file *m, void *v) @@ -252,26 +251,24 @@ static int show_cpuinfo(struct seq_file *m, void *v) else seq_printf(m, "unknown (%08x)", pvr); -#ifdef CONFIG_ALTIVEC if (cpu_has_feature(CPU_FTR_ALTIVEC)) seq_printf(m, ", altivec supported"); -#endif /* CONFIG_ALTIVEC */ seq_printf(m, "\n"); #ifdef CONFIG_TAU - if (cur_cpu_spec->cpu_features & CPU_FTR_TAU) { -#ifdef CONFIG_TAU_AVERAGE - /* more straightforward, but potentially misleading */ - seq_printf(m, "temperature \t: %u C (uncalibrated)\n", - cpu_temp(cpu_id)); -#else - /* show the actual temp sensor range */ - u32 temp; - temp = cpu_temp_both(cpu_id); - seq_printf(m, "temperature \t: %u-%u C (uncalibrated)\n", - temp & 0xff, temp >> 16); -#endif + if (cpu_has_feature(CPU_FTR_TAU)) { + if (IS_ENABLED(CONFIG_TAU_AVERAGE)) { + /* more straightforward, but potentially misleading */ + seq_printf(m, "temperature \t: %u C (uncalibrated)\n", + cpu_temp(cpu_id)); + } else { + /* show the actual temp sensor range */ + u32 temp; + temp = cpu_temp_both(cpu_id); + seq_printf(m, "temperature \t: %u-%u C (uncalibrated)\n", + temp & 0xff, temp >> 16); + } } #endif /* CONFIG_TAU */ @@ -335,11 +332,10 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "revision\t: %hd.%hd (pvr %04x %04x)\n", maj, min, PVR_VER(pvr), PVR_REV(pvr)); -#ifdef CONFIG_PPC32 - seq_printf(m, "bogomips\t: %lu.%02lu\n", - loops_per_jiffy / (500000/HZ), - (loops_per_jiffy / (5000/HZ)) % 100); -#endif + if (IS_ENABLED(CONFIG_PPC32)) + seq_printf(m, "bogomips\t: %lu.%02lu\n", loops_per_jiffy / (500000 / HZ), + (loops_per_jiffy / (5000 / HZ)) % 100); + seq_printf(m, "\n"); /* If this is the last cpu, print the summary */ @@ -401,8 +397,8 @@ void __init check_for_initrd(void) #ifdef CONFIG_SMP -int threads_per_core, threads_per_subcore, threads_shift; -cpumask_t threads_core_mask; +int threads_per_core, threads_per_subcore, threads_shift __read_mostly; +cpumask_t threads_core_mask __read_mostly; EXPORT_SYMBOL_GPL(threads_per_core); EXPORT_SYMBOL_GPL(threads_per_subcore); EXPORT_SYMBOL_GPL(threads_shift); @@ -740,23 +736,19 @@ void __init setup_panic(void) * BUG() in that case. */ -#ifdef CONFIG_NOT_COHERENT_CACHE -#define KERNEL_COHERENCY 0 -#else -#define KERNEL_COHERENCY 1 -#endif +#define KERNEL_COHERENCY (!IS_ENABLED(CONFIG_NOT_COHERENT_CACHE)) static int __init check_cache_coherency(void) { struct device_node *np; const void *prop; - int devtree_coherency; + bool devtree_coherency; np = of_find_node_by_path("/"); prop = of_get_property(np, "coherency-off", NULL); of_node_put(np); - devtree_coherency = prop ? 0 : 1; + devtree_coherency = prop ? false : true; if (devtree_coherency != KERNEL_COHERENCY) { printk(KERN_ERR @@ -799,12 +791,6 @@ void arch_setup_pdev_archdata(struct platform_device *pdev) static __init void print_system_info(void) { pr_info("-----------------------------------------------------\n"); -#ifdef CONFIG_PPC_BOOK3S_64 - pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); -#endif -#ifdef CONFIG_PPC_BOOK3S_32 - pr_info("Hash_size = 0x%lx\n", Hash_size); -#endif pr_info("phys_mem_size = 0x%llx\n", (unsigned long long)memblock_phys_mem_size()); @@ -826,18 +812,7 @@ static __init void print_system_info(void) pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features); #endif -#ifdef CONFIG_PPC_BOOK3S_64 - if (htab_address) - pr_info("htab_address = 0x%p\n", htab_address); - if (htab_hash_mask) - pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask); -#endif -#ifdef CONFIG_PPC_BOOK3S_32 - if (Hash) - pr_info("Hash = 0x%p\n", Hash); - if (Hash_mask) - pr_info("Hash_mask = 0x%lx\n", Hash_mask); -#endif + print_system_hash_info(); if (PHYSICAL_START > 0) pr_info("physical_start = 0x%llx\n", @@ -868,6 +843,8 @@ static void smp_setup_pacas(void) */ void __init setup_arch(char **cmdline_p) { + kasan_init(); + *cmdline_p = boot_command_line; /* Set a half-reasonable default so udelay does something sensible */ @@ -947,20 +924,7 @@ void __init setup_arch(char **cmdline_p) init_mm.end_data = (unsigned long) _edata; init_mm.brk = klimit; -#ifdef CONFIG_PPC_MM_SLICES -#ifdef CONFIG_PPC64 - if (!radix_enabled()) - init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64; -#elif defined(CONFIG_PPC_8xx) - init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW; -#else -#error "context.addr_limit not initialized." -#endif -#endif - -#ifdef CONFIG_SPAPR_TCE_IOMMU mm_iommu_init(&init_mm); -#endif irqstack_early_init(); exc_lvl_early_init(); emergency_stack_init(); @@ -969,9 +933,9 @@ void __init setup_arch(char **cmdline_p) early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT); -#ifdef CONFIG_DUMMY_CONSOLE - conswitchp = &dummy_con; -#endif + if (IS_ENABLED(CONFIG_DUMMY_CONSOLE)) + conswitchp = &dummy_con; + if (ppc_md.setup_arch) ppc_md.setup_arch(); @@ -983,10 +947,8 @@ void __init setup_arch(char **cmdline_p) /* Initialize the MMU context management stuff. */ mmu_context_init(); -#ifdef CONFIG_PPC64 /* Interrupt code needs to be 64K-aligned. */ - if ((unsigned long)_stext & 0xffff) + if (IS_ENABLED(CONFIG_PPC64) && (unsigned long)_stext & 0xffff) panic("Kernelbase not 64K-aligned (0x%lx)!\n", (unsigned long)_stext); -#endif } diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 4a65e08a6042..3fb9f64f88fd 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -64,34 +64,6 @@ EXPORT_SYMBOL(DMA_MODE_READ); EXPORT_SYMBOL(DMA_MODE_WRITE); /* - * We're called here very early in the boot. - * - * Note that the kernel may be running at an address which is different - * from the address that it was linked at, so we must use RELOC/PTRRELOC - * to access static data (including strings). -- paulus - */ -notrace unsigned long __init early_init(unsigned long dt_ptr) -{ - unsigned long offset = reloc_offset(); - - /* First zero the BSS -- use memset_io, some platforms don't have - * caches on yet */ - memset_io((void __iomem *)PTRRELOC(&__bss_start), 0, - __bss_stop - __bss_start); - - /* - * Identify the CPU type and fix up code sections - * that depend on which cpu we have. - */ - identify_cpu(offset, mfspr(SPRN_PVR)); - - apply_feature_fixups(); - - return KERNELBASE + offset; -} - - -/* * This is run before start_kernel(), the kernel has been relocated * and we are running with enough of the MMU enabled to have our * proper kernel virtual addresses diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index ba404dd9ce1d..a400854a5036 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -68,6 +68,7 @@ #include <asm/cputhreads.h> #include <asm/hw_irq.h> #include <asm/feature-fixups.h> +#include <asm/kup.h> #include "setup.h" @@ -331,6 +332,12 @@ void __init early_setup(unsigned long dt_ptr) */ configure_exceptions(); + /* + * Configure Kernel Userspace Protection. This needs to happen before + * feature fixups for platforms that implement this using features. + */ + setup_kup(); + /* Apply all the dynamic patching */ apply_feature_fixups(); setup_feature_keys(); @@ -383,6 +390,9 @@ void early_setup_secondary(void) /* Initialize the hash table or TLB handling */ early_init_mmu_secondary(); + /* Perform any KUP setup that is per-cpu */ + setup_kup(); + /* * At this point, we can let interrupts switch to virtual mode * (the MMU has been setup), so adjust the MSR in the PACA to @@ -932,7 +942,7 @@ void setup_rfi_flush(enum l1d_flush_type types, bool enable) enabled_flush_types = types; - if (!no_rfi_flush) + if (!no_rfi_flush && !cpu_mitigations_off()) rfi_flush_enable(enable); } diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 6794466f6420..06c299ef6132 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -565,7 +565,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, preempt_disable(); /* pull in MSR TS bits from user context */ - regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK); + regs->msr |= msr & MSR_TS_MASK; /* * Ensure that TM is enabled in regs->msr before we leave the signal @@ -745,6 +745,31 @@ SYSCALL_DEFINE0(rt_sigreturn) if (MSR_TM_SUSPENDED(mfmsr())) tm_reclaim_current(0); + /* + * Disable MSR[TS] bit also, so, if there is an exception in the + * code below (as a page fault in copy_ckvsx_to_user()), it does + * not recheckpoint this task if there was a context switch inside + * the exception. + * + * A major page fault can indirectly call schedule(). A reschedule + * process in the middle of an exception can have a side effect + * (Changing the CPU MSR[TS] state), since schedule() is called + * with the CPU MSR[TS] disable and returns with MSR[TS]=Suspended + * (switch_to() calls tm_recheckpoint() for the 'new' process). In + * this case, the process continues to be the same in the CPU, but + * the CPU state just changed. + * + * This can cause a TM Bad Thing, since the MSR in the stack will + * have the MSR[TS]=0, and this is what will be used to RFID. + * + * Clearing MSR[TS] state here will avoid a recheckpoint if there + * is any process reschedule in kernel space. The MSR[TS] state + * does not need to be saved also, since it will be replaced with + * the MSR[TS] that came from user context later, at + * restore_tm_sigcontexts. + */ + regs->msr &= ~MSR_TS_MASK; + if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR])) goto badframe; if (MSR_TM_ACTIVE(msr)) { diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index b18abb0c3dae..00f5a63c8d9a 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -505,3 +505,7 @@ 421 32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64 422 32 futex_time64 sys_futex sys_futex 423 32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index e8e93c2c7d03..7a1708875d27 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -610,7 +610,7 @@ SYSFS_PMCSETUP(pa6t_pmc2, SPRN_PA6T_PMC2); SYSFS_PMCSETUP(pa6t_pmc3, SPRN_PA6T_PMC3); SYSFS_PMCSETUP(pa6t_pmc4, SPRN_PA6T_PMC4); SYSFS_PMCSETUP(pa6t_pmc5, SPRN_PA6T_PMC5); -#ifdef CONFIG_DEBUG_KERNEL +#ifdef CONFIG_DEBUG_MISC SYSFS_SPRSETUP(hid0, SPRN_HID0); SYSFS_SPRSETUP(hid1, SPRN_HID1); SYSFS_SPRSETUP(hid4, SPRN_HID4); @@ -639,7 +639,7 @@ SYSFS_SPRSETUP(tsr0, SPRN_PA6T_TSR0); SYSFS_SPRSETUP(tsr1, SPRN_PA6T_TSR1); SYSFS_SPRSETUP(tsr2, SPRN_PA6T_TSR2); SYSFS_SPRSETUP(tsr3, SPRN_PA6T_TSR3); -#endif /* CONFIG_DEBUG_KERNEL */ +#endif /* CONFIG_DEBUG_MISC */ #endif /* HAS_PPC_PMC_PA6T */ #ifdef HAS_PPC_PMC_IBM @@ -680,7 +680,7 @@ static struct device_attribute pa6t_attrs[] = { __ATTR(pmc3, 0600, show_pa6t_pmc3, store_pa6t_pmc3), __ATTR(pmc4, 0600, show_pa6t_pmc4, store_pa6t_pmc4), __ATTR(pmc5, 0600, show_pa6t_pmc5, store_pa6t_pmc5), -#ifdef CONFIG_DEBUG_KERNEL +#ifdef CONFIG_DEBUG_MISC __ATTR(hid0, 0600, show_hid0, store_hid0), __ATTR(hid1, 0600, show_hid1, store_hid1), __ATTR(hid4, 0600, show_hid4, store_hid4), @@ -709,7 +709,7 @@ static struct device_attribute pa6t_attrs[] = { __ATTR(tsr1, 0600, show_tsr1, store_tsr1), __ATTR(tsr2, 0600, show_tsr2, store_tsr2), __ATTR(tsr3, 0600, show_tsr3, store_tsr3), -#endif /* CONFIG_DEBUG_KERNEL */ +#endif /* CONFIG_DEBUG_MISC */ }; #endif /* HAS_PPC_PMC_PA6T */ #endif /* HAS_PPC_PMC_CLASSIC */ diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index bc0503ef9c9c..325d60633dfa 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -43,7 +43,6 @@ #include <linux/timex.h> #include <linux/kernel_stat.h> #include <linux/time.h> -#include <linux/clockchips.h> #include <linux/init.h> #include <linux/profile.h> #include <linux/cpu.h> @@ -151,6 +150,8 @@ EXPORT_SYMBOL_GPL(ppc_proc_freq); unsigned long ppc_tb_freq; EXPORT_SYMBOL_GPL(ppc_tb_freq); +bool tb_invalid; + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE /* * Factor for converting from cputime_t (timebase ticks) to @@ -460,6 +461,13 @@ void __delay(unsigned long loops) diff += 1000000000; spin_cpu_relax(); } while (diff < loops); + } else if (tb_invalid) { + /* + * TB is in error state and isn't ticking anymore. + * HMI handler was unable to recover from TB error. + * Return immediately, so that kernel won't get stuck here. + */ + spin_cpu_relax(); } else { start = get_tbl(); while (get_tbl() - start < loops) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 1fd45a8650e1..665f294725cb 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -2088,6 +2088,10 @@ void SPEFloatingPointException(struct pt_regs *regs) int code = FPE_FLTUNK; int err; + /* We restore the interrupt state now */ + if (!arch_irq_disabled_regs(regs)) + local_irq_enable(); + flush_spe_to_thread(current); spefscr = current->thread.spefscr; @@ -2133,6 +2137,10 @@ void SPEFloatingPointRoundException(struct pt_regs *regs) extern int speround_handler(struct pt_regs *regs); int err; + /* We restore the interrupt state now */ + if (!arch_irq_disabled_regs(regs)) + local_irq_enable(); + preempt_disable(); if (regs->msr & MSR_SPE) giveup_spe(current); diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index ce199f6e4256..06f54d947057 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -26,9 +26,8 @@ GCOV_PROFILE := n KCOV_INSTRUMENT := n UBSAN_SANITIZE := n -ccflags-y := -shared -fno-common -fno-builtin -ccflags-y += -nostdlib -Wl,-soname=linux-vdso32.so.1 \ - $(call cc-ldoption, -Wl$(comma)--hash-style=both) +ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ + -Wl,-soname=linux-vdso32.so.1 -Wl,--hash-style=both asflags-y := -D__VDSO32__ -s obj-y += vdso32_wrapper.o diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index 28e7d112aa2f..32ebb3522ea1 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -12,9 +12,8 @@ GCOV_PROFILE := n KCOV_INSTRUMENT := n UBSAN_SANITIZE := n -ccflags-y := -shared -fno-common -fno-builtin -ccflags-y += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ - $(call cc-ldoption, -Wl$(comma)--hash-style=both) +ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ + -Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both asflags-y := -D__VDSO64__ -s obj-y += vdso64_wrapper.o diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 21165da0052d..8eb867dbad5f 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -21,6 +21,7 @@ _GLOBAL(load_vr_state) REST_32VRS(0,r4,r3) blr EXPORT_SYMBOL(load_vr_state) +_ASM_NOKPROBE_SYMBOL(load_vr_state); /* used by restore_math */ /* * Store VMX state into memory, including VSCR. diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 3c6ab22a0c4e..af3c15a1d41e 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -77,7 +77,7 @@ static u64 wd_smp_panic_timeout_tb __read_mostly; /* panic other CPUs */ static u64 wd_timer_period_ms __read_mostly; /* interval between heartbeat */ -static DEFINE_PER_CPU(struct timer_list, wd_timer); +static DEFINE_PER_CPU(struct hrtimer, wd_hrtimer); static DEFINE_PER_CPU(u64, wd_timer_tb); /* SMP checker bits */ @@ -293,21 +293,21 @@ out: nmi_exit(); } -static void wd_timer_reset(unsigned int cpu, struct timer_list *t) -{ - t->expires = jiffies + msecs_to_jiffies(wd_timer_period_ms); - if (wd_timer_period_ms > 1000) - t->expires = __round_jiffies_up(t->expires, cpu); - add_timer_on(t, cpu); -} - -static void wd_timer_fn(struct timer_list *t) +static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) { int cpu = smp_processor_id(); + if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + return HRTIMER_NORESTART; + + if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) + return HRTIMER_NORESTART; + watchdog_timer_interrupt(cpu); - wd_timer_reset(cpu, t); + hrtimer_forward_now(hrtimer, ms_to_ktime(wd_timer_period_ms)); + + return HRTIMER_RESTART; } void arch_touch_nmi_watchdog(void) @@ -323,37 +323,22 @@ void arch_touch_nmi_watchdog(void) } EXPORT_SYMBOL(arch_touch_nmi_watchdog); -static void start_watchdog_timer_on(unsigned int cpu) -{ - struct timer_list *t = per_cpu_ptr(&wd_timer, cpu); - - per_cpu(wd_timer_tb, cpu) = get_tb(); - - timer_setup(t, wd_timer_fn, TIMER_PINNED); - wd_timer_reset(cpu, t); -} - -static void stop_watchdog_timer_on(unsigned int cpu) -{ - struct timer_list *t = per_cpu_ptr(&wd_timer, cpu); - - del_timer_sync(t); -} - -static int start_wd_on_cpu(unsigned int cpu) +static void start_watchdog(void *arg) { + struct hrtimer *hrtimer = this_cpu_ptr(&wd_hrtimer); + int cpu = smp_processor_id(); unsigned long flags; if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) { WARN_ON(1); - return 0; + return; } if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) - return 0; + return; if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) - return 0; + return; wd_smp_lock(&flags); cpumask_set_cpu(cpu, &wd_cpus_enabled); @@ -363,27 +348,40 @@ static int start_wd_on_cpu(unsigned int cpu) } wd_smp_unlock(&flags); - start_watchdog_timer_on(cpu); + *this_cpu_ptr(&wd_timer_tb) = get_tb(); - return 0; + hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer->function = watchdog_timer_fn; + hrtimer_start(hrtimer, ms_to_ktime(wd_timer_period_ms), + HRTIMER_MODE_REL_PINNED); } -static int stop_wd_on_cpu(unsigned int cpu) +static int start_watchdog_on_cpu(unsigned int cpu) { + return smp_call_function_single(cpu, start_watchdog, NULL, true); +} + +static void stop_watchdog(void *arg) +{ + struct hrtimer *hrtimer = this_cpu_ptr(&wd_hrtimer); + int cpu = smp_processor_id(); unsigned long flags; if (!cpumask_test_cpu(cpu, &wd_cpus_enabled)) - return 0; /* Can happen in CPU unplug case */ + return; /* Can happen in CPU unplug case */ - stop_watchdog_timer_on(cpu); + hrtimer_cancel(hrtimer); wd_smp_lock(&flags); cpumask_clear_cpu(cpu, &wd_cpus_enabled); wd_smp_unlock(&flags); wd_smp_clear_cpu_pending(cpu, get_tb()); +} - return 0; +static int stop_watchdog_on_cpu(unsigned int cpu) +{ + return smp_call_function_single(cpu, stop_watchdog, NULL, true); } static void watchdog_calc_timeouts(void) @@ -402,7 +400,7 @@ void watchdog_nmi_stop(void) int cpu; for_each_cpu(cpu, &wd_cpus_enabled) - stop_wd_on_cpu(cpu); + stop_watchdog_on_cpu(cpu); } void watchdog_nmi_start(void) @@ -411,7 +409,7 @@ void watchdog_nmi_start(void) watchdog_calc_timeouts(); for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask) - start_wd_on_cpu(cpu); + start_watchdog_on_cpu(cpu); } /* @@ -423,7 +421,8 @@ int __init watchdog_nmi_probe(void) err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "powerpc/watchdog:online", - start_wd_on_cpu, stop_wd_on_cpu); + start_watchdog_on_cpu, + stop_watchdog_on_cpu); if (err < 0) { pr_warn("could not be initialized"); return err; |