From 3a5988b884a33cb3e4ab427b08a020ce32f3b3ba Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 6 May 2021 09:10:01 +0000 Subject: powerpc/32s: Remove m8260_gorom() The last user of m8260_gorom() was removed by commit 917f0af9e5a9 ("powerpc: Remove arch/ppc and include/asm-ppc"). In fact m8260_gorom() had been ported to arch/powerpc/, but the platform using it died with arch/ppc/. Remove it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/13f7532f21df3196e8c78b4f82a9c8d5487aca35.1620292185.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_book3s_32.S | 36 ------------------------------------ 1 file changed, 36 deletions(-) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 065178f19a3d..adf64fe522f8 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -1256,42 +1256,6 @@ setup_usbgecko_bat: blr #endif -#ifdef CONFIG_8260 -/* Jump into the system reset for the rom. - * We first disable the MMU, and then jump to the ROM reset address. - * - * r3 is the board info structure, r4 is the location for starting. - * I use this for building a small kernel that can load other kernels, - * rather than trying to write or rely on a rom monitor that can tftp load. - */ - .globl m8260_gorom -m8260_gorom: - mfmsr r0 - rlwinm r0,r0,0,17,15 /* clear MSR_EE in r0 */ - sync - mtmsr r0 - sync - mfspr r11, SPRN_HID0 - lis r10, 0 - ori r10,r10,HID0_ICE|HID0_DCE - andc r11, r11, r10 - mtspr SPRN_HID0, r11 - isync - li r5, MSR_ME|MSR_RI - lis r6,2f@h - addis r6,r6,-KERNELBASE@h - ori r6,r6,2f@l - mtspr SPRN_SRR0,r6 - mtspr SPRN_SRR1,r5 - isync - sync - rfi -2: - mtlr r4 - blr -#endif - - /* * We put a few things here that have to be page-aligned. * This stuff goes at the beginning of the data segment, -- cgit v1.2.3 From 1a3c6ceed2533121e857d9b38559839487d31f33 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 5 May 2021 14:02:12 +0000 Subject: powerpc/asm-offset: Remove unused items The following PACA-related items are not used anymore by ASM code: PACA_SIZE, PACACONTEXTID, PACALOWSLICESPSIZE, PACAHIGHSLICEPSIZE, PACA_SLB_ADDR_LIMIT, MMUPSIZEDEFSIZE, PACASLBCACHE, PACASLBCACHEPTR, PACASTABRR, PACAVMALLOCSLLP, MMUPSIZESLLP, PACACONTEXTSLLP, PACALPPACAPTR, LPPACA_DTLIDX and PACA_DTL_RIDX. The following items are also not used anymore: SIGSEGV, NMI_MASK, THREAD_DBCR0, KUAP, TI_FLAGS, TI_PREEMPT, DCACHEL1BLOCKSPERPAGE, ICACHEL1BLOCKSIZE, ICACHEL1LOGBLOCKSIZE, ICACHEL1BLOCKSPERPAGE, STACK_REGS_KUAP, KVM_NEED_FLUSH, KVM_FWNMI, VCPU_DEC, VCPU_SPMC, HSTATE_XICS_PHYS, HSTATE_SAVED_XIRR and PPC_DBELL_MSGTYPE. Remove all of them.
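As background, asm-offsets.c contains no runtime code: each OFFSET()/DEFINE() emits a marker that the build turns into a plain "#define" in the generated asm-offsets.h, which is why entries with no remaining assembly users can simply be deleted. A minimal sketch of the mechanism, simplified from include/linux/kbuild.h (the example struct and symbol names are illustrative only):

    #include <stddef.h>

    /* Simplified from include/linux/kbuild.h: emit "->SYM value" markers
     * into the generated assembly; a build script rewrites them as
     * "#define SYM value" lines in asm-offsets.h. */
    #define DEFINE(sym, val) \
            asm volatile("\n.ascii \"->" #sym " %0 " #val "\"" : : "i" (val))
    #define OFFSET(sym, str, mem) \
            DEFINE(sym, offsetof(struct str, mem))

    struct example { long a; long b; };

    int main(void)
    {
            OFFSET(EXAMPLE_B, example, b); /* emits "->EXAMPLE_B 8 b" */
            return 0;
    }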
Signed-off-by: Christophe Leroy Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1c80981548dc0c4f145109cdd473022c1aad8d2b.1620223302.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/asm-offsets.c | 52 +-------------------------------------- 1 file changed, 1 insertion(+), 51 deletions(-) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 28af4efb4587..0480f4006e0c 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -87,10 +87,7 @@ int main(void) #endif #endif OFFSET(MMCONTEXTID, mm_struct, context.id); -#ifdef CONFIG_PPC64 - DEFINE(SIGSEGV, SIGSEGV); - DEFINE(NMI_MASK, NMI_MASK); -#else +#ifdef CONFIG_PPC32 #ifdef CONFIG_PPC_RTAS OFFSET(RTAS_SP, thread_struct, rtas_sp); #endif @@ -154,18 +151,12 @@ int main(void) OFFSET(THREAD_USED_SPE, thread_struct, used_spe); #endif /* CONFIG_SPE */ #endif /* CONFIG_PPC64 */ -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) - OFFSET(THREAD_DBCR0, thread_struct, debug.dbcr0); -#endif #ifdef CONFIG_KVM_BOOK3S_32_HANDLER OFFSET(THREAD_KVM_SVCPU, thread_struct, kvm_shadow_vcpu); #endif #if defined(CONFIG_KVM) && defined(CONFIG_BOOKE) OFFSET(THREAD_KVM_VCPU, thread_struct, kvm_vcpu); #endif -#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) - OFFSET(KUAP, thread_struct, kuap); -#endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM OFFSET(PACATMSCRATCH, paca_struct, tm_scratch); @@ -185,19 +176,12 @@ int main(void) sizeof(struct pt_regs) + 16); #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ - OFFSET(TI_FLAGS, thread_info, flags); OFFSET(TI_LOCAL_FLAGS, thread_info, local_flags); - OFFSET(TI_PREEMPT, thread_info, preempt_count); #ifdef CONFIG_PPC64 OFFSET(DCACHEL1BLOCKSIZE, ppc64_caches, l1d.block_size); OFFSET(DCACHEL1LOGBLOCKSIZE, ppc64_caches, l1d.log_block_size); - OFFSET(DCACHEL1BLOCKSPERPAGE, ppc64_caches, l1d.blocks_per_page); - OFFSET(ICACHEL1BLOCKSIZE, ppc64_caches, l1i.block_size); - OFFSET(ICACHEL1LOGBLOCKSIZE, ppc64_caches, l1i.log_block_size); - OFFSET(ICACHEL1BLOCKSPERPAGE, ppc64_caches, l1i.blocks_per_page); /* paca */ - DEFINE(PACA_SIZE, sizeof(struct paca_struct)); OFFSET(PACAPACAINDEX, paca_struct, paca_index); OFFSET(PACAPROCSTART, paca_struct, cpu_start); OFFSET(PACAKSAVE, paca_struct, kstack); @@ -212,15 +196,6 @@ int main(void) OFFSET(PACAIRQSOFTMASK, paca_struct, irq_soft_mask); OFFSET(PACAIRQHAPPENED, paca_struct, irq_happened); OFFSET(PACA_FTRACE_ENABLED, paca_struct, ftrace_enabled); -#ifdef CONFIG_PPC_BOOK3S - OFFSET(PACACONTEXTID, paca_struct, mm_ctx_id); -#ifdef CONFIG_PPC_MM_SLICES - OFFSET(PACALOWSLICESPSIZE, paca_struct, mm_ctx_low_slices_psize); - OFFSET(PACAHIGHSLICEPSIZE, paca_struct, mm_ctx_high_slices_psize); - OFFSET(PACA_SLB_ADDR_LIMIT, paca_struct, mm_ctx_slb_addr_limit); - DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def)); -#endif /* CONFIG_PPC_MM_SLICES */ -#endif #ifdef CONFIG_PPC_BOOK3E OFFSET(PACAPGD, paca_struct, pgd); @@ -241,21 +216,9 @@ int main(void) #endif /* CONFIG_PPC_BOOK3E */ #ifdef CONFIG_PPC_BOOK3S_64 - OFFSET(PACASLBCACHE, paca_struct, slb_cache); - OFFSET(PACASLBCACHEPTR, paca_struct, slb_cache_ptr); - OFFSET(PACASTABRR, paca_struct, stab_rr); - OFFSET(PACAVMALLOCSLLP, paca_struct, vmalloc_sllp); -#ifdef CONFIG_PPC_MM_SLICES - OFFSET(MMUPSIZESLLP, mmu_psize_def, sllp); -#else - OFFSET(PACACONTEXTSLLP, paca_struct, mm_ctx_sllp); -#endif /* CONFIG_PPC_MM_SLICES */ OFFSET(PACA_EXGEN, paca_struct, exgen); OFFSET(PACA_EXMC, paca_struct, exmc); OFFSET(PACA_EXNMI, paca_struct, exnmi); 
-#ifdef CONFIG_PPC_PSERIES - OFFSET(PACALPPACAPTR, paca_struct, lppaca_ptr); -#endif OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr); OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid); OFFSET(SLBSHADOW_STACKESID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid); @@ -264,9 +227,7 @@ int main(void) #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE OFFSET(PACA_PMCINUSE, paca_struct, pmcregs_in_use); #endif - OFFSET(LPPACA_DTLIDX, lppaca, dtl_idx); OFFSET(LPPACA_YIELDCOUNT, lppaca, yield_count); - OFFSET(PACA_DTL_RIDX, paca_struct, dtl_ridx); #endif /* CONFIG_PPC_BOOK3S_64 */ OFFSET(PACAEMERGSP, paca_struct, emergency_sp); #ifdef CONFIG_PPC_BOOK3S_64 @@ -343,10 +304,6 @@ int main(void) STACK_PT_REGS_OFFSET(STACK_REGS_AMR, amr); STACK_PT_REGS_OFFSET(STACK_REGS_IAMR, iamr); #endif -#ifdef CONFIG_PPC_KUAP - STACK_PT_REGS_OFFSET(STACK_REGS_KUAP, kuap); -#endif - #if defined(CONFIG_PPC32) #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) @@ -482,11 +439,9 @@ int main(void) OFFSET(KVM_HOST_LPID, kvm, arch.host_lpid); OFFSET(KVM_HOST_LPCR, kvm, arch.host_lpcr); OFFSET(KVM_HOST_SDR1, kvm, arch.host_sdr1); - OFFSET(KVM_NEED_FLUSH, kvm, arch.need_tlb_flush.bits); OFFSET(KVM_ENABLED_HCALLS, kvm, arch.enabled_hcalls); OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v); OFFSET(KVM_RADIX, kvm, arch.radix); - OFFSET(KVM_FWNMI, kvm, arch.fwnmi_enabled); OFFSET(KVM_SECURE_GUEST, kvm, arch.secure_guest); OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr); OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar); @@ -514,7 +469,6 @@ int main(void) OFFSET(VCPU_DAWRX1, kvm_vcpu, arch.dawrx1); OFFSET(VCPU_CIABR, kvm_vcpu, arch.ciabr); OFFSET(VCPU_HFLAGS, kvm_vcpu, arch.hflags); - OFFSET(VCPU_DEC, kvm_vcpu, arch.dec); OFFSET(VCPU_DEC_EXPIRES, kvm_vcpu, arch.dec_expires); OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions); OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded); @@ -525,7 +479,6 @@ int main(void) OFFSET(VCPU_MMCRA, kvm_vcpu, arch.mmcra); OFFSET(VCPU_MMCRS, kvm_vcpu, arch.mmcrs); OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc); - OFFSET(VCPU_SPMC, kvm_vcpu, arch.spmc); OFFSET(VCPU_SIAR, kvm_vcpu, arch.siar); OFFSET(VCPU_SDAR, kvm_vcpu, arch.sdar); OFFSET(VCPU_SIER, kvm_vcpu, arch.sier); @@ -646,10 +599,8 @@ int main(void) HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state); HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore); - HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys); HSTATE_FIELD(HSTATE_XIVE_TIMA_PHYS, xive_tima_phys); HSTATE_FIELD(HSTATE_XIVE_TIMA_VIRT, xive_tima_virt); - HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr); HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi); HSTATE_FIELD(HSTATE_PTID, ptid); HSTATE_FIELD(HSTATE_FAKE_SUSPEND, fake_suspend); @@ -757,7 +708,6 @@ int main(void) #endif DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER); - DEFINE(PPC_DBELL_MSGTYPE, PPC_DBELL_MSGTYPE); #ifdef CONFIG_PPC_8xx DEFINE(VIRT_IMMR_BASE, (u64)__fix_to_virt(FIX_IMMR_BASE)); -- cgit v1.2.3 From 13c7dad95176d6abaf9608e4d34b2f512689775e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 5 May 2021 14:02:13 +0000 Subject: powerpc/paca: Remove mm_ctx_id and mm_ctx_slb_addr_limit mm_ctx_id and mm_ctx_slb_addr_limit are not used anymore. Remove them. 
Signed-off-by: Christophe Leroy Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/6e1813953da38c452c131fe3e2a2761a0fddb975.1620223303.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/paca.h | 2 -- arch/powerpc/kernel/paca.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index ec18ac818e3a..ecc8d792a431 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -149,11 +149,9 @@ struct paca_struct { #endif /* CONFIG_PPC_BOOK3E */ #ifdef CONFIG_PPC_BOOK3S - mm_context_id_t mm_ctx_id; #ifdef CONFIG_PPC_MM_SLICES unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE]; unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE]; - unsigned long mm_ctx_slb_addr_limit; #else u16 mm_ctx_user_psize; u16 mm_ctx_sllp; diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 7f5aae3c387d..9bd30cac852b 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -346,10 +346,8 @@ void copy_mm_to_paca(struct mm_struct *mm) #ifdef CONFIG_PPC_BOOK3S mm_context_t *context = &mm->context; - get_paca()->mm_ctx_id = context->id; #ifdef CONFIG_PPC_MM_SLICES VM_BUG_ON(!mm_ctx_slb_addr_limit(context)); - get_paca()->mm_ctx_slb_addr_limit = mm_ctx_slb_addr_limit(context); memcpy(&get_paca()->mm_ctx_low_slices_psize, mm_ctx_low_slices(context), LOW_SLICE_ARRAY_SZ); memcpy(&get_paca()->mm_ctx_high_slices_psize, mm_ctx_high_slices(context), -- cgit v1.2.3 From f3f6d18417eb44ef393b23570d384d2778ef22dc Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Sat, 8 May 2021 10:06:42 +0530 Subject: powerpc/papr_scm: Reduce error severity if nvdimm stats inaccessible Currently drc_pmem_query_stats() generates a dev_err in case the "Enable Performance Information Collection" feature is disabled from HMC or performance stats are not available for an nvdimm. The error is of the form below: papr_scm ibm,persistent-memory:ibm,pmemory@44104001: Failed to query performance stats, Err:-10 This error message confuses users as it implies a possible problem with the nvdimm even though it's due to a disabled/unavailable feature. We fix this by explicitly handling the H_AUTHORITY and H_UNSUPPORTED errors from the H_SCM_PERFORMANCE_STATS hcall. In case of an H_AUTHORITY error, an info message is logged instead of an error, saying "Permission denied while accessing performance stats", and an EPERM error is returned. In case of an H_UNSUPPORTED error, we return an EOPNOTSUPP error from drc_pmem_query_stats(), indicating that the performance stats-query operation is not supported on this nvdimm.
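The resulting contract for callers can be summarized with a hedged sketch (not the kernel code; the helper name is hypothetical, and the default errno is an assumption since the final error branch is elided above):

    /* Hypothetical summary of how drc_pmem_query_stats() now maps the
     * H_SCM_PERFORMANCE_STATS hcall status to an errno. */
    static int stats_rc_to_errno(long rc)
    {
            switch (rc) {
            case H_SUCCESS:
                    return 0;
            case H_AUTHORITY:       /* stats collection disabled from HMC */
                    return -EPERM;  /* logged at info level, not as error */
            case H_UNSUPPORTED:     /* no performance stats for this nvdimm */
                    return -EOPNOTSUPP; /* logged at debug level only */
            default:                /* genuine failure, logged with dev_err() */
                    return -EIO;    /* assumption: exact errno elided above */
            }
    }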
Fixes: 2d02bf835e57 ("powerpc/papr_scm: Fetch nvdimm performance stats from PHYP") Signed-off-by: Vaibhav Jain Reviewed-by: Ira Weiny Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210508043642.114076-1-vaibhav@linux.ibm.com --- arch/powerpc/platforms/pseries/papr_scm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index ef26fe40efb0..e2b69cc3beaf 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -310,6 +310,13 @@ static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p, dev_err(&p->pdev->dev, "Unknown performance stats, Err:0x%016lX\n", ret[0]); return -ENOENT; + } else if (rc == H_AUTHORITY) { + dev_info(&p->pdev->dev, + "Permission denied while accessing performance stats"); + return -EPERM; + } else if (rc == H_UNSUPPORTED) { + dev_dbg(&p->pdev->dev, "Performance stats unsupported\n"); + return -EOPNOTSUPP; } else if (rc != H_SUCCESS) { dev_err(&p->pdev->dev, "Failed to query performance stats, Err:%lld\n", rc); -- cgit v1.2.3 From 9a1762a4a4ff3cc096c605212165f59481e84503 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 7 May 2021 16:37:53 +0000 Subject: powerpc/8xx: Update mpc885_ads_defconfig to improve CI mpc885_ads_defconfig is used by several CI robots. A few functionalities are specific to 8xx and are not covered by other default configurations, so improve build test coverage by adding them to mpc885_ads_defconfig. 8xx is the only platform supporting 16k page size in addition to 4k page size. Considering that 4k page size is widely tested by other configurations, let's make 16k pages the selection for 8xx, as it has proven to be a weak spot in the past. CONFIG_PIN_TLB is specific to 8xx, select it as it mainly adds code without removing much. CONFIG_BDI_SWITCH is specific to PPC32 and adds code. CONFIG_PPC_PTDUMP has specific parts for 8xx. CONFIG_MODULES has specific handling for 8xx. CONFIG_SMC_UCODE_PATCH is specific to 8xx for loading microcode. CONFIG_PERF_EVENTS has specific parts for 8xx. CONFIG_MATH_EMULATION is used by 8xx. CONFIG_STRICT_KERNEL_RWX has specificities for 8xx. CONFIG_VIRT_CPU_ACCOUNTING_NATIVE has specific parts for PPC32. CONFIG_IPV6 has specificities for PPC32. CONFIG_BPF_JIT has specificities for PPC32.
A few drivers are tightly linked to the 8xx: - CONFIG_SPI_FSL_SPI - CONFIG_CRYPTO_DEV_TALITOS - CONFIG_8xxx_WDT - CONFIG_8xx_GPIO - CONFIG_PPC_EARLY_DEBUG_CPM Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/2541e4e415505b27db8ccbb8977035c20e408ef4.1620405461.git.christophe.leroy@csgroup.eu --- arch/powerpc/configs/mpc885_ads_defconfig | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig index 949ff9ccda5e..d21f266cea9a 100644 --- a/arch/powerpc/configs/mpc885_ads_defconfig +++ b/arch/powerpc/configs/mpc885_ads_defconfig @@ -57,3 +57,28 @@ CONFIG_CRC32_SLICEBY4=y CONFIG_DEBUG_INFO=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y +CONFIG_PPC_16K_PAGES=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_FS=y +CONFIG_PPC_PTDUMP=y +CONFIG_MODULES=y +CONFIG_SPI=y +CONFIG_SPI_FSL_SPI=y +CONFIG_CRYPTO=y +CONFIG_CRYPTO_DEV_TALITOS=y +CONFIG_8xx_GPIO=y +CONFIG_WATCHDOG=y +CONFIG_8xxx_WDT=y +CONFIG_SMC_UCODE_PATCH=y +CONFIG_ADVANCED_OPTIONS=y +CONFIG_PIN_TLB=y +CONFIG_PERF_EVENTS=y +CONFIG_MATH_EMULATION=y +CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y +CONFIG_STRICT_KERNEL_RWX=y +CONFIG_IPV6=y +CONFIG_BPF_JIT=y +CONFIG_DEBUG_VM_PGTABLE=y +CONFIG_BDI_SWITCH=y +CONFIG_PPC_EARLY_DEBUG=y +CONFIG_PPC_EARLY_DEBUG_CPM_ADDR=0xff002008 -- cgit v1.2.3 From 930a77c3ad79c30ce9ba8cbad9eded5bc5805343 Mon Sep 17 00:00:00 2001 From: Zhang Jianhua Date: Mon, 10 May 2021 15:51:34 +0800 Subject: powerpc/boot: Fix a typo in partial_decompress() comment outbuf is the output buffer, output_size is the size of the output buffer. Signed-off-by: Zhang Jianhua Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210510075134.252978-1-chris.zjh@huawei.com --- arch/powerpc/boot/decompress.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/boot/decompress.c b/arch/powerpc/boot/decompress.c index 6098b879ac97..977eb15a6d17 100644 --- a/arch/powerpc/boot/decompress.c +++ b/arch/powerpc/boot/decompress.c @@ -99,8 +99,8 @@ static void print_err(char *s) * partial_decompress - decompresses part or all of a compressed buffer * @inbuf: input buffer * @input_size: length of the input buffer - * @outbuf: input buffer - * @output_size: length of the input buffer + * @outbuf: output buffer + * @output_size: length of the output buffer + * @skip number of output bytes to ignore * * This function takes compressed data from inbuf, decompresses and write it to -- cgit v1.2.3 From c176c3d58a3ed623e8917acaafe240245e700e33 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 7 May 2021 09:24:43 +0000 Subject: powerpc: Define NR_CPUS all the time include/linux/threads.h defines a default value for CONFIG_NR_CPUS but suggests the architectures should always define it. So do it: when CONFIG_SMP is not selected, set CONFIG_NR_CPUS to 1. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/afef0ec65a8ba8651bf4f6e4f4f08a8b6991dbfb.1620379469.git.christophe.leroy@csgroup.eu --- arch/powerpc/platforms/Kconfig.cputype | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index f998e655b570..885140055b7a 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -476,9 +476,9 @@ config SMP If you don't know what to do here, say N.
config NR_CPUS - int "Maximum number of CPUs (2-8192)" - range 2 8192 - depends on SMP + int "Maximum number of CPUs (2-8192)" if SMP + range 2 8192 if SMP + default "1" if !SMP default "32" if PPC64 default "4" -- cgit v1.2.3 From 70d6ebf82bd0cfddaebb54e861fc15e9945a5fc6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 7 May 2021 05:02:02 +0000 Subject: powerpc/603: Avoid a pile of NOPs when not using SW LRU in TLB exceptions The SW LRU is in an MMU feature section. When not used, that's a dozen NOPs to fetch for nothing. Define an ALT section that does the few remaining operations. That also avoids a double read on SRR1 in the SW LRU case. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/603725297466959419628ef7964aaf3417fb647d.1620363691.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_book3s_32.S | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index adf64fe522f8..326262030279 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -518,8 +518,6 @@ BEGIN_FTR_SECTION rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) mtspr SPRN_RPA,r1 - mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ - mtcrf 0x80,r2 BEGIN_MMU_FTR_SECTION li r0,1 mfspr r1,SPRN_SPRG_603_LRU @@ -531,9 +529,15 @@ BEGIN_MMU_FTR_SECTION mfspr r2,SPRN_SRR1 rlwimi r2,r0,31-14,14,14 mtspr SPRN_SRR1,r2 -END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) + mtcrf 0x80,r2 + tlbld r3 + rfi +MMU_FTR_SECTION_ELSE + mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ + mtcrf 0x80,r2 tlbld r3 rfi +ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) DataAddressInvalid: mfspr r3,SPRN_SRR1 rlwinm r1,r3,9,6,6 /* Get load/store bit */ @@ -607,9 +611,15 @@ BEGIN_MMU_FTR_SECTION mfspr r2,SPRN_SRR1 rlwimi r2,r0,31-14,14,14 mtspr SPRN_SRR1,r2 -END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) + mtcrf 0x80,r2 + tlbld r3 + rfi +MMU_FTR_SECTION_ELSE + mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ + mtcrf 0x80,r2 tlbld r3 rfi +ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) #ifndef CONFIG_ALTIVEC #define altivec_assist_exception unknown_exception -- cgit v1.2.3 From fe3dc333d2ed50c9764d281869d87bae0d795ce5 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 7 May 2021 05:45:52 +0000 Subject: powerpc/mmu: Don't duplicate radix_enabled() mmu_has_feature(MMU_FTR_TYPE_RADIX) can be evaluated regardless of CONFIG_PPC_RADIX_MMU. When CONFIG_PPC_RADIX_MMU is not set, mmu_has_feature(MMU_FTR_TYPE_RADIX) will evaluate to 'false' at build time because MMU_FTR_TYPE_RADIX won't be included in MMU_FTRS_POSSIBLE.
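Why the #ifdef was redundant can be illustrated with a standalone sketch (an assumed, simplified model of the kernel's feature-mask pattern, not the real mmu_has_feature() implementation):

    #include <stdbool.h>

    #define MMU_FTR_TYPE_RADIX      0x00000040

    #ifdef CONFIG_PPC_RADIX_MMU
    #define MMU_FTRS_POSSIBLE       (0x0000003f | MMU_FTR_TYPE_RADIX)
    #else
    #define MMU_FTRS_POSSIBLE       0x0000003f
    #endif

    static unsigned int cur_mmu_features;

    static inline bool mmu_has_feature(unsigned int feature)
    {
            /* With radix configured out, (MMU_FTRS_POSSIBLE & feature) is
             * the constant 0, so the compiler folds this function to
             * "return false" and radix_enabled() costs nothing at runtime. */
            if (!(MMU_FTRS_POSSIBLE & feature))
                    return false;
            return cur_mmu_features & feature;
    }

    static inline bool radix_enabled(void)
    {
            return mmu_has_feature(MMU_FTR_TYPE_RADIX);
    }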
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/62743846cbd493e5d9a02e197c2672a1d30df149.1620366342.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/mmu.h | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 607168b1aef4..0a6a77437ab8 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -324,7 +324,6 @@ static inline void assert_pte_locked(struct mm_struct *mm, unsigned long addr) } #endif /* !CONFIG_DEBUG_VM */ -#ifdef CONFIG_PPC_RADIX_MMU static inline bool radix_enabled(void) { return mmu_has_feature(MMU_FTR_TYPE_RADIX); @@ -334,17 +333,6 @@ static inline bool early_radix_enabled(void) { return early_mmu_has_feature(MMU_FTR_TYPE_RADIX); } -#else -static inline bool radix_enabled(void) -{ - return false; -} - -static inline bool early_radix_enabled(void) -{ - return false; -} -#endif #ifdef CONFIG_STRICT_KERNEL_RWX static inline bool strict_kernel_rwx_enabled(void) -- cgit v1.2.3 From 0441729e16379649ea0f393a5be68a19ba384d94 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 7 May 2021 06:00:58 +0000 Subject: powerpc/mmu: Remove leftover CONFIG_E200 Commit 39c8bf2b3cc1 ("powerpc: Retire e200 core (mpc555x processor)") removed CONFIG_E200. Commit f9158d58a4e1 ("powerpc/mm: Add mask of always present MMU features") was merged in the same cycle and added a new use of CONFIG_E200. Remove that use. Fixes: f9158d58a4e1 ("powerpc/mm: Add mask of always present MMU features") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/594fa3cc0df11e21644fd6a584851ae4f164b2bf.1620367249.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/mmu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 0a6a77437ab8..998fe01dd1a8 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -220,7 +220,7 @@ enum { #elif defined(CONFIG_44x) #define MMU_FTRS_ALWAYS MMU_FTR_TYPE_44x #endif -#if defined(CONFIG_E200) || defined(CONFIG_E500) +#ifdef CONFIG_E500 #define MMU_FTRS_ALWAYS MMU_FTR_TYPE_FSL_E #endif -- cgit v1.2.3 From 8af8d72dc58e90dc945ca627b24968400e0f21b6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 6 May 2021 13:27:31 +0000 Subject: powerpc/32s: Speed up likely path of kuap_update_sr() In most cases, kuap_update_sr() will update a single segment register. We know that the first update will always be done: if there were no segment registers to update at all, kuap_update_sr() would not be called. Avoid recurring calculations and tests in that case.
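The reshaped loop can be sketched as standalone C (mtsr() stubbed out, constants as in the patch) to show where the saving comes from:

    /* Stub standing in for the real segment-register update instruction. */
    static void mtsr(unsigned int sr, unsigned int addr) { (void)sr; (void)addr; }

    static void kuap_update_sr_sketch(unsigned int sr, unsigned int addr,
                                      unsigned int end)
    {
            addr &= 0xf0000000;             /* align addr to start of segment */
            for (;;) {
                    mtsr(sr, addr);         /* first update is unconditional */
                    addr += 0x10000000;     /* address of next segment */
                    if (addr >= end)        /* exit test only between updates */
                            break;
                    sr += 0x111;            /* next VSID */
                    sr &= 0xf0ffffff;       /* clear VSID overflow */
            }
            /* isync() follows in the real code: context sync after mtsr() */
    }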
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/848f18d213b8341939add7302dc4ef80cc7a12e3.1620307636.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/kup.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 1670dfe9d4f1..c1f7c2e625a6 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -15,11 +15,13 @@ static inline void kuap_update_sr(u32 sr, u32 addr, u32 end) { addr &= 0xf0000000; /* align addr to start of segment */ barrier(); /* make sure thread.kuap is updated before playing with SRs */ - while (addr < end) { + for (;;) { mtsr(sr, addr); + addr += 0x10000000; /* address of next segment */ + if (addr >= end) + break; sr += 0x111; /* next VSID */ sr &= 0xf0ffffff; /* clear VSID overflow */ - addr += 0x10000000; /* address of next segment */ } isync(); /* Context sync required after mtsr() */ } -- cgit v1.2.3 From b09049c516af90d4b6643b5d0d2549cd01539086 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 6 May 2021 13:30:51 +0000 Subject: powerpc: Only pad struct pt_regs when needed If neither KUAP nor PPC64 is selected, there is nothing in the second union of struct pt_regs, so the alignment padding is a waste of memory. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d536bbc46094f66b24d3017343be25164f232933.1620307840.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ptrace.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 9c9ab2746168..4fd3a3bd5272 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -47,7 +47,7 @@ struct pt_regs unsigned long result; }; }; - +#if defined(CONFIG_PPC64) || defined(CONFIG_PPC_KUAP) union { struct { #ifdef CONFIG_PPC64 @@ -67,6 +67,7 @@ struct pt_regs }; unsigned long __pad[4]; /* Maintain 16 byte interrupt stack alignment */ }; +#endif }; #endif -- cgit v1.2.3 From ca8cc36901e9bdd01d371f6236faf9f61d1325d1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 6 May 2021 13:32:18 +0000 Subject: powerpc/32s: Remove asm/book3s/32/hash.h Move the PAGE bits into pgtable.h to be more similar to book3s/64. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/7f4aaa479569328a1e5b07c96c08fbca0cd7dd88.1620307890.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/hash.h | 45 ---------------------------- arch/powerpc/include/asm/book3s/32/pgtable.h | 38 ++++++++++++++++++++++- 2 files changed, 37 insertions(+), 46 deletions(-) delete mode 100644 arch/powerpc/include/asm/book3s/32/hash.h diff --git a/arch/powerpc/include/asm/book3s/32/hash.h b/arch/powerpc/include/asm/book3s/32/hash.h deleted file mode 100644 index 2a0a467d2985..000000000000 --- a/arch/powerpc/include/asm/book3s/32/hash.h +++ /dev/null @@ -1,45 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_POWERPC_BOOK3S_32_HASH_H -#define _ASM_POWERPC_BOOK3S_32_HASH_H -#ifdef __KERNEL__ - -/* - * The "classic" 32-bit implementation of the PowerPC MMU uses a hash - * table containing PTEs, together with a set of 16 segment registers, - * to define the virtual to physical address mapping. - * - * We use the hash table as an extended TLB, i.e. a cache of currently - * active mappings.
We maintain a two-level page table tree, much - * like that used by the i386, for the sake of the Linux memory - * management code. Low-level assembler code in hash_low_32.S - * (procedure hash_page) is responsible for extracting ptes from the - * tree and putting them into the hash table when necessary, and - * updating the accessed and modified bits in the page table tree. - */ - -#define _PAGE_PRESENT 0x001 /* software: pte contains a translation */ -#define _PAGE_HASHPTE 0x002 /* hash_page has made an HPTE for this pte */ -#define _PAGE_USER 0x004 /* usermode access allowed */ -#define _PAGE_GUARDED 0x008 /* G: prohibit speculative access */ -#define _PAGE_COHERENT 0x010 /* M: enforce memory coherence (SMP systems) */ -#define _PAGE_NO_CACHE 0x020 /* I: cache inhibit */ -#define _PAGE_WRITETHRU 0x040 /* W: cache write-through */ -#define _PAGE_DIRTY 0x080 /* C: page changed */ -#define _PAGE_ACCESSED 0x100 /* R: page referenced */ -#define _PAGE_EXEC 0x200 /* software: exec allowed */ -#define _PAGE_RW 0x400 /* software: user write access allowed */ -#define _PAGE_SPECIAL 0x800 /* software: Special page */ - -#ifdef CONFIG_PTE_64BIT -/* We never clear the high word of the pte */ -#define _PTE_NONE_MASK (0xffffffff00000000ULL | _PAGE_HASHPTE) -#else -#define _PTE_NONE_MASK _PAGE_HASHPTE -#endif - -#define _PMD_PRESENT 0 -#define _PMD_PRESENT_MASK (PAGE_MASK) -#define _PMD_BAD (~PAGE_MASK) - -#endif /* __KERNEL__ */ -#endif /* _ASM_POWERPC_BOOK3S_32_HASH_H */ diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 83c65845a1a9..609c80f67194 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -4,7 +4,43 @@ #include -#include +/* + * The "classic" 32-bit implementation of the PowerPC MMU uses a hash + * table containing PTEs, together with a set of 16 segment registers, + * to define the virtual to physical address mapping. + * + * We use the hash table as an extended TLB, i.e. a cache of currently + * active mappings. We maintain a two-level page table tree, much + * like that used by the i386, for the sake of the Linux memory + * management code. Low-level assembler code in hash_low_32.S + * (procedure hash_page) is responsible for extracting ptes from the + * tree and putting them into the hash table when necessary, and + * updating the accessed and modified bits in the page table tree. 
+ */ + +#define _PAGE_PRESENT 0x001 /* software: pte contains a translation */ +#define _PAGE_HASHPTE 0x002 /* hash_page has made an HPTE for this pte */ +#define _PAGE_USER 0x004 /* usermode access allowed */ +#define _PAGE_GUARDED 0x008 /* G: prohibit speculative access */ +#define _PAGE_COHERENT 0x010 /* M: enforce memory coherence (SMP systems) */ +#define _PAGE_NO_CACHE 0x020 /* I: cache inhibit */ +#define _PAGE_WRITETHRU 0x040 /* W: cache write-through */ +#define _PAGE_DIRTY 0x080 /* C: page changed */ +#define _PAGE_ACCESSED 0x100 /* R: page referenced */ +#define _PAGE_EXEC 0x200 /* software: exec allowed */ +#define _PAGE_RW 0x400 /* software: user write access allowed */ +#define _PAGE_SPECIAL 0x800 /* software: Special page */ + +#ifdef CONFIG_PTE_64BIT +/* We never clear the high word of the pte */ +#define _PTE_NONE_MASK (0xffffffff00000000ULL | _PAGE_HASHPTE) +#else +#define _PTE_NONE_MASK _PAGE_HASHPTE +#endif + +#define _PMD_PRESENT 0 +#define _PMD_PRESENT_MASK (PAGE_MASK) +#define _PMD_BAD (~PAGE_MASK) /* And here we include common definitions */ -- cgit v1.2.3 From 34f7f79827ec4db30cff9001dfba19f496473e8d Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 7 May 2021 16:42:25 +1000 Subject: selftests/powerpc: Add test of mitigation patching We recently discovered some of our mitigation patching was not safe against other CPUs running concurrently. Add a test which enables/disables all mitigations in a tight loop while also running some stress load. On an unpatched system this almost always leads to an oops and panic/reboot, but we also check if the kernel becomes tainted in case we have a non-fatal oops. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210507064225.1556312-1-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/security/Makefile | 2 + .../powerpc/security/mitigation-patching.sh | 75 ++++++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100755 tools/testing/selftests/powerpc/security/mitigation-patching.sh diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile index 844d18cd5f93..7488315fd847 100644 --- a/tools/testing/selftests/powerpc/security/Makefile +++ b/tools/testing/selftests/powerpc/security/Makefile @@ -1,6 +1,8 @@ # SPDX-License-Identifier: GPL-2.0+ TEST_GEN_PROGS := rfi_flush entry_flush uaccess_flush spectre_v2 +TEST_PROGS := mitigation-patching.sh + top_srcdir = ../../../../.. CFLAGS += -I../../../../../usr/include diff --git a/tools/testing/selftests/powerpc/security/mitigation-patching.sh b/tools/testing/selftests/powerpc/security/mitigation-patching.sh new file mode 100755 index 000000000000..00197acb7ff1 --- /dev/null +++ b/tools/testing/selftests/powerpc/security/mitigation-patching.sh @@ -0,0 +1,75 @@ +#!/usr/bin/env bash + +set -euo pipefail + +TIMEOUT=10 + +function do_one +{ + local mitigation="$1" + local orig + local start + local now + + orig=$(cat "$mitigation") + + start=$EPOCHSECONDS + now=$start + + while [[ $((now-start)) -lt "$TIMEOUT" ]] + do + echo 0 > "$mitigation" + echo 1 > "$mitigation" + + now=$EPOCHSECONDS + done + + echo "$orig" > "$mitigation" +} + +rc=0 +cd /sys/kernel/debug/powerpc || rc=1 +if [[ "$rc" -ne 0 ]]; then + echo "Error: couldn't cd to /sys/kernel/debug/powerpc" >&2 + exit 1 +fi + +tainted=$(cat /proc/sys/kernel/tainted) +if [[ "$tainted" -ne 0 ]]; then + echo "Error: kernel already tainted!"
>&2 + exit 1 +fi + +mitigations="barrier_nospec stf_barrier count_cache_flush rfi_flush entry_flush uaccess_flush" + +for m in $mitigations +do + do_one "$m" & +done + +echo "Spawned threads enabling/disabling mitigations ..." + +if stress-ng > /dev/null 2>&1; then + stress="stress-ng" +elif stress > /dev/null 2>&1; then + stress="stress" +else + stress="" +fi + +if [[ -n "$stress" ]]; then + "$stress" -m "$(nproc)" -t "$TIMEOUT" & + echo "Spawned VM stressors ..." +fi + +echo "Waiting for timeout ..." +wait + +tainted=$(cat /proc/sys/kernel/tainted) +if [[ "$tainted" -ne 0 ]]; then + echo "Error: kernel became tainted!" >&2 + exit 1 +fi + +echo "OK" +exit 0 -- cgit v1.2.3 From 9b373899e9606d252364191ce2b385043a8808bc Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 14 May 2021 15:10:41 +0800 Subject: powerpc/pseries/memhotplug: Remove unused inline function dlpar_memory_remove() dlpar_memory_remove() is never used, so can be removed. Signed-off-by: YueHaibing Reviewed-by: Daniel Henrique Barboza Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210514071041.17432-1-yuehaibing@huawei.com --- arch/powerpc/platforms/pseries/hotplug-memory.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 8377f1f7c78e..3d93f1c48e23 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -585,10 +585,6 @@ static inline int pseries_remove_mem_node(struct device_node *np) { return 0; } -static inline int dlpar_memory_remove(struct pseries_hp_errorlog *hp_elog) -{ - return -EOPNOTSUPP; -} static int dlpar_remove_lmb(struct drmem_lmb *lmb) { return -EOPNOTSUPP; -- cgit v1.2.3 From c67454615cf95160cb806f7a471158a901eb261d Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Thu, 13 May 2021 19:03:40 +0800 Subject: selftests/powerpc: Fix duplicate included pthread.h Clean up the following includecheck warning: ./tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c: pthread.h is included more than once. No functional change. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1620903820-68213-1-git-send-email-jiapeng.chong@linux.alibaba.com --- tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c index e2a0c07e8362..9ef37a9836ac 100644 --- a/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c +++ b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c @@ -17,7 +17,6 @@ #include #include #include -#include #include "tm.h" #include "utils.h" -- cgit v1.2.3 From ad06bcfd5b8f989690053e6026cf742886ba9f90 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Mon, 10 May 2021 21:19:24 +0800 Subject: powerpc/pseries/ras: Delete a redundant condition branch The statement of the last "if (xxx)" branch is the same as the "else" branch. Delete it to simplify code. No functional change. 
Signed-off-by: Zhen Lei Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210510131924.3907-1-thunder.leizhen@huawei.com --- arch/powerpc/platforms/pseries/ras.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 9d4ef65da7f3..2f636308cf60 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -593,8 +593,6 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, mce_err.severity = MCE_SEV_SEVERE; else if (severity == RTAS_SEVERITY_ERROR) mce_err.severity = MCE_SEV_SEVERE; - else if (severity == RTAS_SEVERITY_FATAL) - mce_err.severity = MCE_SEV_FATAL; else mce_err.severity = MCE_SEV_FATAL; -- cgit v1.2.3 From feb0e079f43dee055701c1a294785d37341d6f97 Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Wed, 12 May 2021 17:28:06 -0300 Subject: powerpc/pseries: Set UNISOLATE on dlpar_memory_remove_by_ic() error As previously done in dlpar_cpu_remove() for CPUs, this patch changes dlpar_memory_remove_by_ic() to unisolate the LMB DRC when the LMB fails to be removed. The hypervisor, seeing an LMB DRC that was supposed to be removed being unisolated instead, can do error recovery on its side. This change is done in dlpar_memory_remove_by_ic() only because, as of today, only QEMU is using this code path for error recovery (via the PSERIES_HP_ELOG_ID_DRC_IC event). phyp treats it as a no-op. Signed-off-by: Daniel Henrique Barboza Reviewed-by: David Gibson Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210512202809.95363-2-danielhb413@gmail.com --- arch/powerpc/platforms/pseries/hotplug-memory.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 3d93f1c48e23..5d831756d1f3 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -551,6 +551,13 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) if (!drmem_lmb_reserved(lmb)) continue; + /* + * Setting the isolation state of an UNISOLATED/CONFIGURED + * device to UNISOLATE is a no-op, but the hypervisor can + * use it as a hint that the LMB removal failed. + */ + dlpar_unisolate_drc(lmb->drc_index); + rc = dlpar_add_lmb(lmb); if (rc) pr_err("Failed to add LMB, drc index %x\n", -- cgit v1.2.3 From 2ad216b4d6ff0f92fc645c1bd8838f04fbf09b9d Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Wed, 12 May 2021 17:28:07 -0300 Subject: powerpc/pseries: check DRCONF_MEM_RESERVED in lmb_is_removable() DRCONF_MEM_RESERVED is a flag that represents the "Reserved Memory" status in LOPAR v2.10, section 4.2.8. If an LMB is marked as reserved, then, quoting LOPAR, it "is not to be used or altered by the base OS". This flag is read-only in the kernel, being set by the firmware/hypervisor in the DT. As an example, QEMU will set this flag in hw/ppc/spapr.c, spapr_dt_dynamic_memory(). lmb_is_removable() does not check for DRCONF_MEM_RESERVED. This function is used in dlpar_remove_lmb() as a guard before the removal logic. Since it is failing to check for !RESERVED, dlpar_remove_lmb() will fail in a later stage instead of failing in the validation when receiving a reserved LMB as input. lmb_is_removable() is also used in dlpar_memory_remove_by_count() to evaluate if we have enough LMBs to complete the request.
The missing !RESERVED check in this case is causing dlpar_memory_remove_by_count() to miscalculate the number of eligible LMBs for the removal, and can make it error out later on instead of failing in the validation with the 'not enough LMBs to satisfy request' message. Making a DRCONF_MEM_RESERVED check in lmb_is_removable() fixes all these issues. Signed-off-by: Daniel Henrique Barboza Reviewed-by: David Gibson Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210512202809.95363-3-danielhb413@gmail.com --- arch/powerpc/platforms/pseries/hotplug-memory.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 5d831756d1f3..722832138da1 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -348,7 +348,8 @@ static int pseries_remove_mem_node(struct device_node *np) static bool lmb_is_removable(struct drmem_lmb *lmb) { - if (!(lmb->flags & DRCONF_MEM_ASSIGNED)) + if ((lmb->flags & DRCONF_MEM_RESERVED) || + !(lmb->flags & DRCONF_MEM_ASSIGNED)) return false; #ifdef CONFIG_FA_DUMP -- cgit v1.2.3 From 163e7921750f6cd965666f472c21de056c63dcbc Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Wed, 12 May 2021 17:28:08 -0300 Subject: powerpc/pseries: break early in dlpar_memory_remove_by_count() loops After marking the LMBs as reserved depending on dlpar_remove_lmb() rc, we evaluate whether we need to add the LMBs back or if we can release the LMB DRCs. In both cases, a for_each_drmem_lmb() loop without a break condition is used. This means that we're going to cycle through all LMBs of the partition even after we're done with what we were going to do. This patch adds break conditions in both loops to avoid this. The 'lmbs_removed' variable was renamed to 'lmbs_reserved', and it's now being decremented each time an LMB reservation is removed, indicating that the operation we're doing (adding back LMBs or releasing DRCs) is completed.
Signed-off-by: Daniel Henrique Barboza Reviewed-by: David Gibson Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210512202809.95363-4-danielhb413@gmail.com --- arch/powerpc/platforms/pseries/hotplug-memory.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 722832138da1..1c6e66faae6f 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -402,7 +402,7 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb) static int dlpar_memory_remove_by_count(u32 lmbs_to_remove) { struct drmem_lmb *lmb; - int lmbs_removed = 0; + int lmbs_reserved = 0; int lmbs_available = 0; int rc; @@ -436,12 +436,12 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove) */ drmem_mark_lmb_reserved(lmb); - lmbs_removed++; - if (lmbs_removed == lmbs_to_remove) + lmbs_reserved++; + if (lmbs_reserved == lmbs_to_remove) break; } - if (lmbs_removed != lmbs_to_remove) { + if (lmbs_reserved != lmbs_to_remove) { pr_err("Memory hot-remove failed, adding LMB's back\n"); for_each_drmem_lmb(lmb) { @@ -454,6 +454,10 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove) lmb->drc_index); drmem_remove_lmb_reservation(lmb); + + lmbs_reserved--; + if (lmbs_reserved == 0) + break; } rc = -EINVAL; @@ -467,6 +471,10 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove) lmb->base_addr); drmem_remove_lmb_reservation(lmb); + + lmbs_reserved--; + if (lmbs_reserved == 0) + break; } rc = 0; } -- cgit v1.2.3 From 40999b041e03b32434b2f4a951668e9865a3cb6b Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Wed, 12 May 2021 17:28:09 -0300 Subject: powerpc/pseries: minor enhancements in dlpar_memory_remove_by_ic() We don't need the 'lmbs_available' variable to count the valid LMBs and to check if we have fewer than 'lmbs_to_remove'. We must ensure that the entire LMB range is removed, so we can error out immediately if any LMB in the range is marked as reserved. Add a couple of comments explaining the reasoning behind the differences we have in this function in contrast to what is done in its sister function, dlpar_memory_remove_by_count(). Signed-off-by: Daniel Henrique Barboza Reviewed-by: David Gibson Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210512202809.95363-5-danielhb413@gmail.com --- arch/powerpc/platforms/pseries/hotplug-memory.c | 28 +++++++++++++++++-------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 1c6e66faae6f..36f66556a7c6 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -517,7 +517,6 @@ static int dlpar_memory_remove_by_index(u32 drc_index) static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) { struct drmem_lmb *lmb, *start_lmb, *end_lmb; - int lmbs_available = 0; int rc; pr_info("Attempting to hot-remove %u LMB(s) at %x\n", @@ -530,18 +529,29 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) if (rc) return -EINVAL; - /* Validate that there are enough LMBs to satisfy the request */ + /* + * Validate that all LMBs in range are not reserved. Note that it + * is ok if they are !ASSIGNED since our goal here is to remove the + * LMB range, regardless of whether some LMBs were already removed + * by any other reason.
+ * + * This is a contrast to what is done in remove_by_count() where we + * check for both RESERVED and !ASSIGNED (via lmb_is_removable()), + * because we want to remove a fixed amount of LMBs in that function. + */ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { - if (lmb->flags & DRCONF_MEM_RESERVED) - break; - - lmbs_available++; + if (lmb->flags & DRCONF_MEM_RESERVED) { + pr_err("Memory at %llx (drc index %x) is reserved\n", + lmb->base_addr, lmb->drc_index); + return -EINVAL; + } } - if (lmbs_available < lmbs_to_remove) - return -EINVAL; - for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { + /* + * dlpar_remove_lmb() will error out if the LMB is already + * !ASSIGNED, but this case is a no-op for us. + */ if (!(lmb->flags & DRCONF_MEM_ASSIGNED)) continue; -- cgit v1.2.3 From f259fb893c69d60ac1c7192f1974635c554fd716 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 28 Apr 2021 23:27:00 +1000 Subject: powerpc/Makefile: Add ppc32/ppc64_randconfig targets Make it easier to generate a 32 or 64-bit specific randconfig. Signed-off-by: Michael Ellerman Tested-by: Randy Dunlap Reviewed-by: Randy Dunlap Requested-by: Randy Dunlap Link: https://lore.kernel.org/r/20210428132700.3426100-1-mpe@ellerman.id.au --- arch/powerpc/Makefile | 10 ++++++++++ arch/powerpc/configs/32-bit.config | 1 + arch/powerpc/configs/64-bit.config | 1 + 3 files changed, 12 insertions(+) create mode 100644 arch/powerpc/configs/32-bit.config create mode 100644 arch/powerpc/configs/64-bit.config diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 3212d076ac6a..712c5e8768ce 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -376,6 +376,16 @@ ppc64_book3e_allmodconfig: $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/85xx-64bit.config \ -f $(srctree)/Makefile allmodconfig +PHONY += ppc32_randconfig +ppc32_randconfig: + $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/32-bit.config \ + -f $(srctree)/Makefile randconfig + +PHONY += ppc64_randconfig +ppc64_randconfig: + $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/64-bit.config \ + -f $(srctree)/Makefile randconfig + define archhelp @echo '* zImage - Build default images selected by kernel config' @echo ' zImage.* - Compressed kernel image (arch/$(ARCH)/boot/zImage.*)' diff --git a/arch/powerpc/configs/32-bit.config b/arch/powerpc/configs/32-bit.config new file mode 100644 index 000000000000..ad6546850c68 --- /dev/null +++ b/arch/powerpc/configs/32-bit.config @@ -0,0 +1 @@ +# CONFIG_PPC64 is not set diff --git a/arch/powerpc/configs/64-bit.config b/arch/powerpc/configs/64-bit.config new file mode 100644 index 000000000000..0fe6406929e2 --- /dev/null +++ b/arch/powerpc/configs/64-bit.config @@ -0,0 +1 @@ +CONFIG_PPC64=y -- cgit v1.2.3 From 5b75bd763d369e43e6d09e85eaea22fde37c0e89 Mon Sep 17 00:00:00 2001 From: Sathvika Vasireddy Date: Tue, 11 May 2021 07:18:32 -0500 Subject: powerpc/sstep: Add emulation support for ‘setb’ instruction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds emulation support for the following instruction: * Set Boolean (setb) Signed-off-by: Sathvika Vasireddy Tested-by: Naveen N. 
Rao Reviewed-by: Segher Boessenkool Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/7b735b0c898da0db2af8628a64df2f5114596f22.1620727160.git.sathvika@linux.vnet.ibm.com --- arch/powerpc/lib/sstep.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 45bda2520755..aee42bcc775b 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1700,6 +1700,28 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->val = regs->ccr & imm; goto compute_done; + case 128: /* setb */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + /* + * 'ra' encodes the CR field number (bfa) in the top 3 bits. + * Since each CR field is 4 bits, + * we can simply mask off the bottom two bits (bfa * 4) + * to yield the first bit in the CR field. + */ + ra = ra & ~0x3; + /* 'val' stores bits of the CR field (bfa) */ + val = regs->ccr >> (CR0_SHIFT - ra); + /* checks if the LT bit of CR field (bfa) is set */ + if (val & 8) + op->val = -1; + /* checks if the GT bit of CR field (bfa) is set */ + else if (val & 4) + op->val = 1; + else + op->val = 0; + goto compute_done; + case 144: /* mtcrf */ op->type = COMPUTE + SETCC; imm = 0xf0000000UL; -- cgit v1.2.3 From 60060d704c55a9450208b8f0bc5026df9d4ab1d6 Mon Sep 17 00:00:00 2001 From: Sathvika Vasireddy Date: Tue, 11 May 2021 07:18:33 -0500 Subject: powerpc/sstep: Add tests for setb instruction This adds selftests for setb instruction. Signed-off-by: Sathvika Vasireddy Tested-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b05b61ccb5f10279d46fed490796f32ea2ccc270.1620727160.git.sathvika@linux.vnet.ibm.com --- arch/powerpc/include/asm/ppc-opcode.h | 1 + arch/powerpc/lib/test_emulate_step.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index ac41776661e9..927551dd870b 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -245,6 +245,7 @@ #define PPC_INST_STRING 0x7c00042a #define PPC_INST_STRING_MASK 0xfc0007fe #define PPC_INST_STRING_GEN_MASK 0xfc00067e +#define PPC_INST_SETB 0x7c000100 #define PPC_INST_STSWI 0x7c0005aa #define PPC_INST_STSWX 0x7c00052a #define PPC_INST_TRECHKPT 0x7c0007dd diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c index 783d1b85ecfe..a0a52fe5e979 100644 --- a/arch/powerpc/lib/test_emulate_step.c +++ b/arch/powerpc/lib/test_emulate_step.c @@ -53,6 +53,8 @@ ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ PPC_RAW_ADDI(t, a, i)) +#define TEST_SETB(t, bfa) ppc_inst(PPC_INST_SETB | ___PPC_RT(t) | ___PPC_RA((bfa & 0x7) << 2)) + static void __init init_pt_regs(struct pt_regs *regs) { @@ -929,6 +931,33 @@ static struct compute_test compute_tests[] = { } } }, + { + .mnemonic = "setb", + .cpu_feature = CPU_FTR_ARCH_300, + .subtests = { + { + .descr = "BFA = 1, CR = GT", + .instr = TEST_SETB(20, 1), + .regs = { + .ccr = 0x4000000, + } + }, + { + .descr = "BFA = 4, CR = LT", + .instr = TEST_SETB(20, 4), + .regs = { + .ccr = 0x8000, + } + }, + { + .descr = "BFA = 5, CR = EQ", + .instr = TEST_SETB(20, 5), + .regs = { + .ccr = 0x200, + } + } + } + }, { .mnemonic = "add", .subtests = { -- cgit v1.2.3 From 73e6e4e01134c9ee97433ad1f470c71b0748b08f Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 18 May 2021 13:40:41 -0700 Subject: powerpc/powernv/pci: fix header guard While 
looking at -Wundef warnings, the #if CONFIG_EEH stood out as a possible candidate to convert to #ifdef CONFIG_EEH. It seems that based on Kconfig dependencies it's not possible to build this file without CONFIG_EEH enabled, but based on upstream discussion, it's not clear yet that CONFIG_EEH should be enabled by default. For now, simply fix the -Wundef warning. Suggested-by: Nathan Chancellor Suggested-by: Joe Perches Signed-off-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Signed-off-by: Michael Ellerman Link: https://github.com/ClangBuiltLinux/linux/issues/570 Link: https://lore.kernel.org/lkml/67f6cd269684c9aa8463ff4812c3b4605e6739c3.camel@perches.com/ Link: https://lore.kernel.org/lkml/CAOSf1CGoN5R0LUrU=Y=UWho1Z_9SLgCX8s3SbFJXwJXc5BYz4A@mail.gmail.com/ Link: https://lore.kernel.org/r/20210518204044.2390064-1-ndesaulniers@google.com --- arch/powerpc/platforms/powernv/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index b18468dc31ff..6bb3c52633fb 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -711,7 +711,7 @@ int pnv_pci_cfg_write(struct pci_dn *pdn, return PCIBIOS_SUCCESSFUL; } -#if CONFIG_EEH +#ifdef CONFIG_EEH static bool pnv_pci_cfg_check(struct pci_dn *pdn) { struct eeh_dev *edev = NULL; -- cgit v1.2.3 From 6fcb574125e673f33ff058caa54b4e65629f3a08 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 18 May 2021 13:58:57 -0700 Subject: powerpc: Kconfig: disable CONFIG_COMPAT for clang < 12 Until clang-12, clang would attempt to assemble 32b powerpc assembler in 64b emulation mode when using a 64b target triple with -m32, leading to errors during the build of the compat VDSO. Simply disable all of CONFIG_COMPAT; users should upgrade to the latest release of clang for proper support. Suggested-by: Nathan Chancellor Signed-off-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Signed-off-by: Michael Ellerman Link: https://github.com/ClangBuiltLinux/linux/issues/1160 Link: https://github.com/llvm/llvm-project/commits/2288319733cd5f525bf7e24dece08bfcf9d0ff9e Link: https://groups.google.com/g/clang-built-linux/c/ayNmi3HoNdY/m/XJAGj_G2AgAJ Link: https://lore.kernel.org/r/20210518205858.2440344-1-ndesaulniers@google.com --- arch/powerpc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 088dd2afcfe4..36ba413f49d3 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -289,6 +289,7 @@ config PANIC_TIMEOUT config COMPAT bool "Enable support for 32bit binaries" depends on PPC64 + depends on !CC_IS_CLANG || CLANG_VERSION >= 120000 default y if !CPU_LITTLE_ENDIAN select ARCH_WANT_OLD_COMPAT_IPC select COMPAT_OLD_SIGACTION -- cgit v1.2.3 From 7ee3e97e00a3893e354c3993c3f7d9dc127e9c5e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 13 May 2021 09:07:51 +0000 Subject: kprobes: Allow architectures to override optinsn page allocation Some architectures like powerpc require a non-standard allocation of the optinsn page, because module pages are too far from the kernel for direct branches. Define weak alloc_optinsn_page() and free_optinsn_page(), which fall back on alloc_insn_page() and free_insn_page() when not overridden by the architecture. Suggested-by: Masami Hiramatsu Signed-off-by: Christophe Leroy Reviewed-by: Naveen N.
Rao Acked-by: Masami Hiramatsu Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/40a43d6df1fdf41ade36e9a46e60a4df774ca9f6.1620896780.git.christophe.leroy@csgroup.eu --- include/linux/kprobes.h | 3 +++ kernel/kprobes.c | 14 ++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 1883a4a9f16a..02d4020615a7 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -409,6 +409,9 @@ void dump_kprobe(struct kprobe *kp); void *alloc_insn_page(void); void free_insn_page(void *page); +void *alloc_optinsn_page(void); +void free_optinsn_page(void *page); + int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *sym); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 745f08fdd7a6..8c0a6fdef771 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -321,11 +321,21 @@ int kprobe_cache_get_kallsym(struct kprobe_insn_cache *c, unsigned int *symnum, } #ifdef CONFIG_OPTPROBES +void __weak *alloc_optinsn_page(void) +{ + return alloc_insn_page(); +} + +void __weak free_optinsn_page(void *page) +{ + free_insn_page(page); +} + /* For optimized_kprobe buffer */ struct kprobe_insn_cache kprobe_optinsn_slots = { .mutex = __MUTEX_INITIALIZER(kprobe_optinsn_slots.mutex), - .alloc = alloc_insn_page, - .free = free_insn_page, + .alloc = alloc_optinsn_page, + .free = free_optinsn_page, .sym = KPROBE_OPTINSN_PAGE_SYM, .pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages), /* .insn_size is initialized later */ -- cgit v1.2.3 From b73c8cccd72ac28beaf262fd6ef4b91411fc8335 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 13 May 2021 09:07:53 +0000 Subject: powerpc/kprobes: Replace ppc_optinsn by common optinsn Commit 51c9c0843993 ("powerpc/kprobes: Implement Optprobes") implemented a powerpc-specific version of optinsn in order to work around the 32MB limitation for direct branches. Instead of implementing a dedicated powerpc version, use the common optinsn and override the allocation and freeing functions.
This also indirectly removes the clang warning about is_kprobe_ppc_optinsn_slot() not being used, and powerpc will now benefit from commit 5b485629ba0d ("kprobes, extable: Identify kprobes trampolines as kernel text area"). Suggested-by: Masami Hiramatsu Signed-off-by: Christophe Leroy Acked-by: Masami Hiramatsu Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ec5e85f9f9abcfecc959a03495f4a7858eb4d203.1620896780.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/optprobes.c | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index cdf87086fa33..a370190cd02a 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -31,11 +31,9 @@ #define TMPL_END_IDX \ (optprobe_template_end - optprobe_template_entry) -DEFINE_INSN_CACHE_OPS(ppc_optinsn); - static bool insn_page_in_use; -static void *__ppc_alloc_insn_page(void) +void *alloc_optinsn_page(void) { if (insn_page_in_use) return NULL; @@ -43,20 +41,11 @@ static void *__ppc_alloc_insn_page(void) return &optinsn_slot; } -static void __ppc_free_insn_page(void *page __maybe_unused) +void free_optinsn_page(void *page) { insn_page_in_use = false; } -struct kprobe_insn_cache kprobe_ppc_optinsn_slots = { - .mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex), - .pages = LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages), - /* insn_size initialized later */ - .alloc = __ppc_alloc_insn_page, - .free = __ppc_free_insn_page, - .nr_garbage = 0, -}; - /* * Check if we can optimize this probe. Returns NIP post-emulation if this can * be optimized and 0 otherwise. @@ -136,7 +125,7 @@ NOKPROBE_SYMBOL(optimized_callback); void arch_remove_optimized_kprobe(struct optimized_kprobe *op) { if (op->optinsn.insn) { - free_ppc_optinsn_slot(op->optinsn.insn, 1); + free_optinsn_slot(op->optinsn.insn, 1); op->optinsn.insn = NULL; } } @@ -203,14 +192,12 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) unsigned long nip, size; int rc, i; - kprobe_ppc_optinsn_slots.insn_size = MAX_OPTINSN_SIZE; - nip = can_optimize(p); if (!nip) return -EILSEQ; /* Allocate instruction slot for detour buffer */ - buff = get_ppc_optinsn_slot(); + buff = get_optinsn_slot(); if (!buff) return -ENOMEM; @@ -297,7 +284,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) return 0; error: - free_ppc_optinsn_slot(buff, 0); + free_optinsn_slot(buff, 0); return -ERANGE; } -- cgit v1.2.3 From 2cec178e35baf57d307c0982fd2e53055bd1e9bb Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 14 May 2021 11:24:20 -0500 Subject: powerpc/xmon: make dumping log buffer contents more reliable Log buffer entries that are too long for dump_log_buf()'s small local buffer are: * silently discarded when a single-line entry is too long; kmsg_dump_get_line() returns true but sets &len to 0. * silently truncated to the last fitting newline when a multi-line entry is too long, e.g. register dumps from __show_regs(); this seems undetectable via the kmsg_dump API. xmon_printf()'s internal buffer is already 1KB; enlarge dump_log_buf()'s own buffer to match and make it statically allocated. Verified that this allows complete printing of register dumps on ppc64le with both CONFIG_PRINTK_TIME=y and CONFIG_PRINTK_CALLER=y.
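For context, the resulting loop looks roughly like the sketch below (a paraphrase, not a verbatim copy of xmon.c); the essential change is that buf is now 1KB, matching xmon_printf()'s internal buffer, and static so it does not burden xmon's stack:

static void dump_log_buf(void)
{
	struct kmsg_dump_iter iter;
	static unsigned char buf[1024];	/* was: unsigned char buf[128] on the stack */
	size_t len;

	kmsg_dump_rewind(&iter);
	/* With the old 128-byte buffer, records longer than the buffer were
	 * silently dropped (len set to 0) or truncated mid-record. */
	while (kmsg_dump_get_line(&iter, false, buf, sizeof(buf) - 1, &len)) {
		buf[len] = '\0';
		xmon_printf("%s", buf);
	}
}
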
Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210514162420.2911458-1-nathanl@linux.ibm.com --- arch/powerpc/xmon/xmon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index c8173e92f19d..f73c10869e64 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -2975,7 +2975,7 @@ static void dump_log_buf(void) { struct kmsg_dump_iter iter; - unsigned char buf[128]; + static unsigned char buf[1024]; size_t len; if (setjmp(bus_error_jmp) != 0) { -- cgit v1.2.3 From 027f55e87c3094270a3223f7331d033fe15a2b3f Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 14 May 2021 16:44:22 -0500 Subject: tty: hvc: udbg_hvc: retry putc on -EAGAIN hvterm_raw_put_chars() calls hvc_put_chars(), which may return -EAGAIN when the underlying hcall returns a "busy" status, but udbg_hvc_putc() doesn't handle this. When using xmon on a PowerVM guest, this can result in incomplete or garbled output when printing relatively large amounts of data quickly, such as when dumping the kernel log buffer. Call again on -EAGAIN. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210514214422.3019105-1-nathanl@linux.ibm.com --- drivers/tty/hvc/hvc_vio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/hvc/hvc_vio.c b/drivers/tty/hvc/hvc_vio.c index 798f27f40cc2..72b11aa7e0a6 100644 --- a/drivers/tty/hvc/hvc_vio.c +++ b/drivers/tty/hvc/hvc_vio.c @@ -249,7 +249,7 @@ static void udbg_hvc_putc(char c) count = hvterm_hvsi_put_chars(0, &c, 1); break; } - } while(count == 0); + } while (count == 0 || count == -EAGAIN); } static int udbg_hvc_getc_poll(void) -- cgit v1.2.3 From 015d98149b326e0f1f02e44413112ca8b4330543 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 28 May 2021 11:27:52 -0700 Subject: powerpc/barrier: Avoid collision with clang's __lwsync macro A change in clang 13 results in the __lwsync macro being defined as __builtin_ppc_lwsync, which emits 'lwsync' or 'msync' depending on what the target supports. This breaks the build because of -Werror in arch/powerpc, along with thousands of warnings: In file included from arch/powerpc/kernel/pmc.c:12: In file included from include/linux/bug.h:5: In file included from arch/powerpc/include/asm/bug.h:109: In file included from include/asm-generic/bug.h:20: In file included from include/linux/kernel.h:12: In file included from include/linux/bitops.h:32: In file included from arch/powerpc/include/asm/bitops.h:62: arch/powerpc/include/asm/barrier.h:49:9: error: '__lwsync' macro redefined [-Werror,-Wmacro-redefined] #define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory") ^ :308:9: note: previous definition is here #define __lwsync __builtin_ppc_lwsync ^ 1 error generated. Undefine this macro so that the runtime patching introduced by commit 2d1b2027626d ("powerpc: Fixup lwsync at runtime") continues to work properly with clang and the build no longer breaks. 
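The defensive pattern generalizes: when a compiler may predefine a macro name you need to define yourself, an unconditional #undef first avoids both the redefinition warning and silently picking up the compiler's version (#undef of an undefined name is a no-op per the C standard). A standalone sketch with an arbitrary macro name, not kernel code:

#include <stdio.h>

/* Simulate a compiler-provided definition colliding with ours. */
#define __my_sync __builtin_trap

/* Drop any prior definition before supplying our own. */
#undef __my_sync
#define __my_sync() __asm__ __volatile__("" ::: "memory")

int main(void)
{
	__my_sync();	/* expands to the barrier, not the builtin */
	puts("ok");
	return 0;
}
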
Cc: stable@vger.kernel.org Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: Michael Ellerman Link: https://github.com/ClangBuiltLinux/linux/issues/1386 Link: https://github.com/llvm/llvm-project/commit/62b5df7fe2b3fda1772befeda15598fbef96a614 Link: https://lore.kernel.org/r/20210528182752.1852002-1-nathan@kernel.org --- arch/powerpc/include/asm/barrier.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index 7ae29cfb06c0..f0e687236484 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -46,6 +46,8 @@ # define SMPWMB eieio #endif +/* clang defines this macro for a builtin, which will not work with runtime patching */ +#undef __lwsync #define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory") #define dma_rmb() __lwsync() #define dma_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory") -- cgit v1.2.3 From 07e2d6cf91079ca01db7fb989a02edd8009dcacd Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Fri, 4 Jun 2021 15:58:25 +0000 Subject: powerpc/ps3: Add firmware version to sysfs Add a new sysfs entry /sys/firmware/ps3/fw-version that exports the PS3's firmware version. The firmware version is available through an LV1 hypercall, and we've been printing it to the boot log, but haven't provided an easy way for user utilities to get it. Signed-off-by: Geoff Levand Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/41509b2da647cd34b1331cc4756c8774b1e284eb.1622822173.git.geoff@infradead.org --- arch/powerpc/platforms/ps3/setup.c | 43 +++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index e9ae5dd03593..3de9145c20bc 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -36,6 +36,7 @@ DEFINE_MUTEX(ps3_gpu_mutex); EXPORT_SYMBOL_GPL(ps3_gpu_mutex); static union ps3_firmware_version ps3_firmware_version; +static char ps3_firmware_version_str[16]; void ps3_get_firmware_version(union ps3_firmware_version *v) { @@ -182,6 +183,40 @@ static int ps3_set_dabr(unsigned long dabr, unsigned long dabrx) return lv1_set_dabr(dabr, dabrx) ? 
-1 : 0; } +static ssize_t ps3_fw_version_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%s", ps3_firmware_version_str); +} + +static int __init ps3_setup_sysfs(void) +{ + static struct kobj_attribute attr = __ATTR(fw-version, S_IRUGO, + ps3_fw_version_show, NULL); + static struct kobject *kobj; + int result; + + kobj = kobject_create_and_add("ps3", firmware_kobj); + + if (!kobj) { + pr_warn("%s:%d: kobject_create_and_add failed.\n", __func__, + __LINE__); + return -ENOMEM; + } + + result = sysfs_create_file(kobj, &attr.attr); + + if (result) { + pr_warn("%s:%d: sysfs_create_file failed.\n", __func__, + __LINE__); + kobject_put(kobj); + return -ENOMEM; + } + + return 0; +} +core_initcall(ps3_setup_sysfs); + static void __init ps3_setup_arch(void) { u64 tmp; @@ -190,9 +225,11 @@ static void __init ps3_setup_arch(void) lv1_get_version_info(&ps3_firmware_version.raw, &tmp); - printk(KERN_INFO "PS3 firmware version %u.%u.%u\n", - ps3_firmware_version.major, ps3_firmware_version.minor, - ps3_firmware_version.rev); + snprintf(ps3_firmware_version_str, sizeof(ps3_firmware_version_str), + "%u.%u.%u", ps3_firmware_version.major, + ps3_firmware_version.minor, ps3_firmware_version.rev); + + printk(KERN_INFO "PS3 firmware version %s\n", ps3_firmware_version_str); ps3_spu_set_platform(); -- cgit v1.2.3 From ff4a825e4a24cdf7f840461ced6283bf865ab7be Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Fri, 4 Jun 2021 15:58:25 +0000 Subject: powerpc/ps3: Re-align DTB in image Change the PS3 linker script to align the DTB at 8 bytes, the same alignment as that of the 'generic' powerpc linker script. Signed-off-by: Geoff Levand Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/245897ed65e402686a4b114ba618e935cb5c6506.1622822173.git.geoff@infradead.org --- arch/powerpc/boot/zImage.ps3.lds.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/boot/zImage.ps3.lds.S b/arch/powerpc/boot/zImage.ps3.lds.S index 7b2ff2eaa73a..d0ffb493614d 100644 --- a/arch/powerpc/boot/zImage.ps3.lds.S +++ b/arch/powerpc/boot/zImage.ps3.lds.S @@ -8,7 +8,7 @@ SECTIONS .kernel:vmlinux.bin : { *(.kernel:vmlinux.bin) } _vmlinux_end = .; - . = ALIGN(4096); + . = ALIGN(8); _dtb_start = .; .kernel:dtb : { *(.kernel:dtb) } _dtb_end = .; -- cgit v1.2.3 From 6caebff168235b6102e5dc57cb95a2374301720a Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Thu, 3 Jun 2021 19:17:01 +0000 Subject: powerpc/ps3: Add CONFIG_PS3_VERBOSE_RESULT option To aid debugging, add a new PS3 kernel config option PS3_VERBOSE_RESULT that, when enabled, will print more verbose messages for the result of LV1 hypercalls.
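For background (a generic illustration, not code from this patch): a bool Kconfig symbol set to y becomes a CONFIG_ preprocessor macro at build time, which is how the verbose string table is compiled in or out. A userspace sketch modeled on the ps3_result() change:

#include <stdio.h>

/* Normally injected by Kbuild when the Kconfig option is =y;
 * uncomment to simulate a verbose build. */
/* #define CONFIG_PS3_VERBOSE_RESULT */

static const char *ps3_result(int result)
{
#if defined(DEBUG) || defined(CONFIG_PS3_VERBOSE_RESULT)
	switch (result) {
	case 0:
		return "LV1_SUCCESS (0)";
	/* ... the full result table exists only in verbose builds ... */
	default:
		return "** unknown result **";
	}
#else
	return "";	/* non-verbose builds carry no string table */
#endif
}

int main(void)
{
	printf("result: '%s'\n", ps3_result(0));
	return 0;
}
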
Signed-off-by: Geoff Levand Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0ce4b6969a08094a747bd382dbfd30b72ebc192d.1622746428.git.geoff@infradead.org --- arch/powerpc/include/asm/ps3.h | 2 +- arch/powerpc/platforms/ps3/Kconfig | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h index e646c7f218bc..7df145901def 100644 --- a/arch/powerpc/include/asm/ps3.h +++ b/arch/powerpc/include/asm/ps3.h @@ -232,7 +232,7 @@ enum lv1_result { static inline const char* ps3_result(int result) { -#if defined(DEBUG) || defined(PS3_VERBOSE_RESULT) +#if defined(DEBUG) || defined(PS3_VERBOSE_RESULT) || defined(CONFIG_PS3_VERBOSE_RESULT) switch (result) { case LV1_SUCCESS: return "LV1_SUCCESS (0)"; diff --git a/arch/powerpc/platforms/ps3/Kconfig b/arch/powerpc/platforms/ps3/Kconfig index e32406e918d0..ebed94942d39 100644 --- a/arch/powerpc/platforms/ps3/Kconfig +++ b/arch/powerpc/platforms/ps3/Kconfig @@ -85,6 +85,15 @@ config PS3_SYS_MANAGER This support is required for PS3 system control. In general, all users will say Y or M. +config PS3_VERBOSE_RESULT + bool "PS3 Verbose LV1 hypercall results" if PS3_ADVANCED + depends on PPC_PS3 + help + Enables more verbose log messages for LV1 hypercall results. + + If in doubt, say N here and reduce the size of the kernel by a + small amount. + config PS3_REPOSITORY_WRITE bool "PS3 Repository write support" if PS3_ADVANCED depends on PPC_PS3 -- cgit v1.2.3 From 472b440fd26822c645befe459172dafdc2d225de Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Thu, 3 Jun 2021 19:17:01 +0000 Subject: powerpc/ps3: Warn on PS3 device errors To aid debugging PS3 boot problems, change the log level of several PS3 device errors from pr_debug to pr_warn.
Signed-off-by: Geoff Levand Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/eb5c1c10da0bbdeb27c8b069187b4f58e429e837.1622746428.git.geoff@infradead.org --- arch/powerpc/platforms/ps3/system-bus.c | 9 +++++---- drivers/ps3/ps3-vuart.c | 2 +- drivers/ps3/ps3av.c | 22 +++++++++++----------- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index b431f41c6cb5..1a5665875165 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -64,9 +64,10 @@ static int ps3_open_hv_device_sb(struct ps3_system_bus_device *dev) result = lv1_open_device(dev->bus_id, dev->dev_id, 0); if (result) { - pr_debug("%s:%d: lv1_open_device failed: %s\n", __func__, - __LINE__, ps3_result(result)); - result = -EPERM; + pr_warn("%s:%d: lv1_open_device dev=%u.%u(%s) failed: %s\n", + __func__, __LINE__, dev->match_id, dev->match_sub_id, + dev_name(&dev->core), ps3_result(result)); + result = -EPERM; } done: @@ -120,7 +121,7 @@ static int ps3_open_hv_device_gpu(struct ps3_system_bus_device *dev) result = lv1_gpu_open(0); if (result) { - pr_debug("%s:%d: lv1_gpu_open failed: %s\n", __func__, + pr_warn("%s:%d: lv1_gpu_open failed: %s\n", __func__, __LINE__, ps3_result(result)); result = -EPERM; } diff --git a/drivers/ps3/ps3-vuart.c b/drivers/ps3/ps3-vuart.c index e34ae6a442c7..6328abd51ffa 100644 --- a/drivers/ps3/ps3-vuart.c +++ b/drivers/ps3/ps3-vuart.c @@ -358,7 +358,7 @@ static int ps3_vuart_raw_write(struct ps3_system_bus_device *dev, ps3_mm_phys_to_lpar(__pa(buf)), bytes, bytes_written); if (result) { - dev_dbg(&dev->core, "%s:%d: lv1_write_virtual_uart failed: " + dev_warn(&dev->core, "%s:%d: lv1_write_virtual_uart failed: " "%s\n", __func__, __LINE__, ps3_result(result)); return result; } diff --git a/drivers/ps3/ps3av.c b/drivers/ps3/ps3av.c index 9d66257e1da5..516e6d14d32e 100644 --- a/drivers/ps3/ps3av.c +++ b/drivers/ps3/ps3av.c @@ -217,9 +217,9 @@ static int ps3av_send_cmd_pkt(const struct ps3av_send_hdr *send_buf, /* send pkt */ res = ps3av_vuart_write(ps3av->dev, send_buf, write_len); if (res < 0) { - dev_dbg(&ps3av->dev->core, - "%s: ps3av_vuart_write() failed (result=%d)\n", - __func__, res); + dev_warn(&ps3av->dev->core, + "%s:%d: ps3av_vuart_write() failed: %s\n", __func__, + __LINE__, ps3_result(res)); return res; } @@ -230,9 +230,9 @@ static int ps3av_send_cmd_pkt(const struct ps3av_send_hdr *send_buf, res = ps3av_vuart_read(ps3av->dev, recv_buf, PS3AV_HDR_SIZE, timeout); if (res != PS3AV_HDR_SIZE) { - dev_dbg(&ps3av->dev->core, - "%s: ps3av_vuart_read() failed (result=%d)\n", - __func__, res); + dev_warn(&ps3av->dev->core, + "%s:%d: ps3av_vuart_read() failed: %s\n", __func__, + __LINE__, ps3_result(res)); return res; } @@ -240,9 +240,9 @@ static int ps3av_send_cmd_pkt(const struct ps3av_send_hdr *send_buf, res = ps3av_vuart_read(ps3av->dev, &recv_buf->cid, recv_buf->size, timeout); if (res < 0) { - dev_dbg(&ps3av->dev->core, - "%s: ps3av_vuart_read() failed (result=%d)\n", - __func__, res); + dev_warn(&ps3av->dev->core, + "%s:%d: ps3av_vuart_read() failed: %s\n", __func__, + __LINE__, ps3_result(res)); return res; } res += PS3AV_HDR_SIZE; /* total len */ @@ -251,8 +251,8 @@ static int ps3av_send_cmd_pkt(const struct ps3av_send_hdr *send_buf, } while (event); if ((cmd | PS3AV_REPLY_BIT) != recv_buf->cid) { - dev_dbg(&ps3av->dev->core, "%s: reply err (result=%x)\n", - __func__, recv_buf->cid); + dev_warn(&ps3av->dev->core, "%s:%d: reply err: %x\n", 
__func__, + __LINE__, recv_buf->cid); return -EINVAL; } -- cgit v1.2.3 From 9733862e50fdba55e7f1554e4286fcc5302ff28e Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Thu, 3 Jun 2021 19:17:02 +0000 Subject: powerpc/ps3: Add dma_mask to ps3_dma_region Commit f959dcd6ddfd29235030e8026471ac1b022ad2b0 ("dma-direct: Fix potential NULL pointer dereference") added a null check on the dma_mask pointer of the kernel's device structure. Add a dma_mask variable to the ps3_dma_region structure and set the device structure's dma_mask pointer to point to this new variable. Fixes runtime errors like these: ps3_system_bus_match:349: dev=8.0(sb_01), drv=8.0(ps3flash): match WARNING: CPU: 0 PID: 1 at kernel/dma/mapping.c:151 .dma_map_page_attrs+0x34/0x1e0 ps3flash sb_01: ps3stor_setup:193: map DMA region failed Signed-off-by: Geoff Levand Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/562d0c9ea0100a30c3b186bcc7adb34b0bbd2cd7.1622746428.git.geoff@infradead.org --- arch/powerpc/include/asm/ps3.h | 2 ++ arch/powerpc/platforms/ps3/mm.c | 12 ++++++++++++ 2 files changed, 14 insertions(+) diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h index 7df145901def..8a0d8fb35328 100644 --- a/arch/powerpc/include/asm/ps3.h +++ b/arch/powerpc/include/asm/ps3.h @@ -71,6 +71,7 @@ struct ps3_dma_region_ops; * @bus_addr: The 'translated' bus address of the region. * @len: The length in bytes of the region. * @offset: The offset from the start of memory of the region. + * @dma_mask: Device dma_mask. * @ioid: The IOID of the device who owns this region * @chunk_list: Opaque variable used by the ioc page manager. * @region_ops: struct ps3_dma_region_ops - dma region operations @@ -85,6 +86,7 @@ struct ps3_dma_region { enum ps3_dma_region_type region_type; unsigned long len; unsigned long offset; + u64 dma_mask; /* driver variables (set by ps3_dma_region_create) */ unsigned long bus_addr; diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c index d094321964fb..a81eac35d900 100644 --- a/arch/powerpc/platforms/ps3/mm.c +++ b/arch/powerpc/platforms/ps3/mm.c @@ -6,6 +6,7 @@ * Copyright 2006 Sony Corp. */ +#include <linux/dma-mapping.h> #include #include #include @@ -1118,6 +1119,7 @@ int ps3_dma_region_init(struct ps3_system_bus_device *dev, enum ps3_dma_region_type region_type, void *addr, unsigned long len) { unsigned long lpar_addr; + int result; lpar_addr = addr ? ps3_mm_phys_to_lpar(__pa(addr)) : 0; @@ -1129,6 +1131,16 @@ int ps3_dma_region_init(struct ps3_system_bus_device *dev, r->offset -= map.r1.offset; r->len = len ? len : ALIGN(map.total, 1 << r->page_size); + dev->core.dma_mask = &r->dma_mask; + + result = dma_set_mask_and_coherent(&dev->core, DMA_BIT_MASK(32)); + + if (result < 0) { + dev_err(&dev->core, "%s:%d: dma_set_mask_and_coherent failed: %d\n", + __func__, __LINE__, result); + return result; + } + switch (dev->dev_type) { case PS3_DEVICE_TYPE_SB: r->region_ops = (USE_DYNAMIC_DMA) -- cgit v1.2.3 From b629f6c0ab8668a186fda2627296d0cbcc45a368 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Tue, 1 Jun 2021 12:02:00 -0700 Subject: powerpc/52xx: Add fallthrough in mpc52xx_wdt_ioctl() With gcc 10.3, there is this compiler error: compiler.h:56:26: error: this statement may fall through mpc52xx_gpt.c:586:2: note: here 586 | case WDIOC_GETTIMEOUT: | ^~~~ So add the fallthrough pseudo keyword.
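fallthrough is the kernel's pseudo keyword from <linux/compiler_attributes.h>; it expands to __attribute__((fallthrough)) on compilers that support it and tells -Wimplicit-fallthrough that the missing break is intentional. A standalone sketch of the same idea (the macro definition below mimics, but is not, the kernel's):

#include <stdio.h>

#define fallthrough __attribute__((__fallthrough__))

static long wdt_ioctl(unsigned int cmd, long arg)
{
	long timeout = 10;

	switch (cmd) {
	case 1:				/* WDIOC_SETTIMEOUT-like */
		timeout = arg;		/* set the new value... */
		fallthrough;		/* ...then deliberately report it */
	case 2:				/* WDIOC_GETTIMEOUT-like */
		printf("timeout: %ld\n", timeout);
		return timeout;
	default:
		return -1;
	}
}

int main(void)
{
	wdt_ioctl(1, 42);
	return 0;
}
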
Signed-off-by: Tom Rix Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210601190200.2637776-1-trix@redhat.com --- arch/powerpc/platforms/52xx/mpc52xx_gpt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c index 8c0d324f657e..3823df235f25 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c @@ -582,6 +582,7 @@ static long mpc52xx_wdt_ioctl(struct file *file, unsigned int cmd, if (ret) break; /* fall through and return the timeout */ + fallthrough; case WDIOC_GETTIMEOUT: /* we need to round here as to avoid e.g. the following -- cgit v1.2.3 From 911bacda4658129bee039dc90fc0c3f193ee2695 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Tue, 1 Jun 2021 16:51:27 +0800 Subject: powerpc/spufs: Remove set but not used variable 'dummy' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes gcc '-Wunused-but-set-variable' warning: arch/powerpc/platforms/cell/spufs/switch.c: In function 'check_ppu_mb_stat': arch/powerpc/platforms/cell/spufs/switch.c:1660:6: warning: variable ‘dummy’ set but not used [-Wunused-but-set-variable] arch/powerpc/platforms/cell/spufs/switch.c: In function 'check_ppuint_mb_stat': arch/powerpc/platforms/cell/spufs/switch.c:1675:6: warning: variable ‘dummy’ set but not used [-Wunused-but-set-variable] It has never been used since its introduction. Signed-off-by: Baokun Li Acked-by: Arnd Bergmann Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210601085127.139598-1-libaokun1@huawei.com --- arch/powerpc/platforms/cell/spufs/switch.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c index d56b4e3241cd..b41e81b22fdc 100644 --- a/arch/powerpc/platforms/cell/spufs/switch.c +++ b/arch/powerpc/platforms/cell/spufs/switch.c @@ -1657,14 +1657,13 @@ static inline void restore_spu_mb(struct spu_state *csa, struct spu *spu) static inline void check_ppu_mb_stat(struct spu_state *csa, struct spu *spu) { struct spu_problem __iomem *prob = spu->problem; - u32 dummy = 0; /* Restore, Step 66: * If CSA.MB_Stat[P]=0 (mailbox empty) then * read from the PPU_MB register. */ if ((csa->prob.mb_stat_R & 0xFF) == 0) { - dummy = in_be32(&prob->pu_mb_R); + in_be32(&prob->pu_mb_R); eieio(); } } @@ -1672,14 +1671,13 @@ static inline void check_ppu_mb_stat(struct spu_state *csa, struct spu *spu) static inline void check_ppuint_mb_stat(struct spu_state *csa, struct spu *spu) { struct spu_priv2 __iomem *priv2 = spu->priv2; - u64 dummy = 0UL; /* Restore, Step 66: * If CSA.MB_Stat[I]=0 (mailbox empty) then * read from the PPUINT_MB register.
*/ if ((csa->prob.mb_stat_R & 0xFF0000) == 0) { - dummy = in_be64(&priv2->puint_mb_R); + in_be64(&priv2->puint_mb_R); eieio(); spu_int_stat_clear(spu, 2, CLASS2_ENABLE_MAILBOX_INTR); eieio(); -- cgit v1.2.3 From f377f7da26d2af87e2ddc39190546f62ecdb2bd8 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Tue, 1 Jun 2021 16:53:19 +0800 Subject: powerpc/spider-pci: Remove set but not used variable 'val' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes gcc '-Wunused-but-set-variable' warning: arch/powerpc/platforms/cell/spider-pci.c: In function 'spiderpci_io_flush': arch/powerpc/platforms/cell/spider-pci.c:28:6: warning: variable ‘val’ set but not used [-Wunused-but-set-variable] It has never been used since its introduction. Signed-off-by: Baokun Li Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210601085319.140461-1-libaokun1@huawei.com --- arch/powerpc/platforms/cell/spider-pci.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c index 93ea41680f54..a1c293f42a1f 100644 --- a/arch/powerpc/platforms/cell/spider-pci.c +++ b/arch/powerpc/platforms/cell/spider-pci.c @@ -25,10 +25,9 @@ struct spiderpci_iowa_private { static void spiderpci_io_flush(struct iowa_bus *bus) { struct spiderpci_iowa_private *priv; - u32 val; priv = bus->private; - val = in_be32(priv->regs + SPIDER_PCI_DUMMY_READ); + in_be32(priv->regs + SPIDER_PCI_DUMMY_READ); iosync(); } -- cgit v1.2.3 From 8f6a54bcaf62a791a7bceccc093497f7f53e2e26 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Tue, 1 Jun 2021 14:36:25 +0800 Subject: selftests/powerpc: Remove the repeated declaration Functions 'event_ebb_init' and 'event_leader_ebb_init' are declared twice in the header file, so remove the repeated declarations. Signed-off-by: Shaokun Zhang Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1622529385-5938-1-git-send-email-zhangshaokun@hisilicon.com --- tools/testing/selftests/powerpc/pmu/ebb/ebb.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.h b/tools/testing/selftests/powerpc/pmu/ebb/ebb.h index b5bc2b616075..2c803b5b48d6 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/ebb.h +++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.h @@ -55,8 +55,6 @@ void ebb_global_disable(void); bool ebb_is_supported(void); void ebb_freeze_pmcs(void); void ebb_unfreeze_pmcs(void); -void event_ebb_init(struct event *e); -void event_leader_ebb_init(struct event *e); int count_pmc(int pmc, uint32_t sample_period); void dump_ebb_state(void); void dump_summary_ebb_state(void); -- cgit v1.2.3 From 4cfdd9201cfb85538975f5c8fb83941c3d463ed2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 10 May 2021 17:49:25 +0300 Subject: powerpc/prom_init: Move custom isspace() to its own namespace If for some reason any of the headers includes ctype.h, we will have a name collision. Avoid this by moving isspace() to a dedicated namespace. First appearance of the code is in the commit cf68787b68a2 ("powerpc/prom_init: Evaluate mem kernel parameter for early allocation").
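The collision being avoided is purely a preprocessor one: a bare isdigit() macro would clash with ctype.h if both ever reached the same translation unit. A standalone sketch of the namespaced helpers (same definitions as the patch):

#include <stdio.h>
#include <ctype.h>	/* would conflict with an unprefixed isdigit() macro */

#define prom_isdigit(c)	('0' <= (c) && (c) <= '9')
#define prom_islower(c)	('a' <= (c) && (c) <= 'z')
#define prom_toupper(c)	(prom_islower(c) ? ((c) - 'a' + 'A') : (c))

int main(void)
{
	/* The libc and prom_ versions now coexist without redefinition. */
	printf("%d %d %c\n", isdigit('7') != 0, prom_isdigit('7'), prom_toupper('x'));
	return 0;
}
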
Reported-by: kernel test robot Signed-off-by: Andy Shevchenko [mpe: Reformat prom_isxdigit() now that we allow longer lines] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210510144925.58195-1-andriy.shevchenko@linux.intel.com --- arch/powerpc/kernel/prom_init.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 41ed7e33d897..523b31685c4c 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -701,13 +701,12 @@ static int __init prom_setprop(phandle node, const char *nodename, } /* We can't use the standard versions because of relocation headaches. */ -#define isxdigit(c) (('0' <= (c) && (c) <= '9') \ || ('a' <= (c) && (c) <= 'f') \ || ('A' <= (c) && (c) <= 'F')) +#define prom_isxdigit(c) \ (('0' <= (c) && (c) <= '9') || ('a' <= (c) && (c) <= 'f') || ('A' <= (c) && (c) <= 'F')) -#define isdigit(c) ('0' <= (c) && (c) <= '9') -#define islower(c) ('a' <= (c) && (c) <= 'z') -#define toupper(c) (islower(c) ? ((c) - 'a' + 'A') : (c)) +#define prom_isdigit(c) ('0' <= (c) && (c) <= '9') +#define prom_islower(c) ('a' <= (c) && (c) <= 'z') +#define prom_toupper(c) (prom_islower(c) ? ((c) - 'a' + 'A') : (c)) static unsigned long prom_strtoul(const char *cp, const char **endp) { @@ -716,14 +715,14 @@ static unsigned long prom_strtoul(const char *cp, const char **endp) if (*cp == '0') { base = 8; cp++; - if (toupper(*cp) == 'X') { + if (prom_toupper(*cp) == 'X') { cp++; base = 16; } } - while (isxdigit(*cp) && (value = isdigit(*cp) ? *cp - '0' : toupper(*cp) - 'A' + 10) < base) { + while (prom_isxdigit(*cp) && (value = prom_isdigit(*cp) ? *cp - '0' : prom_toupper(*cp) - 'A' + 10) < base) { result = result * base + value; cp++; } -- cgit v1.2.3 From 87f19ea10100892637d4eee9069fad4ed61cb6a5 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 7 May 2021 14:01:09 +0000 Subject: powerpc/perf: Simplify Makefile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit arch/powerpc/Kbuild descends into arch/powerpc/perf/ only when CONFIG_PERF_EVENTS is selected, so there is no need to take CONFIG_PERF_EVENTS into account in arch/powerpc/perf/Makefile.
Signed-off-by: Christophe Leroy Reviewed-by: Michal Suchánek Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d37f61afca55b5b33787b643890e061ae1c18f5f.1620396045.git.christophe.leroy@csgroup.eu --- arch/powerpc/perf/Makefile | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile index c02854dea2b2..2f46e31c7612 100644 --- a/arch/powerpc/perf/Makefile +++ b/arch/powerpc/perf/Makefile @@ -1,9 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_PERF_EVENTS) += callchain.o callchain_$(BITS).o perf_regs.o -ifdef CONFIG_COMPAT -obj-$(CONFIG_PERF_EVENTS) += callchain_32.o -endif +obj-y += callchain.o callchain_$(BITS).o perf_regs.o +obj-$(CONFIG_COMPAT) += callchain_32.o obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o obj64-$(CONFIG_PPC_PERF_CTRS) += ppc970-pmu.o power5-pmu.o \ -- cgit v1.2.3 From ddf4a7bcd09439e82c4d6f959f4ff6c53f07466f Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Fri, 11 Jun 2021 17:58:27 +1000 Subject: powerpc/tau: Remove superfluous parameter in alloc_workqueue() call This avoids an (optional) compiler warning: arch/powerpc/kernel/tau_6xx.c: In function 'TAU_init': arch/powerpc/kernel/tau_6xx.c:204:30: error: too many arguments for format [-Werror=format-extra-args] tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1, 0); Fixes: b1c6a0a10bfa ("powerpc/tau: Convert from timer to workqueue") Reported-by: Naresh Kamboju Signed-off-by: Finn Thain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a1456e8bbd33ef702e3ff6f14b1bf3919241c62b.1623398307.git.fthain@linux-m68k.org --- arch/powerpc/kernel/tau_6xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index 6c31af7f4fa8..b9a047d92ec0 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -201,7 +201,7 @@ static int __init TAU_init(void) tau_int_enable = IS_ENABLED(CONFIG_TAU_INT) && !strcmp(cur_cpu_spec->platform, "ppc750"); - tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1, 0); + tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1); if (!tau_workq) return -ENOMEM; -- cgit v1.2.3 From 4423eff71ca6b8f2c5e0fc4cea33d8cdfe3c3740 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 11 May 2021 06:08:06 +0000 Subject: powerpc: Force inlining of csum_add() Commit 328e7e487a46 ("powerpc: force inlining of csum_partial() to avoid multiple csum_partial() with GCC10") inlined csum_partial(). Now that csum_partial() is inlined, GCC outlines csum_add() when called by csum_partial(). c064fb28 <csum_add>: c064fb28: 7c 63 20 14 addc r3,r3,r4 c064fb2c: 7c 63 01 94 addze r3,r3 c064fb30: 4e 80 00 20 blr c0665fb8 <csum_add>: c0665fb8: 7c 63 20 14 addc r3,r3,r4 c0665fbc: 7c 63 01 94 addze r3,r3 c0665fc0: 4e 80 00 20 blr c066719c: 7c 9a c0 2e lwzx r4,r26,r24 c06671a0: 38 60 00 00 li r3,0 c06671a4: 7f 1a c2 14 add r24,r26,r24 c06671a8: 4b ff ee 11 bl c0665fb8 <csum_add> c06671ac: 80 98 00 04 lwz r4,4(r24) c06671b0: 4b ff ee 09 bl c0665fb8 <csum_add> c06671b4: 80 98 00 08 lwz r4,8(r24) c06671b8: 4b ff ee 01 bl c0665fb8 <csum_add> c06671bc: a0 98 00 0c lhz r4,12(r24) c06671c0: 4b ff ed f9 bl c0665fb8 <csum_add> c06671c4: 7c 63 18 f8 not r3,r3 c06671c8: 81 3f 00 68 lwz r9,104(r31) c06671cc: 81 5f 00 a0 lwz r10,160(r31) c06671d0: 7d 29 18 14 addc r9,r9,r3 c06671d4: 7d 29 01 94 addze r9,r9 c06671d8: 91 3f 00 68 stw r9,104(r31) c06671dc: 7d 1a 50 50 subf r8,r26,r10 c06671e0: 83 01 00 10 lwz r24,16(r1) c06671e4: 83 41 00 18 lwz r26,24(r1) The sum with 0 is useless and should have been skipped.
And there is even one completely unused instance of csum_add(). In file included from ./include/net/checksum.h:22, from ./include/linux/skbuff.h:28, from ./include/linux/icmp.h:16, from net/ipv6/ip6_tunnel.c:23: ./arch/powerpc/include/asm/checksum.h: In function '__ip6_tnl_rcv': ./arch/powerpc/include/asm/checksum.h:94:22: warning: inlining failed in call to 'csum_add': call is unlikely and code size would grow [-Winline] 94 | static inline __wsum csum_add(__wsum csum, __wsum addend) | ^~~~~~~~ ./arch/powerpc/include/asm/checksum.h:172:31: note: called from here 172 | sum = csum_add(sum, (__force __wsum)*(const u32 *)buff); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ./arch/powerpc/include/asm/checksum.h:94:22: warning: inlining failed in call to 'csum_add': call is unlikely and code size would grow [-Winline] 94 | static inline __wsum csum_add(__wsum csum, __wsum addend) | ^~~~~~~~ ./arch/powerpc/include/asm/checksum.h:177:31: note: called from here 177 | sum = csum_add(sum, (__force __wsum) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 178 | *(const u32 *)(buff + 4)); | ~~~~~~~~~~~~~~~~~~~~~~~~~ ./arch/powerpc/include/asm/checksum.h:94:22: warning: inlining failed in call to 'csum_add': call is unlikely and code size would grow [-Winline] 94 | static inline __wsum csum_add(__wsum csum, __wsum addend) | ^~~~~~~~ ./arch/powerpc/include/asm/checksum.h:183:31: note: called from here 183 | sum = csum_add(sum, (__force __wsum) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 184 | *(const u32 *)(buff + 8)); | ~~~~~~~~~~~~~~~~~~~~~~~~~ ./arch/powerpc/include/asm/checksum.h:94:22: warning: inlining failed in call to 'csum_add': call is unlikely and code size would grow [-Winline] 94 | static inline __wsum csum_add(__wsum csum, __wsum addend) | ^~~~~~~~ ./arch/powerpc/include/asm/checksum.h:186:31: note: called from here 186 | sum = csum_add(sum, (__force __wsum) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 187 | *(const u16 *)(buff + 12)); | ~~~~~~~~~~~~~~~~~~~~~~~~~~ Force inlining of csum_add(). 94c: 80 df 00 a0 lwz r6,160(r31) 950: 7d 28 50 2e lwzx r9,r8,r10 954: 7d 48 52 14 add r10,r8,r10 958: 80 aa 00 04 lwz r5,4(r10) 95c: 80 ff 00 68 lwz r7,104(r31) 960: 7d 29 28 14 addc r9,r9,r5 964: 7d 29 01 94 addze r9,r9 968: 7d 08 30 50 subf r8,r8,r6 96c: 80 aa 00 08 lwz r5,8(r10) 970: a1 4a 00 0c lhz r10,12(r10) 974: 7d 29 28 14 addc r9,r9,r5 978: 7d 29 01 94 addze r9,r9 97c: 7d 29 50 14 addc r9,r9,r10 980: 7d 29 01 94 addze r9,r9 984: 7d 29 48 f8 not r9,r9 988: 7c e7 48 14 addc r7,r7,r9 98c: 7c e7 01 94 addze r7,r7 990: 90 ff 00 68 stw r7,104(r31) In the non-inlined version, the first sum with 0 was performed. Here it is skipped. 
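A userspace approximation of the 32-bit fallback (a sketch, assuming __always_inline expands, as in the kernel, to inline __attribute__((always_inline))); once the call is guaranteed to be inlined, the compiler can fold away the initial add of 0 that the outlined version had to perform:

#include <stdio.h>
#include <stdint.h>

#define __always_inline inline __attribute__((__always_inline__))

/* One's-complement add with end-around carry, as csum_partial() needs. */
static __always_inline uint32_t csum_add(uint32_t csum, uint32_t addend)
{
	uint64_t res = (uint64_t)csum + addend;

	return (uint32_t)res + (uint32_t)(res >> 32);	/* fold the carry back in */
}

int main(void)
{
	/* Starting from 0 lets the inlined code drop the first add entirely. */
	uint32_t sum = csum_add(0, 0x12345678);

	sum = csum_add(sum, 0x9abcdef0);
	printf("%08x\n", sum);
	return 0;
}
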
Signed-off-by: Christophe Leroy Reviewed-by: Segher Boessenkool Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f7f4d4e364de6e473da874468b903da6e5d97adc.1620713272.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/checksum.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index d5da7ddbf0fc..350de8f90250 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -91,7 +91,7 @@ static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, } #define HAVE_ARCH_CSUM_ADD -static inline __wsum csum_add(__wsum csum, __wsum addend) +static __always_inline __wsum csum_add(__wsum csum, __wsum addend) { #ifdef __powerpc64__ u64 res = (__force u64)csum; -- cgit v1.2.3 From 359c2ca74d2fede5c571fbf3f5ee16ba1ad98259 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 14 May 2021 13:14:53 +0000 Subject: powerpc: Don't handle ALTIVEC/SPE in ASM in _switch(). Do it in C. _switch() saves and restores ALTIVEC and SPE status. For altivec this is redundant with what __switch_to() does with save_sprs() and restore_sprs() and giveup_all() before calling _switch(). Add support for SPE in save_sprs() and restore_sprs() and remove things from _switch(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/8ab21fd93d6e0047aa71e6509e5e312f14b2991b.1620998075.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/asm-offsets.c | 2 -- arch/powerpc/kernel/entry_32.S | 35 ----------------------------------- arch/powerpc/kernel/process.c | 9 +++++++++ 3 files changed, 9 insertions(+), 37 deletions(-) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 0480f4006e0c..b38dabe4ecb7 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -116,7 +116,6 @@ int main(void) #ifdef CONFIG_ALTIVEC OFFSET(THREAD_VRSTATE, thread_struct, vr_state.vr); OFFSET(THREAD_VRSAVEAREA, thread_struct, vr_save_area); - OFFSET(THREAD_VRSAVE, thread_struct, vrsave); OFFSET(THREAD_USED_VR, thread_struct, used_vr); OFFSET(VRSTATE_VSCR, thread_vr_state, vscr); OFFSET(THREAD_LOAD_VEC, thread_struct, load_vec); @@ -147,7 +146,6 @@ int main(void) #ifdef CONFIG_SPE OFFSET(THREAD_EVR0, thread_struct, evr[0]); OFFSET(THREAD_ACC, thread_struct, acc); - OFFSET(THREAD_SPEFSCR, thread_struct, spefscr); OFFSET(THREAD_USED_SPE, thread_struct, used_spe); #endif /* CONFIG_SPE */ #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 9160285cb2f4..6c09ebb853ec 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -176,28 +176,6 @@ _GLOBAL(_switch) /* r3-r12 are caller saved -- Cort */ SAVE_NVGPRS(r1) stw r0,_NIP(r1) /* Return to switch caller */ - mfmsr r11 - li r0,MSR_FP /* Disable floating-point */ -#ifdef CONFIG_ALTIVEC -BEGIN_FTR_SECTION - oris r0,r0,MSR_VEC@h /* Disable altivec */ - mfspr r12,SPRN_VRSAVE /* save vrsave register value */ - stw r12,THREAD+THREAD_VRSAVE(r2) -END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) -#endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_SPE -BEGIN_FTR_SECTION - oris r0,r0,MSR_SPE@h /* Disable SPE */ - mfspr r12,SPRN_SPEFSCR /* save spefscr register value */ - stw r12,THREAD+THREAD_SPEFSCR(r2) -END_FTR_SECTION_IFSET(CPU_FTR_SPE) -#endif /* CONFIG_SPE */ - and. r0,r0,r11 /* FP or altivec or SPE enabled?
*/ beq+ 1f andc r11,r11,r0 mtmsr r11 isync 1: stw r11,_MSR(r1) mfcr r10 stw r10,_CCR(r1) stw r1,KSP(r3) /* Set old stack pointer */ @@ -218,19 +196,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE) mr r3,r2 addi r2,r4,-THREAD /* Update current */ -#ifdef CONFIG_ALTIVEC -BEGIN_FTR_SECTION - lwz r0,THREAD+THREAD_VRSAVE(r2) - mtspr SPRN_VRSAVE,r0 /* if G4, restore VRSAVE reg */ -END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) -#endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_SPE -BEGIN_FTR_SECTION - lwz r0,THREAD+THREAD_SPEFSCR(r2) - mtspr SPRN_SPEFSCR,r0 /* restore SPEFSCR reg */ -END_FTR_SECTION_IFSET(CPU_FTR_SPE) -#endif /* CONFIG_SPE */ - lwz r0,_CCR(r1) mtcrf 0xFF,r0 /* r3-r12 are destroyed -- Cort */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 89e34aa273e2..2bd30acc843c 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1129,6 +1129,10 @@ static inline void save_sprs(struct thread_struct *t) if (cpu_has_feature(CPU_FTR_ALTIVEC)) t->vrsave = mfspr(SPRN_VRSAVE); #endif +#ifdef CONFIG_SPE + if (cpu_has_feature(CPU_FTR_SPE)) + t->spefscr = mfspr(SPRN_SPEFSCR); +#endif #ifdef CONFIG_PPC_BOOK3S_64 if (cpu_has_feature(CPU_FTR_DSCR)) t->dscr = mfspr(SPRN_DSCR); @@ -1159,6 +1163,11 @@ static inline void restore_sprs(struct thread_struct *old_thread, old_thread->vrsave != new_thread->vrsave) mtspr(SPRN_VRSAVE, new_thread->vrsave); #endif +#ifdef CONFIG_SPE + if (cpu_has_feature(CPU_FTR_SPE) && + old_thread->spefscr != new_thread->spefscr) + mtspr(SPRN_SPEFSCR, new_thread->spefscr); +#endif #ifdef CONFIG_PPC_BOOK3S_64 if (cpu_has_feature(CPU_FTR_DSCR)) { u64 dscr = get_paca()->dscr_default; -- cgit v1.2.3 From 148a047602462ab04bff20f3529a255b0439d3df Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 20 May 2021 10:23:00 +0000 Subject: powerpc: Rework PPC_RAW_xxx() macros for prefixed instructions At present, we have PPC_RAW_PLXVP() and PPC_RAW_PSTXVP(), which provide a 64-bit value that then gets split by open coding to format it into a 'struct ppc_inst' instruction. Instead, define a PPC_RAW_xxx_P() and a PPC_RAW_xxx_S() to be used as is.
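The idea, sketched standalone (the encodings below are illustrative stand-ins, not the kernel's real opcode values): a prefixed instruction is two 32-bit words, and exposing the prefix and suffix as separate macros lets callers feed ppc_inst_prefix() directly instead of shifting and masking a 64-bit value:

#include <stdio.h>
#include <stdint.h>

/* Illustrative stand-ins for the _P (prefix) and _S (suffix) halves. */
#define RAW_PLXVP_P(i)	(0x04000000u | (((i) >> 16) & 0x3ffff))
#define RAW_PLXVP_S(i)	(0xe8000000u | ((i) & 0xffff))

struct ppc_inst { uint32_t prefix, suffix; };

static struct ppc_inst ppc_inst_prefix(uint32_t p, uint32_t s)
{
	return (struct ppc_inst){ .prefix = p, .suffix = s };
}

int main(void)
{
	/* Before: one 64-bit macro, split with >> 32 and & 0xffffffff.
	 * After: the two halves come straight from the _P()/_S() macros. */
	struct ppc_inst in = ppc_inst_prefix(RAW_PLXVP_P(0x12345), RAW_PLXVP_S(0x12345));

	printf("%08x %08x\n", in.prefix, in.suffix);
	return 0;
}
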
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/5d146b31b943e7ad674894421db4feef54804b9b.1621506159.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ppc-opcode.h | 8 ++++---- arch/powerpc/lib/test_emulate_step.c | 6 ++---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 927551dd870b..212f014c6589 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -405,10 +405,10 @@ #define PPC_RAW_STXVP(xsp, a, i) (0x18000001 | __PPC_XSP(xsp) | ___PPC_RA(a) | IMM_DQ(i)) #define PPC_RAW_LXVPX(xtp, a, b) (0x7c00029a | __PPC_XTP(xtp) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_STXVPX(xsp, a, b) (0x7c00039a | __PPC_XSP(xsp) | ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_RAW_PLXVP(xtp, i, a, pr) \ ((PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_D0(i)) << 32 | (0xe8000000 | __PPC_XTP(xtp) | ___PPC_RA(a) | IMM_D1(i))) -#define PPC_RAW_PSTXVP(xsp, i, a, pr) \ ((PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_D0(i)) << 32 | (0xf8000000 | __PPC_XSP(xsp) | ___PPC_RA(a) | IMM_D1(i))) +#define PPC_RAW_PLXVP_P(xtp, i, a, pr) (PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_D0(i)) +#define PPC_RAW_PLXVP_S(xtp, i, a, pr) (0xe8000000 | __PPC_XTP(xtp) | ___PPC_RA(a) | IMM_D1(i)) +#define PPC_RAW_PSTXVP_P(xsp, i, a, pr) (PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_D0(i)) +#define PPC_RAW_PSTXVP_S(xsp, i, a, pr) (0xf8000000 | __PPC_XSP(xsp) | ___PPC_RA(a) | IMM_D1(i)) #define PPC_RAW_NAP (0x4c000364) #define PPC_RAW_SLEEP (0x4c0003a4) #define PPC_RAW_WINKLE (0x4c0003e4) diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c index a0a52fe5e979..6aef00293d43 100644 --- a/arch/powerpc/lib/test_emulate_step.c +++ b/arch/powerpc/lib/test_emulate_step.c @@ -826,8 +826,7 @@ static void __init test_plxvp_pstxvp(void) * XTp = 32xTX + 2xTp * let RA=3 R=0 D=d0||d1=0 R=0 Tp=1 TX=1 */ - instr = ppc_inst_prefix(PPC_RAW_PLXVP(34, 0, 3, 0) >> 32, PPC_RAW_PLXVP(34, 0, 3, 0) & 0xffffffff); + instr = ppc_inst_prefix(PPC_RAW_PLXVP_P(34, 0, 3, 0), PPC_RAW_PLXVP_S(34, 0, 3, 0)); stepped = emulate_step(&regs, instr); if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) { @@ -855,8 +854,7 @@ static void __init test_plxvp_pstxvp(void) * XSp = 32xSX + 2xSp * let RA=3 D=d0||d1=0 R=0 Sp=1 SX=1 */ - instr = ppc_inst_prefix(PPC_RAW_PSTXVP(34, 0, 3, 0) >> 32, PPC_RAW_PSTXVP(34, 0, 3, 0) & 0xffffffff); + instr = ppc_inst_prefix(PPC_RAW_PSTXVP_P(34, 0, 3, 0), PPC_RAW_PSTXVP_S(34, 0, 3, 0)); stepped = emulate_step(&regs, instr); -- cgit v1.2.3 From 07cd18320ed816dec8ff6f58a2d8b33294dcceba Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 20 May 2021 10:23:01 +0000 Subject: powerpc/opcodes: Add shorter macros for registers for use with PPC_RAW_xx() Today we have __REG_Rx macros. They are mainly meant for internal use by __PPC_RA() and friends, which allow uses like __PPC_RA(R12). When used with PPC_RAW_xx() macros, the result is not very readable. Add shorter macros _Rx in order to improve readability when used with PPC_RAW_xx() macros.
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ec34d92b7c2f810622261acfeeed4b0a0f4d01bd.1621506159.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ppc-opcode.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 212f014c6589..a067df9ca68c 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -76,6 +76,40 @@ #define __REGA0_R30 30 #define __REGA0_R31 31 +/* For use with PPC_RAW_() macros */ +#define _R0 0 +#define _R1 1 +#define _R2 2 +#define _R3 3 +#define _R4 4 +#define _R5 5 +#define _R6 6 +#define _R7 7 +#define _R8 8 +#define _R9 9 +#define _R10 10 +#define _R11 11 +#define _R12 12 +#define _R13 13 +#define _R14 14 +#define _R15 15 +#define _R16 16 +#define _R17 17 +#define _R18 18 +#define _R19 19 +#define _R20 20 +#define _R21 21 +#define _R22 22 +#define _R23 23 +#define _R24 24 +#define _R25 25 +#define _R26 26 +#define _R27 27 +#define _R28 28 +#define _R29 29 +#define _R30 30 +#define _R31 31 + #define IMM_L(i) ((uintptr_t)(i) & 0xffff) #define IMM_DS(i) ((uintptr_t)(i) & 0xfffc) #define IMM_DQ(i) ((uintptr_t)(i) & 0xfff0) -- cgit v1.2.3 From 8804d5beef9189fd2eae5aee14e1628436742e02 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 20 May 2021 10:23:02 +0000 Subject: powerpc/lib/code-patching: Use PPC_RAW_() macros Instead of open coding with PPC_INST_ defines, use PPC_RAW_() macros. It improves readability. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/8c92f1d9e825ee47c6f88fe43ad42d2a8cc2ab4a.1621506159.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/code-patching.h | 12 +++++------- arch/powerpc/kernel/trace/ftrace.c | 2 +- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index f1d029bf906e..22957c6068c8 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -73,9 +73,9 @@ void __patch_exception(int exc, unsigned long addr); #endif #define OP_RT_RA_MASK 0xffff0000UL -#define LIS_R2 (PPC_INST_ADDIS | __PPC_RT(R2)) -#define ADDIS_R2_R12 (PPC_INST_ADDIS | __PPC_RT(R2) | __PPC_RA(R12)) -#define ADDI_R2_R2 (PPC_INST_ADDI | __PPC_RT(R2) | __PPC_RA(R2)) +#define LIS_R2 (PPC_RAW_LIS(_R2, 0)) +#define ADDIS_R2_R12 (PPC_RAW_ADDIS(_R2, _R12, 0)) +#define ADDI_R2_R2 (PPC_RAW_ADDI(_R2, _R2, 0)) static inline unsigned long ppc_function_entry(void *func) @@ -180,12 +180,10 @@ static inline unsigned long ppc_kallsyms_lookup_name(const char *name) #define R2_STACK_OFFSET 40 #endif -#define PPC_INST_LD_TOC (PPC_INST_LD | ___PPC_RT(__REG_R2) | \ - ___PPC_RA(__REG_R1) | R2_STACK_OFFSET) +#define PPC_INST_LD_TOC PPC_RAW_LD(_R2, _R1, R2_STACK_OFFSET) /* usually preceded by a mflr r0 */ -#define PPC_INST_STD_LR (PPC_INST_STD | ___PPC_RS(__REG_R0) | \ - ___PPC_RA(__REG_R1) | PPC_LR_STKOFF) +#define PPC_INST_STD_LR PPC_RAW_STD(_R0, _R1, PPC_LR_STKOFF) #endif /* CONFIG_PPC64 */ #endif /* _ASM_POWERPC_CODE_PATCHING_H */ diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index ffe9537195aa..937220310f99 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -203,7 +203,7 @@ __ftrace_make_nop(struct module *mod, } if (!ppc_inst_equal(op, ppc_inst(PPC_INST_LD_TOC))) { - pr_err("Expected %08x found 
%s\n", PPC_INST_LD_TOC, ppc_inst_as_str(op)); + pr_err("Expected %08lx found %s\n", PPC_INST_LD_TOC, ppc_inst_as_str(op)); return -EINVAL; } #endif /* CONFIG_MPROFILE_KERNEL */ -- cgit v1.2.3 From 1c9debbc2eb5391277ae6aa7d95f821e0c28613d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 20 May 2021 10:23:03 +0000 Subject: powerpc/signal: Use PPC_RAW_xx() macros To improve readability, use PPC_RAW_xx() macros instead of open coding. Those macros are self-explanatory so the comments can go as well. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4ca2bfdca2f47a293d05f61eb3c4e487ee170f1f.1621506159.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ppc-opcode.h | 5 +++-- arch/powerpc/kernel/signal_32.c | 11 ++++------- arch/powerpc/kernel/signal_64.c | 15 ++++++--------- 3 files changed, 13 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index a067df9ca68c..e4e7abfa1285 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -275,7 +275,6 @@ #define PPC_INST_MFSPR_DSCR_USER_MASK 0xfc1ffffe #define PPC_INST_MTSPR_DSCR_USER 0x7c0303a6 #define PPC_INST_MTSPR_DSCR_USER_MASK 0xfc1ffffe -#define PPC_INST_SC 0x44000002 #define PPC_INST_STRING 0x7c00042a #define PPC_INST_STRING_MASK 0xfc0007fe #define PPC_INST_STRING_GEN_MASK 0xfc00067e @@ -294,7 +293,6 @@ #define PPC_INST_ADD 0x7c000214 #define PPC_INST_BLR 0x4e800020 #define PPC_INST_BCTR 0x4e800420 -#define PPC_INST_BCTRL 0x4e800421 #define PPC_INST_DIVD 0x7c0003d2 #define PPC_INST_RLDICR 0x78000004 #define PPC_INST_ORI 0x60000000 @@ -418,6 +416,8 @@ #define PPC_RAW_STBCIX(s, a, b) (0x7c0007aa | __PPC_RS(s) | __PPC_RA(a) | __PPC_RB(b)) #define PPC_RAW_DCBFPS(a, b) (0x7c0000ac | ___PPC_RA(a) | ___PPC_RB(b) | (4 << 21)) #define PPC_RAW_DCBSTPS(a, b) (0x7c0000ac | ___PPC_RA(a) | ___PPC_RB(b) | (6 << 21)) +#define PPC_RAW_SC() (0x44000002) + /* * Define what the VSX XX1 form instructions will look like, then add * the 128 bit load store instructions based on that. 
@@ -486,6 +486,7 @@ #define PPC_RAW_MTLR(r) (0x7c0803a6 | ___PPC_RT(r)) #define PPC_RAW_MFLR(t) (PPC_INST_MFLR | ___PPC_RT(t)) #define PPC_RAW_BCTR() (PPC_INST_BCTR) +#define PPC_RAW_BCTRL() (0x4e800421) #define PPC_RAW_MTCTR(r) (PPC_INST_MTCTR | ___PPC_RT(r)) #define PPC_RAW_ADDI(d, a, i) (PPC_INST_ADDI | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) #define PPC_RAW_LI(r, i) PPC_RAW_ADDI(r, 0, i) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 8f05ed0da292..a50d091334ea 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -828,10 +828,8 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp_rt32); } else { tramp = (unsigned long)mctx->mc_pad; - /* Set up the sigreturn trampoline: li r0,sigret; sc */ - unsafe_put_user(PPC_INST_ADDI + __NR_rt_sigreturn, &mctx->mc_pad[0], - failed); - unsafe_put_user(PPC_INST_SC, &mctx->mc_pad[1], failed); + unsafe_put_user(PPC_RAW_LI(_R0, __NR_rt_sigreturn), &mctx->mc_pad[0], failed); + unsafe_put_user(PPC_RAW_SC(), &mctx->mc_pad[1], failed); asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0])); } unsafe_put_sigset_t(&frame->uc.uc_sigmask, oldset, failed); @@ -926,9 +924,8 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp32); } else { tramp = (unsigned long)mctx->mc_pad; - /* Set up the sigreturn trampoline: li r0,sigret; sc */ - unsafe_put_user(PPC_INST_ADDI + __NR_sigreturn, &mctx->mc_pad[0], failed); - unsafe_put_user(PPC_INST_SC, &mctx->mc_pad[1], failed); + unsafe_put_user(PPC_RAW_LI(_R0, __NR_sigreturn), &mctx->mc_pad[0], failed); + unsafe_put_user(PPC_RAW_SC(), &mctx->mc_pad[1], failed); asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0])); } user_access_end(); diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index f9e1f5428b9e..0f9646682598 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -618,15 +618,12 @@ static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp) int i; long err = 0; - /* bctrl # call the handler */ - err |= __put_user(PPC_INST_BCTRL, &tramp[0]); - /* addi r1, r1, __SIGNAL_FRAMESIZE # Pop the dummy stackframe */ - err |= __put_user(PPC_INST_ADDI | __PPC_RT(R1) | __PPC_RA(R1) | - (__SIGNAL_FRAMESIZE & 0xffff), &tramp[1]); - /* li r0, __NR_[rt_]sigreturn| */ - err |= __put_user(PPC_INST_ADDI | (syscall & 0xffff), &tramp[2]); - /* sc */ - err |= __put_user(PPC_INST_SC, &tramp[3]); + /* Call the handler and pop the dummy stackframe*/ + err |= __put_user(PPC_RAW_BCTRL(), &tramp[0]); + err |= __put_user(PPC_RAW_ADDI(_R1, _R1, __SIGNAL_FRAMESIZE), &tramp[1]); + + err |= __put_user(PPC_RAW_LI(_R0, syscall), &tramp[2]); + err |= __put_user(PPC_RAW_SC(), &tramp[3]); /* Minimal traceback info */ for (i=TRAMP_TRACEBACK; i < TRAMP_SIZE ;i++) -- cgit v1.2.3 From 47b04699d0709f5ff12a8aa0b3050a6246eb570e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 20 May 2021 10:23:04 +0000 Subject: powerpc/modules: Use PPC_RAW_xx() macros To improve readability, use PPC_RAW_xx() macros instead of open coding. Those macros are self-explanatory so the comments can go as well. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/99d9ee8849d3992beeadb310a665aae01c3abfb1.1621506159.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ppc-opcode.h | 11 +++---- arch/powerpc/kernel/module_32.c | 19 ++++-------- arch/powerpc/kernel/module_64.c | 55 +++++++++++------------------------ 3 files changed, 27 insertions(+), 58 deletions(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index e4e7abfa1285..96ca068a3f6e 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -287,12 +287,9 @@ #define PPC_INST_LD 0xe8000000 #define PPC_INST_STD 0xf8000000 #define PPC_INST_MFLR 0x7c0802a6 -#define PPC_INST_MTCTR 0x7c0903a6 -#define PPC_INST_ADDI 0x38000000 #define PPC_INST_ADDIS 0x3c000000 #define PPC_INST_ADD 0x7c000214 #define PPC_INST_BLR 0x4e800020 -#define PPC_INST_BCTR 0x4e800420 #define PPC_INST_DIVD 0x7c0003d2 #define PPC_INST_RLDICR 0x78000004 #define PPC_INST_ORI 0x60000000 @@ -485,12 +482,12 @@ #define PPC_RAW_BLRL() (0x4e800021) #define PPC_RAW_MTLR(r) (0x7c0803a6 | ___PPC_RT(r)) #define PPC_RAW_MFLR(t) (PPC_INST_MFLR | ___PPC_RT(t)) -#define PPC_RAW_BCTR() (PPC_INST_BCTR) +#define PPC_RAW_BCTR() (0x4e800420) #define PPC_RAW_BCTRL() (0x4e800421) -#define PPC_RAW_MTCTR(r) (PPC_INST_MTCTR | ___PPC_RT(r)) -#define PPC_RAW_ADDI(d, a, i) (PPC_INST_ADDI | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_MTCTR(r) (0x7c0903a6 | ___PPC_RT(r)) +#define PPC_RAW_ADDI(d, a, i) (0x38000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) #define PPC_RAW_LI(r, i) PPC_RAW_ADDI(r, 0, i) -#define PPC_RAW_ADDIS(d, a, i) (PPC_INST_ADDIS | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_ADDIS(d, a, i) (0x3c000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) #define PPC_RAW_ADDIC(d, a, i) (0x30000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) #define PPC_RAW_ADDIC_DOT(d, a, i) (0x34000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) #define PPC_RAW_LIS(r, i) PPC_RAW_ADDIS(r, 0, i) diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index c27b8687b82a..f417afc08d33 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -145,10 +145,9 @@ int module_frob_arch_sections(Elf32_Ehdr *hdr, static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val) { - if (entry->jump[0] != (PPC_INST_ADDIS | __PPC_RT(R12) | PPC_HA(val))) + if (entry->jump[0] != PPC_RAW_LIS(_R12, PPC_HA(val))) return 0; - if (entry->jump[1] != (PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12) | - PPC_LO(val))) + if (entry->jump[1] != PPC_RAW_ADDI(_R12, _R12, PPC_LO(val))) return 0; return 1; } @@ -175,16 +174,10 @@ static uint32_t do_plt_call(void *location, entry++; } - /* - * lis r12, sym@ha - * addi r12, r12, sym@l - * mtctr r12 - * bctr - */ - entry->jump[0] = PPC_INST_ADDIS | __PPC_RT(R12) | PPC_HA(val); - entry->jump[1] = PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12) | PPC_LO(val); - entry->jump[2] = PPC_INST_MTCTR | __PPC_RS(R12); - entry->jump[3] = PPC_INST_BCTR; + entry->jump[0] = PPC_RAW_LIS(_R12, PPC_HA(val)); + entry->jump[1] = PPC_RAW_ADDI(_R12, _R12, PPC_LO(val)); + entry->jump[2] = PPC_RAW_MTCTR(_R12); + entry->jump[3] = PPC_RAW_BCTR(); pr_debug("Initialized plt for 0x%x at %p\n", val, entry); return (uint32_t)entry; diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index ae2b188365b1..6baa676e7cb6 100644 --- a/arch/powerpc/kernel/module_64.c +++ 
b/arch/powerpc/kernel/module_64.c @@ -122,27 +122,19 @@ struct ppc64_stub_entry * the stub, but it's significantly shorter to put these values at the * end of the stub code, and patch the stub address (32-bits relative * to the TOC ptr, r2) into the stub. - * - * addis r11,r2, - * addi r11,r11, - * std r2,R2_STACK_OFFSET(r1) - * ld r12,32(r11) - * ld r2,40(r11) - * mtctr r12 - * bctr */ static u32 ppc64_stub_insns[] = { - PPC_INST_ADDIS | __PPC_RT(R11) | __PPC_RA(R2), - PPC_INST_ADDI | __PPC_RT(R11) | __PPC_RA(R11), + PPC_RAW_ADDIS(_R11, _R2, 0), + PPC_RAW_ADDI(_R11, _R11, 0), /* Save current r2 value in magic place on the stack. */ - PPC_INST_STD | __PPC_RS(R2) | __PPC_RA(R1) | R2_STACK_OFFSET, - PPC_INST_LD | __PPC_RT(R12) | __PPC_RA(R11) | 32, + PPC_RAW_STD(_R2, _R1, R2_STACK_OFFSET), + PPC_RAW_LD(_R12, _R11, 32), #ifdef PPC64_ELF_ABI_v1 /* Set up new r2 from function descriptor */ - PPC_INST_LD | __PPC_RT(R2) | __PPC_RA(R11) | 40, + PPC_RAW_LD(_R2, _R11, 40), #endif - PPC_INST_MTCTR | __PPC_RS(R12), - PPC_INST_BCTR, + PPC_RAW_MTCTR(_R12), + PPC_RAW_BCTR(), }; /* Count how many different 24-bit relocations (different symbol, @@ -336,21 +328,12 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, #ifdef CONFIG_MPROFILE_KERNEL -#define PACATOC offsetof(struct paca_struct, kernel_toc) - -/* - * ld r12,PACATOC(r13) - * addis r12,r12, - * addi r12,r12, - * mtctr r12 - * bctr - */ static u32 stub_insns[] = { - PPC_INST_LD | __PPC_RT(R12) | __PPC_RA(R13) | PACATOC, - PPC_INST_ADDIS | __PPC_RT(R12) | __PPC_RA(R12), - PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12), - PPC_INST_MTCTR | __PPC_RS(R12), - PPC_INST_BCTR, + PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernel_toc)), + PPC_RAW_ADDIS(_R12, _R12, 0), + PPC_RAW_ADDI(_R12, _R12, 0), + PPC_RAW_MTCTR(_R12), + PPC_RAW_BCTR(), }; /* @@ -507,7 +490,7 @@ static int restore_r2(const char *name, u32 *instruction, struct module *me) if (!instr_is_relative_link_branch(ppc_inst(*prev_insn))) return 1; - if (*instruction != PPC_INST_NOP) { + if (*instruction != PPC_RAW_NOP()) { pr_err("%s: Expected nop after call, got %08x at %pS\n", me->name, *instruction, instruction); return 0; @@ -696,21 +679,17 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, * ld r2, ...(r12) * add r2, r2, r12 */ - if ((((uint32_t *)location)[0] & ~0xfffc) != - (PPC_INST_LD | __PPC_RT(R2) | __PPC_RA(R12))) + if ((((uint32_t *)location)[0] & ~0xfffc) != PPC_RAW_LD(_R2, _R12, 0)) break; - if (((uint32_t *)location)[1] != - (PPC_INST_ADD | __PPC_RT(R2) | __PPC_RA(R2) | __PPC_RB(R12))) + if (((uint32_t *)location)[1] != PPC_RAW_ADD(_R2, _R2, _R12)) break; /* * If found, replace it with: * addis r2, r12, (.TOC.-func)@ha * addi r2, r2, (.TOC.-func)@l */ - ((uint32_t *)location)[0] = PPC_INST_ADDIS | __PPC_RT(R2) | - __PPC_RA(R12) | PPC_HA(value); - ((uint32_t *)location)[1] = PPC_INST_ADDI | __PPC_RT(R2) | - __PPC_RA(R2) | PPC_LO(value); + ((uint32_t *)location)[0] = PPC_RAW_ADDIS(_R2, _R12, PPC_HA(value)); + ((uint32_t *)location)[1] = PPC_RAW_ADDI(_R2, _R2, PPC_LO(value)); break; case R_PPC64_REL16_HA: -- cgit v1.2.3 From e7304597560176d8755e2ae4abb599d0c4efe4f2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 20 May 2021 10:23:05 +0000 Subject: powerpc/security: Use PPC_RAW_BLR() and PPC_RAW_NOP() On the road to remove all use of PPC_INST_xxx, replace PPC_INST_BLR by PPC_RAW_BLR(). Same for PPC_INST_NOP. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c04f88d0e53d2122fbbe92226892a01ebc668b6a.1621506159.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ppc-opcode.h | 3 +-- arch/powerpc/kernel/security.c | 12 ++++++------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 96ca068a3f6e..de0ac79b3d5a 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -289,7 +289,6 @@ #define PPC_INST_MFLR 0x7c0802a6 #define PPC_INST_ADDIS 0x3c000000 #define PPC_INST_ADD 0x7c000214 -#define PPC_INST_BLR 0x4e800020 #define PPC_INST_DIVD 0x7c0003d2 #define PPC_INST_RLDICR 0x78000004 #define PPC_INST_ORI 0x60000000 @@ -478,7 +477,7 @@ #define PPC_RAW_ADDC(t, a, b) (0x7c000014 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_ADDC_DOT(t, a, b) (0x7c000014 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1) #define PPC_RAW_NOP() (PPC_INST_NOP) -#define PPC_RAW_BLR() (PPC_INST_BLR) +#define PPC_RAW_BLR() (0x4e800020) #define PPC_RAW_BLRL() (0x4e800021) #define PPC_RAW_MTLR(r) (0x7c0803a6 | ___PPC_RT(r)) #define PPC_RAW_MFLR(t) (PPC_INST_MFLR | ___PPC_RT(t)) diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 0fdfcdd9d880..118f10d14af8 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -438,7 +438,7 @@ static void update_branch_cache_flush(void) site = &patch__call_kvm_flush_link_stack; // This controls the branch from guest_exit_cont to kvm_flush_link_stack if (link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) { - patch_instruction_site(site, ppc_inst(PPC_INST_NOP)); + patch_instruction_site(site, ppc_inst(PPC_RAW_NOP())); } else { // Could use HW flush, but that could also flush count cache patch_branch_site(site, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK); @@ -447,11 +447,11 @@ static void update_branch_cache_flush(void) // Patch out the bcctr first, then nop the rest site = &patch__call_flush_branch_caches3; - patch_instruction_site(site, ppc_inst(PPC_INST_NOP)); + patch_instruction_site(site, ppc_inst(PPC_RAW_NOP())); site = &patch__call_flush_branch_caches2; - patch_instruction_site(site, ppc_inst(PPC_INST_NOP)); + patch_instruction_site(site, ppc_inst(PPC_RAW_NOP())); site = &patch__call_flush_branch_caches1; - patch_instruction_site(site, ppc_inst(PPC_INST_NOP)); + patch_instruction_site(site, ppc_inst(PPC_RAW_NOP())); // This controls the branch from _switch to flush_branch_caches if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE && @@ -474,12 +474,12 @@ static void update_branch_cache_flush(void) // If we just need to flush the link stack, early return if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE) { patch_instruction_site(&patch__flush_link_stack_return, - ppc_inst(PPC_INST_BLR)); + ppc_inst(PPC_RAW_BLR())); // If we have flush instruction, early return } else if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW) { patch_instruction_site(&patch__flush_count_cache_return, - ppc_inst(PPC_INST_BLR)); + ppc_inst(PPC_RAW_BLR())); } } } -- cgit v1.2.3 From 5a03e1e9728edce8f87e3e0bad6d4cd66329b129 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 20 May 2021 10:23:06 +0000 Subject: powerpc/ftrace: Use PPC_RAW_MFLR() and PPC_RAW_NOP() Use PPC_RAW_MFLR() instead of open coding with PPC_INST_MFLR. Same for PPC_INST_NOP. 
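For reference, a hedged sketch of the encoding involved: mflr is a move-from-SPR with the destination register in bits 21-25, so the open-coded PPC_INST_MFLR | __PPC_RT(R0) and PPC_RAW_MFLR(_R0) are the same word. The RT field shift below is an assumed mirror of the kernel's ___PPC_RT():

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Assumed mirror of ___PPC_RT(): destination register in bits 21-25. */
    #define RT(r)           (((uint32_t)(r) & 0x1f) << 21)
    #define PPC_RAW_MFLR(t) (0x7c0802a6u | RT(t))   /* base value from the diff */

    int main(void)
    {
            /* r0 leaves RT zero, so PPC_RAW_MFLR(0) is the bare 0x7c0802a6
             * that ftrace compares the fetched word against. */
            assert(PPC_RAW_MFLR(0) == 0x7c0802a6u);
            /* mflr r10, the word used by the stf fallback later in the series */
            printf("mflr r10 = 0x%08x\n", PPC_RAW_MFLR(10));   /* 0x7d4802a6 */
            return 0;
    }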
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/98fd4d717810b7c4032a1edf62dd6fe638e64329.1621506159.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/trace/ftrace.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 937220310f99..a787a414ea6b 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -162,7 +162,7 @@ __ftrace_make_nop(struct module *mod, #ifdef CONFIG_MPROFILE_KERNEL /* When using -mkernel_profile there is no load to jump over */ - pop = ppc_inst(PPC_INST_NOP); + pop = ppc_inst(PPC_RAW_NOP()); if (copy_inst_from_kernel_nofault(&op, (void *)(ip - 4))) { pr_err("Fetching instruction at %lx failed.\n", ip - 4); @@ -170,7 +170,7 @@ __ftrace_make_nop(struct module *mod, } /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */ - if (!ppc_inst_equal(op, ppc_inst(PPC_INST_MFLR)) && + if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_MFLR(_R0))) && !ppc_inst_equal(op, ppc_inst(PPC_INST_STD_LR))) { pr_err("Unexpected instruction %s around bl _mcount\n", ppc_inst_as_str(op)); @@ -278,7 +278,7 @@ __ftrace_make_nop(struct module *mod, return -EINVAL; } - op = ppc_inst(PPC_INST_NOP); + op = ppc_inst(PPC_RAW_NOP()); if (patch_instruction((struct ppc_inst *)ip, op)) return -EPERM; @@ -424,7 +424,7 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) } } - if (patch_instruction((struct ppc_inst *)ip, ppc_inst(PPC_INST_NOP))) { + if (patch_instruction((struct ppc_inst *)ip, ppc_inst(PPC_RAW_NOP()))) { pr_err("Patching NOP failed.\n"); return -EPERM; } @@ -446,7 +446,7 @@ int ftrace_make_nop(struct module *mod, if (test_24bit_addr(ip, addr)) { /* within range */ old = ftrace_call_replace(ip, addr, 1); - new = ppc_inst(PPC_INST_NOP); + new = ppc_inst(PPC_RAW_NOP()); return ftrace_modify_code(ip, old, new); } else if (core_kernel_text(ip)) return __ftrace_make_nop_kernel(rec, addr); @@ -510,7 +510,7 @@ static int expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1) { /* look for patched "NOP" on ppc64 with -mprofile-kernel */ - if (!ppc_inst_equal(op0, ppc_inst(PPC_INST_NOP))) + if (!ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP()))) return 0; return 1; } @@ -596,7 +596,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) return -EFAULT; /* It should be pointing to a nop */ - if (!ppc_inst_equal(op, ppc_inst(PPC_INST_NOP))) { + if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_NOP()))) { pr_err("Expected NOP but have %s\n", ppc_inst_as_str(op)); return -EINVAL; } @@ -653,7 +653,7 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) return -EFAULT; } - if (!ppc_inst_equal(op, ppc_inst(PPC_INST_NOP))) { + if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_NOP()))) { pr_err("Unexpected call sequence at %p: %s\n", ip, ppc_inst_as_str(op)); return -EINVAL; } @@ -684,7 +684,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) */ if (test_24bit_addr(ip, addr)) { /* within range */ - old = ppc_inst(PPC_INST_NOP); + old = ppc_inst(PPC_RAW_NOP()); new = ftrace_call_replace(ip, addr, 1); return ftrace_modify_code(ip, old, new); } else if (core_kernel_text(ip)) -- cgit v1.2.3 From e08021f8dbd256f480b7e172aa4e894219c901f2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 20 May 2021 10:23:07 +0000 Subject: powerpc/ebpf64: Use PPC_RAW_MFLR() Use PPC_RAW_MFLR() instead of open coding with PPC_INST_MFLR. 
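For readers unfamiliar with the BPF JIT, EMIT() appends one 32-bit instruction word to the output image and advances an index. A rough stand-alone analogue (all names here are invented for this sketch):

    #include <stdint.h>
    #include <stdio.h>

    /* Toy stand-in for the JIT's codegen context. */
    struct ctx {
            uint32_t *image;   /* NULL on a sizing-only pass */
            int idx;
    };

    /* Rough analogue of EMIT(): store one instruction word, advance. */
    static void emit(struct ctx *c, uint32_t insn)
    {
            if (c->image)
                    c->image[c->idx] = insn;
            c->idx++;
    }

    #define PPC_RAW_MFLR(t) (0x7c0802a6u | (((uint32_t)(t) & 0x1f) << 21))

    int main(void)
    {
            uint32_t buf[4];
            struct ctx c = { buf, 0 };

            emit(&c, PPC_RAW_MFLR(0));   /* save LR before calling helpers */
            printf("emitted %d word(s), first = 0x%08x\n", c.idx, buf[0]);
            return 0;
    }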
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c1887623e91e8b4da36e669e4c74de86320a5092.1621506159.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ppc-opcode.h | 3 +-- arch/powerpc/net/bpf_jit_comp64.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index de0ac79b3d5a..033a31f86ae2 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -286,7 +286,6 @@ #define PPC_INST_TSR 0x7c0005dd #define PPC_INST_LD 0xe8000000 #define PPC_INST_STD 0xf8000000 -#define PPC_INST_MFLR 0x7c0802a6 #define PPC_INST_ADDIS 0x3c000000 #define PPC_INST_ADD 0x7c000214 #define PPC_INST_DIVD 0x7c0003d2 @@ -480,7 +479,7 @@ #define PPC_RAW_BLR() (0x4e800020) #define PPC_RAW_BLRL() (0x4e800021) #define PPC_RAW_MTLR(r) (0x7c0803a6 | ___PPC_RT(r)) -#define PPC_RAW_MFLR(t) (PPC_INST_MFLR | ___PPC_RT(t)) +#define PPC_RAW_MFLR(t) (0x7c0802a6 | ___PPC_RT(t)) #define PPC_RAW_BCTR() (0x4e800420) #define PPC_RAW_BCTRL() (0x4e800421) #define PPC_RAW_MTCTR(r) (0x7c0903a6 | ___PPC_RT(r)) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 57a8c1153851..7ec9c852f105 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -94,7 +94,7 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) * save/restore LR unless we call other functions */ if (ctx->seen & SEEN_FUNC) { - EMIT(PPC_INST_MFLR | __PPC_RT(R0)); + EMIT(PPC_RAW_MFLR(_R0)); PPC_BPF_STL(0, 1, PPC_LR_STKOFF); } -- cgit v1.2.3 From e0ea08c0cacf9370e3fd3ee8bb7456c61e79db66 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 20 May 2021 10:23:08 +0000 Subject: powerpc/ebpf32: Use _Rx macros instead of __REG_Rx ones To increase readability, use _Rx macros instead of __REG_Rx. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/eb7ec6297b5d16f141c5866da3975b418e47431b.1621506159.git.christophe.leroy@csgroup.eu --- arch/powerpc/net/bpf_jit_comp32.c | 218 +++++++++++++++++++------------------- 1 file changed, 109 insertions(+), 109 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index bbb16099e8c7..92e17fbaaabb 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -108,20 +108,20 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) int i; /* First arg comes in as a 32 bits pointer. */ - EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_1), __REG_R3)); + EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_1), _R3)); EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_1) - 1, 0)); - EMIT(PPC_RAW_STWU(__REG_R1, __REG_R1, -BPF_PPC_STACKFRAME(ctx))); + EMIT(PPC_RAW_STWU(_R1, _R1, -BPF_PPC_STACKFRAME(ctx))); /* * Initialize tail_call_cnt in stack frame if we do tail calls. * Otherwise, put in NOPs so that it can be skipped when we are * invoked through a tail call. 
*/ - if (ctx->seen & SEEN_TAILCALL) { - EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_1) - 1, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); - } else { + if (ctx->seen & SEEN_TAILCALL) + EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_1) - 1, _R1, + bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); + else EMIT(PPC_RAW_NOP()); - } #define BPF_TAILCALL_PROLOGUE_SIZE 16 @@ -130,30 +130,30 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) * save/restore LR unless we call other functions */ if (ctx->seen & SEEN_FUNC) - EMIT(PPC_RAW_MFLR(__REG_R0)); + EMIT(PPC_RAW_MFLR(_R0)); /* * Back up non-volatile regs -- registers r18-r31 */ for (i = BPF_PPC_NVR_MIN; i <= 31; i++) if (bpf_is_seen_register(ctx, i)) - EMIT(PPC_RAW_STW(i, __REG_R1, bpf_jit_stack_offsetof(ctx, i))); + EMIT(PPC_RAW_STW(i, _R1, bpf_jit_stack_offsetof(ctx, i))); /* If needed retrieve arguments 9 and 10, ie 5th 64 bits arg.*/ if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) { - EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5) - 1, __REG_R1, BPF_PPC_STACKFRAME(ctx)) + 8); - EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5), __REG_R1, BPF_PPC_STACKFRAME(ctx)) + 12); + EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5) - 1, _R1, BPF_PPC_STACKFRAME(ctx)) + 8); + EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5), _R1, BPF_PPC_STACKFRAME(ctx)) + 12); } /* Setup frame pointer to point to the bpf stack area */ if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_FP))) { EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_FP) - 1, 0)); - EMIT(PPC_RAW_ADDI(bpf_to_ppc(ctx, BPF_REG_FP), __REG_R1, + EMIT(PPC_RAW_ADDI(bpf_to_ppc(ctx, BPF_REG_FP), _R1, STACK_FRAME_MIN_SIZE + ctx->stack_size)); } if (ctx->seen & SEEN_FUNC) - EMIT(PPC_RAW_STW(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); + EMIT(PPC_RAW_STW(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); } static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx) @@ -163,24 +163,24 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx /* Restore NVRs */ for (i = BPF_PPC_NVR_MIN; i <= 31; i++) if (bpf_is_seen_register(ctx, i)) - EMIT(PPC_RAW_LWZ(i, __REG_R1, bpf_jit_stack_offsetof(ctx, i))); + EMIT(PPC_RAW_LWZ(i, _R1, bpf_jit_stack_offsetof(ctx, i))); } void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) { - EMIT(PPC_RAW_MR(__REG_R3, bpf_to_ppc(ctx, BPF_REG_0))); + EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(ctx, BPF_REG_0))); bpf_jit_emit_common_epilogue(image, ctx); /* Tear down our stack frame */ if (ctx->seen & SEEN_FUNC) - EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); + EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); - EMIT(PPC_RAW_ADDI(__REG_R1, __REG_R1, BPF_PPC_STACKFRAME(ctx))); + EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME(ctx))); if (ctx->seen & SEEN_FUNC) - EMIT(PPC_RAW_MTLR(__REG_R0)); + EMIT(PPC_RAW_MTLR(_R0)); EMIT(PPC_RAW_BLR()); } @@ -193,9 +193,9 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 fun PPC_BL_ABS(func); } else { /* Load function address into r0 */ - EMIT(PPC_RAW_LIS(__REG_R0, IMM_H(func))); - EMIT(PPC_RAW_ORI(__REG_R0, __REG_R0, IMM_L(func))); - EMIT(PPC_RAW_MTLR(__REG_R0)); + EMIT(PPC_RAW_LIS(_R0, IMM_H(func))); + EMIT(PPC_RAW_ORI(_R0, _R0, IMM_L(func))); + EMIT(PPC_RAW_MTLR(_R0)); EMIT(PPC_RAW_BLRL()); } } @@ -215,47 +215,47 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 * if (index >= array->map.max_entries) * goto out; */ - EMIT(PPC_RAW_LWZ(__REG_R0, b2p_bpf_array, 
offsetof(struct bpf_array, map.max_entries))); - EMIT(PPC_RAW_CMPLW(b2p_index, __REG_R0)); - EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); + EMIT(PPC_RAW_LWZ(_R0, b2p_bpf_array, offsetof(struct bpf_array, map.max_entries))); + EMIT(PPC_RAW_CMPLW(b2p_index, _R0)); + EMIT(PPC_RAW_LWZ(_R0, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); PPC_BCC(COND_GE, out); /* * if (tail_call_cnt > MAX_TAIL_CALL_CNT) * goto out; */ - EMIT(PPC_RAW_CMPLWI(__REG_R0, MAX_TAIL_CALL_CNT)); + EMIT(PPC_RAW_CMPLWI(_R0, MAX_TAIL_CALL_CNT)); /* tail_call_cnt++; */ - EMIT(PPC_RAW_ADDIC(__REG_R0, __REG_R0, 1)); + EMIT(PPC_RAW_ADDIC(_R0, _R0, 1)); PPC_BCC(COND_GT, out); /* prog = array->ptrs[index]; */ - EMIT(PPC_RAW_RLWINM(__REG_R3, b2p_index, 2, 0, 29)); - EMIT(PPC_RAW_ADD(__REG_R3, __REG_R3, b2p_bpf_array)); - EMIT(PPC_RAW_LWZ(__REG_R3, __REG_R3, offsetof(struct bpf_array, ptrs))); - EMIT(PPC_RAW_STW(__REG_R0, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); + EMIT(PPC_RAW_RLWINM(_R3, b2p_index, 2, 0, 29)); + EMIT(PPC_RAW_ADD(_R3, _R3, b2p_bpf_array)); + EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_array, ptrs))); + EMIT(PPC_RAW_STW(_R0, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); /* * if (prog == NULL) * goto out; */ - EMIT(PPC_RAW_CMPLWI(__REG_R3, 0)); + EMIT(PPC_RAW_CMPLWI(_R3, 0)); PPC_BCC(COND_EQ, out); /* goto *(prog->bpf_func + prologue_size); */ - EMIT(PPC_RAW_LWZ(__REG_R3, __REG_R3, offsetof(struct bpf_prog, bpf_func))); + EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_prog, bpf_func))); if (ctx->seen & SEEN_FUNC) - EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); + EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); - EMIT(PPC_RAW_ADDIC(__REG_R3, __REG_R3, BPF_TAILCALL_PROLOGUE_SIZE)); + EMIT(PPC_RAW_ADDIC(_R3, _R3, BPF_TAILCALL_PROLOGUE_SIZE)); if (ctx->seen & SEEN_FUNC) - EMIT(PPC_RAW_MTLR(__REG_R0)); + EMIT(PPC_RAW_MTLR(_R0)); - EMIT(PPC_RAW_MTCTR(__REG_R3)); + EMIT(PPC_RAW_MTCTR(_R3)); - EMIT(PPC_RAW_MR(__REG_R3, bpf_to_ppc(ctx, BPF_REG_1))); + EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(ctx, BPF_REG_1))); /* tear restore NVRs, ... 
*/ bpf_jit_emit_common_epilogue(image, ctx); @@ -352,8 +352,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * if (imm >= -32768 && imm < 32768) { EMIT(PPC_RAW_ADDIC(dst_reg, dst_reg, imm)); } else { - PPC_LI32(__REG_R0, imm); - EMIT(PPC_RAW_ADDC(dst_reg, dst_reg, __REG_R0)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_ADDC(dst_reg, dst_reg, _R0)); } if (imm >= 0) EMIT(PPC_RAW_ADDZE(dst_reg_h, dst_reg_h)); @@ -362,11 +362,11 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * break; case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */ bpf_set_seen_register(ctx, tmp_reg); - EMIT(PPC_RAW_MULW(__REG_R0, dst_reg, src_reg_h)); + EMIT(PPC_RAW_MULW(_R0, dst_reg, src_reg_h)); EMIT(PPC_RAW_MULW(dst_reg_h, dst_reg_h, src_reg)); EMIT(PPC_RAW_MULHWU(tmp_reg, dst_reg, src_reg)); EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg)); - EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, _R0)); EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, tmp_reg)); break; case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */ @@ -376,8 +376,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * if (imm >= -32768 && imm < 32768) { EMIT(PPC_RAW_MULI(dst_reg, dst_reg, imm)); } else { - PPC_LI32(__REG_R0, imm); - EMIT(PPC_RAW_MULW(dst_reg, dst_reg, __REG_R0)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, _R0)); } break; case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */ @@ -398,17 +398,17 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * EMIT(PPC_RAW_MULW(dst_reg_h, dst_reg_h, tmp_reg)); if (imm < 0) EMIT(PPC_RAW_SUB(dst_reg_h, dst_reg_h, dst_reg)); - EMIT(PPC_RAW_MULHWU(__REG_R0, dst_reg, tmp_reg)); + EMIT(PPC_RAW_MULHWU(_R0, dst_reg, tmp_reg)); EMIT(PPC_RAW_MULW(dst_reg, dst_reg, tmp_reg)); - EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, _R0)); break; case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */ EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, src_reg)); break; case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */ - EMIT(PPC_RAW_DIVWU(__REG_R0, dst_reg, src_reg)); - EMIT(PPC_RAW_MULW(__REG_R0, src_reg, __REG_R0)); - EMIT(PPC_RAW_SUB(dst_reg, dst_reg, __REG_R0)); + EMIT(PPC_RAW_DIVWU(_R0, dst_reg, src_reg)); + EMIT(PPC_RAW_MULW(_R0, src_reg, _R0)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, _R0)); break; case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */ return -EOPNOTSUPP; @@ -420,8 +420,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * if (imm == 1) break; - PPC_LI32(__REG_R0, imm); - EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, __REG_R0)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, _R0)); break; case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */ if (!imm) @@ -430,9 +430,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * if (!is_power_of_2((u32)imm)) { bpf_set_seen_register(ctx, tmp_reg); PPC_LI32(tmp_reg, imm); - EMIT(PPC_RAW_DIVWU(__REG_R0, dst_reg, tmp_reg)); - EMIT(PPC_RAW_MULW(__REG_R0, tmp_reg, __REG_R0)); - EMIT(PPC_RAW_SUB(dst_reg, dst_reg, __REG_R0)); + EMIT(PPC_RAW_DIVWU(_R0, dst_reg, tmp_reg)); + EMIT(PPC_RAW_MULW(_R0, tmp_reg, _R0)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, _R0)); break; } if (imm == 1) @@ -503,8 +503,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 32 - fls(imm), 32 - ffs(imm))); } else { - PPC_LI32(__REG_R0, 
imm); - EMIT(PPC_RAW_AND(dst_reg, dst_reg, __REG_R0)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_AND(dst_reg, dst_reg, _R0)); } break; case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */ @@ -555,12 +555,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * break; case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */ bpf_set_seen_register(ctx, tmp_reg); - EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32)); + EMIT(PPC_RAW_SUBFIC(_R0, src_reg, 32)); EMIT(PPC_RAW_SLW(dst_reg_h, dst_reg_h, src_reg)); EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32)); - EMIT(PPC_RAW_SRW(__REG_R0, dst_reg, __REG_R0)); + EMIT(PPC_RAW_SRW(_R0, dst_reg, _R0)); EMIT(PPC_RAW_SLW(tmp_reg, dst_reg, tmp_reg)); - EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, _R0)); EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg)); EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, tmp_reg)); break; @@ -591,12 +591,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * break; case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */ bpf_set_seen_register(ctx, tmp_reg); - EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32)); + EMIT(PPC_RAW_SUBFIC(_R0, src_reg, 32)); EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg)); EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32)); - EMIT(PPC_RAW_SLW(__REG_R0, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_SLW(_R0, dst_reg_h, _R0)); EMIT(PPC_RAW_SRW(tmp_reg, dst_reg_h, tmp_reg)); - EMIT(PPC_RAW_OR(dst_reg, dst_reg, __REG_R0)); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, _R0)); EMIT(PPC_RAW_SRW(dst_reg_h, dst_reg_h, src_reg)); EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg)); break; @@ -627,15 +627,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * break; case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */ bpf_set_seen_register(ctx, tmp_reg); - EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32)); + EMIT(PPC_RAW_SUBFIC(_R0, src_reg, 32)); EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg)); - EMIT(PPC_RAW_SLW(__REG_R0, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_SLW(_R0, dst_reg_h, _R0)); EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32)); - EMIT(PPC_RAW_OR(dst_reg, dst_reg, __REG_R0)); - EMIT(PPC_RAW_RLWINM(__REG_R0, tmp_reg, 0, 26, 26)); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, _R0)); + EMIT(PPC_RAW_RLWINM(_R0, tmp_reg, 0, 26, 26)); EMIT(PPC_RAW_SRAW(tmp_reg, dst_reg_h, tmp_reg)); EMIT(PPC_RAW_SRAW(dst_reg_h, dst_reg_h, src_reg)); - EMIT(PPC_RAW_SLW(tmp_reg, tmp_reg, __REG_R0)); + EMIT(PPC_RAW_SLW(tmp_reg, tmp_reg, _R0)); EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg)); break; case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */ @@ -702,24 +702,24 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * * 2 bytes are already in their final position * -- byte 2 and 4 (of bytes 1, 2, 3 and 4) */ - EMIT(PPC_RAW_RLWINM(__REG_R0, dst_reg, 8, 0, 31)); + EMIT(PPC_RAW_RLWINM(_R0, dst_reg, 8, 0, 31)); /* Rotate 24 bits and insert byte 1 */ - EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg, 24, 0, 7)); + EMIT(PPC_RAW_RLWIMI(_R0, dst_reg, 24, 0, 7)); /* Rotate 24 bits and insert byte 3 */ - EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg, 24, 16, 23)); - EMIT(PPC_RAW_MR(dst_reg, __REG_R0)); + EMIT(PPC_RAW_RLWIMI(_R0, dst_reg, 24, 16, 23)); + EMIT(PPC_RAW_MR(dst_reg, _R0)); break; case 64: bpf_set_seen_register(ctx, tmp_reg); EMIT(PPC_RAW_RLWINM(tmp_reg, dst_reg, 8, 0, 31)); - EMIT(PPC_RAW_RLWINM(__REG_R0, dst_reg_h, 8, 0, 31)); + EMIT(PPC_RAW_RLWINM(_R0, dst_reg_h, 8, 0, 31)); /* Rotate 24 bits and insert byte 1 */ EMIT(PPC_RAW_RLWIMI(tmp_reg, dst_reg, 24, 0, 7)); - 
EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg_h, 24, 0, 7)); + EMIT(PPC_RAW_RLWIMI(_R0, dst_reg_h, 24, 0, 7)); /* Rotate 24 bits and insert byte 3 */ EMIT(PPC_RAW_RLWIMI(tmp_reg, dst_reg, 24, 16, 23)); - EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg_h, 24, 16, 23)); - EMIT(PPC_RAW_MR(dst_reg, __REG_R0)); + EMIT(PPC_RAW_RLWIMI(_R0, dst_reg_h, 24, 16, 23)); + EMIT(PPC_RAW_MR(dst_reg, _R0)); EMIT(PPC_RAW_MR(dst_reg_h, tmp_reg)); break; } @@ -744,32 +744,32 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * EMIT(PPC_RAW_STB(src_reg, dst_reg, off)); break; case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */ - PPC_LI32(__REG_R0, imm); - EMIT(PPC_RAW_STB(__REG_R0, dst_reg, off)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_STB(_R0, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */ EMIT(PPC_RAW_STH(src_reg, dst_reg, off)); break; case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */ - PPC_LI32(__REG_R0, imm); - EMIT(PPC_RAW_STH(__REG_R0, dst_reg, off)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_STH(_R0, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */ EMIT(PPC_RAW_STW(src_reg, dst_reg, off)); break; case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */ - PPC_LI32(__REG_R0, imm); - EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_STW(_R0, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */ EMIT(PPC_RAW_STW(src_reg_h, dst_reg, off)); EMIT(PPC_RAW_STW(src_reg, dst_reg, off + 4)); break; case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */ - PPC_LI32(__REG_R0, imm); - EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off + 4)); - PPC_EX32(__REG_R0, imm); - EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_STW(_R0, dst_reg, off + 4)); + PPC_EX32(_R0, imm); + EMIT(PPC_RAW_STW(_R0, dst_reg, off)); break; /* @@ -780,11 +780,11 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * /* Get offset into TMP_REG */ EMIT(PPC_RAW_LI(tmp_reg, off)); /* load value from memory into r0 */ - EMIT(PPC_RAW_LWARX(__REG_R0, tmp_reg, dst_reg, 0)); + EMIT(PPC_RAW_LWARX(_R0, tmp_reg, dst_reg, 0)); /* add value from src_reg into this */ - EMIT(PPC_RAW_ADD(__REG_R0, __REG_R0, src_reg)); + EMIT(PPC_RAW_ADD(_R0, _R0, src_reg)); /* store result back */ - EMIT(PPC_RAW_STWCX(__REG_R0, tmp_reg, dst_reg)); + EMIT(PPC_RAW_STWCX(_R0, tmp_reg, dst_reg)); /* we're done if this succeeded */ PPC_BCC_SHORT(COND_NE, (ctx->idx - 3) * 4); break; @@ -852,14 +852,14 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * return ret; if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) { - EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5) - 1, __REG_R1, 8)); - EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5), __REG_R1, 12)); + EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5) - 1, _R1, 8)); + EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5), _R1, 12)); } bpf_jit_emit_func_call_rel(image, ctx, func_addr); - EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0) - 1, __REG_R3)); - EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0), __REG_R4)); + EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0) - 1, _R3)); + EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0), _R4)); break; /* @@ -967,12 +967,12 @@ cond_branch: EMIT(PPC_RAW_CMPW(dst_reg, src_reg)); break; case BPF_JMP | BPF_JSET | BPF_X: - EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg_h, src_reg_h)); + EMIT(PPC_RAW_AND_DOT(_R0, dst_reg_h, src_reg_h)); PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); - 
EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, src_reg)); + EMIT(PPC_RAW_AND_DOT(_R0, dst_reg, src_reg)); break; case BPF_JMP32 | BPF_JSET | BPF_X: { - EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, src_reg)); + EMIT(PPC_RAW_AND_DOT(_R0, dst_reg, src_reg)); break; case BPF_JMP | BPF_JNE | BPF_K: case BPF_JMP | BPF_JEQ | BPF_K: @@ -990,11 +990,11 @@ cond_branch: EMIT(PPC_RAW_CMPLWI(dst_reg, imm)); } else { /* sign-extending load ... but unsigned comparison */ - PPC_EX32(__REG_R0, imm); - EMIT(PPC_RAW_CMPLW(dst_reg_h, __REG_R0)); - PPC_LI32(__REG_R0, imm); + PPC_EX32(_R0, imm); + EMIT(PPC_RAW_CMPLW(dst_reg_h, _R0)); + PPC_LI32(_R0, imm); PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); - EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0)); + EMIT(PPC_RAW_CMPLW(dst_reg, _R0)); } break; case BPF_JMP32 | BPF_JNE | BPF_K: @@ -1006,8 +1006,8 @@ cond_branch: if (imm >= 0 && imm < 65536) { EMIT(PPC_RAW_CMPLWI(dst_reg, imm)); } else { - PPC_LI32(__REG_R0, imm); - EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_CMPLW(dst_reg, _R0)); } break; } @@ -1022,9 +1022,9 @@ cond_branch: } else { /* sign-extending load */ EMIT(PPC_RAW_CMPWI(dst_reg_h, imm < 0 ? -1 : 0)); - PPC_LI32(__REG_R0, imm); + PPC_LI32(_R0, imm); PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); - EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0)); + EMIT(PPC_RAW_CMPLW(dst_reg, _R0)); } break; case BPF_JMP32 | BPF_JSGT | BPF_K: @@ -1039,32 +1039,32 @@ cond_branch: EMIT(PPC_RAW_CMPWI(dst_reg, imm)); } else { /* sign-extending load */ - PPC_LI32(__REG_R0, imm); - EMIT(PPC_RAW_CMPW(dst_reg, __REG_R0)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_CMPW(dst_reg, _R0)); } break; case BPF_JMP | BPF_JSET | BPF_K: /* andi does not sign-extend the immediate */ if (imm >= 0 && imm < 32768) { /* PPC_ANDI is _only/always_ dot-form */ - EMIT(PPC_RAW_ANDI(__REG_R0, dst_reg, imm)); + EMIT(PPC_RAW_ANDI(_R0, dst_reg, imm)); } else { - PPC_LI32(__REG_R0, imm); + PPC_LI32(_R0, imm); if (imm < 0) { EMIT(PPC_RAW_CMPWI(dst_reg_h, 0)); PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); } - EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, __REG_R0)); + EMIT(PPC_RAW_AND_DOT(_R0, dst_reg, _R0)); } break; case BPF_JMP32 | BPF_JSET | BPF_K: /* andi does not sign-extend the immediate */ if (imm >= -32768 && imm < 32768) { /* PPC_ANDI is _only/always_ dot-form */ - EMIT(PPC_RAW_ANDI(__REG_R0, dst_reg, imm)); + EMIT(PPC_RAW_ANDI(_R0, dst_reg, imm)); } else { - PPC_LI32(__REG_R0, imm); - EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, __REG_R0)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_AND_DOT(_R0, dst_reg, _R0)); } break; } -- cgit v1.2.3 From ef909ba954145e35c9e21352133e5e99c64ab3f4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 20 May 2021 10:23:09 +0000 Subject: powerpc/lib/feature-fixups: Use PPC_RAW_xxx() macros Use PPC_RAW_xxx() macros instead of open coding assembly opcodes. 
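As a sanity check on such conversions, one can reproduce a deleted magic number from the macros. The field helpers below are assumed mirrors of the kernel's ___PPC_RS()/__PPC_SPR() (RS in bits 21-25, SPR number split into two swapped 5-bit halves); the SPR number itself comes from the old comment "mtspr 0x131, r13 (HSPRG1)" in the diff:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Assumed mirrors of the kernel field helpers. */
    #define RS(r)  (((uint32_t)(r) & 0x1f) << 21)
    #define SPR(n) ((((uint32_t)(n) & 0x1f) << 16) | ((((uint32_t)(n) >> 5) & 0x1f) << 11))
    #define PPC_RAW_MTSPR(spr, d) (0x7c0003a6u | RS(d) | SPR(spr))

    #define SPRN_HSPRG1 0x131   /* from the old comment "mtspr 0x131, r13 (HSPRG1)" */

    int main(void)
    {
            /* The macro reproduces the magic number the patch deletes. */
            assert(PPC_RAW_MTSPR(SPRN_HSPRG1, 13) == 0x7db14ba6u);
            printf("mtspr HSPRG1,r13 = 0x%08x\n", PPC_RAW_MTSPR(SPRN_HSPRG1, 13));
            return 0;
    }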
Signed-off-by: Christophe Leroy [mpe: Fix bad conversion in do_stf_exit_barrier_fixups()] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e79cd8e111ca13bf8c61a384bac365aa7e207647.1621506159.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ppc-opcode.h | 4 ++ arch/powerpc/include/asm/reg.h | 1 + arch/powerpc/lib/feature-fixups.c | 121 +++++++++++++++++----------------- 3 files changed, 65 insertions(+), 61 deletions(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 033a31f86ae2..34ee13fb9315 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -412,6 +412,8 @@ #define PPC_RAW_DCBFPS(a, b) (0x7c0000ac | ___PPC_RA(a) | ___PPC_RB(b) | (4 << 21)) #define PPC_RAW_DCBSTPS(a, b) (0x7c0000ac | ___PPC_RA(a) | ___PPC_RB(b) | (6 << 21)) #define PPC_RAW_SC() (0x44000002) +#define PPC_RAW_SYNC() (0x7c0004ac) +#define PPC_RAW_ISYNC() (0x4c00012c) /* * Define what the VSX XX1 form instructions will look like, then add @@ -564,6 +566,8 @@ #define PPC_RAW_NEG(d, a) (0x7c0000d0 | ___PPC_RT(d) | ___PPC_RA(a)) #define PPC_RAW_MFSPR(d, spr) (0x7c0002a6 | ___PPC_RT(d) | __PPC_SPR(spr)) +#define PPC_RAW_MTSPR(spr, d) (0x7c0003a6 | ___PPC_RS(d) | __PPC_SPR(spr)) +#define PPC_RAW_EIEIO() (0x7c0006ac) /* Deal with instructions that older assemblers aren't aware of */ #define PPC_BCCTR_FLUSH stringify_in_c(.long PPC_INST_BCCTR_FLUSH) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 7c81d3e563b2..c70782eae3a0 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -393,6 +393,7 @@ #define SPRN_PMMAR 0x356 /* Power Management Memory Activity Register */ #define SPRN_PSSCR 0x357 /* Processor Stop Status and Control Register (ISA 3.0) */ #define SPRN_PSSCR_PR 0x337 /* PSSCR ISA 3.0, privileged mode access */ +#define SPRN_TRIG2 0x372 #define SPRN_PMCR 0x374 /* Power Management Control Register */ #define SPRN_RWMR 0x375 /* Region-Weighting Mode Register */ diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index fe26f2fa0f3f..91bd2000d180 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -91,7 +91,7 @@ static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) return 1; } - nop = ppc_inst(PPC_INST_NOP); + nop = ppc_inst(PPC_RAW_NOP()); for (; dest < end; dest = ppc_inst_next(dest, &nop)) raw_patch_instruction(dest, nop); @@ -128,21 +128,21 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types) start = PTRRELOC(&__start___stf_entry_barrier_fixup); end = PTRRELOC(&__stop___stf_entry_barrier_fixup); - instrs[0] = 0x60000000; /* nop */ - instrs[1] = 0x60000000; /* nop */ - instrs[2] = 0x60000000; /* nop */ + instrs[0] = PPC_RAW_NOP(); + instrs[1] = PPC_RAW_NOP(); + instrs[2] = PPC_RAW_NOP(); i = 0; if (types & STF_BARRIER_FALLBACK) { - instrs[i++] = 0x7d4802a6; /* mflr r10 */ - instrs[i++] = 0x60000000; /* branch patched below */ - instrs[i++] = 0x7d4803a6; /* mtlr r10 */ + instrs[i++] = PPC_RAW_MFLR(_R10); + instrs[i++] = PPC_RAW_NOP(); /* branch patched below */ + instrs[i++] = PPC_RAW_MTLR(_R10); } else if (types & STF_BARRIER_EIEIO) { - instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */ + instrs[i++] = PPC_RAW_EIEIO() | 0x02000000; /* eieio + bit 6 hint */ } else if (types & STF_BARRIER_SYNC_ORI) { - instrs[i++] = 0x7c0004ac; /* hwsync */ - instrs[i++] = 0xe94d0000; /* ld r10,0(r13) */ - instrs[i++] = 0x63ff0000; /* ori 31,31,0
speculation barrier */ + instrs[i++] = PPC_RAW_SYNC(); + instrs[i++] = PPC_RAW_LD(_R10, _R13, 0); + instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ } for (i = 0; start < end; start++, i++) { @@ -180,32 +180,31 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types) start = PTRRELOC(&__start___stf_exit_barrier_fixup); end = PTRRELOC(&__stop___stf_exit_barrier_fixup); - instrs[0] = 0x60000000; /* nop */ - instrs[1] = 0x60000000; /* nop */ - instrs[2] = 0x60000000; /* nop */ - instrs[3] = 0x60000000; /* nop */ - instrs[4] = 0x60000000; /* nop */ - instrs[5] = 0x60000000; /* nop */ + instrs[0] = PPC_RAW_NOP(); + instrs[1] = PPC_RAW_NOP(); + instrs[2] = PPC_RAW_NOP(); + instrs[3] = PPC_RAW_NOP(); + instrs[4] = PPC_RAW_NOP(); + instrs[5] = PPC_RAW_NOP(); i = 0; if (types & STF_BARRIER_FALLBACK || types & STF_BARRIER_SYNC_ORI) { if (cpu_has_feature(CPU_FTR_HVMODE)) { - instrs[i++] = 0x7db14ba6; /* mtspr 0x131, r13 (HSPRG1) */ - instrs[i++] = 0x7db04aa6; /* mfspr r13, 0x130 (HSPRG0) */ + instrs[i++] = PPC_RAW_MTSPR(SPRN_HSPRG1, _R13); + instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_HSPRG0); } else { - instrs[i++] = 0x7db243a6; /* mtsprg 2,r13 */ - instrs[i++] = 0x7db142a6; /* mfsprg r13,1 */ + instrs[i++] = PPC_RAW_MTSPR(SPRN_SPRG2, _R13); + instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_SPRG1); } - instrs[i++] = 0x7c0004ac; /* hwsync */ - instrs[i++] = 0xe9ad0000; /* ld r13,0(r13) */ - instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ - if (cpu_has_feature(CPU_FTR_HVMODE)) { - instrs[i++] = 0x7db14aa6; /* mfspr r13, 0x131 (HSPRG1) */ - } else { - instrs[i++] = 0x7db242a6; /* mfsprg r13,2 */ - } + instrs[i++] = PPC_RAW_SYNC(); + instrs[i++] = PPC_RAW_LD(_R13, _R13, 0); + instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ + if (cpu_has_feature(CPU_FTR_HVMODE)) + instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_HSPRG1); + else + instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_SPRG2); } else if (types & STF_BARRIER_EIEIO) { - instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */ + instrs[i++] = PPC_RAW_EIEIO() | 0x02000000; /* eieio + bit 6 hint */ } for (i = 0; start < end; start++, i++) { @@ -258,24 +257,24 @@ void do_uaccess_flush_fixups(enum l1d_flush_type types) start = PTRRELOC(&__start___uaccess_flush_fixup); end = PTRRELOC(&__stop___uaccess_flush_fixup); - instrs[0] = 0x60000000; /* nop */ - instrs[1] = 0x60000000; /* nop */ - instrs[2] = 0x60000000; /* nop */ - instrs[3] = 0x4e800020; /* blr */ + instrs[0] = PPC_RAW_NOP(); + instrs[1] = PPC_RAW_NOP(); + instrs[2] = PPC_RAW_NOP(); + instrs[3] = PPC_RAW_BLR(); i = 0; if (types == L1D_FLUSH_FALLBACK) { - instrs[3] = 0x60000000; /* nop */ + instrs[3] = PPC_RAW_NOP(); /* fallthrough to fallback flush */ } if (types & L1D_FLUSH_ORI) { - instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ - instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ + instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */ } if (types & L1D_FLUSH_MTTRIG) - instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0); for (i = 0; start < end; start++, i++) { dest = (void *)start + *start; @@ -306,24 +305,24 @@ static int __do_entry_flush_fixups(void *data) long *start, *end; int i; - instrs[0] = 0x60000000; /* nop */ - instrs[1] = 0x60000000; /* nop */ - instrs[2] = 0x60000000; /* nop */ + instrs[0] = PPC_RAW_NOP(); + instrs[1] = PPC_RAW_NOP(); + instrs[2] = PPC_RAW_NOP(); i = 0; if (types == L1D_FLUSH_FALLBACK) { - 
instrs[i++] = 0x7d4802a6; /* mflr r10 */ - instrs[i++] = 0x60000000; /* branch patched below */ - instrs[i++] = 0x7d4803a6; /* mtlr r10 */ + instrs[i++] = PPC_RAW_MFLR(_R10); + instrs[i++] = PPC_RAW_NOP(); /* branch patched below */ + instrs[i++] = PPC_RAW_MTLR(_R10); } if (types & L1D_FLUSH_ORI) { - instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ - instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ + instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */ } if (types & L1D_FLUSH_MTTRIG) - instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0); /* * If we're patching in or out the fallback flush we need to be careful about the @@ -421,22 +420,22 @@ void do_rfi_flush_fixups(enum l1d_flush_type types) start = PTRRELOC(&__start___rfi_flush_fixup); end = PTRRELOC(&__stop___rfi_flush_fixup); - instrs[0] = 0x60000000; /* nop */ - instrs[1] = 0x60000000; /* nop */ - instrs[2] = 0x60000000; /* nop */ + instrs[0] = PPC_RAW_NOP(); + instrs[1] = PPC_RAW_NOP(); + instrs[2] = PPC_RAW_NOP(); if (types & L1D_FLUSH_FALLBACK) /* b .+16 to fallback flush */ - instrs[0] = 0x48000010; + instrs[0] = PPC_INST_BRANCH | 16; i = 0; if (types & L1D_FLUSH_ORI) { - instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ - instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ + instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */ } if (types & L1D_FLUSH_MTTRIG) - instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0); for (i = 0; start < end; start++, i++) { dest = (void *)start + *start; @@ -467,11 +466,11 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_ start = fixup_start; end = fixup_end; - instr = 0x60000000; /* nop */ + instr = PPC_RAW_NOP(); if (enable) { pr_info("barrier-nospec: using ORI speculation barrier\n"); - instr = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + instr = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ } for (i = 0; start < end; start++, i++) { @@ -508,13 +507,13 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_ start = fixup_start; end = fixup_end; - instr[0] = PPC_INST_NOP; - instr[1] = PPC_INST_NOP; + instr[0] = PPC_RAW_NOP(); + instr[1] = PPC_RAW_NOP(); if (enable) { pr_info("barrier-nospec: using isync; sync as speculation barrier\n"); - instr[0] = PPC_INST_ISYNC; - instr[1] = PPC_INST_SYNC; + instr[0] = PPC_RAW_ISYNC(); + instr[1] = PPC_RAW_SYNC(); } for (i = 0; start < end; start++, i++) { @@ -536,7 +535,7 @@ static void patch_btb_flush_section(long *curr) end = (void *)curr + *(curr + 1); for (; start < end; start++) { pr_devel("patching dest %lx\n", (unsigned long)start); - patch_instruction((struct ppc_inst *)start, ppc_inst(PPC_INST_NOP)); + patch_instruction((struct ppc_inst *)start, ppc_inst(PPC_RAW_NOP())); } } -- cgit v1.2.3 From deefd0ae990a689089ea1e4f5ad41799d63d4fd9 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 20 May 2021 10:23:10 +0000 Subject: powerpc/traps: Start using PPC_RAW_xx() macros Start using PPC_RAW_xx() macros where relevant. PPC_INST_SYNC is used to represent both the 'sync' instruction and the family of synchronisation instructions. Keep it for the latter; maybe we'll change the name in the future to avoid confusion.
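The patched check_io_access() walk reads naturally as a small pure function; a hedged stand-alone rendition (opcode values mirrored from the diffs, OP_TRAP assumed to be twi's primary opcode, 3):

    #include <stdint.h>
    #include <stdio.h>

    #define NOP     0x60000000u   /* ori r0,r0,0 */
    #define ISYNC   0x4c00012cu
    #define SYNC    0x7c0004acu
    #define OP_TRAP 3             /* assumed: primary opcode of twi */

    /* Stand-alone rendition of the backtrack in check_io_access(): skip an
     * optional nop or isync, then expect the guarding sync or trap. */
    static int guarded(const uint32_t *nip)
    {
            if (*nip == NOP)
                    nip -= 2;
            else if (*nip == ISYNC)
                    --nip;
            return *nip == SYNC || (*nip >> 26) == OP_TRAP;
    }

    int main(void)
    {
            /* sync; ld r10,0(r13); nop -- start the walk at the trailing nop */
            uint32_t seq[] = { SYNC, 0xe94d0000u, NOP };

            printf("%s\n", guarded(&seq[2]) ? "guarded" : "not guarded");
            return 0;
    }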
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0945c155d6cb113431185fc1296ac127359fe29b.1621506159.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ppc-opcode.h | 1 - arch/powerpc/kernel/traps.c | 7 ++++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 34ee13fb9315..615846e631fa 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -256,7 +256,6 @@ #define PPC_INST_LWSYNC 0x7c2004ac #define PPC_INST_SYNC 0x7c0004ac #define PPC_INST_SYNC_MASK 0xfc0007fe -#define PPC_INST_ISYNC 0x4c00012c #define PPC_INST_MCRXR 0x7c000400 #define PPC_INST_MCRXR_MASK 0xfc0007fe #define PPC_INST_MFSPR_PVR 0x7c1f42a6 diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index b4ab95c9e94a..c929d93c35d0 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -67,6 +67,7 @@ #include #include #include +#include #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE) int (*__debugger)(struct pt_regs *regs) __read_mostly; @@ -537,11 +538,11 @@ static inline int check_io_access(struct pt_regs *regs) * For the debug message, we look at the preceding * load or store. */ - if (*nip == PPC_INST_NOP) + if (*nip == PPC_RAW_NOP()) nip -= 2; - else if (*nip == PPC_INST_ISYNC) + else if (*nip == PPC_RAW_ISYNC()) --nip; - if (*nip == PPC_INST_SYNC || (*nip >> 26) == OP_TRAP) { + if (*nip == PPC_RAW_SYNC() || get_op(*nip) == OP_TRAP) { unsigned int rb; --nip; -- cgit v1.2.3 From f30becb5e9ec086257162f78be491c0920c616b7 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 20 May 2021 10:23:11 +0000 Subject: powerpc: Replace PPC_INST_NOP by PPC_RAW_NOP() On the road to removing all PPC_INST_xx defines in asm/ppc-opcode.h, change PPC_INST_NOP to PPC_RAW_NOP().
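Worth noting: PPC_RAW_NOP() is defined in the diff below as PPC_RAW_ORI(0, 0, 0), i.e. "ori r0,r0,0", which encodes to the same 0x60000000 as the removed constant; the ori-form barriers seen earlier in the series fall out of the same encoding. A small sketch (the field layout is an assumed mirror of the kernel's, and this macro's argument order is the sketch's own):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Assumed ori layout: opcode 24, RS in bits 21-25, RA in 16-20, 16-bit UI. */
    #define ORI(a, s, i) (0x60000000u | (((uint32_t)(s) & 0x1f) << 21) | \
                          (((uint32_t)(a) & 0x1f) << 16) | ((uint32_t)(i) & 0xffffu))
    #define NOP() ORI(0, 0, 0)

    int main(void)
    {
            assert(NOP() == 0x60000000u);           /* the removed PPC_INST_NOP */
            assert(ORI(31, 31, 0) == 0x63ff0000u);  /* speculation barrier */
            assert(ORI(30, 30, 0) == 0x63de0000u);  /* L1d flush hint */
            puts("ori-family nops encode as expected");
            return 0;
    }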
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ad46c195ca1b8572629ef07ba6bfe247585239a6.1621506159.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ppc-opcode.h | 3 +-- arch/powerpc/kernel/crash_dump.c | 2 +- arch/powerpc/kernel/jump_label.c | 2 +- arch/powerpc/kernel/setup_32.c | 2 +- arch/powerpc/lib/test_emulate_step.c | 2 +- 5 files changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 615846e631fa..1f484157ce4f 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -261,7 +261,6 @@ #define PPC_INST_MFSPR_PVR 0x7c1f42a6 #define PPC_INST_MFSPR_PVR_MASK 0xfc1ffffe #define PPC_INST_MTMSRD 0x7c000164 -#define PPC_INST_NOP 0x60000000 #define PPC_INST_POPCNTB 0x7c0000f4 #define PPC_INST_POPCNTB_MASK 0xfc0007fe #define PPC_INST_RFEBB 0x4c000124 @@ -476,7 +475,7 @@ #define PPC_RAW_ADD_DOT(t, a, b) (PPC_INST_ADD | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1) #define PPC_RAW_ADDC(t, a, b) (0x7c000014 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_ADDC_DOT(t, a, b) (0x7c000014 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1) -#define PPC_RAW_NOP() (PPC_INST_NOP) +#define PPC_RAW_NOP() PPC_RAW_ORI(0, 0, 0) #define PPC_RAW_BLR() (0x4e800020) #define PPC_RAW_BLRL() (0x4e800021) #define PPC_RAW_MTLR(r) (0x7c0803a6 | ___PPC_RT(r)) diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 735e89337398..adb39da684be 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -45,7 +45,7 @@ static void __init create_trampoline(unsigned long addr) * branch to "addr" we jump to ("addr" + 32 MB). Although it requires * two instructions it doesn't require any registers. 
*/ - patch_instruction(p, ppc_inst(PPC_INST_NOP)); + patch_instruction(p, ppc_inst(PPC_RAW_NOP())); patch_branch((void *)p + 4, addr + PHYSICAL_START, 0); } diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c index ce87dc5ea23c..90c97246384c 100644 --- a/arch/powerpc/kernel/jump_label.c +++ b/arch/powerpc/kernel/jump_label.c @@ -16,5 +16,5 @@ void arch_jump_label_transform(struct jump_entry *entry, if (type == JUMP_LABEL_JMP) patch_branch(addr, jump_entry_target(entry), 0); else - patch_instruction(addr, ppc_inst(PPC_INST_NOP)); + patch_instruction(addr, ppc_inst(PPC_RAW_NOP())); } diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index d7c1f92152af..a369fb27ccaa 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -85,7 +85,7 @@ notrace void __init machine_init(u64 dt_ptr) /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); - patch_instruction_site(&patch__memcpy_nocache, ppc_inst(PPC_INST_NOP)); + patch_instruction_site(&patch__memcpy_nocache, ppc_inst(PPC_RAW_NOP())); create_cond_branch(&insn, addr, branch_target(addr), 0x820000); patch_instruction(addr, insn); /* replace b by bne cr0 */ diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c index 6aef00293d43..1cbfa468813b 100644 --- a/arch/powerpc/lib/test_emulate_step.c +++ b/arch/powerpc/lib/test_emulate_step.c @@ -922,7 +922,7 @@ static struct compute_test compute_tests[] = { .subtests = { { .descr = "R0 = LONG_MAX", - .instr = ppc_inst(PPC_INST_NOP), + .instr = ppc_inst(PPC_RAW_NOP()), .regs = { .gpr[0] = LONG_MAX, } -- cgit v1.2.3 From b3a9e523237013477bea914b7fbfbe420428b988 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 20 May 2021 13:50:38 +0000 Subject: powerpc/inst: Fix sparse detection on get_user_instr() get_user_instr() lacks sparse detection for the __user tag. This is because __gui_ptr is assigned with a cast. Fix that by adding a __chk_user_ptr() Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0320e5b41a794fd456ab8c5993bbfadcf9e1d8b4.1621516826.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/inst.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index 268d3bd073c8..887ef150fdda 100644 --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -12,6 +12,8 @@ unsigned long __gui_ptr = (unsigned long)ptr; \ struct ppc_inst __gui_inst; \ unsigned int __prefix, __suffix; \ + \ + __chk_user_ptr(ptr); \ __gui_ret = gu_op(__prefix, (unsigned int __user *)__gui_ptr); \ if (__gui_ret == 0) { \ if ((__prefix >> 26) == OP_PREFIX) { \ @@ -29,7 +31,10 @@ }) #else /* !CONFIG_PPC64 */ #define ___get_user_instr(gu_op, dest, ptr) \ - gu_op((dest).val, (u32 __user *)(ptr)) +({ \ + __chk_user_ptr(ptr); \ + gu_op((dest).val, (u32 __user *)(ptr)); \ +}) #endif /* CONFIG_PPC64 */ #define get_user_instr(x, ptr) \ -- cgit v1.2.3 From 9134806e149ebb214f122f0f84254096d3768bb2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 20 May 2021 13:50:39 +0000 Subject: powerpc/inst: Reduce casts in get_user_instr() Declare __gui_ptr as 'u32 *' instead of casting it at each use to 'unsigned int *' (which is an equivalent type). 
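A trivial stand-alone illustration of why the typed pointer works: with a u32 pointer, "+ 1" advances by one 4-byte word, exactly where a prefixed instruction's suffix lives (OP_PREFIX is 1, per the primary-opcode check visible in the macro):

    #include <stdint.h>
    #include <stdio.h>

    #define OP_PREFIX 1   /* primary opcode of a prefixed instruction */

    int main(void)
    {
            /* A fake prefixed instruction image: prefix word, then suffix word. */
            uint32_t code[2] = { ((uint32_t)OP_PREFIX << 26) | 0x12345u, 0x38600001u };
            uint32_t *p = code;

            /* With a typed pointer, "+ 1" is the next 4-byte word -- the same
             * access the macro performs with __gui_ptr + 1. */
            if ((*p >> 26) == OP_PREFIX)
                    printf("prefix 0x%08x, suffix 0x%08x\n", *p, *(p + 1));
            return 0;
    }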
Signed-off-by: Christophe Leroy [mpe: Use u32 * instead of unsigned int *] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/2c2123998e05535d08ba03a96ea1eea921d06a86.1621516826.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/inst.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index 887ef150fdda..530583afa946 100644 --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -9,16 +9,15 @@ #define ___get_user_instr(gu_op, dest, ptr) \ ({ \ long __gui_ret = 0; \ - unsigned long __gui_ptr = (unsigned long)ptr; \ + u32 __user *__gui_ptr = (u32 __user *)ptr; \ struct ppc_inst __gui_inst; \ unsigned int __prefix, __suffix; \ \ __chk_user_ptr(ptr); \ - __gui_ret = gu_op(__prefix, (unsigned int __user *)__gui_ptr); \ + __gui_ret = gu_op(__prefix, __gui_ptr); \ if (__gui_ret == 0) { \ if ((__prefix >> 26) == OP_PREFIX) { \ - __gui_ret = gu_op(__suffix, \ - (unsigned int __user *)__gui_ptr + 1); \ + __gui_ret = gu_op(__suffix, __gui_ptr + 1); \ __gui_inst = ppc_inst_prefix(__prefix, \ __suffix); \ } else { \ -- cgit v1.2.3 From 042e0860e1c1d60a0ab1ff3f16b7f420573133e0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 20 May 2021 13:50:40 +0000 Subject: powerpc/inst: Improve readability of get_user_instr() and friends Remove unneeded line splits. And remove unneeded local variable initialisation. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/fb097fda78cc6852905ef00f8f7bf371b6cc66f7.1621516826.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/inst.h | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index 530583afa946..dab26b0493d5 100644 --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -8,7 +8,7 @@ #define ___get_user_instr(gu_op, dest, ptr) \ ({ \ - long __gui_ret = 0; \ + long __gui_ret; \ u32 __user *__gui_ptr = (u32 __user *)ptr; \ struct ppc_inst __gui_inst; \ unsigned int __prefix, __suffix; \ @@ -18,8 +18,7 @@ if (__gui_ret == 0) { \ if ((__prefix >> 26) == OP_PREFIX) { \ __gui_ret = gu_op(__suffix, __gui_ptr + 1); \ - __gui_inst = ppc_inst_prefix(__prefix, \ - __suffix); \ + __gui_inst = ppc_inst_prefix(__prefix, __suffix); \ } else { \ __gui_inst = ppc_inst(__prefix); \ } \ @@ -36,11 +35,9 @@ }) #endif /* CONFIG_PPC64 */ -#define get_user_instr(x, ptr) \ - ___get_user_instr(get_user, x, ptr) +#define get_user_instr(x, ptr) ___get_user_instr(get_user, x, ptr) -#define __get_user_instr(x, ptr) \ - ___get_user_instr(__get_user, x, ptr) +#define __get_user_instr(x, ptr) ___get_user_instr(__get_user, x, ptr) /* * Instruction data type for POWER @@ -75,13 +72,12 @@ static inline u32 ppc_inst_suffix(struct ppc_inst x) static inline bool ppc_inst_prefixed(struct ppc_inst x) { - return (ppc_inst_primary_opcode(x) == 1) && ppc_inst_suffix(x) != 0xff; + return ppc_inst_primary_opcode(x) == OP_PREFIX && ppc_inst_suffix(x) != 0xff; } static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x) { - return ppc_inst_prefix(swab32(ppc_inst_val(x)), - swab32(ppc_inst_suffix(x))); + return ppc_inst_prefix(swab32(ppc_inst_val(x)), swab32(ppc_inst_suffix(x))); } static inline struct ppc_inst ppc_inst_read(const struct ppc_inst *ptr) -- cgit v1.2.3 From 036b5560bebc72c61d955ae0b115e8e69da8a563 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 20 May 2021 13:50:41 +0000 
Subject: powerpc/inst: Avoid pointer dereferencing in ppc_inst_equal() Avoid casting/dereferencing ppc_inst() as u64*, check each member of the struct when relevant. And remove the 0xff initialisation of the suffix for non-prefixed instructions. An instruction with 0xff as a suffix might be invalid, but it is still a prefixed instruction and has to be treated as such. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d8b155e930b7a9708ca110e8ff0ace6713a7af75.1621516826.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/inst.h | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index dab26b0493d5..1ba65052ed21 100644 --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -61,7 +61,7 @@ static inline int ppc_inst_primary_opcode(struct ppc_inst x) } #ifdef CONFIG_PPC64 -#define ppc_inst(x) ((struct ppc_inst){ .val = (x), .suffix = 0xff }) +#define ppc_inst(x) ((struct ppc_inst){ .val = (x) }) #define ppc_inst_prefix(x, y) ((struct ppc_inst){ .val = (x), .suffix = (y) }) @@ -72,7 +72,7 @@ static inline u32 ppc_inst_suffix(struct ppc_inst x) static inline bool ppc_inst_prefixed(struct ppc_inst x) { - return ppc_inst_primary_opcode(x) == OP_PREFIX && ppc_inst_suffix(x) != 0xff; + return ppc_inst_primary_opcode(x) == OP_PREFIX; } static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x) @@ -93,11 +93,6 @@ static inline struct ppc_inst ppc_inst_read(const struct ppc_inst *ptr) } } -static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y) -{ - return *(u64 *)&x == *(u64 *)&y; -} - #else #define ppc_inst(x) ((struct ppc_inst){ .val = x }) @@ -124,13 +119,17 @@ static inline struct ppc_inst ppc_inst_read(const struct ppc_inst *ptr) return *ptr; } +#endif /* CONFIG_PPC64 */ + static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y) { - return ppc_inst_val(x) == ppc_inst_val(y); + if (ppc_inst_val(x) != ppc_inst_val(y)) + return false; + if (!ppc_inst_prefixed(x)) + return true; + return ppc_inst_suffix(x) == ppc_inst_suffix(y); } -#endif /* CONFIG_PPC64 */ - static inline int ppc_inst_len(struct ppc_inst x) { return ppc_inst_prefixed(x) ? 8 : 4; -- cgit v1.2.3 From 18c85964b10b7b78a5cb59a4959a5f82fdc77e4c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 20 May 2021 13:50:42 +0000 Subject: powerpc: Do not dereference code as 'struct ppc_inst' (uprobe, code-patching, feature-fixups) 'struct ppc_inst' is an internal structure used to represent an instruction; it is not directly the representation of that instruction in text code. It is not meant to map and dereference code. Dereferencing code directly through 'struct ppc_inst' has two main issues: - On powerpc, structs are expected to be 8-byte aligned while code is spread every 4 bytes. - Should a non-prefixed instruction lie at the end of a page and the following page not be mapped, it would generate a page fault. In-memory code must be accessed with ppc_inst_read().
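The page-fault hazard is easy to reproduce in a userspace sketch (POSIX mmap stands in for an unmapped kernel text page; the faulting read is deliberately not performed):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
            long page = sysconf(_SC_PAGESIZE);
            uint8_t *buf = mmap(NULL, 2 * page, PROT_READ | PROT_WRITE,
                                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            uint32_t insn = 0x60000000u, word;   /* a nop in the last word */

            if (buf == MAP_FAILED)
                    return 1;
            /* Make the second page inaccessible, like an unmapped text page. */
            mprotect(buf + page, page, PROT_NONE);
            memcpy(buf + page - 4, &insn, 4);

            /* A 4-byte read of the non-prefixed instruction is fine... */
            memcpy(&word, buf + page - 4, 4);
            printf("4-byte read ok: 0x%08x\n", word);
            /* ...but an 8-byte struct-sized load at the same address would cross
             * into the PROT_NONE page and fault, which is why ppc_inst_read()
             * checks the primary opcode before touching a suffix. (Not executed.) */
            return 0;
    }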
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c9a1201dd0a66b4a0f91f0fb46d9385cbf030feb.1621516826.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/uprobes.c | 2 +- arch/powerpc/lib/code-patching.c | 8 ++++---- arch/powerpc/lib/feature-fixups.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c index 186f69b11e94..46971bb41d05 100644 --- a/arch/powerpc/kernel/uprobes.c +++ b/arch/powerpc/kernel/uprobes.c @@ -42,7 +42,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, return -EINVAL; if (cpu_has_feature(CPU_FTR_ARCH_31) && - ppc_inst_prefixed(auprobe->insn) && + ppc_inst_prefixed(ppc_inst_read(&auprobe->insn)) && (addr & 0x3f) == 60) { pr_info_ratelimited("Cannot register a uprobe on 64 byte unaligned prefixed instruction\n"); return -EINVAL; diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 870b30d9be2f..0308429b0d1a 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -329,13 +329,13 @@ static unsigned long branch_iform_target(const struct ppc_inst *instr) { signed long imm; - imm = ppc_inst_val(*instr) & 0x3FFFFFC; + imm = ppc_inst_val(ppc_inst_read(instr)) & 0x3FFFFFC; /* If the top bit of the immediate value is set this is negative */ if (imm & 0x2000000) imm -= 0x4000000; - if ((ppc_inst_val(*instr) & BRANCH_ABSOLUTE) == 0) + if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0) imm += (unsigned long)instr; return (unsigned long)imm; @@ -345,13 +345,13 @@ static unsigned long branch_bform_target(const struct ppc_inst *instr) { signed long imm; - imm = ppc_inst_val(*instr) & 0xFFFC; + imm = ppc_inst_val(ppc_inst_read(instr)) & 0xFFFC; /* If the top bit of the immediate value is set this is negative */ if (imm & 0x8000) imm -= 0x10000; - if ((ppc_inst_val(*instr) & BRANCH_ABSOLUTE) == 0) + if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0) imm += (unsigned long)instr; return (unsigned long)imm; diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 91bd2000d180..5a4bdede1140 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -51,7 +51,7 @@ static int patch_alt_instruction(struct ppc_inst *src, struct ppc_inst *dest, instr = ppc_inst_read(src); - if (instr_is_relative_branch(*src)) { + if (instr_is_relative_branch(ppc_inst_read(src))) { struct ppc_inst *target = (struct ppc_inst *)branch_target(src); /* Branch within the section doesn't need translating */ -- cgit v1.2.3 From 6c0d181daabcba286db9711eef8800b566fb1cce Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 20 May 2021 13:50:43 +0000 Subject: powerpc/lib/code-patching: Make instr_is_branch_to_addr() static instr_is_branch_to_addr() is only used in code-patching.c Make it static. 
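As an aside, the target arithmetic this helper relies on (shown in branch_iform_target() above) can be exercised stand-alone; a hedged rendition, with BRANCH_ABSOLUTE assumed to be the I-form AA bit, 0x2:

    #include <stdint.h>
    #include <stdio.h>

    #define BRANCH_ABSOLUTE 0x2   /* assumed: the I-form AA bit */

    /* Stand-alone version of the branch_iform_target() arithmetic: LI is the
     * 0x3FFFFFC field, sign bit 0x2000000, relative to the branch address
     * unless AA is set. */
    static unsigned long iform_target(uint32_t insn, unsigned long addr)
    {
            long imm = insn & 0x3FFFFFC;

            if (imm & 0x2000000)
                    imm -= 0x4000000;   /* sign-extend the displacement */
            if ((insn & BRANCH_ABSOLUTE) == 0)
                    imm += addr;
            return (unsigned long)imm;
    }

    int main(void)
    {
            /* 0x4bfffffc is "b .-4", the selftests' smallest negative branch. */
            printf("target = 0x%lx\n", iform_target(0x4bfffffc, 0x1000));  /* 0xffc */
            return 0;
    }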
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/5f6b9c8c83170ed310953eac2f5b14539bfc964a.1621516826.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/code-patching.h | 1 - arch/powerpc/lib/code-patching.c | 18 +++++++++--------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 22957c6068c8..fd60594bdd38 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -59,7 +59,6 @@ static inline int modify_instruction_site(s32 *site, unsigned int clr, unsigned int instr_is_relative_branch(struct ppc_inst instr); int instr_is_relative_link_branch(struct ppc_inst instr); -int instr_is_branch_to_addr(const struct ppc_inst *instr, unsigned long addr); unsigned long branch_target(const struct ppc_inst *instr); int translate_branch(struct ppc_inst *instr, const struct ppc_inst *dest, const struct ppc_inst *src); diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 0308429b0d1a..82f2c1edb498 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -367,15 +367,6 @@ unsigned long branch_target(const struct ppc_inst *instr) return 0; } -int instr_is_branch_to_addr(const struct ppc_inst *instr, unsigned long addr) -{ - if (instr_is_branch_iform(ppc_inst_read(instr)) || - instr_is_branch_bform(ppc_inst_read(instr))) - return branch_target(instr) == addr; - - return 0; -} - int translate_branch(struct ppc_inst *instr, const struct ppc_inst *dest, const struct ppc_inst *src) { @@ -410,6 +401,15 @@ void __patch_exception(int exc, unsigned long addr) #ifdef CONFIG_CODE_PATCHING_SELFTEST +static int instr_is_branch_to_addr(const struct ppc_inst *instr, unsigned long addr) +{ + if (instr_is_branch_iform(ppc_inst_read(instr)) || + instr_is_branch_bform(ppc_inst_read(instr))) + return branch_target(instr) == addr; + + return 0; +} + static void __init test_trampoline(void) { asm ("nop;\n"); -- cgit v1.2.3 From e90a21ea801d1776d9a786ad02354fd3fe23ce09 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 20 May 2021 13:50:44 +0000 Subject: powerpc/lib/code-patching: Don't use struct 'ppc_inst' for runnable code in tests. 'struct ppc_inst' is meant to represent an instruction internally, it is not meant to dereference code in memory. For testing code patching, use patch_instruction() to properly write into memory the code to be tested. 
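A stand-alone analogue of the new test pattern: instructions under test are written into a plain u32 buffer through a patching helper, never by dereferencing a struct over the code. patch_word() is this sketch's userspace stand-in for patch_instruction():

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Userspace stand-in for patch_instruction(): write the word through a
     * plain buffer (the kernel would also flush the icache here). */
    static void patch_word(uint32_t *p, uint32_t insn)
    {
            memcpy(p, &insn, sizeof(insn));
    }

    int main(void)
    {
            uint32_t tmp[2];   /* mirrors the test's 'u32 tmp[2]' buffer */

            patch_word(&tmp[0], 0x48000103u);  /* absolute branch to 0x100, AA|LK */
            printf("code[0] = 0x%08x\n", tmp[0]);
            return 0;
    }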
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d8425fb42a4adebc35b7509f121817eeb02fac31.1621516826.git.christophe.leroy@csgroup.eu --- arch/powerpc/lib/code-patching.c | 95 ++++++++++++++++++++++------------------ 1 file changed, 53 insertions(+), 42 deletions(-) diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 82f2c1edb498..60201cb6f3a1 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -422,9 +422,9 @@ static void __init test_branch_iform(void) { int err; struct ppc_inst instr; - unsigned long addr; - - addr = (unsigned long)&instr; + u32 tmp[2]; + struct ppc_inst *iptr = (struct ppc_inst *)tmp; + unsigned long addr = (unsigned long)tmp; /* The simplest case, branch to self, no flags */ check(instr_is_branch_iform(ppc_inst(0x48000000))); @@ -445,52 +445,57 @@ static void __init test_branch_iform(void) check(!instr_is_branch_iform(ppc_inst(0x7bfffffd))); /* Absolute branch to 0x100 */ - instr = ppc_inst(0x48000103); - check(instr_is_branch_to_addr(&instr, 0x100)); + patch_instruction(iptr, ppc_inst(0x48000103)); + check(instr_is_branch_to_addr(iptr, 0x100)); /* Absolute branch to 0x420fc */ - instr = ppc_inst(0x480420ff); - check(instr_is_branch_to_addr(&instr, 0x420fc)); + patch_instruction(iptr, ppc_inst(0x480420ff)); + check(instr_is_branch_to_addr(iptr, 0x420fc)); /* Maximum positive relative branch, + 20MB - 4B */ - instr = ppc_inst(0x49fffffc); - check(instr_is_branch_to_addr(&instr, addr + 0x1FFFFFC)); + patch_instruction(iptr, ppc_inst(0x49fffffc)); + check(instr_is_branch_to_addr(iptr, addr + 0x1FFFFFC)); /* Smallest negative relative branch, - 4B */ - instr = ppc_inst(0x4bfffffc); - check(instr_is_branch_to_addr(&instr, addr - 4)); + patch_instruction(iptr, ppc_inst(0x4bfffffc)); + check(instr_is_branch_to_addr(iptr, addr - 4)); /* Largest negative relative branch, - 32 MB */ - instr = ppc_inst(0x4a000000); - check(instr_is_branch_to_addr(&instr, addr - 0x2000000)); + patch_instruction(iptr, ppc_inst(0x4a000000)); + check(instr_is_branch_to_addr(iptr, addr - 0x2000000)); /* Branch to self, with link */ - err = create_branch(&instr, &instr, addr, BRANCH_SET_LINK); - check(instr_is_branch_to_addr(&instr, addr)); + err = create_branch(&instr, iptr, addr, BRANCH_SET_LINK); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr)); /* Branch to self - 0x100, with link */ - err = create_branch(&instr, &instr, addr - 0x100, BRANCH_SET_LINK); - check(instr_is_branch_to_addr(&instr, addr - 0x100)); + err = create_branch(&instr, iptr, addr - 0x100, BRANCH_SET_LINK); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr - 0x100)); /* Branch to self + 0x100, no link */ - err = create_branch(&instr, &instr, addr + 0x100, 0); - check(instr_is_branch_to_addr(&instr, addr + 0x100)); + err = create_branch(&instr, iptr, addr + 0x100, 0); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr + 0x100)); /* Maximum relative negative offset, - 32 MB */ - err = create_branch(&instr, &instr, addr - 0x2000000, BRANCH_SET_LINK); - check(instr_is_branch_to_addr(&instr, addr - 0x2000000)); + err = create_branch(&instr, iptr, addr - 0x2000000, BRANCH_SET_LINK); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr - 0x2000000)); /* Out of range relative negative offset, - 32 MB + 4*/ - err = create_branch(&instr, &instr, addr - 0x2000004, BRANCH_SET_LINK); + err = create_branch(&instr, iptr, addr 
- 0x2000004, BRANCH_SET_LINK); check(err); /* Out of range relative positive offset, + 32 MB */ - err = create_branch(&instr, &instr, addr + 0x2000000, BRANCH_SET_LINK); + err = create_branch(&instr, iptr, addr + 0x2000000, BRANCH_SET_LINK); check(err); /* Unaligned target */ - err = create_branch(&instr, &instr, addr + 3, BRANCH_SET_LINK); + err = create_branch(&instr, iptr, addr + 3, BRANCH_SET_LINK); check(err); /* Check flags are masked correctly */ - err = create_branch(&instr, &instr, addr, 0xFFFFFFFC); - check(instr_is_branch_to_addr(&instr, addr)); + err = create_branch(&instr, iptr, addr, 0xFFFFFFFC); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr)); check(ppc_inst_equal(instr, ppc_inst(0x48000000))); } @@ -513,9 +518,10 @@ static void __init test_branch_bform(void) int err; unsigned long addr; struct ppc_inst *iptr, instr; + u32 tmp[2]; unsigned int flags; - iptr = &instr; + iptr = (struct ppc_inst *)tmp; addr = (unsigned long)iptr; /* The simplest case, branch to self, no flags */ @@ -528,39 +534,43 @@ static void __init test_branch_bform(void) check(!instr_is_branch_bform(ppc_inst(0x7bffffff))); /* Absolute conditional branch to 0x100 */ - instr = ppc_inst(0x43ff0103); - check(instr_is_branch_to_addr(&instr, 0x100)); + patch_instruction(iptr, ppc_inst(0x43ff0103)); + check(instr_is_branch_to_addr(iptr, 0x100)); /* Absolute conditional branch to 0x20fc */ - instr = ppc_inst(0x43ff20ff); - check(instr_is_branch_to_addr(&instr, 0x20fc)); + patch_instruction(iptr, ppc_inst(0x43ff20ff)); + check(instr_is_branch_to_addr(iptr, 0x20fc)); /* Maximum positive relative conditional branch, + 32 KB - 4B */ - instr = ppc_inst(0x43ff7ffc); - check(instr_is_branch_to_addr(&instr, addr + 0x7FFC)); + patch_instruction(iptr, ppc_inst(0x43ff7ffc)); + check(instr_is_branch_to_addr(iptr, addr + 0x7FFC)); /* Smallest negative relative conditional branch, - 4B */ - instr = ppc_inst(0x43fffffc); - check(instr_is_branch_to_addr(&instr, addr - 4)); + patch_instruction(iptr, ppc_inst(0x43fffffc)); + check(instr_is_branch_to_addr(iptr, addr - 4)); /* Largest negative relative conditional branch, - 32 KB */ - instr = ppc_inst(0x43ff8000); - check(instr_is_branch_to_addr(&instr, addr - 0x8000)); + patch_instruction(iptr, ppc_inst(0x43ff8000)); + check(instr_is_branch_to_addr(iptr, addr - 0x8000)); /* All condition code bits set & link */ flags = 0x3ff000 | BRANCH_SET_LINK; /* Branch to self */ err = create_cond_branch(&instr, iptr, addr, flags); - check(instr_is_branch_to_addr(&instr, addr)); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr)); /* Branch to self - 0x100 */ err = create_cond_branch(&instr, iptr, addr - 0x100, flags); - check(instr_is_branch_to_addr(&instr, addr - 0x100)); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr - 0x100)); /* Branch to self + 0x100 */ err = create_cond_branch(&instr, iptr, addr + 0x100, flags); - check(instr_is_branch_to_addr(&instr, addr + 0x100)); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr + 0x100)); /* Maximum relative negative offset, - 32 KB */ err = create_cond_branch(&instr, iptr, addr - 0x8000, flags); - check(instr_is_branch_to_addr(&instr, addr - 0x8000)); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr - 0x8000)); /* Out of range relative negative offset, - 32 KB + 4*/ err = create_cond_branch(&instr, iptr, addr - 0x8004, flags); @@ -576,7 +586,8 @@ static void __init test_branch_bform(void) /* Check flags are 
masked correctly */ err = create_cond_branch(&instr, iptr, addr, 0xFFFFFFFC); - check(instr_is_branch_to_addr(&instr, addr)); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr)); check(ppc_inst_equal(instr, ppc_inst(0x43FF0000))); } -- cgit v1.2.3 From 69d4d6e5fd9f4e805280ad831932c3df7b9d7cc7 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 20 May 2021 13:50:45 +0000 Subject: powerpc: Don't use 'struct ppc_inst' to reference instruction location 'struct ppc_inst' is an internal representation of an instruction, but in-memory instructions are and will remain a table of 'u32' forever. Replace all 'struct ppc_inst *' used for locating an instruction in memory by 'u32 *'. This removes a lot of undue casts to 'struct ppc_inst *'. It also helps locate abuse of 'struct ppc_inst' dereferences. Signed-off-by: Christophe Leroy [mpe: Fix ppc_inst_next(), use u32 instead of unsigned int] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/7062722b087228e42cbd896e39bfdf526d6a340a.1621516826.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/code-patching.h | 21 ++++--- arch/powerpc/include/asm/inst.h | 16 +++--- arch/powerpc/include/asm/uprobes.h | 4 +- arch/powerpc/kernel/crash_dump.c | 4 +- arch/powerpc/kernel/epapr_paravirt.c | 4 +- arch/powerpc/kernel/jump_label.c | 2 +- arch/powerpc/kernel/kgdb.c | 9 ++- arch/powerpc/kernel/kprobes.c | 17 +++--- arch/powerpc/kernel/mce_power.c | 2 +- arch/powerpc/kernel/optprobes.c | 45 ++++++--------- arch/powerpc/kernel/setup_32.c | 2 +- arch/powerpc/kernel/trace/ftrace.c | 33 ++++++----- arch/powerpc/kernel/uprobes.c | 6 +- arch/powerpc/lib/code-patching.c | 49 ++++++++-------- arch/powerpc/lib/feature-fixups.c | 95 +++++++++++++++---------------- arch/powerpc/mm/maccess.c | 4 +- arch/powerpc/perf/core-book3s.c | 4 +- arch/powerpc/platforms/86xx/mpc86xx_smp.c | 4 +- arch/powerpc/platforms/powermac/smp.c | 4 +- arch/powerpc/xmon/xmon.c | 10 ++-- 20 files changed, 159 insertions(+), 176 deletions(-) diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index fd60594bdd38..a95f63788c6b 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -23,13 +23,13 @@ #define BRANCH_ABSOLUTE 0x2 bool is_offset_in_branch_range(long offset); -int create_branch(struct ppc_inst *instr, const struct ppc_inst *addr, +int create_branch(struct ppc_inst *instr, const u32 *addr, unsigned long target, int flags); -int create_cond_branch(struct ppc_inst *instr, const struct ppc_inst *addr, +int create_cond_branch(struct ppc_inst *instr, const u32 *addr, unsigned long target, int flags); -int patch_branch(struct ppc_inst *addr, unsigned long target, int flags); -int patch_instruction(struct ppc_inst *addr, struct ppc_inst instr); -int raw_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr); +int patch_branch(u32 *addr, unsigned long target, int flags); +int patch_instruction(u32 *addr, struct ppc_inst instr); +int raw_patch_instruction(u32 *addr, struct ppc_inst instr); static inline unsigned long patch_site_addr(s32 *site) { @@ -38,18 +38,18 @@ static inline unsigned long patch_site_addr(s32 *site) static inline int patch_instruction_site(s32 *site, struct ppc_inst instr) { - return patch_instruction((struct ppc_inst *)patch_site_addr(site), instr); + return patch_instruction((u32 *)patch_site_addr(site), instr); } static inline int patch_branch_site(s32 *site, unsigned long target, int flags) { - return patch_branch((struct 
ppc_inst *)patch_site_addr(site), target, flags); + return patch_branch((u32 *)patch_site_addr(site), target, flags); } static inline int modify_instruction(unsigned int *addr, unsigned int clr, unsigned int set) { - return patch_instruction((struct ppc_inst *)addr, ppc_inst((*addr & ~clr) | set)); + return patch_instruction(addr, ppc_inst((*addr & ~clr) | set)); } static inline int modify_instruction_site(s32 *site, unsigned int clr, unsigned int set) @@ -59,9 +59,8 @@ static inline int modify_instruction_site(s32 *site, unsigned int clr, unsigned int instr_is_relative_branch(struct ppc_inst instr); int instr_is_relative_link_branch(struct ppc_inst instr); -unsigned long branch_target(const struct ppc_inst *instr); -int translate_branch(struct ppc_inst *instr, const struct ppc_inst *dest, - const struct ppc_inst *src); +unsigned long branch_target(const u32 *instr); +int translate_branch(struct ppc_inst *instr, const u32 *dest, const u32 *src); extern bool is_conditional_branch(struct ppc_inst instr); #ifdef CONFIG_PPC_BOOK3E_64 void __patch_exception(int exc, unsigned long addr); diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index 1ba65052ed21..cd8ace0c00b7 100644 --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -80,13 +80,13 @@ static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x) return ppc_inst_prefix(swab32(ppc_inst_val(x)), swab32(ppc_inst_suffix(x))); } -static inline struct ppc_inst ppc_inst_read(const struct ppc_inst *ptr) +static inline struct ppc_inst ppc_inst_read(const u32 *ptr) { u32 val, suffix; - val = *(u32 *)ptr; + val = *ptr; if ((val >> 26) == OP_PREFIX) { - suffix = *((u32 *)ptr + 1); + suffix = *(ptr + 1); return ppc_inst_prefix(val, suffix); } else { return ppc_inst(val); @@ -114,9 +114,9 @@ static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x) return ppc_inst(swab32(ppc_inst_val(x))); } -static inline struct ppc_inst ppc_inst_read(const struct ppc_inst *ptr) +static inline struct ppc_inst ppc_inst_read(const u32 *ptr) { - return *ptr; + return ppc_inst(*ptr); } #endif /* CONFIG_PPC64 */ @@ -139,13 +139,13 @@ static inline int ppc_inst_len(struct ppc_inst x) * Return the address of the next instruction, if the instruction @value was * located at @location. 
*/ -static inline struct ppc_inst *ppc_inst_next(void *location, struct ppc_inst *value) +static inline u32 *ppc_inst_next(u32 *location, u32 *value) { struct ppc_inst tmp; tmp = ppc_inst_read(value); - return location + ppc_inst_len(tmp); + return (void *)location + ppc_inst_len(tmp); } static inline unsigned long ppc_inst_as_ulong(struct ppc_inst x) @@ -177,6 +177,6 @@ static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], struct ppc_ins __str; \ }) -int copy_inst_from_kernel_nofault(struct ppc_inst *inst, struct ppc_inst *src); +int copy_inst_from_kernel_nofault(struct ppc_inst *inst, u32 *src); #endif /* _ASM_POWERPC_INST_H */ diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h index 5bf65f5d44a9..fe683371336f 100644 --- a/arch/powerpc/include/asm/uprobes.h +++ b/arch/powerpc/include/asm/uprobes.h @@ -24,8 +24,8 @@ typedef ppc_opcode_t uprobe_opcode_t; struct arch_uprobe { union { - struct ppc_inst insn; - struct ppc_inst ixol; + u32 insn[2]; + u32 ixol[2]; }; }; diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index adb39da684be..5693e1c67c2b 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -35,7 +35,7 @@ void __init reserve_kdump_trampoline(void) static void __init create_trampoline(unsigned long addr) { - struct ppc_inst *p = (struct ppc_inst *)addr; + u32 *p = (u32 *)addr; /* The maximum range of a single instruction branch, is the current * instruction's address + (32 MB - 4) bytes. For the trampoline we @@ -46,7 +46,7 @@ static void __init create_trampoline(unsigned long addr) * two instructions it doesn't require any registers. */ patch_instruction(p, ppc_inst(PPC_RAW_NOP())); - patch_branch((void *)p + 4, addr + PHYSICAL_START, 0); + patch_branch(p + 1, addr + PHYSICAL_START, 0); } void __init setup_kdump_trampoline(void) diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c index 2ed14d4a47f5..93b0f3ec8fb0 100644 --- a/arch/powerpc/kernel/epapr_paravirt.c +++ b/arch/powerpc/kernel/epapr_paravirt.c @@ -38,9 +38,9 @@ static int __init early_init_dt_scan_epapr(unsigned long node, for (i = 0; i < (len / 4); i++) { struct ppc_inst inst = ppc_inst(be32_to_cpu(insts[i])); - patch_instruction((struct ppc_inst *)(epapr_hypercall_start + i), inst); + patch_instruction(epapr_hypercall_start + i, inst); #if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64) - patch_instruction((struct ppc_inst *)(epapr_ev_idle_start + i), inst); + patch_instruction(epapr_ev_idle_start + i, inst); #endif } diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c index 90c97246384c..5277cf582c16 100644 --- a/arch/powerpc/kernel/jump_label.c +++ b/arch/powerpc/kernel/jump_label.c @@ -11,7 +11,7 @@ void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type) { - struct ppc_inst *addr = (struct ppc_inst *)jump_entry_code(entry); + u32 *addr = (u32 *)jump_entry_code(entry); if (type == JUMP_LABEL_JMP) patch_branch(addr, jump_entry_target(entry), 0); diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 7dd2ad3603ad..c48565762698 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -417,11 +417,10 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code, int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) { + u32 instr, *addr = (u32 *)bpt->bpt_addr; int err; - unsigned int instr; - struct ppc_inst *addr = (struct ppc_inst *)bpt->bpt_addr; - err = 
get_kernel_nofault(instr, (unsigned *) addr); + err = get_kernel_nofault(instr, addr); if (err) return err; @@ -429,7 +428,7 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) if (err) return -EFAULT; - *(unsigned int *)bpt->saved_instr = instr; + *(u32 *)bpt->saved_instr = instr; return 0; } @@ -438,7 +437,7 @@ int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) { int err; unsigned int instr = *(unsigned int *)bpt->saved_instr; - struct ppc_inst *addr = (struct ppc_inst *)bpt->bpt_addr; + u32 *addr = (u32 *)bpt->bpt_addr; err = patch_instruction(addr, ppc_inst(instr)); if (err) diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index e8c2a6373157..b19dfc45f75c 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -107,7 +107,7 @@ int arch_prepare_kprobe(struct kprobe *p) { int ret = 0; struct kprobe *prev; - struct ppc_inst insn = ppc_inst_read((struct ppc_inst *)p->addr); + struct ppc_inst insn = ppc_inst_read(p->addr); if ((unsigned long)p->addr & 0x03) { printk("Attempt to register kprobe at an unaligned address\n"); @@ -116,15 +116,14 @@ int arch_prepare_kprobe(struct kprobe *p) printk("Cannot register a kprobe on rfi/rfid or mtmsr[d]\n"); ret = -EINVAL; } else if ((unsigned long)p->addr & ~PAGE_MASK && - ppc_inst_prefixed(ppc_inst_read((struct ppc_inst *)(p->addr - 1)))) { + ppc_inst_prefixed(ppc_inst_read(p->addr - 1))) { printk("Cannot register a kprobe on the second word of prefixed instruction\n"); ret = -EINVAL; } preempt_disable(); prev = get_kprobe(p->addr - 1); preempt_enable_no_resched(); - if (prev && - ppc_inst_prefixed(ppc_inst_read((struct ppc_inst *)prev->ainsn.insn))) { + if (prev && ppc_inst_prefixed(ppc_inst_read(prev->ainsn.insn))) { printk("Cannot register a kprobe on the second word of prefixed instruction\n"); ret = -EINVAL; } @@ -138,7 +137,7 @@ int arch_prepare_kprobe(struct kprobe *p) } if (!ret) { - patch_instruction((struct ppc_inst *)p->ainsn.insn, insn); + patch_instruction(p->ainsn.insn, insn); p->opcode = ppc_inst_val(insn); } @@ -149,13 +148,13 @@ NOKPROBE_SYMBOL(arch_prepare_kprobe); void arch_arm_kprobe(struct kprobe *p) { - patch_instruction((struct ppc_inst *)p->addr, ppc_inst(BREAKPOINT_INSTRUCTION)); + patch_instruction(p->addr, ppc_inst(BREAKPOINT_INSTRUCTION)); } NOKPROBE_SYMBOL(arch_arm_kprobe); void arch_disarm_kprobe(struct kprobe *p) { - patch_instruction((struct ppc_inst *)p->addr, ppc_inst(p->opcode)); + patch_instruction(p->addr, ppc_inst(p->opcode)); } NOKPROBE_SYMBOL(arch_disarm_kprobe); @@ -228,7 +227,7 @@ NOKPROBE_SYMBOL(arch_prepare_kretprobe); static int try_to_emulate(struct kprobe *p, struct pt_regs *regs) { int ret; - struct ppc_inst insn = ppc_inst_read((struct ppc_inst *)p->ainsn.insn); + struct ppc_inst insn = ppc_inst_read(p->ainsn.insn); /* regs->nip is also adjusted if emulate_step returns 1 */ ret = emulate_step(regs, insn); @@ -439,7 +438,7 @@ int kprobe_post_handler(struct pt_regs *regs) if (!cur || user_mode(regs)) return 0; - len = ppc_inst_len(ppc_inst_read((struct ppc_inst *)cur->ainsn.insn)); + len = ppc_inst_len(ppc_inst_read(cur->ainsn.insn)); /* make sure we got here for instruction we have a kprobe on */ if (((unsigned long)cur->ainsn.insn + len) != regs->nip) return 0; diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 667104d4c455..7b76a9bf324f 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -463,7 +463,7 @@ static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t 
*addr, pfn = addr_to_pfn(regs, regs->nip); if (pfn != ULONG_MAX) { instr_addr = (pfn << PAGE_SHIFT) + (regs->nip & ~PAGE_MASK); - instr = ppc_inst_read((struct ppc_inst *)instr_addr); + instr = ppc_inst_read((u32 *)instr_addr); if (!analyse_instr(&op, &tmp, instr)) { pfn = addr_to_pfn(regs, op.ea); *addr = op.ea; diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index a370190cd02a..88e205775ded 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -89,9 +89,8 @@ static unsigned long can_optimize(struct kprobe *p) * Ensure that the instruction is not a conditional branch, * and that can be emulated. */ - if (!is_conditional_branch(ppc_inst_read((struct ppc_inst *)p->ainsn.insn)) && - analyse_instr(&op, ®s, - ppc_inst_read((struct ppc_inst *)p->ainsn.insn)) == 1) { + if (!is_conditional_branch(ppc_inst_read(p->ainsn.insn)) && + analyse_instr(&op, ®s, ppc_inst_read(p->ainsn.insn)) == 1) { emulate_update_regs(®s, &op); nip = regs.nip; } @@ -132,12 +131,10 @@ void arch_remove_optimized_kprobe(struct optimized_kprobe *op) static void patch_imm32_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) { - patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_RAW_LIS(reg, IMM_H(val)))); + patch_instruction(addr, ppc_inst(PPC_RAW_LIS(reg, IMM_H(val)))); addr++; - patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_RAW_ORI(reg, reg, IMM_L(val)))); + patch_instruction(addr, ppc_inst(PPC_RAW_ORI(reg, reg, IMM_L(val)))); } /* @@ -147,31 +144,31 @@ static void patch_imm32_load_insns(unsigned long val, int reg, kprobe_opcode_t * static void patch_imm64_load_insns(unsigned long long val, int reg, kprobe_opcode_t *addr) { /* lis reg,(op)@highest */ - patch_instruction((struct ppc_inst *)addr, + patch_instruction(addr, ppc_inst(PPC_INST_ADDIS | ___PPC_RT(reg) | ((val >> 48) & 0xffff))); addr++; /* ori reg,reg,(op)@higher */ - patch_instruction((struct ppc_inst *)addr, + patch_instruction(addr, ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) | ___PPC_RS(reg) | ((val >> 32) & 0xffff))); addr++; /* rldicr reg,reg,32,31 */ - patch_instruction((struct ppc_inst *)addr, + patch_instruction(addr, ppc_inst(PPC_INST_RLDICR | ___PPC_RA(reg) | ___PPC_RS(reg) | __PPC_SH64(32) | __PPC_ME64(31))); addr++; /* oris reg,reg,(op)@h */ - patch_instruction((struct ppc_inst *)addr, + patch_instruction(addr, ppc_inst(PPC_INST_ORIS | ___PPC_RA(reg) | ___PPC_RS(reg) | ((val >> 16) & 0xffff))); addr++; /* ori reg,reg,(op)@l */ - patch_instruction((struct ppc_inst *)addr, + patch_instruction(addr, ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) | ___PPC_RS(reg) | (val & 0xffff))); } @@ -225,8 +222,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) size = (TMPL_END_IDX * sizeof(kprobe_opcode_t)) / sizeof(int); pr_devel("Copying template to %p, size %lu\n", buff, size); for (i = 0; i < size; i++) { - rc = patch_instruction((struct ppc_inst *)(buff + i), - ppc_inst(*(optprobe_template_entry + i))); + rc = patch_instruction(buff + i, ppc_inst(*(optprobe_template_entry + i))); if (rc < 0) goto error; } @@ -247,34 +243,30 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) goto error; } - rc = create_branch(&branch_op_callback, - (struct ppc_inst *)(buff + TMPL_CALL_HDLR_IDX), + rc = create_branch(&branch_op_callback, buff + TMPL_CALL_HDLR_IDX, (unsigned long)op_callback_addr, BRANCH_SET_LINK); - rc |= create_branch(&branch_emulate_step, - (struct ppc_inst *)(buff + TMPL_EMULATE_IDX), + rc |= 
create_branch(&branch_emulate_step, buff + TMPL_EMULATE_IDX, (unsigned long)emulate_step_addr, BRANCH_SET_LINK); if (rc) goto error; - patch_instruction((struct ppc_inst *)(buff + TMPL_CALL_HDLR_IDX), - branch_op_callback); - patch_instruction((struct ppc_inst *)(buff + TMPL_EMULATE_IDX), - branch_emulate_step); + patch_instruction(buff + TMPL_CALL_HDLR_IDX, branch_op_callback); + patch_instruction(buff + TMPL_EMULATE_IDX, branch_emulate_step); /* * 3. load instruction to be emulated into relevant register, and */ - temp = ppc_inst_read((struct ppc_inst *)p->ainsn.insn); + temp = ppc_inst_read(p->ainsn.insn); patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX); /* * 4. branch back from trampoline */ - patch_branch((struct ppc_inst *)(buff + TMPL_RET_IDX), (unsigned long)nip, 0); + patch_branch(buff + TMPL_RET_IDX, nip, 0); flush_icache_range((unsigned long)buff, (unsigned long)(&buff[TMPL_END_IDX])); @@ -317,10 +309,9 @@ void arch_optimize_kprobes(struct list_head *oplist) */ memcpy(op->optinsn.copied_insn, op->kp.addr, RELATIVEJUMP_SIZE); - create_branch(&instr, - (struct ppc_inst *)op->kp.addr, + create_branch(&instr, op->kp.addr, (unsigned long)op->optinsn.insn, 0); - patch_instruction((struct ppc_inst *)op->kp.addr, instr); + patch_instruction(op->kp.addr, instr); list_del_init(&op->list); } } diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index a369fb27ccaa..7ec5c47fce0e 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -74,7 +74,7 @@ EXPORT_SYMBOL(DMA_MODE_WRITE); */ notrace void __init machine_init(u64 dt_ptr) { - struct ppc_inst *addr = (struct ppc_inst *)patch_site_addr(&patch__memset_nocache); + u32 *addr = (u32 *)patch_site_addr(&patch__memset_nocache); struct ppc_inst insn; /* Configure static keys first, now that we're relocated. */ diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index a787a414ea6b..d89c5df4f206 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -49,7 +49,7 @@ ftrace_call_replace(unsigned long ip, unsigned long addr, int link) addr = ppc_function_entry((void *)addr); /* if (link) set op to 'bl' else 'b' */ - create_branch(&op, (struct ppc_inst *)ip, addr, link ? 1 : 0); + create_branch(&op, (u32 *)ip, addr, link ? 
1 : 0); return op; } @@ -79,7 +79,7 @@ ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new) } /* replace the text with the new text */ - if (patch_instruction((struct ppc_inst *)ip, new)) + if (patch_instruction((u32 *)ip, new)) return -EPERM; return 0; @@ -94,7 +94,7 @@ static int test_24bit_addr(unsigned long ip, unsigned long addr) addr = ppc_function_entry((void *)addr); /* use the create_branch to verify that this offset can be branched */ - return create_branch(&op, (struct ppc_inst *)ip, addr, 0) == 0; + return create_branch(&op, (u32 *)ip, addr, 0) == 0; } static int is_bl_op(struct ppc_inst op) @@ -208,7 +208,7 @@ __ftrace_make_nop(struct module *mod, } #endif /* CONFIG_MPROFILE_KERNEL */ - if (patch_instruction((struct ppc_inst *)ip, pop)) { + if (patch_instruction((u32 *)ip, pop)) { pr_err("Patching NOP failed.\n"); return -EPERM; } @@ -280,7 +280,7 @@ __ftrace_make_nop(struct module *mod, op = ppc_inst(PPC_RAW_NOP()); - if (patch_instruction((struct ppc_inst *)ip, op)) + if (patch_instruction((u32 *)ip, op)) return -EPERM; return 0; @@ -380,7 +380,7 @@ static int setup_mcount_compiler_tramp(unsigned long tramp) return -1; } - if (patch_branch((struct ppc_inst *)tramp, ptr, 0)) { + if (patch_branch((u32 *)tramp, ptr, 0)) { pr_debug("REL24 out of range!\n"); return -1; } @@ -424,7 +424,7 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) } } - if (patch_instruction((struct ppc_inst *)ip, ppc_inst(PPC_RAW_NOP()))) { + if (patch_instruction((u32 *)ip, ppc_inst(PPC_RAW_NOP()))) { pr_err("Patching NOP failed.\n"); return -EPERM; } @@ -589,10 +589,10 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { int err; struct ppc_inst op; - unsigned long ip = rec->ip; + u32 *ip = (u32 *)rec->ip; /* read where this goes */ - if (copy_inst_from_kernel_nofault(&op, (void *)ip)) + if (copy_inst_from_kernel_nofault(&op, ip)) return -EFAULT; /* It should be pointing to a nop */ @@ -608,8 +608,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) } /* create the branch to the trampoline */ - err = create_branch(&op, (struct ppc_inst *)ip, - rec->arch.mod->arch.tramp, BRANCH_SET_LINK); + err = create_branch(&op, ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK); if (err) { pr_err("REL24 out of range!\n"); return -EINVAL; @@ -617,7 +616,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) pr_devel("write to %lx\n", rec->ip); - if (patch_instruction((struct ppc_inst *)ip, op)) + if (patch_instruction(ip, op)) return -EPERM; return 0; @@ -762,7 +761,7 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, /* The new target may be within range */ if (test_24bit_addr(ip, addr)) { /* within range */ - if (patch_branch((struct ppc_inst *)ip, addr, BRANCH_SET_LINK)) { + if (patch_branch((u32 *)ip, addr, BRANCH_SET_LINK)) { pr_err("REL24 out of range!\n"); return -EINVAL; } @@ -790,12 +789,12 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, } /* Ensure branch is within 24 bits */ - if (create_branch(&op, (struct ppc_inst *)ip, tramp, BRANCH_SET_LINK)) { + if (create_branch(&op, (u32 *)ip, tramp, BRANCH_SET_LINK)) { pr_err("Branch out of range\n"); return -EINVAL; } - if (patch_branch((struct ppc_inst *)ip, tramp, BRANCH_SET_LINK)) { + if (patch_branch((u32 *)ip, tramp, BRANCH_SET_LINK)) { pr_err("REL24 out of range!\n"); return -EINVAL; } @@ -851,7 +850,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func) struct ppc_inst old, new; int ret; - old = ppc_inst_read((struct 
ppc_inst *)&ftrace_call); + old = ppc_inst_read((u32 *)&ftrace_call); new = ftrace_call_replace(ip, (unsigned long)func, 1); ret = ftrace_modify_code(ip, old, new); @@ -859,7 +858,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func) /* Also update the regs callback function */ if (!ret) { ip = (unsigned long)(&ftrace_regs_call); - old = ppc_inst_read((struct ppc_inst *)&ftrace_regs_call); + old = ppc_inst_read((u32 *)&ftrace_regs_call); new = ftrace_call_replace(ip, (unsigned long)func, 1); ret = ftrace_modify_code(ip, old, new); } diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c index 46971bb41d05..1aefd2ecbb8a 100644 --- a/arch/powerpc/kernel/uprobes.c +++ b/arch/powerpc/kernel/uprobes.c @@ -42,7 +42,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, return -EINVAL; if (cpu_has_feature(CPU_FTR_ARCH_31) && - ppc_inst_prefixed(ppc_inst_read(&auprobe->insn)) && + ppc_inst_prefixed(ppc_inst_read(auprobe->insn)) && (addr & 0x3f) == 60) { pr_info_ratelimited("Cannot register a uprobe on 64 byte unaligned prefixed instruction\n"); return -EINVAL; @@ -119,7 +119,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) * support doesn't exist and have to fix-up the next instruction * to be executed. */ - regs->nip = (unsigned long)ppc_inst_next((void *)utask->vaddr, &auprobe->insn); + regs->nip = (unsigned long)ppc_inst_next((void *)utask->vaddr, auprobe->insn); user_disable_single_step(current); return 0; @@ -182,7 +182,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) * emulate_step() returns 1 if the insn was successfully emulated. * For all other cases, we need to single-step in hardware. */ - ret = emulate_step(regs, ppc_inst_read(&auprobe->insn)); + ret = emulate_step(regs, ppc_inst_read(auprobe->insn)); if (ret > 0) return true; diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 60201cb6f3a1..b2d692376769 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -18,8 +18,7 @@ #include #include -static int __patch_instruction(struct ppc_inst *exec_addr, struct ppc_inst instr, - struct ppc_inst *patch_addr) +static int __patch_instruction(u32 *exec_addr, struct ppc_inst instr, u32 *patch_addr) { if (!ppc_inst_prefixed(instr)) { u32 val = ppc_inst_val(instr); @@ -40,7 +39,7 @@ failed: return -EFAULT; } -int raw_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr) +int raw_patch_instruction(u32 *addr, struct ppc_inst instr) { return __patch_instruction(addr, instr, addr); } @@ -148,10 +147,10 @@ static inline int unmap_patch_area(unsigned long addr) return 0; } -static int do_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr) +static int do_patch_instruction(u32 *addr, struct ppc_inst instr) { int err; - struct ppc_inst *patch_addr = NULL; + u32 *patch_addr = NULL; unsigned long flags; unsigned long text_poke_addr; unsigned long kaddr = (unsigned long)addr; @@ -172,7 +171,7 @@ static int do_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr) goto out; } - patch_addr = (struct ppc_inst *)(text_poke_addr + (kaddr & ~PAGE_MASK)); + patch_addr = (u32 *)(text_poke_addr + (kaddr & ~PAGE_MASK)); __patch_instruction(addr, instr, patch_addr); @@ -187,14 +186,14 @@ out: } #else /* !CONFIG_STRICT_KERNEL_RWX */ -static int do_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr) +static int do_patch_instruction(u32 *addr, struct ppc_inst instr) { return raw_patch_instruction(addr, instr); } #endif 
/* CONFIG_STRICT_KERNEL_RWX */ -int patch_instruction(struct ppc_inst *addr, struct ppc_inst instr) +int patch_instruction(u32 *addr, struct ppc_inst instr) { /* Make sure we aren't patching a freed init section */ if (init_mem_is_free && init_section_contains(addr, 4)) { @@ -205,7 +204,7 @@ int patch_instruction(struct ppc_inst *addr, struct ppc_inst instr) } NOKPROBE_SYMBOL(patch_instruction); -int patch_branch(struct ppc_inst *addr, unsigned long target, int flags) +int patch_branch(u32 *addr, unsigned long target, int flags) { struct ppc_inst instr; @@ -257,8 +256,7 @@ bool is_conditional_branch(struct ppc_inst instr) } NOKPROBE_SYMBOL(is_conditional_branch); -int create_branch(struct ppc_inst *instr, - const struct ppc_inst *addr, +int create_branch(struct ppc_inst *instr, const u32 *addr, unsigned long target, int flags) { long offset; @@ -278,7 +276,7 @@ int create_branch(struct ppc_inst *instr, return 0; } -int create_cond_branch(struct ppc_inst *instr, const struct ppc_inst *addr, +int create_cond_branch(struct ppc_inst *instr, const u32 *addr, unsigned long target, int flags) { long offset; @@ -325,7 +323,7 @@ int instr_is_relative_link_branch(struct ppc_inst instr) return instr_is_relative_branch(instr) && (ppc_inst_val(instr) & BRANCH_SET_LINK); } -static unsigned long branch_iform_target(const struct ppc_inst *instr) +static unsigned long branch_iform_target(const u32 *instr) { signed long imm; @@ -341,7 +339,7 @@ static unsigned long branch_iform_target(const struct ppc_inst *instr) return (unsigned long)imm; } -static unsigned long branch_bform_target(const struct ppc_inst *instr) +static unsigned long branch_bform_target(const u32 *instr) { signed long imm; @@ -357,7 +355,7 @@ static unsigned long branch_bform_target(const struct ppc_inst *instr) return (unsigned long)imm; } -unsigned long branch_target(const struct ppc_inst *instr) +unsigned long branch_target(const u32 *instr) { if (instr_is_branch_iform(ppc_inst_read(instr))) return branch_iform_target(instr); @@ -367,8 +365,7 @@ unsigned long branch_target(const struct ppc_inst *instr) return 0; } -int translate_branch(struct ppc_inst *instr, const struct ppc_inst *dest, - const struct ppc_inst *src) +int translate_branch(struct ppc_inst *instr, const u32 *dest, const u32 *src) { unsigned long target; target = branch_target(src); @@ -395,13 +392,13 @@ void __patch_exception(int exc, unsigned long addr) * instruction of the exception, not the first one */ - patch_branch((struct ppc_inst *)(ibase + (exc / 4) + 1), addr, 0); + patch_branch(ibase + (exc / 4) + 1, addr, 0); } #endif #ifdef CONFIG_CODE_PATCHING_SELFTEST -static int instr_is_branch_to_addr(const struct ppc_inst *instr, unsigned long addr) +static int instr_is_branch_to_addr(const u32 *instr, unsigned long addr) { if (instr_is_branch_iform(ppc_inst_read(instr)) || instr_is_branch_bform(ppc_inst_read(instr))) @@ -423,7 +420,7 @@ static void __init test_branch_iform(void) int err; struct ppc_inst instr; u32 tmp[2]; - struct ppc_inst *iptr = (struct ppc_inst *)tmp; + u32 *iptr = tmp; unsigned long addr = (unsigned long)tmp; /* The simplest case, branch to self, no flags */ @@ -501,12 +498,12 @@ static void __init test_branch_iform(void) static void __init test_create_function_call(void) { - struct ppc_inst *iptr; + u32 *iptr; unsigned long dest; struct ppc_inst instr; /* Check we can create a function call */ - iptr = (struct ppc_inst *)ppc_function_entry(test_trampoline); + iptr = (u32 *)ppc_function_entry(test_trampoline); dest = 
ppc_function_entry(test_create_function_call); create_branch(&instr, iptr, dest, BRANCH_SET_LINK); patch_instruction(iptr, instr); @@ -517,11 +514,11 @@ static void __init test_branch_bform(void) { int err; unsigned long addr; - struct ppc_inst *iptr, instr; + struct ppc_inst instr; u32 tmp[2]; + u32 *iptr = tmp; unsigned int flags; - iptr = (struct ppc_inst *)tmp; addr = (unsigned long)iptr; /* The simplest case, branch to self, no flags */ @@ -726,9 +723,9 @@ static void __init test_prefixed_patching(void) extern unsigned int code_patching_test1_expected[]; extern unsigned int end_code_patching_test1[]; - __patch_instruction((struct ppc_inst *)code_patching_test1, + __patch_instruction(code_patching_test1, ppc_inst_prefix(OP_PREFIX << 26, 0x00000000), - (struct ppc_inst *)code_patching_test1); + code_patching_test1); check(!memcmp(code_patching_test1, code_patching_test1_expected, diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 5a4bdede1140..4537459d012f 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -33,18 +33,17 @@ struct fixup_entry { long alt_end_off; }; -static struct ppc_inst *calc_addr(struct fixup_entry *fcur, long offset) +static u32 *calc_addr(struct fixup_entry *fcur, long offset) { /* * We store the offset to the code as a negative offset from * the start of the alt_entry, to support the VDSO. This * routine converts that back into an actual address. */ - return (struct ppc_inst *)((unsigned long)fcur + offset); + return (u32 *)((unsigned long)fcur + offset); } -static int patch_alt_instruction(struct ppc_inst *src, struct ppc_inst *dest, - struct ppc_inst *alt_start, struct ppc_inst *alt_end) +static int patch_alt_instruction(u32 *src, u32 *dest, u32 *alt_start, u32 *alt_end) { int err; struct ppc_inst instr; @@ -52,7 +51,7 @@ static int patch_alt_instruction(struct ppc_inst *src, struct ppc_inst *dest, instr = ppc_inst_read(src); if (instr_is_relative_branch(ppc_inst_read(src))) { - struct ppc_inst *target = (struct ppc_inst *)branch_target(src); + u32 *target = (u32 *)branch_target(src); /* Branch within the section doesn't need translating */ if (target < alt_start || target > alt_end) { @@ -69,7 +68,7 @@ static int patch_alt_instruction(struct ppc_inst *src, struct ppc_inst *dest, static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) { - struct ppc_inst *start, *end, *alt_start, *alt_end, *src, *dest, nop; + u32 *start, *end, *alt_start, *alt_end, *src, *dest; start = calc_addr(fcur, fcur->start_off); end = calc_addr(fcur, fcur->end_off); @@ -91,9 +90,8 @@ static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) return 1; } - nop = ppc_inst(PPC_RAW_NOP()); - for (; dest < end; dest = ppc_inst_next(dest, &nop)) - raw_patch_instruction(dest, nop); + for (; dest < end; dest++) + raw_patch_instruction(dest, ppc_inst(PPC_RAW_NOP())); return 0; } @@ -152,14 +150,14 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types) // See comment in do_entry_flush_fixups() RE order of patching if (types & STF_BARRIER_FALLBACK) { - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); - patch_branch((struct ppc_inst *)(dest + 1), + patch_instruction(dest, ppc_inst(instrs[0])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_branch(dest + 1, (unsigned long)&stf_barrier_fallback, BRANCH_SET_LINK); } else { - patch_instruction((struct ppc_inst *)(dest 
+ 1), ppc_inst(instrs[1])); - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + patch_instruction(dest + 1, ppc_inst(instrs[1])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_instruction(dest, ppc_inst(instrs[0])); } } @@ -212,12 +210,12 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types) pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); - patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); - patch_instruction((struct ppc_inst *)(dest + 3), ppc_inst(instrs[3])); - patch_instruction((struct ppc_inst *)(dest + 4), ppc_inst(instrs[4])); - patch_instruction((struct ppc_inst *)(dest + 5), ppc_inst(instrs[5])); + patch_instruction(dest, ppc_inst(instrs[0])); + patch_instruction(dest + 1, ppc_inst(instrs[1])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_instruction(dest + 3, ppc_inst(instrs[3])); + patch_instruction(dest + 4, ppc_inst(instrs[4])); + patch_instruction(dest + 5, ppc_inst(instrs[5])); } printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", i, (types == STF_BARRIER_NONE) ? "no" : @@ -281,11 +279,11 @@ void do_uaccess_flush_fixups(enum l1d_flush_type types) pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + patch_instruction(dest, ppc_inst(instrs[0])); - patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); - patch_instruction((struct ppc_inst *)(dest + 3), ppc_inst(instrs[3])); + patch_instruction(dest + 1, ppc_inst(instrs[1])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_instruction(dest + 3, ppc_inst(instrs[3])); } printk(KERN_DEBUG "uaccess-flush: patched %d locations (%s flush)\n", i, @@ -357,14 +355,14 @@ static int __do_entry_flush_fixups(void *data) pr_devel("patching dest %lx\n", (unsigned long)dest); if (types == L1D_FLUSH_FALLBACK) { - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); - patch_branch((struct ppc_inst *)(dest + 1), + patch_instruction(dest, ppc_inst(instrs[0])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_branch(dest + 1, (unsigned long)&entry_flush_fallback, BRANCH_SET_LINK); } else { - patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + patch_instruction(dest + 1, ppc_inst(instrs[1])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_instruction(dest, ppc_inst(instrs[0])); } } @@ -376,14 +374,14 @@ static int __do_entry_flush_fixups(void *data) pr_devel("patching dest %lx\n", (unsigned long)dest); if (types == L1D_FLUSH_FALLBACK) { - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); - patch_branch((struct ppc_inst *)(dest + 1), + patch_instruction(dest, ppc_inst(instrs[0])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_branch(dest + 1, (unsigned long)&scv_entry_flush_fallback, BRANCH_SET_LINK); } else { - patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); - 
patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + patch_instruction(dest + 1, ppc_inst(instrs[1])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_instruction(dest, ppc_inst(instrs[0])); } } @@ -442,9 +440,9 @@ void do_rfi_flush_fixups(enum l1d_flush_type types) pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); - patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + patch_instruction(dest, ppc_inst(instrs[0])); + patch_instruction(dest + 1, ppc_inst(instrs[1])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); } printk(KERN_DEBUG "rfi-flush: patched %d locations (%s flush)\n", i, @@ -477,7 +475,7 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_ dest = (void *)start + *start; pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instr)); + patch_instruction(dest, ppc_inst(instr)); } printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); @@ -520,8 +518,8 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_ dest = (void *)start + *start; pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instr[0])); - patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instr[1])); + patch_instruction(dest, ppc_inst(instr[0])); + patch_instruction(dest + 1, ppc_inst(instr[1])); } printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); @@ -535,7 +533,7 @@ static void patch_btb_flush_section(long *curr) end = (void *)curr + *(curr + 1); for (; start < end; start++) { pr_devel("patching dest %lx\n", (unsigned long)start); - patch_instruction((struct ppc_inst *)start, ppc_inst(PPC_RAW_NOP())); + patch_instruction(start, ppc_inst(PPC_RAW_NOP())); } } @@ -554,7 +552,7 @@ void do_btb_flush_fixups(void) void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) { long *start, *end; - struct ppc_inst *dest; + u32 *dest; if (!(value & CPU_FTR_LWSYNC)) return ; @@ -571,13 +569,14 @@ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) static void do_final_fixups(void) { #if defined(CONFIG_PPC64) && defined(CONFIG_RELOCATABLE) - struct ppc_inst inst, *src, *dest, *end; + struct ppc_inst inst; + u32 *src, *dest, *end; if (PHYSICAL_START == 0) return; - src = (struct ppc_inst *)(KERNELBASE + PHYSICAL_START); - dest = (struct ppc_inst *)KERNELBASE; + src = (u32 *)(KERNELBASE + PHYSICAL_START); + dest = (u32 *)KERNELBASE; end = (void *)src + (__end_interrupts - _stext); while (src < end) { diff --git a/arch/powerpc/mm/maccess.c b/arch/powerpc/mm/maccess.c index a3c30a884076..aad7c47e0030 100644 --- a/arch/powerpc/mm/maccess.c +++ b/arch/powerpc/mm/maccess.c @@ -12,7 +12,7 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) return is_kernel_addr((unsigned long)unsafe_src); } -int copy_inst_from_kernel_nofault(struct ppc_inst *inst, struct ppc_inst *src) +int copy_inst_from_kernel_nofault(struct ppc_inst *inst, u32 *src) { unsigned int val, suffix; int err; @@ -21,7 +21,7 @@ int copy_inst_from_kernel_nofault(struct ppc_inst *inst, struct ppc_inst *src) if (err) return err; if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) { - err = copy_from_kernel_nofault(&suffix, (void *)src + 4, 4); + 
err = copy_from_kernel_nofault(&suffix, src + 1, sizeof(suffix)); *inst = ppc_inst_prefix(val, suffix); } else { *inst = ppc_inst(val); diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 16d4d1b6a1ff..3e864feaacf7 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -460,7 +460,7 @@ static __u64 power_pmu_bhrb_to(u64 addr) sizeof(instr))) return 0; - return branch_target((struct ppc_inst *)&instr); + return branch_target(&instr); } /* Userspace: need copy instruction here then translate it */ @@ -468,7 +468,7 @@ static __u64 power_pmu_bhrb_to(u64 addr) sizeof(instr))) return 0; - target = branch_target((struct ppc_inst *)&instr); + target = branch_target(&instr); if ((!target) || (instr & BRANCH_ABSOLUTE)) return target; diff --git a/arch/powerpc/platforms/86xx/mpc86xx_smp.c b/arch/powerpc/platforms/86xx/mpc86xx_smp.c index 87f524e4b09c..8a7e55acf090 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_smp.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_smp.c @@ -73,7 +73,7 @@ smp_86xx_kick_cpu(int nr) /* Setup fake reset vector to call __secondary_start_mpc86xx. */ target = (unsigned long) __secondary_start_mpc86xx; - patch_branch((struct ppc_inst *)vector, target, BRANCH_SET_LINK); + patch_branch(vector, target, BRANCH_SET_LINK); /* Kick that CPU */ smp_86xx_release_core(nr); @@ -83,7 +83,7 @@ smp_86xx_kick_cpu(int nr) mdelay(1); /* Restore the exception vector */ - patch_instruction((struct ppc_inst *)vector, ppc_inst(save_vector)); + patch_instruction(vector, ppc_inst(save_vector)); local_irq_restore(flags); diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index adae2a6712e1..bdfea6d6ab69 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -810,7 +810,7 @@ static int smp_core99_kick_cpu(int nr) * b __secondary_start_pmac_0 + nr*8 */ target = (unsigned long) __secondary_start_pmac_0 + nr * 8; - patch_branch((struct ppc_inst *)vector, target, BRANCH_SET_LINK); + patch_branch(vector, target, BRANCH_SET_LINK); /* Put some life in our friend */ pmac_call_feature(PMAC_FTR_RESET_CPU, NULL, nr, 0); @@ -823,7 +823,7 @@ static int smp_core99_kick_cpu(int nr) mdelay(1); /* Restore our exception vector */ - patch_instruction((struct ppc_inst *)vector, ppc_inst(save_vector)); + patch_instruction(vector, ppc_inst(save_vector)); local_irq_restore(flags); if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347); diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index f73c10869e64..bd7ee794be92 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -100,7 +100,7 @@ static long *xmon_fault_jmp[NR_CPUS]; /* Breakpoint stuff */ struct bpt { unsigned long address; - struct ppc_inst *instr; + u32 *instr; atomic_t ref_count; int enabled; unsigned long pad; @@ -946,11 +946,11 @@ static void insert_bpts(void) } patch_instruction(bp->instr, instr); - patch_instruction(ppc_inst_next(bp->instr, &instr), + patch_instruction(ppc_inst_next(bp->instr, bp->instr), ppc_inst(bpinstr)); if (bp->enabled & BP_CIABR) continue; - if (patch_instruction((struct ppc_inst *)bp->address, + if (patch_instruction((u32 *)bp->address, ppc_inst(bpinstr)) != 0) { printf("Couldn't write instruction at %lx, " "disabling breakpoint there\n", bp->address); @@ -992,7 +992,7 @@ static void remove_bpts(void) if (mread_instr(bp->address, &instr) && ppc_inst_equal(instr, ppc_inst(bpinstr)) && patch_instruction( - (struct ppc_inst *)bp->address, 
ppc_inst_read(bp->instr)) != 0) + (u32 *)bp->address, ppc_inst_read(bp->instr)) != 0) printf("Couldn't remove breakpoint at %lx\n", bp->address); } @@ -2214,7 +2214,7 @@ mread_instr(unsigned long adrs, struct ppc_inst *instr) if (setjmp(bus_error_jmp) == 0) { catch_memory_errors = 1; sync(); - *instr = ppc_inst_read((struct ppc_inst *)adrs); + *instr = ppc_inst_read((u32 *)adrs); sync(); /* wait a little while to see if we get a machine check */ __delay(200); -- cgit v1.2.3 From 077c4dedef09796ade917459a5330e3940fb5860 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 20 May 2021 13:50:46 +0000 Subject: powerpc/inst: Refactor PPC32 and PPC64 versions ppc_inst(), ppc_inst_prefixed() and ppc_inst_swab() can easily be made common to both PPC32 and PPC64. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d54c63dcac6d190e1cc0d2fe3259d6e621928cdf.1621516826.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/inst.h | 49 +++++++++++------------------------------ 1 file changed, 13 insertions(+), 36 deletions(-) diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index cd8ace0c00b7..b11c0e2f9639 100644 --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -60,9 +60,9 @@ static inline int ppc_inst_primary_opcode(struct ppc_inst x) return ppc_inst_val(x) >> 26; } -#ifdef CONFIG_PPC64 #define ppc_inst(x) ((struct ppc_inst){ .val = (x) }) +#ifdef CONFIG_PPC64 #define ppc_inst_prefix(x, y) ((struct ppc_inst){ .val = (x), .suffix = (y) }) static inline u32 ppc_inst_suffix(struct ppc_inst x) @@ -70,57 +70,34 @@ static inline u32 ppc_inst_suffix(struct ppc_inst x) return x.suffix; } -static inline bool ppc_inst_prefixed(struct ppc_inst x) -{ - return ppc_inst_primary_opcode(x) == OP_PREFIX; -} +#else +#define ppc_inst_prefix(x, y) ppc_inst(x) -static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x) +static inline u32 ppc_inst_suffix(struct ppc_inst x) { - return ppc_inst_prefix(swab32(ppc_inst_val(x)), swab32(ppc_inst_suffix(x))); + return 0; } +#endif /* CONFIG_PPC64 */ + static inline struct ppc_inst ppc_inst_read(const u32 *ptr) { - u32 val, suffix; - - val = *ptr; - if ((val >> 26) == OP_PREFIX) { - suffix = *(ptr + 1); - return ppc_inst_prefix(val, suffix); - } else { - return ppc_inst(val); - } + if (IS_ENABLED(CONFIG_PPC64) && (*ptr >> 26) == OP_PREFIX) + return ppc_inst_prefix(*ptr, *(ptr + 1)); + else + return ppc_inst(*ptr); } -#else - -#define ppc_inst(x) ((struct ppc_inst){ .val = x }) - -#define ppc_inst_prefix(x, y) ppc_inst(x) - static inline bool ppc_inst_prefixed(struct ppc_inst x) { - return false; -} - -static inline u32 ppc_inst_suffix(struct ppc_inst x) -{ - return 0; + return IS_ENABLED(CONFIG_PPC64) && ppc_inst_primary_opcode(x) == OP_PREFIX; } static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x) { - return ppc_inst(swab32(ppc_inst_val(x))); -} - -static inline struct ppc_inst ppc_inst_read(const u32 *ptr) -{ - return ppc_inst(*ptr); + return ppc_inst_prefix(swab32(ppc_inst_val(x)), swab32(ppc_inst_suffix(x))); } -#endif /* CONFIG_PPC64 */ - static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y) { if (ppc_inst_val(x) != ppc_inst_val(y)) -- cgit v1.2.3 From afd3287c8872142ec4298a2b77bd9077e2209c9c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 20 May 2021 13:50:47 +0000 Subject: powerpc/optprobes: Minimise casts nip is already an unsigned long, no cast needed. op_callback_addr and emulate_step_addr are kprobe_opcode_t *. 
Their values are obtained with ppc_kallsyms_lookup_name(), which returns 'unsigned long', and they are passed to create_branch(), which expects 'unsigned long'. So change them to 'unsigned long' to avoid casting them back and forth. can_optimize() uses p->addr several times as 'unsigned long'. Use a local 'unsigned long' variable and avoid casting multiple times. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e03192a6d4123242a275e71ce2ba0bb4d90700c1.1621516826.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/optprobes.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index 88e205775ded..07edc42b3c5b 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -55,6 +55,7 @@ static unsigned long can_optimize(struct kprobe *p) struct pt_regs regs; struct instruction_op op; unsigned long nip = 0; + unsigned long addr = (unsigned long)p->addr; /* * kprobe placed for kretprobe during boot time * So further checks can be skipped. */ if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline) - return (unsigned long)p->addr + sizeof(kprobe_opcode_t); + return addr + sizeof(kprobe_opcode_t); /* * We only support optimizing kernel addresses, but not * * FIXME: Optimize kprobes placed in module addresses. */ - if (!is_kernel_addr((unsigned long)p->addr)) + if (!is_kernel_addr(addr)) return 0; memset(&regs, 0, sizeof(struct pt_regs)); - regs.nip = (unsigned long)p->addr; + regs.nip = addr; regs.trap = 0x0; regs.msr = MSR_KERNEL; @@ -184,7 +185,8 @@ static void patch_imm_load_insns(unsigned long val, int reg, kprobe_opcode_t *ad int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) { struct ppc_inst branch_op_callback, branch_emulate_step, temp; - kprobe_opcode_t *op_callback_addr, *emulate_step_addr, *buff; + unsigned long op_callback_addr, emulate_step_addr; + kprobe_opcode_t *buff; long b_offset; unsigned long nip, size; int rc, i; @@ -212,8 +214,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) goto error; /* Check if the return address is also within 32MB range */ - b_offset = (unsigned long)(buff + TMPL_RET_IDX) - - (unsigned long)nip; + b_offset = (unsigned long)(buff + TMPL_RET_IDX) - nip; if (!is_offset_in_branch_range(b_offset)) goto error; @@ -236,20 +237,18 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) /* * 2. 
branch to optimized_callback() and emulate_step() */ - op_callback_addr = (kprobe_opcode_t *)ppc_kallsyms_lookup_name("optimized_callback"); - emulate_step_addr = (kprobe_opcode_t *)ppc_kallsyms_lookup_name("emulate_step"); + op_callback_addr = ppc_kallsyms_lookup_name("optimized_callback"); + emulate_step_addr = ppc_kallsyms_lookup_name("emulate_step"); if (!op_callback_addr || !emulate_step_addr) { WARN(1, "Unable to lookup optimized_callback()/emulate_step()\n"); goto error; } rc = create_branch(&branch_op_callback, buff + TMPL_CALL_HDLR_IDX, - (unsigned long)op_callback_addr, - BRANCH_SET_LINK); + op_callback_addr, BRANCH_SET_LINK); rc |= create_branch(&branch_emulate_step, buff + TMPL_EMULATE_IDX, - (unsigned long)emulate_step_addr, - BRANCH_SET_LINK); + emulate_step_addr, BRANCH_SET_LINK); if (rc) goto error; -- cgit v1.2.3 From f38adf86ce4fdae84904f420e175ce5806509c4c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 20 May 2021 13:50:48 +0000 Subject: powerpc/optprobes: Compact code source a bit. Now that lines can be up to 100 chars long, minimise the amount of split lines to increase readability. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/8ebbd977ea8cf8d706d82458f2a21acd44562a99.1621516826.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/optprobes.c | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index 07edc42b3c5b..55ce7b0e5fb2 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -18,18 +18,12 @@ #include #include -#define TMPL_CALL_HDLR_IDX \ - (optprobe_template_call_handler - optprobe_template_entry) -#define TMPL_EMULATE_IDX \ - (optprobe_template_call_emulate - optprobe_template_entry) -#define TMPL_RET_IDX \ - (optprobe_template_ret - optprobe_template_entry) -#define TMPL_OP_IDX \ - (optprobe_template_op_address - optprobe_template_entry) -#define TMPL_INSN_IDX \ - (optprobe_template_insn - optprobe_template_entry) -#define TMPL_END_IDX \ - (optprobe_template_end - optprobe_template_entry) +#define TMPL_CALL_HDLR_IDX (optprobe_template_call_handler - optprobe_template_entry) +#define TMPL_EMULATE_IDX (optprobe_template_call_emulate - optprobe_template_entry) +#define TMPL_RET_IDX (optprobe_template_ret - optprobe_template_entry) +#define TMPL_OP_IDX (optprobe_template_op_address - optprobe_template_entry) +#define TMPL_INSN_IDX (optprobe_template_insn - optprobe_template_entry) +#define TMPL_END_IDX (optprobe_template_end - optprobe_template_entry) static bool insn_page_in_use; @@ -267,8 +261,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) */ patch_branch(buff + TMPL_RET_IDX, nip, 0); - flush_icache_range((unsigned long)buff, - (unsigned long)(&buff[TMPL_END_IDX])); + flush_icache_range((unsigned long)buff, (unsigned long)(&buff[TMPL_END_IDX])); op->optinsn.insn = buff; @@ -306,10 +299,8 @@ void arch_optimize_kprobes(struct list_head *oplist) * Backup instructions which will be replaced * by jump address */ - memcpy(op->optinsn.copied_insn, op->kp.addr, - RELATIVEJUMP_SIZE); - create_branch(&instr, op->kp.addr, - (unsigned long)op->optinsn.insn, 0); + memcpy(op->optinsn.copied_insn, op->kp.addr, RELATIVEJUMP_SIZE); + create_branch(&instr, op->kp.addr, (unsigned long)op->optinsn.insn, 0); patch_instruction(op->kp.addr, instr); list_del_init(&op->list); } @@ -320,8 +311,7 @@ void arch_unoptimize_kprobe(struct 
optimized_kprobe *op) arch_arm_kprobe(&op->kp); } -void arch_unoptimize_kprobes(struct list_head *oplist, - struct list_head *done_list) +void arch_unoptimize_kprobes(struct list_head *oplist, struct list_head *done_list) { struct optimized_kprobe *op; struct optimized_kprobe *tmp; @@ -332,8 +322,7 @@ void arch_unoptimize_kprobes(struct list_head *oplist, } } -int arch_within_optimized_kprobe(struct optimized_kprobe *op, - unsigned long addr) +int arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr) { return ((unsigned long)op->kp.addr <= addr && (unsigned long)op->kp.addr + RELATIVEJUMP_SIZE > addr); -- cgit v1.2.3 From 0e628ad2d60896de31148fba00cc73623b8c0aa1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 20 May 2021 13:50:49 +0000 Subject: powerpc/optprobes: use PPC_RAW_ macros Use PPC_RAW_ macros to simplify the code. And use PPC_LO/PPC_HI instead of IMM_L/IMM_H, which are for internal use inside ppc-opcode.h. Those macros are self-explanatory, so the comments can go as well. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/5a167b8ba4d33a5c09cd504f0c862e25ffe85459.1621516826.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ppc-opcode.h | 11 +++++----- arch/powerpc/kernel/optprobes.c | 39 +++++++---------------------------- 2 files changed, 12 insertions(+), 38 deletions(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 1f484157ce4f..bede76dd3db7 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -287,9 +287,6 @@ #define PPC_INST_ADDIS 0x3c000000 #define PPC_INST_ADD 0x7c000214 #define PPC_INST_DIVD 0x7c0003d2 -#define PPC_INST_RLDICR 0x78000004 -#define PPC_INST_ORI 0x60000000 -#define PPC_INST_ORIS 0x64000000 #define PPC_INST_BRANCH 0x48000000 #define PPC_INST_BL 0x48000001 #define PPC_INST_BRANCH_COND 0x40800000 @@ -349,6 +346,8 @@ #define PPC_LO(v) ((v) & 0xffff) #define PPC_HI(v) (((v) >> 16) & 0xffff) #define PPC_HA(v) PPC_HI((v) + 0x8000) +#define PPC_HIGHER(v) (((v) >> 32) & 0xffff) +#define PPC_HIGHEST(v) (((v) >> 48) & 0xffff) /* * Only use the larx hint bit on 64bit CPUs.
e500v1/v2 based CPUs will treat a @@ -530,8 +529,8 @@ #define PPC_RAW_AND_DOT(d, a, b) (0x7c000039 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) #define PPC_RAW_OR(d, a, b) (0x7c000378 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) #define PPC_RAW_MR(d, a) PPC_RAW_OR(d, a, a) -#define PPC_RAW_ORI(d, a, i) (PPC_INST_ORI | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) -#define PPC_RAW_ORIS(d, a, i) (PPC_INST_ORIS | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) +#define PPC_RAW_ORI(d, a, i) (0x60000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) +#define PPC_RAW_ORIS(d, a, i) (0x64000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) #define PPC_RAW_NOR(d, a, b) (0x7c0000f8 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) #define PPC_RAW_XOR(d, a, b) (0x7c000278 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) #define PPC_RAW_XORI(d, a, i) (0x68000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) @@ -550,7 +549,7 @@ (0x54000001 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i) | __PPC_MB(mb) | __PPC_ME(me)) #define PPC_RAW_RLWIMI(d, a, i, mb, me) (0x50000000 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i) | __PPC_MB(mb) | __PPC_ME(me)) #define PPC_RAW_RLDICL(d, a, i, mb) (0x78000000 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i) | __PPC_MB64(mb)) -#define PPC_RAW_RLDICR(d, a, i, me) (PPC_INST_RLDICR | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i) | __PPC_ME64(me)) +#define PPC_RAW_RLDICR(d, a, i, me) (0x78000004 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i) | __PPC_ME64(me)) /* slwi = rlwinm Rx, Ry, n, 0, 31-n */ #define PPC_RAW_SLWI(d, a, i) PPC_RAW_RLWINM(d, a, i, 0, 31-(i)) diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index 55ce7b0e5fb2..2b8fe40069ad 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -126,10 +126,8 @@ void arch_remove_optimized_kprobe(struct optimized_kprobe *op) static void patch_imm32_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) { - patch_instruction(addr, ppc_inst(PPC_RAW_LIS(reg, IMM_H(val)))); - addr++; - - patch_instruction(addr, ppc_inst(PPC_RAW_ORI(reg, reg, IMM_L(val)))); + patch_instruction(addr++, ppc_inst(PPC_RAW_LIS(reg, PPC_HI(val)))); + patch_instruction(addr, ppc_inst(PPC_RAW_ORI(reg, reg, PPC_LO(val)))); } /* @@ -138,34 +136,11 @@ static void patch_imm32_load_insns(unsigned long val, int reg, kprobe_opcode_t * */ static void patch_imm64_load_insns(unsigned long long val, int reg, kprobe_opcode_t *addr) { - /* lis reg,(op)@highest */ - patch_instruction(addr, - ppc_inst(PPC_INST_ADDIS | ___PPC_RT(reg) | - ((val >> 48) & 0xffff))); - addr++; - - /* ori reg,reg,(op)@higher */ - patch_instruction(addr, - ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) | - ___PPC_RS(reg) | ((val >> 32) & 0xffff))); - addr++; - - /* rldicr reg,reg,32,31 */ - patch_instruction(addr, - ppc_inst(PPC_INST_RLDICR | ___PPC_RA(reg) | - ___PPC_RS(reg) | __PPC_SH64(32) | __PPC_ME64(31))); - addr++; - - /* oris reg,reg,(op)@h */ - patch_instruction(addr, - ppc_inst(PPC_INST_ORIS | ___PPC_RA(reg) | - ___PPC_RS(reg) | ((val >> 16) & 0xffff))); - addr++; - - /* ori reg,reg,(op)@l */ - patch_instruction(addr, - ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) | - ___PPC_RS(reg) | (val & 0xffff))); + patch_instruction(addr++, ppc_inst(PPC_RAW_LIS(reg, PPC_HIGHEST(val)))); + patch_instruction(addr++, ppc_inst(PPC_RAW_ORI(reg, reg, PPC_HIGHER(val)))); + patch_instruction(addr++, ppc_inst(PPC_RAW_SLDI(reg, reg, 32))); + patch_instruction(addr++, ppc_inst(PPC_RAW_ORIS(reg, reg, PPC_HI(val)))); + patch_instruction(addr, ppc_inst(PPC_RAW_ORI(reg, reg, PPC_LO(val)))); } static 
void patch_imm_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) -- cgit v1.2.3 From c0ca0fe08c9213a5187e4513b5506667f249030f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 3 Jun 2021 07:53:49 +0000 Subject: powerpc: Remove CONFIG_PPC_MMU_NOHASH_32 Since commit 555904d07eef ("powerpc/8xx: MM_SLICE is not needed anymore"), CONFIG_PPC_MMU_NOHASH_32 has not been used. Remove it. Reported-by: Tom Rix Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/bf1e074f6fb213a1c4cc4964370bdce4b648d647.1622706812.git.christophe.leroy@csgroup.eu --- arch/powerpc/platforms/Kconfig.cputype | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 885140055b7a..dc7c46f92302 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -424,10 +424,6 @@ config PPC_MMU_NOHASH def_bool y depends on !PPC_BOOK3S -config PPC_MMU_NOHASH_32 - def_bool y - depends on PPC_MMU_NOHASH && PPC32 - config PPC_BOOK3E_MMU def_bool y depends on FSL_BOOKE || PPC_BOOK3E -- cgit v1.2.3 From 10248dcba1205042a3a0ea65eb441030702d97cd Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 2 Jun 2021 06:42:10 +0000 Subject: powerpc/44x: Implement Kernel Userspace Exec Protection (KUEP) PowerPC 44x has two bits for exec protection in TLBs: one for user (UX) and one for supervisor (SX). Clear SX on user pages in TLB miss handlers to provide KUEP. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/169310e08152aa1d96c979770291d165ec6896ae.1622616032.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/mmu-44x.h | 1 + arch/powerpc/kernel/head_44x.S | 8 ++++++++ arch/powerpc/mm/nohash/44x.c | 13 +++++++++++++ arch/powerpc/platforms/Kconfig.cputype | 1 + 4 files changed, 23 insertions(+) diff --git a/arch/powerpc/include/asm/nohash/32/mmu-44x.h b/arch/powerpc/include/asm/nohash/32/mmu-44x.h index 2d92a39d8f2e..43ceca128531 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-44x.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-44x.h @@ -113,6 +113,7 @@ typedef struct { /* patch sites */ extern s32 patch__tlb_44x_hwater_D, patch__tlb_44x_hwater_I; +extern s32 patch__tlb_44x_kuep, patch__tlb_47x_kuep; #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 5c106ac36626..2cfb496df615 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -532,6 +532,10 @@ finish_tlb_load_44x: andi. r10,r12,_PAGE_USER /* User page ? */ beq 1f /* nope, leave U bits empty */ rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */ +#ifdef CONFIG_PPC_KUEP +0: rlwinm r11,r11,0,~PPC44x_TLB_SX /* Clear SX if User page */ + patch_site 0b, patch__tlb_44x_kuep +#endif 1: tlbwe r11,r13,PPC44x_TLB_ATTRIB /* Write ATTRIB */ /* Done...restore registers and get out of here. @@ -743,6 +747,10 @@ finish_tlb_load_47x: andi. r10,r12,_PAGE_USER /* User page ? */ beq 1f /* nope, leave U bits empty */ rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */ +#ifdef CONFIG_PPC_KUEP +0: rlwinm r11,r11,0,~PPC47x_TLB2_SX /* Clear SX if User page */ + patch_site 0b, patch__tlb_47x_kuep +#endif 1: tlbwe r11,r13,2 /* Done...restore registers and get out of here.
diff --git a/arch/powerpc/mm/nohash/44x.c b/arch/powerpc/mm/nohash/44x.c index 3d6ae7c72412..7da6d1e9fc9b 100644 --- a/arch/powerpc/mm/nohash/44x.c +++ b/arch/powerpc/mm/nohash/44x.c @@ -239,3 +239,16 @@ void __init mmu_init_secondary(int cpu) } } #endif /* CONFIG_SMP */ + +#ifdef CONFIG_PPC_KUEP +void __init setup_kuep(bool disabled) +{ + if (disabled) + patch_instruction_site(&patch__tlb_44x_kuep, ppc_inst(PPC_RAW_NOP())); + else + pr_info("Activating Kernel Userspace Execution Prevention\n"); + + if (IS_ENABLED(CONFIG_PPC_47x) && disabled) + patch_instruction_site(&patch__tlb_47x_kuep, ppc_inst(PPC_RAW_NOP())); +} +#endif diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index dc7c46f92302..d4acba7551c2 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -61,6 +61,7 @@ config 44x select 4xx_SOC select HAVE_PCI select PHYS_64BIT + select PPC_HAVE_KUEP endchoice -- cgit v1.2.3 From f6025a140ba8dcabdfb8a1e27ddaf44821700dce Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 4 Jun 2021 04:49:25 +0000 Subject: powerpc/8xx: Allow disabling KUAP at boot time PPC64 uses MMU features to enable/disable KUAP at boot time. But feature fixups are applied way too early on PPC32. But since commit c16728835eec ("powerpc/32: Manage KUAP in C"), all KUAP is in C so it is now possible to use static branches. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3dca510ce555335261a47c4799167da698f569c0.1622782111.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/kup-8xx.h | 43 ++++++++++++++++++++++++++-- arch/powerpc/mm/nohash/8xx.c | 11 +++++-- 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index 295ef5639609..f6a3c1935e75 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -9,10 +9,22 @@ #ifndef __ASSEMBLY__ +#include + #include +extern struct static_key_false disable_kuap_key; + +static __always_inline bool kuap_is_disabled(void) +{ + return static_branch_unlikely(&disable_kuap_key); +} + static inline void kuap_save_and_lock(struct pt_regs *regs) { + if (kuap_is_disabled()) + return; + regs->kuap = mfspr(SPRN_MD_AP); mtspr(SPRN_MD_AP, MD_APG_KUAP); } @@ -23,12 +35,20 @@ static inline void kuap_user_restore(struct pt_regs *regs) static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) { + if (kuap_is_disabled()) + return; + mtspr(SPRN_MD_AP, regs->kuap); } static inline unsigned long kuap_get_and_assert_locked(void) { - unsigned long kuap = mfspr(SPRN_MD_AP); + unsigned long kuap; + + if (kuap_is_disabled()) + return MD_APG_INIT; + + kuap = mfspr(SPRN_MD_AP); if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) WARN_ON_ONCE(kuap >> 16 != MD_APG_KUAP >> 16); @@ -38,25 +58,36 @@ static inline unsigned long kuap_get_and_assert_locked(void) static inline void kuap_assert_locked(void) { - if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) + if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && !kuap_is_disabled()) kuap_get_and_assert_locked(); } static inline void allow_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { + if (kuap_is_disabled()) + return; + mtspr(SPRN_MD_AP, MD_APG_INIT); } static inline void prevent_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { + if (kuap_is_disabled()) + return; + 
mtspr(SPRN_MD_AP, MD_APG_KUAP); } static inline unsigned long prevent_user_access_return(void) { - unsigned long flags = mfspr(SPRN_MD_AP); + unsigned long flags; + + if (kuap_is_disabled()) + return MD_APG_INIT; + + flags = mfspr(SPRN_MD_AP); mtspr(SPRN_MD_AP, MD_APG_KUAP); @@ -65,12 +96,18 @@ static inline unsigned long prevent_user_access_return(void) static inline void restore_user_access(unsigned long flags) { + if (kuap_is_disabled()) + return; + mtspr(SPRN_MD_AP, flags); } static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { + if (kuap_is_disabled()) + return false; + return !((regs->kuap ^ MD_APG_KUAP) & 0xff000000); } diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 71bfdbedacee..fc663322ba58 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -256,12 +256,17 @@ void __init setup_kuep(bool disabled) #endif #ifdef CONFIG_PPC_KUAP +struct static_key_false disable_kuap_key; +EXPORT_SYMBOL(disable_kuap_key); + void __init setup_kuap(bool disabled) { - pr_info("Activating Kernel Userspace Access Protection\n"); + if (disabled) { + static_branch_enable(&disable_kuap_key); + return; + } - if (disabled) - pr_warn("KUAP cannot be disabled yet on 8xx when compiled in\n"); + pr_info("Activating Kernel Userspace Access Protection\n"); mtspr(SPRN_MD_AP, MD_APG_KUAP); } -- cgit v1.2.3 From 91ec66719d4c5c0e7b4e32585b01881660d1bc53 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 3 Jun 2021 08:41:36 +0000 Subject: powerpc/32s: Move setup_{kuep/kuap}() into {kuep/kuap}.c Avoids the #ifdef in mmu.c Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0b7a13d414837e58264edc336b89c2fe9f35f9bc.1622708530.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/book3s32/Makefile | 1 + arch/powerpc/mm/book3s32/kuap.c | 11 +++++++++++ arch/powerpc/mm/book3s32/kuep.c | 8 ++++++++ arch/powerpc/mm/book3s32/mmu.c | 20 -------------------- 4 files changed, 20 insertions(+), 20 deletions(-) create mode 100644 arch/powerpc/mm/book3s32/kuap.c diff --git a/arch/powerpc/mm/book3s32/Makefile b/arch/powerpc/mm/book3s32/Makefile index 7f0c8a78ba0c..15f4773643d2 100644 --- a/arch/powerpc/mm/book3s32/Makefile +++ b/arch/powerpc/mm/book3s32/Makefile @@ -10,3 +10,4 @@ obj-y += mmu.o mmu_context.o obj-$(CONFIG_PPC_BOOK3S_603) += nohash_low.o obj-$(CONFIG_PPC_BOOK3S_604) += hash_low.o tlb.o obj-$(CONFIG_PPC_KUEP) += kuep.o +obj-$(CONFIG_PPC_KUAP) += kuap.o diff --git a/arch/powerpc/mm/book3s32/kuap.c b/arch/powerpc/mm/book3s32/kuap.c new file mode 100644 index 000000000000..1df55392878e --- /dev/null +++ b/arch/powerpc/mm/book3s32/kuap.c @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +void __init setup_kuap(bool disabled) +{ + pr_info("Activating Kernel Userspace Access Protection\n"); + + if (disabled) + pr_warn("KUAP cannot be disabled yet on 6xx when compiled in\n"); +} diff --git a/arch/powerpc/mm/book3s32/kuep.c b/arch/powerpc/mm/book3s32/kuep.c index 8ed1b8634839..6eafe7b2b031 100644 --- a/arch/powerpc/mm/book3s32/kuep.c +++ b/arch/powerpc/mm/book3s32/kuep.c @@ -38,3 +38,11 @@ void kuep_unlock(void) { kuep_update(mfsr(0) & ~SR_NX); } + +void __init setup_kuep(bool disabled) +{ + pr_info("Activating Kernel Userspace Execution Prevention\n"); + + if (disabled) + pr_warn("KUEP cannot be disabled yet on 6xx when compiled in\n"); +} diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 159930351d9f..27061583a010 
100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -445,26 +445,6 @@ void __init print_system_hash_info(void) pr_info("Hash_mask = 0x%lx\n", Hash_mask); } -#ifdef CONFIG_PPC_KUEP -void __init setup_kuep(bool disabled) -{ - pr_info("Activating Kernel Userspace Execution Prevention\n"); - - if (disabled) - pr_warn("KUEP cannot be disabled yet on 6xx when compiled in\n"); -} -#endif - -#ifdef CONFIG_PPC_KUAP -void __init setup_kuap(bool disabled) -{ - pr_info("Activating Kernel Userspace Access Protection\n"); - - if (disabled) - pr_warn("KUAP cannot be disabled yet on 6xx when compiled in\n"); -} -#endif - void __init early_init_mmu(void) { } -- cgit v1.2.3 From 91bb30822a2e1d7900f9f42e9e92647a9015f979 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 3 Jun 2021 08:41:37 +0000 Subject: powerpc/32s: Refactor update of user segment registers KUEP implements the update of user segment registers. Move it into mmu-hash.h in order to use it from other places. And inline kuep_lock() and kuep_unlock(). Inlining kuep_lock() is important for system_call_exception(), otherwise system_call_exception() has to save into stack the system call parameters that are used just after, and doing that takes more instructions than kuep_lock() itself. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/24591ca480d14a62ef910e38a5273d551262c4a2.1622708530.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/kup.h | 21 +++++++++++++++ arch/powerpc/include/asm/book3s/32/mmu-hash.h | 27 +++++++++++++++++++ arch/powerpc/include/asm/kup.h | 5 +--- arch/powerpc/mm/book3s32/kuep.c | 37 --------------------------- 4 files changed, 49 insertions(+), 41 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index c1f7c2e625a6..83aa0dde50d6 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -7,6 +7,27 @@ #ifndef __ASSEMBLY__ +static __always_inline bool kuep_is_disabled(void) +{ + return !IS_ENABLED(CONFIG_PPC_KUEP); +} + +static inline void kuep_lock(void) +{ + if (kuep_is_disabled()) + return; + + update_user_segments(mfsr(0) | SR_NX); +} + +static inline void kuep_unlock(void) +{ + if (kuep_is_disabled()) + return; + + update_user_segments(mfsr(0) & ~SR_NX); +} + #ifdef CONFIG_PPC_KUAP #include diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h index b85f8e114a9c..cc0284bbac86 100644 --- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h @@ -102,6 +102,33 @@ extern s32 patch__hash_page_B, patch__hash_page_C; extern s32 patch__flush_hash_A0, patch__flush_hash_A1, patch__flush_hash_A2; extern s32 patch__flush_hash_B; +#include +#include + +#define UPDATE_TWO_USER_SEGMENTS(n) do { \ + if (TASK_SIZE > ((n) << 28)) \ + mtsr(val1, (n) << 28); \ + if (TASK_SIZE > (((n) + 1) << 28)) \ + mtsr(val2, ((n) + 1) << 28); \ + val1 = (val1 + 0x222) & 0xf0ffffff; \ + val2 = (val2 + 0x222) & 0xf0ffffff; \ +} while (0) + +static __always_inline void update_user_segments(u32 val) +{ + int val1 = val; + int val2 = (val + 0x111) & 0xf0ffffff; + + UPDATE_TWO_USER_SEGMENTS(0); + UPDATE_TWO_USER_SEGMENTS(2); + UPDATE_TWO_USER_SEGMENTS(4); + UPDATE_TWO_USER_SEGMENTS(6); + UPDATE_TWO_USER_SEGMENTS(8); + UPDATE_TWO_USER_SEGMENTS(10); + UPDATE_TWO_USER_SEGMENTS(12); + UPDATE_TWO_USER_SEGMENTS(14); +} + #endif /* !__ASSEMBLY__ */ /* We happily 
ignore the smaller BATs on 601, we don't actually use diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index ec96232529ac..4b94d4293777 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -46,10 +46,7 @@ void setup_kuep(bool disabled); static inline void setup_kuep(bool disabled) { } #endif /* CONFIG_PPC_KUEP */ -#if defined(CONFIG_PPC_KUEP) && defined(CONFIG_PPC_BOOK3S_32) -void kuep_lock(void); -void kuep_unlock(void); -#else +#ifndef CONFIG_PPC_BOOK3S_32 static inline void kuep_lock(void) { } static inline void kuep_unlock(void) { } #endif diff --git a/arch/powerpc/mm/book3s32/kuep.c b/arch/powerpc/mm/book3s32/kuep.c index 6eafe7b2b031..919595f47e25 100644 --- a/arch/powerpc/mm/book3s32/kuep.c +++ b/arch/powerpc/mm/book3s32/kuep.c @@ -1,43 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include -#include -#include -#include - -#define KUEP_UPDATE_TWO_USER_SEGMENTS(n) do { \ - if (TASK_SIZE > ((n) << 28)) \ - mtsr(val1, (n) << 28); \ - if (TASK_SIZE > (((n) + 1) << 28)) \ - mtsr(val2, ((n) + 1) << 28); \ - val1 = (val1 + 0x222) & 0xf0ffffff; \ - val2 = (val2 + 0x222) & 0xf0ffffff; \ -} while (0) - -static __always_inline void kuep_update(u32 val) -{ - int val1 = val; - int val2 = (val + 0x111) & 0xf0ffffff; - - KUEP_UPDATE_TWO_USER_SEGMENTS(0); - KUEP_UPDATE_TWO_USER_SEGMENTS(2); - KUEP_UPDATE_TWO_USER_SEGMENTS(4); - KUEP_UPDATE_TWO_USER_SEGMENTS(6); - KUEP_UPDATE_TWO_USER_SEGMENTS(8); - KUEP_UPDATE_TWO_USER_SEGMENTS(10); - KUEP_UPDATE_TWO_USER_SEGMENTS(12); - KUEP_UPDATE_TWO_USER_SEGMENTS(14); -} - -void kuep_lock(void) -{ - kuep_update(mfsr(0) | SR_NX); -} - -void kuep_unlock(void) -{ - kuep_update(mfsr(0) & ~SR_NX); -} void __init setup_kuep(bool disabled) { -- cgit v1.2.3 From 7235bb3593781ed022d0714a73c2c0d8eb8a835f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 3 Jun 2021 08:41:38 +0000 Subject: powerpc/32s: move CTX_TO_VSID() into mmu-hash.h In order to reuse it in switch_mmu_context(), this patch moves CTX_TO_VSID() macro into asm/book3s/32/mmu-hash.h Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/26b36ef2939234a04b37baf6ffe50cba81f5d1b7.1622708530.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/mmu-hash.h | 10 ++++++++++ arch/powerpc/kvm/book3s_32_mmu_host.c | 3 --- arch/powerpc/mm/book3s32/mmu_context.c | 13 ------------- 3 files changed, 10 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h index cc0284bbac86..583388399b1b 100644 --- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h @@ -66,6 +66,16 @@ struct ppc_bat { #ifndef __ASSEMBLY__ +/* + * This macro defines the mapping from contexts to VSIDs (virtual + * segment IDs). We use a skew on both the context and the high 4 bits + * of the 32-bit virtual address (the "effective segment ID") in order + * to spread out the entries in the MMU hash table. Note, if this + * function is changed then hash functions will have to be + * changed to correspond. 
+ */ +#define CTX_TO_VSID(c, id) ((((c) * (897 * 16)) + (id * 0x111)) & 0xffffff) + /* * Hardware Page Table Entry * Note that the xpn and x bitfields are used only by processors that diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index e8e7b2c530d1..4b3a8d80cfa3 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -353,9 +353,6 @@ void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu) preempt_enable(); } -/* From mm/mmu_context_hash32.c */ -#define CTX_TO_VSID(c, id) ((((c) * (897 * 16)) + (id * 0x111)) & 0xffffff) - int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); diff --git a/arch/powerpc/mm/book3s32/mmu_context.c b/arch/powerpc/mm/book3s32/mmu_context.c index 218996e40a8e..c949363f315f 100644 --- a/arch/powerpc/mm/book3s32/mmu_context.c +++ b/arch/powerpc/mm/book3s32/mmu_context.c @@ -39,19 +39,6 @@ #define LAST_CONTEXT 32767 #define FIRST_CONTEXT 1 -/* - * This function defines the mapping from contexts to VSIDs (virtual - * segment IDs). We use a skew on both the context and the high 4 bits - * of the 32-bit virtual address (the "effective segment ID") in order - * to spread out the entries in the MMU hash table. Note, if this - * function is changed then arch/ppc/mm/hashtable.S will have to be - * changed to correspond. - * - * - * CTX_TO_VSID(ctx, va) (((ctx) * (897 * 16) + ((va) >> 28) * 0x111) \ - * & 0xffffff) - */ - static unsigned long next_mmu_context; static unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1]; -- cgit v1.2.3 From 863771a28e27dc9eaeaa88cea300370d032f0e0f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 3 Jun 2021 08:41:39 +0000 Subject: powerpc/32s: Convert switch_mmu_context() to C switch_mmu_context() does things that can easily be done in C. For updating user segments, we have update_user_segments(). As mentioned in commit b5efec00b671 ("powerpc/32s: Move KUEP locking/unlocking in C"), update_user_segments() has the loop unrolled which is a significant performance gain.
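To make the VSID skew concrete, here is a small stand-alone C sketch (user-space, for illustration only) that mirrors the CTX_TO_VSID() macro above and the SR_KS/SR_NX bit positions used by these patches; the printf() stands in for the mtsr instructions the kernel actually issues:

#include <stdio.h>

/* Mirrors the kernel's CTX_TO_VSID() macro and segment register bits */
#define CTX_TO_VSID(c, id)	((((c) * (897 * 16)) + ((id) * 0x111)) & 0xffffff)
#define SR_KS	0x40000000	/* Supervisor key (KUAP) */
#define SR_NX	0x10000000	/* No-execute (KUEP) */

int main(void)
{
	long ctx = 42;	/* arbitrary example context ID */
	unsigned int seg;

	for (seg = 0; seg < 16; seg++) {
		/* One skewed VSID per 256M segment, protection bits constant */
		unsigned int val = CTX_TO_VSID(ctx, seg) | SR_KS | SR_NX;

		printf("SR%-2u <- 0x%08x\n", seg, val);
	}
	return 0;
}

Running it shows consecutive segments getting VSIDs 0x111 apart while the protection bits stay constant, which is exactly the property update_user_segments() relies on.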
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/05c0875ad8220c03452c3a334946e207c6ca04d6.1622708530.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/kup.h | 5 +++ arch/powerpc/kernel/asm-offsets.c | 5 --- arch/powerpc/kernel/head_book3s_32.S | 58 -------------------------------- arch/powerpc/mm/book3s32/mmu_context.c | 35 +++++++++++++++++++ 4 files changed, 40 insertions(+), 63 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 83aa0dde50d6..618ffc8e4ee9 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -7,6 +7,11 @@ #ifndef __ASSEMBLY__ +static __always_inline bool kuap_is_disabled(void) +{ + return !IS_ENABLED(CONFIG_PPC_KUAP); +} + static __always_inline bool kuep_is_disabled(void) { return !IS_ENABLED(CONFIG_PPC_KUEP); diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index b38dabe4ecb7..672e7e2b2343 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -86,7 +86,6 @@ int main(void) OFFSET(PACA_CANARY, paca_struct, canary); #endif #endif - OFFSET(MMCONTEXTID, mm_struct, context.id); #ifdef CONFIG_PPC32 #ifdef CONFIG_PPC_RTAS OFFSET(RTAS_SP, thread_struct, rtas_sp); @@ -323,10 +322,6 @@ int main(void) #endif #endif -#ifndef CONFIG_PPC64 - OFFSET(MM_PGD, mm_struct, pgd); -#endif /* ! CONFIG_PPC64 */ - /* About the CPU features table */ OFFSET(CPU_SPEC_FEATURES, cpu_spec, cpu_features); OFFSET(CPU_SPEC_SETUP, cpu_spec, cpu_setup); diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 326262030279..db0e2dc25f86 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -1033,58 +1033,6 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE) mtspr SPRN_SRR1,r4 rfi -/* - * void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next); - * - * Set up the segment registers for a new context. - */ -_ENTRY(switch_mmu_context) - lwz r3,MMCONTEXTID(r4) - cmpwi cr0,r3,0 - blt- 4f - mulli r3,r3,897 /* multiply context by skew factor */ - rlwinm r3,r3,4,8,27 /* VSID = (context & 0xfffff) << 4 */ -#ifdef CONFIG_PPC_KUEP - oris r3, r3, SR_NX@h /* Set Nx */ -#endif -#ifdef CONFIG_PPC_KUAP - oris r3, r3, SR_KS@h /* Set Ks */ -#endif - li r0,NUM_USER_SEGMENTS - mtctr r0 - -#ifdef CONFIG_BDI_SWITCH - /* Context switch the PTE pointer for the Abatron BDI2000. - * The PGDIR is passed as second argument. - */ - lwz r4, MM_PGD(r4) - lis r5, abatron_pteptrs@ha - stw r4, abatron_pteptrs@l + 0x4(r5) -#endif -BEGIN_MMU_FTR_SECTION -#ifndef CONFIG_BDI_SWITCH - lwz r4, MM_PGD(r4) -#endif - tophys(r4, r4) - rlwinm r4, r4, 4, 0xffff01ff - mtspr SPRN_SDR1, r4 -END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE) - li r4,0 - isync -3: - mtsrin r3,r4 - addi r3,r3,0x111 /* next VSID */ - rlwinm r3,r3,0,8,3 /* clear out any overflow from VSID field */ - addis r4,r4,0x1000 /* address of next segment */ - bdnz 3b - sync - isync - blr -4: trap - EMIT_BUG_ENTRY 4b,__FILE__,__LINE__,0 - blr -EXPORT_SYMBOL(switch_mmu_context) - /* * An undocumented "feature" of 604e requires that the v bit * be cleared before changing BAT values. @@ -1282,9 +1230,3 @@ EXPORT_SYMBOL(empty_zero_page) .globl swapper_pg_dir swapper_pg_dir: .space PGD_TABLE_SIZE - -/* Room for two PTE pointers, usually the kernel and current user pointers - * to their respective root page table. 
- */ -abatron_pteptrs: - .space 8 diff --git a/arch/powerpc/mm/book3s32/mmu_context.c b/arch/powerpc/mm/book3s32/mmu_context.c index c949363f315f..e2708e387dc3 100644 --- a/arch/powerpc/mm/book3s32/mmu_context.c +++ b/arch/powerpc/mm/book3s32/mmu_context.c @@ -23,6 +23,12 @@ #include +/* + * Room for two PTE pointers, usually the kernel and current user pointers + * to their respective root page table. + */ +void *abatron_pteptrs[2]; + /* * On 32-bit PowerPC 6xx/7xx/7xxx CPUs, we use a set of 16 VSIDs * (virtual segment identifiers) for each context. Although the @@ -98,3 +104,32 @@ void __init mmu_context_init(void) context_map[0] = (1 << FIRST_CONTEXT) - 1; next_mmu_context = FIRST_CONTEXT; } + +void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) +{ + long id = next->context.id; + unsigned long val; + + if (id < 0) + panic("mm_struct %p has no context ID", next); + + isync(); + + val = CTX_TO_VSID(id, 0); + if (!kuep_is_disabled()) + val |= SR_NX; + if (!kuap_is_disabled()) + val |= SR_KS; + + update_user_segments(val); + + if (IS_ENABLED(CONFIG_BDI_SWITCH)) + abatron_pteptrs[1] = next->pgd; + + if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) + mtspr(SPRN_SDR1, rol32(__pa(next->pgd), 4) & 0xffff01ff); + + mb(); /* sync */ + isync(); +} +EXPORT_SYMBOL(switch_mmu_context); -- cgit v1.2.3 From 882136fb2f5208a35ddad9205b20e5791edd4782 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 3 Jun 2021 08:41:40 +0000 Subject: powerpc/32s: Simplify calculation of segment register content The segment register has the VSID on bits 8-31. Bits 4-7 are reserved; there is no requirement to set them to 0. VSIDs are calculated from the VSID of SR0 by adding 0x111. Even with the highest possible VSID, which would be 0xFFFFF0, adding 16 times 0x111 results in 0x1001100. So the reserved bits never overflow, and there is no need to clear them after each calculation.
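That bound is easy to check with a few lines of plain C (illustration only, not kernel code):

#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned int vsid = 0xFFFFF0;		/* highest possible SR0 VSID */
	unsigned int last = vsid + 16 * 0x111;	/* after stepping across all segments */

	printf("0x%x\n", last);			/* prints 0x1001100 */
	assert((last & 0xf0000000) == 0);	/* Ks/Kp/N bits are never touched */
	return 0;
}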
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ddc1cfd2ec8f3b2395c6a4d7f2b0c1aa1b1e64fb.1622708530.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/mmu-hash.h | 42 +++++++++++++++------------ 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h index 583388399b1b..f5be185cbdf8 100644 --- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h @@ -115,28 +115,32 @@ extern s32 patch__flush_hash_B; #include #include -#define UPDATE_TWO_USER_SEGMENTS(n) do { \ - if (TASK_SIZE > ((n) << 28)) \ - mtsr(val1, (n) << 28); \ - if (TASK_SIZE > (((n) + 1) << 28)) \ - mtsr(val2, ((n) + 1) << 28); \ - val1 = (val1 + 0x222) & 0xf0ffffff; \ - val2 = (val2 + 0x222) & 0xf0ffffff; \ -} while (0) +static __always_inline void update_user_segment(u32 n, u32 val) +{ + if (n << 28 < TASK_SIZE) + mtsr(val + n * 0x111, n << 28); +} static __always_inline void update_user_segments(u32 val) { - int val1 = val; - int val2 = (val + 0x111) & 0xf0ffffff; - - UPDATE_TWO_USER_SEGMENTS(0); - UPDATE_TWO_USER_SEGMENTS(2); - UPDATE_TWO_USER_SEGMENTS(4); - UPDATE_TWO_USER_SEGMENTS(6); - UPDATE_TWO_USER_SEGMENTS(8); - UPDATE_TWO_USER_SEGMENTS(10); - UPDATE_TWO_USER_SEGMENTS(12); - UPDATE_TWO_USER_SEGMENTS(14); + val &= 0xf0ffffff; + + update_user_segment(0, val); + update_user_segment(1, val); + update_user_segment(2, val); + update_user_segment(3, val); + update_user_segment(4, val); + update_user_segment(5, val); + update_user_segment(6, val); + update_user_segment(7, val); + update_user_segment(8, val); + update_user_segment(9, val); + update_user_segment(10, val); + update_user_segment(11, val); + update_user_segment(12, val); + update_user_segment(13, val); + update_user_segment(14, val); + update_user_segment(15, val); } #endif /* !__ASSEMBLY__ */ -- cgit v1.2.3 From 86f46f3432727933be82f64b739712a6edb9d704 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 3 Jun 2021 08:41:41 +0000 Subject: powerpc/32s: Initialise KUAP and KUEP in C In order to selectively activate KUAP and KUEP in a following patch, perform KUAP and KUEP initialisation in C. Unlike PPC64, PPC32 doesn't have an early_setup_secondary(), so do it in start_secondary(). 
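For orientation, the generic dispatcher that both the PPC64 and (with this patch) PPC32 boot paths end up calling looks roughly like this; it is a simplified sketch of arch/powerpc/mm/init-common.c, not part of this patch:

/* Simplified sketch of the common boot-time KUAP/KUEP dispatcher */
#include <linux/init.h>
#include <linux/printk.h>
#include <asm/kup.h>

static bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP);
static bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP);

static int __init parse_nosmep(char *p)
{
	disable_kuep = true;
	pr_warn("Disabling Kernel Userspace Execution Prevention\n");
	return 0;
}
early_param("nosmep", parse_nosmep);

static int __init parse_nosmap(char *p)
{
	disable_kuap = true;
	pr_warn("Disabling Kernel Userspace Access Protection\n");
	return 0;
}
early_param("nosmap", parse_nosmap);

void setup_kup(void)
{
	setup_kuep(disable_kuep);
	setup_kuap(disable_kuap);
}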
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/87be72023448dd4e476744ed279b8c04b8d08a1c.1622708530.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_book3s_32.S | 6 ------ arch/powerpc/kernel/smp.c | 4 ++++ arch/powerpc/mm/book3s32/kuap.c | 6 ++++++ arch/powerpc/mm/book3s32/kuep.c | 6 ++++++ 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index db0e2dc25f86..0b82672ff7a6 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -934,12 +934,6 @@ _GLOBAL(load_segment_registers) li r0, NUM_USER_SEGMENTS /* load up user segment register values */ mtctr r0 /* for context 0 */ li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */ -#ifdef CONFIG_PPC_KUEP - oris r3, r3, SR_NX@h /* Set Nx */ -#endif -#ifdef CONFIG_PPC_KUAP - oris r3, r3, SR_KS@h /* Set Ks */ -#endif li r4, 0 3: mtsrin r3, r4 addi r3, r3, 0x111 /* increment VSID */ diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 2e05c783440a..820ae31e0231 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1541,6 +1541,10 @@ void start_secondary(void *unused) { unsigned int cpu = raw_smp_processor_id(); + /* PPC64 calls setup_kup() in early_setup_secondary() */ + if (IS_ENABLED(CONFIG_PPC32)) + setup_kup(); + mmgrab(&init_mm); current->active_mm = &init_mm; diff --git a/arch/powerpc/mm/book3s32/kuap.c b/arch/powerpc/mm/book3s32/kuap.c index 1df55392878e..5533ed92ab3d 100644 --- a/arch/powerpc/mm/book3s32/kuap.c +++ b/arch/powerpc/mm/book3s32/kuap.c @@ -1,9 +1,15 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include void __init setup_kuap(bool disabled) { + kuap_update_sr(mfsr(0) | SR_KS, 0, TASK_SIZE); + + if (smp_processor_id() != boot_cpuid) + return; + pr_info("Activating Kernel Userspace Access Protection\n"); if (disabled) diff --git a/arch/powerpc/mm/book3s32/kuep.c b/arch/powerpc/mm/book3s32/kuep.c index 919595f47e25..3147e2edcf63 100644 --- a/arch/powerpc/mm/book3s32/kuep.c +++ b/arch/powerpc/mm/book3s32/kuep.c @@ -1,9 +1,15 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include void __init setup_kuep(bool disabled) { + kuep_lock(); + + if (smp_processor_id() != boot_cpuid) + return; + pr_info("Activating Kernel Userspace Execution Prevention\n"); if (disabled) -- cgit v1.2.3 From 50d2f104cd9572af476579eae9aa1b38de602ec7 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 3 Jun 2021 08:41:42 +0000 Subject: powerpc/32s: Allow disabling KUEP at boot time PPC64 uses MMU features to enable/disable KUEP at boot time. But feature fixups are applied way too early on PPC32. Now that all KUEP related actions are in C following the conversion of KUEP initial setup and context switch in C, static branches can be used to enable/disable KUEP. 
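The idiom used here is the standard jump-label pattern; a minimal sketch follows (the names below are illustrative, the patch itself uses disable_kuep_key and kuep_is_disabled()):

#include <linux/jump_label.h>

/* Key defaults to false, i.e. the protection stays active unless disabled */
DEFINE_STATIC_KEY_FALSE(disable_feature_key);

static __always_inline bool feature_is_disabled(void)
{
	/* Patched to a single nop on the common (enabled) path */
	return static_branch_unlikely(&disable_feature_key);
}

void __init setup_feature(bool disabled)
{
	if (disabled)
		static_branch_enable(&disable_feature_key);
}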
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/7745a2c3a08ec46302920a3f48d1cb9b5469dbbb.1622708530.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/kup.h | 6 +++++- arch/powerpc/mm/book3s32/kuep.c | 11 +++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 618ffc8e4ee9..2854d970dabe 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -7,6 +7,10 @@ #ifndef __ASSEMBLY__ +#include + +extern struct static_key_false disable_kuep_key; + static __always_inline bool kuap_is_disabled(void) { return !IS_ENABLED(CONFIG_PPC_KUAP); @@ -14,7 +18,7 @@ static __always_inline bool kuap_is_disabled(void) static __always_inline bool kuep_is_disabled(void) { - return !IS_ENABLED(CONFIG_PPC_KUEP); + return !IS_ENABLED(CONFIG_PPC_KUEP) || static_branch_unlikely(&disable_kuep_key); } static inline void kuep_lock(void) diff --git a/arch/powerpc/mm/book3s32/kuep.c b/arch/powerpc/mm/book3s32/kuep.c index 3147e2edcf63..3f6eb6e23fca 100644 --- a/arch/powerpc/mm/book3s32/kuep.c +++ b/arch/powerpc/mm/book3s32/kuep.c @@ -3,15 +3,18 @@ #include #include +struct static_key_false disable_kuep_key; + void __init setup_kuep(bool disabled) { - kuep_lock(); + if (!disabled) + kuep_lock(); if (smp_processor_id() != boot_cpuid) return; - pr_info("Activating Kernel Userspace Execution Prevention\n"); - if (disabled) - pr_warn("KUEP cannot be disabled yet on 6xx when compiled in\n"); + static_branch_enable(&disable_kuep_key); + else + pr_info("Activating Kernel Userspace Execution Prevention\n"); } -- cgit v1.2.3 From 6b4d630068b0c5cdd6d8e599182b131448e0cb06 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 3 Jun 2021 08:41:43 +0000 Subject: powerpc/32s: Allow disabling KUAP at boot time PPC64 uses MMU features to enable/disable KUAP at boot time. But feature fixups are applied way too early on PPC32. Now that all KUAP related actions are in C following the conversion of KUAP initial setup and context switch in C, static branches can be used to enable/disable KUAP. 
Signed-off-by: Christophe Leroy [mpe: Export disable_kuap_key to fix build errors] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/cd79e8008455fba5395d099f9bb1305c039b931c.1622708530.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/kup.h | 27 ++++++++++++++++++++++++++- arch/powerpc/mm/book3s32/kuap.c | 12 ++++++++---- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 2854d970dabe..68fbe28c6d7e 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -9,11 +9,12 @@ #include +extern struct static_key_false disable_kuap_key; extern struct static_key_false disable_kuep_key; static __always_inline bool kuap_is_disabled(void) { - return !IS_ENABLED(CONFIG_PPC_KUAP); + return !IS_ENABLED(CONFIG_PPC_KUAP) || static_branch_unlikely(&disable_kuap_key); } static __always_inline bool kuep_is_disabled(void) @@ -62,6 +63,9 @@ static inline void kuap_save_and_lock(struct pt_regs *regs) u32 addr = kuap & 0xf0000000; u32 end = kuap << 28; + if (kuap_is_disabled()) + return; + regs->kuap = kuap; if (unlikely(!kuap)) return; @@ -79,6 +83,9 @@ static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) u32 addr = regs->kuap & 0xf0000000; u32 end = regs->kuap << 28; + if (kuap_is_disabled()) + return; + current->thread.kuap = regs->kuap; if (unlikely(regs->kuap == kuap)) @@ -91,6 +98,9 @@ static inline unsigned long kuap_get_and_assert_locked(void) { unsigned long kuap = current->thread.kuap; + if (kuap_is_disabled()) + return 0; + WARN_ON_ONCE(IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && kuap != 0); return kuap; @@ -106,6 +116,9 @@ static __always_inline void allow_user_access(void __user *to, const void __user { u32 addr, end; + if (kuap_is_disabled()) + return; + BUILD_BUG_ON(!__builtin_constant_p(dir)); BUILD_BUG_ON(dir & ~KUAP_READ_WRITE); @@ -128,6 +141,9 @@ static __always_inline void prevent_user_access(void __user *to, const void __us { u32 addr, end; + if (kuap_is_disabled()) + return; + BUILD_BUG_ON(!__builtin_constant_p(dir)); if (dir & KUAP_CURRENT_WRITE) { @@ -159,6 +175,9 @@ static inline unsigned long prevent_user_access_return(void) unsigned long end = flags << 28; void __user *to = (__force void __user *)addr; + if (kuap_is_disabled()) + return 0; + if (flags) prevent_user_access(to, to, end - addr, KUAP_READ_WRITE); @@ -171,6 +190,9 @@ static inline void restore_user_access(unsigned long flags) unsigned long end = flags << 28; void __user *to = (__force void __user *)addr; + if (kuap_is_disabled()) + return; + if (flags) allow_user_access(to, to, end - addr, KUAP_READ_WRITE); } @@ -181,6 +203,9 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) unsigned long begin = regs->kuap & 0xf0000000; unsigned long end = regs->kuap << 28; + if (kuap_is_disabled()) + return false; + return is_write && (address < begin || address >= end); } diff --git a/arch/powerpc/mm/book3s32/kuap.c b/arch/powerpc/mm/book3s32/kuap.c index 5533ed92ab3d..0e59c8b32c4f 100644 --- a/arch/powerpc/mm/book3s32/kuap.c +++ b/arch/powerpc/mm/book3s32/kuap.c @@ -3,15 +3,19 @@ #include #include +struct static_key_false disable_kuap_key; +EXPORT_SYMBOL(disable_kuap_key); + void __init setup_kuap(bool disabled) { - kuap_update_sr(mfsr(0) | SR_KS, 0, TASK_SIZE); + if (!disabled) + kuap_update_sr(mfsr(0) | SR_KS, 0, TASK_SIZE); if (smp_processor_id() != boot_cpuid) return; - pr_info("Activating Kernel Userspace 
Access Protection\n"); - if (disabled) - pr_warn("KUAP cannot be disabled yet on 6xx when compiled in\n"); + static_branch_enable(&disable_kuap_key); + else + pr_info("Activating Kernel Userspace Access Protection\n"); } -- cgit v1.2.3 From 16132529cee586ee9a058bb33cfbdcb5d884f6b3 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 3 Jun 2021 08:41:44 +0000 Subject: powerpc/32s: Rework Kernel Userspace Access Protection On book3s/32, KUAP is provided by toggling the Ks bit in segment registers. One segment register addresses 256M of virtual memory. At present, KUAP implements complex logic to apply the unlock/lock on the exact number of segments covering the user range to access, saving the boundaries of the segment range in a member of the thread struct. But most if not all user accesses are within a single segment. Rework KUAP with a different approach: - Open only one segment, the one corresponding to the starting address of the range to be accessed. - If a second segment is involved, it will generate a page fault. The segment will then be opened by the page fault handler. The kuap member of the thread struct will now contain: - The start address of the current ongoing user access, which will be used to know which segment to lock at the end of the user access. - ~0 when no user access is open - ~1 when additional segments are opened by a page fault. Then, at lock time: - When only one segment is open, close it. - When several segments are open, close all user segments. Almost 100% of the time, only one segment will be involved. In interrupts, inline the functions that unlock/lock all segments, because not inlining them implies a lot of register saves/restores. With the patch, writing value 128 in userspace in perf_copy_attr() is done with 16 instructions: 3890: 93 82 04 dc stw r28,1244(r2) 3894: 7d 20 e5 26 mfsrin r9,r28 3898: 55 29 00 80 rlwinm r9,r9,0,2,0 389c: 7d 20 e1 e4 mtsrin r9,r28 38a0: 4c 00 01 2c isync 38a4: 39 20 00 80 li r9,128 38a8: 91 3c 00 00 stw r9,0(r28) 38ac: 81 42 04 dc lwz r10,1244(r2) 38b0: 39 00 ff ff li r8,-1 38b4: 91 02 04 dc stw r8,1244(r2) 38b8: 2c 0a ff fe cmpwi r10,-2 38bc: 41 82 00 88 beq 3944 38c0: 7d 20 55 26 mfsrin r9,r10 38c4: 65 29 40 00 oris r9,r9,16384 38c8: 7d 20 51 e4 mtsrin r9,r10 38cc: 4c 00 01 2c isync ... 3944: 48 00 00 01 bl 3944 3944: R_PPC_REL24 kuap_lock_all_ool Before the patch it was 118 instructions. In reality only 42 are executed in most cases, but GCC is not able to see that a properly aligned user access cannot involve more than one segment.
5060: 39 1d 00 04 addi r8,r29,4 5064: 3d 20 b0 00 lis r9,-20480 5068: 7c 08 48 40 cmplw r8,r9 506c: 40 81 00 08 ble 5074 5070: 3d 00 b0 00 lis r8,-20480 5074: 39 28 ff ff addi r9,r8,-1 5078: 57 aa 00 06 rlwinm r10,r29,0,0,3 507c: 55 29 27 3e rlwinm r9,r9,4,28,31 5080: 39 29 00 01 addi r9,r9,1 5084: 7d 29 53 78 or r9,r9,r10 5088: 91 22 04 dc stw r9,1244(r2) 508c: 7d 20 ed 26 mfsrin r9,r29 5090: 55 29 00 80 rlwinm r9,r9,0,2,0 5094: 7c 08 50 40 cmplw r8,r10 5098: 40 81 00 c0 ble 5158 509c: 7d 46 50 f8 not r6,r10 50a0: 7c c6 42 14 add r6,r6,r8 50a4: 54 c6 27 be rlwinm r6,r6,4,30,31 50a8: 7d 20 51 e4 mtsrin r9,r10 50ac: 3c ea 10 00 addis r7,r10,4096 50b0: 39 29 01 11 addi r9,r9,273 50b4: 7f 88 38 40 cmplw cr7,r8,r7 50b8: 55 29 02 06 rlwinm r9,r9,0,8,3 50bc: 40 9d 00 9c ble cr7,5158 50c0: 2f 86 00 00 cmpwi cr7,r6,0 50c4: 41 9e 00 4c beq cr7,5110 50c8: 2f 86 00 01 cmpwi cr7,r6,1 50cc: 41 9e 00 2c beq cr7,50f8 50d0: 2f 86 00 02 cmpwi cr7,r6,2 50d4: 41 9e 00 14 beq cr7,50e8 50d8: 7d 20 39 e4 mtsrin r9,r7 50dc: 39 29 01 11 addi r9,r9,273 50e0: 3c e7 10 00 addis r7,r7,4096 50e4: 55 29 02 06 rlwinm r9,r9,0,8,3 50e8: 7d 20 39 e4 mtsrin r9,r7 50ec: 39 29 01 11 addi r9,r9,273 50f0: 3c e7 10 00 addis r7,r7,4096 50f4: 55 29 02 06 rlwinm r9,r9,0,8,3 50f8: 7d 20 39 e4 mtsrin r9,r7 50fc: 3c e7 10 00 addis r7,r7,4096 5100: 39 29 01 11 addi r9,r9,273 5104: 7f 88 38 40 cmplw cr7,r8,r7 5108: 55 29 02 06 rlwinm r9,r9,0,8,3 510c: 40 9d 00 4c ble cr7,5158 5110: 7d 20 39 e4 mtsrin r9,r7 5114: 39 29 01 11 addi r9,r9,273 5118: 3c c7 10 00 addis r6,r7,4096 511c: 55 29 02 06 rlwinm r9,r9,0,8,3 5120: 7d 20 31 e4 mtsrin r9,r6 5124: 39 29 01 11 addi r9,r9,273 5128: 3c c6 10 00 addis r6,r6,4096 512c: 55 29 02 06 rlwinm r9,r9,0,8,3 5130: 7d 20 31 e4 mtsrin r9,r6 5134: 39 29 01 11 addi r9,r9,273 5138: 3c c7 30 00 addis r6,r7,12288 513c: 55 29 02 06 rlwinm r9,r9,0,8,3 5140: 7d 20 31 e4 mtsrin r9,r6 5144: 3c e7 40 00 addis r7,r7,16384 5148: 39 29 01 11 addi r9,r9,273 514c: 7f 88 38 40 cmplw cr7,r8,r7 5150: 55 29 02 06 rlwinm r9,r9,0,8,3 5154: 41 9d ff bc bgt cr7,5110 5158: 4c 00 01 2c isync 515c: 39 20 00 80 li r9,128 5160: 91 3d 00 00 stw r9,0(r29) 5164: 38 e0 00 00 li r7,0 5168: 90 e2 04 dc stw r7,1244(r2) 516c: 7d 20 ed 26 mfsrin r9,r29 5170: 65 29 40 00 oris r9,r9,16384 5174: 40 81 00 c0 ble 5234 5178: 7d 47 50 f8 not r7,r10 517c: 7c e7 42 14 add r7,r7,r8 5180: 54 e7 27 be rlwinm r7,r7,4,30,31 5184: 7d 20 51 e4 mtsrin r9,r10 5188: 3d 4a 10 00 addis r10,r10,4096 518c: 39 29 01 11 addi r9,r9,273 5190: 7c 08 50 40 cmplw r8,r10 5194: 55 29 02 06 rlwinm r9,r9,0,8,3 5198: 40 81 00 9c ble 5234 519c: 2c 07 00 00 cmpwi r7,0 51a0: 41 82 00 4c beq 51ec 51a4: 2c 07 00 01 cmpwi r7,1 51a8: 41 82 00 2c beq 51d4 51ac: 2c 07 00 02 cmpwi r7,2 51b0: 41 82 00 14 beq 51c4 51b4: 7d 20 51 e4 mtsrin r9,r10 51b8: 39 29 01 11 addi r9,r9,273 51bc: 3d 4a 10 00 addis r10,r10,4096 51c0: 55 29 02 06 rlwinm r9,r9,0,8,3 51c4: 7d 20 51 e4 mtsrin r9,r10 51c8: 39 29 01 11 addi r9,r9,273 51cc: 3d 4a 10 00 addis r10,r10,4096 51d0: 55 29 02 06 rlwinm r9,r9,0,8,3 51d4: 7d 20 51 e4 mtsrin r9,r10 51d8: 3d 4a 10 00 addis r10,r10,4096 51dc: 39 29 01 11 addi r9,r9,273 51e0: 7c 08 50 40 cmplw r8,r10 51e4: 55 29 02 06 rlwinm r9,r9,0,8,3 51e8: 40 81 00 4c ble 5234 51ec: 7d 20 51 e4 mtsrin r9,r10 51f0: 39 29 01 11 addi r9,r9,273 51f4: 3c ea 10 00 addis r7,r10,4096 51f8: 55 29 02 06 rlwinm r9,r9,0,8,3 51fc: 7d 20 39 e4 mtsrin r9,r7 5200: 39 29 01 11 addi r9,r9,273 5204: 3c e7 10 00 addis r7,r7,4096 5208: 55 29 02 06 rlwinm r9,r9,0,8,3 520c: 7d 20 39 e4 mtsrin r9,r7 5210: 39 
29 01 11 addi r9,r9,273 5214: 3c ea 30 00 addis r7,r10,12288 5218: 55 29 02 06 rlwinm r9,r9,0,8,3 521c: 7d 20 39 e4 mtsrin r9,r7 5220: 3d 4a 40 00 addis r10,r10,16384 5224: 39 29 01 11 addi r9,r9,273 5228: 7c 08 50 40 cmplw r8,r10 522c: 55 29 02 06 rlwinm r9,r9,0,8,3 5230: 41 81 ff bc bgt 51ec 5234: 4c 00 01 2c isync Signed-off-by: Christophe Leroy [mpe: Export the ool handlers to fix build errors] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d9121f96a7c4302946839a0771f5d1daeeb6968c.1622708530.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/kup.h | 150 ++++++++++++++++--------------- arch/powerpc/include/asm/processor.h | 10 ++- arch/powerpc/kernel/process.c | 3 + arch/powerpc/mm/book3s32/kuap.c | 14 ++- 4 files changed, 104 insertions(+), 73 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 68fbe28c6d7e..bac7edae64bf 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -42,36 +42,69 @@ static inline void kuep_unlock(void) #include -static inline void kuap_update_sr(u32 sr, u32 addr, u32 end) -{ - addr &= 0xf0000000; /* align addr to start of segment */ - barrier(); /* make sure thread.kuap is updated before playing with SRs */ - for (;;) { - mtsr(sr, addr); - addr += 0x10000000; /* address of next segment */ - if (addr >= end) - break; - sr += 0x111; /* next VSID */ - sr &= 0xf0ffffff; /* clear VSID overflow */ - } +#define KUAP_NONE (~0UL) +#define KUAP_ALL (~1UL) + +static inline void kuap_lock_one(unsigned long addr) +{ + mtsr(mfsr(addr) | SR_KS, addr); + isync(); /* Context sync required after mtsr() */ +} + +static inline void kuap_unlock_one(unsigned long addr) +{ + mtsr(mfsr(addr) & ~SR_KS, addr); + isync(); /* Context sync required after mtsr() */ +} + +static inline void kuap_lock_all(void) +{ + update_user_segments(mfsr(0) | SR_KS); isync(); /* Context sync required after mtsr() */ } +static inline void kuap_unlock_all(void) +{ + update_user_segments(mfsr(0) & ~SR_KS); + isync(); /* Context sync required after mtsr() */ +} + +void kuap_lock_all_ool(void); +void kuap_unlock_all_ool(void); + +static inline void kuap_lock(unsigned long addr, bool ool) +{ + if (likely(addr != KUAP_ALL)) + kuap_lock_one(addr); + else if (!ool) + kuap_lock_all(); + else + kuap_lock_all_ool(); +} + +static inline void kuap_unlock(unsigned long addr, bool ool) +{ + if (likely(addr != KUAP_ALL)) + kuap_unlock_one(addr); + else if (!ool) + kuap_unlock_all(); + else + kuap_unlock_all_ool(); +} + static inline void kuap_save_and_lock(struct pt_regs *regs) { unsigned long kuap = current->thread.kuap; - u32 addr = kuap & 0xf0000000; - u32 end = kuap << 28; if (kuap_is_disabled()) return; regs->kuap = kuap; - if (unlikely(!kuap)) + if (unlikely(kuap == KUAP_NONE)) return; - current->thread.kuap = 0; - kuap_update_sr(mfsr(addr) | SR_KS, addr, end); /* Set Ks */ + current->thread.kuap = KUAP_NONE; + kuap_lock(kuap, false); } static inline void kuap_user_restore(struct pt_regs *regs) @@ -80,18 +113,12 @@ static inline void kuap_user_restore(struct pt_regs *regs) static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) { - u32 addr = regs->kuap & 0xf0000000; - u32 end = regs->kuap << 28; - if (kuap_is_disabled()) return; current->thread.kuap = regs->kuap; - if (unlikely(regs->kuap == kuap)) - return; - - kuap_update_sr(mfsr(addr) & ~SR_KS, addr, end); /* Clear Ks */ + kuap_unlock(regs->kuap, false); } static inline unsigned long 
kuap_get_and_assert_locked(void) @@ -99,9 +126,9 @@ static inline unsigned long kuap_get_and_assert_locked(void) unsigned long kuap = current->thread.kuap; if (kuap_is_disabled()) - return 0; + return KUAP_NONE; - WARN_ON_ONCE(IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && kuap != 0); + WARN_ON_ONCE(IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && kuap != KUAP_NONE); return kuap; } @@ -114,8 +141,6 @@ static inline void kuap_assert_locked(void) static __always_inline void allow_user_access(void __user *to, const void __user *from, u32 size, unsigned long dir) { - u32 addr, end; - if (kuap_is_disabled()) return; @@ -125,88 +150,71 @@ static __always_inline void allow_user_access(void __user *to, const void __user if (!(dir & KUAP_WRITE)) return; - addr = (__force u32)to; - - if (unlikely(addr >= TASK_SIZE || !size)) - return; - - end = min(addr + size, TASK_SIZE); - - current->thread.kuap = (addr & 0xf0000000) | ((((end - 1) >> 28) + 1) & 0xf); - kuap_update_sr(mfsr(addr) & ~SR_KS, addr, end); /* Clear Ks */ + current->thread.kuap = (__force u32)to; + kuap_unlock_one((__force u32)to); } static __always_inline void prevent_user_access(void __user *to, const void __user *from, u32 size, unsigned long dir) { - u32 addr, end; + u32 kuap = current->thread.kuap; if (kuap_is_disabled()) return; BUILD_BUG_ON(!__builtin_constant_p(dir)); - if (dir & KUAP_CURRENT_WRITE) { - u32 kuap = current->thread.kuap; - - if (unlikely(!kuap)) - return; - - addr = kuap & 0xf0000000; - end = kuap << 28; - } else if (dir & KUAP_WRITE) { - addr = (__force u32)to; - end = min(addr + size, TASK_SIZE); - - if (unlikely(addr >= TASK_SIZE || !size)) - return; - } else { + if (!(dir & KUAP_WRITE)) return; - } - current->thread.kuap = 0; - kuap_update_sr(mfsr(addr) | SR_KS, addr, end); /* set Ks */ + current->thread.kuap = KUAP_NONE; + kuap_lock(kuap, true); } static inline unsigned long prevent_user_access_return(void) { unsigned long flags = current->thread.kuap; - unsigned long addr = flags & 0xf0000000; - unsigned long end = flags << 28; - void __user *to = (__force void __user *)addr; if (kuap_is_disabled()) - return 0; + return KUAP_NONE; - if (flags) - prevent_user_access(to, to, end - addr, KUAP_READ_WRITE); + if (flags != KUAP_NONE) { + current->thread.kuap = KUAP_NONE; + kuap_lock(flags, true); + } return flags; } static inline void restore_user_access(unsigned long flags) { - unsigned long addr = flags & 0xf0000000; - unsigned long end = flags << 28; - void __user *to = (__force void __user *)addr; - if (kuap_is_disabled()) return; - if (flags) - allow_user_access(to, to, end - addr, KUAP_READ_WRITE); + if (flags != KUAP_NONE) { + current->thread.kuap = flags; + kuap_unlock(flags, true); + } } static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { - unsigned long begin = regs->kuap & 0xf0000000; - unsigned long end = regs->kuap << 28; + unsigned long kuap = regs->kuap; if (kuap_is_disabled()) return false; - return is_write && (address < begin || address >= end); + if (!is_write || kuap == KUAP_ALL) + return false; + if (kuap == KUAP_NONE) + return true; + + /* If faulting address doesn't match unlocked segment, unlock all */ + if ((kuap ^ address) & 0xf0000000) + regs->kuap = KUAP_ALL; + + return false; } #endif /* CONFIG_PPC_KUAP */ diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 7bf8a15af224..aeb1a35163d1 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -276,7 +276,15 @@ struct thread_struct { 
#define SPEFSCR_INIT #endif -#ifdef CONFIG_PPC32 +#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) +#define INIT_THREAD { \ + .ksp = INIT_SP, \ + .pgdir = swapper_pg_dir, \ + .kuap = ~0UL, /* KUAP_NONE */ \ + .fpexc_mode = MSR_FE0 | MSR_FE1, \ + SPEFSCR_INIT \ +} +#elif defined(CONFIG_PPC32) #define INIT_THREAD { \ .ksp = INIT_SP, \ .pgdir = swapper_pg_dir, \ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 2bd30acc843c..3626074acec9 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1745,6 +1745,9 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, #ifdef CONFIG_ALTIVEC p->thread.vr_save_area = NULL; #endif +#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) + p->thread.kuap = KUAP_NONE; +#endif setup_ksp_vsid(p, sp); diff --git a/arch/powerpc/mm/book3s32/kuap.c b/arch/powerpc/mm/book3s32/kuap.c index 0e59c8b32c4f..9df6911b8fde 100644 --- a/arch/powerpc/mm/book3s32/kuap.c +++ b/arch/powerpc/mm/book3s32/kuap.c @@ -6,10 +6,22 @@ struct static_key_false disable_kuap_key; EXPORT_SYMBOL(disable_kuap_key); +void kuap_lock_all_ool(void) +{ + kuap_lock_all(); +} +EXPORT_SYMBOL(kuap_lock_all_ool); + +void kuap_unlock_all_ool(void) +{ + kuap_unlock_all(); +} +EXPORT_SYMBOL(kuap_unlock_all_ool); + void __init setup_kuap(bool disabled) { if (!disabled) - kuap_update_sr(mfsr(0) | SR_KS, 0, TASK_SIZE); + kuap_lock_all_ool(); if (smp_processor_id() != boot_cpuid) return; -- cgit v1.2.3 From 9f5bd8f1471d7498c934c0a686fd0997cf872653 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 3 Jun 2021 08:41:45 +0000 Subject: powerpc/32s: Activate KUAP and KUEP by default Now that KUAP and KUEP have been significantly optimised and can be disabled at boot time using 'nosmap' and 'nosmep' kernel parameters, they can be enabled by default, as on other powerpc platforms. It is still possible to disable them completely in the configuration. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/86c7c74a3ba5312daea7e9658b096e2bcc6f4b64.1622708530.git.christophe.leroy@csgroup.eu --- arch/powerpc/platforms/Kconfig.cputype | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index d4acba7551c2..50237e2007e7 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -390,7 +390,7 @@ config PPC_HAVE_KUEP config PPC_KUEP bool "Kernel Userspace Execution Prevention" depends on PPC_HAVE_KUEP - default y if !PPC_BOOK3S_32 + default y help Enable support for Kernel Userspace Execution Prevention (KUEP) @@ -402,7 +402,7 @@ config PPC_HAVE_KUAP config PPC_KUAP bool "Kernel Userspace Access Protection" depends on PPC_HAVE_KUAP - default y if !PPC_BOOK3S_32 + default y help Enable support for Kernel Userspace Access Protection (KUAP) -- cgit v1.2.3 From d008f8f8a0c3efe4fe1008a797f9497ea5965e27 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 3 Jun 2021 08:41:46 +0000 Subject: powerpc/kuap: Remove KUAP_CURRENT_XXX book3s/32 was the only user of KUAP_CURRENT_XXX. After the rework of book3s/32 KUAP, they are not used anymore. Remove them.
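For illustration, the reworked thread.kuap state above reduces to three cases: everything locked (KUAP_NONE), everything unlocked (KUAP_ALL), or exactly one 256MB segment unlocked (stored as a user address). Below is a minimal user-space model of that encoding, mirroring the segment comparison in bad_kuap_fault(); the two constants are copied from the diff, while write_allowed() is a name invented for this sketch, not kernel code.

#include <assert.h>
#include <stdio.h>

#define KUAP_NONE	(~0UL)
#define KUAP_ALL	(~1UL)

/* Would a kernel write to 'addr' be allowed under state 'kuap'?
 * A plain value means exactly one 256MB segment is unlocked. */
static int write_allowed(unsigned long kuap, unsigned long addr)
{
	if (kuap == KUAP_NONE)
		return 0;
	if (kuap == KUAP_ALL)
		return 1;
	return ((kuap ^ addr) & 0xf0000000) == 0;
}

int main(void)
{
	assert(!write_allowed(KUAP_NONE, 0x10001000));
	assert(write_allowed(KUAP_ALL, 0x10001000));
	assert(write_allowed(0x10001000, 0x1ffff000));	/* same segment */
	assert(!write_allowed(0x10001000, 0x20000000));	/* other segment */
	printf("kuap state model ok\n");
	return 0;
}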
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/549214ecf6887d965645e664520d4886663c5ffb.1622708530.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/kup.h | 1 - arch/powerpc/include/asm/kup.h | 14 +++----------- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index bac7edae64bf..eb5ef59a406b 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -145,7 +145,6 @@ static __always_inline void allow_user_access(void __user *to, const void __user return; BUILD_BUG_ON(!__builtin_constant_p(dir)); - BUILD_BUG_ON(dir & ~KUAP_READ_WRITE); if (!(dir & KUAP_WRITE)) return; diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 4b94d4293777..2c47feee9482 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -5,14 +5,6 @@ #define KUAP_READ 1 #define KUAP_WRITE 2 #define KUAP_READ_WRITE (KUAP_READ | KUAP_WRITE) -/* - * For prevent_user_access() only. - * Use the current saved situation instead of the to/from/size params. - * Used on book3s/32 - */ -#define KUAP_CURRENT_READ 4 -#define KUAP_CURRENT_WRITE 8 -#define KUAP_CURRENT (KUAP_CURRENT_READ | KUAP_CURRENT_WRITE) #ifdef CONFIG_PPC_BOOK3S_64 #include @@ -129,17 +121,17 @@ static inline void prevent_read_write_user(void __user *to, const void __user *f static inline void prevent_current_access_user(void) { - prevent_user_access(NULL, NULL, ~0UL, KUAP_CURRENT); + prevent_user_access(NULL, NULL, ~0UL, KUAP_READ_WRITE); } static inline void prevent_current_read_from_user(void) { - prevent_user_access(NULL, NULL, ~0UL, KUAP_CURRENT_READ); + prevent_user_access(NULL, NULL, ~0UL, KUAP_READ); } static inline void prevent_current_write_to_user(void) { - prevent_user_access(NULL, NULL, ~0UL, KUAP_CURRENT_WRITE); + prevent_user_access(NULL, NULL, ~0UL, KUAP_WRITE); } #endif /* !__ASSEMBLY__ */ -- cgit v1.2.3 From cb2f1fb205cc20695fcaef84baf80d9d3e54c88b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 3 Jun 2021 08:41:48 +0000 Subject: powerpc/kuap: Remove to/from/size parameters of prevent_user_access() prevent_user_access() no longer uses the to/from/size parameters. Remove them.
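As a sketch of why the parameters could go: book3s/32 KUAP only guards against writes, so prevent_user_access() only needs the direction, and the saved thread.kuap state already records what must be relocked. A self-contained model of the remaining flag test follows; the flag values are copied from asm/kup.h above, and action() is a name invented for this sketch.

#include <stdio.h>

#define KUAP_READ	1
#define KUAP_WRITE	2
#define KUAP_READ_WRITE	(KUAP_READ | KUAP_WRITE)

/* On book3s/32 only write protection exists, so only directions
 * containing KUAP_WRITE cause the user segments to be relocked. */
static const char *action(unsigned long dir)
{
	return (dir & KUAP_WRITE) ? "relock user segments" : "nothing to do";
}

int main(void)
{
	printf("KUAP_READ       -> %s\n", action(KUAP_READ));
	printf("KUAP_WRITE      -> %s\n", action(KUAP_WRITE));
	printf("KUAP_READ_WRITE -> %s\n", action(KUAP_READ_WRITE));
	return 0;
}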
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b7113662fd2c26e4c33e9d705de324bd3860822e.1622708530.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/kup.h | 3 +-- arch/powerpc/include/asm/book3s/64/kup.h | 3 +-- arch/powerpc/include/asm/kup.h | 15 +++++++-------- arch/powerpc/include/asm/nohash/32/kup-8xx.h | 3 +-- 4 files changed, 10 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index eb5ef59a406b..64201125a287 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -153,8 +153,7 @@ static __always_inline void allow_user_access(void __user *to, const void __user kuap_unlock_one((__force u32)to); } -static __always_inline void prevent_user_access(void __user *to, const void __user *from, - u32 size, unsigned long dir) +static __always_inline void prevent_user_access(unsigned long dir) { u32 kuap = current->thread.kuap; diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index 9700da3a4093..a1cc73a88710 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -398,8 +398,7 @@ static __always_inline void allow_user_access(void __user *to, const void __user #endif /* !CONFIG_PPC_KUAP */ -static inline void prevent_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) +static inline void prevent_user_access(unsigned long dir) { set_kuap(AMR_KUAP_BLOCKED); if (static_branch_unlikely(&uaccess_flush_key)) diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 2c47feee9482..0202a7e84a49 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -72,8 +72,7 @@ static inline unsigned long kuap_get_and_assert_locked(void) #ifndef CONFIG_PPC_BOOK3S_64 static inline void allow_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { } -static inline void prevent_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) { } +static inline void prevent_user_access(unsigned long dir) { } static inline unsigned long prevent_user_access_return(void) { return 0UL; } static inline void restore_user_access(unsigned long flags) { } #endif /* CONFIG_PPC_BOOK3S_64 */ @@ -105,33 +104,33 @@ static inline void allow_read_write_user(void __user *to, const void __user *fro static inline void prevent_read_from_user(const void __user *from, unsigned long size) { - prevent_user_access(NULL, from, size, KUAP_READ); + prevent_user_access(KUAP_READ); } static inline void prevent_write_to_user(void __user *to, unsigned long size) { - prevent_user_access(to, NULL, size, KUAP_WRITE); + prevent_user_access(KUAP_WRITE); } static inline void prevent_read_write_user(void __user *to, const void __user *from, unsigned long size) { - prevent_user_access(to, from, size, KUAP_READ_WRITE); + prevent_user_access(KUAP_READ_WRITE); } static inline void prevent_current_access_user(void) { - prevent_user_access(NULL, NULL, ~0UL, KUAP_READ_WRITE); + prevent_user_access(KUAP_READ_WRITE); } static inline void prevent_current_read_from_user(void) { - prevent_user_access(NULL, NULL, ~0UL, KUAP_READ); + prevent_user_access(KUAP_READ); } static inline void prevent_current_write_to_user(void) { - prevent_user_access(NULL, NULL, ~0UL, KUAP_WRITE); + prevent_user_access(KUAP_WRITE); } #endif /* 
!__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index f6a3c1935e75..882a0bc7887a 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -71,8 +71,7 @@ static inline void allow_user_access(void __user *to, const void __user *from, mtspr(SPRN_MD_AP, MD_APG_INIT); } -static inline void prevent_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) +static inline void prevent_user_access(unsigned long dir) { if (kuap_is_disabled()) return; -- cgit v1.2.3 From 240efd717c415e69511780044f44416bdf161523 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 3 Jun 2021 09:13:54 +0000 Subject: powerpc/kuap: Force inlining of all first level KUAP helpers. All KUAP helpers defined in asm/kup.h are single line functions that should be inlined. But on a book3s/32 build, we get many out-of-line instances of those helpers. Force inlining of them. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/8479a862e165a57a855292d47e24c259a578f5a0.1622711627.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/kup.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 0202a7e84a49..1df763002726 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -84,51 +84,51 @@ static __always_inline void setup_kup(void) setup_kuap(disable_kuap); } -static inline void allow_read_from_user(const void __user *from, unsigned long size) +static __always_inline void allow_read_from_user(const void __user *from, unsigned long size) { barrier_nospec(); allow_user_access(NULL, from, size, KUAP_READ); } -static inline void allow_write_to_user(void __user *to, unsigned long size) +static __always_inline void allow_write_to_user(void __user *to, unsigned long size) { allow_user_access(to, NULL, size, KUAP_WRITE); } -static inline void allow_read_write_user(void __user *to, const void __user *from, - unsigned long size) +static __always_inline void allow_read_write_user(void __user *to, const void __user *from, + unsigned long size) { barrier_nospec(); allow_user_access(to, from, size, KUAP_READ_WRITE); } -static inline void prevent_read_from_user(const void __user *from, unsigned long size) +static __always_inline void prevent_read_from_user(const void __user *from, unsigned long size) { prevent_user_access(KUAP_READ); } -static inline void prevent_write_to_user(void __user *to, unsigned long size) +static __always_inline void prevent_write_to_user(void __user *to, unsigned long size) { prevent_user_access(KUAP_WRITE); } -static inline void prevent_read_write_user(void __user *to, const void __user *from, - unsigned long size) +static __always_inline void prevent_read_write_user(void __user *to, const void __user *from, - unsigned long size) +							unsigned long size) { prevent_user_access(KUAP_READ_WRITE); } -static inline void prevent_current_access_user(void) +static __always_inline void prevent_current_access_user(void) { prevent_user_access(KUAP_READ_WRITE); } -static inline void prevent_current_read_from_user(void) +static __always_inline void prevent_current_read_from_user(void) { prevent_user_access(KUAP_READ); } -static inline void prevent_current_write_to_user(void) +static __always_inline void prevent_current_write_to_user(void) { prevent_user_access(KUAP_WRITE); } -- cgit v1.2.3 From 
25910260ff69fa0c37e26541aac4e8f978e1f17f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 3 Jun 2021 09:29:02 +0000 Subject: powerpc/nohash: Refactor update of BDI2000 pointers in switch_mmu_context() Instead of duplicating the update of BDI2000 pointers in set_context(), do it directly from switch_mmu_context(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4c54997edd3548fa54717915e7c6ebaf60f208c0.1622712515.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_40x.S | 14 -------------- arch/powerpc/kernel/head_44x.S | 16 ---------------- arch/powerpc/kernel/head_8xx.S | 7 ------- arch/powerpc/kernel/head_fsl_booke.S | 16 ---------------- arch/powerpc/mm/nohash/8xx.c | 6 ------ arch/powerpc/mm/nohash/mmu_context.c | 8 ++++++++ arch/powerpc/mm/nohash/tlb_low.S | 8 -------- 7 files changed, 8 insertions(+), 67 deletions(-) diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index e1360b88b6cb..7ef1bbc23bed 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -703,14 +703,6 @@ _GLOBAL(abort) mtspr SPRN_DBCR0,r13 _GLOBAL(set_context) - -#ifdef CONFIG_BDI_SWITCH - /* Context switch the PTE pointer for the Abatron BDI2000. - * The PGDIR is the second parameter. - */ - lis r5, abatron_pteptrs@ha - stw r4, abatron_pteptrs@l + 0x4(r5) -#endif sync mtspr SPRN_PID,r3 isync /* Need an isync to flush shadow */ @@ -731,9 +723,3 @@ EXPORT_SYMBOL(empty_zero_page) .globl swapper_pg_dir swapper_pg_dir: .space PGD_TABLE_SIZE - -/* Room for two PTE pointers, usually the kernel and current user pointers - * to their respective root page table. - */ -abatron_pteptrs: - .space 8 diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 2cfb496df615..76c84acd9086 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -789,15 +789,6 @@ _GLOBAL(__fixup_440A_mcheck) blr _GLOBAL(set_context) - -#ifdef CONFIG_BDI_SWITCH - /* Context switch the PTE pointer for the Abatron BDI2000. - * The PGDIR is the second parameter. - */ - lis r5, abatron_pteptrs@h - ori r5, r5, abatron_pteptrs@l - stw r4, 0x4(r5) -#endif mtspr SPRN_PID,r3 isync /* Force context change */ blr @@ -1267,13 +1258,6 @@ EXPORT_SYMBOL(empty_zero_page) swapper_pg_dir: .space PGD_TABLE_SIZE -/* - * Room for two PTE pointers, usually the kernel and current user pointers - * to their respective root page table. - */ -abatron_pteptrs: - .space 8 - #ifdef CONFIG_SMP .align 12 temp_boot_stack: diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 7d445e4342c0..817df9fe7fb3 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -804,10 +804,3 @@ EXPORT_SYMBOL(empty_zero_page) .globl swapper_pg_dir swapper_pg_dir: .space PGD_TABLE_SIZE - -/* Room for two PTE table pointers, usually the kernel and current user - * pointer to their respective root page table (pgdir). - */ - .globl abatron_pteptrs -abatron_pteptrs: - .space 8 diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index a1a5c3f10dc4..590f34cc5bb2 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -986,15 +986,6 @@ _GLOBAL(abort) isync _GLOBAL(set_context) - -#ifdef CONFIG_BDI_SWITCH - /* Context switch the PTE pointer for the Abatron BDI2000. - * The PGDIR is the second parameter. 
- */ - lis r5, abatron_pteptrs@h - ori r5, r5, abatron_pteptrs@l - stw r4, 0x4(r5) -#endif mtspr SPRN_PID,r3 isync /* Force context change */ blr @@ -1242,10 +1233,3 @@ EXPORT_SYMBOL(empty_zero_page) .globl swapper_pg_dir swapper_pg_dir: .space PGD_TABLE_SIZE - -/* - * Room for two PTE pointers, usually the kernel and current user pointers - * to their respective root page table. - */ -abatron_pteptrs: - .space 8 diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index fc663322ba58..4dd02bb8bd73 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -224,12 +224,6 @@ void set_context(unsigned long id, pgd_t *pgd) { s16 offset = (s16)(__pa(swapper_pg_dir)); - /* Context switch the PTE pointer for the Abatron BDI2000. - * The PGDIR is passed as second argument. - */ - if (IS_ENABLED(CONFIG_BDI_SWITCH)) - abatron_pteptrs[1] = pgd; - /* Register M_TWB will contain base address of level 1 table minus the * lower part of the kernel PGDIR base address, so that all accesses to * level 1 table are done relative to lower part of kernel PGDIR base diff --git a/arch/powerpc/mm/nohash/mmu_context.c b/arch/powerpc/mm/nohash/mmu_context.c index aac81c9f84a5..eae48635a9ac 100644 --- a/arch/powerpc/mm/nohash/mmu_context.c +++ b/arch/powerpc/mm/nohash/mmu_context.c @@ -50,6 +50,12 @@ #include +/* + * Room for two PTE table pointers, usually the kernel and current user + * pointer to their respective root page table (pgdir). + */ +void *abatron_pteptrs[2]; + /* * The MPC8xx has only 16 contexts. We rotate through them on each task switch. * A better way would be to keep track of tasks that own contexts, and implement @@ -357,6 +363,8 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, /* Flick the MMU and release lock */ pr_hardcont(" -> %d\n", id); + if (IS_ENABLED(CONFIG_BDI_SWITCH)) + abatron_pteptrs[1] = next->pgd; set_context(id, next->pgd); raw_spin_unlock(&context_lock); } diff --git a/arch/powerpc/mm/nohash/tlb_low.S b/arch/powerpc/mm/nohash/tlb_low.S index 68797e072f55..b3d0f0127828 100644 --- a/arch/powerpc/mm/nohash/tlb_low.S +++ b/arch/powerpc/mm/nohash/tlb_low.S @@ -362,14 +362,6 @@ _GLOBAL(_tlbivax_bcast) blr _GLOBAL(set_context) -#ifdef CONFIG_BDI_SWITCH - /* Context switch the PTE pointer for the Abatron BDI2000. - * The PGDIR is the second parameter. - */ - lis r5, abatron_pteptrs@h - ori r5, r5, abatron_pteptrs@l - stw r4, 0x4(r5) -#endif mtspr SPRN_PID,r3 isync /* Force context change */ blr -- cgit v1.2.3 From a56ab7c7290f5922363d1ee11bbafc4da2b9bf51 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 3 Jun 2021 09:29:03 +0000 Subject: powerpc/nohash: Convert set_context() to C ppc8xx already has set_context() in C. Other ones have it in assembly. The only thing it does is to write the context id into SPRN_PID. Do it in C. 
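For reference, each removed assembly variant was a single SPR write followed by a context-synchronising isync. A rough sketch of the primitive the C version relies on is below; the real mtspr()/isync() live in asm/reg.h and asm/synch.h, and the SPR number and wrapper name here are assumptions for illustration (powerpc-only inline asm, kernel context).

#define SPRN_PID	0x030	/* assumed SPR number, for illustration */

/* Write the new context id to the PID SPR and force the context
 * change to take effect before any further instruction. */
static inline void set_pid(unsigned long id)
{
	asm volatile("mtspr %0,%1" : : "i"(SPRN_PID), "r"(id) : "memory");
	asm volatile("isync" : : : "memory");
}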
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a5d0759064f3831c6b88af49ef5d3b05ba1c4dad.1622712515.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/mmu_context.h | 1 - arch/powerpc/kernel/head_40x.S | 7 ------- arch/powerpc/kernel/head_44x.S | 5 ----- arch/powerpc/kernel/head_fsl_booke.S | 5 ----- arch/powerpc/mm/nohash/8xx.c | 25 ------------------------- arch/powerpc/mm/nohash/mmu_context.c | 27 +++++++++++++++++++++++++++ arch/powerpc/mm/nohash/tlb_low.S | 5 ----- 7 files changed, 27 insertions(+), 48 deletions(-) diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 4bc45d3ed8b0..ef6df2681582 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -57,7 +57,6 @@ static inline bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa, static inline void mm_iommu_init(struct mm_struct *mm) { } #endif extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm); -extern void set_context(unsigned long id, pgd_t *pgd); #ifdef CONFIG_PPC_BOOK3S_64 extern void radix__switch_mmu_context(struct mm_struct *prev, diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 7ef1bbc23bed..2717aa860cae 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -702,13 +702,6 @@ _GLOBAL(abort) oris r13,r13,DBCR0_RST_SYSTEM@h mtspr SPRN_DBCR0,r13 -_GLOBAL(set_context) - sync - mtspr SPRN_PID,r3 - isync /* Need an isync to flush shadow */ - /* TLBs after changing PID */ - blr - /* We put a few things here that have to be page-aligned. This stuff * goes at the beginning of the data segment, which is page-aligned. */ diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 76c84acd9086..2c4ffec027a2 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -788,11 +788,6 @@ _GLOBAL(__fixup_440A_mcheck) sync blr -_GLOBAL(set_context) - mtspr SPRN_PID,r3 - isync /* Force context change */ - blr - /* * Init CPU state. This is called at boot time or for secondary CPUs * to setup initial TLB entries, setup IVORs, etc... diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 590f34cc5bb2..ab718fd5e2a2 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -985,11 +985,6 @@ _GLOBAL(abort) mtspr SPRN_DBCR0,r13 isync -_GLOBAL(set_context) - mtspr SPRN_PID,r3 - isync /* Force context change */ - blr - #ifdef CONFIG_SMP /* When we get here, r24 needs to hold the CPU # */ .globl __secondary_start diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 4dd02bb8bd73..60780e089118 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -212,31 +212,6 @@ void __init setup_initial_memory_limit(phys_addr_t first_memblock_base, memblock_set_current_limit(min_t(u64, first_memblock_size, SZ_32M)); } -/* - * Set up to use a given MMU context. - * id is context number, pgd is PGD pointer. - * - * We place the physical address of the new task page directory loaded - * into the MMU base register, and set the ASID compare register with - * the new "context." 
- */ -void set_context(unsigned long id, pgd_t *pgd) -{ - s16 offset = (s16)(__pa(swapper_pg_dir)); - - /* Register M_TWB will contain base address of level 1 table minus the - * lower part of the kernel PGDIR base address, so that all accesses to - * level 1 table are done relative to lower part of kernel PGDIR base - * address. - */ - mtspr(SPRN_M_TWB, __pa(pgd) - offset); - - /* Update context */ - mtspr(SPRN_M_CASID, id - 1); - /* sync */ - mb(); -} - #ifdef CONFIG_PPC_KUEP void __init setup_kuep(bool disabled) { diff --git a/arch/powerpc/mm/nohash/mmu_context.c b/arch/powerpc/mm/nohash/mmu_context.c index eae48635a9ac..0d22bc93bb40 100644 --- a/arch/powerpc/mm/nohash/mmu_context.c +++ b/arch/powerpc/mm/nohash/mmu_context.c @@ -264,6 +264,33 @@ static void context_check_map(void) static void context_check_map(void) { } #endif +static void set_context(unsigned long id, pgd_t *pgd) +{ + if (IS_ENABLED(CONFIG_PPC_8xx)) { + s16 offset = (s16)(__pa(swapper_pg_dir)); + + /* + * Register M_TWB will contain base address of level 1 table minus the + * lower part of the kernel PGDIR base address, so that all accesses to + * level 1 table are done relative to lower part of kernel PGDIR base + * address. + */ + mtspr(SPRN_M_TWB, __pa(pgd) - offset); + + /* Update context */ + mtspr(SPRN_M_CASID, id - 1); + + /* sync */ + mb(); + } else { + if (IS_ENABLED(CONFIG_40x)) + mb(); /* sync */ + + mtspr(SPRN_PID, id); + isync(); + } +} + void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { diff --git a/arch/powerpc/mm/nohash/tlb_low.S b/arch/powerpc/mm/nohash/tlb_low.S index b3d0f0127828..4613bf8e9aae 100644 --- a/arch/powerpc/mm/nohash/tlb_low.S +++ b/arch/powerpc/mm/nohash/tlb_low.S @@ -360,11 +360,6 @@ _GLOBAL(_tlbivax_bcast) sync wrtee r10 blr - -_GLOBAL(set_context) - mtspr SPRN_PID,r3 - isync /* Force context change */ - blr #else #error Unsupported processor type ! #endif -- cgit v1.2.3 From c13066e53aabd8f268f051d267270765e10343aa Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 3 Jun 2021 09:29:04 +0000 Subject: powerpc/nohash: Remove CONFIG_SMP #ifdefery in mmu_context.h Everything can be done even when CONFIG_SMP is not selected. Just use IS_ENABLED() where relevant and rely on GCC to opt out unneeded code and variables when CONFIG_SMP is not set. 
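The IS_ENABLED() trick this conversion leans on can be modelled in plain C. Below is a minimal sketch of the macro, simplified from include/linux/kconfig.h (the in-tree version also handles =m options): the condition folds to a compile-time 0 or 1, so GCC drops the dead branch together with anything only it references.

#include <stdio.h>

#define __ARG_PLACEHOLDER_1 0,
#define __take_second_arg(ignored, val, ...) val
#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)
#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
#define __is_defined(x) ___is_defined(x)
#define IS_ENABLED(option) __is_defined(option)

#define CONFIG_SMP 1	/* pretend SMP is selected */

int main(void)
{
	if (IS_ENABLED(CONFIG_SMP))	/* folds to if (1) */
		printf("SMP path compiled in and taken\n");
	if (IS_ENABLED(CONFIG_NUMA))	/* undefined here: folds to if (0) */
		printf("dead branch, eliminated by the optimiser\n");
	return 0;
}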
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/cc13b87b0f750a538621876ecc24c22a07e7c8be.1622712515.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/nohash/mmu_context.c | 66 +++++++++++++----------------------- 1 file changed, 24 insertions(+), 42 deletions(-) diff --git a/arch/powerpc/mm/nohash/mmu_context.c b/arch/powerpc/mm/nohash/mmu_context.c index 0d22bc93bb40..1a4991c5aea2 100644 --- a/arch/powerpc/mm/nohash/mmu_context.c +++ b/arch/powerpc/mm/nohash/mmu_context.c @@ -47,6 +47,7 @@ #include #include +#include #include @@ -86,9 +87,7 @@ void *abatron_pteptrs[2]; static unsigned int next_context, nr_free_contexts; static unsigned long *context_map; -#ifdef CONFIG_SMP static unsigned long *stale_map[NR_CPUS]; -#endif static struct mm_struct **context_mm; static DEFINE_RAW_SPINLOCK(context_lock); @@ -111,7 +110,6 @@ static DEFINE_RAW_SPINLOCK(context_lock); * the stale map as we can just flush the local CPU * -- benh */ -#ifdef CONFIG_SMP static unsigned int steal_context_smp(unsigned int id) { struct mm_struct *mm; @@ -164,14 +162,11 @@ static unsigned int steal_context_smp(unsigned int id) /* This will cause the caller to try again */ return MMU_NO_CONTEXT; } -#endif /* CONFIG_SMP */ static unsigned int steal_all_contexts(void) { struct mm_struct *mm; -#ifdef CONFIG_SMP int cpu = smp_processor_id(); -#endif unsigned int id; for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) { @@ -189,9 +184,8 @@ static unsigned int steal_all_contexts(void) mm->context.active = 0; #endif } -#ifdef CONFIG_SMP - __clear_bit(id, stale_map[cpu]); -#endif + if (IS_ENABLED(CONFIG_SMP)) + __clear_bit(id, stale_map[cpu]); } /* Flush the TLB for all contexts (not to be used on SMP) */ @@ -210,9 +204,7 @@ static unsigned int steal_all_contexts(void) static unsigned int steal_context_up(unsigned int id) { struct mm_struct *mm; -#ifdef CONFIG_SMP int cpu = smp_processor_id(); -#endif /* Pick up the victim mm */ mm = context_mm[id]; @@ -226,9 +218,8 @@ static unsigned int steal_context_up(unsigned int id) mm->context.id = MMU_NO_CONTEXT; /* XXX This clear should ultimately be part of local_flush_tlb_mm */ -#ifdef CONFIG_SMP - __clear_bit(id, stale_map[cpu]); -#endif + if (IS_ENABLED(CONFIG_SMP)) + __clear_bit(id, stale_map[cpu]); return id; } @@ -295,9 +286,7 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { unsigned int id; -#ifdef CONFIG_SMP unsigned int i, cpu = smp_processor_id(); -#endif unsigned long *map; /* No lockless fast path .. 
yet */ @@ -306,17 +295,17 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, pr_hard("[%d] activating context for mm @%p, active=%d, id=%d", cpu, next, next->context.active, next->context.id); -#ifdef CONFIG_SMP - /* Mark us active and the previous one not anymore */ - next->context.active++; - if (prev) { - pr_hardcont(" (old=0x%p a=%d)", prev, prev->context.active); - WARN_ON(prev->context.active < 1); - prev->context.active--; + if (IS_ENABLED(CONFIG_SMP)) { + /* Mark us active and the previous one not anymore */ + next->context.active++; + if (prev) { + pr_hardcont(" (old=0x%p a=%d)", prev, prev->context.active); + WARN_ON(prev->context.active < 1); + prev->context.active--; + } } again: -#endif /* CONFIG_SMP */ /* If we already have a valid assigned context, skip all that */ id = next->context.id; @@ -337,14 +326,12 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, /* No more free contexts, let's try to steal one */ if (nr_free_contexts == 0) { -#ifdef CONFIG_SMP if (num_online_cpus() > 1) { id = steal_context_smp(id); if (id == MMU_NO_CONTEXT) goto again; goto stolen; } -#endif /* CONFIG_SMP */ if (IS_ENABLED(CONFIG_PPC_8xx)) id = steal_all_contexts(); else @@ -371,8 +358,7 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, /* If that context got marked stale on this CPU, then flush the * local TLB for it and unmark it before we use it */ -#ifdef CONFIG_SMP - if (test_bit(id, stale_map[cpu])) { + if (IS_ENABLED(CONFIG_SMP) && test_bit(id, stale_map[cpu])) { pr_hardcont(" | stale flush %d [%d..%d]", id, cpu_first_thread_sibling(cpu), cpu_last_thread_sibling(cpu)); @@ -386,7 +372,6 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, __clear_bit(id, stale_map[i]); } } -#endif /* Flick the MMU and release lock */ pr_hardcont(" -> %d\n", id); @@ -445,7 +430,6 @@ void destroy_context(struct mm_struct *mm) raw_spin_unlock_irqrestore(&context_lock, flags); } -#ifdef CONFIG_SMP static int mmu_ctx_cpu_prepare(unsigned int cpu) { /* We don't touch CPU 0 map, it's allocated at aboot and kept @@ -475,8 +459,6 @@ static int mmu_ctx_cpu_dead(unsigned int cpu) return 0; } -#endif /* CONFIG_SMP */ - /* * Initialize the context management stuff. */ @@ -500,16 +482,16 @@ void __init mmu_context_init(void) if (!context_mm) panic("%s: Failed to allocate %zu bytes\n", __func__, sizeof(void *) * (LAST_CONTEXT + 1)); -#ifdef CONFIG_SMP - stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES); - if (!stale_map[boot_cpuid]) - panic("%s: Failed to allocate %zu bytes\n", __func__, - CTX_MAP_SIZE); - - cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE, - "powerpc/mmu/ctx:prepare", - mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead); -#endif + if (IS_ENABLED(CONFIG_SMP)) { + stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES); + if (!stale_map[boot_cpuid]) + panic("%s: Failed to allocate %zu bytes\n", __func__, + CTX_MAP_SIZE); + + cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE, + "powerpc/mmu/ctx:prepare", + mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead); + } printk(KERN_INFO "MMU: Allocated %zu bytes of context maps for %d contexts\n", -- cgit v1.2.3 From dac3db1edf8b4c75859f07789f577322f2a51e3a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 3 Jun 2021 09:29:05 +0000 Subject: powerpc/nohash: Remove DEBUG_MAP_CONSISTENCY mmu_context handling has been there for years, so we would know if there were problems with the maps. DEBUG_MAP_CONSISTENCY is not user selectable, remove it.
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/6fe2b88956db53f8d6ee221525b2c5dc6aec82c6.1622712515.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/nohash/mmu_context.c | 47 +----------------------------------- 1 file changed, 1 insertion(+), 46 deletions(-) diff --git a/arch/powerpc/mm/nohash/mmu_context.c b/arch/powerpc/mm/nohash/mmu_context.c index 1a4991c5aea2..46ff6a6b6e78 100644 --- a/arch/powerpc/mm/nohash/mmu_context.c +++ b/arch/powerpc/mm/nohash/mmu_context.c @@ -21,7 +21,6 @@ * also clear mm->cpu_vm_mask bits when processes are migrated */ -//#define DEBUG_MAP_CONSISTENCY //#define DEBUG_CLAMP_LAST_CONTEXT 31 //#define DEBUG_HARDER @@ -180,9 +179,6 @@ static unsigned int steal_all_contexts(void) if (id != FIRST_CONTEXT) { context_mm[id] = NULL; __clear_bit(id, context_map); -#ifdef DEBUG_MAP_CONSISTENCY - mm->context.active = 0; -#endif } if (IS_ENABLED(CONFIG_SMP)) __clear_bit(id, stale_map[cpu]); @@ -224,37 +220,6 @@ static unsigned int steal_context_up(unsigned int id) return id; } -#ifdef DEBUG_MAP_CONSISTENCY -static void context_check_map(void) -{ - unsigned int id, nrf, nact; - - nrf = nact = 0; - for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) { - int used = test_bit(id, context_map); - if (!used) - nrf++; - if (used != (context_mm[id] != NULL)) - pr_err("MMU: Context %d is %s and MM is %p !\n", - id, used ? "used" : "free", context_mm[id]); - if (context_mm[id] != NULL) - nact += context_mm[id]->context.active; - } - if (nrf != nr_free_contexts) { - pr_err("MMU: Free context count out of sync ! (%d vs %d)\n", - nr_free_contexts, nrf); - nr_free_contexts = nrf; - } - if (nact > num_online_cpus()) - pr_err("MMU: More active contexts than CPUs ! (%d vs %d)\n", - nact, num_online_cpus()); - if (FIRST_CONTEXT > 0 && !test_bit(0, context_map)) - pr_err("MMU: Context 0 has been freed !!!\n"); -} -#else -static void context_check_map(void) { } -#endif - static void set_context(unsigned long id, pgd_t *pgd) { if (IS_ENABLED(CONFIG_PPC_8xx)) { @@ -309,14 +274,8 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, /* If we already have a valid assigned context, skip all that */ id = next->context.id; - if (likely(id != MMU_NO_CONTEXT)) { -#ifdef DEBUG_MAP_CONSISTENCY - if (context_mm[id] != next) - pr_err("MMU: mm 0x%p has id %d but context_mm[%d] says 0x%p\n", - next, id, id, context_mm[id]); -#endif + if (likely(id != MMU_NO_CONTEXT)) goto ctxt_ok; - } /* We really don't have a context, let's try to acquire one */ id = next_context; @@ -352,7 +311,6 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, next->context.id = id; pr_hardcont(" | new id=%d,nrf=%d", id, nr_free_contexts); - context_check_map(); ctxt_ok: /* If that context got marked stale on this CPU, then flush the @@ -421,9 +379,6 @@ void destroy_context(struct mm_struct *mm) if (id != MMU_NO_CONTEXT) { __clear_bit(id, context_map); mm->context.id = MMU_NO_CONTEXT; -#ifdef DEBUG_MAP_CONSISTENCY - mm->context.active = 0; -#endif context_mm[id] = NULL; nr_free_contexts++; } -- cgit v1.2.3 From a36c0faf3dbc429d5ddcb941afe38dd6fe6c5901 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 3 Jun 2021 09:29:06 +0000 Subject: powerpc/nohash: Remove DEBUG_CLAMP_LAST_CONTEXT DEBUG_CLAMP_LAST_CONTEXT was there in the old days to reduce the number of contexts in order to ease debugging of the context switching implementation, but that code has been quite stable for years now. As it is not user selectable, remove it.
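To get a feel for what the clamp used to change, here is a small self-contained calculation of the context bitmap footprint for the LAST_CONTEXT values visible in the diff; clamping LAST_CONTEXT to 31 made the steal paths trigger almost immediately during debugging. The rounding formula mirrors the kernel's BITS_TO_LONGS()-based CTX_MAP_SIZE and is an assumption for illustration.

#include <stdio.h>

#define BITS_PER_LONG (8 * sizeof(unsigned long))

/* Bytes needed for a bitmap covering contexts 0..last_context,
 * rounded up to whole longs. */
static unsigned long ctx_map_bytes(unsigned long last_context)
{
	unsigned long nbits = last_context + 1;

	return (nbits + BITS_PER_LONG - 1) / BITS_PER_LONG * sizeof(unsigned long);
}

int main(void)
{
	printf("8xx  (LAST_CONTEXT=16):    %lu bytes\n", ctx_map_bytes(16));
	printf("47x  (LAST_CONTEXT=65535): %lu bytes\n", ctx_map_bytes(65535));
	printf("old debug clamp to 31:     %lu bytes\n", ctx_map_bytes(31));
	return 0;
}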
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/da81837b452e8b9f1657b529b9c3050dc10b9770.1622712515.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/nohash/mmu_context.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/powerpc/mm/nohash/mmu_context.c b/arch/powerpc/mm/nohash/mmu_context.c index 46ff6a6b6e78..56d6715d6557 100644 --- a/arch/powerpc/mm/nohash/mmu_context.c +++ b/arch/powerpc/mm/nohash/mmu_context.c @@ -21,7 +21,6 @@ * also clear mm->cpu_vm_mask bits when processes are migrated */ -//#define DEBUG_CLAMP_LAST_CONTEXT 31 //#define DEBUG_HARDER /* We don't use DEBUG because it tends to be compiled in always nowadays @@ -74,9 +73,7 @@ void *abatron_pteptrs[2]; * -- BenH */ #define FIRST_CONTEXT 1 -#ifdef DEBUG_CLAMP_LAST_CONTEXT -#define LAST_CONTEXT DEBUG_CLAMP_LAST_CONTEXT -#elif defined(CONFIG_PPC_8xx) +#if defined(CONFIG_PPC_8xx) #define LAST_CONTEXT 16 #elif defined(CONFIG_PPC_47x) #define LAST_CONTEXT 65535 -- cgit v1.2.3 From e2c043163d44f7b3a9e65d9161af72b647b18451 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 3 Jun 2021 09:29:07 +0000 Subject: powerpc/nohash: Remove DEBUG_HARDER DEBUG_HARDER is not user selectable. Remove it together with related messages. Also remove two pr_devel() messages that should likely have been pr_hard(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0f25109b0e12fdd1e6541dedbb2212cc53526a57.1622712515.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/nohash/mmu_context.c | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/arch/powerpc/mm/nohash/mmu_context.c b/arch/powerpc/mm/nohash/mmu_context.c index 56d6715d6557..44b2b5e7cabe 100644 --- a/arch/powerpc/mm/nohash/mmu_context.c +++ b/arch/powerpc/mm/nohash/mmu_context.c @@ -21,19 +21,6 @@ * also clear mm->cpu_vm_mask bits when processes are migrated */ -//#define DEBUG_HARDER - -/* We don't use DEBUG because it tends to be compiled in always nowadays - * and this would generate way too much output - */ -#ifdef DEBUG_HARDER -#define pr_hard(args...) printk(KERN_DEBUG args) -#define pr_hardcont(args...) printk(KERN_CONT args) -#else -#define pr_hard(args...) do { } while(0) -#define pr_hardcont(args...) do { } while(0) -#endif - #include #include #include @@ -127,7 +114,6 @@ static unsigned int steal_context_smp(unsigned int id) id = FIRST_CONTEXT; continue; } - pr_hardcont(" | steal %d from 0x%p", id, mm); /* Mark this mm has having no context anymore */ mm->context.id = MMU_NO_CONTEXT; @@ -169,8 +155,6 @@ static unsigned int steal_all_contexts(void) /* Pick up the victim mm */ mm = context_mm[id]; - pr_hardcont(" | steal %d from 0x%p", id, mm); - /* Mark this mm as having no context anymore */ mm->context.id = MMU_NO_CONTEXT; if (id != FIRST_CONTEXT) { @@ -202,8 +186,6 @@ static unsigned int steal_context_up(unsigned int id) /* Pick up the victim mm */ mm = context_mm[id]; - pr_hardcont(" | steal %d from 0x%p", id, mm); - /* Flush the TLB for that context */ local_flush_tlb_mm(mm); @@ -254,14 +236,10 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, /* No lockless fast path .. 
yet */ raw_spin_lock(&context_lock); - pr_hard("[%d] activating context for mm @%p, active=%d, id=%d", - cpu, next, next->context.active, next->context.id); - if (IS_ENABLED(CONFIG_SMP)) { /* Mark us active and the previous one not anymore */ next->context.active++; if (prev) { - pr_hardcont(" (old=0x%p a=%d)", prev, prev->context.active); WARN_ON(prev->context.active < 1); prev->context.active--; } @@ -306,7 +284,6 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, next_context = id + 1; context_mm[id] = next; next->context.id = id; - pr_hardcont(" | new id=%d,nrf=%d", id, nr_free_contexts); ctxt_ok: @@ -314,10 +291,6 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, * local TLB for it and unmark it before we use it */ if (IS_ENABLED(CONFIG_SMP) && test_bit(id, stale_map[cpu])) { - pr_hardcont(" | stale flush %d [%d..%d]", - id, cpu_first_thread_sibling(cpu), - cpu_last_thread_sibling(cpu)); local_flush_tlb_mm(next); /* XXX This clear should ultimately be part of local_flush_tlb_mm */ @@ -329,7 +302,6 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, } /* Flick the MMU and release lock */ - pr_hardcont(" -> %d\n", id); if (IS_ENABLED(CONFIG_BDI_SWITCH)) abatron_pteptrs[1] = next->pgd; set_context(id, next->pgd); @@ -341,8 +313,6 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, */ int init_new_context(struct task_struct *t, struct mm_struct *mm) { - pr_hard("initing context for mm @%p\n", mm); - /* * We have MMU_NO_CONTEXT set to be ~0. Hence check * explicitly against context.id == 0. This ensures that we properly @@ -390,7 +360,6 @@ static int mmu_ctx_cpu_prepare(unsigned int cpu) if (cpu == boot_cpuid) return 0; - pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu); stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL); return 0; } @@ -401,7 +370,6 @@ static int mmu_ctx_cpu_dead(unsigned int cpu) if (cpu == boot_cpuid) return 0; - pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu); kfree(stale_map[cpu]); stale_map[cpu] = NULL; -- cgit v1.2.3 From a1ea0ca8a6f17d7b79bbc4d05dd4e6ca162d8f15 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 4 Jun 2021 12:31:09 +0000 Subject: powerpc/selftests: Use gettid() instead of getppid() for null_syscall gettid() is 10% lighter than getppid(); use it for the null_syscall selftest. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0ad62673d3e063f848e7c99d719bb966efd433e8.1622809833.git.christophe.leroy@csgroup.eu --- tools/testing/selftests/powerpc/benchmarks/null_syscall.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/benchmarks/null_syscall.c b/tools/testing/selftests/powerpc/benchmarks/null_syscall.c index 579f0215c6e7..9836838a529f 100644 --- a/tools/testing/selftests/powerpc/benchmarks/null_syscall.c +++ b/tools/testing/selftests/powerpc/benchmarks/null_syscall.c @@ -14,6 +14,7 @@ #include #include #include +#include #include static volatile int soak_done; @@ -121,7 +122,7 @@ static void do_null_syscall(unsigned long nr) unsigned long i; for (i = 0; i < nr; i++) - getppid(); + syscall(__NR_gettid); } #define TIME(A, STR) \ -- cgit v1.2.3 From 45b30fafe528601f1a4449c9d68d8ebe7bbc39ad Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 7 Jun 2021 10:56:04 +0000 Subject: powerpc: Define empty_zero_page[] in C Currently, empty_zero_page[] is defined in each platform's head.S.
Define it in mm/mem.c instead, and put it in BSS section instead of the DATA section. Commit 5227cfa71f9e ("arm64: mm: place empty_zero_page in bss") explains why it is interesting to have it in BSS. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/5838caffa269e0957c5a50cc85477876220298b0.1623063174.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_40x.S | 4 ---- arch/powerpc/kernel/head_44x.S | 4 ---- arch/powerpc/kernel/head_64.S | 5 ----- arch/powerpc/kernel/head_8xx.S | 6 ------ arch/powerpc/kernel/head_book3s_32.S | 5 ----- arch/powerpc/kernel/head_fsl_booke.S | 4 ---- arch/powerpc/mm/mem.c | 3 +++ 7 files changed, 3 insertions(+), 28 deletions(-) diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 2717aa860cae..92b6c7356161 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -709,10 +709,6 @@ _GLOBAL(abort) .align 12 .globl sdata sdata: - .globl empty_zero_page -empty_zero_page: - .space 4096 -EXPORT_SYMBOL(empty_zero_page) .globl swapper_pg_dir swapper_pg_dir: .space PGD_TABLE_SIZE diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 2c4ffec027a2..e037eb615757 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -1241,10 +1241,6 @@ head_start_common: .align PAGE_SHIFT .globl sdata sdata: - .globl empty_zero_page -empty_zero_page: - .space PAGE_SIZE -EXPORT_SYMBOL(empty_zero_page) /* * To support >32-bit physical addresses, we use an 8KB pgdir. diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index ece7f97bafff..730838c7ca39 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -1012,8 +1012,3 @@ start_here_common: .globl swapper_pg_dir swapper_pg_dir: .space PGD_TABLE_SIZE - - .globl empty_zero_page -empty_zero_page: - .space PAGE_SIZE -EXPORT_SYMBOL(empty_zero_page) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 817df9fe7fb3..5ce42dfac061 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -795,12 +795,6 @@ _GLOBAL(mmu_pin_tlb) .data .globl sdata sdata: - .globl empty_zero_page - .align PAGE_SHIFT -empty_zero_page: - .space PAGE_SIZE -EXPORT_SYMBOL(empty_zero_page) - .globl swapper_pg_dir swapper_pg_dir: .space PGD_TABLE_SIZE diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 0b82672ff7a6..59ddd68d60bf 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -1216,11 +1216,6 @@ setup_usbgecko_bat: .data .globl sdata sdata: - .globl empty_zero_page -empty_zero_page: - .space 4096 -EXPORT_SYMBOL(empty_zero_page) - .globl swapper_pg_dir swapper_pg_dir: .space PGD_TABLE_SIZE diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index ab718fd5e2a2..f33bc5a8e73e 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -1221,10 +1221,6 @@ _GLOBAL(restore_to_as0) .align 12 .globl sdata sdata: - .globl empty_zero_page -empty_zero_page: - .space 4096 -EXPORT_SYMBOL(empty_zero_page) .globl swapper_pg_dir swapper_pg_dir: .space PGD_TABLE_SIZE diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index a6b36a40897a..77fce7aa7dc5 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -28,6 +28,9 @@ unsigned long long memory_limit; bool init_mem_is_free; +unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] 
__page_aligned_bss; +EXPORT_SYMBOL(empty_zero_page); + pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { -- cgit v1.2.3 From e72421a085a8dc81c71b0daeb89612279c2c621c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 7 Jun 2021 10:56:05 +0000 Subject: powerpc: Define swapper_pg_dir[] in C Don't duplicate swapper_pg_dir[] in each platform's head.S Define it in mm/pgtable.c Define MAX_PTRS_PER_PGD because on book3s/64 PTRS_PER_PGD is not a constant. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/5e3f1b8a4695c33ccc80aa3870e016bef32b85e1.1623063174.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/64/pgtable.h | 3 +++ arch/powerpc/include/asm/pgtable.h | 4 ++++ arch/powerpc/kernel/asm-offsets.c | 5 ----- arch/powerpc/kernel/head_40x.S | 11 ----------- arch/powerpc/kernel/head_44x.S | 17 +---------------- arch/powerpc/kernel/head_64.S | 15 --------------- arch/powerpc/kernel/head_8xx.S | 12 ------------ arch/powerpc/kernel/head_book3s_32.S | 10 ---------- arch/powerpc/kernel/head_fsl_booke.S | 12 ------------ arch/powerpc/mm/pgtable.c | 2 ++ 10 files changed, 10 insertions(+), 81 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index a666d561b44d..4d9941b2fe51 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -232,6 +232,9 @@ extern unsigned long __pmd_frag_size_shift; #define PTRS_PER_PUD (1 << PUD_INDEX_SIZE) #define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) +#define MAX_PTRS_PER_PGD (1 << (H_PGD_INDEX_SIZE > RADIX_PGD_INDEX_SIZE ? \ + H_PGD_INDEX_SIZE : RADIX_PGD_INDEX_SIZE)) + /* PMD_SHIFT determines what a second-level page table entry can map */ #define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE) #define PMD_SIZE (1UL << PMD_SHIFT) diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index c6a676714f04..b9c8641654f4 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -41,6 +41,10 @@ struct mm_struct; #ifndef __ASSEMBLY__ +#ifndef MAX_PTRS_PER_PGD +#define MAX_PTRS_PER_PGD PTRS_PER_PGD +#endif + /* Keep these as a macros to avoid include dependency mess */ #define pte_page(x) pfn_to_page(pte_pfn(x)) #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 672e7e2b2343..c46685c9b2c6 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -354,11 +354,6 @@ int main(void) DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry)); #endif -#ifdef CONFIG_PPC_BOOK3S_64 - DEFINE(PGD_TABLE_SIZE, (sizeof(pgd_t) << max(RADIX_PGD_INDEX_SIZE, H_PGD_INDEX_SIZE))); -#else - DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE); -#endif DEFINE(PTE_SIZE, sizeof(pte_t)); #ifdef CONFIG_KVM diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 92b6c7356161..7d72ee5ab387 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -701,14 +701,3 @@ _GLOBAL(abort) mfspr r13,SPRN_DBCR0 oris r13,r13,DBCR0_RST_SYSTEM@h mtspr SPRN_DBCR0,r13 - -/* We put a few things here that have to be page-aligned. This stuff - * goes at the beginning of the data segment, which is page-aligned. 
- */ - .data - .align 12 - .globl sdata -sdata: - .globl swapper_pg_dir -swapper_pg_dir: - .space PGD_TABLE_SIZE diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index e037eb615757..ddc978a2d381 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -1233,23 +1233,8 @@ head_start_common: isync blr -/* - * We put a few things here that have to be page-aligned. This stuff - * goes at the beginning of the data segment, which is page-aligned. - */ - .data - .align PAGE_SHIFT - .globl sdata -sdata: - -/* - * To support >32-bit physical addresses, we use an 8KB pgdir. - */ - .globl swapper_pg_dir -swapper_pg_dir: - .space PGD_TABLE_SIZE - #ifdef CONFIG_SMP + .data .align 12 temp_boot_stack: .space 1024 diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 730838c7ca39..79f2d1e61abd 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -997,18 +997,3 @@ start_here_common: 0: trap EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0 .previous - -/* - * We put a few things here that have to be page-aligned. - * This stuff goes at the beginning of the bss, which is page-aligned. - */ - .section ".bss" -/* - * pgd dir should be aligned to PGD_TABLE_SIZE which is 64K. - * We will need to find a better way to fix this - */ - .align 16 - - .globl swapper_pg_dir -swapper_pg_dir: - .space PGD_TABLE_SIZE diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 5ce42dfac061..9bdb95f5694f 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -786,15 +786,3 @@ _GLOBAL(mmu_pin_tlb) mtspr SPRN_SRR1, r10 mtspr SPRN_SRR0, r11 rfi - -/* - * We put a few things here that have to be page-aligned. - * This stuff goes at the beginning of the data segment, - * which is page-aligned. - */ - .data - .globl sdata -sdata: - .globl swapper_pg_dir -swapper_pg_dir: - .space PGD_TABLE_SIZE diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 59ddd68d60bf..e8d861e48c25 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -1208,14 +1208,4 @@ setup_usbgecko_bat: blr #endif -/* - * We put a few things here that have to be page-aligned. - * This stuff goes at the beginning of the data segment, - * which is page-aligned. - */ .data - .globl sdata -sdata: - .globl swapper_pg_dir -swapper_pg_dir: - .space PGD_TABLE_SIZE diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index f33bc5a8e73e..0f9642f36b49 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -1212,15 +1212,3 @@ _GLOBAL(restore_to_as0) */ 3: mr r3,r5 bl _start - -/* - * We put a few things here that have to be page-aligned. This stuff - * goes at the beginning of the data segment, which is page-aligned. 
- */ - .data - .align 12 - .globl sdata -sdata: - .globl swapper_pg_dir -swapper_pg_dir: - .space PGD_TABLE_SIZE diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 354611940118..1707ab580ee2 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -28,6 +28,8 @@ #include #include +pgd_t swapper_pg_dir[MAX_PTRS_PER_PGD] __page_aligned_bss; + static inline int is_exec_fault(void) { return current->thread.regs && TRAP(current->thread.regs) == 0x400; -- cgit v1.2.3 From 91e9ee7e949bff08cc3845a4811185e826b6e2f1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 7 Jun 2021 10:56:06 +0000 Subject: powerpc/32s: Rename PTE_SIZE to PTE_T_SIZE PTE_SIZE means PTE page table size in most places, whereas in hash_low.S it means the size of one entry in the table. Rename it PTE_T_SIZE, and define it directly in hash_low.S instead of going through asm-offsets. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/83a008a9fd6cc3f2bbcb470f592555d260ed7a3d.1623063174.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/asm-offsets.c | 2 -- arch/powerpc/mm/book3s32/hash_low.S | 6 ++++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index c46685c9b2c6..8599cf4f799c 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -354,8 +354,6 @@ int main(void) DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry)); #endif - DEFINE(PTE_SIZE, sizeof(pte_t)); - #ifdef CONFIG_KVM OFFSET(VCPU_HOST_STACK, kvm_vcpu, arch.host_stack); OFFSET(VCPU_HOST_PID, kvm_vcpu, arch.host_pid); diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index fb4233a5bdf7..6925ce998557 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -27,8 +27,10 @@ #include #ifdef CONFIG_PTE_64BIT +#define PTE_T_SIZE 8 #define PTE_FLAGS_OFFSET 4 /* offset of PTE flags, in bytes */ #else +#define PTE_T_SIZE 4 #define PTE_FLAGS_OFFSET 0 #endif @@ -488,7 +490,7 @@ _GLOBAL(flush_hash_pages) bne 2f ble cr1,19f addi r4,r4,0x1000 - addi r5,r5,PTE_SIZE + addi r5,r5,PTE_T_SIZE addi r6,r6,-1 b 1b @@ -573,7 +575,7 @@ _GLOBAL(flush_hash_pages) 8: ble cr1,9f /* if all ptes checked */ 81: addi r6,r6,-1 - addi r5,r5,PTE_SIZE + addi r5,r5,PTE_T_SIZE addi r4,r4,0x1000 lwz r0,0(r5) /* check next pte */ cmpwi cr1,r6,1 -- cgit v1.2.3 From 4696cfdb1380238dca2bda6199428d7e50c4ea38 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 8 Jun 2021 17:22:51 +0000 Subject: powerpc/32: Remove __main() The comment says that __main() is there to make GCC happy. It's been there since the implementation of the ppc arch in Linux 1.3.45. ppc32 is the only architecture having it. Even ppc64 doesn't have it. Seems like GCC is still happy without it. Drop it for good. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d01028f8166b98584eec536b52f14c5e3f98ff6b.1623172922.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/misc_32.S | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 6a076bef2932..39ab15419592 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -388,9 +388,3 @@ _GLOBAL(start_secondary_resume) bl start_secondary b . #endif /* CONFIG_SMP */ - -/* - * This routine is just here to keep GCC happy - sigh...
- */ -_GLOBAL(__main) - blr -- cgit v1.2.3 From 77b0bed74232c480b94bae188b6c7cd0ddee92e8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 9 Jun 2021 05:52:50 +0000 Subject: powerpc: Remove proc_trap() proc_trap() has never been used, remove it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/827944ea12d470c2f862635f48b5ee6c1520351f.1623217909.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/reg.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index c70782eae3a0..59b98f552ebf 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1436,8 +1436,6 @@ static inline void mtsr(u32 val, u32 idx) } #endif -#define proc_trap() asm volatile("trap") - extern unsigned long current_stack_frame(void); register unsigned long current_stack_pointer asm("r1"); -- cgit v1.2.3 From ab3aab292cb2f417f63b8f4887c1dd01c2a831cd Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 9 Jun 2021 06:10:29 +0000 Subject: powerpc: Move update_power8_hid0() into its only user update_power8_hid0() is used only by powernv platform subcore.c Move it there. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/37f41d74faa0c66f90b373e243e8b1ee37a1f6fa.1623219019.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/reg.h | 10 ---------- arch/powerpc/platforms/powernv/subcore.c | 10 ++++++++++ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 59b98f552ebf..be85cf156a1f 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1446,16 +1446,6 @@ extern void scom970_write(unsigned int address, unsigned long value); struct pt_regs; extern void ppc_save_regs(struct pt_regs *regs); - -static inline void update_power8_hid0(unsigned long hid0) -{ - /* - * The HID0 update on Power8 should at the very least be - * preceded by a SYNC instruction followed by an ISYNC - * instruction - */ - asm volatile("sync; mtspr %0,%1; isync":: "i"(SPRN_HID0), "r"(hid0)); -} #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_REG_H */ diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c index 73207b53dc2b..7e98b00ea2e8 100644 --- a/arch/powerpc/platforms/powernv/subcore.c +++ b/arch/powerpc/platforms/powernv/subcore.c @@ -169,6 +169,16 @@ static void update_hid_in_slw(u64 hid0) } } +static inline void update_power8_hid0(unsigned long hid0) +{ + /* + * The HID0 update on Power8 should at the very least be + * preceded by a SYNC instruction followed by an ISYNC + * instruction + */ + asm volatile("sync; mtspr %0,%1; isync":: "i"(SPRN_HID0), "r"(hid0)); +} + static void unsplit_core(void) { u64 hid0, mask; -- cgit v1.2.3 From 45677c9aebe926192e59475b35a1ff35ff2d4217 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Tue, 25 May 2021 09:51:42 -0400 Subject: selftests/powerpc: Fix "no_handler" EBB selftest The "no_handler_test" in ebb selftests attempts to read the PMU registers twice via helper function "dump_ebb_state". First dump is just before closing of event and the second invocation is done after closing of the event. The original intention of second dump_ebb_state was to dump the state of registers at the end of the test when the counters are frozen. 
But this will be achieved with the first call itself, since the sample
period is set to a low value and the PMU will be frozen by then. Hence
this patch removes the dump done before closing the event.

Reported-by: Shirisha Ganta
Signed-off-by: Athira Rajeev
Tested-by: Nageswara R Sastry
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/1621950703-1532-2-git-send-email-atrajeev@linux.vnet.ibm.com
---
 tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c b/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c
index fc5bf4870d8e..01e827c31169 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c
@@ -50,8 +50,6 @@ static int no_handler_test(void)

 	event_close(&event);

-	dump_ebb_state();
-
 	/* The real test is that we never took an EBB at 0x0 */
 	return 0;
--
cgit v1.2.3


From d81090ed44c0d15abf2b07663d5f0b9e5ba51525 Mon Sep 17 00:00:00 2001
From: Athira Rajeev
Date: Tue, 25 May 2021 09:51:43 -0400
Subject: selftests/powerpc: EBB selftest for MMCR0 control for PMU SPRs in ISA v3.1

With the MMCR0 control bit (PMCCEXT) in ISA v3.1, read access to
group B registers is restricted when MMCR0 PMCC=0b00. On other
platforms (like Power9), the older behaviour applies and group B
PMU SPRs remain readable.

This patch creates a selftest which verifies that the test takes a
SIGILL when attempting to read PMU registers via helper function
"dump_ebb_state" for ISA v3.1.

Signed-off-by: Athira Rajeev
Tested-by: Nageswara R Sastry
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/1621950703-1532-3-git-send-email-atrajeev@linux.vnet.ibm.com
---
 tools/testing/selftests/powerpc/pmu/ebb/Makefile   |  2 +-
 .../powerpc/pmu/ebb/regs_access_pmccext_test.c     | 63 ++++++++++++++++++++++
 2 files changed, 64 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
index c5ecb4634094..010160690227 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
@@ -24,7 +24,7 @@ TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test	\
 	 fork_cleanup_test ebb_on_child_test			\
 	 ebb_on_willing_child_test back_to_back_ebbs_test	\
 	 lost_exception_test no_handler_test			\
-	 cycles_with_mmcr2_test
+	 cycles_with_mmcr2_test regs_access_pmccext_test

 top_srcdir = ../../../../../..
 include ../../../lib.mk
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c b/tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c
new file mode 100644
index 000000000000..1eda8e9932e8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2021, Athira Rajeev, IBM Corp.
+ */
+
+#include
+#include
+#include
+#include
+
+#include "ebb.h"
+
+
+/*
+ * Test that closing the EBB event clears MMCR0_PMCC and
+ * sets MMCR0_PMCCEXT preventing further read access to the
+ * group B PMU registers.
+ */ + +static int regs_access_pmccext(void) +{ + struct event event; + + SKIP_IF(!ebb_is_supported()); + + event_init_named(&event, 0x1001e, "cycles"); + event_leader_ebb_init(&event); + + FAIL_IF(event_open(&event)); + + ebb_enable_pmc_counting(1); + setup_ebb_handler(standard_ebb_callee); + ebb_global_enable(); + FAIL_IF(ebb_event_enable(&event)); + + mtspr(SPRN_PMC1, pmc_sample_period(sample_period)); + + while (ebb_state.stats.ebb_count < 1) + FAIL_IF(core_busy_loop()); + + ebb_global_disable(); + event_close(&event); + + FAIL_IF(ebb_state.stats.ebb_count == 0); + + /* + * For ISA v3.1, verify the test takes a SIGILL when reading + * PMU regs after the event is closed. With the control bit + * in MMCR0 (PMCCEXT) restricting access to group B PMU regs, + * sigill is expected. + */ + if (have_hwcap2(PPC_FEATURE2_ARCH_3_1)) + FAIL_IF(catch_sigill(dump_ebb_state)); + else + dump_ebb_state(); + + return 0; +} + +int main(void) +{ + return test_harness(regs_access_pmccext, "regs_access_pmccext"); +} -- cgit v1.2.3 From b112fb913b5b5705db22efa90ec60f42518934af Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Mon, 14 Jun 2021 22:09:07 +1000 Subject: powerpc: make stack walking KASAN-safe Make our stack-walking code KASAN-safe by using __no_sanitize_address. Generic code, arm64, s390 and x86 all make accesses unchecked for similar sorts of reasons: when unwinding a stack, we might touch memory that KASAN has marked as being out-of-bounds. In ppc64 KASAN development, I hit this sometimes when checking for an exception frame - because we're checking an arbitrary offset into the stack frame. See commit 20955746320e ("s390/kasan: avoid false positives during stack unwind"), commit bcaf669b4bdb ("arm64: disable kasan when accessing frame->fp in unwind_frame"), commit 91e08ab0c851 ("x86/dumpstack: Prevent KASAN false positive warnings") and commit 6e22c8366416 ("tracing, kasan: Silence Kasan warning in check_stack of stack_tracer"). Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210614120907.1952321-1-dja@axtens.net --- arch/powerpc/kernel/process.c | 5 +++-- arch/powerpc/kernel/stacktrace.c | 8 ++++---- arch/powerpc/perf/callchain.c | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 3626074acec9..4e593fc2c66d 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -2133,8 +2133,9 @@ unsigned long get_wchan(struct task_struct *p) static int kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH; -void show_stack(struct task_struct *tsk, unsigned long *stack, - const char *loglvl) +void __no_sanitize_address show_stack(struct task_struct *tsk, + unsigned long *stack, + const char *loglvl) { unsigned long sp, ip, lr, newsp; int count = 0; diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index 1deb1bf331dd..1961e6d5e33b 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -23,8 +23,8 @@ #include -void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, - struct task_struct *task, struct pt_regs *regs) +void __no_sanitize_address arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, + struct task_struct *task, struct pt_regs *regs) { unsigned long sp; @@ -61,8 +61,8 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, * * If the task is not 'current', the caller *must* ensure the task is inactive. 
*/ -int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, - void *cookie, struct task_struct *task) +int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, + void *cookie, struct task_struct *task) { unsigned long sp; unsigned long newsp; diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index 6c028ee513c0..082f6d0308a4 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -40,7 +40,7 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp) return 0; } -void +void __no_sanitize_address perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { unsigned long sp, next_sp; -- cgit v1.2.3 From baf24d23be7d2357a2aa9c5ffb6a2d680ac2a68c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 11 Jun 2021 19:08:54 +0000 Subject: powerpc/32: Display modules range in virtual memory layout book3s/32 and 8xx don't use vmalloc for modules. Print the modules area at startup as part of the virtual memory layout: [ 0.000000] Kernel virtual memory layout: [ 0.000000] * 0xffafc000..0xffffc000 : fixmap [ 0.000000] * 0xc9000000..0xffafc000 : vmalloc & ioremap [ 0.000000] * 0xb0000000..0xc0000000 : modules [ 0.000000] Memory: 118480K/131072K available (7152K kernel code, 2320K rwdata, 1328K rodata, 368K init, 854K bss, 12592K reserved, 0K cma-reserved) Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/98394503e92d6fd6d8f657e0b263b32f21cf2790.1623438478.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/mem.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 77fce7aa7dc5..c3b4fdda7069 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -302,6 +302,10 @@ void __init mem_init(void) ioremap_bot, IOREMAP_TOP); pr_info(" * 0x%08lx..0x%08lx : vmalloc & ioremap\n", VMALLOC_START, VMALLOC_END); +#ifdef MODULES_VADDR + pr_info(" * 0x%08lx..0x%08lx : modules\n", + MODULES_VADDR, MODULES_END); +#endif #endif /* CONFIG_PPC32 */ } -- cgit v1.2.3 From ac3d085368b3abf19b24d8505b897454c7372855 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 10 Jun 2021 15:58:34 +0000 Subject: powerpc/signal32: Remove impossible #ifdef combinations PPC_TRANSACTIONAL_MEM is only on book3s/64 SPE is only on booke PPC_TRANSACTIONAL_MEM selects ALTIVEC and VSX Therefore, within PPC_TRANSACTIONAL_MEM sections, ALTIVEC and VSX are always defined while SPE never is. Remove all SPE code and all #ifdef ALTIVEC and VSX in tm functions. 
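To make the Kconfig reasoning concrete, here is a minimal sketch (an
editorial illustration, not part of the patch; tm_sketch() and the two
helpers are hypothetical, the CONFIG_* symbols are the real ones):

static void tm_altivec_sketch(void) { }
static void tm_spe_sketch(void) { }

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/*
 * Inside this region the ALTIVEC guard is always true (TM selects
 * ALTIVEC and VSX) and the SPE guard is never true (TM is book3s/64
 * only, SPE is booke only), so both inner #ifdefs can go.
 */
static void tm_sketch(void)
{
#ifdef CONFIG_ALTIVEC			/* always defined here */
	tm_altivec_sketch();
#endif
#ifdef CONFIG_SPE			/* never defined here: dead code */
	tm_spe_sketch();
#endif
}
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */

That is why the hunks below can drop the inner #ifdef/#endif pairs and
delete the CONFIG_SPE sections outright without changing behaviour.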
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a069a348ee3c2fe3123a5a93695c2b35dc42cb40.1623340691.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/signal_32.c | 55 ----------------------------------------- 1 file changed, 55 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index a50d091334ea..f55ac65c7492 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -354,14 +354,8 @@ static void prepare_save_tm_user_regs(void) { WARN_ON(tm_suspend_disabled); -#ifdef CONFIG_ALTIVEC if (cpu_has_feature(CPU_FTR_ALTIVEC)) current->thread.ckvrsave = mfspr(SPRN_VRSAVE); -#endif -#ifdef CONFIG_SPE - if (current->thread.used_spe) - flush_spe_to_thread(current); -#endif } static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame, @@ -379,7 +373,6 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user */ unsafe_put_user((msr >> 32), &tm_frame->mc_gregs[PT_MSR], failed); -#ifdef CONFIG_ALTIVEC /* save altivec registers */ if (current->thread.used_vr) { unsafe_copy_to_user(&frame->mc_vregs, ¤t->thread.ckvr_state, @@ -412,7 +405,6 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user else unsafe_put_user(current->thread.ckvrsave, (u32 __user *)&tm_frame->mc_vregs[32], failed); -#endif /* CONFIG_ALTIVEC */ unsafe_copy_ckfpr_to_user(&frame->mc_fregs, current, failed); if (msr & MSR_FP) @@ -420,7 +412,6 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user else unsafe_copy_ckfpr_to_user(&tm_frame->mc_fregs, current, failed); -#ifdef CONFIG_VSX /* * Copy VSR 0-31 upper half from thread_struct to local * buffer, then write that to userspace. Also set MSR_VSX in @@ -436,23 +427,6 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user msr |= MSR_VSX; } -#endif /* CONFIG_VSX */ -#ifdef CONFIG_SPE - /* SPE regs are not checkpointed with TM, so this section is - * simply the same as in __unsafe_save_user_regs(). 
- */ - if (current->thread.used_spe) { - unsafe_copy_to_user(&frame->mc_vregs, current->thread.evr, - ELF_NEVRREG * sizeof(u32), failed); - /* set MSR_SPE in the saved MSR value to indicate that - * frame->mc_vregs contains valid data */ - msr |= MSR_SPE; - } - - /* We always copy to/from spefscr */ - unsafe_put_user(current->thread.spefscr, - (u32 __user *)&frame->mc_vregs + ELF_NEVRREG, failed); -#endif /* CONFIG_SPE */ unsafe_put_user(msr, &frame->mc_gregs[PT_MSR], failed); @@ -587,9 +561,7 @@ static long restore_tm_user_regs(struct pt_regs *regs, struct mcontext __user *tm_sr) { unsigned long msr, msr_hi; -#ifdef CONFIG_VSX int i; -#endif if (tm_suspend_disabled) return 1; @@ -610,7 +582,6 @@ static long restore_tm_user_regs(struct pt_regs *regs, /* Restore the previous little-endian mode */ regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); -#ifdef CONFIG_ALTIVEC regs->msr &= ~MSR_VEC; if (msr & MSR_VEC) { /* restore altivec registers from the stack */ @@ -629,13 +600,11 @@ static long restore_tm_user_regs(struct pt_regs *regs, (u32 __user *)&sr->mc_vregs[32], failed); if (cpu_has_feature(CPU_FTR_ALTIVEC)) mtspr(SPRN_VRSAVE, current->thread.ckvrsave); -#endif /* CONFIG_ALTIVEC */ regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1); unsafe_copy_fpr_from_user(current, &sr->mc_fregs, failed); -#ifdef CONFIG_VSX regs->msr &= ~MSR_VSX; if (msr & MSR_VSX) { /* @@ -649,24 +618,6 @@ static long restore_tm_user_regs(struct pt_regs *regs, current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0; current->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = 0; } -#endif /* CONFIG_VSX */ - -#ifdef CONFIG_SPE - /* SPE regs are not checkpointed with TM, so this section is - * simply the same as in restore_user_regs(). - */ - regs->msr &= ~MSR_SPE; - if (msr & MSR_SPE) { - unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs, - ELF_NEVRREG * sizeof(u32), failed); - current->thread.used_spe = true; - } else if (current->thread.used_spe) - memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32)); - - /* Always get SPEFSCR back */ - unsafe_get_user(current->thread.spefscr, - (u32 __user *)&sr->mc_vregs + ELF_NEVRREG, failed); -#endif /* CONFIG_SPE */ user_read_access_end(); @@ -675,7 +626,6 @@ static long restore_tm_user_regs(struct pt_regs *regs, unsafe_restore_general_regs(regs, tm_sr, failed); -#ifdef CONFIG_ALTIVEC /* restore altivec registers from the stack */ if (msr & MSR_VEC) unsafe_copy_from_user(¤t->thread.vr_state, &tm_sr->mc_vregs, @@ -684,11 +634,9 @@ static long restore_tm_user_regs(struct pt_regs *regs, /* Always get VRSAVE back */ unsafe_get_user(current->thread.vrsave, (u32 __user *)&tm_sr->mc_vregs[32], failed); -#endif /* CONFIG_ALTIVEC */ unsafe_copy_ckfpr_from_user(current, &tm_sr->mc_fregs, failed); -#ifdef CONFIG_VSX if (msr & MSR_VSX) { /* * Restore altivec registers from the stack to a local @@ -697,7 +645,6 @@ static long restore_tm_user_regs(struct pt_regs *regs, unsafe_copy_vsx_from_user(current, &tm_sr->mc_vsregs, failed); current->thread.used_vsr = true; } -#endif /* CONFIG_VSX */ /* Get the top half of the MSR from the user context */ unsafe_get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR], failed); @@ -742,12 +689,10 @@ static long restore_tm_user_regs(struct pt_regs *regs, load_fp_state(¤t->thread.fp_state); regs->msr |= (MSR_FP | current->thread.fpexc_mode); } -#ifdef CONFIG_ALTIVEC if (msr & MSR_VEC) { load_vr_state(¤t->thread.vr_state); regs->msr |= MSR_VEC; } -#endif preempt_enable(); -- cgit v1.2.3 From db8f7066dc498acf9074ed3c11a7a24f318d8d4f Mon Sep 17 00:00:00 2001 From: Sudeep Holla 
Date: Fri, 11 Jun 2021 19:10:57 +0000
Subject: powerpc/64: drop redundant definition of spin_until_cond

linux/processor.h has exactly the same definition of spin_until_cond.
Drop the redundant definition in asm/processor.h.

Signed-off-by: Sudeep Holla
Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/1fff2054e5dfc00329804dbd3f2a91667c9a8aff.1623438544.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/include/asm/processor.h | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index aeb1a35163d1..f348e564f7dd 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -347,17 +347,6 @@ static inline unsigned long __pack_fe01(unsigned int fpmode)

 #define spin_end()	HMT_medium()

-#define spin_until_cond(cond)					\
-do {								\
-	if (unlikely(!(cond))) {				\
-		spin_begin();					\
-		do {						\
-			spin_cpu_relax();			\
-		} while (!(cond));				\
-		spin_end();					\
-	}							\
-} while (0)
-
 #endif

 /* Check that a certain kernel stack pointer is valid in task_struct p */
--
cgit v1.2.3


From 2400c13c437debc99d3399a7100d4e8c3fe20a08 Mon Sep 17 00:00:00 2001
From: Sudeep Holla
Date: Fri, 11 Jun 2021 19:10:58 +0000
Subject: powerpc/watchdog: include linux/processor.h for spin_until_cond

wd_smp_lock() uses spin_until_cond() but includes neither
linux/processor.h nor asm/processor.h. This patch includes
linux/processor.h for the spin_until_cond usage.

Signed-off-by: Sudeep Holla
Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/5e8d2d50f301a346040362028c2ecba40685de9e.1623438544.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/kernel/watchdog.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index c9a8f4781a10..a165635fd214 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -24,6 +24,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
--
cgit v1.2.3


From a3309226454a7e76d76251579c1183787694f303 Mon Sep 17 00:00:00 2001
From: Michael Ellerman
Date: Thu, 10 Jun 2021 17:29:49 +1000
Subject: powerpc/signal64: Don't read sigaction arguments back from user memory

When delivering a signal to a sigaction style handler (SA_SIGINFO), we
pass pointers to the siginfo and ucontext via r4 and r5.

Currently we populate the values in those registers by reading the
pointers out of the sigframe in user memory, even though the values in
user memory were written by the kernel just prior:

  unsafe_put_user(&frame->info, &frame->pinfo, badframe_block);
  unsafe_put_user(&frame->uc, &frame->puc, badframe_block);
  ...
  if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
  	err |= get_user(regs->gpr[4], (unsigned long __user *)&frame->pinfo);
  	err |= get_user(regs->gpr[5], (unsigned long __user *)&frame->puc);

ie. we write &frame->info into frame->pinfo, and then read frame->pinfo
back into r4, and similarly for &frame->uc.
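Condensed, the change amounts to the following (an illustrative sketch
assembled from the hunk below, not a standalone function; the
identifiers are the real ones from handle_rt_signal64()):

	/* before: write the pointer to user memory, then read it straight back */
	unsafe_put_user(&frame->info, &frame->pinfo, badframe_block);
	err |= get_user(regs->gpr[4], (unsigned long __user *)&frame->pinfo);

	/* after: reuse the kernel-side value, no read from user memory */
	unsafe_put_user(&frame->info, &frame->pinfo, badframe_block);
	regs->gpr[4] = (unsigned long)&frame->info;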
The code has always been like this, since linux-fullhistory commit
d4f2d95eca2c ("Forward port of 2.4 ppc64 signal changes.").

There's no reason for us to read the values back from user memory,
rather than just setting the value in the gpr[4/5] directly. In fact
reading the value back from user memory opens up the possibility of
another user thread changing the values before we read them back.
Although any process doing that would be racing against the kernel
delivering the signal, and would risk corrupting the stack, so that
would be a userspace bug.

Note that this is 64-bit only code, so there's no subtlety with the
size of pointers differing between kernel and user. Also the frame
variable is not modified to point elsewhere during the function.

In the past reading the values back from user memory was not costly,
but now that we have KUAP on some CPUs it is, so we'd rather avoid it
for that reason too.

So change the code to just set the values directly, using the same
values we have written to the sigframe previously in the function.

Note also that this matches what our 32-bit signal code does.

Using a version of will-it-scale's signal1_threads that sets
SA_SIGINFO, this results in a ~4% increase in signals per second on a
Power9, from 229,777 to 239,766.

Reviewed-by: Nicholas Piggin
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20210610072949.3198522-1-mpe@ellerman.id.au
---
 arch/powerpc/kernel/signal_64.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 0f9646682598..16b41470ec92 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -944,8 +944,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 	regs->gpr[3] = ksig->sig;
 	regs->result = 0;
 	if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
-		err |= get_user(regs->gpr[4], (unsigned long __user *)&frame->pinfo);
-		err |= get_user(regs->gpr[5], (unsigned long __user *)&frame->puc);
+		regs->gpr[4] = (unsigned long)&frame->info;
+		regs->gpr[5] = (unsigned long)&frame->uc;
 		regs->gpr[6] = (unsigned long) frame;
 	} else {
 		regs->gpr[4] = (unsigned long)&frame->uc.uc_mcontext;
--
cgit v1.2.3


From 07d8ad6fd8a3d47f50595ca4826f41dbf4f3a0c6 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V"
Date: Thu, 10 Jun 2021 14:06:39 +0530
Subject: powerpc/mm/book3s64: Fix possible build error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Update _tlbiel_pid() such that we can avoid build errors like below
when using this function in other places.

arch/powerpc/mm/book3s64/radix_tlb.c: In function ‘__radix__flush_tlb_range_psize’:
arch/powerpc/mm/book3s64/radix_tlb.c:114:2: warning: ‘asm’ operand 3 probably does not match constraints
  114 |  asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
      |  ^~~
arch/powerpc/mm/book3s64/radix_tlb.c:114:2: error: impossible constraint in ‘asm’
make[4]: *** [scripts/Makefile.build:271: arch/powerpc/mm/book3s64/radix_tlb.o] Error 1

With this fix, we can also drop the __always_inline in
__radix__flush_tlb_range_psize which was added by commit e12d6d7d46a6
("powerpc/mm/radix: mark __radix__flush_tlb_range_psize() as
__always_inline").

Signed-off-by: Aneesh Kumar K.V
Reviewed-by: Christophe Leroy
Acked-by: Michael Ellerman
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20210610083639.387365-1-aneesh.kumar@linux.ibm.com
---
 arch/powerpc/mm/book3s64/radix_tlb.c | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 409e61210789..817a02ef6032 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -291,22 +291,30 @@ static inline void fixup_tlbie_lpid(unsigned long lpid)

 /*
  * We use 128 set in radix mode and 256 set in hpt mode.
*/ -static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric) +static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) { int set; asm volatile("ptesync": : :"memory"); - /* - * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL, - * also flush the entire Page Walk Cache. - */ - __tlbiel_pid(pid, 0, ric); + switch (ric) { + case RIC_FLUSH_PWC: - /* For PWC, only one flush is needed */ - if (ric == RIC_FLUSH_PWC) { + /* For PWC, only one flush is needed */ + __tlbiel_pid(pid, 0, RIC_FLUSH_PWC); ppc_after_tlbiel_barrier(); return; + case RIC_FLUSH_TLB: + __tlbiel_pid(pid, 0, RIC_FLUSH_TLB); + break; + case RIC_FLUSH_ALL: + default: + /* + * Flush the first set of the TLB, and if + * we're doing a RIC_FLUSH_ALL, also flush + * the entire Page Walk Cache. + */ + __tlbiel_pid(pid, 0, RIC_FLUSH_ALL); } if (!cpu_has_feature(CPU_FTR_ARCH_31)) { @@ -1176,7 +1184,7 @@ void radix__tlb_flush(struct mmu_gather *tlb) } } -static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm, +static void __radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start, unsigned long end, int psize, bool also_pwc) { -- cgit v1.2.3 From 91cdbb955aa94ee0841af4685be40937345d29b8 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Thu, 17 Jun 2021 13:29:05 -0700 Subject: powerpc/powernv/vas: Release reference to tgid during window close The kernel handles the NX fault by updating CSB or sending signal to process. In multithread applications, children can open VAS windows and can exit without closing them. But the parent can continue to send NX requests with these windows. To prevent pid reuse, reference will be taken on pid and tgid when the window is opened and release them during window close. The current code is not releasing the tgid reference which can cause pid leak and this patch fixes the issue. Fixes: db1c08a740635 ("powerpc/vas: Take reference to PID and mm for user space windows") Cc: stable@vger.kernel.org # 5.8+ Reported-by: Nicholas Piggin Signed-off-by: Haren Myneni Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/6020fc4d444864fe20f7dcdc5edfe53e67480a1c.camel@linux.ibm.com --- arch/powerpc/platforms/powernv/vas-window.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c index 5f5fe63a3d1c..7ba0840fc3b5 100644 --- a/arch/powerpc/platforms/powernv/vas-window.c +++ b/arch/powerpc/platforms/powernv/vas-window.c @@ -1093,9 +1093,9 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, /* * Process closes window during exit. In the case of * multithread application, the child thread can open - * window and can exit without closing it. Expects parent - * thread to use and close the window. So do not need - * to take pid reference for parent thread. + * window and can exit without closing it. so takes tgid + * reference until window closed to make sure tgid is not + * reused. */ txwin->tgid = find_get_pid(task_tgid_vnr(current)); /* @@ -1339,8 +1339,9 @@ int vas_win_close(struct vas_window *window) /* if send window, drop reference to matching receive window */ if (window->tx_win) { if (window->user_win) { - /* Drop references to pid and mm */ + /* Drop references to pid. 
tgid and mm */ put_pid(window->pid); + put_pid(window->tgid); if (window->mm) { mm_context_remove_vas_window(window->mm); mmdrop(window->mm); -- cgit v1.2.3 From 413d6ed3eac387a2876893c337174f0c5b99d01d Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Thu, 17 Jun 2021 13:29:48 -0700 Subject: powerpc/vas: Move VAS API to book3s common platform The pseries platform will share vas and nx code and interfaces with the PowerNV platform, so create the arch/powerpc/platforms/book3s/ directory and move VAS API code there. Functionality is not changed. Signed-off-by: Haren Myneni Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e05c8db17b9eabe3545b902d034238e4c6c08180.camel@linux.ibm.com --- arch/powerpc/include/asm/vas.h | 3 + arch/powerpc/platforms/Kconfig | 1 + arch/powerpc/platforms/Makefile | 1 + arch/powerpc/platforms/book3s/Kconfig | 15 ++ arch/powerpc/platforms/book3s/Makefile | 2 + arch/powerpc/platforms/book3s/vas-api.c | 278 +++++++++++++++++++++++++++++++ arch/powerpc/platforms/powernv/Kconfig | 14 -- arch/powerpc/platforms/powernv/Makefile | 2 +- arch/powerpc/platforms/powernv/vas-api.c | 278 ------------------------------- arch/powerpc/platforms/powernv/vas.h | 2 - 10 files changed, 301 insertions(+), 295 deletions(-) create mode 100644 arch/powerpc/platforms/book3s/Kconfig create mode 100644 arch/powerpc/platforms/book3s/Makefile create mode 100644 arch/powerpc/platforms/book3s/vas-api.c delete mode 100644 arch/powerpc/platforms/powernv/vas-api.c diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index e33f80b0ea81..3be76e813e2d 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -162,6 +162,9 @@ int vas_copy_crb(void *crb, int offset); */ int vas_paste_crb(struct vas_window *win, int offset, bool re); +void vas_win_paste_addr(struct vas_window *window, u64 *addr, + int *len); + /* * Register / unregister coprocessor type to VAS API which will be exported * to user space. Applications can use this API to open / close window diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 7a5e8f4541e3..594544a65b02 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -20,6 +20,7 @@ source "arch/powerpc/platforms/embedded6xx/Kconfig" source "arch/powerpc/platforms/44x/Kconfig" source "arch/powerpc/platforms/40x/Kconfig" source "arch/powerpc/platforms/amigaone/Kconfig" +source "arch/powerpc/platforms/book3s/Kconfig" config KVM_GUEST bool "KVM Guest support" diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile index 143d4417f6cc..0e75d7df387b 100644 --- a/arch/powerpc/platforms/Makefile +++ b/arch/powerpc/platforms/Makefile @@ -22,3 +22,4 @@ obj-$(CONFIG_PPC_CELL) += cell/ obj-$(CONFIG_PPC_PS3) += ps3/ obj-$(CONFIG_EMBEDDED6xx) += embedded6xx/ obj-$(CONFIG_AMIGAONE) += amigaone/ +obj-$(CONFIG_PPC_BOOK3S) += book3s/ diff --git a/arch/powerpc/platforms/book3s/Kconfig b/arch/powerpc/platforms/book3s/Kconfig new file mode 100644 index 000000000000..34c931592ef0 --- /dev/null +++ b/arch/powerpc/platforms/book3s/Kconfig @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 +config PPC_VAS + bool "IBM Virtual Accelerator Switchboard (VAS)" + depends on (PPC_POWERNV || PPC_PSERIES) && PPC_64K_PAGES + default y + help + This enables support for IBM Virtual Accelerator Switchboard (VAS). 
+ + VAS devices are found in POWER9-based and later systems, they + provide access to accelerator coprocessors such as NX-GZIP and + NX-842. This config allows the kernel to use NX-842 accelerators, + and user-mode APIs for the NX-GZIP accelerator on POWER9 PowerNV + and POWER10 PowerVM platforms. + + If unsure, say "N". diff --git a/arch/powerpc/platforms/book3s/Makefile b/arch/powerpc/platforms/book3s/Makefile new file mode 100644 index 000000000000..e790f1910f61 --- /dev/null +++ b/arch/powerpc/platforms/book3s/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_PPC_VAS) += vas-api.o diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c new file mode 100644 index 000000000000..cfc9d7dd65ab --- /dev/null +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -0,0 +1,278 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * VAS user space API for its accelerators (Only NX-GZIP is supported now) + * Copyright (C) 2019 Haren Myneni, IBM Corp + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * The driver creates the device node that can be used as follows: + * For NX-GZIP + * + * fd = open("/dev/crypto/nx-gzip", O_RDWR); + * rc = ioctl(fd, VAS_TX_WIN_OPEN, &attr); + * paste_addr = mmap(NULL, PAGE_SIZE, prot, MAP_SHARED, fd, 0ULL). + * vas_copy(&crb, 0, 1); + * vas_paste(paste_addr, 0, 1); + * close(fd) or exit process to close window. + * + * where "vas_copy" and "vas_paste" are defined in copy-paste.h. + * copy/paste returns to the user space directly. So refer NX hardware + * documententation for exact copy/paste usage and completion / error + * conditions. + */ + +/* + * Wrapper object for the nx-gzip device - there is just one instance of + * this node for the whole system. 
+ */ +static struct coproc_dev { + struct cdev cdev; + struct device *device; + char *name; + dev_t devt; + struct class *class; + enum vas_cop_type cop_type; +} coproc_device; + +struct coproc_instance { + struct coproc_dev *coproc; + struct vas_window *txwin; +}; + +static char *coproc_devnode(struct device *dev, umode_t *mode) +{ + return kasprintf(GFP_KERNEL, "crypto/%s", dev_name(dev)); +} + +static int coproc_open(struct inode *inode, struct file *fp) +{ + struct coproc_instance *cp_inst; + + cp_inst = kzalloc(sizeof(*cp_inst), GFP_KERNEL); + if (!cp_inst) + return -ENOMEM; + + cp_inst->coproc = container_of(inode->i_cdev, struct coproc_dev, + cdev); + fp->private_data = cp_inst; + + return 0; +} + +static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg) +{ + void __user *uptr = (void __user *)arg; + struct vas_tx_win_attr txattr = {}; + struct vas_tx_win_open_attr uattr; + struct coproc_instance *cp_inst; + struct vas_window *txwin; + int rc, vasid; + + cp_inst = fp->private_data; + + /* + * One window for file descriptor + */ + if (cp_inst->txwin) + return -EEXIST; + + rc = copy_from_user(&uattr, uptr, sizeof(uattr)); + if (rc) { + pr_err("%s(): copy_from_user() returns %d\n", __func__, rc); + return -EFAULT; + } + + if (uattr.version != 1) { + pr_err("Invalid version\n"); + return -EINVAL; + } + + vasid = uattr.vas_id; + + vas_init_tx_win_attr(&txattr, cp_inst->coproc->cop_type); + + txattr.lpid = mfspr(SPRN_LPID); + txattr.pidr = mfspr(SPRN_PID); + txattr.user_win = true; + txattr.rsvd_txbuf_count = false; + txattr.pswid = false; + + pr_devel("Pid %d: Opening txwin, PIDR %ld\n", txattr.pidr, + mfspr(SPRN_PID)); + + txwin = vas_tx_win_open(vasid, cp_inst->coproc->cop_type, &txattr); + if (IS_ERR(txwin)) { + pr_err("%s() vas_tx_win_open() failed, %ld\n", __func__, + PTR_ERR(txwin)); + return PTR_ERR(txwin); + } + + cp_inst->txwin = txwin; + + return 0; +} + +static int coproc_release(struct inode *inode, struct file *fp) +{ + struct coproc_instance *cp_inst = fp->private_data; + + if (cp_inst->txwin) { + vas_win_close(cp_inst->txwin); + cp_inst->txwin = NULL; + } + + kfree(cp_inst); + fp->private_data = NULL; + + /* + * We don't know here if user has other receive windows + * open, so we can't really call clear_thread_tidr(). + * So, once the process calls set_thread_tidr(), the + * TIDR value sticks around until process exits, resulting + * in an extra copy in restore_sprs(). 
+ */ + + return 0; +} + +static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) +{ + struct coproc_instance *cp_inst = fp->private_data; + struct vas_window *txwin; + unsigned long pfn; + u64 paste_addr; + pgprot_t prot; + int rc; + + txwin = cp_inst->txwin; + + if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) { + pr_debug("%s(): size 0x%zx, PAGE_SIZE 0x%zx\n", __func__, + (vma->vm_end - vma->vm_start), PAGE_SIZE); + return -EINVAL; + } + + /* Ensure instance has an open send window */ + if (!txwin) { + pr_err("%s(): No send window open?\n", __func__); + return -EINVAL; + } + + vas_win_paste_addr(txwin, &paste_addr, NULL); + pfn = paste_addr >> PAGE_SHIFT; + + /* flags, page_prot from cxl_mmap(), except we want cachable */ + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_cached(vma->vm_page_prot); + + prot = __pgprot(pgprot_val(vma->vm_page_prot) | _PAGE_DIRTY); + + rc = remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff, + vma->vm_end - vma->vm_start, prot); + + pr_devel("%s(): paste addr %llx at %lx, rc %d\n", __func__, + paste_addr, vma->vm_start, rc); + + return rc; +} + +static long coproc_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case VAS_TX_WIN_OPEN: + return coproc_ioc_tx_win_open(fp, arg); + default: + return -EINVAL; + } +} + +static struct file_operations coproc_fops = { + .open = coproc_open, + .release = coproc_release, + .mmap = coproc_mmap, + .unlocked_ioctl = coproc_ioctl, +}; + +/* + * Supporting only nx-gzip coprocessor type now, but this API code + * extended to other coprocessor types later. + */ +int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type, + const char *name) +{ + int rc = -EINVAL; + dev_t devno; + + rc = alloc_chrdev_region(&coproc_device.devt, 1, 1, name); + if (rc) { + pr_err("Unable to allocate coproc major number: %i\n", rc); + return rc; + } + + pr_devel("%s device allocated, dev [%i,%i]\n", name, + MAJOR(coproc_device.devt), MINOR(coproc_device.devt)); + + coproc_device.class = class_create(mod, name); + if (IS_ERR(coproc_device.class)) { + rc = PTR_ERR(coproc_device.class); + pr_err("Unable to create %s class %d\n", name, rc); + goto err_class; + } + coproc_device.class->devnode = coproc_devnode; + coproc_device.cop_type = cop_type; + + coproc_fops.owner = mod; + cdev_init(&coproc_device.cdev, &coproc_fops); + + devno = MKDEV(MAJOR(coproc_device.devt), 0); + rc = cdev_add(&coproc_device.cdev, devno, 1); + if (rc) { + pr_err("cdev_add() failed %d\n", rc); + goto err_cdev; + } + + coproc_device.device = device_create(coproc_device.class, NULL, + devno, NULL, name, MINOR(devno)); + if (IS_ERR(coproc_device.device)) { + rc = PTR_ERR(coproc_device.device); + pr_err("Unable to create coproc-%d %d\n", MINOR(devno), rc); + goto err; + } + + pr_devel("%s: Added dev [%d,%d]\n", __func__, MAJOR(devno), + MINOR(devno)); + + return 0; + +err: + cdev_del(&coproc_device.cdev); +err_cdev: + class_destroy(coproc_device.class); +err_class: + unregister_chrdev_region(coproc_device.devt, 1); + return rc; +} +EXPORT_SYMBOL_GPL(vas_register_coproc_api); + +void vas_unregister_coproc_api(void) +{ + dev_t devno; + + cdev_del(&coproc_device.cdev); + devno = MKDEV(MAJOR(coproc_device.devt), 0); + device_destroy(coproc_device.class, devno); + + class_destroy(coproc_device.class); + unregister_chrdev_region(coproc_device.devt, 1); +} +EXPORT_SYMBOL_GPL(vas_unregister_coproc_api); diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig index 
619b093a0657..043eefbbdd28 100644 --- a/arch/powerpc/platforms/powernv/Kconfig +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -33,20 +33,6 @@ config PPC_MEMTRACE Enabling this option allows for runtime allocation of memory (RAM) for hardware tracing. -config PPC_VAS - bool "IBM Virtual Accelerator Switchboard (VAS)" - depends on PPC_POWERNV && PPC_64K_PAGES - default y - help - This enables support for IBM Virtual Accelerator Switchboard (VAS). - - VAS allows accelerators in co-processors like NX-GZIP and NX-842 - to be accessible to kernel subsystems and user processes. - - VAS adapters are found in POWER9 based systems. - - If unsure, say N. - config SCOM_DEBUGFS bool "Expose SCOM controllers via debugfs" depends on DEBUG_FS diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index be2546b96816..dc7b37c23b60 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -18,7 +18,7 @@ obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o obj-$(CONFIG_OPAL_PRD) += opal-prd.o obj-$(CONFIG_PERF_EVENTS) += opal-imc.o obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o -obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o vas-fault.o vas-api.o +obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o vas-fault.o obj-$(CONFIG_OCXL_BASE) += ocxl.o obj-$(CONFIG_SCOM_DEBUGFS) += opal-xscom.o obj-$(CONFIG_PPC_SECURE_BOOT) += opal-secvar.o diff --git a/arch/powerpc/platforms/powernv/vas-api.c b/arch/powerpc/platforms/powernv/vas-api.c deleted file mode 100644 index 98ed5d8c5441..000000000000 --- a/arch/powerpc/platforms/powernv/vas-api.c +++ /dev/null @@ -1,278 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * VAS user space API for its accelerators (Only NX-GZIP is supported now) - * Copyright (C) 2019 Haren Myneni, IBM Corp - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "vas.h" - -/* - * The driver creates the device node that can be used as follows: - * For NX-GZIP - * - * fd = open("/dev/crypto/nx-gzip", O_RDWR); - * rc = ioctl(fd, VAS_TX_WIN_OPEN, &attr); - * paste_addr = mmap(NULL, PAGE_SIZE, prot, MAP_SHARED, fd, 0ULL). - * vas_copy(&crb, 0, 1); - * vas_paste(paste_addr, 0, 1); - * close(fd) or exit process to close window. - * - * where "vas_copy" and "vas_paste" are defined in copy-paste.h. - * copy/paste returns to the user space directly. So refer NX hardware - * documententation for exact copy/paste usage and completion / error - * conditions. - */ - -/* - * Wrapper object for the nx-gzip device - there is just one instance of - * this node for the whole system. 
- */ -static struct coproc_dev { - struct cdev cdev; - struct device *device; - char *name; - dev_t devt; - struct class *class; - enum vas_cop_type cop_type; -} coproc_device; - -struct coproc_instance { - struct coproc_dev *coproc; - struct vas_window *txwin; -}; - -static char *coproc_devnode(struct device *dev, umode_t *mode) -{ - return kasprintf(GFP_KERNEL, "crypto/%s", dev_name(dev)); -} - -static int coproc_open(struct inode *inode, struct file *fp) -{ - struct coproc_instance *cp_inst; - - cp_inst = kzalloc(sizeof(*cp_inst), GFP_KERNEL); - if (!cp_inst) - return -ENOMEM; - - cp_inst->coproc = container_of(inode->i_cdev, struct coproc_dev, - cdev); - fp->private_data = cp_inst; - - return 0; -} - -static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg) -{ - void __user *uptr = (void __user *)arg; - struct vas_tx_win_attr txattr = {}; - struct vas_tx_win_open_attr uattr; - struct coproc_instance *cp_inst; - struct vas_window *txwin; - int rc, vasid; - - cp_inst = fp->private_data; - - /* - * One window for file descriptor - */ - if (cp_inst->txwin) - return -EEXIST; - - rc = copy_from_user(&uattr, uptr, sizeof(uattr)); - if (rc) { - pr_err("%s(): copy_from_user() returns %d\n", __func__, rc); - return -EFAULT; - } - - if (uattr.version != 1) { - pr_err("Invalid version\n"); - return -EINVAL; - } - - vasid = uattr.vas_id; - - vas_init_tx_win_attr(&txattr, cp_inst->coproc->cop_type); - - txattr.lpid = mfspr(SPRN_LPID); - txattr.pidr = mfspr(SPRN_PID); - txattr.user_win = true; - txattr.rsvd_txbuf_count = false; - txattr.pswid = false; - - pr_devel("Pid %d: Opening txwin, PIDR %ld\n", txattr.pidr, - mfspr(SPRN_PID)); - - txwin = vas_tx_win_open(vasid, cp_inst->coproc->cop_type, &txattr); - if (IS_ERR(txwin)) { - pr_err("%s() vas_tx_win_open() failed, %ld\n", __func__, - PTR_ERR(txwin)); - return PTR_ERR(txwin); - } - - cp_inst->txwin = txwin; - - return 0; -} - -static int coproc_release(struct inode *inode, struct file *fp) -{ - struct coproc_instance *cp_inst = fp->private_data; - - if (cp_inst->txwin) { - vas_win_close(cp_inst->txwin); - cp_inst->txwin = NULL; - } - - kfree(cp_inst); - fp->private_data = NULL; - - /* - * We don't know here if user has other receive windows - * open, so we can't really call clear_thread_tidr(). - * So, once the process calls set_thread_tidr(), the - * TIDR value sticks around until process exits, resulting - * in an extra copy in restore_sprs(). 
- */ - - return 0; -} - -static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) -{ - struct coproc_instance *cp_inst = fp->private_data; - struct vas_window *txwin; - unsigned long pfn; - u64 paste_addr; - pgprot_t prot; - int rc; - - txwin = cp_inst->txwin; - - if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) { - pr_debug("%s(): size 0x%zx, PAGE_SIZE 0x%zx\n", __func__, - (vma->vm_end - vma->vm_start), PAGE_SIZE); - return -EINVAL; - } - - /* Ensure instance has an open send window */ - if (!txwin) { - pr_err("%s(): No send window open?\n", __func__); - return -EINVAL; - } - - vas_win_paste_addr(txwin, &paste_addr, NULL); - pfn = paste_addr >> PAGE_SHIFT; - - /* flags, page_prot from cxl_mmap(), except we want cachable */ - vma->vm_flags |= VM_IO | VM_PFNMAP; - vma->vm_page_prot = pgprot_cached(vma->vm_page_prot); - - prot = __pgprot(pgprot_val(vma->vm_page_prot) | _PAGE_DIRTY); - - rc = remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff, - vma->vm_end - vma->vm_start, prot); - - pr_devel("%s(): paste addr %llx at %lx, rc %d\n", __func__, - paste_addr, vma->vm_start, rc); - - return rc; -} - -static long coproc_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) -{ - switch (cmd) { - case VAS_TX_WIN_OPEN: - return coproc_ioc_tx_win_open(fp, arg); - default: - return -EINVAL; - } -} - -static struct file_operations coproc_fops = { - .open = coproc_open, - .release = coproc_release, - .mmap = coproc_mmap, - .unlocked_ioctl = coproc_ioctl, -}; - -/* - * Supporting only nx-gzip coprocessor type now, but this API code - * extended to other coprocessor types later. - */ -int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type, - const char *name) -{ - int rc = -EINVAL; - dev_t devno; - - rc = alloc_chrdev_region(&coproc_device.devt, 1, 1, name); - if (rc) { - pr_err("Unable to allocate coproc major number: %i\n", rc); - return rc; - } - - pr_devel("%s device allocated, dev [%i,%i]\n", name, - MAJOR(coproc_device.devt), MINOR(coproc_device.devt)); - - coproc_device.class = class_create(mod, name); - if (IS_ERR(coproc_device.class)) { - rc = PTR_ERR(coproc_device.class); - pr_err("Unable to create %s class %d\n", name, rc); - goto err_class; - } - coproc_device.class->devnode = coproc_devnode; - coproc_device.cop_type = cop_type; - - coproc_fops.owner = mod; - cdev_init(&coproc_device.cdev, &coproc_fops); - - devno = MKDEV(MAJOR(coproc_device.devt), 0); - rc = cdev_add(&coproc_device.cdev, devno, 1); - if (rc) { - pr_err("cdev_add() failed %d\n", rc); - goto err_cdev; - } - - coproc_device.device = device_create(coproc_device.class, NULL, - devno, NULL, name, MINOR(devno)); - if (IS_ERR(coproc_device.device)) { - rc = PTR_ERR(coproc_device.device); - pr_err("Unable to create coproc-%d %d\n", MINOR(devno), rc); - goto err; - } - - pr_devel("%s: Added dev [%d,%d]\n", __func__, MAJOR(devno), - MINOR(devno)); - - return 0; - -err: - cdev_del(&coproc_device.cdev); -err_cdev: - class_destroy(coproc_device.class); -err_class: - unregister_chrdev_region(coproc_device.devt, 1); - return rc; -} -EXPORT_SYMBOL_GPL(vas_register_coproc_api); - -void vas_unregister_coproc_api(void) -{ - dev_t devno; - - cdev_del(&coproc_device.cdev); - devno = MKDEV(MAJOR(coproc_device.devt), 0); - device_destroy(coproc_device.class, devno); - - class_destroy(coproc_device.class); - unregister_chrdev_region(coproc_device.devt, 1); -} -EXPORT_SYMBOL_GPL(vas_unregister_coproc_api); diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h index 
c7db3190baca..e5027c4226df 100644 --- a/arch/powerpc/platforms/powernv/vas.h +++ b/arch/powerpc/platforms/powernv/vas.h @@ -438,8 +438,6 @@ extern irqreturn_t vas_fault_handler(int irq, void *dev_id); extern void vas_return_credit(struct vas_window *window, bool tx); extern struct vas_window *vas_pswid_to_window(struct vas_instance *vinst, uint32_t pswid); -extern void vas_win_paste_addr(struct vas_window *window, u64 *addr, - int *len); static inline int vas_window_pid(struct vas_window *window) { -- cgit v1.2.3 From 06c6fad9bfe0b6439e18ea1f1cf0d178405ccf25 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Thu, 17 Jun 2021 13:30:24 -0700 Subject: powerpc/powernv/vas: Rename register/unregister functions powerNV and pseries drivers register / unregister to the corresponding platform specific VAS separately. Then these VAS functions call the common API with the specific window operations. So rename powerNV VAS API register/unregister functions. Signed-off-by: Haren Myneni Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/9db00d58dbdcb7cfc07a1df95f3d2a9e3e5d746a.camel@linux.ibm.com --- arch/powerpc/include/asm/vas.h | 3 +++ arch/powerpc/platforms/book3s/vas-api.c | 2 -- arch/powerpc/platforms/powernv/vas-window.c | 18 ++++++++++++++++++ drivers/crypto/nx/nx-common-powernv.c | 6 +++--- 4 files changed, 24 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index 3be76e813e2d..6076adf9ab4f 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -164,6 +164,9 @@ int vas_paste_crb(struct vas_window *win, int offset, bool re); void vas_win_paste_addr(struct vas_window *window, u64 *addr, int *len); +int vas_register_api_powernv(struct module *mod, enum vas_cop_type cop_type, + const char *name); +void vas_unregister_api_powernv(void); /* * Register / unregister coprocessor type to VAS API which will be exported diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c index cfc9d7dd65ab..72c126d87216 100644 --- a/arch/powerpc/platforms/book3s/vas-api.c +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -262,7 +262,6 @@ err_class: unregister_chrdev_region(coproc_device.devt, 1); return rc; } -EXPORT_SYMBOL_GPL(vas_register_coproc_api); void vas_unregister_coproc_api(void) { @@ -275,4 +274,3 @@ void vas_unregister_coproc_api(void) class_destroy(coproc_device.class); unregister_chrdev_region(coproc_device.devt, 1); } -EXPORT_SYMBOL_GPL(vas_unregister_coproc_api); diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c index 7ba0840fc3b5..41712b4b268e 100644 --- a/arch/powerpc/platforms/powernv/vas-window.c +++ b/arch/powerpc/platforms/powernv/vas-window.c @@ -1442,3 +1442,21 @@ struct vas_window *vas_pswid_to_window(struct vas_instance *vinst, return window; } + +/* + * Supporting only nx-gzip coprocessor type now, but this API code + * extended to other coprocessor types later. 
+ */ +int vas_register_api_powernv(struct module *mod, enum vas_cop_type cop_type, + const char *name) +{ + + return vas_register_coproc_api(mod, cop_type, name); +} +EXPORT_SYMBOL_GPL(vas_register_api_powernv); + +void vas_unregister_api_powernv(void) +{ + vas_unregister_coproc_api(); +} +EXPORT_SYMBOL_GPL(vas_unregister_api_powernv); diff --git a/drivers/crypto/nx/nx-common-powernv.c b/drivers/crypto/nx/nx-common-powernv.c index 446f611726df..3b159f2fae17 100644 --- a/drivers/crypto/nx/nx-common-powernv.c +++ b/drivers/crypto/nx/nx-common-powernv.c @@ -1092,8 +1092,8 @@ static __init int nx_compress_powernv_init(void) * normal FIFO priority is assigned for userspace. * 842 compression is supported only in kernel. */ - ret = vas_register_coproc_api(THIS_MODULE, VAS_COP_TYPE_GZIP, - "nx-gzip"); + ret = vas_register_api_powernv(THIS_MODULE, VAS_COP_TYPE_GZIP, + "nx-gzip"); /* * GZIP is not supported in kernel right now. @@ -1129,7 +1129,7 @@ static void __exit nx_compress_powernv_exit(void) * use. So delete this API use for GZIP engine. */ if (!nx842_ct) - vas_unregister_coproc_api(); + vas_unregister_api_powernv(); crypto_unregister_alg(&nx842_powernv_alg); -- cgit v1.2.3 From 1a0d0d5ed5e3cd9e3fc1ad4459f1db2f3618fce0 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Thu, 17 Jun 2021 13:31:06 -0700 Subject: powerpc/vas: Add platform specific user window operations PowerNV uses registers to open/close VAS windows, and getting the paste address. Whereas the hypervisor calls are used on PowerVM. This patch adds the platform specific user space window operations and register with the common VAS user space interface. Signed-off-by: Haren Myneni Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f85091f4ace67f951ac04d60394d67b21e2f5d3c.camel@linux.ibm.com --- arch/powerpc/include/asm/vas.h | 16 +++++++-- arch/powerpc/platforms/book3s/vas-api.c | 53 +++++++++++++++++------------ arch/powerpc/platforms/powernv/vas-window.c | 45 +++++++++++++++++++++++- arch/powerpc/platforms/powernv/vas.h | 2 ++ 4 files changed, 91 insertions(+), 25 deletions(-) diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index 6076adf9ab4f..163a8bb85d02 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -5,6 +5,7 @@ #ifndef _ASM_POWERPC_VAS_H #define _ASM_POWERPC_VAS_H +#include struct vas_window; @@ -48,6 +49,16 @@ enum vas_cop_type { VAS_COP_TYPE_MAX, }; +/* + * User space window operations used for powernv and powerVM + */ +struct vas_user_win_ops { + struct vas_window * (*open_win)(int vas_id, u64 flags, + enum vas_cop_type); + u64 (*paste_addr)(struct vas_window *); + int (*close_win)(struct vas_window *); +}; + /* * Receive window attributes specified by the (in-kernel) owner of window. */ @@ -162,8 +173,6 @@ int vas_copy_crb(void *crb, int offset); */ int vas_paste_crb(struct vas_window *win, int offset, bool re); -void vas_win_paste_addr(struct vas_window *window, u64 *addr, - int *len); int vas_register_api_powernv(struct module *mod, enum vas_cop_type cop_type, const char *name); void vas_unregister_api_powernv(void); @@ -177,7 +186,8 @@ void vas_unregister_api_powernv(void); * used for others in future. 
 */
int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type,
-			    const char *name);
+			    const char *name,
+			    const struct vas_user_win_ops *vops);
 void vas_unregister_coproc_api(void);
 
 #endif /* __ASM_POWERPC_VAS_H */
diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c
index 72c126d87216..ad566464b55b 100644
--- a/arch/powerpc/platforms/book3s/vas-api.c
+++ b/arch/powerpc/platforms/book3s/vas-api.c
@@ -42,6 +42,7 @@ static struct coproc_dev {
 	dev_t devt;
 	struct class *class;
 	enum vas_cop_type cop_type;
+	const struct vas_user_win_ops *vops;
 } coproc_device;
 
 struct coproc_instance {
@@ -72,11 +73,10 @@ static int coproc_open(struct inode *inode, struct file *fp)
 static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg)
 {
 	void __user *uptr = (void __user *)arg;
-	struct vas_tx_win_attr txattr = {};
 	struct vas_tx_win_open_attr uattr;
 	struct coproc_instance *cp_inst;
 	struct vas_window *txwin;
-	int rc, vasid;
+	int rc;
 
 	cp_inst = fp->private_data;
 
@@ -93,27 +93,20 @@ static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg)
 	}
 
 	if (uattr.version != 1) {
-		pr_err("Invalid version\n");
+		pr_err("Invalid window open API version\n");
 		return -EINVAL;
 	}
 
-	vasid = uattr.vas_id;
-
-	vas_init_tx_win_attr(&txattr, cp_inst->coproc->cop_type);
-
-	txattr.lpid = mfspr(SPRN_LPID);
-	txattr.pidr = mfspr(SPRN_PID);
-	txattr.user_win = true;
-	txattr.rsvd_txbuf_count = false;
-	txattr.pswid = false;
-
-	pr_devel("Pid %d: Opening txwin, PIDR %ld\n", txattr.pidr,
-			mfspr(SPRN_PID));
+	if (!cp_inst->coproc->vops || !cp_inst->coproc->vops->open_win) {
+		pr_err("VAS API is not registered\n");
+		return -EACCES;
+	}
 
-	txwin = vas_tx_win_open(vasid, cp_inst->coproc->cop_type, &txattr);
+	txwin = cp_inst->coproc->vops->open_win(uattr.vas_id, uattr.flags,
+						cp_inst->coproc->cop_type);
 	if (IS_ERR(txwin)) {
-		pr_err("%s() vas_tx_win_open() failed, %ld\n", __func__,
-			PTR_ERR(txwin));
+		pr_err("%s() VAS window open failed, %ld\n", __func__,
+			PTR_ERR(txwin));
 		return PTR_ERR(txwin);
 	}
 
@@ -125,9 +118,15 @@ static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg)
 static int coproc_release(struct inode *inode, struct file *fp)
 {
 	struct coproc_instance *cp_inst = fp->private_data;
+	int rc;
 
 	if (cp_inst->txwin) {
-		vas_win_close(cp_inst->txwin);
+		if (cp_inst->coproc->vops &&
+			cp_inst->coproc->vops->close_win) {
+			rc = cp_inst->coproc->vops->close_win(cp_inst->txwin);
+			if (rc)
+				return rc;
+		}
 		cp_inst->txwin = NULL;
 	}
 
@@ -168,7 +167,17 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma)
 		return -EINVAL;
 	}
 
-	vas_win_paste_addr(txwin, &paste_addr, NULL);
+	if (!cp_inst->coproc->vops || !cp_inst->coproc->vops->paste_addr) {
+		pr_err("%s(): VAS API is not registered\n", __func__);
+		return -EACCES;
+	}
+
+	paste_addr = cp_inst->coproc->vops->paste_addr(txwin);
+	if (!paste_addr) {
+		pr_err("%s(): Window paste address failed\n", __func__);
+		return -EINVAL;
+	}
+
 	pfn = paste_addr >> PAGE_SHIFT;
 
 	/* flags, page_prot from cxl_mmap(), except we want cachable */
@@ -208,7 +217,8 @@ static struct file_operations coproc_fops = {
  * extended to other coprocessor types later.
 */
int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type,
-			    const char *name)
+			    const char *name,
+			    const struct vas_user_win_ops *vops)
 {
 	int rc = -EINVAL;
 	dev_t devno;
@@ -230,6 +240,7 @@ int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type,
 	}
 	coproc_device.class->devnode = coproc_devnode;
 	coproc_device.cop_type = cop_type;
+	coproc_device.vops = vops;
 
 	coproc_fops.owner = mod;
 	cdev_init(&coproc_device.cdev, &coproc_fops);
diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c
index 41712b4b268e..26440da4fc6a 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -16,6 +16,7 @@
 #include
 #include
 #include
+#include
 
 #include "vas.h"
 #include "copy-paste.h"
@@ -1443,6 +1444,48 @@ struct vas_window *vas_pswid_to_window(struct vas_instance *vinst,
 	return window;
 }
 
+static struct vas_window *vas_user_win_open(int vas_id, u64 flags,
+				enum vas_cop_type cop_type)
+{
+	struct vas_tx_win_attr txattr = {};
+
+	vas_init_tx_win_attr(&txattr, cop_type);
+
+	txattr.lpid = mfspr(SPRN_LPID);
+	txattr.pidr = mfspr(SPRN_PID);
+	txattr.user_win = true;
+	txattr.rsvd_txbuf_count = false;
+	txattr.pswid = false;
+
+	pr_devel("Pid %d: Opening txwin, PIDR %ld\n", txattr.pidr,
+			mfspr(SPRN_PID));
+
+	return vas_tx_win_open(vas_id, cop_type, &txattr);
+}
+
+static u64 vas_user_win_paste_addr(struct vas_window *win)
+{
+	u64 paste_addr;
+
+	vas_win_paste_addr(win, &paste_addr, NULL);
+
+	return paste_addr;
+}
+
+static int vas_user_win_close(struct vas_window *txwin)
+{
+
+	vas_win_close(txwin);
+
+	return 0;
+}
+
+static const struct vas_user_win_ops vops = {
+	.open_win	= vas_user_win_open,
+	.paste_addr	= vas_user_win_paste_addr,
+	.close_win	= vas_user_win_close,
+};
+
 /*
  * Supporting only nx-gzip coprocessor type now, but this API code
  * extended to other coprocessor types later.
@@ -1451,7 +1494,7 @@
 int vas_register_api_powernv(struct module *mod, enum vas_cop_type cop_type,
 			     const char *name)
 {
-	return vas_register_coproc_api(mod, cop_type, name);
+	return vas_register_coproc_api(mod, cop_type, name, &vops);
 }
 EXPORT_SYMBOL_GPL(vas_register_api_powernv);
 
diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h
index e5027c4226df..c2b1e12efca5 100644
--- a/arch/powerpc/platforms/powernv/vas.h
+++ b/arch/powerpc/platforms/powernv/vas.h
@@ -438,6 +438,8 @@ extern irqreturn_t vas_fault_handler(int irq, void *dev_id);
 extern void vas_return_credit(struct vas_window *window, bool tx);
 extern struct vas_window *vas_pswid_to_window(struct vas_instance *vinst,
 						uint32_t pswid);
+extern void vas_win_paste_addr(struct vas_window *window, u64 *addr,
+				int *len);
 
 static inline int vas_window_pid(struct vas_window *window)
 {
--
cgit v1.2.3


From 3856aa542d90ed79cd5ed4cfd828b1fb04017131 Mon Sep 17 00:00:00 2001
From: Haren Myneni
Date: Thu, 17 Jun 2021 13:31:43 -0700
Subject: powerpc/vas: Create take/drop pid and mm reference functions

Take pid and mm references when each window is opened and drop them
when the window is closed. This functionality is needed for both
powerNV and pseries. So this patch moves the existing code into
common functions in the book3s platform vas-api.c.

Signed-off-by: Haren Myneni
Reviewed-by: Nicholas Piggin
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/2fa40df962250a737c804e58202924717b39e381.camel@linux.ibm.com
---
 arch/powerpc/include/asm/vas.h              | 40 +++++++++++++++++++++
 arch/powerpc/platforms/book3s/vas-api.c     | 39 ++++++++++++++++++++
 arch/powerpc/platforms/powernv/vas-fault.c  | 10 +++---
 arch/powerpc/platforms/powernv/vas-window.c | 55 +++--------------------------
 arch/powerpc/platforms/powernv/vas.h        |  6 ++--
 5 files changed, 91 insertions(+), 59 deletions(-)

diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h
index 163a8bb85d02..71cff6d6bf3a 100644
--- a/arch/powerpc/include/asm/vas.h
+++ b/arch/powerpc/include/asm/vas.h
@@ -5,6 +5,9 @@
 #ifndef _ASM_POWERPC_VAS_H
 #define _ASM_POWERPC_VAS_H
+#include
+#include
+#include
 #include
 
 struct vas_window;
@@ -49,6 +52,17 @@ enum vas_cop_type {
 	VAS_COP_TYPE_MAX,
 };
 
+/*
+ * User space VAS windows are opened by tasks and take references
+ * to pid and mm until windows are closed.
+ * Stores pid, mm, and tgid for each window.
+ */
+struct vas_user_win_ref {
+	struct pid *pid;	/* PID of owner */
+	struct pid *tgid;	/* Thread group ID of owner */
+	struct mm_struct *mm;	/* Linux process mm_struct */
+};
+
 /*
  * User space window operations used for powernv and powerVM
  */
@@ -59,6 +73,31 @@ struct vas_user_win_ops {
 	int (*close_win)(struct vas_window *);
 };
 
+static inline void put_vas_user_win_ref(struct vas_user_win_ref *ref)
+{
+	/* Drop references to pid, tgid, and mm */
+	put_pid(ref->pid);
+	put_pid(ref->tgid);
+	if (ref->mm)
+		mmdrop(ref->mm);
+}
+
+static inline void vas_user_win_add_mm_context(struct vas_user_win_ref *ref)
+{
+	mm_context_add_vas_window(ref->mm);
+	/*
+	 * Even a process that has no foreign real address mapping can
+	 * use an unpaired COPY instruction (to no real effect). Issue
+	 * CP_ABORT to clear any pending COPY and prevent a covert
+	 * channel.
+	 *
+	 * __switch_to() will issue CP_ABORT on future context switches
+	 * if process / thread has any open VAS window (Use
+	 * current->mm->context.vas_windows).
+	 */
+	asm volatile(PPC_CP_ABORT);
+}
+
 /*
  * Receive window attributes specified by the (in-kernel) owner of window.
  */
@@ -190,4 +229,5 @@ int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type,
 			    const struct vas_user_win_ops *vops);
 void vas_unregister_coproc_api(void);
 
+int get_vas_user_win_ref(struct vas_user_win_ref *task_ref);
 #endif /* __ASM_POWERPC_VAS_H */
diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c
index ad566464b55b..4ce82500f4c5 100644
--- a/arch/powerpc/platforms/book3s/vas-api.c
+++ b/arch/powerpc/platforms/book3s/vas-api.c
@@ -55,6 +55,45 @@ static char *coproc_devnode(struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "crypto/%s", dev_name(dev));
 }
 
+/*
+ * Take reference to pid and mm
+ */
+int get_vas_user_win_ref(struct vas_user_win_ref *task_ref)
+{
+	/*
+	 * Window opened by a child thread may not be closed when
+	 * it exits. So take reference to its pid and release it
+	 * when the window is free by parent thread.
+	 * Acquire a reference to the task's pid to make sure
+	 * pid will not be re-used - needed only for multithread
+	 * applications.
+	 */
+	task_ref->pid = get_task_pid(current, PIDTYPE_PID);
+	/*
+	 * Acquire a reference to the task's mm.
+	 */
+	task_ref->mm = get_task_mm(current);
+	if (!task_ref->mm) {
+		put_pid(task_ref->pid);
+		pr_err("VAS: pid(%d): mm_struct is not found\n",
+				current->pid);
+		return -EPERM;
+	}
+
+	mmgrab(task_ref->mm);
+	mmput(task_ref->mm);
+	/*
+	 * Process closes window during exit. In the case of
+	 * multithread application, the child thread can open
+	 * window and can exit without closing it. So takes tgid
+	 * reference until window closed to make sure tgid is not
+	 * reused.
+	 */
+	task_ref->tgid = find_get_pid(task_tgid_vnr(current));
+
+	return 0;
+}
+
 static int coproc_open(struct inode *inode, struct file *fp)
 {
 	struct coproc_instance *cp_inst;
diff --git a/arch/powerpc/platforms/powernv/vas-fault.c b/arch/powerpc/platforms/powernv/vas-fault.c
index 3d21fce254b7..ac3a71ec3bd5 100644
--- a/arch/powerpc/platforms/powernv/vas-fault.c
+++ b/arch/powerpc/platforms/powernv/vas-fault.c
@@ -73,7 +73,7 @@ static void update_csb(struct vas_window *window,
 	 * NX user space windows can not be opened for task->mm=NULL
 	 * and faults will not be generated for kernel requests.
 	 */
-	if (WARN_ON_ONCE(!window->mm || !window->user_win))
+	if (WARN_ON_ONCE(!window->task_ref.mm || !window->user_win))
 		return;
 
 	csb_addr = (void __user *)be64_to_cpu(crb->csb_addr);
@@ -92,7 +92,7 @@ static void update_csb(struct vas_window *window,
 	csb.address = crb->stamp.nx.fault_storage_addr;
 	csb.flags = 0;
 
-	pid = window->pid;
+	pid = window->task_ref.pid;
 	tsk = get_pid_task(pid, PIDTYPE_PID);
 	/*
 	 * Process closes send window after all pending NX requests are
@@ -111,7 +111,7 @@ static void update_csb(struct vas_window *window,
 	 * a window and exits without closing it.
 	 */
 	if (!tsk) {
-		pid = window->tgid;
+		pid = window->task_ref.tgid;
 		tsk = get_pid_task(pid, PIDTYPE_PID);
 		/*
 		 * Parent thread (tgid) will be closing window when it
@@ -127,7 +127,7 @@ static void update_csb(struct vas_window *window,
 		return;
 	}
 
-	kthread_use_mm(window->mm);
+	kthread_use_mm(window->task_ref.mm);
 	rc = copy_to_user(csb_addr, &csb, sizeof(csb));
 	/*
 	 * User space polls on csb.flags (first byte). So add barrier
@@ -139,7 +139,7 @@ static void update_csb(struct vas_window *window,
 		smp_mb();
 		rc = copy_to_user(csb_addr, &csb, sizeof(u8));
 	}
-	kthread_unuse_mm(window->mm);
+	kthread_unuse_mm(window->task_ref.mm);
 	put_task_struct(tsk);
 
 	/* Success */
diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c
index 26440da4fc6a..6489e29085be 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -1065,51 +1065,11 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
 			rc = -ENODEV;
 			goto free_window;
 		}
-
-		/*
-		 * Window opened by a child thread may not be closed when
-		 * it exits. So take reference to its pid and release it
-		 * when the window is free by parent thread.
-		 * Acquire a reference to the task's pid to make sure
-		 * pid will not be re-used - needed only for multithread
-		 * applications.
-		 */
-		txwin->pid = get_task_pid(current, PIDTYPE_PID);
-		/*
-		 * Acquire a reference to the task's mm.
-		 */
-		txwin->mm = get_task_mm(current);
-
-		if (!txwin->mm) {
-			put_pid(txwin->pid);
-			pr_err("VAS: pid(%d): mm_struct is not found\n",
-				current->pid);
-			rc = -EPERM;
+		rc = get_vas_user_win_ref(&txwin->task_ref);
+		if (rc)
 			goto free_window;
-		}
 
-		mmgrab(txwin->mm);
-		mmput(txwin->mm);
-		mm_context_add_vas_window(txwin->mm);
-		/*
-		 * Process closes window during exit. In the case of
-		 * multithread application, the child thread can open
-		 * window and can exit without closing it. so takes tgid
-		 * reference until window closed to make sure tgid is not
-		 * reused.
-		 */
-		txwin->tgid = find_get_pid(task_tgid_vnr(current));
-		/*
-		 * Even a process that has no foreign real address mapping can
-		 * use an unpaired COPY instruction (to no real effect). Issue
-		 * CP_ABORT to clear any pending COPY and prevent a covert
-		 * channel.
-		 *
-		 * __switch_to() will issue CP_ABORT on future context switches
-		 * if process / thread has any open VAS window (Use
-		 * current->mm->context.vas_windows).
-		 */
-		asm volatile(PPC_CP_ABORT);
+		vas_user_win_add_mm_context(&txwin->task_ref);
 	}
 	set_vinst_win(vinst, txwin);
 
@@ -1340,13 +1300,8 @@ int vas_win_close(struct vas_window *window)
 	/* if send window, drop reference to matching receive window */
 	if (window->tx_win) {
 		if (window->user_win) {
-			/* Drop references to pid. tgid and mm */
-			put_pid(window->pid);
-			put_pid(window->tgid);
-			if (window->mm) {
-				mm_context_remove_vas_window(window->mm);
-				mmdrop(window->mm);
-			}
+			put_vas_user_win_ref(&window->task_ref);
+			mm_context_remove_vas_window(window->task_ref.mm);
 		}
 		put_rx_win(window->rxwin);
 	}
diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h
index c2b1e12efca5..614db6a80c67 100644
--- a/arch/powerpc/platforms/powernv/vas.h
+++ b/arch/powerpc/platforms/powernv/vas.h
@@ -357,11 +357,9 @@ struct vas_window {
 	bool user_win;		/* True if user space window */
 	void *hvwc_map;		/* HV window context */
 	void *uwc_map;		/* OS/User window context */
-	struct pid *pid;	/* Linux process id of owner */
-	struct pid *tgid;	/* Thread group ID of owner */
-	struct mm_struct *mm;	/* Linux process mm_struct */
 	int wcreds_max;		/* Window credits */
 
+	struct vas_user_win_ref task_ref;
 	char *dbgname;
 	struct dentry *dbgdir;
 
@@ -443,7 +441,7 @@ extern void vas_win_paste_addr(struct vas_window *window, u64 *addr,
 
 static inline int vas_window_pid(struct vas_window *window)
 {
-	return pid_vnr(window->pid);
+	return pid_vnr(window->task_ref.pid);
 }
 
 static inline void vas_log_write(struct vas_window *win, char *name,
--
cgit v1.2.3


From 3b26797350352479f37216d674c8e5d126faab66 Mon Sep 17 00:00:00 2001
From: Haren Myneni
Date: Thu, 17 Jun 2021 13:32:38 -0700
Subject: powerpc/vas: Move update_csb/dump_crb to common book3s platform

If a coprocessor encounters an error translating an address, the
VAS will cause an interrupt in the host. The kernel processes the
fault by updating the CSB. This functionality is the same for both
powerNV and pseries. So this patch moves these functions to the
common book3s vas-api.c; the actual functionality is not changed.
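To make the CSB handshake concrete, here is a minimal user-space
sketch of the other side of this protocol: the requester polling the
CSB that either NX or the kernel fault handler fills in. It assumes
the NX coprocessor status block layout (flags in the first byte, with
the valid bit CSB_V = 0x80, as in the kernel's
coprocessor_status_block); the struct and function names below are
illustrative only and are not part of this patch.

	#include <stdint.h>

	/* Mirrors the kernel's coprocessor_status_block (layout assumed) */
	struct nx_csb {
		volatile uint8_t flags;	/* CSB_V (0x80) set last, when valid */
		uint8_t cs;		/* completion sequence */
		uint8_t cc;		/* completion code, 0 on success */
		uint8_t ce;		/* completion extension */
		uint32_t count;		/* big-endian: bytes processed */
		uint64_t address;	/* big-endian: fault address on error */
	};

	/* Spin on the first byte only, then order the payload reads */
	static int wait_for_csb(struct nx_csb *csb)
	{
		while (!(csb->flags & 0x80))
			;	/* kernel writes payload, barrier, then flags */
		__asm__ volatile("lwsync" ::: "memory");
		return csb->cc;	/* e.g. CSB_CC_FAULT_ADDRESS on translation error */
	}

This is why vas_update_csb() copies the payload first and only then,
after smp_mb(), rewrites the first byte with CSB_V set.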
Signed-off-by: Haren Myneni Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/bf8d5b0770fa1ef5cba88c96580caa08d999d3b5.camel@linux.ibm.com --- arch/powerpc/include/asm/vas.h | 3 + arch/powerpc/platforms/book3s/vas-api.c | 167 +++++++++++++++++++++++++++++ arch/powerpc/platforms/powernv/vas-fault.c | 155 ++------------------------ 3 files changed, 179 insertions(+), 146 deletions(-) diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index 71cff6d6bf3a..6b41c0818958 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -230,4 +230,7 @@ int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type, void vas_unregister_coproc_api(void); int get_vas_user_win_ref(struct vas_user_win_ref *task_ref); +void vas_update_csb(struct coprocessor_request_block *crb, + struct vas_user_win_ref *task_ref); +void vas_dump_crb(struct coprocessor_request_block *crb); #endif /* __ASM_POWERPC_VAS_H */ diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c index 4ce82500f4c5..30172e52e16b 100644 --- a/arch/powerpc/platforms/book3s/vas-api.c +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -10,6 +10,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -94,6 +97,170 @@ int get_vas_user_win_ref(struct vas_user_win_ref *task_ref) return 0; } +/* + * Successful return must release the task reference with + * put_task_struct + */ +static bool ref_get_pid_and_task(struct vas_user_win_ref *task_ref, + struct task_struct **tskp, struct pid **pidp) +{ + struct task_struct *tsk; + struct pid *pid; + + pid = task_ref->pid; + tsk = get_pid_task(pid, PIDTYPE_PID); + if (!tsk) { + pid = task_ref->tgid; + tsk = get_pid_task(pid, PIDTYPE_PID); + /* + * Parent thread (tgid) will be closing window when it + * exits. So should not get here. + */ + if (WARN_ON_ONCE(!tsk)) + return false; + } + + /* Return if the task is exiting. */ + if (tsk->flags & PF_EXITING) { + put_task_struct(tsk); + return false; + } + + *tskp = tsk; + *pidp = pid; + + return true; +} + +/* + * Update the CSB to indicate a translation error. + * + * User space will be polling on CSB after the request is issued. + * If NX can handle the request without any issues, it updates CSB. + * Whereas if NX encounters page fault, the kernel will handle the + * fault and update CSB with translation error. + * + * If we are unable to update the CSB means copy_to_user failed due to + * invalid csb_addr, send a signal to the process. + */ +void vas_update_csb(struct coprocessor_request_block *crb, + struct vas_user_win_ref *task_ref) +{ + struct coprocessor_status_block csb; + struct kernel_siginfo info; + struct task_struct *tsk; + void __user *csb_addr; + struct pid *pid; + int rc; + + /* + * NX user space windows can not be opened for task->mm=NULL + * and faults will not be generated for kernel requests. + */ + if (WARN_ON_ONCE(!task_ref->mm)) + return; + + csb_addr = (void __user *)be64_to_cpu(crb->csb_addr); + + memset(&csb, 0, sizeof(csb)); + csb.cc = CSB_CC_FAULT_ADDRESS; + csb.ce = CSB_CE_TERMINATION; + csb.cs = 0; + csb.count = 0; + + /* + * NX operates and returns in BE format as defined CRB struct. + * So saves fault_storage_addr in BE as NX pastes in FIFO and + * expects user space to convert to CPU format. + */ + csb.address = crb->stamp.nx.fault_storage_addr; + csb.flags = 0; + + /* + * Process closes send window after all pending NX requests are + * completed. 
In multi-thread applications, a child thread can + * open a window and can exit without closing it. May be some + * requests are pending or this window can be used by other + * threads later. We should handle faults if NX encounters + * pages faults on these requests. Update CSB with translation + * error and fault address. If csb_addr passed by user space is + * invalid, send SEGV signal to pid saved in window. If the + * child thread is not running, send the signal to tgid. + * Parent thread (tgid) will close this window upon its exit. + * + * pid and mm references are taken when window is opened by + * process (pid). So tgid is used only when child thread opens + * a window and exits without closing it. + */ + + if (!ref_get_pid_and_task(task_ref, &tsk, &pid)) + return; + + kthread_use_mm(task_ref->mm); + rc = copy_to_user(csb_addr, &csb, sizeof(csb)); + /* + * User space polls on csb.flags (first byte). So add barrier + * then copy first byte with csb flags update. + */ + if (!rc) { + csb.flags = CSB_V; + /* Make sure update to csb.flags is visible now */ + smp_mb(); + rc = copy_to_user(csb_addr, &csb, sizeof(u8)); + } + kthread_unuse_mm(task_ref->mm); + put_task_struct(tsk); + + /* Success */ + if (!rc) + return; + + + pr_debug("Invalid CSB address 0x%p signalling pid(%d)\n", + csb_addr, pid_vnr(pid)); + + clear_siginfo(&info); + info.si_signo = SIGSEGV; + info.si_errno = EFAULT; + info.si_code = SEGV_MAPERR; + info.si_addr = csb_addr; + /* + * process will be polling on csb.flags after request is sent to + * NX. So generally CSB update should not fail except when an + * application passes invalid csb_addr. So an error message will + * be displayed and leave it to user space whether to ignore or + * handle this signal. + */ + rcu_read_lock(); + rc = kill_pid_info(SIGSEGV, &info, pid); + rcu_read_unlock(); + + pr_devel("%s(): pid %d kill_proc_info() rc %d\n", __func__, + pid_vnr(pid), rc); +} + +void vas_dump_crb(struct coprocessor_request_block *crb) +{ + struct data_descriptor_entry *dde; + struct nx_fault_stamp *nx; + + dde = &crb->source; + pr_devel("SrcDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n", + be64_to_cpu(dde->address), be32_to_cpu(dde->length), + dde->count, dde->index, dde->flags); + + dde = &crb->target; + pr_devel("TgtDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n", + be64_to_cpu(dde->address), be32_to_cpu(dde->length), + dde->count, dde->index, dde->flags); + + nx = &crb->stamp.nx; + pr_devel("NX Stamp: PSWID 0x%x, FSA 0x%llx, flags 0x%x, FS 0x%x\n", + be32_to_cpu(nx->pswid), + be64_to_cpu(crb->stamp.nx.fault_storage_addr), + nx->flags, nx->fault_status); +} + static int coproc_open(struct inode *inode, struct file *fp) { struct coproc_instance *cp_inst; diff --git a/arch/powerpc/platforms/powernv/vas-fault.c b/arch/powerpc/platforms/powernv/vas-fault.c index ac3a71ec3bd5..2729ac541fb3 100644 --- a/arch/powerpc/platforms/powernv/vas-fault.c +++ b/arch/powerpc/platforms/powernv/vas-fault.c @@ -26,150 +26,6 @@ */ #define VAS_FAULT_WIN_FIFO_SIZE (4 << 20) -static void dump_crb(struct coprocessor_request_block *crb) -{ - struct data_descriptor_entry *dde; - struct nx_fault_stamp *nx; - - dde = &crb->source; - pr_devel("SrcDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n", - be64_to_cpu(dde->address), be32_to_cpu(dde->length), - dde->count, dde->index, dde->flags); - - dde = &crb->target; - pr_devel("TgtDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n", - be64_to_cpu(dde->address), be32_to_cpu(dde->length), - dde->count, dde->index, 
dde->flags); - - nx = &crb->stamp.nx; - pr_devel("NX Stamp: PSWID 0x%x, FSA 0x%llx, flags 0x%x, FS 0x%x\n", - be32_to_cpu(nx->pswid), - be64_to_cpu(crb->stamp.nx.fault_storage_addr), - nx->flags, nx->fault_status); -} - -/* - * Update the CSB to indicate a translation error. - * - * User space will be polling on CSB after the request is issued. - * If NX can handle the request without any issues, it updates CSB. - * Whereas if NX encounters page fault, the kernel will handle the - * fault and update CSB with translation error. - * - * If we are unable to update the CSB means copy_to_user failed due to - * invalid csb_addr, send a signal to the process. - */ -static void update_csb(struct vas_window *window, - struct coprocessor_request_block *crb) -{ - struct coprocessor_status_block csb; - struct kernel_siginfo info; - struct task_struct *tsk; - void __user *csb_addr; - struct pid *pid; - int rc; - - /* - * NX user space windows can not be opened for task->mm=NULL - * and faults will not be generated for kernel requests. - */ - if (WARN_ON_ONCE(!window->task_ref.mm || !window->user_win)) - return; - - csb_addr = (void __user *)be64_to_cpu(crb->csb_addr); - - memset(&csb, 0, sizeof(csb)); - csb.cc = CSB_CC_FAULT_ADDRESS; - csb.ce = CSB_CE_TERMINATION; - csb.cs = 0; - csb.count = 0; - - /* - * NX operates and returns in BE format as defined CRB struct. - * So saves fault_storage_addr in BE as NX pastes in FIFO and - * expects user space to convert to CPU format. - */ - csb.address = crb->stamp.nx.fault_storage_addr; - csb.flags = 0; - - pid = window->task_ref.pid; - tsk = get_pid_task(pid, PIDTYPE_PID); - /* - * Process closes send window after all pending NX requests are - * completed. In multi-thread applications, a child thread can - * open a window and can exit without closing it. May be some - * requests are pending or this window can be used by other - * threads later. We should handle faults if NX encounters - * pages faults on these requests. Update CSB with translation - * error and fault address. If csb_addr passed by user space is - * invalid, send SEGV signal to pid saved in window. If the - * child thread is not running, send the signal to tgid. - * Parent thread (tgid) will close this window upon its exit. - * - * pid and mm references are taken when window is opened by - * process (pid). So tgid is used only when child thread opens - * a window and exits without closing it. - */ - if (!tsk) { - pid = window->task_ref.tgid; - tsk = get_pid_task(pid, PIDTYPE_PID); - /* - * Parent thread (tgid) will be closing window when it - * exits. So should not get here. - */ - if (WARN_ON_ONCE(!tsk)) - return; - } - - /* Return if the task is exiting. */ - if (tsk->flags & PF_EXITING) { - put_task_struct(tsk); - return; - } - - kthread_use_mm(window->task_ref.mm); - rc = copy_to_user(csb_addr, &csb, sizeof(csb)); - /* - * User space polls on csb.flags (first byte). So add barrier - * then copy first byte with csb flags update. - */ - if (!rc) { - csb.flags = CSB_V; - /* Make sure update to csb.flags is visible now */ - smp_mb(); - rc = copy_to_user(csb_addr, &csb, sizeof(u8)); - } - kthread_unuse_mm(window->task_ref.mm); - put_task_struct(tsk); - - /* Success */ - if (!rc) - return; - - pr_debug("Invalid CSB address 0x%p signalling pid(%d)\n", - csb_addr, pid_vnr(pid)); - - clear_siginfo(&info); - info.si_signo = SIGSEGV; - info.si_errno = EFAULT; - info.si_code = SEGV_MAPERR; - info.si_addr = csb_addr; - - /* - * process will be polling on csb.flags after request is sent to - * NX. 
So generally CSB update should not fail except when an - * application passes invalid csb_addr. So an error message will - * be displayed and leave it to user space whether to ignore or - * handle this signal. - */ - rcu_read_lock(); - rc = kill_pid_info(SIGSEGV, &info, pid); - rcu_read_unlock(); - - pr_devel("%s(): pid %d kill_proc_info() rc %d\n", __func__, - pid_vnr(pid), rc); -} - static void dump_fifo(struct vas_instance *vinst, void *entry) { unsigned long *end = vinst->fault_fifo + vinst->fault_fifo_size; @@ -272,7 +128,7 @@ irqreturn_t vas_fault_thread_fn(int irq, void *data) vinst->vas_id, vinst->fault_fifo, fifo, vinst->fault_crbs); - dump_crb(crb); + vas_dump_crb(crb); window = vas_pswid_to_window(vinst, be32_to_cpu(crb->stamp.nx.pswid)); @@ -293,7 +149,14 @@ irqreturn_t vas_fault_thread_fn(int irq, void *data) WARN_ON_ONCE(1); } else { - update_csb(window, crb); + /* + * NX sees faults only with user space windows. + */ + if (window->user_win) + vas_update_csb(crb, &window->task_ref); + else + WARN_ON_ONCE(!window->user_win); + /* * Return credit for send window after processing * fault CRB. -- cgit v1.2.3 From 7bc6f71bdff5f8921e324da0a8fad6f4e2e63a85 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Thu, 17 Jun 2021 13:33:28 -0700 Subject: powerpc/vas: Define and use common vas_window struct Many elements in vas_struct are used on PowerNV and PowerVM platforms. vas_window is used for both TX and RX windows on PowerNV and for TX windows on PowerVM. So some elements are specific to these platforms. So this patch defines common vas_window and platform specific window structs (pnv_vas_window on PowerNV). Also adds the corresponding changes in PowerNV vas code. Signed-off-by: Haren Myneni Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1698c35c158dfe52c6d2166667823d3d4a463353.camel@linux.ibm.com --- arch/powerpc/include/asm/vas.h | 14 ++- arch/powerpc/platforms/powernv/vas-debug.c | 27 ++--- arch/powerpc/platforms/powernv/vas-fault.c | 20 ++-- arch/powerpc/platforms/powernv/vas-trace.h | 4 +- arch/powerpc/platforms/powernv/vas-window.c | 161 +++++++++++++++------------- arch/powerpc/platforms/powernv/vas.h | 44 ++++---- 6 files changed, 144 insertions(+), 126 deletions(-) diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index 6b41c0818958..14ad7982874c 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -10,8 +10,6 @@ #include #include -struct vas_window; - /* * Min and max FIFO sizes are based on Version 1.05 Section 3.1.4.25 * (Local FIFO Size Register) of the VAS workbook. 
@@ -63,6 +61,18 @@ struct vas_user_win_ref { struct mm_struct *mm; /* Linux process mm_struct */ }; +/* + * Common VAS window struct on PowerNV and PowerVM + */ +struct vas_window { + u32 winid; + u32 wcreds_max; /* Window credits */ + enum vas_cop_type cop; + struct vas_user_win_ref task_ref; + char *dbgname; + struct dentry *dbgdir; +}; + /* * User space window operations used for powernv and powerVM */ diff --git a/arch/powerpc/platforms/powernv/vas-debug.c b/arch/powerpc/platforms/powernv/vas-debug.c index 41fa90d2f4ab..3ce89a4b54be 100644 --- a/arch/powerpc/platforms/powernv/vas-debug.c +++ b/arch/powerpc/platforms/powernv/vas-debug.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "vas.h" static struct dentry *vas_debugfs; @@ -28,7 +29,7 @@ static char *cop_to_str(int cop) static int info_show(struct seq_file *s, void *private) { - struct vas_window *window = s->private; + struct pnv_vas_window *window = s->private; mutex_lock(&vas_mutex); @@ -36,9 +37,9 @@ static int info_show(struct seq_file *s, void *private) if (!window->hvwc_map) goto unlock; - seq_printf(s, "Type: %s, %s\n", cop_to_str(window->cop), + seq_printf(s, "Type: %s, %s\n", cop_to_str(window->vas_win.cop), window->tx_win ? "Send" : "Receive"); - seq_printf(s, "Pid : %d\n", vas_window_pid(window)); + seq_printf(s, "Pid : %d\n", vas_window_pid(&window->vas_win)); unlock: mutex_unlock(&vas_mutex); @@ -47,7 +48,7 @@ unlock: DEFINE_SHOW_ATTRIBUTE(info); -static inline void print_reg(struct seq_file *s, struct vas_window *win, +static inline void print_reg(struct seq_file *s, struct pnv_vas_window *win, char *name, u32 reg) { seq_printf(s, "0x%016llx %s\n", read_hvwc_reg(win, name, reg), name); @@ -55,7 +56,7 @@ static inline void print_reg(struct seq_file *s, struct vas_window *win, static int hvwc_show(struct seq_file *s, void *private) { - struct vas_window *window = s->private; + struct pnv_vas_window *window = s->private; mutex_lock(&vas_mutex); @@ -103,8 +104,10 @@ unlock: DEFINE_SHOW_ATTRIBUTE(hvwc); -void vas_window_free_dbgdir(struct vas_window *window) +void vas_window_free_dbgdir(struct pnv_vas_window *pnv_win) { + struct vas_window *window = &pnv_win->vas_win; + if (window->dbgdir) { debugfs_remove_recursive(window->dbgdir); kfree(window->dbgname); @@ -113,21 +116,21 @@ void vas_window_free_dbgdir(struct vas_window *window) } } -void vas_window_init_dbgdir(struct vas_window *window) +void vas_window_init_dbgdir(struct pnv_vas_window *window) { struct dentry *d; if (!window->vinst->dbgdir) return; - window->dbgname = kzalloc(16, GFP_KERNEL); - if (!window->dbgname) + window->vas_win.dbgname = kzalloc(16, GFP_KERNEL); + if (!window->vas_win.dbgname) return; - snprintf(window->dbgname, 16, "w%d", window->winid); + snprintf(window->vas_win.dbgname, 16, "w%d", window->vas_win.winid); - d = debugfs_create_dir(window->dbgname, window->vinst->dbgdir); - window->dbgdir = d; + d = debugfs_create_dir(window->vas_win.dbgname, window->vinst->dbgdir); + window->vas_win.dbgdir = d; debugfs_create_file("info", 0444, d, window, &info_fops); debugfs_create_file("hvwc", 0444, d, window, &hvwc_fops); diff --git a/arch/powerpc/platforms/powernv/vas-fault.c b/arch/powerpc/platforms/powernv/vas-fault.c index 2729ac541fb3..a7aabc18039e 100644 --- a/arch/powerpc/platforms/powernv/vas-fault.c +++ b/arch/powerpc/platforms/powernv/vas-fault.c @@ -68,7 +68,7 @@ irqreturn_t vas_fault_thread_fn(int irq, void *data) struct vas_instance *vinst = data; struct coprocessor_request_block *crb, *entry; struct coprocessor_request_block 
buf; - struct vas_window *window; + struct pnv_vas_window *window; unsigned long flags; void *fifo; @@ -153,7 +153,7 @@ irqreturn_t vas_fault_thread_fn(int irq, void *data) * NX sees faults only with user space windows. */ if (window->user_win) - vas_update_csb(crb, &window->task_ref); + vas_update_csb(crb, &window->vas_win.task_ref); else WARN_ON_ONCE(!window->user_win); @@ -199,6 +199,7 @@ irqreturn_t vas_fault_handler(int irq, void *dev_id) int vas_setup_fault_window(struct vas_instance *vinst) { struct vas_rx_win_attr attr; + struct vas_window *win; vinst->fault_fifo_size = VAS_FAULT_WIN_FIFO_SIZE; vinst->fault_fifo = kzalloc(vinst->fault_fifo_size, GFP_KERNEL); @@ -227,18 +228,17 @@ int vas_setup_fault_window(struct vas_instance *vinst) attr.lnotify_pid = mfspr(SPRN_PID); attr.lnotify_tid = mfspr(SPRN_PID); - vinst->fault_win = vas_rx_win_open(vinst->vas_id, VAS_COP_TYPE_FAULT, - &attr); - - if (IS_ERR(vinst->fault_win)) { - pr_err("VAS: Error %ld opening FaultWin\n", - PTR_ERR(vinst->fault_win)); + win = vas_rx_win_open(vinst->vas_id, VAS_COP_TYPE_FAULT, &attr); + if (IS_ERR(win)) { + pr_err("VAS: Error %ld opening FaultWin\n", PTR_ERR(win)); kfree(vinst->fault_fifo); - return PTR_ERR(vinst->fault_win); + return PTR_ERR(win); } + vinst->fault_win = container_of(win, struct pnv_vas_window, vas_win); + pr_devel("VAS: Created FaultWin %d, LPID/PID/TID [%d/%d/%d]\n", - vinst->fault_win->winid, attr.lnotify_lpid, + vinst->fault_win->vas_win.winid, attr.lnotify_lpid, attr.lnotify_pid, attr.lnotify_tid); return 0; diff --git a/arch/powerpc/platforms/powernv/vas-trace.h b/arch/powerpc/platforms/powernv/vas-trace.h index a449b9f0c12e..ca2e08f2ddc0 100644 --- a/arch/powerpc/platforms/powernv/vas-trace.h +++ b/arch/powerpc/platforms/powernv/vas-trace.h @@ -80,7 +80,7 @@ TRACE_EVENT( vas_tx_win_open, TRACE_EVENT( vas_paste_crb, TP_PROTO(struct task_struct *tsk, - struct vas_window *win), + struct pnv_vas_window *win), TP_ARGS(tsk, win), @@ -96,7 +96,7 @@ TRACE_EVENT( vas_paste_crb, TP_fast_assign( __entry->pid = tsk->pid; __entry->vasid = win->vinst->vas_id; - __entry->winid = win->winid; + __entry->winid = win->vas_win.winid; __entry->paste_kaddr = (unsigned long)win->paste_kaddr ), diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c index 6489e29085be..0f8d39fbf2b2 100644 --- a/arch/powerpc/platforms/powernv/vas-window.c +++ b/arch/powerpc/platforms/powernv/vas-window.c @@ -27,14 +27,14 @@ * Compute the paste address region for the window @window using the * ->paste_base_addr and ->paste_win_id_shift we got from device tree. 
*/ -void vas_win_paste_addr(struct vas_window *window, u64 *addr, int *len) +void vas_win_paste_addr(struct pnv_vas_window *window, u64 *addr, int *len) { int winid; u64 base, shift; base = window->vinst->paste_base_addr; shift = window->vinst->paste_win_id_shift; - winid = window->winid; + winid = window->vas_win.winid; *addr = base + (winid << shift); if (len) @@ -43,23 +43,23 @@ void vas_win_paste_addr(struct vas_window *window, u64 *addr, int *len) pr_debug("Txwin #%d: Paste addr 0x%llx\n", winid, *addr); } -static inline void get_hvwc_mmio_bar(struct vas_window *window, +static inline void get_hvwc_mmio_bar(struct pnv_vas_window *window, u64 *start, int *len) { u64 pbaddr; pbaddr = window->vinst->hvwc_bar_start; - *start = pbaddr + window->winid * VAS_HVWC_SIZE; + *start = pbaddr + window->vas_win.winid * VAS_HVWC_SIZE; *len = VAS_HVWC_SIZE; } -static inline void get_uwc_mmio_bar(struct vas_window *window, +static inline void get_uwc_mmio_bar(struct pnv_vas_window *window, u64 *start, int *len) { u64 pbaddr; pbaddr = window->vinst->uwc_bar_start; - *start = pbaddr + window->winid * VAS_UWC_SIZE; + *start = pbaddr + window->vas_win.winid * VAS_UWC_SIZE; *len = VAS_UWC_SIZE; } @@ -68,7 +68,7 @@ static inline void get_uwc_mmio_bar(struct vas_window *window, * space. Unlike MMIO regions (map_mmio_region() below), paste region must * be mapped cache-able and is only applicable to send windows. */ -static void *map_paste_region(struct vas_window *txwin) +static void *map_paste_region(struct pnv_vas_window *txwin) { int len; void *map; @@ -76,7 +76,7 @@ static void *map_paste_region(struct vas_window *txwin) u64 start; name = kasprintf(GFP_KERNEL, "window-v%d-w%d", txwin->vinst->vas_id, - txwin->winid); + txwin->vas_win.winid); if (!name) goto free_name; @@ -133,7 +133,7 @@ static void unmap_region(void *addr, u64 start, int len) /* * Unmap the paste address region for a window. */ -static void unmap_paste_region(struct vas_window *window) +static void unmap_paste_region(struct pnv_vas_window *window) { int len; u64 busaddr_start; @@ -154,7 +154,7 @@ static void unmap_paste_region(struct vas_window *window) * path, just minimize the time we hold the mutex for now. We can add * a per-instance mutex later if necessary. */ -static void unmap_winctx_mmio_bars(struct vas_window *window) +static void unmap_winctx_mmio_bars(struct pnv_vas_window *window) { int len; void *uwc_map; @@ -187,7 +187,7 @@ static void unmap_winctx_mmio_bars(struct vas_window *window) * OS/User Window Context (UWC) MMIO Base Address Region for the given window. * Map these bus addresses and save the mapped kernel addresses in @window. */ -static int map_winctx_mmio_bars(struct vas_window *window) +static int map_winctx_mmio_bars(struct pnv_vas_window *window) { int len; u64 start; @@ -215,7 +215,7 @@ static int map_winctx_mmio_bars(struct vas_window *window) * registers are not sequential. And, we can only write to offsets * with valid registers. */ -static void reset_window_regs(struct vas_window *window) +static void reset_window_regs(struct pnv_vas_window *window) { write_hvwc_reg(window, VREG(LPID), 0ULL); write_hvwc_reg(window, VREG(PID), 0ULL); @@ -271,7 +271,7 @@ static void reset_window_regs(struct vas_window *window) * want to add fields to vas_winctx and move the initialization to * init_vas_winctx_regs(). 
*/ -static void init_xlate_regs(struct vas_window *window, bool user_win) +static void init_xlate_regs(struct pnv_vas_window *window, bool user_win) { u64 lpcr, val; @@ -336,7 +336,7 @@ static void init_xlate_regs(struct vas_window *window, bool user_win) * * TODO: Reserved (aka dedicated) send buffers are not supported yet. */ -static void init_rsvd_tx_buf_count(struct vas_window *txwin, +static void init_rsvd_tx_buf_count(struct pnv_vas_window *txwin, struct vas_winctx *winctx) { write_hvwc_reg(txwin, VREG(TX_RSVD_BUF_COUNT), 0ULL); @@ -358,7 +358,7 @@ static void init_rsvd_tx_buf_count(struct vas_window *txwin, * as a one-time task? That could work for NX but what about other * receivers? Let the receivers tell us the rx-fifo buffers for now. */ -static void init_winctx_regs(struct vas_window *window, +static void init_winctx_regs(struct pnv_vas_window *window, struct vas_winctx *winctx) { u64 val; @@ -520,10 +520,10 @@ static int vas_assign_window_id(struct ida *ida) return winid; } -static void vas_window_free(struct vas_window *window) +static void vas_window_free(struct pnv_vas_window *window) { - int winid = window->winid; struct vas_instance *vinst = window->vinst; + int winid = window->vas_win.winid; unmap_winctx_mmio_bars(window); @@ -534,10 +534,10 @@ static void vas_window_free(struct vas_window *window) vas_release_window_id(&vinst->ida, winid); } -static struct vas_window *vas_window_alloc(struct vas_instance *vinst) +static struct pnv_vas_window *vas_window_alloc(struct vas_instance *vinst) { int winid; - struct vas_window *window; + struct pnv_vas_window *window; winid = vas_assign_window_id(&vinst->ida); if (winid < 0) @@ -548,7 +548,7 @@ static struct vas_window *vas_window_alloc(struct vas_instance *vinst) goto out_free; window->vinst = vinst; - window->winid = winid; + window->vas_win.winid = winid; if (map_winctx_mmio_bars(window)) goto out_free; @@ -563,7 +563,7 @@ out_free: return ERR_PTR(-ENOMEM); } -static void put_rx_win(struct vas_window *rxwin) +static void put_rx_win(struct pnv_vas_window *rxwin) { /* Better not be a send window! */ WARN_ON_ONCE(rxwin->tx_win); @@ -579,10 +579,11 @@ static void put_rx_win(struct vas_window *rxwin) * * NOTE: We access ->windows[] table and assume that vinst->mutex is held. */ -static struct vas_window *get_user_rxwin(struct vas_instance *vinst, u32 pswid) +static struct pnv_vas_window *get_user_rxwin(struct vas_instance *vinst, + u32 pswid) { int vasid, winid; - struct vas_window *rxwin; + struct pnv_vas_window *rxwin; decode_pswid(pswid, &vasid, &winid); @@ -591,7 +592,7 @@ static struct vas_window *get_user_rxwin(struct vas_instance *vinst, u32 pswid) rxwin = vinst->windows[winid]; - if (!rxwin || rxwin->tx_win || rxwin->cop != VAS_COP_TYPE_FTW) + if (!rxwin || rxwin->tx_win || rxwin->vas_win.cop != VAS_COP_TYPE_FTW) return ERR_PTR(-EINVAL); return rxwin; @@ -603,10 +604,10 @@ static struct vas_window *get_user_rxwin(struct vas_instance *vinst, u32 pswid) * * See also function header of set_vinst_win(). */ -static struct vas_window *get_vinst_rxwin(struct vas_instance *vinst, +static struct pnv_vas_window *get_vinst_rxwin(struct vas_instance *vinst, enum vas_cop_type cop, u32 pswid) { - struct vas_window *rxwin; + struct pnv_vas_window *rxwin; mutex_lock(&vinst->mutex); @@ -639,9 +640,9 @@ static struct vas_window *get_vinst_rxwin(struct vas_instance *vinst, * window, we also save the window in the ->rxwin[] table. 
*/ static void set_vinst_win(struct vas_instance *vinst, - struct vas_window *window) + struct pnv_vas_window *window) { - int id = window->winid; + int id = window->vas_win.winid; mutex_lock(&vinst->mutex); @@ -650,8 +651,8 @@ static void set_vinst_win(struct vas_instance *vinst, * unless its a user (FTW) window. */ if (!window->user_win && !window->tx_win) { - WARN_ON_ONCE(vinst->rxwin[window->cop]); - vinst->rxwin[window->cop] = window; + WARN_ON_ONCE(vinst->rxwin[window->vas_win.cop]); + vinst->rxwin[window->vas_win.cop] = window; } WARN_ON_ONCE(vinst->windows[id] != NULL); @@ -664,16 +665,16 @@ static void set_vinst_win(struct vas_instance *vinst, * Clear this window from the table(s) of windows for this VAS instance. * See also function header of set_vinst_win(). */ -static void clear_vinst_win(struct vas_window *window) +static void clear_vinst_win(struct pnv_vas_window *window) { - int id = window->winid; + int id = window->vas_win.winid; struct vas_instance *vinst = window->vinst; mutex_lock(&vinst->mutex); if (!window->user_win && !window->tx_win) { - WARN_ON_ONCE(!vinst->rxwin[window->cop]); - vinst->rxwin[window->cop] = NULL; + WARN_ON_ONCE(!vinst->rxwin[window->vas_win.cop]); + vinst->rxwin[window->vas_win.cop] = NULL; } WARN_ON_ONCE(vinst->windows[id] != window); @@ -682,7 +683,7 @@ static void clear_vinst_win(struct vas_window *window) mutex_unlock(&vinst->mutex); } -static void init_winctx_for_rxwin(struct vas_window *rxwin, +static void init_winctx_for_rxwin(struct pnv_vas_window *rxwin, struct vas_rx_win_attr *rxattr, struct vas_winctx *winctx) { @@ -703,7 +704,7 @@ static void init_winctx_for_rxwin(struct vas_window *rxwin, winctx->rx_fifo = rxattr->rx_fifo; winctx->rx_fifo_size = rxattr->rx_fifo_size; - winctx->wcreds_max = rxwin->wcreds_max; + winctx->wcreds_max = rxwin->vas_win.wcreds_max; winctx->pin_win = rxattr->pin_win; winctx->nx_win = rxattr->nx_win; @@ -852,7 +853,7 @@ EXPORT_SYMBOL_GPL(vas_init_rx_win_attr); struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop, struct vas_rx_win_attr *rxattr) { - struct vas_window *rxwin; + struct pnv_vas_window *rxwin; struct vas_winctx winctx; struct vas_instance *vinst; @@ -871,21 +872,21 @@ struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop, rxwin = vas_window_alloc(vinst); if (IS_ERR(rxwin)) { pr_devel("Unable to allocate memory for Rx window\n"); - return rxwin; + return (struct vas_window *)rxwin; } rxwin->tx_win = false; rxwin->nx_win = rxattr->nx_win; rxwin->user_win = rxattr->user_win; - rxwin->cop = cop; - rxwin->wcreds_max = rxattr->wcreds_max; + rxwin->vas_win.cop = cop; + rxwin->vas_win.wcreds_max = rxattr->wcreds_max; init_winctx_for_rxwin(rxwin, rxattr, &winctx); init_winctx_regs(rxwin, &winctx); set_vinst_win(vinst, rxwin); - return rxwin; + return &rxwin->vas_win; } EXPORT_SYMBOL_GPL(vas_rx_win_open); @@ -906,7 +907,7 @@ void vas_init_tx_win_attr(struct vas_tx_win_attr *txattr, enum vas_cop_type cop) } EXPORT_SYMBOL_GPL(vas_init_tx_win_attr); -static void init_winctx_for_txwin(struct vas_window *txwin, +static void init_winctx_for_txwin(struct pnv_vas_window *txwin, struct vas_tx_win_attr *txattr, struct vas_winctx *winctx) { @@ -927,7 +928,7 @@ static void init_winctx_for_txwin(struct vas_window *txwin, */ memset(winctx, 0, sizeof(struct vas_winctx)); - winctx->wcreds_max = txwin->wcreds_max; + winctx->wcreds_max = txwin->vas_win.wcreds_max; winctx->user_win = txattr->user_win; winctx->nx_win = txwin->rxwin->nx_win; @@ -947,13 +948,13 @@ static void 
init_winctx_for_txwin(struct vas_window *txwin, winctx->lpid = txattr->lpid; winctx->pidr = txattr->pidr; - winctx->rx_win_id = txwin->rxwin->winid; + winctx->rx_win_id = txwin->rxwin->vas_win.winid; /* * IRQ and fault window setup is successful. Set fault window * for the send window so that ready to handle faults. */ if (txwin->vinst->virq) - winctx->fault_win_id = txwin->vinst->fault_win->winid; + winctx->fault_win_id = txwin->vinst->fault_win->vas_win.winid; winctx->dma_type = VAS_DMA_TYPE_INJECT; winctx->tc_mode = txattr->tc_mode; @@ -963,7 +964,8 @@ static void init_winctx_for_txwin(struct vas_window *txwin, winctx->irq_port = txwin->vinst->irq_port; winctx->pswid = txattr->pswid ? txattr->pswid : - encode_pswid(txwin->vinst->vas_id, txwin->winid); + encode_pswid(txwin->vinst->vas_id, + txwin->vas_win.winid); } static bool tx_win_args_valid(enum vas_cop_type cop, @@ -994,8 +996,8 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, struct vas_tx_win_attr *attr) { int rc; - struct vas_window *txwin; - struct vas_window *rxwin; + struct pnv_vas_window *txwin; + struct pnv_vas_window *rxwin; struct vas_winctx winctx; struct vas_instance *vinst; @@ -1021,7 +1023,7 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, rxwin = get_vinst_rxwin(vinst, cop, attr->pswid); if (IS_ERR(rxwin)) { pr_devel("No RxWin for vasid %d, cop %d\n", vasid, cop); - return rxwin; + return (struct vas_window *)rxwin; } txwin = vas_window_alloc(vinst); @@ -1030,12 +1032,12 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, goto put_rxwin; } - txwin->cop = cop; + txwin->vas_win.cop = cop; txwin->tx_win = 1; txwin->rxwin = rxwin; txwin->nx_win = txwin->rxwin->nx_win; txwin->user_win = attr->user_win; - txwin->wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT; + txwin->vas_win.wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT; init_winctx_for_txwin(txwin, attr, &winctx); @@ -1065,16 +1067,16 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, rc = -ENODEV; goto free_window; } - rc = get_vas_user_win_ref(&txwin->task_ref); + rc = get_vas_user_win_ref(&txwin->vas_win.task_ref); if (rc) goto free_window; - vas_user_win_add_mm_context(&txwin->task_ref); + vas_user_win_add_mm_context(&txwin->vas_win.task_ref); } set_vinst_win(vinst, txwin); - return txwin; + return &txwin->vas_win; free_window: vas_window_free(txwin); @@ -1093,12 +1095,14 @@ int vas_copy_crb(void *crb, int offset) EXPORT_SYMBOL_GPL(vas_copy_crb); #define RMA_LSMP_REPORT_ENABLE PPC_BIT(53) -int vas_paste_crb(struct vas_window *txwin, int offset, bool re) +int vas_paste_crb(struct vas_window *vwin, int offset, bool re) { + struct pnv_vas_window *txwin; int rc; void *addr; uint64_t val; + txwin = container_of(vwin, struct pnv_vas_window, vas_win); trace_vas_paste_crb(current, txwin); /* @@ -1128,7 +1132,7 @@ int vas_paste_crb(struct vas_window *txwin, int offset, bool re) else rc = -EINVAL; - pr_debug("Txwin #%d: Msg count %llu\n", txwin->winid, + pr_debug("Txwin #%d: Msg count %llu\n", txwin->vas_win.winid, read_hvwc_reg(txwin, VREG(LRFIFO_PUSH))); return rc; @@ -1148,7 +1152,7 @@ EXPORT_SYMBOL_GPL(vas_paste_crb); * user space. (NX-842 driver waits for CSB and Fast thread-wakeup * doesn't use credit checking). */ -static void poll_window_credits(struct vas_window *window) +static void poll_window_credits(struct pnv_vas_window *window) { u64 val; int creds, mode; @@ -1178,7 +1182,7 @@ retry: * and issue CRB Kill to stop all pending requests. 
Need only * if there is a bug in NX or fault handling in kernel. */ - if (creds < window->wcreds_max) { + if (creds < window->vas_win.wcreds_max) { val = 0; set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(msecs_to_jiffies(10)); @@ -1189,7 +1193,8 @@ retry: */ if (!(count % 1000)) pr_warn_ratelimited("VAS: pid %d stuck. Waiting for credits returned for Window(%d). creds %d, Retries %d\n", - vas_window_pid(window), window->winid, + vas_window_pid(&window->vas_win), + window->vas_win.winid, creds, count); goto retry; @@ -1201,7 +1206,7 @@ retry: * short time to queue a CRB, so window should not be busy for too long. * Trying 5ms intervals. */ -static void poll_window_busy_state(struct vas_window *window) +static void poll_window_busy_state(struct pnv_vas_window *window) { int busy; u64 val; @@ -1221,7 +1226,8 @@ retry: */ if (!(count % 1000)) pr_warn_ratelimited("VAS: pid %d stuck. Window (ID=%d) is in busy state. Retries %d\n", - vas_window_pid(window), window->winid, count); + vas_window_pid(&window->vas_win), + window->vas_win.winid, count); goto retry; } @@ -1243,7 +1249,7 @@ retry: * casting out becomes necessary we should consider offloading the * job to a worker thread, so the window close can proceed quickly. */ -static void poll_window_castout(struct vas_window *window) +static void poll_window_castout(struct pnv_vas_window *window) { /* stub for now */ } @@ -1252,7 +1258,7 @@ static void poll_window_castout(struct vas_window *window) * Unpin and close a window so no new requests are accepted and the * hardware can evict this window from cache if necessary. */ -static void unpin_close_window(struct vas_window *window) +static void unpin_close_window(struct pnv_vas_window *window) { u64 val; @@ -1274,11 +1280,15 @@ static void unpin_close_window(struct vas_window *window) * * Besides the hardware, kernel has some bookkeeping of course. */ -int vas_win_close(struct vas_window *window) +int vas_win_close(struct vas_window *vwin) { - if (!window) + struct pnv_vas_window *window; + + if (!vwin) return 0; + window = container_of(vwin, struct pnv_vas_window, vas_win); + if (!window->tx_win && atomic_read(&window->num_txwins) != 0) { pr_devel("Attempting to close an active Rx window!\n"); WARN_ON_ONCE(1); @@ -1300,8 +1310,8 @@ int vas_win_close(struct vas_window *window) /* if send window, drop reference to matching receive window */ if (window->tx_win) { if (window->user_win) { - put_vas_user_win_ref(&window->task_ref); - mm_context_remove_vas_window(window->task_ref.mm); + put_vas_user_win_ref(&vwin->task_ref); + mm_context_remove_vas_window(vwin->task_ref.mm); } put_rx_win(window->rxwin); } @@ -1334,7 +1344,7 @@ EXPORT_SYMBOL_GPL(vas_win_close); * - The kernel with return credit on fault window after reading entry * from fault FIFO. */ -void vas_return_credit(struct vas_window *window, bool tx) +void vas_return_credit(struct pnv_vas_window *window, bool tx) { uint64_t val; @@ -1348,10 +1358,10 @@ void vas_return_credit(struct vas_window *window, bool tx) } } -struct vas_window *vas_pswid_to_window(struct vas_instance *vinst, +struct pnv_vas_window *vas_pswid_to_window(struct vas_instance *vinst, uint32_t pswid) { - struct vas_window *window; + struct pnv_vas_window *window; int winid; if (!pswid) { @@ -1388,11 +1398,11 @@ struct vas_window *vas_pswid_to_window(struct vas_instance *vinst, * by NX). 
*/ if (!window->tx_win || !window->user_win || !window->nx_win || - window->cop == VAS_COP_TYPE_FAULT || - window->cop == VAS_COP_TYPE_FTW) { + window->vas_win.cop == VAS_COP_TYPE_FAULT || + window->vas_win.cop == VAS_COP_TYPE_FTW) { pr_err("PSWID decode: id %d, tx %d, user %d, nx %d, cop %d\n", winid, window->tx_win, window->user_win, - window->nx_win, window->cop); + window->nx_win, window->vas_win.cop); WARN_ON(1); } @@ -1418,10 +1428,12 @@ static struct vas_window *vas_user_win_open(int vas_id, u64 flags, return vas_tx_win_open(vas_id, cop_type, &txattr); } -static u64 vas_user_win_paste_addr(struct vas_window *win) +static u64 vas_user_win_paste_addr(struct vas_window *txwin) { + struct pnv_vas_window *win; u64 paste_addr; + win = container_of(txwin, struct pnv_vas_window, vas_win); vas_win_paste_addr(win, &paste_addr, NULL); return paste_addr; @@ -1429,7 +1441,6 @@ static u64 vas_user_win_paste_addr(struct vas_window *win) static int vas_user_win_close(struct vas_window *txwin) { - vas_win_close(txwin); return 0; diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h index 614db6a80c67..8bb08e395de0 100644 --- a/arch/powerpc/platforms/powernv/vas.h +++ b/arch/powerpc/platforms/powernv/vas.h @@ -334,11 +334,11 @@ struct vas_instance { int fifo_in_progress; /* To wake up thread or return IRQ_HANDLED */ spinlock_t fault_lock; /* Protects fifo_in_progress update */ void *fault_fifo; - struct vas_window *fault_win; /* Fault window */ + struct pnv_vas_window *fault_win; /* Fault window */ struct mutex mutex; - struct vas_window *rxwin[VAS_COP_TYPE_MAX]; - struct vas_window *windows[VAS_WINDOWS_PER_CHIP]; + struct pnv_vas_window *rxwin[VAS_COP_TYPE_MAX]; + struct pnv_vas_window *windows[VAS_WINDOWS_PER_CHIP]; char *name; char *dbgname; @@ -346,30 +346,24 @@ struct vas_instance { }; /* - * In-kernel state a VAS window. One per window. + * In-kernel state a VAS window on PowerNV. One per window. 
*/ -struct vas_window { +struct pnv_vas_window { + struct vas_window vas_win; /* Fields common to send and receive windows */ struct vas_instance *vinst; - int winid; bool tx_win; /* True if send window */ bool nx_win; /* True if NX window */ bool user_win; /* True if user space window */ void *hvwc_map; /* HV window context */ void *uwc_map; /* OS/User window context */ - int wcreds_max; /* Window credits */ - - struct vas_user_win_ref task_ref; - char *dbgname; - struct dentry *dbgdir; /* Fields applicable only to send windows */ void *paste_kaddr; char *paste_addr_name; - struct vas_window *rxwin; + struct pnv_vas_window *rxwin; - /* Feilds applicable only to receive windows */ - enum vas_cop_type cop; + /* Fields applicable only to receive windows */ atomic_t num_txwins; }; @@ -428,15 +422,15 @@ extern struct mutex vas_mutex; extern struct vas_instance *find_vas_instance(int vasid); extern void vas_init_dbgdir(void); extern void vas_instance_init_dbgdir(struct vas_instance *vinst); -extern void vas_window_init_dbgdir(struct vas_window *win); -extern void vas_window_free_dbgdir(struct vas_window *win); +extern void vas_window_init_dbgdir(struct pnv_vas_window *win); +extern void vas_window_free_dbgdir(struct pnv_vas_window *win); extern int vas_setup_fault_window(struct vas_instance *vinst); extern irqreturn_t vas_fault_thread_fn(int irq, void *data); extern irqreturn_t vas_fault_handler(int irq, void *dev_id); -extern void vas_return_credit(struct vas_window *window, bool tx); -extern struct vas_window *vas_pswid_to_window(struct vas_instance *vinst, +extern void vas_return_credit(struct pnv_vas_window *window, bool tx); +extern struct pnv_vas_window *vas_pswid_to_window(struct vas_instance *vinst, uint32_t pswid); -extern void vas_win_paste_addr(struct vas_window *window, u64 *addr, +extern void vas_win_paste_addr(struct pnv_vas_window *window, u64 *addr, int *len); static inline int vas_window_pid(struct vas_window *window) @@ -444,16 +438,16 @@ static inline int vas_window_pid(struct vas_window *window) return pid_vnr(window->task_ref.pid); } -static inline void vas_log_write(struct vas_window *win, char *name, +static inline void vas_log_write(struct pnv_vas_window *win, char *name, void *regptr, u64 val) { if (val) pr_debug("%swin #%d: %s reg %p, val 0x%016llx\n", - win->tx_win ? "Tx" : "Rx", win->winid, name, - regptr, val); + win->tx_win ? "Tx" : "Rx", win->vas_win.winid, + name, regptr, val); } -static inline void write_uwc_reg(struct vas_window *win, char *name, +static inline void write_uwc_reg(struct pnv_vas_window *win, char *name, s32 reg, u64 val) { void *regptr; @@ -464,7 +458,7 @@ static inline void write_uwc_reg(struct vas_window *win, char *name, out_be64(regptr, val); } -static inline void write_hvwc_reg(struct vas_window *win, char *name, +static inline void write_hvwc_reg(struct pnv_vas_window *win, char *name, s32 reg, u64 val) { void *regptr; @@ -475,7 +469,7 @@ static inline void write_hvwc_reg(struct vas_window *win, char *name, out_be64(regptr, val); } -static inline u64 read_hvwc_reg(struct vas_window *win, +static inline u64 read_hvwc_reg(struct pnv_vas_window *win, char *name __maybe_unused, s32 reg) { return in_be64(win->hvwc_map+reg); -- cgit v1.2.3 From 8f3a6c92802b7c48043954ba3b507e9b33d8c898 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Thu, 17 Jun 2021 13:34:05 -0700 Subject: powerpc/pseries/vas: Define VAS/NXGZIP hcalls and structs This patch adds hcalls and other definitions. Also define structs that are used in VAS implementation on PowerVM. 
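Since the common struct vas_window is embedded in each platform
struct, platform code recovers its private state with container_of(),
as the PowerNV changes above do at each call site. A minimal sketch of
the pattern; the helper names to_pnv_window()/to_pseries_window() are
illustrative and do not exist in these patches:

	/* Recover the PowerNV-private window from the common handle */
	static inline struct pnv_vas_window *to_pnv_window(struct vas_window *vwin)
	{
		return container_of(vwin, struct pnv_vas_window, vas_win);
	}

	/* The pseries code introduced below can use the same shape */
	static inline struct pseries_vas_window *to_pseries_window(struct vas_window *vwin)
	{
		return container_of(vwin, struct pseries_vas_window, vas_win);
	}

Embedding the common struct as a member (rather than duplicating its
fields) lets the book3s vas-api.c code operate on struct vas_window
pointers without knowing which platform allocated the window.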
Signed-off-by: Haren Myneni Acked-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b4b8c594c27ee4aa6be9dc6dc4ee7331571cbbe8.camel@linux.ibm.com --- arch/powerpc/include/asm/hvcall.h | 7 ++ arch/powerpc/include/asm/vas.h | 30 +++++++++ arch/powerpc/platforms/pseries/vas.h | 125 +++++++++++++++++++++++++++++++++++ 3 files changed, 162 insertions(+) create mode 100644 arch/powerpc/platforms/pseries/vas.h diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index e3b29eda8074..7c3418d1b5e9 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -294,6 +294,13 @@ #define H_RESIZE_HPT_COMMIT 0x370 #define H_REGISTER_PROC_TBL 0x37C #define H_SIGNAL_SYS_RESET 0x380 +#define H_ALLOCATE_VAS_WINDOW 0x388 +#define H_MODIFY_VAS_WINDOW 0x38C +#define H_DEALLOCATE_VAS_WINDOW 0x390 +#define H_QUERY_VAS_WINDOW 0x394 +#define H_QUERY_VAS_CAPABILITIES 0x398 +#define H_QUERY_NX_CAPABILITIES 0x39C +#define H_GET_NX_FAULT 0x3A0 #define H_INT_GET_SOURCE_INFO 0x3A8 #define H_INT_SET_SOURCE_CONFIG 0x3AC #define H_INT_GET_SOURCE_CONFIG 0x3B0 diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index 14ad7982874c..99570c33058f 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -160,6 +160,7 @@ struct vas_tx_win_attr { bool rx_win_ord_mode; }; +#ifdef CONFIG_PPC_POWERNV /* * Helper to map a chip id to VAS id. * For POWER9, this is a 1:1 mapping. In the future this maybe a 1:N @@ -225,6 +226,35 @@ int vas_paste_crb(struct vas_window *win, int offset, bool re); int vas_register_api_powernv(struct module *mod, enum vas_cop_type cop_type, const char *name); void vas_unregister_api_powernv(void); +#endif + +#ifdef CONFIG_PPC_PSERIES + +/* VAS Capabilities */ +#define VAS_GZIP_QOS_FEAT 0x1 +#define VAS_GZIP_DEF_FEAT 0x2 +#define VAS_GZIP_QOS_FEAT_BIT PPC_BIT(VAS_GZIP_QOS_FEAT) /* Bit 1 */ +#define VAS_GZIP_DEF_FEAT_BIT PPC_BIT(VAS_GZIP_DEF_FEAT) /* Bit 2 */ + +/* NX Capabilities */ +#define VAS_NX_GZIP_FEAT 0x1 +#define VAS_NX_GZIP_FEAT_BIT PPC_BIT(VAS_NX_GZIP_FEAT) /* Bit 1 */ + +/* + * These structs are used to retrieve overall VAS capabilities that + * the hypervisor provides. + */ +struct hv_vas_all_caps { + __be64 descriptor; + __be64 feat_type; +} __packed __aligned(0x1000); + +struct vas_all_caps { + u64 descriptor; + u64 feat_type; +}; + +#endif /* * Register / unregister coprocessor type to VAS API which will be exported diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h new file mode 100644 index 000000000000..4ecb3fcabd10 --- /dev/null +++ b/arch/powerpc/platforms/pseries/vas.h @@ -0,0 +1,125 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright 2020-21 IBM Corp. 
+ */ + +#ifndef _VAS_H +#define _VAS_H +#include +#include +#include + +/* + * VAS window modify flags + */ +#define VAS_MOD_WIN_CLOSE PPC_BIT(0) +#define VAS_MOD_WIN_JOBS_KILL PPC_BIT(1) +#define VAS_MOD_WIN_DR PPC_BIT(3) +#define VAS_MOD_WIN_PR PPC_BIT(4) +#define VAS_MOD_WIN_SF PPC_BIT(5) +#define VAS_MOD_WIN_TA PPC_BIT(6) +#define VAS_MOD_WIN_FLAGS (VAS_MOD_WIN_JOBS_KILL | VAS_MOD_WIN_DR | \ + VAS_MOD_WIN_PR | VAS_MOD_WIN_SF) + +#define VAS_WIN_ACTIVE 0x0 +#define VAS_WIN_CLOSED 0x1 +#define VAS_WIN_INACTIVE 0x2 /* Inactive due to HW failure */ +/* Process of being modified, deallocated, or quiesced */ +#define VAS_WIN_MOD_IN_PROCESS 0x3 + +#define VAS_COPY_PASTE_USER_MODE 0x00000001 +#define VAS_COP_OP_USER_MODE 0x00000010 + +/* + * Co-processor feature - GZIP QoS windows or GZIP default windows + */ +enum vas_cop_feat_type { + VAS_GZIP_QOS_FEAT_TYPE, + VAS_GZIP_DEF_FEAT_TYPE, + VAS_MAX_FEAT_TYPE, +}; + +/* + * Use to get feature specific capabilities from the + * hypervisor. + */ +struct hv_vas_cop_feat_caps { + __be64 descriptor; + u8 win_type; /* Default or QoS type */ + u8 user_mode; + __be16 max_lpar_creds; + __be16 max_win_creds; + union { + __be16 reserved; + __be16 def_lpar_creds; /* Used for default capabilities */ + }; + __be16 target_lpar_creds; +} __packed __aligned(0x1000); + +/* + * Feature specific (QoS or default) capabilities. + */ +struct vas_cop_feat_caps { + u64 descriptor; + u8 win_type; /* Default or QoS type */ + u8 user_mode; /* User mode copy/paste or COP HCALL */ + u16 max_lpar_creds; /* Max credits available in LPAR */ + /* Max credits can be assigned per window */ + u16 max_win_creds; + union { + u16 reserved; /* Used for QoS credit type */ + u16 def_lpar_creds; /* Used for default credit type */ + }; + /* Total LPAR available credits. Can be different from max LPAR */ + /* credits due to DLPAR operation */ + atomic_t target_lpar_creds; + atomic_t used_lpar_creds; /* Used credits so far */ + u16 avail_lpar_creds; /* Remaining available credits */ +}; + +/* + * Feature (QoS or Default) specific to store capabilities and + * the list of open windows. + */ +struct vas_caps { + struct vas_cop_feat_caps caps; + struct list_head list; /* List of open windows */ +}; + +/* + * To get window information from the hypervisor. 
+ */ +struct hv_vas_win_lpar { + __be16 version; + u8 win_type; + u8 status; + __be16 credits; /* No of credits assigned to this window */ + __be16 reserved; + __be32 pid; /* LPAR Process ID */ + __be32 tid; /* LPAR Thread ID */ + __be64 win_addr; /* Paste address */ + __be32 interrupt; /* Interrupt when NX request completes */ + __be32 fault; /* Interrupt when NX sees fault */ + /* Associativity Domain Identifiers as returned in */ + /* H_HOME_NODE_ASSOCIATIVITY */ + __be64 domain[6]; + __be64 win_util; /* Number of bytes processed */ +} __packed __aligned(0x1000); + +struct pseries_vas_window { + struct vas_window vas_win; + u64 win_addr; /* Physical paste address */ + u8 win_type; /* QoS or Default window */ + u32 complete_irq; /* Completion interrupt */ + u32 fault_irq; /* Fault interrupt */ + u64 domain[6]; /* Associativity domain Ids */ + /* this window is allocated */ + u64 util; + + /* List of windows opened which is used for LPM */ + struct list_head win_list; + u64 flags; + char *name; + int fault_virq; +}; +#endif /* _VAS_H */ -- cgit v1.2.3 From 540761b7f51067d76b301c64abc50328ded89b1c Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Thu, 17 Jun 2021 13:34:43 -0700 Subject: powerpc/vas: Define QoS credit flag to allocate window PowerVM introduces two different type of credits: Default and Quality of service (QoS). The total number of default credits available on each LPAR depends on CPU resources configured. But these credits can be shared or over-committed across LPARs in shared mode which can result in paste command failure (RMA_busy). To avoid NX HW contention, the hypervisor ntroduces QoS credit type which makes sure guaranteed access to NX esources. The system admins can assign QoS credits or each LPAR via HMC. Default credit type is used to allocate a VAS window by default as on PowerVM implementation. But the process can pass VAS_TX_WIN_FLAG_QOS_CREDIT flag with VAS_TX_WIN_OPEN ioctl to open QoS type window. Signed-off-by: Haren Myneni Acked-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/aa950b7b8e8077364267720274a7b9ec34e76e73.camel@linux.ibm.com --- arch/powerpc/include/uapi/asm/vas-api.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/uapi/asm/vas-api.h b/arch/powerpc/include/uapi/asm/vas-api.h index ebd4b2424785..7c81301ecdba 100644 --- a/arch/powerpc/include/uapi/asm/vas-api.h +++ b/arch/powerpc/include/uapi/asm/vas-api.h @@ -13,11 +13,15 @@ #define VAS_MAGIC 'v' #define VAS_TX_WIN_OPEN _IOW(VAS_MAGIC, 0x20, struct vas_tx_win_open_attr) +/* Flags to VAS TX open window ioctl */ +/* To allocate a window with QoS credit, otherwise use default credit */ +#define VAS_TX_WIN_FLAG_QOS_CREDIT 0x0000000000000001 + struct vas_tx_win_open_attr { __u32 version; __s16 vas_id; /* specific instance of vas or -1 for default */ __u16 reserved1; - __u64 flags; /* Future use */ + __u64 flags; __u64 reserved2[6]; }; -- cgit v1.2.3 From f33ecfde30ce6909fff41339285e0274bb403fb8 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Thu, 17 Jun 2021 13:35:22 -0700 Subject: powerpc/pseries/vas: Add hcall wrappers for VAS handling This patch adds the following hcall wrapper functions to allocate, modify and deallocate VAS windows, and retrieve VAS capabilities. 
H_ALLOCATE_VAS_WINDOW: Allocate VAS window H_DEALLOCATE_VAS_WINDOW: Close VAS window H_MODIFY_VAS_WINDOW: Setup window before using H_QUERY_VAS_CAPABILITIES: Get VAS capabilities Signed-off-by: Haren Myneni Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/40fb02a4d56ca4e240b074a15029082055be5997.camel@linux.ibm.com --- arch/powerpc/platforms/pseries/vas.c | 147 +++++++++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) create mode 100644 arch/powerpc/platforms/pseries/vas.c diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c new file mode 100644 index 000000000000..a73d7d00bf55 --- /dev/null +++ b/arch/powerpc/platforms/pseries/vas.c @@ -0,0 +1,147 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2020-21 IBM Corp. + */ + +#define pr_fmt(fmt) "vas: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include "vas.h" + +#define VAS_INVALID_WIN_ADDRESS 0xFFFFFFFFFFFFFFFFul +#define VAS_DEFAULT_DOMAIN_ID 0xFFFFFFFFFFFFFFFFul +/* The hypervisor allows one credit per window right now */ +#define DEF_WIN_CREDS 1 + +static long hcall_return_busy_check(long rc) +{ + /* Check if we are stalled for some time */ + if (H_IS_LONG_BUSY(rc)) { + msleep(get_longbusy_msecs(rc)); + rc = H_BUSY; + } else if (rc == H_BUSY) { + cond_resched(); + } + + return rc; +} + +/* + * Allocate VAS window hcall + */ +static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain, + u8 wintype, u16 credits) +{ + long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; + long rc; + + do { + rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype, + credits, domain[0], domain[1], domain[2], + domain[3], domain[4], domain[5]); + + rc = hcall_return_busy_check(rc); + } while (rc == H_BUSY); + + if (rc == H_SUCCESS) { + if (win->win_addr == VAS_INVALID_WIN_ADDRESS) { + pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n"); + return -ENOTSUPP; + } + win->vas_win.winid = retbuf[0]; + win->win_addr = retbuf[1]; + win->complete_irq = retbuf[2]; + win->fault_irq = retbuf[3]; + return 0; + } + + pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n", + rc, wintype, credits); + + return -EIO; +} + +/* + * Deallocate VAS window hcall. + */ +static int h_deallocate_vas_window(u64 winid) +{ + long rc; + + do { + rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid); + + rc = hcall_return_busy_check(rc); + } while (rc == H_BUSY); + + if (rc == H_SUCCESS) + return 0; + + pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n", + rc, winid); + return -EIO; +} + +/* + * Modify VAS window. + * After the window is opened with allocate window hcall, configure it + * with flags and LPAR PID before using. + */ +static int h_modify_vas_window(struct pseries_vas_window *win) +{ + long rc; + u32 lpid = mfspr(SPRN_PID); + + /* + * AMR value is not supported in Linux VAS implementation. + * The hypervisor ignores it if 0 is passed. + */ + do { + rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW, + win->vas_win.winid, lpid, 0, + VAS_MOD_WIN_FLAGS, 0); + + rc = hcall_return_busy_check(rc); + } while (rc == H_BUSY); + + if (rc == H_SUCCESS) + return 0; + + pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u lpid %u\n", + rc, win->vas_win.winid, lpid); + return -EIO; +} + +/* + * This hcall is used to determine the capabilities from the hypervisor. 
 + * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES
+ * @query_type: If 0 is passed, the hypervisor returns the overall
+ *	capabilities which provides all feature(s) that are
+ *	available. Then query the hypervisor to get the
+ *	corresponding capabilities for the specific feature.
+ *	Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS
+ *	and VAS GZIP Default capabilities.
+ *	H_QUERY_NX_CAPABILITIES provides NX GZIP
+ *	capabilities.
+ * @result: Return buffer to save capabilities.
+ */
+int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result)
+{
+	long rc;
+
+	rc = plpar_hcall_norets(hcall, query_type, result);
+
+	if (rc == H_SUCCESS)
+		return 0;
+
+	pr_err("HCALL(%llx) error %ld, query_type %u, result buffer 0x%llx\n",
+	       hcall, rc, query_type, result);
+	return -EIO;
+}
-- cgit v1.2.3


From ca77d48854177bb9749aef7329201f03b2382fbb Mon Sep 17 00:00:00 2001
From: Haren Myneni
Date: Thu, 17 Jun 2021 13:35:54 -0700
Subject: powerpc/pseries/vas: Implement getting capabilities from hypervisor

The hypervisor provides VAS capabilities for the GZIP default and QoS
features. These capabilities give information for the specific
features, such as the total number of credits available in the LPAR,
the maximum credits allowed per window, the maximum credits allowed in
the LPAR, whether usermode copy/paste is supported, etc.

This patch adds the following:
- Retrieve all features that are provided by the hypervisor using the
  H_QUERY_VAS_CAPABILITIES hcall with 0 as the feature type.
- Retrieve capabilities for the specific feature using the same hcall
  and the feature type (1 for QoS and 2 for default type).

Signed-off-by: Haren Myneni
Reviewed-by: Nicholas Piggin
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/177c88608cb88f7b87d1c546103f18cec6c056b4.camel@linux.ibm.com
---
 arch/powerpc/platforms/pseries/vas.c | 122 +++++++++++++++++++++++++++++++++++
 1 file changed, 122 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
index a73d7d00bf55..93794e12527d 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -10,6 +10,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -20,6 +21,11 @@
 /* The hypervisor allows one credit per window right now */
 #define DEF_WIN_CREDS 1

+static struct vas_all_caps caps_all;
+static bool copypaste_feat;
+
+static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE];
+
 static long hcall_return_busy_check(long rc)
 {
 	/* Check if we are stalled for some time */
@@ -145,3 +151,119 @@ int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result)
 	       hcall, rc, query_type, result);
 	return -EIO;
 }
+
+/*
+ * Get the specific capabilities based on the feature type.
+ * Right now supports GZIP default and GZIP QoS capabilities.
 + */
+static int get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
+				struct hv_vas_cop_feat_caps *hv_caps)
+{
+	struct vas_cop_feat_caps *caps;
+	struct vas_caps *vcaps;
+	int rc = 0;
+
+	vcaps = &vascaps[type];
+	memset(vcaps, 0, sizeof(*vcaps));
+	INIT_LIST_HEAD(&vcaps->list);
+
+	caps = &vcaps->caps;
+
+	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat,
+				      (u64)virt_to_phys(hv_caps));
+	if (rc)
+		return rc;
+
+	caps->user_mode = hv_caps->user_mode;
+	if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) {
+		pr_err("User space COPY/PASTE is not supported\n");
+		return -ENOTSUPP;
+	}
+
+	caps->descriptor = be64_to_cpu(hv_caps->descriptor);
+	caps->win_type = hv_caps->win_type;
+	if (caps->win_type >= VAS_MAX_FEAT_TYPE) {
+		pr_err("Unsupported window type %u\n", caps->win_type);
+		return -EINVAL;
+	}
+	caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds);
+	caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds);
+	atomic_set(&caps->target_lpar_creds,
+		   be16_to_cpu(hv_caps->target_lpar_creds));
+	if (feat == VAS_GZIP_DEF_FEAT) {
+		caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds);
+
+		if (caps->max_win_creds < DEF_WIN_CREDS) {
+			pr_err("Window creds(%u) > max allowed window creds(%u)\n",
+			       DEF_WIN_CREDS, caps->max_win_creds);
+			return -EINVAL;
+		}
+	}
+
+	copypaste_feat = true;
+
+	return 0;
+}
+
+static int __init pseries_vas_init(void)
+{
+	struct hv_vas_cop_feat_caps *hv_cop_caps;
+	struct hv_vas_all_caps *hv_caps;
+	int rc;
+
+	/*
+	 * Linux supports user space COPY/PASTE only with Radix
+	 */
+	if (!radix_enabled()) {
+		pr_err("API is supported only with radix page tables\n");
+		return -ENOTSUPP;
+	}
+
+	hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL);
+	if (!hv_caps)
+		return -ENOMEM;
+	/*
+	 * Get VAS overall capabilities by passing 0 to feature type.
+	 */
+	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0,
+				      (u64)virt_to_phys(hv_caps));
+	if (rc)
+		goto out;
+
+	caps_all.descriptor = be64_to_cpu(hv_caps->descriptor);
+	caps_all.feat_type = be64_to_cpu(hv_caps->feat_type);
+
+	hv_cop_caps = kmalloc(sizeof(*hv_cop_caps), GFP_KERNEL);
+	if (!hv_cop_caps) {
+		rc = -ENOMEM;
+		goto out;
+	}
+	/*
+	 * QOS capabilities available
+	 */
+	if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) {
+		rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT,
+					  VAS_GZIP_QOS_FEAT_TYPE, hv_cop_caps);
+
+		if (rc)
+			goto out_cop;
+	}
+	/*
+	 * Default capabilities available
+	 */
+	if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) {
+		rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT,
+					  VAS_GZIP_DEF_FEAT_TYPE, hv_cop_caps);
+		if (rc)
+			goto out_cop;
+	}
+
+	pr_info("GZIP feature is available\n");
+
+out_cop:
+	kfree(hv_cop_caps);
+out:
+	kfree(hv_caps);
+	return rc;
+}
+machine_device_initcall(pseries, pseries_vas_init);
-- cgit v1.2.3


From b22f2d88e435cdada32581ca1f11b9806adf459a Mon Sep 17 00:00:00 2001
From: Haren Myneni
Date: Thu, 17 Jun 2021 13:36:28 -0700
Subject: powerpc/pseries/vas: Integrate API with open/close windows

This patch adds VAS window allocation/close with the corresponding
hcalls. It also integrates with the existing user space VAS API and
provides register/unregister functions to the NX pseries driver.

The driver register function is used to create the user space
interface (/dev/crypto/nx-gzip) and unregister to remove this entry.

The user space process opens this device node and makes an ioctl to
allocate a VAS window. The close interface is used to deallocate the
window.
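[ Editorial illustration: a minimal, hypothetical user-space sketch of the
  open path described above. VAS_TX_WIN_OPEN, VAS_TX_WIN_FLAG_QOS_CREDIT,
  the vas_id = -1 convention, and the /dev/crypto/nx-gzip node come from
  this series; the <asm/vas-api.h> include path and the version value 1
  are assumptions for illustration, not part of the patch. ]

	/* Hypothetical user-space usage sketch; not part of the patch. */
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <asm/vas-api.h>	/* assumed install path of the uapi header */

	int open_qos_window(void)
	{
		struct vas_tx_win_open_attr attr;
		int fd;

		fd = open("/dev/crypto/nx-gzip", O_RDWR);
		if (fd < 0) {
			perror("open");
			return -1;
		}

		memset(&attr, 0, sizeof(attr));
		attr.version = 1;	/* assumed current API version */
		attr.vas_id = -1;	/* default: let the hypervisor pick */
		attr.flags = VAS_TX_WIN_FLAG_QOS_CREDIT; /* QoS credit window */

		if (ioctl(fd, VAS_TX_WIN_OPEN, &attr) < 0) {
			perror("VAS_TX_WIN_OPEN");
			close(fd);
			return -1;
		}

		return fd;	/* closing the fd later deallocates the window */
	}

[ Requesting the QoS flag only succeeds if the system admin has assigned
  QoS credits to the LPAR; otherwise the default credit type applies. ]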
Signed-off-by: Haren Myneni Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e8d956bace3f182c4d2e66e343ff37cb0391d1fd.camel@linux.ibm.com --- arch/powerpc/include/asm/vas.h | 4 + arch/powerpc/platforms/pseries/Makefile | 1 + arch/powerpc/platforms/pseries/vas.c | 223 ++++++++++++++++++++++++++++++++ 3 files changed, 228 insertions(+) diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index 99570c33058f..57573d9c1e09 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -254,6 +254,10 @@ struct vas_all_caps { u64 feat_type; }; +int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result); +int vas_register_api_pseries(struct module *mod, + enum vas_cop_type cop_type, const char *name); +void vas_unregister_api_pseries(void); #endif /* diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index c8a2b0b05ac0..4cda0ef87be0 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -30,3 +30,4 @@ obj-$(CONFIG_PPC_SVM) += svm.o obj-$(CONFIG_FA_DUMP) += rtas-fadump.o obj-$(CONFIG_SUSPEND) += suspend.o +obj-$(CONFIG_PPC_VAS) += vas.o diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index 93794e12527d..f5a44f2f0e99 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -25,6 +26,7 @@ static struct vas_all_caps caps_all; static bool copypaste_feat; static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE]; +static DEFINE_MUTEX(vas_pseries_mutex); static long hcall_return_busy_check(long rc) { @@ -151,6 +153,227 @@ int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result) hcall, rc, query_type, result); return -EIO; } +EXPORT_SYMBOL_GPL(h_query_vas_capabilities); + +/* + * Allocate window and setup IRQ mapping. + */ +static int allocate_setup_window(struct pseries_vas_window *txwin, + u64 *domain, u8 wintype) +{ + int rc; + + rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS); + if (rc) + return rc; + + txwin->vas_win.wcreds_max = DEF_WIN_CREDS; + + return 0; +} + +static struct vas_window *vas_allocate_window(int vas_id, u64 flags, + enum vas_cop_type cop_type) +{ + long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; + struct vas_cop_feat_caps *cop_feat_caps; + struct vas_caps *caps; + struct pseries_vas_window *txwin; + int rc; + + txwin = kzalloc(sizeof(*txwin), GFP_KERNEL); + if (!txwin) + return ERR_PTR(-ENOMEM); + + /* + * A VAS window can have many credits which means that many + * requests can be issued simultaneously. But the hypervisor + * restricts one credit per window. + * The hypervisor introduces 2 different types of credits: + * Default credit type (Uses normal priority FIFO): + * A limited number of credits are assigned to partitions + * based on processor entitlement. But these credits may be + * over-committed on a system depends on whether the CPUs + * are in shared or dedicated modes - that is, more requests + * may be issued across the system than NX can service at + * once which can result in paste command failure (RMA_busy). + * Then the process has to resend requests or fall-back to + * SW compression. 
+ * Quality of Service (QoS) credit type (Uses high priority FIFO): + * To avoid NX HW contention, the system admins can assign + * QoS credits for each LPAR so that this partition is + * guaranteed access to NX resources. These credits are + * assigned to partitions via the HMC. + * Refer PAPR for more information. + * + * Allocate window with QoS credits if user requested. Otherwise + * default credits are used. + */ + if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT) + caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE]; + else + caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE]; + + cop_feat_caps = &caps->caps; + + if (atomic_inc_return(&cop_feat_caps->used_lpar_creds) > + atomic_read(&cop_feat_caps->target_lpar_creds)) { + pr_err("Credits are not available to allocate window\n"); + rc = -EINVAL; + goto out; + } + + if (vas_id == -1) { + /* + * The user space is requesting to allocate a window on + * a VAS instance where the process is executing. + * On PowerVM, domain values are passed to the hypervisor + * to select VAS instance. Useful if the process is + * affinity to NUMA node. + * The hypervisor selects VAS instance if + * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values. + * The h_allocate_vas_window hcall is defined to take a + * domain values as specified by h_home_node_associativity, + * So no unpacking needs to be done. + */ + rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain, + VPHN_FLAG_VCPU, smp_processor_id()); + if (rc != H_SUCCESS) { + pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc); + goto out; + } + } + + /* + * Allocate / Deallocate window hcalls and setup / free IRQs + * have to be protected with mutex. + * Open VAS window: Allocate window hcall and setup IRQ + * Close VAS window: Deallocate window hcall and free IRQ + * The hypervisor waits until all NX requests are + * completed before closing the window. So expects OS + * to handle NX faults, means IRQ can be freed only + * after the deallocate window hcall is returned. + * So once the window is closed with deallocate hcall before + * the IRQ is freed, it can be assigned to new allocate + * hcall with the same fault IRQ by the hypervisor. It can + * result in setup IRQ fail for the new window since the + * same fault IRQ is not freed by the OS before. + */ + mutex_lock(&vas_pseries_mutex); + rc = allocate_setup_window(txwin, (u64 *)&domain[0], + cop_feat_caps->win_type); + mutex_unlock(&vas_pseries_mutex); + if (rc) + goto out; + + /* + * Modify window and it is ready to use. 
 + */
+	rc = h_modify_vas_window(txwin);
+	if (!rc)
+		rc = get_vas_user_win_ref(&txwin->vas_win.task_ref);
+	if (rc)
+		goto out_free;
+
+	vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
+	txwin->win_type = cop_feat_caps->win_type;
+	mutex_lock(&vas_pseries_mutex);
+	list_add(&txwin->win_list, &caps->list);
+	mutex_unlock(&vas_pseries_mutex);
+
+	return &txwin->vas_win;
+
+out_free:
+	h_deallocate_vas_window(txwin->vas_win.winid);
+out:
+	atomic_dec(&cop_feat_caps->used_lpar_creds);
+	kfree(txwin);
+	return ERR_PTR(rc);
+}
+
+static u64 vas_paste_address(struct vas_window *vwin)
+{
+	struct pseries_vas_window *win;
+
+	win = container_of(vwin, struct pseries_vas_window, vas_win);
+	return win->win_addr;
+}
+
+static int deallocate_free_window(struct pseries_vas_window *win)
+{
+	int rc = 0;
+
+	rc = h_deallocate_vas_window(win->vas_win.winid);
+
+	return rc;
+}
+
+static int vas_deallocate_window(struct vas_window *vwin)
+{
+	struct pseries_vas_window *win;
+	struct vas_cop_feat_caps *caps;
+	int rc = 0;
+
+	if (!vwin)
+		return -EINVAL;
+
+	win = container_of(vwin, struct pseries_vas_window, vas_win);
+
+	/* Should not happen */
+	if (win->win_type >= VAS_MAX_FEAT_TYPE) {
+		pr_err("Window (%u): Invalid window type %u\n",
+		       vwin->winid, win->win_type);
+		return -EINVAL;
+	}
+
+	caps = &vascaps[win->win_type].caps;
+	mutex_lock(&vas_pseries_mutex);
+	rc = deallocate_free_window(win);
+	if (rc) {
+		mutex_unlock(&vas_pseries_mutex);
+		return rc;
+	}
+
+	list_del(&win->win_list);
+	atomic_dec(&caps->used_lpar_creds);
+	mutex_unlock(&vas_pseries_mutex);
+
+	put_vas_user_win_ref(&vwin->task_ref);
+	mm_context_remove_vas_window(vwin->task_ref.mm);
+
+	kfree(win);
+	return 0;
+}
+
+static const struct vas_user_win_ops vops_pseries = {
+	.open_win	= vas_allocate_window,	/* Open and configure window */
+	.paste_addr	= vas_paste_address,	/* To do copy/paste */
+	.close_win	= vas_deallocate_window, /* Close window */
+};
+
+/*
+ * Supporting only nx-gzip coprocessor type now, but this API code
+ * extended to other coprocessor types later.
+ */
+int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type,
+			     const char *name)
+{
+	int rc;
+
+	if (!copypaste_feat)
+		return -ENOTSUPP;
+
+	rc = vas_register_coproc_api(mod, cop_type, name, &vops_pseries);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(vas_register_api_pseries);
+
+void vas_unregister_api_pseries(void)
+{
+	vas_unregister_coproc_api();
+}
+EXPORT_SYMBOL_GPL(vas_unregister_api_pseries);
 
 /*
  * Get the specific capabilities based on the feature type.
-- cgit v1.2.3


From 6d0aaf5e0de00491de136f387ebed55604cedebe Mon Sep 17 00:00:00 2001
From: Haren Myneni
Date: Thu, 17 Jun 2021 13:37:06 -0700
Subject: powerpc/pseries/vas: Setup IRQ and fault handling

NX generates an interrupt when it sees a fault on the user space
buffer, and the hypervisor forwards that interrupt to the OS. The
kernel then handles the interrupt by issuing the H_GET_NX_FAULT hcall
to retrieve the fault CRB information.

This patch also adds changes to set up and free the IRQ for each
window, and handles the fault by updating the CSB.
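[ Editorial illustration: "updating the CSB" matters to the submitting
  process, which polls the coprocessor status block it supplied with the
  request. The sketch below is hypothetical; the structure layout and the
  0x80 valid-bit convention are assumptions based on the usual NX
  coprocessor status block, not definitions from this patch. ]

	/* Hypothetical user-space view of the CSB the fault handler fills
	 * in. Field layout and the 0x80 valid bit are assumed from the NX
	 * CSB convention; they are not introduced by this patch.
	 */
	#include <stdint.h>

	struct nx_csb {
		volatile uint8_t flags;	/* bit 0x80: CSB valid */
		uint8_t cs;		/* completion sequence */
		uint8_t cc;		/* completion code, 0 on success */
		uint8_t ce;		/* completion extension */
		uint32_t reserved;
		uint64_t address;	/* fault address on failure */
	};

	static int wait_for_csb(struct nx_csb *csb)
	{
		/* NX (or the kernel fault handler above) sets the valid
		 * bit when the request completes or fails.
		 */
		while (!(csb->flags & 0x80))
			;
		return csb->cc ? -1 : 0;	/* non-zero cc: request failed */
	}

[ A real consumer would bound the spin, or touch the faulting buffer and
  resubmit on failure; the point here is only where the fault handler's
  CSB update becomes visible. ]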
Signed-off-by: Haren Myneni Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b8fc66dcb783d06a099a303e5cfc69087bb3357a.camel@linux.ibm.com --- arch/powerpc/platforms/pseries/vas.c | 102 +++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index f5a44f2f0e99..3385b5400cc6 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -155,6 +156,50 @@ int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result) } EXPORT_SYMBOL_GPL(h_query_vas_capabilities); +/* + * hcall to get fault CRB from the hypervisor. + */ +static int h_get_nx_fault(u32 winid, u64 buffer) +{ + long rc; + + rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer); + + if (rc == H_SUCCESS) + return 0; + + pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n", + rc, winid, buffer); + return -EIO; + +} + +/* + * Handle the fault interrupt. + * When the fault interrupt is received for each window, query the + * hypervisor to get the fault CRB on the specific fault. Then + * process the CRB by updating CSB or send signal if the user space + * CSB is invalid. + * Note: The hypervisor forwards an interrupt for each fault request. + * So one fault CRB to process for each H_GET_NX_FAULT hcall. + */ +irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data) +{ + struct pseries_vas_window *txwin = data; + struct coprocessor_request_block crb; + struct vas_user_win_ref *tsk_ref; + int rc; + + rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb)); + if (!rc) { + tsk_ref = &txwin->vas_win.task_ref; + vas_dump_crb(&crb); + vas_update_csb(&crb, tsk_ref); + } + + return IRQ_HANDLED; +} + /* * Allocate window and setup IRQ mapping. */ @@ -166,10 +211,51 @@ static int allocate_setup_window(struct pseries_vas_window *txwin, rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS); if (rc) return rc; + /* + * On PowerVM, the hypervisor setup and forwards the fault + * interrupt per window. So the IRQ setup and fault handling + * will be done for each open window separately. + */ + txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq); + if (!txwin->fault_virq) { + pr_err("Failed irq mapping %d\n", txwin->fault_irq); + rc = -EINVAL; + goto out_win; + } + + txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d", + txwin->vas_win.winid); + if (!txwin->name) { + rc = -ENOMEM; + goto out_irq; + } + + rc = request_threaded_irq(txwin->fault_virq, NULL, + pseries_vas_fault_thread_fn, IRQF_ONESHOT, + txwin->name, txwin); + if (rc) { + pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n", + txwin->vas_win.winid, txwin->fault_virq, rc); + goto out_free; + } txwin->vas_win.wcreds_max = DEF_WIN_CREDS; return 0; +out_free: + kfree(txwin->name); +out_irq: + irq_dispose_mapping(txwin->fault_virq); +out_win: + h_deallocate_vas_window(txwin->vas_win.winid); + return rc; +} + +static inline void free_irq_setup(struct pseries_vas_window *txwin) +{ + free_irq(txwin->fault_virq, txwin); + kfree(txwin->name); + irq_dispose_mapping(txwin->fault_virq); } static struct vas_window *vas_allocate_window(int vas_id, u64 flags, @@ -284,6 +370,11 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, return &txwin->vas_win; out_free: + /* + * Window is not operational. Free IRQ before closing + * window so that do not have to hold mutex. 
+ */ + free_irq_setup(txwin); h_deallocate_vas_window(txwin->vas_win.winid); out: atomic_dec(&cop_feat_caps->used_lpar_creds); @@ -303,7 +394,18 @@ static int deallocate_free_window(struct pseries_vas_window *win) { int rc = 0; + /* + * The hypervisor waits for all requests including faults + * are processed before closing the window - Means all + * credits have to be returned. In the case of fault + * request, a credit is returned after OS issues + * H_GET_NX_FAULT hcall. + * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW + * hcall. + */ rc = h_deallocate_vas_window(win->vas_win.winid); + if (!rc) + free_irq_setup(win); return rc; } -- cgit v1.2.3 From 7da00b0e71334aa1e3d8db1cc1f40eb47cb1e188 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Thu, 17 Jun 2021 13:37:42 -0700 Subject: crypto/nx: Rename nx-842-pseries file name to nx-common-pseries Rename nx-842-pseries.c to nx-common-pseries.c to add code for new GZIP compression type. The actual functionality is not changed in this patch. Signed-off-by: Haren Myneni Acked-by: Herbert Xu Acked-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1fcf672209a14ea8944bd3e49c8a7381c8f450f8.camel@linux.ibm.com --- drivers/crypto/nx/Makefile | 2 +- drivers/crypto/nx/nx-842-pseries.c | 1129 --------------------------------- drivers/crypto/nx/nx-common-pseries.c | 1129 +++++++++++++++++++++++++++++++++ 3 files changed, 1130 insertions(+), 1130 deletions(-) delete mode 100644 drivers/crypto/nx/nx-842-pseries.c create mode 100644 drivers/crypto/nx/nx-common-pseries.c diff --git a/drivers/crypto/nx/Makefile b/drivers/crypto/nx/Makefile index bc89a20e5d9d..d00181a26dd6 100644 --- a/drivers/crypto/nx/Makefile +++ b/drivers/crypto/nx/Makefile @@ -14,5 +14,5 @@ nx-crypto-objs := nx.o \ obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_PSERIES) += nx-compress-pseries.o nx-compress.o obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_POWERNV) += nx-compress-powernv.o nx-compress.o nx-compress-objs := nx-842.o -nx-compress-pseries-objs := nx-842-pseries.o +nx-compress-pseries-objs := nx-common-pseries.o nx-compress-powernv-objs := nx-common-powernv.o diff --git a/drivers/crypto/nx/nx-842-pseries.c b/drivers/crypto/nx/nx-842-pseries.c deleted file mode 100644 index cc8dd3072b8b..000000000000 --- a/drivers/crypto/nx/nx-842-pseries.c +++ /dev/null @@ -1,1129 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Driver for IBM Power 842 compression accelerator - * - * Copyright (C) IBM Corporation, 2012 - * - * Authors: Robert Jennings - * Seth Jennings - */ - -#include - -#include "nx-842.h" -#include "nx_csbcpb.h" /* struct nx_csbcpb */ - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Robert Jennings "); -MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power processors"); -MODULE_ALIAS_CRYPTO("842"); -MODULE_ALIAS_CRYPTO("842-nx"); - -static struct nx842_constraints nx842_pseries_constraints = { - .alignment = DDE_BUFFER_ALIGN, - .multiple = DDE_BUFFER_LAST_MULT, - .minimum = DDE_BUFFER_LAST_MULT, - .maximum = PAGE_SIZE, /* dynamic, max_sync_size */ -}; - -static int check_constraints(unsigned long buf, unsigned int *len, bool in) -{ - if (!IS_ALIGNED(buf, nx842_pseries_constraints.alignment)) { - pr_debug("%s buffer 0x%lx not aligned to 0x%x\n", - in ? "input" : "output", buf, - nx842_pseries_constraints.alignment); - return -EINVAL; - } - if (*len % nx842_pseries_constraints.multiple) { - pr_debug("%s buffer len 0x%x not multiple of 0x%x\n", - in ? 
"input" : "output", *len, - nx842_pseries_constraints.multiple); - if (in) - return -EINVAL; - *len = round_down(*len, nx842_pseries_constraints.multiple); - } - if (*len < nx842_pseries_constraints.minimum) { - pr_debug("%s buffer len 0x%x under minimum 0x%x\n", - in ? "input" : "output", *len, - nx842_pseries_constraints.minimum); - return -EINVAL; - } - if (*len > nx842_pseries_constraints.maximum) { - pr_debug("%s buffer len 0x%x over maximum 0x%x\n", - in ? "input" : "output", *len, - nx842_pseries_constraints.maximum); - if (in) - return -EINVAL; - *len = nx842_pseries_constraints.maximum; - } - return 0; -} - -/* I assume we need to align the CSB? */ -#define WORKMEM_ALIGN (256) - -struct nx842_workmem { - /* scatterlist */ - char slin[4096]; - char slout[4096]; - /* coprocessor status/parameter block */ - struct nx_csbcpb csbcpb; - - char padding[WORKMEM_ALIGN]; -} __aligned(WORKMEM_ALIGN); - -/* Macros for fields within nx_csbcpb */ -/* Check the valid bit within the csbcpb valid field */ -#define NX842_CSBCBP_VALID_CHK(x) (x & BIT_MASK(7)) - -/* CE macros operate on the completion_extension field bits in the csbcpb. - * CE0 0=full completion, 1=partial completion - * CE1 0=CE0 indicates completion, 1=termination (output may be modified) - * CE2 0=processed_bytes is source bytes, 1=processed_bytes is target bytes */ -#define NX842_CSBCPB_CE0(x) (x & BIT_MASK(7)) -#define NX842_CSBCPB_CE1(x) (x & BIT_MASK(6)) -#define NX842_CSBCPB_CE2(x) (x & BIT_MASK(5)) - -/* The NX unit accepts data only on 4K page boundaries */ -#define NX842_HW_PAGE_SIZE (4096) -#define NX842_HW_PAGE_MASK (~(NX842_HW_PAGE_SIZE-1)) - -struct ibm_nx842_counters { - atomic64_t comp_complete; - atomic64_t comp_failed; - atomic64_t decomp_complete; - atomic64_t decomp_failed; - atomic64_t swdecomp; - atomic64_t comp_times[32]; - atomic64_t decomp_times[32]; -}; - -static struct nx842_devdata { - struct vio_dev *vdev; - struct device *dev; - struct ibm_nx842_counters *counters; - unsigned int max_sg_len; - unsigned int max_sync_size; - unsigned int max_sync_sg; -} __rcu *devdata; -static DEFINE_SPINLOCK(devdata_mutex); - -#define NX842_COUNTER_INC(_x) \ -static inline void nx842_inc_##_x( \ - const struct nx842_devdata *dev) { \ - if (dev) \ - atomic64_inc(&dev->counters->_x); \ -} -NX842_COUNTER_INC(comp_complete); -NX842_COUNTER_INC(comp_failed); -NX842_COUNTER_INC(decomp_complete); -NX842_COUNTER_INC(decomp_failed); -NX842_COUNTER_INC(swdecomp); - -#define NX842_HIST_SLOTS 16 - -static void ibm_nx842_incr_hist(atomic64_t *times, unsigned int time) -{ - int bucket = fls(time); - - if (bucket) - bucket = min((NX842_HIST_SLOTS - 1), bucket - 1); - - atomic64_inc(×[bucket]); -} - -/* NX unit operation flags */ -#define NX842_OP_COMPRESS 0x0 -#define NX842_OP_CRC 0x1 -#define NX842_OP_DECOMPRESS 0x2 -#define NX842_OP_COMPRESS_CRC (NX842_OP_COMPRESS | NX842_OP_CRC) -#define NX842_OP_DECOMPRESS_CRC (NX842_OP_DECOMPRESS | NX842_OP_CRC) -#define NX842_OP_ASYNC (1<<23) -#define NX842_OP_NOTIFY (1<<22) -#define NX842_OP_NOTIFY_INT(x) ((x & 0xff)<<8) - -static unsigned long nx842_get_desired_dma(struct vio_dev *viodev) -{ - /* No use of DMA mappings within the driver. 
*/ - return 0; -} - -struct nx842_slentry { - __be64 ptr; /* Real address (use __pa()) */ - __be64 len; -}; - -/* pHyp scatterlist entry */ -struct nx842_scatterlist { - int entry_nr; /* number of slentries */ - struct nx842_slentry *entries; /* ptr to array of slentries */ -}; - -/* Does not include sizeof(entry_nr) in the size */ -static inline unsigned long nx842_get_scatterlist_size( - struct nx842_scatterlist *sl) -{ - return sl->entry_nr * sizeof(struct nx842_slentry); -} - -static int nx842_build_scatterlist(unsigned long buf, int len, - struct nx842_scatterlist *sl) -{ - unsigned long entrylen; - struct nx842_slentry *entry; - - sl->entry_nr = 0; - - entry = sl->entries; - while (len) { - entry->ptr = cpu_to_be64(nx842_get_pa((void *)buf)); - entrylen = min_t(int, len, - LEN_ON_SIZE(buf, NX842_HW_PAGE_SIZE)); - entry->len = cpu_to_be64(entrylen); - - len -= entrylen; - buf += entrylen; - - sl->entry_nr++; - entry++; - } - - return 0; -} - -static int nx842_validate_result(struct device *dev, - struct cop_status_block *csb) -{ - /* The csb must be valid after returning from vio_h_cop_sync */ - if (!NX842_CSBCBP_VALID_CHK(csb->valid)) { - dev_err(dev, "%s: cspcbp not valid upon completion.\n", - __func__); - dev_dbg(dev, "valid:0x%02x cs:0x%02x cc:0x%02x ce:0x%02x\n", - csb->valid, - csb->crb_seq_number, - csb->completion_code, - csb->completion_extension); - dev_dbg(dev, "processed_bytes:%d address:0x%016lx\n", - be32_to_cpu(csb->processed_byte_count), - (unsigned long)be64_to_cpu(csb->address)); - return -EIO; - } - - /* Check return values from the hardware in the CSB */ - switch (csb->completion_code) { - case 0: /* Completed without error */ - break; - case 64: /* Compression ok, but output larger than input */ - dev_dbg(dev, "%s: output size larger than input size\n", - __func__); - break; - case 13: /* Output buffer too small */ - dev_dbg(dev, "%s: Out of space in output buffer\n", - __func__); - return -ENOSPC; - case 65: /* Calculated CRC doesn't match the passed value */ - dev_dbg(dev, "%s: CRC mismatch for decompression\n", - __func__); - return -EINVAL; - case 66: /* Input data contains an illegal template field */ - case 67: /* Template indicates data past the end of the input stream */ - dev_dbg(dev, "%s: Bad data for decompression (code:%d)\n", - __func__, csb->completion_code); - return -EINVAL; - default: - dev_dbg(dev, "%s: Unspecified error (code:%d)\n", - __func__, csb->completion_code); - return -EIO; - } - - /* Hardware sanity check */ - if (!NX842_CSBCPB_CE2(csb->completion_extension)) { - dev_err(dev, "%s: No error returned by hardware, but " - "data returned is unusable, contact support.\n" - "(Additional info: csbcbp->processed bytes " - "does not specify processed bytes for the " - "target buffer.)\n", __func__); - return -EIO; - } - - return 0; -} - -/** - * nx842_pseries_compress - Compress data using the 842 algorithm - * - * Compression provide by the NX842 coprocessor on IBM Power systems. - * The input buffer is compressed and the result is stored in the - * provided output buffer. - * - * Upon return from this function @outlen contains the length of the - * compressed data. If there is an error then @outlen will be 0 and an - * error will be specified by the return code from this function. 
- * - * @in: Pointer to input buffer - * @inlen: Length of input buffer - * @out: Pointer to output buffer - * @outlen: Length of output buffer - * @wrkmem: ptr to buffer for working memory, size determined by - * nx842_pseries_driver.workmem_size - * - * Returns: - * 0 Success, output of length @outlen stored in the buffer at @out - * -ENOMEM Unable to allocate internal buffers - * -ENOSPC Output buffer is to small - * -EIO Internal error - * -ENODEV Hardware unavailable - */ -static int nx842_pseries_compress(const unsigned char *in, unsigned int inlen, - unsigned char *out, unsigned int *outlen, - void *wmem) -{ - struct nx842_devdata *local_devdata; - struct device *dev = NULL; - struct nx842_workmem *workmem; - struct nx842_scatterlist slin, slout; - struct nx_csbcpb *csbcpb; - int ret = 0; - unsigned long inbuf, outbuf; - struct vio_pfo_op op = { - .done = NULL, - .handle = 0, - .timeout = 0, - }; - unsigned long start = get_tb(); - - inbuf = (unsigned long)in; - if (check_constraints(inbuf, &inlen, true)) - return -EINVAL; - - outbuf = (unsigned long)out; - if (check_constraints(outbuf, outlen, false)) - return -EINVAL; - - rcu_read_lock(); - local_devdata = rcu_dereference(devdata); - if (!local_devdata || !local_devdata->dev) { - rcu_read_unlock(); - return -ENODEV; - } - dev = local_devdata->dev; - - /* Init scatterlist */ - workmem = PTR_ALIGN(wmem, WORKMEM_ALIGN); - slin.entries = (struct nx842_slentry *)workmem->slin; - slout.entries = (struct nx842_slentry *)workmem->slout; - - /* Init operation */ - op.flags = NX842_OP_COMPRESS_CRC; - csbcpb = &workmem->csbcpb; - memset(csbcpb, 0, sizeof(*csbcpb)); - op.csbcpb = nx842_get_pa(csbcpb); - - if ((inbuf & NX842_HW_PAGE_MASK) == - ((inbuf + inlen - 1) & NX842_HW_PAGE_MASK)) { - /* Create direct DDE */ - op.in = nx842_get_pa((void *)inbuf); - op.inlen = inlen; - } else { - /* Create indirect DDE (scatterlist) */ - nx842_build_scatterlist(inbuf, inlen, &slin); - op.in = nx842_get_pa(slin.entries); - op.inlen = -nx842_get_scatterlist_size(&slin); - } - - if ((outbuf & NX842_HW_PAGE_MASK) == - ((outbuf + *outlen - 1) & NX842_HW_PAGE_MASK)) { - /* Create direct DDE */ - op.out = nx842_get_pa((void *)outbuf); - op.outlen = *outlen; - } else { - /* Create indirect DDE (scatterlist) */ - nx842_build_scatterlist(outbuf, *outlen, &slout); - op.out = nx842_get_pa(slout.entries); - op.outlen = -nx842_get_scatterlist_size(&slout); - } - - dev_dbg(dev, "%s: op.in %lx op.inlen %ld op.out %lx op.outlen %ld\n", - __func__, (unsigned long)op.in, (long)op.inlen, - (unsigned long)op.out, (long)op.outlen); - - /* Send request to pHyp */ - ret = vio_h_cop_sync(local_devdata->vdev, &op); - - /* Check for pHyp error */ - if (ret) { - dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n", - __func__, ret, op.hcall_err); - ret = -EIO; - goto unlock; - } - - /* Check for hardware error */ - ret = nx842_validate_result(dev, &csbcpb->csb); - if (ret) - goto unlock; - - *outlen = be32_to_cpu(csbcpb->csb.processed_byte_count); - dev_dbg(dev, "%s: processed_bytes=%d\n", __func__, *outlen); - -unlock: - if (ret) - nx842_inc_comp_failed(local_devdata); - else { - nx842_inc_comp_complete(local_devdata); - ibm_nx842_incr_hist(local_devdata->counters->comp_times, - (get_tb() - start) / tb_ticks_per_usec); - } - rcu_read_unlock(); - return ret; -} - -/** - * nx842_pseries_decompress - Decompress data using the 842 algorithm - * - * Decompression provide by the NX842 coprocessor on IBM Power systems. 
- * The input buffer is decompressed and the result is stored in the - * provided output buffer. The size allocated to the output buffer is - * provided by the caller of this function in @outlen. Upon return from - * this function @outlen contains the length of the decompressed data. - * If there is an error then @outlen will be 0 and an error will be - * specified by the return code from this function. - * - * @in: Pointer to input buffer - * @inlen: Length of input buffer - * @out: Pointer to output buffer - * @outlen: Length of output buffer - * @wrkmem: ptr to buffer for working memory, size determined by - * nx842_pseries_driver.workmem_size - * - * Returns: - * 0 Success, output of length @outlen stored in the buffer at @out - * -ENODEV Hardware decompression device is unavailable - * -ENOMEM Unable to allocate internal buffers - * -ENOSPC Output buffer is to small - * -EINVAL Bad input data encountered when attempting decompress - * -EIO Internal error - */ -static int nx842_pseries_decompress(const unsigned char *in, unsigned int inlen, - unsigned char *out, unsigned int *outlen, - void *wmem) -{ - struct nx842_devdata *local_devdata; - struct device *dev = NULL; - struct nx842_workmem *workmem; - struct nx842_scatterlist slin, slout; - struct nx_csbcpb *csbcpb; - int ret = 0; - unsigned long inbuf, outbuf; - struct vio_pfo_op op = { - .done = NULL, - .handle = 0, - .timeout = 0, - }; - unsigned long start = get_tb(); - - /* Ensure page alignment and size */ - inbuf = (unsigned long)in; - if (check_constraints(inbuf, &inlen, true)) - return -EINVAL; - - outbuf = (unsigned long)out; - if (check_constraints(outbuf, outlen, false)) - return -EINVAL; - - rcu_read_lock(); - local_devdata = rcu_dereference(devdata); - if (!local_devdata || !local_devdata->dev) { - rcu_read_unlock(); - return -ENODEV; - } - dev = local_devdata->dev; - - workmem = PTR_ALIGN(wmem, WORKMEM_ALIGN); - - /* Init scatterlist */ - slin.entries = (struct nx842_slentry *)workmem->slin; - slout.entries = (struct nx842_slentry *)workmem->slout; - - /* Init operation */ - op.flags = NX842_OP_DECOMPRESS_CRC; - csbcpb = &workmem->csbcpb; - memset(csbcpb, 0, sizeof(*csbcpb)); - op.csbcpb = nx842_get_pa(csbcpb); - - if ((inbuf & NX842_HW_PAGE_MASK) == - ((inbuf + inlen - 1) & NX842_HW_PAGE_MASK)) { - /* Create direct DDE */ - op.in = nx842_get_pa((void *)inbuf); - op.inlen = inlen; - } else { - /* Create indirect DDE (scatterlist) */ - nx842_build_scatterlist(inbuf, inlen, &slin); - op.in = nx842_get_pa(slin.entries); - op.inlen = -nx842_get_scatterlist_size(&slin); - } - - if ((outbuf & NX842_HW_PAGE_MASK) == - ((outbuf + *outlen - 1) & NX842_HW_PAGE_MASK)) { - /* Create direct DDE */ - op.out = nx842_get_pa((void *)outbuf); - op.outlen = *outlen; - } else { - /* Create indirect DDE (scatterlist) */ - nx842_build_scatterlist(outbuf, *outlen, &slout); - op.out = nx842_get_pa(slout.entries); - op.outlen = -nx842_get_scatterlist_size(&slout); - } - - dev_dbg(dev, "%s: op.in %lx op.inlen %ld op.out %lx op.outlen %ld\n", - __func__, (unsigned long)op.in, (long)op.inlen, - (unsigned long)op.out, (long)op.outlen); - - /* Send request to pHyp */ - ret = vio_h_cop_sync(local_devdata->vdev, &op); - - /* Check for pHyp error */ - if (ret) { - dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n", - __func__, ret, op.hcall_err); - goto unlock; - } - - /* Check for hardware error */ - ret = nx842_validate_result(dev, &csbcpb->csb); - if (ret) - goto unlock; - - *outlen = be32_to_cpu(csbcpb->csb.processed_byte_count); - 
-unlock: - if (ret) - /* decompress fail */ - nx842_inc_decomp_failed(local_devdata); - else { - nx842_inc_decomp_complete(local_devdata); - ibm_nx842_incr_hist(local_devdata->counters->decomp_times, - (get_tb() - start) / tb_ticks_per_usec); - } - - rcu_read_unlock(); - return ret; -} - -/** - * nx842_OF_set_defaults -- Set default (disabled) values for devdata - * - * @devdata - struct nx842_devdata to update - * - * Returns: - * 0 on success - * -ENOENT if @devdata ptr is NULL - */ -static int nx842_OF_set_defaults(struct nx842_devdata *devdata) -{ - if (devdata) { - devdata->max_sync_size = 0; - devdata->max_sync_sg = 0; - devdata->max_sg_len = 0; - return 0; - } else - return -ENOENT; -} - -/** - * nx842_OF_upd_status -- Check the device info from OF status prop - * - * The status property indicates if the accelerator is enabled. If the - * device is in the OF tree it indicates that the hardware is present. - * The status field indicates if the device is enabled when the status - * is 'okay'. Otherwise the device driver will be disabled. - * - * @prop - struct property point containing the maxsyncop for the update - * - * Returns: - * 0 - Device is available - * -ENODEV - Device is not available - */ -static int nx842_OF_upd_status(struct property *prop) -{ - const char *status = (const char *)prop->value; - - if (!strncmp(status, "okay", (size_t)prop->length)) - return 0; - if (!strncmp(status, "disabled", (size_t)prop->length)) - return -ENODEV; - dev_info(devdata->dev, "%s: unknown status '%s'\n", __func__, status); - - return -EINVAL; -} - -/** - * nx842_OF_upd_maxsglen -- Update the device info from OF maxsglen prop - * - * Definition of the 'ibm,max-sg-len' OF property: - * This field indicates the maximum byte length of a scatter list - * for the platform facility. It is a single cell encoded as with encode-int. - * - * Example: - * # od -x ibm,max-sg-len - * 0000000 0000 0ff0 - * - * In this example, the maximum byte length of a scatter list is - * 0x0ff0 (4,080). - * - * @devdata - struct nx842_devdata to update - * @prop - struct property point containing the maxsyncop for the update - * - * Returns: - * 0 on success - * -EINVAL on failure - */ -static int nx842_OF_upd_maxsglen(struct nx842_devdata *devdata, - struct property *prop) { - int ret = 0; - const unsigned int maxsglen = of_read_number(prop->value, 1); - - if (prop->length != sizeof(maxsglen)) { - dev_err(devdata->dev, "%s: unexpected format for ibm,max-sg-len property\n", __func__); - dev_dbg(devdata->dev, "%s: ibm,max-sg-len is %d bytes long, expected %lu bytes\n", __func__, - prop->length, sizeof(maxsglen)); - ret = -EINVAL; - } else { - devdata->max_sg_len = min_t(unsigned int, - maxsglen, NX842_HW_PAGE_SIZE); - } - - return ret; -} - -/** - * nx842_OF_upd_maxsyncop -- Update the device info from OF maxsyncop prop - * - * Definition of the 'ibm,max-sync-cop' OF property: - * Two series of cells. The first series of cells represents the maximums - * that can be synchronously compressed. The second series of cells - * represents the maximums that can be synchronously decompressed. - * 1. The first cell in each series contains the count of the number of - * data length, scatter list elements pairs that follow – each being - * of the form - * a. One cell data byte length - * b. 
One cell total number of scatter list elements - * - * Example: - * # od -x ibm,max-sync-cop - * 0000000 0000 0001 0000 1000 0000 01fe 0000 0001 - * 0000020 0000 1000 0000 01fe - * - * In this example, compression supports 0x1000 (4,096) data byte length - * and 0x1fe (510) total scatter list elements. Decompression supports - * 0x1000 (4,096) data byte length and 0x1f3 (510) total scatter list - * elements. - * - * @devdata - struct nx842_devdata to update - * @prop - struct property point containing the maxsyncop for the update - * - * Returns: - * 0 on success - * -EINVAL on failure - */ -static int nx842_OF_upd_maxsyncop(struct nx842_devdata *devdata, - struct property *prop) { - int ret = 0; - unsigned int comp_data_limit, decomp_data_limit; - unsigned int comp_sg_limit, decomp_sg_limit; - const struct maxsynccop_t { - __be32 comp_elements; - __be32 comp_data_limit; - __be32 comp_sg_limit; - __be32 decomp_elements; - __be32 decomp_data_limit; - __be32 decomp_sg_limit; - } *maxsynccop; - - if (prop->length != sizeof(*maxsynccop)) { - dev_err(devdata->dev, "%s: unexpected format for ibm,max-sync-cop property\n", __func__); - dev_dbg(devdata->dev, "%s: ibm,max-sync-cop is %d bytes long, expected %lu bytes\n", __func__, prop->length, - sizeof(*maxsynccop)); - ret = -EINVAL; - goto out; - } - - maxsynccop = (const struct maxsynccop_t *)prop->value; - comp_data_limit = be32_to_cpu(maxsynccop->comp_data_limit); - comp_sg_limit = be32_to_cpu(maxsynccop->comp_sg_limit); - decomp_data_limit = be32_to_cpu(maxsynccop->decomp_data_limit); - decomp_sg_limit = be32_to_cpu(maxsynccop->decomp_sg_limit); - - /* Use one limit rather than separate limits for compression and - * decompression. Set a maximum for this so as not to exceed the - * size that the header can support and round the value down to - * the hardware page size (4K) */ - devdata->max_sync_size = min(comp_data_limit, decomp_data_limit); - - devdata->max_sync_size = min_t(unsigned int, devdata->max_sync_size, - 65536); - - if (devdata->max_sync_size < 4096) { - dev_err(devdata->dev, "%s: hardware max data size (%u) is " - "less than the driver minimum, unable to use " - "the hardware device\n", - __func__, devdata->max_sync_size); - ret = -EINVAL; - goto out; - } - - nx842_pseries_constraints.maximum = devdata->max_sync_size; - - devdata->max_sync_sg = min(comp_sg_limit, decomp_sg_limit); - if (devdata->max_sync_sg < 1) { - dev_err(devdata->dev, "%s: hardware max sg size (%u) is " - "less than the driver minimum, unable to use " - "the hardware device\n", - __func__, devdata->max_sync_sg); - ret = -EINVAL; - goto out; - } - -out: - return ret; -} - -/** - * - * nx842_OF_upd -- Handle OF properties updates for the device. - * - * Set all properties from the OF tree. Optionally, a new property - * can be provided by the @new_prop pointer to overwrite an existing value. - * The device will remain disabled until all values are valid, this function - * will return an error for updates unless all values are valid. - * - * @new_prop: If not NULL, this property is being updated. If NULL, update - * all properties from the current values in the OF tree. - * - * Returns: - * 0 - Success - * -ENOMEM - Could not allocate memory for new devdata structure - * -EINVAL - property value not found, new_prop is not a recognized - * property for the device or property value is not valid. 
- * -ENODEV - Device is not available - */ -static int nx842_OF_upd(struct property *new_prop) -{ - struct nx842_devdata *old_devdata = NULL; - struct nx842_devdata *new_devdata = NULL; - struct device_node *of_node = NULL; - struct property *status = NULL; - struct property *maxsglen = NULL; - struct property *maxsyncop = NULL; - int ret = 0; - unsigned long flags; - - new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS); - if (!new_devdata) - return -ENOMEM; - - spin_lock_irqsave(&devdata_mutex, flags); - old_devdata = rcu_dereference_check(devdata, - lockdep_is_held(&devdata_mutex)); - if (old_devdata) - of_node = old_devdata->dev->of_node; - - if (!old_devdata || !of_node) { - pr_err("%s: device is not available\n", __func__); - spin_unlock_irqrestore(&devdata_mutex, flags); - kfree(new_devdata); - return -ENODEV; - } - - memcpy(new_devdata, old_devdata, sizeof(*old_devdata)); - new_devdata->counters = old_devdata->counters; - - /* Set ptrs for existing properties */ - status = of_find_property(of_node, "status", NULL); - maxsglen = of_find_property(of_node, "ibm,max-sg-len", NULL); - maxsyncop = of_find_property(of_node, "ibm,max-sync-cop", NULL); - if (!status || !maxsglen || !maxsyncop) { - dev_err(old_devdata->dev, "%s: Could not locate device properties\n", __func__); - ret = -EINVAL; - goto error_out; - } - - /* - * If this is a property update, there are only certain properties that - * we care about. Bail if it isn't in the below list - */ - if (new_prop && (strncmp(new_prop->name, "status", new_prop->length) || - strncmp(new_prop->name, "ibm,max-sg-len", new_prop->length) || - strncmp(new_prop->name, "ibm,max-sync-cop", new_prop->length))) - goto out; - - /* Perform property updates */ - ret = nx842_OF_upd_status(status); - if (ret) - goto error_out; - - ret = nx842_OF_upd_maxsglen(new_devdata, maxsglen); - if (ret) - goto error_out; - - ret = nx842_OF_upd_maxsyncop(new_devdata, maxsyncop); - if (ret) - goto error_out; - -out: - dev_info(old_devdata->dev, "%s: max_sync_size new:%u old:%u\n", - __func__, new_devdata->max_sync_size, - old_devdata->max_sync_size); - dev_info(old_devdata->dev, "%s: max_sync_sg new:%u old:%u\n", - __func__, new_devdata->max_sync_sg, - old_devdata->max_sync_sg); - dev_info(old_devdata->dev, "%s: max_sg_len new:%u old:%u\n", - __func__, new_devdata->max_sg_len, - old_devdata->max_sg_len); - - rcu_assign_pointer(devdata, new_devdata); - spin_unlock_irqrestore(&devdata_mutex, flags); - synchronize_rcu(); - dev_set_drvdata(new_devdata->dev, new_devdata); - kfree(old_devdata); - return 0; - -error_out: - if (new_devdata) { - dev_info(old_devdata->dev, "%s: device disabled\n", __func__); - nx842_OF_set_defaults(new_devdata); - rcu_assign_pointer(devdata, new_devdata); - spin_unlock_irqrestore(&devdata_mutex, flags); - synchronize_rcu(); - dev_set_drvdata(new_devdata->dev, new_devdata); - kfree(old_devdata); - } else { - dev_err(old_devdata->dev, "%s: could not update driver from hardware\n", __func__); - spin_unlock_irqrestore(&devdata_mutex, flags); - } - - if (!ret) - ret = -EINVAL; - return ret; -} - -/** - * nx842_OF_notifier - Process updates to OF properties for the device - * - * @np: notifier block - * @action: notifier action - * @update: struct pSeries_reconfig_prop_update pointer if action is - * PSERIES_UPDATE_PROPERTY - * - * Returns: - * NOTIFY_OK on success - * NOTIFY_BAD encoded with error number on failure, use - * notifier_to_errno() to decode this value - */ -static int nx842_OF_notifier(struct notifier_block *np, unsigned long action, 
- void *data) -{ - struct of_reconfig_data *upd = data; - struct nx842_devdata *local_devdata; - struct device_node *node = NULL; - - rcu_read_lock(); - local_devdata = rcu_dereference(devdata); - if (local_devdata) - node = local_devdata->dev->of_node; - - if (local_devdata && - action == OF_RECONFIG_UPDATE_PROPERTY && - !strcmp(upd->dn->name, node->name)) { - rcu_read_unlock(); - nx842_OF_upd(upd->prop); - } else - rcu_read_unlock(); - - return NOTIFY_OK; -} - -static struct notifier_block nx842_of_nb = { - .notifier_call = nx842_OF_notifier, -}; - -#define nx842_counter_read(_name) \ -static ssize_t nx842_##_name##_show(struct device *dev, \ - struct device_attribute *attr, \ - char *buf) { \ - struct nx842_devdata *local_devdata; \ - int p = 0; \ - rcu_read_lock(); \ - local_devdata = rcu_dereference(devdata); \ - if (local_devdata) \ - p = snprintf(buf, PAGE_SIZE, "%lld\n", \ - atomic64_read(&local_devdata->counters->_name)); \ - rcu_read_unlock(); \ - return p; \ -} - -#define NX842DEV_COUNTER_ATTR_RO(_name) \ - nx842_counter_read(_name); \ - static struct device_attribute dev_attr_##_name = __ATTR(_name, \ - 0444, \ - nx842_##_name##_show,\ - NULL); - -NX842DEV_COUNTER_ATTR_RO(comp_complete); -NX842DEV_COUNTER_ATTR_RO(comp_failed); -NX842DEV_COUNTER_ATTR_RO(decomp_complete); -NX842DEV_COUNTER_ATTR_RO(decomp_failed); -NX842DEV_COUNTER_ATTR_RO(swdecomp); - -static ssize_t nx842_timehist_show(struct device *, - struct device_attribute *, char *); - -static struct device_attribute dev_attr_comp_times = __ATTR(comp_times, 0444, - nx842_timehist_show, NULL); -static struct device_attribute dev_attr_decomp_times = __ATTR(decomp_times, - 0444, nx842_timehist_show, NULL); - -static ssize_t nx842_timehist_show(struct device *dev, - struct device_attribute *attr, char *buf) { - char *p = buf; - struct nx842_devdata *local_devdata; - atomic64_t *times; - int bytes_remain = PAGE_SIZE; - int bytes; - int i; - - rcu_read_lock(); - local_devdata = rcu_dereference(devdata); - if (!local_devdata) { - rcu_read_unlock(); - return 0; - } - - if (attr == &dev_attr_comp_times) - times = local_devdata->counters->comp_times; - else if (attr == &dev_attr_decomp_times) - times = local_devdata->counters->decomp_times; - else { - rcu_read_unlock(); - return 0; - } - - for (i = 0; i < (NX842_HIST_SLOTS - 2); i++) { - bytes = snprintf(p, bytes_remain, "%u-%uus:\t%lld\n", - i ? 
(2<<(i-1)) : 0, (2<counters = kzalloc(sizeof(*new_devdata->counters), - GFP_NOFS); - if (!new_devdata->counters) { - kfree(new_devdata); - return -ENOMEM; - } - - spin_lock_irqsave(&devdata_mutex, flags); - old_devdata = rcu_dereference_check(devdata, - lockdep_is_held(&devdata_mutex)); - - if (old_devdata && old_devdata->vdev != NULL) { - dev_err(&viodev->dev, "%s: Attempt to register more than one instance of the hardware\n", __func__); - ret = -1; - goto error_unlock; - } - - dev_set_drvdata(&viodev->dev, NULL); - - new_devdata->vdev = viodev; - new_devdata->dev = &viodev->dev; - nx842_OF_set_defaults(new_devdata); - - rcu_assign_pointer(devdata, new_devdata); - spin_unlock_irqrestore(&devdata_mutex, flags); - synchronize_rcu(); - kfree(old_devdata); - - of_reconfig_notifier_register(&nx842_of_nb); - - ret = nx842_OF_upd(NULL); - if (ret) - goto error; - - ret = crypto_register_alg(&nx842_pseries_alg); - if (ret) { - dev_err(&viodev->dev, "could not register comp alg: %d\n", ret); - goto error; - } - - rcu_read_lock(); - dev_set_drvdata(&viodev->dev, rcu_dereference(devdata)); - rcu_read_unlock(); - - if (sysfs_create_group(&viodev->dev.kobj, &nx842_attribute_group)) { - dev_err(&viodev->dev, "could not create sysfs device attributes\n"); - ret = -1; - goto error; - } - - return 0; - -error_unlock: - spin_unlock_irqrestore(&devdata_mutex, flags); - if (new_devdata) - kfree(new_devdata->counters); - kfree(new_devdata); -error: - return ret; -} - -static void nx842_remove(struct vio_dev *viodev) -{ - struct nx842_devdata *old_devdata; - unsigned long flags; - - pr_info("Removing IBM Power 842 compression device\n"); - sysfs_remove_group(&viodev->dev.kobj, &nx842_attribute_group); - - crypto_unregister_alg(&nx842_pseries_alg); - - spin_lock_irqsave(&devdata_mutex, flags); - old_devdata = rcu_dereference_check(devdata, - lockdep_is_held(&devdata_mutex)); - of_reconfig_notifier_unregister(&nx842_of_nb); - RCU_INIT_POINTER(devdata, NULL); - spin_unlock_irqrestore(&devdata_mutex, flags); - synchronize_rcu(); - dev_set_drvdata(&viodev->dev, NULL); - if (old_devdata) - kfree(old_devdata->counters); - kfree(old_devdata); -} - -static const struct vio_device_id nx842_vio_driver_ids[] = { - {"ibm,compression-v1", "ibm,compression"}, - {"", ""}, -}; - -static struct vio_driver nx842_vio_driver = { - .name = KBUILD_MODNAME, - .probe = nx842_probe, - .remove = nx842_remove, - .get_desired_dma = nx842_get_desired_dma, - .id_table = nx842_vio_driver_ids, -}; - -static int __init nx842_pseries_init(void) -{ - struct nx842_devdata *new_devdata; - int ret; - - if (!of_find_compatible_node(NULL, NULL, "ibm,compression")) - return -ENODEV; - - RCU_INIT_POINTER(devdata, NULL); - new_devdata = kzalloc(sizeof(*new_devdata), GFP_KERNEL); - if (!new_devdata) - return -ENOMEM; - - RCU_INIT_POINTER(devdata, new_devdata); - - ret = vio_register_driver(&nx842_vio_driver); - if (ret) { - pr_err("Could not register VIO driver %d\n", ret); - - kfree(new_devdata); - return ret; - } - - return 0; -} - -module_init(nx842_pseries_init); - -static void __exit nx842_pseries_exit(void) -{ - struct nx842_devdata *old_devdata; - unsigned long flags; - - crypto_unregister_alg(&nx842_pseries_alg); - - spin_lock_irqsave(&devdata_mutex, flags); - old_devdata = rcu_dereference_check(devdata, - lockdep_is_held(&devdata_mutex)); - RCU_INIT_POINTER(devdata, NULL); - spin_unlock_irqrestore(&devdata_mutex, flags); - synchronize_rcu(); - if (old_devdata && old_devdata->dev) - dev_set_drvdata(old_devdata->dev, NULL); - kfree(old_devdata); - 
vio_unregister_driver(&nx842_vio_driver); -} - -module_exit(nx842_pseries_exit); - diff --git a/drivers/crypto/nx/nx-common-pseries.c b/drivers/crypto/nx/nx-common-pseries.c new file mode 100644 index 000000000000..cc8dd3072b8b --- /dev/null +++ b/drivers/crypto/nx/nx-common-pseries.c @@ -0,0 +1,1129 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Driver for IBM Power 842 compression accelerator + * + * Copyright (C) IBM Corporation, 2012 + * + * Authors: Robert Jennings + * Seth Jennings + */ + +#include + +#include "nx-842.h" +#include "nx_csbcpb.h" /* struct nx_csbcpb */ + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Robert Jennings "); +MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power processors"); +MODULE_ALIAS_CRYPTO("842"); +MODULE_ALIAS_CRYPTO("842-nx"); + +static struct nx842_constraints nx842_pseries_constraints = { + .alignment = DDE_BUFFER_ALIGN, + .multiple = DDE_BUFFER_LAST_MULT, + .minimum = DDE_BUFFER_LAST_MULT, + .maximum = PAGE_SIZE, /* dynamic, max_sync_size */ +}; + +static int check_constraints(unsigned long buf, unsigned int *len, bool in) +{ + if (!IS_ALIGNED(buf, nx842_pseries_constraints.alignment)) { + pr_debug("%s buffer 0x%lx not aligned to 0x%x\n", + in ? "input" : "output", buf, + nx842_pseries_constraints.alignment); + return -EINVAL; + } + if (*len % nx842_pseries_constraints.multiple) { + pr_debug("%s buffer len 0x%x not multiple of 0x%x\n", + in ? "input" : "output", *len, + nx842_pseries_constraints.multiple); + if (in) + return -EINVAL; + *len = round_down(*len, nx842_pseries_constraints.multiple); + } + if (*len < nx842_pseries_constraints.minimum) { + pr_debug("%s buffer len 0x%x under minimum 0x%x\n", + in ? "input" : "output", *len, + nx842_pseries_constraints.minimum); + return -EINVAL; + } + if (*len > nx842_pseries_constraints.maximum) { + pr_debug("%s buffer len 0x%x over maximum 0x%x\n", + in ? "input" : "output", *len, + nx842_pseries_constraints.maximum); + if (in) + return -EINVAL; + *len = nx842_pseries_constraints.maximum; + } + return 0; +} + +/* I assume we need to align the CSB? */ +#define WORKMEM_ALIGN (256) + +struct nx842_workmem { + /* scatterlist */ + char slin[4096]; + char slout[4096]; + /* coprocessor status/parameter block */ + struct nx_csbcpb csbcpb; + + char padding[WORKMEM_ALIGN]; +} __aligned(WORKMEM_ALIGN); + +/* Macros for fields within nx_csbcpb */ +/* Check the valid bit within the csbcpb valid field */ +#define NX842_CSBCBP_VALID_CHK(x) (x & BIT_MASK(7)) + +/* CE macros operate on the completion_extension field bits in the csbcpb. 
+ * CE0 0=full completion, 1=partial completion + * CE1 0=CE0 indicates completion, 1=termination (output may be modified) + * CE2 0=processed_bytes is source bytes, 1=processed_bytes is target bytes */ +#define NX842_CSBCPB_CE0(x) (x & BIT_MASK(7)) +#define NX842_CSBCPB_CE1(x) (x & BIT_MASK(6)) +#define NX842_CSBCPB_CE2(x) (x & BIT_MASK(5)) + +/* The NX unit accepts data only on 4K page boundaries */ +#define NX842_HW_PAGE_SIZE (4096) +#define NX842_HW_PAGE_MASK (~(NX842_HW_PAGE_SIZE-1)) + +struct ibm_nx842_counters { + atomic64_t comp_complete; + atomic64_t comp_failed; + atomic64_t decomp_complete; + atomic64_t decomp_failed; + atomic64_t swdecomp; + atomic64_t comp_times[32]; + atomic64_t decomp_times[32]; +}; + +static struct nx842_devdata { + struct vio_dev *vdev; + struct device *dev; + struct ibm_nx842_counters *counters; + unsigned int max_sg_len; + unsigned int max_sync_size; + unsigned int max_sync_sg; +} __rcu *devdata; +static DEFINE_SPINLOCK(devdata_mutex); + +#define NX842_COUNTER_INC(_x) \ +static inline void nx842_inc_##_x( \ + const struct nx842_devdata *dev) { \ + if (dev) \ + atomic64_inc(&dev->counters->_x); \ +} +NX842_COUNTER_INC(comp_complete); +NX842_COUNTER_INC(comp_failed); +NX842_COUNTER_INC(decomp_complete); +NX842_COUNTER_INC(decomp_failed); +NX842_COUNTER_INC(swdecomp); + +#define NX842_HIST_SLOTS 16 + +static void ibm_nx842_incr_hist(atomic64_t *times, unsigned int time) +{ + int bucket = fls(time); + + if (bucket) + bucket = min((NX842_HIST_SLOTS - 1), bucket - 1); + + atomic64_inc(&times[bucket]); +} + +/* NX unit operation flags */ +#define NX842_OP_COMPRESS 0x0 +#define NX842_OP_CRC 0x1 +#define NX842_OP_DECOMPRESS 0x2 +#define NX842_OP_COMPRESS_CRC (NX842_OP_COMPRESS | NX842_OP_CRC) +#define NX842_OP_DECOMPRESS_CRC (NX842_OP_DECOMPRESS | NX842_OP_CRC) +#define NX842_OP_ASYNC (1<<23) +#define NX842_OP_NOTIFY (1<<22) +#define NX842_OP_NOTIFY_INT(x) ((x & 0xff)<<8) + +static unsigned long nx842_get_desired_dma(struct vio_dev *viodev) +{ + /* No use of DMA mappings within the driver. 
*/ + return 0; +} + +struct nx842_slentry { + __be64 ptr; /* Real address (use __pa()) */ + __be64 len; +}; + +/* pHyp scatterlist entry */ +struct nx842_scatterlist { + int entry_nr; /* number of slentries */ + struct nx842_slentry *entries; /* ptr to array of slentries */ +}; + +/* Does not include sizeof(entry_nr) in the size */ +static inline unsigned long nx842_get_scatterlist_size( + struct nx842_scatterlist *sl) +{ + return sl->entry_nr * sizeof(struct nx842_slentry); +} + +static int nx842_build_scatterlist(unsigned long buf, int len, + struct nx842_scatterlist *sl) +{ + unsigned long entrylen; + struct nx842_slentry *entry; + + sl->entry_nr = 0; + + entry = sl->entries; + while (len) { + entry->ptr = cpu_to_be64(nx842_get_pa((void *)buf)); + entrylen = min_t(int, len, + LEN_ON_SIZE(buf, NX842_HW_PAGE_SIZE)); + entry->len = cpu_to_be64(entrylen); + + len -= entrylen; + buf += entrylen; + + sl->entry_nr++; + entry++; + } + + return 0; +} + +static int nx842_validate_result(struct device *dev, + struct cop_status_block *csb) +{ + /* The csb must be valid after returning from vio_h_cop_sync */ + if (!NX842_CSBCBP_VALID_CHK(csb->valid)) { + dev_err(dev, "%s: cspcbp not valid upon completion.\n", + __func__); + dev_dbg(dev, "valid:0x%02x cs:0x%02x cc:0x%02x ce:0x%02x\n", + csb->valid, + csb->crb_seq_number, + csb->completion_code, + csb->completion_extension); + dev_dbg(dev, "processed_bytes:%d address:0x%016lx\n", + be32_to_cpu(csb->processed_byte_count), + (unsigned long)be64_to_cpu(csb->address)); + return -EIO; + } + + /* Check return values from the hardware in the CSB */ + switch (csb->completion_code) { + case 0: /* Completed without error */ + break; + case 64: /* Compression ok, but output larger than input */ + dev_dbg(dev, "%s: output size larger than input size\n", + __func__); + break; + case 13: /* Output buffer too small */ + dev_dbg(dev, "%s: Out of space in output buffer\n", + __func__); + return -ENOSPC; + case 65: /* Calculated CRC doesn't match the passed value */ + dev_dbg(dev, "%s: CRC mismatch for decompression\n", + __func__); + return -EINVAL; + case 66: /* Input data contains an illegal template field */ + case 67: /* Template indicates data past the end of the input stream */ + dev_dbg(dev, "%s: Bad data for decompression (code:%d)\n", + __func__, csb->completion_code); + return -EINVAL; + default: + dev_dbg(dev, "%s: Unspecified error (code:%d)\n", + __func__, csb->completion_code); + return -EIO; + } + + /* Hardware sanity check */ + if (!NX842_CSBCPB_CE2(csb->completion_extension)) { + dev_err(dev, "%s: No error returned by hardware, but " + "data returned is unusable, contact support.\n" + "(Additional info: csbcbp->processed bytes " + "does not specify processed bytes for the " + "target buffer.)\n", __func__); + return -EIO; + } + + return 0; +} + +/** + * nx842_pseries_compress - Compress data using the 842 algorithm + * + * Compression provide by the NX842 coprocessor on IBM Power systems. + * The input buffer is compressed and the result is stored in the + * provided output buffer. + * + * Upon return from this function @outlen contains the length of the + * compressed data. If there is an error then @outlen will be 0 and an + * error will be specified by the return code from this function. 
+ * + * @in: Pointer to input buffer + * @inlen: Length of input buffer + * @out: Pointer to output buffer + * @outlen: Length of output buffer + * @wrkmem: ptr to buffer for working memory, size determined by + * nx842_pseries_driver.workmem_size + * + * Returns: + * 0 Success, output of length @outlen stored in the buffer at @out + * -ENOMEM Unable to allocate internal buffers + * -ENOSPC Output buffer is to small + * -EIO Internal error + * -ENODEV Hardware unavailable + */ +static int nx842_pseries_compress(const unsigned char *in, unsigned int inlen, + unsigned char *out, unsigned int *outlen, + void *wmem) +{ + struct nx842_devdata *local_devdata; + struct device *dev = NULL; + struct nx842_workmem *workmem; + struct nx842_scatterlist slin, slout; + struct nx_csbcpb *csbcpb; + int ret = 0; + unsigned long inbuf, outbuf; + struct vio_pfo_op op = { + .done = NULL, + .handle = 0, + .timeout = 0, + }; + unsigned long start = get_tb(); + + inbuf = (unsigned long)in; + if (check_constraints(inbuf, &inlen, true)) + return -EINVAL; + + outbuf = (unsigned long)out; + if (check_constraints(outbuf, outlen, false)) + return -EINVAL; + + rcu_read_lock(); + local_devdata = rcu_dereference(devdata); + if (!local_devdata || !local_devdata->dev) { + rcu_read_unlock(); + return -ENODEV; + } + dev = local_devdata->dev; + + /* Init scatterlist */ + workmem = PTR_ALIGN(wmem, WORKMEM_ALIGN); + slin.entries = (struct nx842_slentry *)workmem->slin; + slout.entries = (struct nx842_slentry *)workmem->slout; + + /* Init operation */ + op.flags = NX842_OP_COMPRESS_CRC; + csbcpb = &workmem->csbcpb; + memset(csbcpb, 0, sizeof(*csbcpb)); + op.csbcpb = nx842_get_pa(csbcpb); + + if ((inbuf & NX842_HW_PAGE_MASK) == + ((inbuf + inlen - 1) & NX842_HW_PAGE_MASK)) { + /* Create direct DDE */ + op.in = nx842_get_pa((void *)inbuf); + op.inlen = inlen; + } else { + /* Create indirect DDE (scatterlist) */ + nx842_build_scatterlist(inbuf, inlen, &slin); + op.in = nx842_get_pa(slin.entries); + op.inlen = -nx842_get_scatterlist_size(&slin); + } + + if ((outbuf & NX842_HW_PAGE_MASK) == + ((outbuf + *outlen - 1) & NX842_HW_PAGE_MASK)) { + /* Create direct DDE */ + op.out = nx842_get_pa((void *)outbuf); + op.outlen = *outlen; + } else { + /* Create indirect DDE (scatterlist) */ + nx842_build_scatterlist(outbuf, *outlen, &slout); + op.out = nx842_get_pa(slout.entries); + op.outlen = -nx842_get_scatterlist_size(&slout); + } + + dev_dbg(dev, "%s: op.in %lx op.inlen %ld op.out %lx op.outlen %ld\n", + __func__, (unsigned long)op.in, (long)op.inlen, + (unsigned long)op.out, (long)op.outlen); + + /* Send request to pHyp */ + ret = vio_h_cop_sync(local_devdata->vdev, &op); + + /* Check for pHyp error */ + if (ret) { + dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n", + __func__, ret, op.hcall_err); + ret = -EIO; + goto unlock; + } + + /* Check for hardware error */ + ret = nx842_validate_result(dev, &csbcpb->csb); + if (ret) + goto unlock; + + *outlen = be32_to_cpu(csbcpb->csb.processed_byte_count); + dev_dbg(dev, "%s: processed_bytes=%d\n", __func__, *outlen); + +unlock: + if (ret) + nx842_inc_comp_failed(local_devdata); + else { + nx842_inc_comp_complete(local_devdata); + ibm_nx842_incr_hist(local_devdata->counters->comp_times, + (get_tb() - start) / tb_ticks_per_usec); + } + rcu_read_unlock(); + return ret; +} + +/** + * nx842_pseries_decompress - Decompress data using the 842 algorithm + * + * Decompression provide by the NX842 coprocessor on IBM Power systems. 
+ * The input buffer is decompressed and the result is stored in the + * provided output buffer. The size allocated to the output buffer is + * provided by the caller of this function in @outlen. Upon return from + * this function @outlen contains the length of the decompressed data. + * If there is an error then @outlen will be 0 and an error will be + * specified by the return code from this function. + * + * @in: Pointer to input buffer + * @inlen: Length of input buffer + * @out: Pointer to output buffer + * @outlen: Length of output buffer + * @wrkmem: ptr to buffer for working memory, size determined by + * nx842_pseries_driver.workmem_size + * + * Returns: + * 0 Success, output of length @outlen stored in the buffer at @out + * -ENODEV Hardware decompression device is unavailable + * -ENOMEM Unable to allocate internal buffers + * -ENOSPC Output buffer is to small + * -EINVAL Bad input data encountered when attempting decompress + * -EIO Internal error + */ +static int nx842_pseries_decompress(const unsigned char *in, unsigned int inlen, + unsigned char *out, unsigned int *outlen, + void *wmem) +{ + struct nx842_devdata *local_devdata; + struct device *dev = NULL; + struct nx842_workmem *workmem; + struct nx842_scatterlist slin, slout; + struct nx_csbcpb *csbcpb; + int ret = 0; + unsigned long inbuf, outbuf; + struct vio_pfo_op op = { + .done = NULL, + .handle = 0, + .timeout = 0, + }; + unsigned long start = get_tb(); + + /* Ensure page alignment and size */ + inbuf = (unsigned long)in; + if (check_constraints(inbuf, &inlen, true)) + return -EINVAL; + + outbuf = (unsigned long)out; + if (check_constraints(outbuf, outlen, false)) + return -EINVAL; + + rcu_read_lock(); + local_devdata = rcu_dereference(devdata); + if (!local_devdata || !local_devdata->dev) { + rcu_read_unlock(); + return -ENODEV; + } + dev = local_devdata->dev; + + workmem = PTR_ALIGN(wmem, WORKMEM_ALIGN); + + /* Init scatterlist */ + slin.entries = (struct nx842_slentry *)workmem->slin; + slout.entries = (struct nx842_slentry *)workmem->slout; + + /* Init operation */ + op.flags = NX842_OP_DECOMPRESS_CRC; + csbcpb = &workmem->csbcpb; + memset(csbcpb, 0, sizeof(*csbcpb)); + op.csbcpb = nx842_get_pa(csbcpb); + + if ((inbuf & NX842_HW_PAGE_MASK) == + ((inbuf + inlen - 1) & NX842_HW_PAGE_MASK)) { + /* Create direct DDE */ + op.in = nx842_get_pa((void *)inbuf); + op.inlen = inlen; + } else { + /* Create indirect DDE (scatterlist) */ + nx842_build_scatterlist(inbuf, inlen, &slin); + op.in = nx842_get_pa(slin.entries); + op.inlen = -nx842_get_scatterlist_size(&slin); + } + + if ((outbuf & NX842_HW_PAGE_MASK) == + ((outbuf + *outlen - 1) & NX842_HW_PAGE_MASK)) { + /* Create direct DDE */ + op.out = nx842_get_pa((void *)outbuf); + op.outlen = *outlen; + } else { + /* Create indirect DDE (scatterlist) */ + nx842_build_scatterlist(outbuf, *outlen, &slout); + op.out = nx842_get_pa(slout.entries); + op.outlen = -nx842_get_scatterlist_size(&slout); + } + + dev_dbg(dev, "%s: op.in %lx op.inlen %ld op.out %lx op.outlen %ld\n", + __func__, (unsigned long)op.in, (long)op.inlen, + (unsigned long)op.out, (long)op.outlen); + + /* Send request to pHyp */ + ret = vio_h_cop_sync(local_devdata->vdev, &op); + + /* Check for pHyp error */ + if (ret) { + dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n", + __func__, ret, op.hcall_err); + goto unlock; + } + + /* Check for hardware error */ + ret = nx842_validate_result(dev, &csbcpb->csb); + if (ret) + goto unlock; + + *outlen = be32_to_cpu(csbcpb->csb.processed_byte_count); + 
+unlock: + if (ret) + /* decompress fail */ + nx842_inc_decomp_failed(local_devdata); + else { + nx842_inc_decomp_complete(local_devdata); + ibm_nx842_incr_hist(local_devdata->counters->decomp_times, + (get_tb() - start) / tb_ticks_per_usec); + } + + rcu_read_unlock(); + return ret; +} + +/** + * nx842_OF_set_defaults -- Set default (disabled) values for devdata + * + * @devdata - struct nx842_devdata to update + * + * Returns: + * 0 on success + * -ENOENT if @devdata ptr is NULL + */ +static int nx842_OF_set_defaults(struct nx842_devdata *devdata) +{ + if (devdata) { + devdata->max_sync_size = 0; + devdata->max_sync_sg = 0; + devdata->max_sg_len = 0; + return 0; + } else + return -ENOENT; +} + +/** + * nx842_OF_upd_status -- Check the device info from OF status prop + * + * The status property indicates if the accelerator is enabled. If the + * device is in the OF tree it indicates that the hardware is present. + * The status field indicates if the device is enabled when the status + * is 'okay'. Otherwise the device driver will be disabled. + * + * @prop - struct property point containing the maxsyncop for the update + * + * Returns: + * 0 - Device is available + * -ENODEV - Device is not available + */ +static int nx842_OF_upd_status(struct property *prop) +{ + const char *status = (const char *)prop->value; + + if (!strncmp(status, "okay", (size_t)prop->length)) + return 0; + if (!strncmp(status, "disabled", (size_t)prop->length)) + return -ENODEV; + dev_info(devdata->dev, "%s: unknown status '%s'\n", __func__, status); + + return -EINVAL; +} + +/** + * nx842_OF_upd_maxsglen -- Update the device info from OF maxsglen prop + * + * Definition of the 'ibm,max-sg-len' OF property: + * This field indicates the maximum byte length of a scatter list + * for the platform facility. It is a single cell encoded as with encode-int. + * + * Example: + * # od -x ibm,max-sg-len + * 0000000 0000 0ff0 + * + * In this example, the maximum byte length of a scatter list is + * 0x0ff0 (4,080). + * + * @devdata - struct nx842_devdata to update + * @prop - struct property point containing the maxsyncop for the update + * + * Returns: + * 0 on success + * -EINVAL on failure + */ +static int nx842_OF_upd_maxsglen(struct nx842_devdata *devdata, + struct property *prop) { + int ret = 0; + const unsigned int maxsglen = of_read_number(prop->value, 1); + + if (prop->length != sizeof(maxsglen)) { + dev_err(devdata->dev, "%s: unexpected format for ibm,max-sg-len property\n", __func__); + dev_dbg(devdata->dev, "%s: ibm,max-sg-len is %d bytes long, expected %lu bytes\n", __func__, + prop->length, sizeof(maxsglen)); + ret = -EINVAL; + } else { + devdata->max_sg_len = min_t(unsigned int, + maxsglen, NX842_HW_PAGE_SIZE); + } + + return ret; +} + +/** + * nx842_OF_upd_maxsyncop -- Update the device info from OF maxsyncop prop + * + * Definition of the 'ibm,max-sync-cop' OF property: + * Two series of cells. The first series of cells represents the maximums + * that can be synchronously compressed. The second series of cells + * represents the maximums that can be synchronously decompressed. + * 1. The first cell in each series contains the count of the number of + * data length, scatter list elements pairs that follow – each being + * of the form + * a. One cell data byte length + * b. 
One cell total number of scatter list elements + * + * Example: + * # od -x ibm,max-sync-cop + * 0000000 0000 0001 0000 1000 0000 01fe 0000 0001 + * 0000020 0000 1000 0000 01fe + * + * In this example, compression supports 0x1000 (4,096) data byte length + * and 0x1fe (510) total scatter list elements. Decompression supports + * 0x1000 (4,096) data byte length and 0x1f3 (510) total scatter list + * elements. + * + * @devdata - struct nx842_devdata to update + * @prop - struct property point containing the maxsyncop for the update + * + * Returns: + * 0 on success + * -EINVAL on failure + */ +static int nx842_OF_upd_maxsyncop(struct nx842_devdata *devdata, + struct property *prop) { + int ret = 0; + unsigned int comp_data_limit, decomp_data_limit; + unsigned int comp_sg_limit, decomp_sg_limit; + const struct maxsynccop_t { + __be32 comp_elements; + __be32 comp_data_limit; + __be32 comp_sg_limit; + __be32 decomp_elements; + __be32 decomp_data_limit; + __be32 decomp_sg_limit; + } *maxsynccop; + + if (prop->length != sizeof(*maxsynccop)) { + dev_err(devdata->dev, "%s: unexpected format for ibm,max-sync-cop property\n", __func__); + dev_dbg(devdata->dev, "%s: ibm,max-sync-cop is %d bytes long, expected %lu bytes\n", __func__, prop->length, + sizeof(*maxsynccop)); + ret = -EINVAL; + goto out; + } + + maxsynccop = (const struct maxsynccop_t *)prop->value; + comp_data_limit = be32_to_cpu(maxsynccop->comp_data_limit); + comp_sg_limit = be32_to_cpu(maxsynccop->comp_sg_limit); + decomp_data_limit = be32_to_cpu(maxsynccop->decomp_data_limit); + decomp_sg_limit = be32_to_cpu(maxsynccop->decomp_sg_limit); + + /* Use one limit rather than separate limits for compression and + * decompression. Set a maximum for this so as not to exceed the + * size that the header can support and round the value down to + * the hardware page size (4K) */ + devdata->max_sync_size = min(comp_data_limit, decomp_data_limit); + + devdata->max_sync_size = min_t(unsigned int, devdata->max_sync_size, + 65536); + + if (devdata->max_sync_size < 4096) { + dev_err(devdata->dev, "%s: hardware max data size (%u) is " + "less than the driver minimum, unable to use " + "the hardware device\n", + __func__, devdata->max_sync_size); + ret = -EINVAL; + goto out; + } + + nx842_pseries_constraints.maximum = devdata->max_sync_size; + + devdata->max_sync_sg = min(comp_sg_limit, decomp_sg_limit); + if (devdata->max_sync_sg < 1) { + dev_err(devdata->dev, "%s: hardware max sg size (%u) is " + "less than the driver minimum, unable to use " + "the hardware device\n", + __func__, devdata->max_sync_sg); + ret = -EINVAL; + goto out; + } + +out: + return ret; +} + +/** + * + * nx842_OF_upd -- Handle OF properties updates for the device. + * + * Set all properties from the OF tree. Optionally, a new property + * can be provided by the @new_prop pointer to overwrite an existing value. + * The device will remain disabled until all values are valid, this function + * will return an error for updates unless all values are valid. + * + * @new_prop: If not NULL, this property is being updated. If NULL, update + * all properties from the current values in the OF tree. + * + * Returns: + * 0 - Success + * -ENOMEM - Could not allocate memory for new devdata structure + * -EINVAL - property value not found, new_prop is not a recognized + * property for the device or property value is not valid. 
+ * -ENODEV - Device is not available + */ +static int nx842_OF_upd(struct property *new_prop) +{ + struct nx842_devdata *old_devdata = NULL; + struct nx842_devdata *new_devdata = NULL; + struct device_node *of_node = NULL; + struct property *status = NULL; + struct property *maxsglen = NULL; + struct property *maxsyncop = NULL; + int ret = 0; + unsigned long flags; + + new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS); + if (!new_devdata) + return -ENOMEM; + + spin_lock_irqsave(&devdata_mutex, flags); + old_devdata = rcu_dereference_check(devdata, + lockdep_is_held(&devdata_mutex)); + if (old_devdata) + of_node = old_devdata->dev->of_node; + + if (!old_devdata || !of_node) { + pr_err("%s: device is not available\n", __func__); + spin_unlock_irqrestore(&devdata_mutex, flags); + kfree(new_devdata); + return -ENODEV; + } + + memcpy(new_devdata, old_devdata, sizeof(*old_devdata)); + new_devdata->counters = old_devdata->counters; + + /* Set ptrs for existing properties */ + status = of_find_property(of_node, "status", NULL); + maxsglen = of_find_property(of_node, "ibm,max-sg-len", NULL); + maxsyncop = of_find_property(of_node, "ibm,max-sync-cop", NULL); + if (!status || !maxsglen || !maxsyncop) { + dev_err(old_devdata->dev, "%s: Could not locate device properties\n", __func__); + ret = -EINVAL; + goto error_out; + } + + /* + * If this is a property update, there are only certain properties that + * we care about. Bail if it isn't in the below list + */ + if (new_prop && (strncmp(new_prop->name, "status", new_prop->length) || + strncmp(new_prop->name, "ibm,max-sg-len", new_prop->length) || + strncmp(new_prop->name, "ibm,max-sync-cop", new_prop->length))) + goto out; + + /* Perform property updates */ + ret = nx842_OF_upd_status(status); + if (ret) + goto error_out; + + ret = nx842_OF_upd_maxsglen(new_devdata, maxsglen); + if (ret) + goto error_out; + + ret = nx842_OF_upd_maxsyncop(new_devdata, maxsyncop); + if (ret) + goto error_out; + +out: + dev_info(old_devdata->dev, "%s: max_sync_size new:%u old:%u\n", + __func__, new_devdata->max_sync_size, + old_devdata->max_sync_size); + dev_info(old_devdata->dev, "%s: max_sync_sg new:%u old:%u\n", + __func__, new_devdata->max_sync_sg, + old_devdata->max_sync_sg); + dev_info(old_devdata->dev, "%s: max_sg_len new:%u old:%u\n", + __func__, new_devdata->max_sg_len, + old_devdata->max_sg_len); + + rcu_assign_pointer(devdata, new_devdata); + spin_unlock_irqrestore(&devdata_mutex, flags); + synchronize_rcu(); + dev_set_drvdata(new_devdata->dev, new_devdata); + kfree(old_devdata); + return 0; + +error_out: + if (new_devdata) { + dev_info(old_devdata->dev, "%s: device disabled\n", __func__); + nx842_OF_set_defaults(new_devdata); + rcu_assign_pointer(devdata, new_devdata); + spin_unlock_irqrestore(&devdata_mutex, flags); + synchronize_rcu(); + dev_set_drvdata(new_devdata->dev, new_devdata); + kfree(old_devdata); + } else { + dev_err(old_devdata->dev, "%s: could not update driver from hardware\n", __func__); + spin_unlock_irqrestore(&devdata_mutex, flags); + } + + if (!ret) + ret = -EINVAL; + return ret; +} + +/** + * nx842_OF_notifier - Process updates to OF properties for the device + * + * @np: notifier block + * @action: notifier action + * @update: struct pSeries_reconfig_prop_update pointer if action is + * PSERIES_UPDATE_PROPERTY + * + * Returns: + * NOTIFY_OK on success + * NOTIFY_BAD encoded with error number on failure, use + * notifier_to_errno() to decode this value + */ +static int nx842_OF_notifier(struct notifier_block *np, unsigned long action, 
+ void *data) +{ + struct of_reconfig_data *upd = data; + struct nx842_devdata *local_devdata; + struct device_node *node = NULL; + + rcu_read_lock(); + local_devdata = rcu_dereference(devdata); + if (local_devdata) + node = local_devdata->dev->of_node; + + if (local_devdata && + action == OF_RECONFIG_UPDATE_PROPERTY && + !strcmp(upd->dn->name, node->name)) { + rcu_read_unlock(); + nx842_OF_upd(upd->prop); + } else + rcu_read_unlock(); + + return NOTIFY_OK; +} + +static struct notifier_block nx842_of_nb = { + .notifier_call = nx842_OF_notifier, +}; + +#define nx842_counter_read(_name) \ +static ssize_t nx842_##_name##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) { \ + struct nx842_devdata *local_devdata; \ + int p = 0; \ + rcu_read_lock(); \ + local_devdata = rcu_dereference(devdata); \ + if (local_devdata) \ + p = snprintf(buf, PAGE_SIZE, "%lld\n", \ + atomic64_read(&local_devdata->counters->_name)); \ + rcu_read_unlock(); \ + return p; \ +} + +#define NX842DEV_COUNTER_ATTR_RO(_name) \ + nx842_counter_read(_name); \ + static struct device_attribute dev_attr_##_name = __ATTR(_name, \ + 0444, \ + nx842_##_name##_show,\ + NULL); + +NX842DEV_COUNTER_ATTR_RO(comp_complete); +NX842DEV_COUNTER_ATTR_RO(comp_failed); +NX842DEV_COUNTER_ATTR_RO(decomp_complete); +NX842DEV_COUNTER_ATTR_RO(decomp_failed); +NX842DEV_COUNTER_ATTR_RO(swdecomp); + +static ssize_t nx842_timehist_show(struct device *, + struct device_attribute *, char *); + +static struct device_attribute dev_attr_comp_times = __ATTR(comp_times, 0444, + nx842_timehist_show, NULL); +static struct device_attribute dev_attr_decomp_times = __ATTR(decomp_times, + 0444, nx842_timehist_show, NULL); + +static ssize_t nx842_timehist_show(struct device *dev, + struct device_attribute *attr, char *buf) { + char *p = buf; + struct nx842_devdata *local_devdata; + atomic64_t *times; + int bytes_remain = PAGE_SIZE; + int bytes; + int i; + + rcu_read_lock(); + local_devdata = rcu_dereference(devdata); + if (!local_devdata) { + rcu_read_unlock(); + return 0; + } + + if (attr == &dev_attr_comp_times) + times = local_devdata->counters->comp_times; + else if (attr == &dev_attr_decomp_times) + times = local_devdata->counters->decomp_times; + else { + rcu_read_unlock(); + return 0; + } + + for (i = 0; i < (NX842_HIST_SLOTS - 2); i++) { + bytes = snprintf(p, bytes_remain, "%u-%uus:\t%lld\n", + i ? 
(2<<(i-1)) : 0, (2<counters = kzalloc(sizeof(*new_devdata->counters), + GFP_NOFS); + if (!new_devdata->counters) { + kfree(new_devdata); + return -ENOMEM; + } + + spin_lock_irqsave(&devdata_mutex, flags); + old_devdata = rcu_dereference_check(devdata, + lockdep_is_held(&devdata_mutex)); + + if (old_devdata && old_devdata->vdev != NULL) { + dev_err(&viodev->dev, "%s: Attempt to register more than one instance of the hardware\n", __func__); + ret = -1; + goto error_unlock; + } + + dev_set_drvdata(&viodev->dev, NULL); + + new_devdata->vdev = viodev; + new_devdata->dev = &viodev->dev; + nx842_OF_set_defaults(new_devdata); + + rcu_assign_pointer(devdata, new_devdata); + spin_unlock_irqrestore(&devdata_mutex, flags); + synchronize_rcu(); + kfree(old_devdata); + + of_reconfig_notifier_register(&nx842_of_nb); + + ret = nx842_OF_upd(NULL); + if (ret) + goto error; + + ret = crypto_register_alg(&nx842_pseries_alg); + if (ret) { + dev_err(&viodev->dev, "could not register comp alg: %d\n", ret); + goto error; + } + + rcu_read_lock(); + dev_set_drvdata(&viodev->dev, rcu_dereference(devdata)); + rcu_read_unlock(); + + if (sysfs_create_group(&viodev->dev.kobj, &nx842_attribute_group)) { + dev_err(&viodev->dev, "could not create sysfs device attributes\n"); + ret = -1; + goto error; + } + + return 0; + +error_unlock: + spin_unlock_irqrestore(&devdata_mutex, flags); + if (new_devdata) + kfree(new_devdata->counters); + kfree(new_devdata); +error: + return ret; +} + +static void nx842_remove(struct vio_dev *viodev) +{ + struct nx842_devdata *old_devdata; + unsigned long flags; + + pr_info("Removing IBM Power 842 compression device\n"); + sysfs_remove_group(&viodev->dev.kobj, &nx842_attribute_group); + + crypto_unregister_alg(&nx842_pseries_alg); + + spin_lock_irqsave(&devdata_mutex, flags); + old_devdata = rcu_dereference_check(devdata, + lockdep_is_held(&devdata_mutex)); + of_reconfig_notifier_unregister(&nx842_of_nb); + RCU_INIT_POINTER(devdata, NULL); + spin_unlock_irqrestore(&devdata_mutex, flags); + synchronize_rcu(); + dev_set_drvdata(&viodev->dev, NULL); + if (old_devdata) + kfree(old_devdata->counters); + kfree(old_devdata); +} + +static const struct vio_device_id nx842_vio_driver_ids[] = { + {"ibm,compression-v1", "ibm,compression"}, + {"", ""}, +}; + +static struct vio_driver nx842_vio_driver = { + .name = KBUILD_MODNAME, + .probe = nx842_probe, + .remove = nx842_remove, + .get_desired_dma = nx842_get_desired_dma, + .id_table = nx842_vio_driver_ids, +}; + +static int __init nx842_pseries_init(void) +{ + struct nx842_devdata *new_devdata; + int ret; + + if (!of_find_compatible_node(NULL, NULL, "ibm,compression")) + return -ENODEV; + + RCU_INIT_POINTER(devdata, NULL); + new_devdata = kzalloc(sizeof(*new_devdata), GFP_KERNEL); + if (!new_devdata) + return -ENOMEM; + + RCU_INIT_POINTER(devdata, new_devdata); + + ret = vio_register_driver(&nx842_vio_driver); + if (ret) { + pr_err("Could not register VIO driver %d\n", ret); + + kfree(new_devdata); + return ret; + } + + return 0; +} + +module_init(nx842_pseries_init); + +static void __exit nx842_pseries_exit(void) +{ + struct nx842_devdata *old_devdata; + unsigned long flags; + + crypto_unregister_alg(&nx842_pseries_alg); + + spin_lock_irqsave(&devdata_mutex, flags); + old_devdata = rcu_dereference_check(devdata, + lockdep_is_held(&devdata_mutex)); + RCU_INIT_POINTER(devdata, NULL); + spin_unlock_irqrestore(&devdata_mutex, flags); + synchronize_rcu(); + if (old_devdata && old_devdata->dev) + dev_set_drvdata(old_devdata->dev, NULL); + kfree(old_devdata); + 
vio_unregister_driver(&nx842_vio_driver); +} + +module_exit(nx842_pseries_exit); + -- cgit v1.2.3 From b4ba22114c78de48fda3818f569f75e97d58c719 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Thu, 17 Jun 2021 13:38:36 -0700 Subject: crypto/nx: Get NX capabilities for GZIP coprocessor type The hypervisor provides different NX capabilities that it supports. These capabilities such as recommended minimum compression / decompression lengths and the maximum request buffer size in bytes are used to define the user space NX request. NX will reject the request if the buffer size is more than the maximum buffer size. Whereas compression / decompression lengths are recommended values for better performance. Changes to get NX overall capabilities which points to the specific features that the hypervisor supports. Then retrieve the capabilities for the specific feature (available only for NXGZIP). Signed-off-by: Haren Myneni Acked-by: Herbert Xu Acked-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f2b6a1fb8b6112595a73d81c67a35af4e7f5d0a3.camel@linux.ibm.com --- drivers/crypto/nx/nx-common-pseries.c | 87 +++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/drivers/crypto/nx/nx-common-pseries.c b/drivers/crypto/nx/nx-common-pseries.c index cc8dd3072b8b..9fc2abb56019 100644 --- a/drivers/crypto/nx/nx-common-pseries.c +++ b/drivers/crypto/nx/nx-common-pseries.c @@ -9,6 +9,8 @@ */ #include +#include +#include #include "nx-842.h" #include "nx_csbcpb.h" /* struct nx_csbcpb */ @@ -19,6 +21,29 @@ MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power processors"); MODULE_ALIAS_CRYPTO("842"); MODULE_ALIAS_CRYPTO("842-nx"); +/* + * Coprocessor type specific capabilities from the hypervisor. + */ +struct hv_nx_cop_caps { + __be64 descriptor; + __be64 req_max_processed_len; /* Max bytes in one GZIP request */ + __be64 min_compress_len; /* Min compression size in bytes */ + __be64 min_decompress_len; /* Min decompression size in bytes */ +} __packed __aligned(0x1000); + +/* + * Coprocessor type specific capabilities. + */ +struct nx_cop_caps { + u64 descriptor; + u64 req_max_processed_len; /* Max bytes in one GZIP request */ + u64 min_compress_len; /* Min compression in bytes */ + u64 min_decompress_len; /* Min decompression in bytes */ +}; + +static u64 caps_feat; +static struct nx_cop_caps nx_cop_caps; + static struct nx842_constraints nx842_pseries_constraints = { .alignment = DDE_BUFFER_ALIGN, .multiple = DDE_BUFFER_LAST_MULT, @@ -1065,6 +1090,64 @@ static void nx842_remove(struct vio_dev *viodev) kfree(old_devdata); } +/* + * Get NX capabilities from the hypervisor. + * Only NXGZIP capabilities are provided by the hypersvisor right + * now and these values are available to user space with sysfs. 
+ */ +static void __init nxcop_get_capabilities(void) +{ + struct hv_vas_all_caps *hv_caps; + struct hv_nx_cop_caps *hv_nxc; + int rc; + + hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL); + if (!hv_caps) + return; + /* + * Get NX overall capabilities with feature type=0 + */ + rc = h_query_vas_capabilities(H_QUERY_NX_CAPABILITIES, 0, + (u64)virt_to_phys(hv_caps)); + if (rc) + goto out; + + caps_feat = be64_to_cpu(hv_caps->feat_type); + /* + * NX-GZIP feature available + */ + if (caps_feat & VAS_NX_GZIP_FEAT_BIT) { + hv_nxc = kmalloc(sizeof(*hv_nxc), GFP_KERNEL); + if (!hv_nxc) + goto out; + /* + * Get capabilities for NX-GZIP feature + */ + rc = h_query_vas_capabilities(H_QUERY_NX_CAPABILITIES, + VAS_NX_GZIP_FEAT, + (u64)virt_to_phys(hv_nxc)); + } else { + pr_err("NX-GZIP feature is not available\n"); + rc = -EINVAL; + } + + if (!rc) { + nx_cop_caps.descriptor = be64_to_cpu(hv_nxc->descriptor); + nx_cop_caps.req_max_processed_len = + be64_to_cpu(hv_nxc->req_max_processed_len); + nx_cop_caps.min_compress_len = + be64_to_cpu(hv_nxc->min_compress_len); + nx_cop_caps.min_decompress_len = + be64_to_cpu(hv_nxc->min_decompress_len); + } else { + caps_feat = 0; + } + + kfree(hv_nxc); +out: + kfree(hv_caps); +} + static const struct vio_device_id nx842_vio_driver_ids[] = { {"ibm,compression-v1", "ibm,compression"}, {"", ""}, @@ -1092,6 +1175,10 @@ static int __init nx842_pseries_init(void) return -ENOMEM; RCU_INIT_POINTER(devdata, new_devdata); + /* + * Get NX capabilities from the hypervisor. + */ + nxcop_get_capabilities(); ret = vio_register_driver(&nx842_vio_driver); if (ret) { -- cgit v1.2.3 From 8c099490fd2bd3b012b3b6d0babbba3b90e69b55 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Thu, 17 Jun 2021 13:39:08 -0700 Subject: crypto/nx: Add sysfs interface to export NX capabilities Export NX-GZIP capabilities to userspace in sysfs /sys/devices/vio/ibm,compression-v1/nx_gzip_caps directory. These are queried by userspace accelerator libraries to set minimum length heuristics and maximum limits on request sizes. NX-GZIP capabilities: min_compress_len /*Recommended minimum compress length in bytes*/ min_decompress_len /*Recommended minimum decompress length in bytes*/ req_max_processed_len /* Maximum number of bytes processed in one request */ NX will return RMA_Reject if the request buffer size is greater than req_max_processed_len. 
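To make the intended consumption concrete, here is a minimal userspace sketch of how an accelerator library might read these values; the sysfs directory comes from the commit message above, while the helper name and the fallback policy in the comments are illustrative assumptions, not part of the patch:

    #include <stdio.h>

    /* Read one decimal capability value from the nx_gzip_caps directory.
     * Returns -1 on any error (file missing, parse failure). */
    static long long read_nx_cap(const char *name)
    {
    	char path[256];
    	long long val;
    	FILE *f;

    	snprintf(path, sizeof(path),
    		 "/sys/devices/vio/ibm,compression-v1/nx_gzip_caps/%s", name);
    	f = fopen(path, "r");
    	if (!f)
    		return -1;
    	if (fscanf(f, "%lld", &val) != 1)
    		val = -1;
    	fclose(f);
    	return val;
    }

    int main(void)
    {
    	long long max_req = read_nx_cap("req_max_processed_len");
    	long long min_comp = read_nx_cap("min_compress_len");

    	/* A library would compress small buffers in software and split
    	 * any request larger than max_req to avoid an RMA_Reject. */
    	printf("max request: %lld, min compress: %lld\n", max_req, min_comp);
    	return 0;
    }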
Signed-off-by: Haren Myneni Acked-by: Herbert Xu Acked-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/510da86abbd904878d5f13d74aba72603c37d783.camel@linux.ibm.com --- drivers/crypto/nx/nx-common-pseries.c | 43 +++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/drivers/crypto/nx/nx-common-pseries.c b/drivers/crypto/nx/nx-common-pseries.c index 9fc2abb56019..f51a50d40504 100644 --- a/drivers/crypto/nx/nx-common-pseries.c +++ b/drivers/crypto/nx/nx-common-pseries.c @@ -967,6 +967,36 @@ static struct attribute_group nx842_attribute_group = { .attrs = nx842_sysfs_entries, }; +#define nxcop_caps_read(_name) \ +static ssize_t nxcop_##_name##_show(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + return sprintf(buf, "%lld\n", nx_cop_caps._name); \ +} + +#define NXCT_ATTR_RO(_name) \ + nxcop_caps_read(_name); \ + static struct device_attribute dev_attr_##_name = __ATTR(_name, \ + 0444, \ + nxcop_##_name##_show, \ + NULL); + +NXCT_ATTR_RO(req_max_processed_len); +NXCT_ATTR_RO(min_compress_len); +NXCT_ATTR_RO(min_decompress_len); + +static struct attribute *nxcop_caps_sysfs_entries[] = { + &dev_attr_req_max_processed_len.attr, + &dev_attr_min_compress_len.attr, + &dev_attr_min_decompress_len.attr, + NULL, +}; + +static struct attribute_group nxcop_caps_attr_group = { + .name = "nx_gzip_caps", + .attrs = nxcop_caps_sysfs_entries, +}; + static struct nx842_driver nx842_pseries_driver = { .name = KBUILD_MODNAME, .owner = THIS_MODULE, @@ -1056,6 +1086,16 @@ static int nx842_probe(struct vio_dev *viodev, goto error; } + if (caps_feat) { + if (sysfs_create_group(&viodev->dev.kobj, + &nxcop_caps_attr_group)) { + dev_err(&viodev->dev, + "Could not create sysfs NX capability entries\n"); + ret = -1; + goto error; + } + } + return 0; error_unlock: @@ -1075,6 +1115,9 @@ static void nx842_remove(struct vio_dev *viodev) pr_info("Removing IBM Power 842 compression device\n"); sysfs_remove_group(&viodev->dev.kobj, &nx842_attribute_group); + if (caps_feat) + sysfs_remove_group(&viodev->dev.kobj, &nxcop_caps_attr_group); + crypto_unregister_alg(&nx842_pseries_alg); spin_lock_irqsave(&devdata_mutex, flags); -- cgit v1.2.3 From 99cd49bb39516d1beb1c38ae629b15ccb923198c Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Thu, 17 Jun 2021 13:39:41 -0700 Subject: crypto/nx: Register and unregister VAS interface on PowerVM The user space uses /dev/crypto/nx-gzip interface to setup VAS windows, create paste mapping and close windows. This patch adds changes to create/remove this interface with VAS register/unregister functions on PowerVM platform. Signed-off-by: Haren Myneni Acked-by: Herbert Xu Acked-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/121ea1f4eb3004f3b8f4fe8abefaecc88b292efd.camel@linux.ibm.com --- drivers/crypto/nx/Kconfig | 1 + drivers/crypto/nx/nx-common-pseries.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/crypto/nx/Kconfig b/drivers/crypto/nx/Kconfig index 23e3d0160e67..2a35e0e785bd 100644 --- a/drivers/crypto/nx/Kconfig +++ b/drivers/crypto/nx/Kconfig @@ -29,6 +29,7 @@ if CRYPTO_DEV_NX_COMPRESS config CRYPTO_DEV_NX_COMPRESS_PSERIES tristate "Compression acceleration support on pSeries platform" depends on PPC_PSERIES && IBMVIO + depends on PPC_VAS default y help Support for PowerPC Nest (NX) compression acceleration. 
This diff --git a/drivers/crypto/nx/nx-common-pseries.c b/drivers/crypto/nx/nx-common-pseries.c index f51a50d40504..6671f6634dda 100644 --- a/drivers/crypto/nx/nx-common-pseries.c +++ b/drivers/crypto/nx/nx-common-pseries.c @@ -1231,6 +1231,12 @@ static int __init nx842_pseries_init(void) return ret; } + ret = vas_register_api_pseries(THIS_MODULE, VAS_COP_TYPE_GZIP, + "nx-gzip"); + + if (ret) + pr_err("NX-GZIP is not supported. Returned=%d\n", ret); + return 0; } @@ -1241,6 +1247,8 @@ static void __exit nx842_pseries_exit(void) struct nx842_devdata *old_devdata; unsigned long flags; + vas_unregister_api_pseries(); + crypto_unregister_alg(&nx842_pseries_alg); spin_lock_irqsave(&devdata_mutex, flags); -- cgit v1.2.3 From 3729e0ec59a20825bd4c8c70996b2df63915e1dd Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 18 May 2021 00:03:55 +1000 Subject: powerpc/powernv: Fix machine check reporting of async store errors POWER9 and POWER10 asynchronous machine checks due to stores have their cause reported in SRR1 but SRR1[42] is set, which in other cases indicates DSISR cause. Check for these cases and clear SRR1[42], so the cause matching uses the i-side (SRR1) table. Fixes: 7b9f71f974a1 ("powerpc/64s: POWER9 machine check handler") Fixes: 201220bb0e8c ("powerpc/powernv: Machine check handler for POWER10") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210517140355.2325406-1-npiggin@gmail.com --- arch/powerpc/kernel/mce_power.c | 48 ++++++++++++++++++++++++++++++++++------- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 7b76a9bf324f..c2f55fe7092d 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -481,12 +481,11 @@ static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr, return -1; } -static int mce_handle_ierror(struct pt_regs *regs, +static int mce_handle_ierror(struct pt_regs *regs, unsigned long srr1, const struct mce_ierror_table table[], struct mce_error_info *mce_err, uint64_t *addr, uint64_t *phys_addr) { - uint64_t srr1 = regs->msr; int handled = 0; int i; @@ -695,19 +694,19 @@ static long mce_handle_ue_error(struct pt_regs *regs, } static long mce_handle_error(struct pt_regs *regs, + unsigned long srr1, const struct mce_derror_table dtable[], const struct mce_ierror_table itable[]) { struct mce_error_info mce_err = { 0 }; uint64_t addr, phys_addr = ULONG_MAX; - uint64_t srr1 = regs->msr; long handled; if (SRR1_MC_LOADSTORE(srr1)) handled = mce_handle_derror(regs, dtable, &mce_err, &addr, &phys_addr); else - handled = mce_handle_ierror(regs, itable, &mce_err, &addr, + handled = mce_handle_ierror(regs, srr1, itable, &mce_err, &addr, &phys_addr); if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE) @@ -723,16 +722,20 @@ long __machine_check_early_realmode_p7(struct pt_regs *regs) /* P7 DD1 leaves top bits of DSISR undefined */ regs->dsisr &= 0x0000ffff; - return mce_handle_error(regs, mce_p7_derror_table, mce_p7_ierror_table); + return mce_handle_error(regs, regs->msr, + mce_p7_derror_table, mce_p7_ierror_table); } long __machine_check_early_realmode_p8(struct pt_regs *regs) { - return mce_handle_error(regs, mce_p8_derror_table, mce_p8_ierror_table); + return mce_handle_error(regs, regs->msr, + mce_p8_derror_table, mce_p8_ierror_table); } long __machine_check_early_realmode_p9(struct pt_regs *regs) { + unsigned long srr1 = regs->msr; + /* * On POWER9 DD2.1 and below, it's possible to get a 
machine check * caused by a paste instruction where only DSISR bit 25 is set. This @@ -746,10 +749,39 @@ long __machine_check_early_realmode_p9(struct pt_regs *regs) if (SRR1_MC_LOADSTORE(regs->msr) && regs->dsisr == 0x02000000) return 1; - return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table); + /* + * Async machine check due to bad real address from store or foreign + * link time out comes with the load/store bit (PPC bit 42) set in + * SRR1, but the cause comes in SRR1 not DSISR. Clear bit 42 so we're + * directed to the ierror table so it will find the cause (which + * describes it correctly as a store error). + */ + if (SRR1_MC_LOADSTORE(srr1) && + ((srr1 & 0x081c0000) == 0x08140000 || + (srr1 & 0x081c0000) == 0x08180000)) { + srr1 &= ~PPC_BIT(42); + } + + return mce_handle_error(regs, srr1, + mce_p9_derror_table, mce_p9_ierror_table); } long __machine_check_early_realmode_p10(struct pt_regs *regs) { - return mce_handle_error(regs, mce_p10_derror_table, mce_p10_ierror_table); + unsigned long srr1 = regs->msr; + + /* + * Async machine check due to bad real address from store comes with + * the load/store bit (PPC bit 42) set in SRR1, but the cause comes in + * SRR1 not DSISR. Clear bit 42 so we're directed to the ierror table + * so it will find the cause (which describes it correctly as a store + * error). + */ + if (SRR1_MC_LOADSTORE(srr1) && + (srr1 & 0x081c0000) == 0x08140000) { + srr1 &= ~PPC_BIT(42); + } + + return mce_handle_error(regs, srr1, + mce_p10_derror_table, mce_p10_ierror_table); } -- cgit v1.2.3 From 710e682286784b50b882fc4befdf83c587059211 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 11 Jun 2021 21:11:04 +1000 Subject: powerpc/boot: add zImage.lds to targets This prevents spurious rebuilds of the lds and then wrappers. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210611111104.1058991-1-npiggin@gmail.com --- arch/powerpc/boot/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 2b8da923ceca..e8e9d17af43b 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -227,7 +227,7 @@ $(obj)/wrapper.a: $(obj-wlib) FORCE hostprogs := addnote hack-coff mktree -targets += $(patsubst $(obj)/%,%,$(obj-boot) wrapper.a) +targets += $(patsubst $(obj)/%,%,$(obj-boot) wrapper.a) zImage.lds extra-y := $(obj)/wrapper.a $(obj-plat) $(obj)/empty.o \ $(obj)/zImage.lds $(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds -- cgit v1.2.3 From 65c7d070850e109a8a75a431f5a7f6eb4c007b77 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 3 May 2021 23:02:40 +1000 Subject: powerpc/pseries: Get entry and uaccess flush required bits from H_GET_CPU_CHARACTERISTICS This allows the hypervisor / firmware to describe these workarounds to the guest. 
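The "// IBM bit N" annotations in the hunk below follow Power's big-endian bit numbering, where bit 0 is the most significant bit of the doubleword; the kernel's PPC_BIT() macro encodes the same convention. A standalone sanity check of the two new flags, with the macro re-derived here for illustration:

    #include <assert.h>

    /* IBM (MSB 0) bit i of a 64-bit value, equivalent to the kernel's PPC_BIT(). */
    #define IBM_BIT(i) (1ull << (63 - (i)))

    int main(void)
    {
    	assert(IBM_BIT(7) == (1ull << 56)); /* H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY */
    	assert(IBM_BIT(8) == (1ull << 55)); /* H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS */
    	return 0;
    }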
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210503130243.891868-2-npiggin@gmail.com --- arch/powerpc/include/asm/hvcall.h | 2 ++ arch/powerpc/platforms/pseries/setup.c | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 7c3418d1b5e9..d769ffbb04f2 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -400,6 +400,8 @@ #define H_CPU_BEHAV_FAVOUR_SECURITY_H (1ull << 60) // IBM bit 3 #define H_CPU_BEHAV_FLUSH_COUNT_CACHE (1ull << 58) // IBM bit 5 #define H_CPU_BEHAV_FLUSH_LINK_STACK (1ull << 57) // IBM bit 6 +#define H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY (1ull << 56) // IBM bit 7 +#define H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS (1ull << 55) // IBM bit 8 /* Flag values used in H_REGISTER_PROC_TBL hcall */ #define PROC_TABLE_OP_MASK 0x18 diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 754e493b7c05..287f33645419 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -549,6 +549,12 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result) if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) security_ftr_clear(SEC_FTR_L1D_FLUSH_PR); + if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY) + security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY); + + if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS) + security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS); + if (!(result->behaviour & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR)) security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR); } -- cgit v1.2.3 From 84ed26fd00c514da57cd46aa3728a48f1f9b35cd Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 3 May 2021 23:02:41 +1000 Subject: powerpc/security: Add a security feature for STF barrier Rather than tying this mitigation to RFI L1D flush requirement, add a new bit for it. 
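For context, the security feature flags are a single global u64 in which every set bit means the corresponding mitigation should be applied; firmware reports then clear the bits that are not needed. A minimal standalone model of that mechanism (SEC_FTR_STF_BARRIER's value is taken from the hunk below; the other value and the surrounding scaffolding are illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    #define SEC_FTR_FAVOUR_SECURITY	0x0000000000000001ull /* illustrative value */
    #define SEC_FTR_STF_BARRIER	0x0000000000010000ull /* value from this patch */

    /* Default: favour security and apply the STF barrier. */
    static unsigned long long powerpc_security_features =
    	SEC_FTR_FAVOUR_SECURITY | SEC_FTR_STF_BARRIER;

    static bool security_ftr_enabled(unsigned long long feature)
    {
    	return powerpc_security_features & feature;
    }

    static void security_ftr_clear(unsigned long long feature)
    {
    	powerpc_security_features &= ~feature;
    }

    int main(void)
    {
    	/* Firmware says no STF barrier is needed: clear the bit, and the
    	 * enable decision in setup_stf_barrier() now keys off it alone. */
    	security_ftr_clear(SEC_FTR_STF_BARRIER);
    	printf("stf barrier: %d\n",
    	       security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
    	       security_ftr_enabled(SEC_FTR_STF_BARRIER));
    	return 0;
    }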
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210503130243.891868-3-npiggin@gmail.com --- arch/powerpc/include/asm/security_features.h | 4 ++++ arch/powerpc/kernel/security.c | 7 ++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index b774a4477d5f..792eefaf230b 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -92,6 +92,9 @@ static inline bool security_ftr_enabled(u64 feature) // The L1-D cache should be flushed after user accesses from the kernel #define SEC_FTR_L1D_FLUSH_UACCESS 0x0000000000008000ull +// The STF flush should be executed on privilege state switch +#define SEC_FTR_STF_BARRIER 0x0000000000010000ull + // Features enabled by default #define SEC_FTR_DEFAULT \ (SEC_FTR_L1D_FLUSH_HV | \ @@ -99,6 +102,7 @@ static inline bool security_ftr_enabled(u64 feature) SEC_FTR_BNDS_CHK_SPEC_BAR | \ SEC_FTR_L1D_FLUSH_ENTRY | \ SEC_FTR_L1D_FLUSH_UACCESS | \ + SEC_FTR_STF_BARRIER | \ SEC_FTR_FAVOUR_SECURITY) #endif /* _ASM_POWERPC_SECURITY_FEATURES_H */ diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 9c2f7b909911..cc51fa52e783 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -300,9 +300,7 @@ static void stf_barrier_enable(bool enable) void setup_stf_barrier(void) { enum stf_barrier_type type; - bool enable, hv; - - hv = cpu_has_feature(CPU_FTR_HVMODE); + bool enable; /* Default to fallback in case fw-features are not available */ if (cpu_has_feature(CPU_FTR_ARCH_300)) @@ -315,8 +313,7 @@ void setup_stf_barrier(void) type = STF_BARRIER_NONE; enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && - (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || - (security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && hv)); + security_ftr_enabled(SEC_FTR_STF_BARRIER); if (type == STF_BARRIER_FALLBACK) { pr_info("stf-barrier: fallback barrier available\n"); -- cgit v1.2.3 From 393eff5a7b357a23db3e786e24b5ba8762cc6820 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 3 May 2021 23:02:42 +1000 Subject: powerpc/pseries: Get STF barrier requirement from H_GET_CPU_CHARACTERISTICS This allows the hypervisor / firmware to describe this workaround to the guest. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210503130243.891868-4-npiggin@gmail.com --- arch/powerpc/include/asm/hvcall.h | 1 + arch/powerpc/platforms/pseries/setup.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index d769ffbb04f2..2a6346b6472f 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -402,6 +402,7 @@ #define H_CPU_BEHAV_FLUSH_LINK_STACK (1ull << 57) // IBM bit 6 #define H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY (1ull << 56) // IBM bit 7 #define H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS (1ull << 55) // IBM bit 8 +#define H_CPU_BEHAV_NO_STF_BARRIER (1ull << 54) // IBM bit 9 /* Flag values used in H_REGISTER_PROC_TBL hcall */ #define PROC_TABLE_OP_MASK 0x18 diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 287f33645419..631a0d57b6cd 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -555,6 +555,9 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result) if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS) security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS); + if (result->behaviour & H_CPU_BEHAV_NO_STF_BARRIER) + security_ftr_clear(SEC_FTR_STF_BARRIER); + if (!(result->behaviour & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR)) security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR); } -- cgit v1.2.3 From 1f9ad21c3b384a8f16d8c46845a48a01d281a603 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Wed, 9 Jun 2021 11:34:23 +1000 Subject: powerpc/mm: Implement set_memory() routines The set_memory_{ro/rw/nx/x}() functions are required for STRICT_MODULE_RWX, and are generally useful primitives to have. This implementation is designed to be generic across powerpc's many MMUs. It's possible that this could be optimised to be faster for specific MMUs. This implementation does not handle cases where the caller is attempting to change the mapping of the page it is executing from, or if another CPU is concurrently using the page being altered. These cases likely shouldn't happen, but a more complex implementation with MMU-specific code could safely handle them. On hash, the linear mapping is not kept in the linux pagetable, so this will not change the protection if used on that range. Currently these functions are not used on the linear map so just WARN for now. apply_to_existing_page_range() does not work on huge pages so for now disallow changing the protection of huge pages. 
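Before the implementation below, a quick usage sketch: the new API takes a page-aligned kernel virtual address and a page count. This mirrors what the kprobes patch later in this series does with a freshly allocated instruction page; the helper name and error handling here are simplifying assumptions:

    #include <linux/mm.h>
    #include <linux/moduleloader.h>
    #include <linux/set_memory.h>

    /* Allocate one page for generated code and make it read-only + executable.
     * Writes to it afterwards must go through patch_instruction(). */
    static void *alloc_rox_page(void)
    {
    	void *page = module_alloc(PAGE_SIZE);

    	if (!page)
    		return NULL;
    	if (set_memory_ro((unsigned long)page, 1) ||
    	    set_memory_x((unsigned long)page, 1)) {
    		module_memfree(page);
    		return NULL;
    	}
    	return page;
    }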
[jpn: - Allow set memory functions to be used without Strict RWX - Hash: Disallow certain regions - Have change_page_attr() take function pointers to manipulate ptes - Radix: Add ptesync after set_pte_at()] Signed-off-by: Russell Currey Signed-off-by: Christophe Leroy Signed-off-by: Jordan Niethe Reviewed-by: Daniel Axtens Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210609013431.9805-2-jniethe5@gmail.com --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/set_memory.h | 32 +++++++++++ arch/powerpc/mm/Makefile | 2 +- arch/powerpc/mm/pageattr.c | 101 ++++++++++++++++++++++++++++++++++ 4 files changed, 135 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/include/asm/set_memory.h create mode 100644 arch/powerpc/mm/pageattr.c diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 36ba413f49d3..abfe2e9225fa 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -140,6 +140,7 @@ config PPC select ARCH_HAS_PTE_DEVMAP if PPC_BOOK3S_64 select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64 + select ARCH_HAS_SET_MEMORY select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !HIBERNATION) select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE diff --git a/arch/powerpc/include/asm/set_memory.h b/arch/powerpc/include/asm/set_memory.h new file mode 100644 index 000000000000..64011ea444b4 --- /dev/null +++ b/arch/powerpc/include/asm/set_memory.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_SET_MEMORY_H +#define _ASM_POWERPC_SET_MEMORY_H + +#define SET_MEMORY_RO 0 +#define SET_MEMORY_RW 1 +#define SET_MEMORY_NX 2 +#define SET_MEMORY_X 3 + +int change_memory_attr(unsigned long addr, int numpages, long action); + +static inline int set_memory_ro(unsigned long addr, int numpages) +{ + return change_memory_attr(addr, numpages, SET_MEMORY_RO); +} + +static inline int set_memory_rw(unsigned long addr, int numpages) +{ + return change_memory_attr(addr, numpages, SET_MEMORY_RW); +} + +static inline int set_memory_nx(unsigned long addr, int numpages) +{ + return change_memory_attr(addr, numpages, SET_MEMORY_NX); +} + +static inline int set_memory_x(unsigned long addr, int numpages) +{ + return change_memory_attr(addr, numpages, SET_MEMORY_X); +} + +#endif diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index c3df3a8501d4..9142cf1fb0d5 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -5,7 +5,7 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) -obj-y := fault.o mem.o pgtable.o mmap.o maccess.o \ +obj-y := fault.o mem.o pgtable.o mmap.o maccess.o pageattr.o \ init_$(BITS).o pgtable_$(BITS).o \ pgtable-frag.o ioremap.o ioremap_$(BITS).o \ init-common.o mmu_context.o drmem.o \ diff --git a/arch/powerpc/mm/pageattr.c b/arch/powerpc/mm/pageattr.c new file mode 100644 index 000000000000..5e5ae50a7f23 --- /dev/null +++ b/arch/powerpc/mm/pageattr.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * MMU-generic set_memory implementation for powerpc + * + * Copyright 2019-2021, IBM Corporation. + */ + +#include +#include +#include + +#include +#include +#include + + +/* + * Updates the attributes of a page in three steps: + * + * 1. invalidate the page table entry + * 2. flush the TLB + * 3. install the new entry with the updated attributes + * + * Invalidating the pte means there are situations where this will not work + * when in theory it should. 
+ * For example: + * - removing write from page whilst it is being executed + * - setting a page read-only whilst it is being read by another CPU + * + */ +static int change_page_attr(pte_t *ptep, unsigned long addr, void *data) +{ + long action = (long)data; + pte_t pte; + + spin_lock(&init_mm.page_table_lock); + + /* invalidate the PTE so it's safe to modify */ + pte = ptep_get_and_clear(&init_mm, addr, ptep); + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + + /* modify the PTE bits as desired, then apply */ + switch (action) { + case SET_MEMORY_RO: + pte = pte_wrprotect(pte); + break; + case SET_MEMORY_RW: + pte = pte_mkwrite(pte_mkdirty(pte)); + break; + case SET_MEMORY_NX: + pte = pte_exprotect(pte); + break; + case SET_MEMORY_X: + pte = pte_mkexec(pte); + break; + default: + WARN_ON_ONCE(1); + break; + } + + set_pte_at(&init_mm, addr, ptep, pte); + + /* See ptesync comment in radix__set_pte_at() */ + if (radix_enabled()) + asm volatile("ptesync": : :"memory"); + spin_unlock(&init_mm.page_table_lock); + + return 0; +} + +int change_memory_attr(unsigned long addr, int numpages, long action) +{ + unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE); + unsigned long size = numpages * PAGE_SIZE; + + if (!numpages) + return 0; + + if (WARN_ON_ONCE(is_vmalloc_or_module_addr((void *)addr) && + is_vm_area_hugepages((void *)addr))) + return -EINVAL; + +#ifdef CONFIG_PPC_BOOK3S_64 + /* + * On hash, the linear mapping is not in the Linux page table so + * apply_to_existing_page_range() will have no effect. If in the future + * the set_memory_* functions are used on the linear map this will need + * to be updated. + */ + if (!radix_enabled()) { + int region = get_region_id(addr); + + if (WARN_ON_ONCE(region != VMALLOC_REGION_ID && region != IO_REGION_ID)) + return -EINVAL; + } +#endif + + return apply_to_existing_page_range(&init_mm, start, size, + change_page_attr, (void *)action); +} -- cgit v1.2.3 From 71a5b3db9f209ea5d1e07371017e65398d3c6fbc Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Wed, 9 Jun 2021 11:34:24 +1000 Subject: powerpc/lib/code-patching: Set up Strict RWX patching earlier setup_text_poke_area() is a late init call so it runs before mark_rodata_ro() and after the init calls. This lets all the init code patching simply write to their locations. In the future, kprobes is going to allocate its instruction pages RO which means they will need setup_text_poke_area() to have been already called for their code patching. However, init_kprobes() (which allocates and patches some instruction pages) is an early init call so it happens before setup_text_poke_area(). start_kernel() calls poking_init() before any of the init calls. On powerpc, poking_init() is currently a nop. setup_text_poke_area() relies on kernel virtual memory, cpu hotplug and per_cpu_areas being setup. setup_per_cpu_areas(), boot_cpu_hotplug_init() and mm_init() are called before poking_init(). Turn setup_text_poke_area() into poking_init(). 
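The pairing relies on the generic weak hook in init/main.c; a simplified sketch of both halves, with the generic side reproduced from memory and trimmed:

    /* init/main.c (generic): an empty weak definition, invoked by
     * start_kernel() after mm_init() and before the initcalls run. */
    void __init __weak poking_init(void) { }

    /* arch/powerpc (this patch): the strong definition takes over and
     * sets up the text-patching area that early patchers will need. */
    void __init poking_init(void)
    {
    	BUG_ON(!cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
    		"powerpc/text_poke:online", text_area_cpu_up,
    		text_area_cpu_down));
    }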
Signed-off-by: Jordan Niethe Reviewed-by: Christophe Leroy Reviewed-by: Russell Currey [mpe: Fold in missing prototype for poking_init() from lkp] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210609013431.9805-3-jniethe5@gmail.com --- arch/powerpc/include/asm/pgtable.h | 1 + arch/powerpc/lib/code-patching.c | 14 ++++---------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index b9c8641654f4..b1e6c9ee3206 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -76,6 +76,7 @@ extern unsigned long empty_zero_page[]; extern pgd_t swapper_pg_dir[]; extern void paging_init(void); +void poking_init(void); extern unsigned long ioremap_bot; diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index b2d692376769..f9a3019e37b4 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -69,22 +69,16 @@ static int text_area_cpu_down(unsigned int cpu) } /* - * Run as a late init call. This allows all the boot time patching to be done - * simply by patching the code, and then we're called here prior to - * mark_rodata_ro(), which happens after all init calls are run. Although - * BUG_ON() is rude, in this case it should only happen if ENOMEM, and we judge - * it as being preferable to a kernel that will crash later when someone tries - * to use patch_instruction(). + * Although BUG_ON() is rude, in this case it should only happen if ENOMEM, and + * we judge it as being preferable to a kernel that will crash later when + * someone tries to use patch_instruction(). */ -static int __init setup_text_poke_area(void) +void __init poking_init(void) { BUG_ON(!cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/text_poke:online", text_area_cpu_up, text_area_cpu_down)); - - return 0; } -late_initcall(setup_text_poke_area); /* * This can be called for kernel text or a module. -- cgit v1.2.3 From 4fcc636615b1a309b39cab101a2b433cbf1f63f1 Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Wed, 9 Jun 2021 11:34:25 +1000 Subject: powerpc/modules: Make module_alloc() Strict Module RWX aware Make module_alloc() use PAGE_KERNEL protections instead of PAGE_KERNEL_EXEC if Strict Module RWX is enabled. Signed-off-by: Jordan Niethe Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210609013431.9805-4-jniethe5@gmail.com --- arch/powerpc/include/asm/mmu.h | 5 +++++ arch/powerpc/kernel/module.c | 4 +++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 998fe01dd1a8..27016b98ecb2 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -345,6 +345,11 @@ static inline bool strict_kernel_rwx_enabled(void) return false; } #endif + +static inline bool strict_module_rwx_enabled(void) +{ + return IS_ENABLED(CONFIG_STRICT_MODULE_RWX) && strict_kernel_rwx_enabled(); +} #endif /* !__ASSEMBLY__ */ /* The kernel use the constants below to index in the page sizes array. diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 3f35c8d20be7..ed04a3ba66fe 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -92,12 +92,14 @@ int module_finalize(const Elf_Ehdr *hdr, static __always_inline void * __module_alloc(unsigned long size, unsigned long start, unsigned long end) { + pgprot_t prot = strict_module_rwx_enabled() ?
PAGE_KERNEL : PAGE_KERNEL_EXEC; + /* * Don't do huge page allocations for modules yet until more testing * is done. STRICT_MODULE_RWX may require extra work to support this * too. */ - return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, PAGE_KERNEL_EXEC, + return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, prot, VM_FLUSH_RESET_PERMS | VM_NO_HUGE_VMAP, NUMA_NO_NODE, __builtin_return_address(0)); } -- cgit v1.2.3 From 6a3a58e6230dc5b646ce3511436d7e74fc7f764b Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Wed, 9 Jun 2021 11:34:26 +1000 Subject: powerpc/kprobes: Mark newly allocated probes as ROX Add the arch specific insn page allocator for powerpc. This allocates ROX pages if STRICT_KERNEL_RWX is enabled. These pages are only written to with patch_instruction() which is able to write RO pages. Signed-off-by: Russell Currey Signed-off-by: Christophe Leroy [jpn: Reword commit message, switch to __vmalloc_node_range()] Signed-off-by: Jordan Niethe Reviewed-by: Daniel Axtens Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210609013431.9805-5-jniethe5@gmail.com --- arch/powerpc/kernel/kprobes.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index b19dfc45f75c..8ac248cb518a 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -19,11 +19,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; @@ -103,6 +105,21 @@ kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset) return addr; } +void *alloc_insn_page(void) +{ + void *page; + + page = module_alloc(PAGE_SIZE); + if (!page) + return NULL; + + if (strict_module_rwx_enabled()) { + set_memory_ro((unsigned long)page, 1); + set_memory_x((unsigned long)page, 1); + } + return page; +} + int arch_prepare_kprobe(struct kprobe *p) { int ret = 0; -- cgit v1.2.3 From bc33cfdb0bb84d9e4b125a617a437c29ddcac4d9 Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Wed, 9 Jun 2021 11:34:27 +1000 Subject: powerpc/bpf: Remove bpf_jit_free() Commit 74451e66d516 ("bpf: make jited programs visible in traces") added a default bpf_jit_free() implementation. Powerpc did not use the default bpf_jit_free() as powerpc did not set the images read-only. The default bpf_jit_free() called bpf_jit_binary_unlock_ro() is why it could not be used for powerpc. Commit d53d2f78cead ("bpf: Use vmalloc special flag") moved keeping track of read-only memory to vmalloc. This included removing bpf_jit_binary_unlock_ro(). Therefore there is no reason powerpc needs its own bpf_jit_free(). Remove it. Signed-off-by: Jordan Niethe Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210609013431.9805-6-jniethe5@gmail.com --- arch/powerpc/net/bpf_jit_comp.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 798ac4350a82..6c8c268e4fe8 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -257,15 +257,3 @@ out: return fp; } - -/* Overriding bpf_jit_free() as we don't set images read-only. 
*/ -void bpf_jit_free(struct bpf_prog *fp) -{ - unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; - struct bpf_binary_header *bpf_hdr = (void *)addr; - - if (fp->jited) - bpf_jit_binary_free(bpf_hdr); - - bpf_prog_unlock_free(fp); -} -- cgit v1.2.3 From 62e3d4210ac9c35783d0e8fc306df4239c540a79 Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Wed, 9 Jun 2021 11:34:28 +1000 Subject: powerpc/bpf: Write protect JIT code Add the necessary call to bpf_jit_binary_lock_ro() to remove write and add exec permissions to the JIT image after it has finished being written. Without CONFIG_STRICT_MODULE_RWX the image will be writable and executable until the call to bpf_jit_binary_lock_ro(). Signed-off-by: Jordan Niethe Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210609013431.9805-7-jniethe5@gmail.com --- arch/powerpc/net/bpf_jit_comp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 6c8c268e4fe8..53aefee3fe70 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -237,6 +237,7 @@ skip_codegen_passes: fp->jited_len = alloclen; bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE)); + bpf_jit_binary_lock_ro(bpf_hdr); if (!fp->is_func || extra_pass) { bpf_prog_fill_jited_linfo(fp, addrs); out_addrs: -- cgit v1.2.3 From c35717c71e983ed55d61e523cbd11a798429bc82 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Wed, 9 Jun 2021 11:34:29 +1000 Subject: powerpc: Set ARCH_HAS_STRICT_MODULE_RWX To enable strict module RWX on powerpc, set: CONFIG_STRICT_MODULE_RWX=y You should also have CONFIG_STRICT_KERNEL_RWX=y set to have any real security benefit. ARCH_HAS_STRICT_MODULE_RWX is set to require ARCH_HAS_STRICT_KERNEL_RWX. This is due to a quirk in arch/Kconfig and arch/powerpc/Kconfig that makes STRICT_MODULE_RWX *on by default* in configurations where STRICT_KERNEL_RWX is *unavailable*. Since this doesn't make much sense, and module RWX without kernel RWX doesn't make much sense, having the same dependencies as kernel RWX works around this problem. Book3s/32 603 and 604 core processors are not able to write protect kernel pages so do not set ARCH_HAS_STRICT_MODULE_RWX for Book3s/32. 
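At run time the new Kconfig option is consumed through strict_module_rwx_enabled(), introduced earlier in this series in asm/mmu.h; a minimal usage sketch (restating the module_alloc() pattern above, for illustration only):

	/* Allocate module memory non-executable only when both kernel and
	 * module strict RWX are in force; permissions are then granted
	 * per-region via the set_memory_*() helpers. */
	pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : PAGE_KERNEL_EXEC;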
[jpn: - predicate on !PPC_BOOK3S_604 - make module_alloc() use PAGE_KERNEL protection] Signed-off-by: Russell Currey Signed-off-by: Jordan Niethe Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210609013431.9805-8-jniethe5@gmail.com --- arch/powerpc/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index abfe2e9225fa..72f307f1796b 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -142,6 +142,7 @@ config PPC select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64 select ARCH_HAS_SET_MEMORY select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !HIBERNATION) + select ARCH_HAS_STRICT_MODULE_RWX if ARCH_HAS_STRICT_KERNEL_RWX && !PPC_BOOK3S_32 select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE select ARCH_HAS_UBSAN_SANITIZE_ALL @@ -267,6 +268,7 @@ config PPC select PPC_DAWR if PPC64 select RTC_LIB select SPARSE_IRQ + select STRICT_KERNEL_RWX if STRICT_MODULE_RWX select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK select VIRT_TO_BUS if !PPC64 -- cgit v1.2.3 From 4d1755b6a762149ae022a32fb2bbeefb6680baa6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 9 Jun 2021 11:34:30 +1000 Subject: powerpc/mm: implement set_memory_attr() In addition to the set_memory_xx() functions, which allow changing the memory attributes of not (yet) used memory regions, implement a set_memory_attr() function to: - set the final memory protection after init on currently used kernel regions. - enable/disable kernel memory regions in the scope of DEBUG_PAGEALLOC. Unlike the set_memory_xx() functions, which can act in three steps because the regions are unused, this function must modify the pages 'on the fly', as the kernel is executing from them. At the moment only PPC32 will use it, and changing page attributes on the fly is not an issue. Reported-by: kbuild test robot [ruscur: cast "data" to unsigned long instead of int] Signed-off-by: Christophe Leroy Signed-off-by: Russell Currey Signed-off-by: Jordan Niethe Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210609013431.9805-9-jniethe5@gmail.com --- arch/powerpc/include/asm/set_memory.h | 2 ++ arch/powerpc/mm/pageattr.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/arch/powerpc/include/asm/set_memory.h b/arch/powerpc/include/asm/set_memory.h index 64011ea444b4..b040094f7920 100644 --- a/arch/powerpc/include/asm/set_memory.h +++ b/arch/powerpc/include/asm/set_memory.h @@ -29,4 +29,6 @@ static inline int set_memory_x(unsigned long addr, int numpages) return change_memory_attr(addr, numpages, SET_MEMORY_X); } +int set_memory_attr(unsigned long addr, int numpages, pgprot_t prot); + #endif diff --git a/arch/powerpc/mm/pageattr.c b/arch/powerpc/mm/pageattr.c index 5e5ae50a7f23..0876216ceee6 100644 --- a/arch/powerpc/mm/pageattr.c +++ b/arch/powerpc/mm/pageattr.c @@ -99,3 +99,36 @@ int change_memory_attr(unsigned long addr, int numpages, long action) return apply_to_existing_page_range(&init_mm, start, size, change_page_attr, (void *)action); } + +/* + * Set the attributes of a page: + * + * This function is used by PPC32 at the end of init to set final kernel memory + * protection. It includes changing the mapping of the page it is executing from + * and data pages it is using.
+ */ +static int set_page_attr(pte_t *ptep, unsigned long addr, void *data) +{ + pgprot_t prot = __pgprot((unsigned long)data); + + spin_lock(&init_mm.page_table_lock); + + set_pte_at(&init_mm, addr, ptep, pte_modify(*ptep, prot)); + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + + spin_unlock(&init_mm.page_table_lock); + + return 0; +} + +int set_memory_attr(unsigned long addr, int numpages, pgprot_t prot) +{ + unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE); + unsigned long sz = numpages * PAGE_SIZE; + + if (numpages <= 0) + return 0; + + return apply_to_existing_page_range(&init_mm, start, sz, set_page_attr, + (void *)pgprot_val(prot)); +} -- cgit v1.2.3 From c988cfd38e489d9390d253d4392590daf451d87a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 9 Jun 2021 11:34:31 +1000 Subject: powerpc/32: use set_memory_attr() Use set_memory_attr() instead of the PPC32 specific change_page_attr(). change_page_attr() was checking that the address was not mapped by blocks and was handling highmem, but that's unneeded because the affected pages can't be in highmem and block mapping verification is already done by the callers. Signed-off-by: Christophe Leroy [ruscur: rebase on powerpc/merge with Christophe's new patches] Signed-off-by: Russell Currey Signed-off-by: Jordan Niethe Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210609013431.9805-10-jniethe5@gmail.com --- arch/powerpc/mm/pgtable_32.c | 60 ++++++++------------------------------------ 1 file changed, 10 insertions(+), 50 deletions(-) diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index e0ec67a16887..dcf5ecca19d9 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -132,64 +133,20 @@ void __init mapin_ram(void) } } -static int __change_page_attr_noflush(struct page *page, pgprot_t prot) -{ - pte_t *kpte; - unsigned long address; - - BUG_ON(PageHighMem(page)); - address = (unsigned long)page_address(page); - - if (v_block_mapped(address)) - return 0; - kpte = virt_to_kpte(address); - if (!kpte) - return -EINVAL; - __set_pte_at(&init_mm, address, kpte, mk_pte(page, prot), 0); - - return 0; -} - -/* - * Change the page attributes of an page in the linear mapping.
- * - * THIS DOES NOTHING WITH BAT MAPPINGS, DEBUG USE ONLY - */ -static int change_page_attr(struct page *page, int numpages, pgprot_t prot) -{ - int i, err = 0; - unsigned long flags; - struct page *start = page; - - local_irq_save(flags); - for (i = 0; i < numpages; i++, page++) { - err = __change_page_attr_noflush(page, prot); - if (err) - break; - } - wmb(); - local_irq_restore(flags); - flush_tlb_kernel_range((unsigned long)page_address(start), - (unsigned long)page_address(page)); - return err; -} - void mark_initmem_nx(void) { - struct page *page = virt_to_page(_sinittext); unsigned long numpages = PFN_UP((unsigned long)_einittext) - PFN_DOWN((unsigned long)_sinittext); if (v_block_mapped((unsigned long)_sinittext)) mmu_mark_initmem_nx(); else - change_page_attr(page, numpages, PAGE_KERNEL); + set_memory_attr((unsigned long)_sinittext, numpages, PAGE_KERNEL); } #ifdef CONFIG_STRICT_KERNEL_RWX void mark_rodata_ro(void) { - struct page *page; unsigned long numpages; if (v_block_mapped((unsigned long)_stext + 1)) { @@ -198,20 +155,18 @@ void mark_rodata_ro(void) return; } - page = virt_to_page(_stext); numpages = PFN_UP((unsigned long)_etext) - PFN_DOWN((unsigned long)_stext); - change_page_attr(page, numpages, PAGE_KERNEL_ROX); + set_memory_attr((unsigned long)_stext, numpages, PAGE_KERNEL_ROX); /* * mark .rodata as read only. Use __init_begin rather than __end_rodata * to cover NOTES and EXCEPTION_TABLE. */ - page = virt_to_page(__start_rodata); numpages = PFN_UP((unsigned long)__init_begin) - PFN_DOWN((unsigned long)__start_rodata); - change_page_attr(page, numpages, PAGE_KERNEL_RO); + set_memory_attr((unsigned long)__start_rodata, numpages, PAGE_KERNEL_RO); // mark_initmem_nx() should have already run by now ptdump_check_wx(); @@ -221,9 +176,14 @@ void mark_rodata_ro(void) #ifdef CONFIG_DEBUG_PAGEALLOC void __kernel_map_pages(struct page *page, int numpages, int enable) { + unsigned long addr = (unsigned long)page_address(page); + if (PageHighMem(page)) return; - change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0)); + if (enable) + set_memory_attr(addr, numpages, PAGE_KERNEL); + else + set_memory_attr(addr, numpages, __pgprot(0)); } #endif /* CONFIG_DEBUG_PAGEALLOC */ -- cgit v1.2.3 From 53d143fe08c24c2ce44ee329e41c2a6aad57ebb5 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 18 Jun 2021 13:43:41 +1000 Subject: powerpc: Add Microwatt platform Microwatt is a FPGA-based implementation of the Power ISA. It currently only implements little-endian 64-bit mode, and does not (yet) support SMP, VMX, VSX or transactional memory. It has an optional FPU, and an optional MMU (required for running Linux, obviously) which implements a configurable radix tree but not hypervisor mode or nested radix translation. This adds a new machine type to support FPGA-based SoCs with a Microwatt core. CONFIG_MATH_EMULATION can be selected for Microwatt SOCs which don't have the FPU. 
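For context, the define_machine() entry added below is chosen by the generic powerpc machine probe loop; a simplified sketch of that mechanism (paraphrased from arch/powerpc/kernel/setup-common.c, not part of this patch):

	/* probe_machine(), simplified: the first registered machine
	 * description whose probe hook accepts the device tree wins. */
	for (machine_id = &__machine_desc_start; machine_id < &__machine_desc_end; machine_id++) {
		memcpy(&ppc_md, *machine_id, sizeof(ppc_md));
		if (ppc_md.probe())
			break;	/* microwatt_probe() matches "microwatt-soc" */
	}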
Signed-off-by: Paul Mackerras Reviewed-by: Nicholas Piggin Reviewed-by: Segher Boessenkool Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/YMwWbZVREsVug9R0@thinks.paulus.ozlabs.org --- arch/powerpc/Kconfig | 2 +- arch/powerpc/platforms/Kconfig | 1 + arch/powerpc/platforms/Makefile | 1 + arch/powerpc/platforms/microwatt/Kconfig | 9 +++++++++ arch/powerpc/platforms/microwatt/Makefile | 1 + arch/powerpc/platforms/microwatt/setup.c | 23 +++++++++++++++++++++++ 6 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/platforms/microwatt/Kconfig create mode 100644 arch/powerpc/platforms/microwatt/Makefile create mode 100644 arch/powerpc/platforms/microwatt/setup.c diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 72f307f1796b..9bf52e292eda 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -430,7 +430,7 @@ source "kernel/Kconfig.hz" config MATH_EMULATION bool "Math emulation" - depends on 4xx || PPC_8xx || PPC_MPC832x || BOOKE + depends on 4xx || PPC_8xx || PPC_MPC832x || BOOKE || PPC_MICROWATT select PPC_FPU_REGS help Some PowerPC chips designed for embedded applications do not have diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 594544a65b02..2f071fb9694c 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -21,6 +21,7 @@ source "arch/powerpc/platforms/44x/Kconfig" source "arch/powerpc/platforms/40x/Kconfig" source "arch/powerpc/platforms/amigaone/Kconfig" source "arch/powerpc/platforms/book3s/Kconfig" +source "arch/powerpc/platforms/microwatt/Kconfig" config KVM_GUEST bool "KVM Guest support" diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile index 0e75d7df387b..94470fb27c99 100644 --- a/arch/powerpc/platforms/Makefile +++ b/arch/powerpc/platforms/Makefile @@ -23,3 +23,4 @@ obj-$(CONFIG_PPC_PS3) += ps3/ obj-$(CONFIG_EMBEDDED6xx) += embedded6xx/ obj-$(CONFIG_AMIGAONE) += amigaone/ obj-$(CONFIG_PPC_BOOK3S) += book3s/ +obj-$(CONFIG_PPC_MICROWATT) += microwatt/ diff --git a/arch/powerpc/platforms/microwatt/Kconfig b/arch/powerpc/platforms/microwatt/Kconfig new file mode 100644 index 000000000000..3be01e78ce57 --- /dev/null +++ b/arch/powerpc/platforms/microwatt/Kconfig @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 +config PPC_MICROWATT + depends on PPC_BOOK3S_64 && !SMP + bool "Microwatt SoC platform" + select PPC_XICS + select PPC_NATIVE + help + This option enables support for FPGA-based Microwatt implementations. + diff --git a/arch/powerpc/platforms/microwatt/Makefile b/arch/powerpc/platforms/microwatt/Makefile new file mode 100644 index 000000000000..e6885b3b2ee7 --- /dev/null +++ b/arch/powerpc/platforms/microwatt/Makefile @@ -0,0 +1 @@ +obj-y += setup.o diff --git a/arch/powerpc/platforms/microwatt/setup.c b/arch/powerpc/platforms/microwatt/setup.c new file mode 100644 index 000000000000..d80d52612672 --- /dev/null +++ b/arch/powerpc/platforms/microwatt/setup.c @@ -0,0 +1,23 @@ +/* + * Microwatt FPGA-based SoC platform setup code. + * + * Copyright 2020 Paul Mackerras (paulus@ozlabs.org), IBM Corp. 
+ */ + +#include +#include +#include +#include +#include +#include + +static int __init microwatt_probe(void) +{ + return of_machine_is_compatible("microwatt-soc"); +} + +define_machine(microwatt) { + .name = "microwatt", + .probe = microwatt_probe, + .calibrate_decr = generic_calibrate_decr, +}; -- cgit v1.2.3 From 151b88e8482167f6eb3117d82e4905efb5e72662 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 18 Jun 2021 13:44:16 +1000 Subject: powerpc: Add Microwatt device tree Microwatt currently runs with MSR[HV] = 0, hence the usable-privilege properties don't have bit 2 (for HV support) set, and we need the /chosen/ibm,architecture-vec-5 property. Signed-off-by: Paul Mackerras Reviewed-by: Segher Boessenkool Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/YMwWkPcXlGDSQ9Q3@thinks.paulus.ozlabs.org --- arch/powerpc/boot/dts/microwatt.dts | 98 +++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 arch/powerpc/boot/dts/microwatt.dts diff --git a/arch/powerpc/boot/dts/microwatt.dts b/arch/powerpc/boot/dts/microwatt.dts new file mode 100644 index 000000000000..ac264ad3faaf --- /dev/null +++ b/arch/powerpc/boot/dts/microwatt.dts @@ -0,0 +1,98 @@ +/dts-v1/; + +/ { + #size-cells = <0x02>; + #address-cells = <0x02>; + model-name = "microwatt"; + compatible = "microwatt-soc"; + + reserved-memory { + #size-cells = <0x02>; + #address-cells = <0x02>; + ranges; + }; + + memory@0 { + device_type = "memory"; + reg = <0x00000000 0x00000000 0x00000000 0x10000000>; + }; + + cpus { + #size-cells = <0x00>; + #address-cells = <0x01>; + + ibm,powerpc-cpu-features { + display-name = "Microwatt"; + isa = <3000>; + device_type = "cpu-features"; + compatible = "ibm,powerpc-cpu-features"; + + mmu-radix { + isa = <3000>; + usable-privilege = <2>; + }; + + little-endian { + isa = <2050>; + usable-privilege = <3>; + hwcap-bit-nr = <1>; + }; + + cache-inhibited-large-page { + isa = <2040>; + usable-privilege = <2>; + }; + + fixed-point-v3 { + isa = <3000>; + usable-privilege = <3>; + }; + + no-execute { + isa = <2010>; + usable-privilege = <2>; + }; + + floating-point { + hwcap-bit-nr = <27>; + isa = <0>; + usable-privilege = <3>; + }; + }; + + PowerPC,Microwatt@0 { + i-cache-sets = <2>; + ibm,dec-bits = <64>; + reservation-granule-size = <64>; + clock-frequency = <100000000>; + timebase-frequency = <100000000>; + i-tlb-sets = <1>; + ibm,ppc-interrupt-server#s = <0>; + i-cache-block-size = <64>; + d-cache-block-size = <64>; + d-cache-sets = <2>; + i-tlb-size = <64>; + cpu-version = <0x990000>; + status = "okay"; + i-cache-size = <0x1000>; + ibm,processor-radix-AP-encodings = <0x0c 0xa0000010 0x20000015 0x4000001e>; + tlb-size = <0>; + tlb-sets = <0>; + device_type = "cpu"; + d-tlb-size = <128>; + d-tlb-sets = <2>; + reg = <0>; + general-purpose; + 64-bit; + d-cache-size = <0x1000>; + ibm,chip-id = <0>; + }; + }; + + chosen { + bootargs = ""; + ibm,architecture-vec-5 = [19 00 10 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 40 00 40]; + }; + +}; -- cgit v1.2.3 From 0d0f9e5f2fa7aacf22892078a1065fa5d0ce941b Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 18 Jun 2021 13:45:11 +1000 Subject: powerpc/microwatt: Populate platform bus from device-tree Just like any other embedded platform. Add an empty soc node. 
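Once of_platform_default_populate() runs, children of the simple-bus node added below become platform devices that bind to ordinary platform drivers. A hypothetical sketch of such a binding (the compatible string, mw_demo_* names and behaviour are illustrative only, not part of Microwatt):

	#include <linux/module.h>
	#include <linux/of.h>
	#include <linux/platform_device.h>

	static int mw_demo_probe(struct platform_device *pdev)
	{
		dev_info(&pdev->dev, "bound via the populated soc node\n");
		return 0;
	}

	static const struct of_device_id mw_demo_of_match[] = {
		{ .compatible = "example,microwatt-demo" },	/* hypothetical */
		{ }
	};

	static struct platform_driver mw_demo_driver = {
		.probe	= mw_demo_probe,
		.driver	= {
			.name		= "mw-demo",
			.of_match_table	= mw_demo_of_match,
		},
	};
	module_platform_driver(mw_demo_driver);
	MODULE_LICENSE("GPL");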
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras Reviewed-by: Segher Boessenkool Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/YMwWx98+PMibZq/G@thinks.paulus.ozlabs.org --- arch/powerpc/boot/dts/microwatt.dts | 7 +++++++ arch/powerpc/platforms/microwatt/setup.c | 8 ++++++++ 2 files changed, 15 insertions(+) diff --git a/arch/powerpc/boot/dts/microwatt.dts b/arch/powerpc/boot/dts/microwatt.dts index ac264ad3faaf..9b6140c90370 100644 --- a/arch/powerpc/boot/dts/microwatt.dts +++ b/arch/powerpc/boot/dts/microwatt.dts @@ -95,4 +95,11 @@ 00 00 00 00 00 00 00 00 40 00 40]; }; + soc@c0000000 { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + + ranges = <0 0 0xc0000000 0x40000000>; + }; }; diff --git a/arch/powerpc/platforms/microwatt/setup.c b/arch/powerpc/platforms/microwatt/setup.c index d80d52612672..5af4adf881bc 100644 --- a/arch/powerpc/platforms/microwatt/setup.c +++ b/arch/powerpc/platforms/microwatt/setup.c @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include #include @@ -16,6 +18,12 @@ static int __init microwatt_probe(void) return of_machine_is_compatible("microwatt-soc"); } +static int __init microwatt_populate(void) +{ + return of_platform_default_populate(NULL, NULL, NULL); +} +machine_arch_initcall(microwatt, microwatt_populate); + define_machine(microwatt) { .name = "microwatt", .probe = microwatt_probe, -- cgit v1.2.3 From aa9c5adf2f61da39c92280d9336e091852e292ff Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 18 Jun 2021 13:45:53 +1000 Subject: powerpc/xics: Add a native ICS backend for microwatt This is a simple native ICS backend that matches the layout of the Microwatt implementation of ICS. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras Reviewed-by: Segher Boessenkool [mpe: Add empty ics_native_init() to unbreak non-microwatt builds] Signed-off-by: Michael Ellerman fixup-ics Link: https://lore.kernel.org/r/YMwW8cxrwB2W5EUN@thinks.paulus.ozlabs.org --- arch/powerpc/boot/dts/microwatt.dts | 18 +++ arch/powerpc/include/asm/xics.h | 4 + arch/powerpc/platforms/microwatt/Kconfig | 2 + arch/powerpc/platforms/microwatt/setup.c | 8 + arch/powerpc/sysdev/xics/Kconfig | 3 + arch/powerpc/sysdev/xics/Makefile | 1 + arch/powerpc/sysdev/xics/ics-native.c | 257 +++++++++++++++++++++++++++++++ arch/powerpc/sysdev/xics/xics-common.c | 2 + 8 files changed, 295 insertions(+) create mode 100644 arch/powerpc/sysdev/xics/ics-native.c diff --git a/arch/powerpc/boot/dts/microwatt.dts b/arch/powerpc/boot/dts/microwatt.dts index 9b6140c90370..04e5dd92270e 100644 --- a/arch/powerpc/boot/dts/microwatt.dts +++ b/arch/powerpc/boot/dts/microwatt.dts @@ -99,7 +99,25 @@ compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; + interrupt-parent = <&ICS>; ranges = <0 0 0xc0000000 0x40000000>; + + interrupt-controller@4000 { + compatible = "openpower,xics-presentation", "ibm,ppc-xicp"; + ibm,interrupt-server-ranges = <0x0 0x1>; + reg = <0x4000 0x100>; + }; + + ICS: interrupt-controller@5000 { + compatible = "openpower,xics-sources"; + interrupt-controller; + interrupt-ranges = <0x10 0x10>; + reg = <0x5000 0x100>; + #address-cells = <0>; + #size-cells = <0>; + #interrupt-cells = <2>; + }; + }; }; diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h index 8e903b3f9c24..d9cf192368ad 100644 --- a/arch/powerpc/include/asm/xics.h +++ b/arch/powerpc/include/asm/xics.h @@ -65,8 +65,12 @@ struct icp_ops { extern const struct icp_ops *icp_ops; +#ifdef 
CONFIG_PPC_ICS_NATIVE /* Native ICS */ extern int ics_native_init(void); +#else +static inline int ics_native_init(void) { return -ENODEV; } +#endif /* RTAS ICS */ #ifdef CONFIG_PPC_ICS_RTAS diff --git a/arch/powerpc/platforms/microwatt/Kconfig b/arch/powerpc/platforms/microwatt/Kconfig index 3be01e78ce57..b52c869c0eb8 100644 --- a/arch/powerpc/platforms/microwatt/Kconfig +++ b/arch/powerpc/platforms/microwatt/Kconfig @@ -3,6 +3,8 @@ config PPC_MICROWATT depends on PPC_BOOK3S_64 && !SMP bool "Microwatt SoC platform" select PPC_XICS + select PPC_ICS_NATIVE + select PPC_ICP_NATIVE select PPC_NATIVE help This option enables support for FPGA-based Microwatt implementations. diff --git a/arch/powerpc/platforms/microwatt/setup.c b/arch/powerpc/platforms/microwatt/setup.c index 5af4adf881bc..1c1b7791fa57 100644 --- a/arch/powerpc/platforms/microwatt/setup.c +++ b/arch/powerpc/platforms/microwatt/setup.c @@ -10,8 +10,15 @@ #include #include #include + #include #include +#include + +static void __init microwatt_init_IRQ(void) +{ + xics_init(); +} static int __init microwatt_probe(void) { @@ -27,5 +34,6 @@ machine_arch_initcall(microwatt, microwatt_populate); define_machine(microwatt) { .name = "microwatt", .probe = microwatt_probe, + .init_IRQ = microwatt_init_IRQ, .calibrate_decr = generic_calibrate_decr, }; diff --git a/arch/powerpc/sysdev/xics/Kconfig b/arch/powerpc/sysdev/xics/Kconfig index 304614c920aa..063d9195891f 100644 --- a/arch/powerpc/sysdev/xics/Kconfig +++ b/arch/powerpc/sysdev/xics/Kconfig @@ -12,3 +12,6 @@ config PPC_ICP_HV config PPC_ICS_RTAS def_bool n + +config PPC_ICS_NATIVE + def_bool n diff --git a/arch/powerpc/sysdev/xics/Makefile b/arch/powerpc/sysdev/xics/Makefile index ba1e3117b1c0..747063927c6c 100644 --- a/arch/powerpc/sysdev/xics/Makefile +++ b/arch/powerpc/sysdev/xics/Makefile @@ -4,4 +4,5 @@ obj-y += xics-common.o obj-$(CONFIG_PPC_ICP_NATIVE) += icp-native.o obj-$(CONFIG_PPC_ICP_HV) += icp-hv.o obj-$(CONFIG_PPC_ICS_RTAS) += ics-rtas.o +obj-$(CONFIG_PPC_ICS_NATIVE) += ics-native.o obj-$(CONFIG_PPC_POWERNV) += ics-opal.o icp-opal.o diff --git a/arch/powerpc/sysdev/xics/ics-native.c b/arch/powerpc/sysdev/xics/ics-native.c new file mode 100644 index 000000000000..d450502f4053 --- /dev/null +++ b/arch/powerpc/sysdev/xics/ics-native.c @@ -0,0 +1,257 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * ICS backend for OPAL managed interrupts. + * + * Copyright 2011 IBM Corp. 
+ */ + +//#define DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +struct ics_native { + struct ics ics; + struct device_node *node; + void __iomem *base; + u32 ibase; + u32 icount; +}; +#define to_ics_native(_ics) container_of(_ics, struct ics_native, ics) + +static void __iomem *ics_native_xive(struct ics_native *in, unsigned int vec) +{ + return in->base + 0x800 + ((vec - in->ibase) << 2); +} + +static void ics_native_unmask_irq(struct irq_data *d) +{ + unsigned int vec = (unsigned int)irqd_to_hwirq(d); + struct ics *ics = irq_data_get_irq_chip_data(d); + struct ics_native *in = to_ics_native(ics); + unsigned int server; + + pr_devel("ics-native: unmask virq %d [hw 0x%x]\n", d->irq, vec); + + if (vec < in->ibase || vec >= (in->ibase + in->icount)) + return; + + server = xics_get_irq_server(d->irq, irq_data_get_affinity_mask(d), 0); + out_be32(ics_native_xive(in, vec), (server << 8) | DEFAULT_PRIORITY); +} + +static unsigned int ics_native_startup(struct irq_data *d) +{ +#ifdef CONFIG_PCI_MSI + /* + * The generic MSI code returns with the interrupt disabled on the + * card, using the MSI mask bits. Firmware doesn't appear to unmask + * at that level, so we do it here by hand. + */ + if (irq_data_get_msi_desc(d)) + pci_msi_unmask_irq(d); +#endif + + /* unmask it */ + ics_native_unmask_irq(d); + return 0; +} + +static void ics_native_do_mask(struct ics_native *in, unsigned int vec) +{ + out_be32(ics_native_xive(in, vec), 0xff); +} + +static void ics_native_mask_irq(struct irq_data *d) +{ + unsigned int vec = (unsigned int)irqd_to_hwirq(d); + struct ics *ics = irq_data_get_irq_chip_data(d); + struct ics_native *in = to_ics_native(ics); + + pr_devel("ics-native: mask virq %d [hw 0x%x]\n", d->irq, vec); + + if (vec < in->ibase || vec >= (in->ibase + in->icount)) + return; + ics_native_do_mask(in, vec); +} + +static int ics_native_set_affinity(struct irq_data *d, + const struct cpumask *cpumask, + bool force) +{ + unsigned int vec = (unsigned int)irqd_to_hwirq(d); + struct ics *ics = irq_data_get_irq_chip_data(d); + struct ics_native *in = to_ics_native(ics); + int server; + u32 xive; + + if (vec < in->ibase || vec >= (in->ibase + in->icount)) + return -EINVAL; + + server = xics_get_irq_server(d->irq, cpumask, 1); + if (server == -1) { + pr_warn("%s: No online cpus in the mask %*pb for irq %d\n", + __func__, cpumask_pr_args(cpumask), d->irq); + return -1; + } + + xive = in_be32(ics_native_xive(in, vec)); + xive = (xive & 0xff) | (server << 8); + out_be32(ics_native_xive(in, vec), xive); + + return IRQ_SET_MASK_OK; +} + +static struct irq_chip ics_native_irq_chip = { + .name = "ICS", + .irq_startup = ics_native_startup, + .irq_mask = ics_native_mask_irq, + .irq_unmask = ics_native_unmask_irq, + .irq_eoi = NULL, /* Patched at init time */ + .irq_set_affinity = ics_native_set_affinity, + .irq_set_type = xics_set_irq_type, + .irq_retrigger = xics_retrigger, +}; + +static int ics_native_map(struct ics *ics, unsigned int virq) +{ + unsigned int vec = (unsigned int)virq_to_hw(virq); + struct ics_native *in = to_ics_native(ics); + + pr_devel("%s: vec=0x%x\n", __func__, vec); + + if (vec < in->ibase || vec >= (in->ibase + in->icount)) + return -EINVAL; + + irq_set_chip_and_handler(virq, &ics_native_irq_chip, handle_fasteoi_irq); + irq_set_chip_data(virq, ics); + + return 0; +} + +static void ics_native_mask_unknown(struct ics *ics, unsigned long vec) 
+{ + struct ics_native *in = to_ics_native(ics); + + if (vec < in->ibase || vec >= (in->ibase + in->icount)) + return; + + ics_native_do_mask(in, vec); +} + +static long ics_native_get_server(struct ics *ics, unsigned long vec) +{ + struct ics_native *in = to_ics_native(ics); + u32 xive; + + if (vec < in->ibase || vec >= (in->ibase + in->icount)) + return -EINVAL; + + xive = in_be32(ics_native_xive(in, vec)); + return (xive >> 8) & 0xfff; +} + +static int ics_native_host_match(struct ics *ics, struct device_node *node) +{ + struct ics_native *in = to_ics_native(ics); + + return in->node == node; +} + +static struct ics ics_native_template = { + .map = ics_native_map, + .mask_unknown = ics_native_mask_unknown, + .get_server = ics_native_get_server, + .host_match = ics_native_host_match, +}; + +static int __init ics_native_add_one(struct device_node *np) +{ + struct ics_native *ics; + u32 ranges[2]; + int rc, count; + + ics = kzalloc(sizeof(struct ics_native), GFP_KERNEL); + if (!ics) + return -ENOMEM; + ics->node = of_node_get(np); + memcpy(&ics->ics, &ics_native_template, sizeof(struct ics)); + + ics->base = of_iomap(np, 0); + if (!ics->base) { + pr_err("Failed to map %pOFP\n", np); + rc = -ENOMEM; + goto fail; + } + + count = of_property_count_u32_elems(np, "interrupt-ranges"); + if (count < 2 || count & 1) { + pr_err("Failed to read interrupt-ranges of %pOFP\n", np); + rc = -EINVAL; + goto fail; + } + if (count > 2) { + pr_warn("ICS %pOFP has %d ranges, only one supported\n", + np, count >> 1); + } + rc = of_property_read_u32_array(np, "interrupt-ranges", + ranges, 2); + if (rc) { + pr_err("Failed to read interrupt-ranges of %pOFP\n", np); + goto fail; + } + ics->ibase = ranges[0]; + ics->icount = ranges[1]; + + pr_info("ICS native initialized for sources %d..%d\n", + ics->ibase, ics->ibase + ics->icount - 1); + + /* Register ourselves */ + xics_register_ics(&ics->ics); + + return 0; +fail: + of_node_put(ics->node); + kfree(ics); + return rc; +} + +int __init ics_native_init(void) +{ + struct device_node *ics; + bool found_one = false; + + /* We need to patch our irq chip's EOI to point to the + * right ICP + */ + ics_native_irq_chip.irq_eoi = icp_ops->eoi; + + /* Find native ICS in the device-tree */ + for_each_compatible_node(ics, NULL, "openpower,xics-sources") { + if (ics_native_add_one(ics) == 0) + found_one = true; + } + + if (found_one) + pr_info("ICS native backend registered\n"); + + return found_one ? 0 : -ENODEV; +} diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 7e4305c01bac..de41ab91f793 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -476,6 +476,8 @@ void __init xics_init(void) rc = ics_rtas_init(); if (rc < 0) rc = ics_opal_init(); + if (rc < 0) + rc = ics_native_init(); if (rc < 0) pr_warn("XICS: Cannot find a Source Controller !\n"); -- cgit v1.2.3 From 48b545b8018db61ab4978d29c73c16b9fbfad12c Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 18 Jun 2021 13:46:32 +1000 Subject: powerpc/microwatt: Use standard 16550 UART for console This adds support to the Microwatt platform to use the standard 16550-style UART which is available in the standalone Microwatt FPGA.
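Note the reg-shift = <2> property in the serial node below: Microwatt spaces the byte-wide 16550 registers four bytes apart, and both the 8250 driver and the udbg accessors in this patch compute register addresses the same way. A sketch of the arithmetic, assuming a standard 8250 register index:

	/* With reg-shift = 2, register N lives at base + (N << 2);
	 * e.g. UART_LSR (register 5) sits at base + 0x14. */
	static inline void __iomem *uart_reg_addr(void __iomem *base, unsigned int reg)
	{
		return base + (reg << 2);
	}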
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras Reviewed-by: Segher Boessenkool Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/YMwXGCTzedpQje7r@thinks.paulus.ozlabs.org --- arch/powerpc/boot/dts/microwatt.dts | 27 +++++++++++++++++----- arch/powerpc/kernel/udbg_16550.c | 39 ++++++++++++++++++++++++++++++++ arch/powerpc/platforms/microwatt/Kconfig | 1 + arch/powerpc/platforms/microwatt/setup.c | 2 ++ 4 files changed, 63 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/boot/dts/microwatt.dts b/arch/powerpc/boot/dts/microwatt.dts index 04e5dd92270e..974abbdda249 100644 --- a/arch/powerpc/boot/dts/microwatt.dts +++ b/arch/powerpc/boot/dts/microwatt.dts @@ -6,6 +6,10 @@ model-name = "microwatt"; compatible = "microwatt-soc"; + aliases { + serial0 = &UART0; + }; + reserved-memory { #size-cells = <0x02>; #address-cells = <0x02>; @@ -89,12 +93,6 @@ }; }; - chosen { - bootargs = ""; - ibm,architecture-vec-5 = [19 00 10 00 00 00 00 00 00 00 00 00 00 00 00 00 - 00 00 00 00 00 00 00 00 40 00 40]; - }; - soc@c0000000 { compatible = "simple-bus"; #address-cells = <1>; @@ -119,5 +117,22 @@ #interrupt-cells = <2>; }; + UART0: serial@2000 { + device_type = "serial"; + compatible = "ns16550"; + reg = <0x2000 0x8>; + clock-frequency = <100000000>; + current-speed = <115200>; + reg-shift = <2>; + fifo-size = <16>; + interrupts = <0x10 0x1>; + }; + }; + + chosen { + bootargs = ""; + ibm,architecture-vec-5 = [19 00 10 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 40 00 40]; + stdout-path = &UART0; }; }; diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index 9356b60d6030..8513aa49614e 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -296,3 +296,42 @@ void __init udbg_init_40x_realmode(void) } #endif /* CONFIG_PPC_EARLY_DEBUG_40x */ + +#ifdef CONFIG_PPC_EARLY_DEBUG_MICROWATT + +#define UDBG_UART_MW_ADDR ((void __iomem *)0xc0002000) + +static u8 udbg_uart_in_isa300_rm(unsigned int reg) +{ + uint64_t msr = mfmsr(); + uint8_t c; + + mtmsr(msr & ~(MSR_EE|MSR_DR)); + isync(); + eieio(); + c = __raw_rm_readb(UDBG_UART_MW_ADDR + (reg << 2)); + mtmsr(msr); + isync(); + return c; +} + +static void udbg_uart_out_isa300_rm(unsigned int reg, u8 val) +{ + uint64_t msr = mfmsr(); + + mtmsr(msr & ~(MSR_EE|MSR_DR)); + isync(); + eieio(); + __raw_rm_writeb(val, UDBG_UART_MW_ADDR + (reg << 2)); + mtmsr(msr); + isync(); +} + +void __init udbg_init_debug_microwatt(void) +{ + udbg_uart_in = udbg_uart_in_isa300_rm; + udbg_uart_out = udbg_uart_out_isa300_rm; + udbg_use_uart(); +} + +#endif /* CONFIG_PPC_EARLY_DEBUG_MICROWATT */ diff --git a/arch/powerpc/platforms/microwatt/Kconfig b/arch/powerpc/platforms/microwatt/Kconfig index b52c869c0eb8..50ed0cedb5f1 100644 --- a/arch/powerpc/platforms/microwatt/Kconfig +++ b/arch/powerpc/platforms/microwatt/Kconfig @@ -6,6 +6,7 @@ config PPC_MICROWATT select PPC_ICS_NATIVE select PPC_ICP_NATIVE select PPC_NATIVE + select PPC_UDBG_16550 help This option enables support for FPGA-based Microwatt implementations. 
diff --git a/arch/powerpc/platforms/microwatt/setup.c b/arch/powerpc/platforms/microwatt/setup.c index 1c1b7791fa57..0b02603bdb74 100644 --- a/arch/powerpc/platforms/microwatt/setup.c +++ b/arch/powerpc/platforms/microwatt/setup.c @@ -14,6 +14,7 @@ #include #include #include +#include static void __init microwatt_init_IRQ(void) { @@ -35,5 +36,6 @@ define_machine(microwatt) { .name = "microwatt", .probe = microwatt_probe, .init_IRQ = microwatt_init_IRQ, + .progress = udbg_progress, .calibrate_decr = generic_calibrate_decr, }; -- cgit v1.2.3 From c25769fddaec13509b6cdc7ad17458f239c4cee7 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 18 Jun 2021 13:47:08 +1000 Subject: powerpc/microwatt: Add support for hardware random number generator Microwatt's hardware RNG is accessed using the DARN instruction. Signed-off-by: Paul Mackerras Reviewed-by: Nicholas Piggin Reviewed-by: Segher Boessenkool Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/YMwXPHlV/ZleiQUY@thinks.paulus.ozlabs.org --- arch/powerpc/platforms/microwatt/Kconfig | 1 + arch/powerpc/platforms/microwatt/Makefile | 2 +- arch/powerpc/platforms/microwatt/rng.c | 48 +++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/platforms/microwatt/rng.c diff --git a/arch/powerpc/platforms/microwatt/Kconfig b/arch/powerpc/platforms/microwatt/Kconfig index 50ed0cedb5f1..8f6a81978461 100644 --- a/arch/powerpc/platforms/microwatt/Kconfig +++ b/arch/powerpc/platforms/microwatt/Kconfig @@ -7,6 +7,7 @@ config PPC_MICROWATT select PPC_ICP_NATIVE select PPC_NATIVE select PPC_UDBG_16550 + select ARCH_RANDOM help This option enables support for FPGA-based Microwatt implementations. diff --git a/arch/powerpc/platforms/microwatt/Makefile b/arch/powerpc/platforms/microwatt/Makefile index e6885b3b2ee7..116d6d3ad3f0 100644 --- a/arch/powerpc/platforms/microwatt/Makefile +++ b/arch/powerpc/platforms/microwatt/Makefile @@ -1 +1 @@ -obj-y += setup.o +obj-y += setup.o rng.o diff --git a/arch/powerpc/platforms/microwatt/rng.c b/arch/powerpc/platforms/microwatt/rng.c new file mode 100644 index 000000000000..3d8ee6eb7dad --- /dev/null +++ b/arch/powerpc/platforms/microwatt/rng.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Derived from arch/powerpc/platforms/powernv/rng.c, which is: + * Copyright 2013, Michael Ellerman, IBM Corporation. 
+ */ + +#define pr_fmt(fmt) "microwatt-rng: " fmt + +#include +#include +#include +#include +#include + +#define DARN_ERR 0xFFFFFFFFFFFFFFFFul + +int microwatt_get_random_darn(unsigned long *v) +{ + unsigned long val; + + /* Using DARN with L=1 - 64-bit conditioned random number */ + asm volatile(PPC_DARN(%0, 1) : "=r"(val)); + + if (val == DARN_ERR) + return 0; + + *v = val; + + return 1; +} + +static __init int rng_init(void) +{ + unsigned long val; + int i; + + for (i = 0; i < 10; i++) { + if (microwatt_get_random_darn(&val)) { + ppc_md.get_random_seed = microwatt_get_random_darn; + return 0; + } + } + + pr_warn("Unable to use DARN for get_random_seed()\n"); + + return -EIO; +} +machine_subsys_initcall(, rng_init); -- cgit v1.2.3 From 4a1511eb342bd80c6ea0e8a7ce0bbe68aac96ac5 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 18 Jun 2021 13:48:12 +1000 Subject: powerpc/microwatt: Add microwatt_defconfig Signed-off-by: Paul Mackerras Reviewed-by: Segher Boessenkool Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/YMwXfL8hOpReIiiP@thinks.paulus.ozlabs.org --- arch/powerpc/configs/microwatt_defconfig | 98 ++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 arch/powerpc/configs/microwatt_defconfig diff --git a/arch/powerpc/configs/microwatt_defconfig b/arch/powerpc/configs/microwatt_defconfig new file mode 100644 index 000000000000..a08b739123da --- /dev/null +++ b/arch/powerpc/configs/microwatt_defconfig @@ -0,0 +1,98 @@ +# CONFIG_SWAP is not set +# CONFIG_CROSS_MEMORY_ATTACH is not set +CONFIG_HIGH_RES_TIMERS=y +CONFIG_PREEMPT_VOLUNTARY=y +CONFIG_TICK_CPU_ACCOUNTING=y +CONFIG_LOG_BUF_SHIFT=16 +CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=12 +CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_KALLSYMS_ALL=y +CONFIG_EMBEDDED=y +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_SLUB_DEBUG is not set +# CONFIG_COMPAT_BRK is not set +# CONFIG_SLAB_MERGE_DEFAULT is not set +CONFIG_PPC64=y +# CONFIG_PPC_KUEP is not set +# CONFIG_PPC_KUAP is not set +CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_NR_IRQS=64 +CONFIG_PANIC_TIMEOUT=10 +# CONFIG_PPC_POWERNV is not set +# CONFIG_PPC_PSERIES is not set +CONFIG_PPC_MICROWATT=y +# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set +CONFIG_CPU_FREQ=y +CONFIG_HZ_100=y +# CONFIG_PPC_MEM_KEYS is not set +# CONFIG_SECCOMP is not set +# CONFIG_MQ_IOSCHED_KYBER is not set +# CONFIG_COREDUMP is not set +# CONFIG_COMPACTION is not set +# CONFIG_MIGRATION is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_PACKET_DIAG=y +CONFIG_UNIX=y +CONFIG_UNIX_DIAG=y +CONFIG_INET=y +CONFIG_INET_UDP_DIAG=y +CONFIG_INET_RAW_DIAG=y +# CONFIG_WIRELESS is not set +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_FW_LOADER is not set +# CONFIG_ALLOW_DEV_COREDUMP is not set +CONFIG_MTD=y +CONFIG_MTD_BLOCK=y +CONFIG_MTD_PARTITIONED_MASTER=y +CONFIG_MTD_SPI_NOR=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_NETDEVICES=y +# CONFIG_WLAN is not set +# CONFIG_INPUT is not set +# CONFIG_SERIO is not set +# CONFIG_VT is not set +CONFIG_SERIAL_8250=y +# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y +CONFIG_SERIAL_NONSTANDARD=y +# CONFIG_NVRAM is not set +CONFIG_RANDOM_TRUST_CPU=y +CONFIG_SPI=y +CONFIG_SPI_DEBUG=y +CONFIG_SPI_BITBANG=y +CONFIG_SPI_SPIDEV=y +# CONFIG_HWMON is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_VIRTIO_MENU is not set +# CONFIG_IOMMU_SUPPORT is not set +# CONFIG_NVMEM is not set 
+CONFIG_EXT4_FS=y +# CONFIG_FILE_LOCKING is not set +# CONFIG_DNOTIFY is not set +# CONFIG_INOTIFY_USER is not set +# CONFIG_MISC_FILESYSTEMS is not set +# CONFIG_CRYPTO_HW is not set +# CONFIG_XZ_DEC_X86 is not set +# CONFIG_XZ_DEC_IA64 is not set +# CONFIG_XZ_DEC_ARM is not set +# CONFIG_XZ_DEC_ARMTHUMB is not set +# CONFIG_XZ_DEC_SPARC is not set +CONFIG_PRINTK_TIME=y +# CONFIG_SYMBOLIC_ERRNAME is not set +# CONFIG_DEBUG_BUGVERBOSE is not set +# CONFIG_DEBUG_MISC is not set +# CONFIG_SCHED_DEBUG is not set +# CONFIG_FTRACE is not set +# CONFIG_STRICT_DEVMEM is not set +CONFIG_PPC_DISABLE_WERROR=y +CONFIG_XMON=y +CONFIG_XMON_DEFAULT=y +# CONFIG_XMON_DEFAULT_RO_MODE is not set +# CONFIG_RUNTIME_TESTING_MENU is not set -- cgit v1.2.3 From c93f80849bdd9b45d834053ae1336e28f0026c84 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 18 Jun 2021 13:49:00 +1000 Subject: powerpc/boot: Fixup device-tree on little endian This fixes the core devtree.c functions and the ns16550 UART backend. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras Reviewed-by: Segher Boessenkool Acked-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/YMwXrPT8nc4YUdJ9@thinks.paulus.ozlabs.org --- arch/powerpc/boot/devtree.c | 59 ++++++++++++++++++++++++++------------------- arch/powerpc/boot/ns16550.c | 9 +++++-- 2 files changed, 41 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/boot/devtree.c b/arch/powerpc/boot/devtree.c index 5d91036ad626..58fbcfcc98c9 100644 --- a/arch/powerpc/boot/devtree.c +++ b/arch/powerpc/boot/devtree.c @@ -13,6 +13,7 @@ #include "string.h" #include "stdio.h" #include "ops.h" +#include "of.h" void dt_fixup_memory(u64 start, u64 size) { @@ -23,21 +24,25 @@ void dt_fixup_memory(u64 start, u64 size) root = finddevice("/"); if (getprop(root, "#address-cells", &naddr, sizeof(naddr)) < 0) naddr = 2; + else + naddr = be32_to_cpu(naddr); if (naddr < 1 || naddr > 2) fatal("Can't cope with #address-cells == %d in /\n\r", naddr); if (getprop(root, "#size-cells", &nsize, sizeof(nsize)) < 0) nsize = 1; + else + nsize = be32_to_cpu(nsize); if (nsize < 1 || nsize > 2) fatal("Can't cope with #size-cells == %d in /\n\r", nsize); i = 0; if (naddr == 2) - memreg[i++] = start >> 32; - memreg[i++] = start & 0xffffffff; + memreg[i++] = cpu_to_be32(start >> 32); + memreg[i++] = cpu_to_be32(start & 0xffffffff); if (nsize == 2) - memreg[i++] = size >> 32; - memreg[i++] = size & 0xffffffff; + memreg[i++] = cpu_to_be32(size >> 32); + memreg[i++] = cpu_to_be32(size & 0xffffffff); memory = finddevice("/memory"); if (! 
memory) { @@ -45,9 +50,9 @@ void dt_fixup_memory(u64 start, u64 size) setprop_str(memory, "device_type", "memory"); } - printf("Memory <- <0x%x", memreg[0]); + printf("Memory <- <0x%x", be32_to_cpu(memreg[0])); for (i = 1; i < (naddr + nsize); i++) - printf(" 0x%x", memreg[i]); + printf(" 0x%x", be32_to_cpu(memreg[i])); printf("> (%ldMB)\n\r", (unsigned long)(size >> 20)); setprop(memory, "reg", memreg, (naddr + nsize)*sizeof(u32)); @@ -65,10 +70,10 @@ void dt_fixup_cpu_clocks(u32 cpu, u32 tb, u32 bus) printf("CPU bus-frequency <- 0x%x (%dMHz)\n\r", bus, MHZ(bus)); while ((devp = find_node_by_devtype(devp, "cpu"))) { - setprop_val(devp, "clock-frequency", cpu); - setprop_val(devp, "timebase-frequency", tb); + setprop_val(devp, "clock-frequency", cpu_to_be32(cpu)); + setprop_val(devp, "timebase-frequency", cpu_to_be32(tb)); if (bus > 0) - setprop_val(devp, "bus-frequency", bus); + setprop_val(devp, "bus-frequency", cpu_to_be32(bus)); } timebase_period_ns = 1000000000 / tb; @@ -80,7 +85,7 @@ void dt_fixup_clock(const char *path, u32 freq) if (devp) { printf("%s: clock-frequency <- %x (%dMHz)\n\r", path, freq, MHZ(freq)); - setprop_val(devp, "clock-frequency", freq); + setprop_val(devp, "clock-frequency", cpu_to_be32(freq)); } } @@ -133,8 +138,12 @@ void dt_get_reg_format(void *node, u32 *naddr, u32 *nsize) { if (getprop(node, "#address-cells", naddr, 4) != 4) *naddr = 2; + else + *naddr = be32_to_cpu(*naddr); if (getprop(node, "#size-cells", nsize, 4) != 4) *nsize = 1; + else + *nsize = be32_to_cpu(*nsize); } static void copy_val(u32 *dest, u32 *src, int naddr) @@ -163,9 +172,9 @@ static int add_reg(u32 *reg, u32 *add, int naddr) int i, carry = 0; for (i = MAX_ADDR_CELLS - 1; i >= MAX_ADDR_CELLS - naddr; i--) { - u64 tmp = (u64)reg[i] + add[i] + carry; + u64 tmp = (u64)be32_to_cpu(reg[i]) + be32_to_cpu(add[i]) + carry; carry = tmp >> 32; - reg[i] = (u32)tmp; + reg[i] = cpu_to_be32((u32)tmp); } return !carry; @@ -180,18 +189,18 @@ static int compare_reg(u32 *reg, u32 *range, u32 *rangesize) u32 end; for (i = 0; i < MAX_ADDR_CELLS; i++) { - if (reg[i] < range[i]) + if (be32_to_cpu(reg[i]) < be32_to_cpu(range[i])) return 0; - if (reg[i] > range[i]) + if (be32_to_cpu(reg[i]) > be32_to_cpu(range[i])) break; } for (i = 0; i < MAX_ADDR_CELLS; i++) { - end = range[i] + rangesize[i]; + end = be32_to_cpu(range[i]) + be32_to_cpu(rangesize[i]); - if (reg[i] < end) + if (be32_to_cpu(reg[i]) < end) break; - if (reg[i] > end) + if (be32_to_cpu(reg[i]) > end) return 0; } @@ -240,7 +249,6 @@ static int dt_xlate(void *node, int res, int reglen, unsigned long *addr, return 0; dt_get_reg_format(parent, &naddr, &nsize); - if (nsize > 2) return 0; @@ -252,10 +260,10 @@ static int dt_xlate(void *node, int res, int reglen, unsigned long *addr, copy_val(last_addr, prop_buf + offset, naddr); - ret_size = prop_buf[offset + naddr]; + ret_size = be32_to_cpu(prop_buf[offset + naddr]); if (nsize == 2) { ret_size <<= 32; - ret_size |= prop_buf[offset + naddr + 1]; + ret_size |= be32_to_cpu(prop_buf[offset + naddr + 1]); } for (;;) { @@ -278,7 +286,6 @@ static int dt_xlate(void *node, int res, int reglen, unsigned long *addr, offset = find_range(last_addr, prop_buf, prev_naddr, naddr, prev_nsize, buflen / 4); - if (offset < 0) return 0; @@ -296,8 +303,7 @@ static int dt_xlate(void *node, int res, int reglen, unsigned long *addr, if (naddr > 2) return 0; - ret_addr = ((u64)last_addr[2] << 32) | last_addr[3]; - + ret_addr = ((u64)be32_to_cpu(last_addr[2]) << 32) | be32_to_cpu(last_addr[3]); if (sizeof(void *) == 4 && 
(ret_addr >= 0x100000000ULL || ret_size > 0x100000000ULL || ret_addr + ret_size > 0x100000000ULL)) @@ -350,11 +356,14 @@ int dt_is_compatible(void *node, const char *compat) int dt_get_virtual_reg(void *node, void **addr, int nres) { unsigned long xaddr; - int n; + int n, i; n = getprop(node, "virtual-reg", addr, nres * 4); - if (n > 0) + if (n > 0) { + for (i = 0; i < n/4; i ++) + ((u32 *)addr)[i] = be32_to_cpu(((u32 *)addr)[i]); return n / 4; + } for (n = 0; n < nres; n++) { if (!dt_xlate_reg(node, n, &xaddr, NULL)) diff --git a/arch/powerpc/boot/ns16550.c b/arch/powerpc/boot/ns16550.c index b0da4466d419..f16d2be1d0f3 100644 --- a/arch/powerpc/boot/ns16550.c +++ b/arch/powerpc/boot/ns16550.c @@ -15,6 +15,7 @@ #include "stdio.h" #include "io.h" #include "ops.h" +#include "of.h" #define UART_DLL 0 /* Out: Divisor Latch Low */ #define UART_DLM 1 /* Out: Divisor Latch High */ @@ -58,16 +59,20 @@ int ns16550_console_init(void *devp, struct serial_console_data *scdp) int n; u32 reg_offset; - if (dt_get_virtual_reg(devp, (void **)®_base, 1) < 1) + if (dt_get_virtual_reg(devp, (void **)®_base, 1) < 1) { + printf("virt reg parse fail...\r\n"); return -1; + } n = getprop(devp, "reg-offset", ®_offset, sizeof(reg_offset)); if (n == sizeof(reg_offset)) - reg_base += reg_offset; + reg_base += be32_to_cpu(reg_offset); n = getprop(devp, "reg-shift", ®_shift, sizeof(reg_shift)); if (n != sizeof(reg_shift)) reg_shift = 0; + else + reg_shift = be32_to_cpu(reg_shift); scdp->open = ns16550_open; scdp->putc = ns16550_putc; -- cgit v1.2.3 From 4a21192e2796c3338c4b0083b494a84a61311aaf Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Fri, 18 Jun 2021 13:49:43 +1000 Subject: powerpc/boot: Add a boot wrapper for Microwatt This allows microwatt's kernel to be built with an embedded device tree. Load to arch/powerpc/boot/dtbImage.microwatt to 0x500000: mw_debug -b fpga stop load arch/powerpc/boot/dtbImage.microwatt 500000 start Signed-off-by: Joel Stanley Signed-off-by: Paul Mackerras Reviewed-by: Segher Boessenkool Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/YMwX19wym3kQ7guu@thinks.paulus.ozlabs.org --- arch/powerpc/boot/Makefile | 4 ++++ arch/powerpc/boot/microwatt.c | 24 ++++++++++++++++++++++++ arch/powerpc/boot/wrapper | 5 +++++ 3 files changed, 33 insertions(+) create mode 100644 arch/powerpc/boot/microwatt.c diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index e8e9d17af43b..e312ea802aa6 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -163,6 +163,8 @@ src-plat-$(CONFIG_PPC_POWERNV) += pseries-head.S src-plat-$(CONFIG_PPC_IBM_CELL_BLADE) += pseries-head.S src-plat-$(CONFIG_MVME7100) += motload-head.S mvme7100.c +src-plat-$(CONFIG_PPC_MICROWATT) += fixed-head.S microwatt.c + src-wlib := $(sort $(src-wlib-y)) src-plat := $(sort $(src-plat-y)) src-boot := $(src-wlib) $(src-plat) empty.c @@ -355,6 +357,8 @@ image-$(CONFIG_MVME5100) += dtbImage.mvme5100 # Board port in arch/powerpc/platform/amigaone/Kconfig image-$(CONFIG_AMIGAONE) += cuImage.amigaone +image-$(CONFIG_PPC_MICROWATT) += dtbImage.microwatt + # For 32-bit powermacs, build the COFF and miboot images # as well as the ELF images. 
ifdef CONFIG_PPC32 diff --git a/arch/powerpc/boot/microwatt.c b/arch/powerpc/boot/microwatt.c new file mode 100644 index 000000000000..ca9d83617fc1 --- /dev/null +++ b/arch/powerpc/boot/microwatt.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include "stdio.h" +#include "types.h" +#include "io.h" +#include "ops.h" + +BSS_STACK(8192); + +void platform_init(unsigned long r3, unsigned long r4, unsigned long r5) +{ + unsigned long heapsize = 16*1024*1024 - (unsigned long)_end; + + /* + * Disable interrupts and turn off MSR_RI, since we'll + * shortly be overwriting the interrupt vectors. + */ + __asm__ volatile("mtmsrd %0,1" : : "r" (0)); + + simple_alloc_init(_end, heapsize, 32, 64); + fdt_init(_dtb_start); + serial_console_init(); +} diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index cdb796b76e2e..1f4676bab786 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -342,6 +342,11 @@ gamecube|wii) link_address='0x600000' platformo="$object/$platform-head.o $object/$platform.o" ;; +microwatt) + link_address='0x500000' + platformo="$object/fixed-head.o $object/$platform.o" + binary=y + ;; treeboot-currituck) link_address='0x1000000' ;; -- cgit v1.2.3 From 3018fbc63632e6d30abde228f29aaeaf78b782bf Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 24 Jun 2021 22:34:20 +1000 Subject: powerpc/64s: Fix boot failure with 4K Radix When using the Radix MMU our PGD is always 64K, and must be naturally aligned. For a 4K page size kernel that means page alignment of swapper_pg_dir is not sufficient, leading to failure to boot. Use the existing MAX_PTRS_PER_PGD which has the correct value, and avoids us hard-coding 64K here. Fixes: e72421a085a8 ("powerpc: Define swapper_pg_dir[] in C") Reported-by: Daniel Axtens Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210624123420.2784187-1-mpe@ellerman.id.au --- arch/powerpc/mm/pgtable.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 1707ab580ee2..cd16b407f47e 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -28,7 +28,13 @@ #include #include -pgd_t swapper_pg_dir[MAX_PTRS_PER_PGD] __page_aligned_bss; +#ifdef CONFIG_PPC64 +#define PGD_ALIGN (sizeof(pgd_t) * MAX_PTRS_PER_PGD) +#else +#define PGD_ALIGN PAGE_SIZE +#endif + +pgd_t swapper_pg_dir[MAX_PTRS_PER_PGD] __section(".bss..page_aligned") __aligned(PGD_ALIGN); static inline int is_exec_fault(void) { -- cgit v1.2.3 From f47d5a4fc254e62ea5af5cbb2fc3e68901def434 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 21 Jun 2021 16:49:37 +1000 Subject: powerpc/prom_init: Convert prom_strcpy() into prom_strscpy_pad() In a subsequent patch we'd like to have something like a strscpy_pad() implementation usable in prom_init.c. Currently we have a strcpy() implementation with only one caller, so convert it into strscpy_pad() and update the caller. 
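The new helper mirrors the semantics of the generic strscpy_pad(): it returns the number of bytes copied, or -E2BIG when the source had to be truncated, and it always nul-terminates and nul-pads the destination. A worked example based on the implementation below (buffer contents shown for illustration):

	char buf[8];

	prom_strscpy_pad(buf, "phandle", 8);		/* returns 7, buf = "phandle\0" */
	prom_strscpy_pad(buf, "ibm,architecture-vec-5", 8);
						/* returns -E2BIG, buf = "ibm,arc\0" */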
Reviewed-by: Daniel Axtens Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210621064938.2021419-1-mpe@ellerman.id.au --- arch/powerpc/kernel/prom_init.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 523b31685c4c..c18d55f8b951 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -242,13 +242,31 @@ static int __init prom_strcmp(const char *cs, const char *ct) return 0; } -static char __init *prom_strcpy(char *dest, const char *src) +static ssize_t __init prom_strscpy_pad(char *dest, const char *src, size_t n) { - char *tmp = dest; + ssize_t rc; + size_t i; - while ((*dest++ = *src++) != '\0') - /* nothing */; - return tmp; + if (n == 0 || n > INT_MAX) + return -E2BIG; + + // Copy up to n bytes + for (i = 0; i < n && src[i] != '\0'; i++) + dest[i] = src[i]; + + rc = i; + + // If we copied all n then we have run out of space for the nul + if (rc == n) { + // Rewind by one character to ensure nul termination + i--; + rc = -E2BIG; + } + + for (; i < n; i++) + dest[i] = '\0'; + + return rc; } static int __init prom_strncmp(const char *cs, const char *ct, size_t count) @@ -2701,7 +2719,7 @@ static void __init flatten_device_tree(void) /* Add "phandle" in there, we'll need it */ namep = make_room(&mem_start, &mem_end, 16, 1); - prom_strcpy(namep, "phandle"); + prom_strscpy_pad(namep, "phandle", sizeof("phandle")); mem_start = (unsigned long)namep + prom_strlen(namep) + 1; /* Build string array */ -- cgit v1.2.3 From ffaacd97fd37b9f4e825d8107f5cba5470458f0e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 21 Jun 2021 16:49:38 +1000 Subject: powerpc/prom_init: Pass linux_banner to firmware via option vector 7 Pass the value of linux_banner to firmware via option vector 7. Option vector 7 is described in "LoPAR" Linux on Power Architecture Reference v2.9, in table B.7 on page 824: An ASCII character formatted null terminated string that describes the client operating system. The string shall be human readable and may be displayed on the console. The string can be up to 256 bytes total, including the nul terminator. linux_banner contains lots of information, and should make it possible to identify the exact kernel version that is running: const char linux_banner[] = "Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION "\n"; For example: Linux version 4.15.0-144-generic (buildd@bos02-ppc64el-018) (gcc version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04)) #148-Ubuntu SMP Sat May 8 02:32:13 UTC 2021 (Ubuntu 4.15.0-144.148-generic 4.15.18) It's also printed at boot to the console/dmesg, which should make it possible to correlate what firmware receives with the console/dmesg on the machine. 
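Tying this to the previous patch: because prom_strscpy_pad() nul-terminates
even when it truncates, a banner longer than 256 bytes still reaches firmware
as a valid string. An illustrative sketch (the patch itself ignores the
return value):

    /* vec7.os_id is 256 bytes; an overlong banner loses its tail, keeps the nul */
    ssize_t rc = prom_strscpy_pad(ibm_architecture_vec.vec7.os_id, linux_banner, 256);
    if (rc == -E2BIG)
            ; /* truncated: firmware still sees a terminated, shorter string */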
Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210621064938.2021419-2-mpe@ellerman.id.au --- arch/powerpc/kernel/prom_init.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index c18d55f8b951..cb5be081e499 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -944,6 +945,10 @@ struct option_vector6 { u8 os_name; } __packed; +struct option_vector7 { + u8 os_id[256]; +} __packed; + struct ibm_arch_vec { struct { u32 mask, val; } pvrs[14]; @@ -966,6 +971,9 @@ struct ibm_arch_vec { u8 vec6_len; struct option_vector6 vec6; + + u8 vec7_len; + struct option_vector7 vec7; } __packed; static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = { @@ -1112,6 +1120,9 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = { .secondary_pteg = 0, .os_name = OV6_LINUX, }, + + /* option vector 7: OS Identification */ + .vec7_len = VECTOR_LENGTH(sizeof(struct option_vector7)), }; static struct ibm_arch_vec __prombss ibm_architecture_vec ____cacheline_aligned; @@ -1340,6 +1351,8 @@ static void __init prom_check_platform_support(void) memcpy(&ibm_architecture_vec, &ibm_architecture_vec_template, sizeof(ibm_architecture_vec)); + prom_strscpy_pad(ibm_architecture_vec.vec7.os_id, linux_banner, 256); + if (prop_len > 1) { int i; u8 vec[8]; -- cgit v1.2.3 From 9a3ed7adcabce24a85fbe05f54e762b18756ec22 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 18 Jun 2021 01:51:00 +1000 Subject: powerpc/interrupt: Fix CONFIG ifdef typo CONFIG_PPC_BOOK3S should be CONFIG_PPC_BOOK3S_64. restore_math is a no-op for other configurations. Signed-off-by: Christophe Leroy [np: split from another patch] Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210617155116.2167984-2-npiggin@gmail.com --- arch/powerpc/kernel/interrupt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index e0938ba298f2..f2baeee437f7 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -303,7 +303,7 @@ again: ti_flags = READ_ONCE(current_thread_info()->flags); } - if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) { + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) { if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && unlikely((ti_flags & _TIF_RESTORE_TM))) { restore_tm_state(regs); -- cgit v1.2.3 From bf9155f1970c4dbf9ec6b87d3688433bd494a4e1 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 18 Jun 2021 01:51:01 +1000 Subject: powerpc: remove interrupt exit helpers unused argument The msr argument is not used, remove it. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210617155116.2167984-3-npiggin@gmail.com --- arch/powerpc/include/asm/asm-prototypes.h | 4 ++-- arch/powerpc/kernel/interrupt.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 02ee6f5ac9fe..8dc8dc006267 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -71,8 +71,8 @@ void __init machine_init(u64 dt_ptr); #endif long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, unsigned long r0, struct pt_regs *regs); notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs, long scv); -notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr); -notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr); +notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs); +notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs); long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, u32 len_high, u32 len_low); diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index f2baeee437f7..05fa3ae56e25 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -352,7 +352,7 @@ again: return ret; } -notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr) +notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs) { unsigned long ti_flags; unsigned long flags; @@ -431,7 +431,7 @@ again: void preempt_schedule_irq(void); -notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr) +notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs) { unsigned long flags; unsigned long ret = 0; -- cgit v1.2.3 From 1df7d5e4baeac74d14c1bee18b2dff9302b3efbc Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 18 Jun 2021 01:51:02 +1000 Subject: powerpc/64s: introduce different functions to return from SRR vs HSRR interrupts This makes no real difference yet except that HSRR type interrupts will use hrfid to return. This is important for the next patch. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210617155116.2167984-4-npiggin@gmail.com --- arch/powerpc/kernel/entry_64.S | 65 +++++++++++++++++++++--------- arch/powerpc/kernel/exceptions-64e.S | 4 ++ arch/powerpc/kernel/exceptions-64s.S | 76 +++++++++++++++++++----------------- arch/powerpc/kernel/vector.S | 2 +- 4 files changed, 92 insertions(+), 55 deletions(-) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 03727308d8cc..03a45a88b4b8 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -635,51 +635,57 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) * touched, no exit work created, then this can be used. */ .balign IFETCH_ALIGN_BYTES - .globl fast_interrupt_return -fast_interrupt_return: -_ASM_NOKPROBE_SYMBOL(fast_interrupt_return) + .globl fast_interrupt_return_srr +fast_interrupt_return_srr: +_ASM_NOKPROBE_SYMBOL(fast_interrupt_return_srr) kuap_check_amr r3, r4 ld r5,_MSR(r1) andi. r0,r5,MSR_PR #ifdef CONFIG_PPC_BOOK3S - bne .Lfast_user_interrupt_return_amr + bne .Lfast_user_interrupt_return_amr_srr kuap_kernel_restore r3, r4 andi. 
r0,r5,MSR_RI li r3,0 /* 0 return value, no EMULATE_STACK_STORE */ - bne+ .Lfast_kernel_interrupt_return + bne+ .Lfast_kernel_interrupt_return_srr addi r3,r1,STACK_FRAME_OVERHEAD bl unrecoverable_exception b . /* should not get here */ #else - bne .Lfast_user_interrupt_return - b .Lfast_kernel_interrupt_return + bne .Lfast_user_interrupt_return_srr + b .Lfast_kernel_interrupt_return_srr #endif +.macro interrupt_return_macro srr .balign IFETCH_ALIGN_BYTES - .globl interrupt_return -interrupt_return: -_ASM_NOKPROBE_SYMBOL(interrupt_return) + .globl interrupt_return_\srr +interrupt_return_\srr\(): +_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()) ld r4,_MSR(r1) andi. r0,r4,MSR_PR - beq .Lkernel_interrupt_return + beq .Lkernel_interrupt_return_\srr addi r3,r1,STACK_FRAME_OVERHEAD bl interrupt_exit_user_prepare cmpdi r3,0 - bne- .Lrestore_nvgprs + bne- .Lrestore_nvgprs_\srr #ifdef CONFIG_PPC_BOOK3S -.Lfast_user_interrupt_return_amr: +.Lfast_user_interrupt_return_amr_\srr\(): kuap_user_restore r3, r4 #endif -.Lfast_user_interrupt_return: +.Lfast_user_interrupt_return_\srr\(): ld r11,_NIP(r1) ld r12,_MSR(r1) BEGIN_FTR_SECTION ld r10,_PPR(r1) mtspr SPRN_PPR,r10 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + .ifc \srr,srr mtspr SPRN_SRR0,r11 mtspr SPRN_SRR1,r12 + .else + mtspr SPRN_HSRR0,r11 + mtspr SPRN_HSRR1,r12 + .endif BEGIN_FTR_SECTION stdcx. r0,0,r1 /* to clear the reservation */ @@ -706,24 +712,33 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) REST_GPR(6, r1) REST_GPR(0, r1) REST_GPR(1, r1) + .ifc \srr,srr RFI_TO_USER + .else + HRFI_TO_USER + .endif b . /* prevent speculative execution */ -.Lrestore_nvgprs: +.Lrestore_nvgprs_\srr\(): REST_NVGPRS(r1) - b .Lfast_user_interrupt_return + b .Lfast_user_interrupt_return_\srr .balign IFETCH_ALIGN_BYTES -.Lkernel_interrupt_return: +.Lkernel_interrupt_return_\srr\(): addi r3,r1,STACK_FRAME_OVERHEAD bl interrupt_exit_kernel_prepare -.Lfast_kernel_interrupt_return: +.Lfast_kernel_interrupt_return_\srr\(): cmpdi cr1,r3,0 ld r11,_NIP(r1) ld r12,_MSR(r1) + .ifc \srr,srr mtspr SPRN_SRR0,r11 mtspr SPRN_SRR1,r12 + .else + mtspr SPRN_HSRR0,r11 + mtspr SPRN_HSRR1,r12 + .endif BEGIN_FTR_SECTION stdcx. r0,0,r1 /* to clear the reservation */ @@ -757,7 +772,11 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) REST_GPR(6, r1) REST_GPR(0, r1) REST_GPR(1, r1) + .ifc \srr,srr RFI_TO_KERNEL + .else + HRFI_TO_KERNEL + .endif b . /* prevent speculative execution */ 1: /* @@ -777,8 +796,18 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) std r9,0(r1) /* perform store component of stdu */ ld r9,PACA_EXGEN+0(r13) + .ifc \srr,srr RFI_TO_KERNEL + .else + HRFI_TO_KERNEL + .endif b . /* prevent speculative execution */ +.endm + +interrupt_return_macro srr +#ifdef CONFIG_PPC_BOOK3S +interrupt_return_macro hsrr +#endif #ifdef CONFIG_PPC_RTAS /* diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index f1ae710274bc..b35c97c7082f 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -26,6 +26,10 @@ #include #include +/* 64e interrupt returns always use SRR registers */ +#define fast_interrupt_return fast_interrupt_return_srr +#define interrupt_return interrupt_return_srr + /* XXX This will ultimately add space for a special exception save * structure used to save things like SRR0/SRR1, SPRGs, MAS, etc... * when taking special interrupts. 
For now we don't support that, diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f7fc6e078d4e..1bc27af1b425 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1149,7 +1149,7 @@ EXC_COMMON_BEGIN(machine_check_common) mtmsrd r10,1 addi r3,r1,STACK_FRAME_OVERHEAD bl machine_check_exception - b interrupt_return + b interrupt_return_srr #ifdef CONFIG_PPC_P7_NAP @@ -1275,7 +1275,7 @@ BEGIN_MMU_FTR_SECTION MMU_FTR_SECTION_ELSE bl do_page_fault ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) - b interrupt_return + b interrupt_return_srr 1: bl do_break /* @@ -1283,7 +1283,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) * If so, we need to restore them with their updated values. */ REST_NVGPRS(r1) - b interrupt_return + b interrupt_return_srr /** @@ -1323,7 +1323,7 @@ BEGIN_MMU_FTR_SECTION bl do_slb_fault cmpdi r3,0 bne- 1f - b fast_interrupt_return + b fast_interrupt_return_srr 1: /* Error case */ MMU_FTR_SECTION_ELSE /* Radix case, access is outside page table range */ @@ -1332,7 +1332,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) std r3,RESULT(r1) addi r3,r1,STACK_FRAME_OVERHEAD bl do_bad_slb_fault - b interrupt_return + b interrupt_return_srr /** @@ -1368,7 +1368,7 @@ BEGIN_MMU_FTR_SECTION MMU_FTR_SECTION_ELSE bl do_page_fault ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) - b interrupt_return + b interrupt_return_srr /** @@ -1403,7 +1403,7 @@ BEGIN_MMU_FTR_SECTION bl do_slb_fault cmpdi r3,0 bne- 1f - b fast_interrupt_return + b fast_interrupt_return_srr 1: /* Error case */ MMU_FTR_SECTION_ELSE /* Radix case, access is outside page table range */ @@ -1412,7 +1412,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) std r3,RESULT(r1) addi r3,r1,STACK_FRAME_OVERHEAD bl do_bad_slb_fault - b interrupt_return + b interrupt_return_srr /** @@ -1456,7 +1456,11 @@ EXC_COMMON_BEGIN(hardware_interrupt_common) GEN_COMMON hardware_interrupt addi r3,r1,STACK_FRAME_OVERHEAD bl do_IRQ - b interrupt_return + BEGIN_FTR_SECTION + b interrupt_return_hsrr + FTR_SECTION_ELSE + b interrupt_return_srr + ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) /** @@ -1483,7 +1487,7 @@ EXC_COMMON_BEGIN(alignment_common) addi r3,r1,STACK_FRAME_OVERHEAD bl alignment_exception REST_NVGPRS(r1) /* instruction emulation may change GPRs */ - b interrupt_return + b interrupt_return_srr /** @@ -1590,7 +1594,7 @@ EXC_COMMON_BEGIN(program_check_common) addi r3,r1,STACK_FRAME_OVERHEAD bl program_check_exception REST_NVGPRS(r1) /* instruction emulation may change GPRs */ - b interrupt_return + b interrupt_return_srr /* @@ -1633,12 +1637,12 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif bl load_up_fpu - b fast_interrupt_return + b fast_interrupt_return_srr #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ addi r3,r1,STACK_FRAME_OVERHEAD bl fp_unavailable_tm - b interrupt_return + b interrupt_return_srr #endif @@ -1677,7 +1681,7 @@ EXC_COMMON_BEGIN(decrementer_common) GEN_COMMON decrementer addi r3,r1,STACK_FRAME_OVERHEAD bl timer_interrupt - b interrupt_return + b interrupt_return_srr /** @@ -1761,7 +1765,7 @@ EXC_COMMON_BEGIN(doorbell_super_common) #else bl unknown_async_exception #endif - b interrupt_return + b interrupt_return_srr EXC_REAL_NONE(0xb00, 0x100) @@ -1925,7 +1929,7 @@ EXC_COMMON_BEGIN(single_step_common) GEN_COMMON single_step addi r3,r1,STACK_FRAME_OVERHEAD bl single_step_exception - b interrupt_return + b interrupt_return_srr /** @@ -1963,7 +1967,7 @@ 
BEGIN_MMU_FTR_SECTION MMU_FTR_SECTION_ELSE bl unknown_exception ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX) - b interrupt_return + b interrupt_return_hsrr /** @@ -1988,7 +1992,7 @@ EXC_COMMON_BEGIN(h_instr_storage_common) GEN_COMMON h_instr_storage addi r3,r1,STACK_FRAME_OVERHEAD bl unknown_exception - b interrupt_return + b interrupt_return_hsrr /** @@ -2012,7 +2016,7 @@ EXC_COMMON_BEGIN(emulation_assist_common) addi r3,r1,STACK_FRAME_OVERHEAD bl emulation_assist_interrupt REST_NVGPRS(r1) /* instruction emulation may change GPRs */ - b interrupt_return + b interrupt_return_hsrr /** @@ -2089,7 +2093,7 @@ EXC_COMMON_BEGIN(hmi_exception_common) GEN_COMMON hmi_exception addi r3,r1,STACK_FRAME_OVERHEAD bl handle_hmi_exception - b interrupt_return + b interrupt_return_hsrr /** @@ -2119,7 +2123,7 @@ EXC_COMMON_BEGIN(h_doorbell_common) #else bl unknown_async_exception #endif - b interrupt_return + b interrupt_return_hsrr /** @@ -2145,7 +2149,7 @@ EXC_COMMON_BEGIN(h_virt_irq_common) GEN_COMMON h_virt_irq addi r3,r1,STACK_FRAME_OVERHEAD bl do_IRQ - b interrupt_return + b interrupt_return_hsrr EXC_REAL_NONE(0xec0, 0x20) @@ -2188,7 +2192,7 @@ EXC_COMMON_BEGIN(performance_monitor_common) GEN_COMMON performance_monitor addi r3,r1,STACK_FRAME_OVERHEAD bl performance_monitor_exception - b interrupt_return + b interrupt_return_srr /** @@ -2225,19 +2229,19 @@ BEGIN_FTR_SECTION END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69) #endif bl load_up_altivec - b fast_interrupt_return + b fast_interrupt_return_srr #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ addi r3,r1,STACK_FRAME_OVERHEAD bl altivec_unavailable_tm - b interrupt_return + b interrupt_return_srr #endif 1: END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif addi r3,r1,STACK_FRAME_OVERHEAD bl altivec_unavailable_exception - b interrupt_return + b interrupt_return_srr /** @@ -2278,14 +2282,14 @@ BEGIN_FTR_SECTION 2: /* User process was in a transaction */ addi r3,r1,STACK_FRAME_OVERHEAD bl vsx_unavailable_tm - b interrupt_return + b interrupt_return_srr #endif 1: END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif addi r3,r1,STACK_FRAME_OVERHEAD bl vsx_unavailable_exception - b interrupt_return + b interrupt_return_srr /** @@ -2313,7 +2317,7 @@ EXC_COMMON_BEGIN(facility_unavailable_common) addi r3,r1,STACK_FRAME_OVERHEAD bl facility_unavailable_exception REST_NVGPRS(r1) /* instruction emulation may change GPRs */ - b interrupt_return + b interrupt_return_srr /** @@ -2341,7 +2345,7 @@ EXC_COMMON_BEGIN(h_facility_unavailable_common) addi r3,r1,STACK_FRAME_OVERHEAD bl facility_unavailable_exception REST_NVGPRS(r1) /* XXX Shouldn't be necessary in practice */ - b interrupt_return + b interrupt_return_hsrr EXC_REAL_NONE(0xfa0, 0x20) @@ -2370,7 +2374,7 @@ EXC_COMMON_BEGIN(cbe_system_error_common) GEN_COMMON cbe_system_error addi r3,r1,STACK_FRAME_OVERHEAD bl cbe_system_error_exception - b interrupt_return + b interrupt_return_hsrr #else /* CONFIG_CBE_RAS */ EXC_REAL_NONE(0x1200, 0x100) @@ -2401,7 +2405,7 @@ EXC_COMMON_BEGIN(instruction_breakpoint_common) GEN_COMMON instruction_breakpoint addi r3,r1,STACK_FRAME_OVERHEAD bl instruction_breakpoint_exception - b interrupt_return + b interrupt_return_srr EXC_REAL_NONE(0x1400, 0x100) @@ -2521,7 +2525,7 @@ EXC_COMMON_BEGIN(denorm_exception_common) GEN_COMMON denorm_exception addi r3,r1,STACK_FRAME_OVERHEAD bl unknown_exception - b interrupt_return + b interrupt_return_hsrr #ifdef CONFIG_CBE_RAS @@ -2538,7 +2542,7 @@ EXC_COMMON_BEGIN(cbe_maintenance_common) GEN_COMMON cbe_maintenance addi 
r3,r1,STACK_FRAME_OVERHEAD bl cbe_maintenance_exception - b interrupt_return + b interrupt_return_hsrr #else /* CONFIG_CBE_RAS */ EXC_REAL_NONE(0x1600, 0x100) @@ -2568,7 +2572,7 @@ EXC_COMMON_BEGIN(altivec_assist_common) #else bl unknown_exception #endif - b interrupt_return + b interrupt_return_srr #ifdef CONFIG_CBE_RAS @@ -2585,7 +2589,7 @@ EXC_COMMON_BEGIN(cbe_thermal_common) GEN_COMMON cbe_thermal addi r3,r1,STACK_FRAME_OVERHEAD bl cbe_thermal_exception - b interrupt_return + b interrupt_return_hsrr #else /* CONFIG_CBE_RAS */ EXC_REAL_NONE(0x1800, 0x100) diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index f5a52f444e36..54dbefcb4cde 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -131,7 +131,7 @@ _GLOBAL(load_up_vsx) /* enable use of VSX after return */ oris r12,r12,MSR_VSX@h std r12,_MSR(r1) - b fast_interrupt_return + b fast_interrupt_return_srr #endif /* CONFIG_VSX */ -- cgit v1.2.3 From 59dc5bfca0cb6a29db1a50847684eb5c19f8f400 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 18 Jun 2021 01:51:03 +1000 Subject: powerpc/64s: avoid reloading (H)SRR registers if they are still valid When an interrupt is taken, the SRR registers are set to return to where it left off. Unless they are modified in the meantime, or the return address or MSR are modified, there is no need to reload these registers when returning from interrupt. Introduce per-CPU flags that track the validity of SRR and HSRR registers. These are cleared when returning from interrupt, when using the registers for something else (e.g., OPAL calls), when adjusting the return address or MSR of a context, and when context switching (which changes the return address and MSR). This improves the performance of interrupt returns. 
Signed-off-by: Nicholas Piggin [mpe: Fold in fixup patch from Nick] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210617155116.2167984-5-npiggin@gmail.com --- arch/powerpc/Kconfig.debug | 4 + arch/powerpc/include/asm/hw_irq.h | 10 ++- arch/powerpc/include/asm/interrupt.h | 14 +++- arch/powerpc/include/asm/livepatch.h | 2 +- arch/powerpc/include/asm/paca.h | 4 + arch/powerpc/include/asm/probes.h | 4 +- arch/powerpc/include/asm/ptrace.h | 52 ++++++++++--- arch/powerpc/kernel/asm-offsets.c | 4 + arch/powerpc/kernel/entry_64.S | 92 +++++++++++++++++++++-- arch/powerpc/kernel/exceptions-64s.S | 27 +++++++ arch/powerpc/kernel/fpu.S | 4 + arch/powerpc/kernel/hw_breakpoint.c | 4 +- arch/powerpc/kernel/kgdb.c | 10 +-- arch/powerpc/kernel/kprobes-ftrace.c | 4 +- arch/powerpc/kernel/kprobes.c | 23 +++--- arch/powerpc/kernel/mce.c | 2 +- arch/powerpc/kernel/optprobes.c | 2 +- arch/powerpc/kernel/process.c | 42 ++++++----- arch/powerpc/kernel/prom_init.c | 3 + arch/powerpc/kernel/ptrace/ptrace-adv.c | 20 ++--- arch/powerpc/kernel/ptrace/ptrace-noadv.c | 14 ++-- arch/powerpc/kernel/ptrace/ptrace-view.c | 5 +- arch/powerpc/kernel/rtas.c | 14 +++- arch/powerpc/kernel/signal.c | 12 +-- arch/powerpc/kernel/signal_32.c | 40 +++++----- arch/powerpc/kernel/signal_64.c | 30 ++++---- arch/powerpc/kernel/syscalls.c | 3 +- arch/powerpc/kernel/traps.c | 42 +++++------ arch/powerpc/kernel/uprobes.c | 4 +- arch/powerpc/kernel/vector.S | 6 ++ arch/powerpc/kvm/book3s_hv.c | 3 + arch/powerpc/kvm/book3s_pr.c | 2 + arch/powerpc/lib/error-inject.c | 2 +- arch/powerpc/lib/sstep.c | 17 +++-- arch/powerpc/lib/test_emulate_step.c | 1 + arch/powerpc/math-emu/math.c | 2 +- arch/powerpc/math-emu/math_efp.c | 2 +- arch/powerpc/platforms/embedded6xx/holly.c | 4 +- arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c | 4 +- arch/powerpc/platforms/pasemi/idle.c | 4 +- arch/powerpc/platforms/powernv/opal-call.c | 4 + arch/powerpc/platforms/powernv/opal.c | 2 +- arch/powerpc/platforms/pseries/hvCall.S | 29 +++++++ arch/powerpc/platforms/pseries/ras.c | 4 +- arch/powerpc/sysdev/fsl_pci.c | 2 +- arch/powerpc/sysdev/fsl_rio.c | 4 +- arch/powerpc/xmon/xmon.c | 14 ++-- 47 files changed, 418 insertions(+), 179 deletions(-) diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 6342f9da4545..3a8ce31a2ae6 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -85,6 +85,10 @@ config MSI_BITMAP_SELFTEST config PPC_IRQ_SOFT_MASK_DEBUG bool "Include extra checks for powerpc irq soft masking" +config PPC_RFI_SRR_DEBUG + bool "Include extra checks for RFI SRR register validity" + depends on PPC_BOOK3S_64 + config XMON bool "Include xmon kernel debugger" depends on DEBUG_KERNEL diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 56a98936a6a9..19bcef666cf6 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -389,7 +389,15 @@ static inline bool arch_irq_disabled_regs(struct pt_regs *regs) return !(regs->msr & MSR_EE); } -static inline void may_hard_irq_enable(void) { } +static inline bool may_hard_irq_enable(void) +{ + return false; +} + +static inline void do_hard_irq_enable(void) +{ + BUILD_BUG(); +} static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val) { diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 59f704408d65..de36fb5d9c51 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -73,13 +73,25 @@ 
#include #include +#ifdef CONFIG_PPC_BOOK3S_64 +static inline void srr_regs_clobbered(void) +{ + local_paca->srr_valid = 0; + local_paca->hsrr_valid = 0; +} +#else +static inline void srr_regs_clobbered(void) +{ +} +#endif + static inline void nap_adjust_return(struct pt_regs *regs) { #ifdef CONFIG_PPC_970_NAP if (unlikely(test_thread_local_flags(_TLF_NAPPING))) { /* Can avoid a test-and-clear because NMIs do not call this */ clear_thread_local_flags(_TLF_NAPPING); - regs->nip = (unsigned long)power4_idle_nap_return; + regs_set_return_ip(regs, (unsigned long)power4_idle_nap_return); } #endif } diff --git a/arch/powerpc/include/asm/livepatch.h b/arch/powerpc/include/asm/livepatch.h index ae25e6e72997..4fe018cc207b 100644 --- a/arch/powerpc/include/asm/livepatch.h +++ b/arch/powerpc/include/asm/livepatch.h @@ -16,7 +16,7 @@ static inline void klp_arch_set_pc(struct ftrace_regs *fregs, unsigned long ip) { struct pt_regs *regs = ftrace_get_regs(fregs); - regs->nip = ip; + regs_set_return_ip(regs, ip); } #define klp_get_ftrace_location klp_get_ftrace_location diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index ecc8d792a431..f5f0f3408047 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -167,6 +167,10 @@ struct paca_struct { u64 saved_msr; /* MSR saved here by enter_rtas */ #ifdef CONFIG_PPC_BOOK3E u16 trap_save; /* Used when bad stack is encountered */ +#endif +#ifdef CONFIG_PPC_BOOK3S_64 + u8 hsrr_valid; /* HSRRs set for HRFID */ + u8 srr_valid; /* SRRs set for RFID */ #endif u8 irq_soft_mask; /* mask for irq soft masking */ u8 irq_happened; /* irq happened while soft-disabled */ diff --git a/arch/powerpc/include/asm/probes.h b/arch/powerpc/include/asm/probes.h index 84dd1addd434..c5d984700d24 100644 --- a/arch/powerpc/include/asm/probes.h +++ b/arch/powerpc/include/asm/probes.h @@ -34,14 +34,14 @@ typedef u32 ppc_opcode_t; /* Enable single stepping for the current task */ static inline void enable_single_step(struct pt_regs *regs) { - regs->msr |= MSR_SINGLESTEP; + regs_set_return_msr(regs, regs->msr | MSR_SINGLESTEP); #ifdef CONFIG_PPC_ADV_DEBUG_REGS /* * We turn off Critical Input Exception(CE) to ensure that the single * step will be for the instruction we have the probe on; if we don't, * it is possible we'd get the single step reported for CE. 
*/ - regs->msr &= ~MSR_CE; + regs_set_return_msr(regs, regs->msr & ~MSR_CE); mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); #ifdef CONFIG_PPC_47x isync(); diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index e06d61b668d4..5d69dafd80ad 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -123,6 +123,47 @@ struct pt_regs #endif /* __powerpc64__ */ #ifndef __ASSEMBLY__ +#include + +#ifdef CONFIG_SMP +extern unsigned long profile_pc(struct pt_regs *regs); +#else +#define profile_pc(regs) instruction_pointer(regs) +#endif + +long do_syscall_trace_enter(struct pt_regs *regs); +void do_syscall_trace_leave(struct pt_regs *regs); + +static inline void regs_set_return_ip(struct pt_regs *regs, unsigned long ip) +{ + regs->nip = ip; +#ifdef CONFIG_PPC_BOOK3S_64 + local_paca->hsrr_valid = 0; + local_paca->srr_valid = 0; +#endif +} + +static inline void regs_set_return_msr(struct pt_regs *regs, unsigned long msr) +{ + regs->msr = msr; +#ifdef CONFIG_PPC_BOOK3S_64 + local_paca->hsrr_valid = 0; + local_paca->srr_valid = 0; +#endif +} + +static inline void set_return_regs_changed(void) +{ +#ifdef CONFIG_PPC_BOOK3S_64 + local_paca->hsrr_valid = 0; + local_paca->srr_valid = 0; +#endif +} + +static inline void regs_add_return_ip(struct pt_regs *regs, long offset) +{ + regs_set_return_ip(regs, regs->nip + offset); +} static inline unsigned long instruction_pointer(struct pt_regs *regs) { @@ -132,7 +173,7 @@ static inline unsigned long instruction_pointer(struct pt_regs *regs) static inline void instruction_pointer_set(struct pt_regs *regs, unsigned long val) { - regs->nip = val; + regs_set_return_ip(regs, val); } static inline unsigned long user_stack_pointer(struct pt_regs *regs) @@ -145,15 +186,6 @@ static inline unsigned long frame_pointer(struct pt_regs *regs) return 0; } -#ifdef CONFIG_SMP -extern unsigned long profile_pc(struct pt_regs *regs); -#else -#define profile_pc(regs) instruction_pointer(regs) -#endif - -long do_syscall_trace_enter(struct pt_regs *regs); -void do_syscall_trace_leave(struct pt_regs *regs); - #ifdef __powerpc64__ #define user_mode(regs) ((((regs)->msr) >> MSR_PR_LG) & 0x1) #else diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 31881ef3641d..69a2885adfe9 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -190,6 +190,10 @@ int main(void) OFFSET(PACATOC, paca_struct, kernel_toc); OFFSET(PACAKBASE, paca_struct, kernelbase); OFFSET(PACAKMSR, paca_struct, kernel_msr); +#ifdef CONFIG_PPC_BOOK3S_64 + OFFSET(PACAHSRR_VALID, paca_struct, hsrr_valid); + OFFSET(PACASRR_VALID, paca_struct, srr_valid); +#endif OFFSET(PACAIRQSOFTMASK, paca_struct, irq_soft_mask); OFFSET(PACAIRQHAPPENED, paca_struct, irq_happened); OFFSET(PACA_FTRACE_ENABLED, paca_struct, ftrace_enabled); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 03a45a88b4b8..9a1d5e5599d3 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -64,6 +64,30 @@ exception_marker: .section ".text" .align 7 +.macro DEBUG_SRR_VALID srr +#ifdef CONFIG_PPC_RFI_SRR_DEBUG + .ifc \srr,srr + mfspr r11,SPRN_SRR0 + ld r12,_NIP(r1) +100: tdne r11,r12 + EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) + mfspr r11,SPRN_SRR1 + ld r12,_MSR(r1) +100: tdne r11,r12 + EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) + .else + mfspr r11,SPRN_HSRR0 + ld r12,_NIP(r1) +100: tdne r11,r12 + 
EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) + mfspr r11,SPRN_HSRR1 + ld r12,_MSR(r1) +100: tdne r11,r12 + EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) + .endif +#endif +.endm + #ifdef CONFIG_PPC_BOOK3S .macro system_call_vectored name trapnr .globl system_call_vectored_\name @@ -286,6 +310,11 @@ END_BTB_FLUSH_SECTION ld r11,exception_marker@toc(r2) std r11,-16(r10) /* "regshere" marker */ +#ifdef CONFIG_PPC_BOOK3S + li r11,1 + stb r11,PACASRR_VALID(r13) +#endif + /* * We always enter kernel from userspace with irq soft-mask enabled and * nothing pending. system_call_exception() will call @@ -306,18 +335,27 @@ END_BTB_FLUSH_SECTION bl syscall_exit_prepare ld r2,_CCR(r1) + ld r6,_LINK(r1) + mtlr r6 + +#ifdef CONFIG_PPC_BOOK3S + lbz r4,PACASRR_VALID(r13) + cmpdi r4,0 + bne 1f + li r4,0 + stb r4,PACASRR_VALID(r13) +#endif ld r4,_NIP(r1) ld r5,_MSR(r1) - ld r6,_LINK(r1) + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r5 +1: + DEBUG_SRR_VALID srr BEGIN_FTR_SECTION stdcx. r0,0,r1 /* to clear the reservation */ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) - mtspr SPRN_SRR0,r4 - mtspr SPRN_SRR1,r5 - mtlr r6 - cmpdi r3,0 bne .Lsyscall_restore_regs /* Zero volatile regs that may contain sensitive kernel data */ @@ -673,19 +711,40 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()) kuap_user_restore r3, r4 #endif .Lfast_user_interrupt_return_\srr\(): - ld r11,_NIP(r1) - ld r12,_MSR(r1) + BEGIN_FTR_SECTION ld r10,_PPR(r1) mtspr SPRN_PPR,r10 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + +#ifdef CONFIG_PPC_BOOK3S + .ifc \srr,srr + lbz r4,PACASRR_VALID(r13) + .else + lbz r4,PACAHSRR_VALID(r13) + .endif + cmpdi r4,0 + li r4,0 + bne 1f +#endif + ld r11,_NIP(r1) + ld r12,_MSR(r1) .ifc \srr,srr mtspr SPRN_SRR0,r11 mtspr SPRN_SRR1,r12 +1: +#ifdef CONFIG_PPC_BOOK3S + stb r4,PACASRR_VALID(r13) +#endif .else mtspr SPRN_HSRR0,r11 mtspr SPRN_HSRR1,r12 +1: +#ifdef CONFIG_PPC_BOOK3S + stb r4,PACAHSRR_VALID(r13) +#endif .endif + DEBUG_SRR_VALID \srr BEGIN_FTR_SECTION stdcx. r0,0,r1 /* to clear the reservation */ @@ -730,15 +789,34 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) .Lfast_kernel_interrupt_return_\srr\(): cmpdi cr1,r3,0 +#ifdef CONFIG_PPC_BOOK3S + .ifc \srr,srr + lbz r4,PACASRR_VALID(r13) + .else + lbz r4,PACAHSRR_VALID(r13) + .endif + cmpdi r4,0 + li r4,0 + bne 1f +#endif ld r11,_NIP(r1) ld r12,_MSR(r1) .ifc \srr,srr mtspr SPRN_SRR0,r11 mtspr SPRN_SRR1,r12 +1: +#ifdef CONFIG_PPC_BOOK3S + stb r4,PACASRR_VALID(r13) +#endif .else mtspr SPRN_HSRR0,r11 mtspr SPRN_HSRR1,r12 +1: +#ifdef CONFIG_PPC_BOOK3S + stb r4,PACAHSRR_VALID(r13) +#endif .endif + DEBUG_SRR_VALID \srr BEGIN_FTR_SECTION stdcx. 
r0,0,r1 /* to clear the reservation */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 1bc27af1b425..3d238a3b2a24 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -485,6 +485,20 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) std r0,GPR0(r1) /* save r0 in stackframe */ std r10,GPR1(r1) /* save r1 in stackframe */ + /* Mark our [H]SRRs valid for return */ + li r10,1 + .if IHSRR_IF_HVMODE + BEGIN_FTR_SECTION + stb r10,PACAHSRR_VALID(r13) + FTR_SECTION_ELSE + stb r10,PACASRR_VALID(r13) + ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) + .elseif IHSRR + stb r10,PACAHSRR_VALID(r13) + .else + stb r10,PACASRR_VALID(r13) + .endif + .if ISET_RI li r10,MSR_RI mtmsrd r10,1 /* Set MSR_RI */ @@ -584,10 +598,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) .macro EXCEPTION_RESTORE_REGS hsrr=0 /* Move original SRR0 and SRR1 into the respective regs */ ld r9,_MSR(r1) + li r10,0 .if \hsrr mtspr SPRN_HSRR1,r9 + stb r10,PACAHSRR_VALID(r13) .else mtspr SPRN_SRR1,r9 + stb r10,PACASRR_VALID(r13) .endif ld r9,_NIP(r1) .if \hsrr @@ -1718,6 +1735,8 @@ EXC_COMMON_BEGIN(hdecrementer_common) * * Be careful to avoid touching the kernel stack. */ + li r10,0 + stb r10,PACAHSRR_VALID(r13) ld r10,PACA_EXGEN+EX_CTR(r13) mtctr r10 mtcrf 0x80,r9 @@ -2513,6 +2532,8 @@ BEGIN_FTR_SECTION ld r10,PACA_EXGEN+EX_CFAR(r13) mtspr SPRN_CFAR,r10 END_FTR_SECTION_IFSET(CPU_FTR_CFAR) + li r10,0 + stb r10,PACAHSRR_VALID(r13) ld r10,PACA_EXGEN+EX_R10(r13) ld r11,PACA_EXGEN+EX_R11(r13) ld r12,PACA_EXGEN+EX_R12(r13) @@ -2673,6 +2694,12 @@ masked_interrupt: ori r11,r11,PACA_IRQ_HARD_DIS stb r11,PACAIRQHAPPENED(r13) 2: /* done */ + li r10,0 + .if \hsrr + stb r10,PACAHSRR_VALID(r13) + .else + stb r10,PACASRR_VALID(r13) + .endif ld r10,PACA_EXGEN+EX_CTR(r13) mtctr r10 mtcrf 0x80,r9 diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 2c57ece6671c..6010adcee16e 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -103,6 +103,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) ori r12,r12,MSR_FP or r12,r12,r4 std r12,_MSR(r1) +#ifdef CONFIG_PPC_BOOK3S_64 + li r4,0 + stb r4,PACASRR_VALID(r13) +#endif #endif li r4,1 stb r4,THREAD_LOAD_FP(r5) diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 8fc7a14e4d71..21a638aff72f 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -486,7 +486,7 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) return; reset: - regs->msr &= ~MSR_SE; + regs_set_return_msr(regs, regs->msr & ~MSR_SE); for (i = 0; i < nr_wp_slots(); i++) { info = counter_arch_bp(__this_cpu_read(bp_per_reg[i])); __set_breakpoint(i, info); @@ -537,7 +537,7 @@ static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp, current->thread.last_hit_ubp[i] = bp[i]; info[i] = NULL; } - regs->msr |= MSR_SE; + regs_set_return_msr(regs, regs->msr | MSR_SE); return false; } diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index c48565762698..bdee7262c080 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -147,7 +147,7 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs) return 0; if (*(u32 *)regs->nip == BREAK_INSTR) - regs->nip += BREAK_INSTR_SIZE; + regs_add_return_ip(regs, BREAK_INSTR_SIZE); return 1; } @@ -372,7 +372,7 @@ int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) { - regs->nip = pc; + 
regs_set_return_ip(regs, pc); } /* @@ -394,7 +394,7 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code, case 'c': /* handle the optional parameter */ if (kgdb_hex2long(&ptr, &addr)) - linux_regs->nip = addr; + regs_set_return_ip(linux_regs, addr); atomic_set(&kgdb_cpu_doing_single_step, -1); /* set the trace bit if we're stepping */ @@ -402,9 +402,9 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code, #ifdef CONFIG_PPC_ADV_DEBUG_REGS mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); - linux_regs->msr |= MSR_DE; + regs_set_return_msr(linux_regs, linux_regs->msr | MSR_DE); #else - linux_regs->msr |= MSR_SE; + regs_set_return_msr(linux_regs, linux_regs->msr | MSR_SE); #endif atomic_set(&kgdb_cpu_doing_single_step, raw_smp_processor_id()); diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c index 660138f6c4b2..7154d58338cc 100644 --- a/arch/powerpc/kernel/kprobes-ftrace.c +++ b/arch/powerpc/kernel/kprobes-ftrace.c @@ -39,7 +39,7 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, * On powerpc, NIP is *before* this instruction for the * pre handler */ - regs->nip -= MCOUNT_INSN_SIZE; + regs_add_return_ip(regs, -MCOUNT_INSN_SIZE); __this_cpu_write(current_kprobe, p); kcb->kprobe_status = KPROBE_HIT_ACTIVE; @@ -48,7 +48,7 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, * Emulate singlestep (and also recover regs->nip) * as if there is a nop */ - regs->nip += MCOUNT_INSN_SIZE; + regs_add_return_ip(regs, MCOUNT_INSN_SIZE); if (unlikely(p->post_handler)) { kcb->kprobe_status = KPROBE_HIT_SSDONE; p->post_handler(p, regs, 0); diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 8ac248cb518a..04714895d3f9 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -194,7 +194,7 @@ static nokprobe_inline void prepare_singlestep(struct kprobe *p, struct pt_regs * variant as values in regs could play a part in * if the trap is taken or not */ - regs->nip = (unsigned long)p->ainsn.insn; + regs_set_return_ip(regs, (unsigned long)p->ainsn.insn); } static nokprobe_inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) @@ -335,8 +335,9 @@ int kprobe_handler(struct pt_regs *regs) kprobe_opcode_t insn = *p->ainsn.insn; if (kcb->kprobe_status == KPROBE_HIT_SS && is_trap(insn)) { /* Turn off 'trace' bits */ - regs->msr &= ~MSR_SINGLESTEP; - regs->msr |= kcb->kprobe_saved_msr; + regs_set_return_msr(regs, + (regs->msr & ~MSR_SINGLESTEP) | + kcb->kprobe_saved_msr); goto no_kprobe; } @@ -431,7 +432,7 @@ static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) * we end up emulating it in kprobe_handler(), which increments the nip * again. */ - regs->nip = orig_ret_address - 4; + regs_set_return_ip(regs, orig_ret_address - 4); regs->link = orig_ret_address; return 0; @@ -466,8 +467,8 @@ int kprobe_post_handler(struct pt_regs *regs) } /* Adjust nip to after the single-stepped instruction */ - regs->nip = (unsigned long)cur->addr + len; - regs->msr |= kcb->kprobe_saved_msr; + regs_set_return_ip(regs, (unsigned long)cur->addr + len); + regs_set_return_msr(regs, regs->msr | kcb->kprobe_saved_msr); /*Restore back the original saved kprobes variables and continue. */ if (kcb->kprobe_status == KPROBE_REENTER) { @@ -506,9 +507,11 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) * and allow the page fault handler to continue as a * normal page fault. 
*/ - regs->nip = (unsigned long)cur->addr; - regs->msr &= ~MSR_SINGLESTEP; /* Turn off 'trace' bits */ - regs->msr |= kcb->kprobe_saved_msr; + regs_set_return_ip(regs, (unsigned long)cur->addr); + /* Turn off 'trace' bits */ + regs_set_return_msr(regs, + (regs->msr & ~MSR_SINGLESTEP) | + kcb->kprobe_saved_msr); if (kcb->kprobe_status == KPROBE_REENTER) restore_previous_kprobe(kcb); else @@ -539,7 +542,7 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) * zero, try to fix up. */ if ((entry = search_exception_tables(regs->nip)) != NULL) { - regs->nip = extable_fixup(entry); + regs_set_return_ip(regs, extable_fixup(entry)); return 1; } diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 9a3c2a84a2ac..f4b922a1e6ad 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -273,7 +273,7 @@ void mce_common_process_ue(struct pt_regs *regs, entry = search_kernel_exception_table(regs->nip); if (entry) { mce_err->ignore_event = true; - regs->nip = extable_fixup(entry); + regs_set_return_ip(regs, extable_fixup(entry)); } } diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index 2b8fe40069ad..8b9f82dc6ece 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -106,7 +106,7 @@ static void optimized_callback(struct optimized_kprobe *op, kprobes_inc_nmissed_count(&op->kp); } else { __this_cpu_write(current_kprobe, &op->kp); - regs->nip = (unsigned long)op->kp.addr; + regs_set_return_ip(regs, (unsigned long)op->kp.addr); get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; opt_pre_handler(&op->kp, regs); __this_cpu_write(current_kprobe, NULL); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 4e593fc2c66d..8ab1bfdfd31a 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -96,7 +96,8 @@ static void check_if_tm_restore_required(struct task_struct *tsk) if (tsk == current && tsk->thread.regs && MSR_TM_ACTIVE(tsk->thread.regs->msr) && !test_thread_flag(TIF_RESTORE_TM)) { - tsk->thread.ckpt_regs.msr = tsk->thread.regs->msr; + regs_set_return_msr(&tsk->thread.ckpt_regs, + tsk->thread.regs->msr); set_thread_flag(TIF_RESTORE_TM); } } @@ -161,7 +162,7 @@ static void __giveup_fpu(struct task_struct *tsk) msr &= ~(MSR_FP|MSR_FE0|MSR_FE1); if (cpu_has_feature(CPU_FTR_VSX)) msr &= ~MSR_VSX; - tsk->thread.regs->msr = msr; + regs_set_return_msr(tsk->thread.regs, msr); } void giveup_fpu(struct task_struct *tsk) @@ -244,7 +245,7 @@ static void __giveup_altivec(struct task_struct *tsk) msr &= ~MSR_VEC; if (cpu_has_feature(CPU_FTR_VSX)) msr &= ~MSR_VSX; - tsk->thread.regs->msr = msr; + regs_set_return_msr(tsk->thread.regs, msr); } void giveup_altivec(struct task_struct *tsk) @@ -559,7 +560,7 @@ void notrace restore_math(struct pt_regs *regs) msr_check_and_clear(new_msr); - regs->msr |= new_msr | fpexc_mode; + regs_set_return_msr(regs, regs->msr | new_msr | fpexc_mode); } } #endif /* CONFIG_PPC_BOOK3S_64 */ @@ -1114,7 +1115,7 @@ void restore_tm_state(struct pt_regs *regs) #endif restore_math(regs); - regs->msr |= msr_diff; + regs_set_return_msr(regs, regs->msr | msr_diff); } #else /* !CONFIG_PPC_TRANSACTIONAL_MEM */ @@ -1257,14 +1258,16 @@ struct task_struct *__switch_to(struct task_struct *prev, } /* - * Call restore_sprs() before calling _switch(). If we move it after - * _switch() then we miss out on calling it for new tasks. 
The reason - * for this is we manually create a stack frame for new tasks that - * directly returns through ret_from_fork() or + * Call restore_sprs() and set_return_regs_changed() before calling + * _switch(). If we move it after _switch() then we miss out on calling + * it for new tasks. The reason for this is we manually create a stack + * frame for new tasks that directly returns through ret_from_fork() or * ret_from_kernel_thread(). See copy_thread() for details. */ restore_sprs(old_thread, new_thread); + set_return_regs_changed(); /* _switch changes stack (and regs) */ + #ifdef CONFIG_PPC32 kuap_assert_locked(); #endif @@ -1850,13 +1853,14 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) } regs->gpr[2] = toc; } - regs->nip = entry; - regs->msr = MSR_USER64; + regs_set_return_ip(regs, entry); + regs_set_return_msr(regs, MSR_USER64); } else { - regs->nip = start; regs->gpr[2] = 0; - regs->msr = MSR_USER32; + regs_set_return_ip(regs, start); + regs_set_return_msr(regs, MSR_USER32); } + #endif #ifdef CONFIG_VSX current->thread.used_vsr = 0; @@ -1887,7 +1891,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) current->thread.tm_tfiar = 0; current->thread.load_tm = 0; #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ - } EXPORT_SYMBOL(start_thread); @@ -1935,9 +1938,10 @@ int set_fpexc_mode(struct task_struct *tsk, unsigned int val) if (val > PR_FP_EXC_PRECISE) return -EINVAL; tsk->thread.fpexc_mode = __pack_fe01(val); - if (regs != NULL && (regs->msr & MSR_FP) != 0) - regs->msr = (regs->msr & ~(MSR_FE0|MSR_FE1)) - | tsk->thread.fpexc_mode; + if (regs != NULL && (regs->msr & MSR_FP) != 0) { + regs_set_return_msr(regs, (regs->msr & ~(MSR_FE0|MSR_FE1)) + | tsk->thread.fpexc_mode); + } return 0; } @@ -1983,9 +1987,9 @@ int set_endian(struct task_struct *tsk, unsigned int val) return -EINVAL; if (val == PR_ENDIAN_BIG) - regs->msr &= ~MSR_LE; + regs_set_return_msr(regs, regs->msr & ~MSR_LE); else if (val == PR_ENDIAN_LITTLE || val == PR_ENDIAN_PPC_LITTLE) - regs->msr |= MSR_LE; + regs_set_return_msr(regs, regs->msr | MSR_LE); else return -EINVAL; diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index cb5be081e499..05ce15b854e2 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -1792,6 +1793,8 @@ static int prom_rtas_hcall(uint64_t args) asm volatile("sc 1\n" : "=r" (arg1) : "r" (arg1), "r" (arg2) :); + srr_regs_clobbered(); + return arg1; } diff --git a/arch/powerpc/kernel/ptrace/ptrace-adv.c b/arch/powerpc/kernel/ptrace/ptrace-adv.c index 3990c01ef8cf..399f5d94a3df 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-adv.c +++ b/arch/powerpc/kernel/ptrace/ptrace-adv.c @@ -12,7 +12,7 @@ void user_enable_single_step(struct task_struct *task) if (regs != NULL) { task->thread.debug.dbcr0 &= ~DBCR0_BT; task->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC; - regs->msr |= MSR_DE; + regs_set_return_msr(regs, regs->msr | MSR_DE); } set_tsk_thread_flag(task, TIF_SINGLESTEP); } @@ -24,7 +24,7 @@ void user_enable_block_step(struct task_struct *task) if (regs != NULL) { task->thread.debug.dbcr0 &= ~DBCR0_IC; task->thread.debug.dbcr0 = DBCR0_IDM | DBCR0_BT; - regs->msr |= MSR_DE; + regs_set_return_msr(regs, regs->msr | MSR_DE); } set_tsk_thread_flag(task, TIF_SINGLESTEP); } @@ -50,7 +50,7 @@ void user_disable_single_step(struct task_struct *task) * All debug events were off..... 
*/ task->thread.debug.dbcr0 &= ~DBCR0_IDM; - regs->msr &= ~MSR_DE; + regs_set_return_msr(regs, regs->msr & ~MSR_DE); } } clear_tsk_thread_flag(task, TIF_SINGLESTEP); @@ -82,6 +82,7 @@ int ptrace_get_debugreg(struct task_struct *child, unsigned long addr, int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data) { + struct pt_regs *regs = task->thread.regs; #ifdef CONFIG_HAVE_HW_BREAKPOINT int ret; struct thread_struct *thread = &task->thread; @@ -112,7 +113,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned l dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W); if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0, task->thread.debug.dbcr1)) { - task->thread.regs->msr &= ~MSR_DE; + regs_set_return_msr(regs, regs->msr & ~MSR_DE); task->thread.debug.dbcr0 &= ~DBCR0_IDM; } return 0; @@ -132,7 +133,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned l dbcr_dac(task) |= DBCR_DAC1R; if (data & 0x2UL) dbcr_dac(task) |= DBCR_DAC1W; - task->thread.regs->msr |= MSR_DE; + regs_set_return_msr(regs, regs->msr | MSR_DE); return 0; } @@ -220,7 +221,7 @@ static long set_instruction_bp(struct task_struct *child, } out: child->thread.debug.dbcr0 |= DBCR0_IDM; - child->thread.regs->msr |= MSR_DE; + regs_set_return_msr(child->thread.regs, child->thread.regs->msr | MSR_DE); return slot; } @@ -336,7 +337,7 @@ static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info) return -ENOSPC; } child->thread.debug.dbcr0 |= DBCR0_IDM; - child->thread.regs->msr |= MSR_DE; + regs_set_return_msr(child->thread.regs, child->thread.regs->msr | MSR_DE); return slot + 4; } @@ -430,7 +431,7 @@ static int set_dac_range(struct task_struct *child, child->thread.debug.dbcr2 |= DBCR2_DAC12MX; else /* PPC_BREAKPOINT_MODE_MASK */ child->thread.debug.dbcr2 |= DBCR2_DAC12MM; - child->thread.regs->msr |= MSR_DE; + regs_set_return_msr(child->thread.regs, child->thread.regs->msr | MSR_DE); return 5; } @@ -485,7 +486,8 @@ long ppc_del_hwdebug(struct task_struct *child, long data) if (!DBCR_ACTIVE_EVENTS(child->thread.debug.dbcr0, child->thread.debug.dbcr1)) { child->thread.debug.dbcr0 &= ~DBCR0_IDM; - child->thread.regs->msr &= ~MSR_DE; + regs_set_return_msr(child->thread.regs, + child->thread.regs->msr & ~MSR_DE); } } return rc; diff --git a/arch/powerpc/kernel/ptrace/ptrace-noadv.c b/arch/powerpc/kernel/ptrace/ptrace-noadv.c index aa36fcad36cd..a5dd7d2e2c9e 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-noadv.c +++ b/arch/powerpc/kernel/ptrace/ptrace-noadv.c @@ -11,10 +11,8 @@ void user_enable_single_step(struct task_struct *task) { struct pt_regs *regs = task->thread.regs; - if (regs != NULL) { - regs->msr &= ~MSR_BE; - regs->msr |= MSR_SE; - } + if (regs != NULL) + regs_set_return_msr(regs, (regs->msr & ~MSR_BE) | MSR_SE); set_tsk_thread_flag(task, TIF_SINGLESTEP); } @@ -22,10 +20,8 @@ void user_enable_block_step(struct task_struct *task) { struct pt_regs *regs = task->thread.regs; - if (regs != NULL) { - regs->msr &= ~MSR_SE; - regs->msr |= MSR_BE; - } + if (regs != NULL) + regs_set_return_msr(regs, (regs->msr & ~MSR_SE) | MSR_BE); set_tsk_thread_flag(task, TIF_SINGLESTEP); } @@ -34,7 +30,7 @@ void user_disable_single_step(struct task_struct *task) struct pt_regs *regs = task->thread.regs; if (regs != NULL) - regs->msr &= ~(MSR_SE | MSR_BE); + regs_set_return_msr(regs, regs->msr & ~(MSR_SE | MSR_BE)); clear_tsk_thread_flag(task, TIF_SINGLESTEP); } diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c 
b/arch/powerpc/kernel/ptrace/ptrace-view.c index 773bcc4ca843..b8be1d6668b5 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -113,8 +113,9 @@ static unsigned long get_user_msr(struct task_struct *task) static __always_inline int set_user_msr(struct task_struct *task, unsigned long msr) { - task->thread.regs->msr &= ~MSR_DEBUGCHANGE; - task->thread.regs->msr |= msr & MSR_DEBUGCHANGE; + unsigned long newmsr = (task->thread.regs->msr & ~MSR_DEBUGCHANGE) | + (msr & MSR_DEBUGCHANGE); + regs_set_return_msr(task->thread.regs, newmsr); return 0; } diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 6bada744402b..99f2cce635fb 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -46,6 +47,13 @@ /* This is here deliberately so it's only used in this file */ void enter_rtas(unsigned long); +static inline void do_enter_rtas(unsigned long args) +{ + enter_rtas(args); + + srr_regs_clobbered(); /* rtas uses SRRs, invalidate */ +} + struct rtas_t rtas = { .lock = __ARCH_SPIN_LOCK_UNLOCKED }; @@ -384,7 +392,7 @@ static char *__fetch_rtas_last_error(char *altbuf) save_args = rtas.args; rtas.args = err_args; - enter_rtas(__pa(&rtas.args)); + do_enter_rtas(__pa(&rtas.args)); err_args = rtas.args; rtas.args = save_args; @@ -430,7 +438,7 @@ va_rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, for (i = 0; i < nret; ++i) args->rets[i] = 0; - enter_rtas(__pa(args)); + do_enter_rtas(__pa(args)); } void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, ...) @@ -1138,7 +1146,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) flags = lock_rtas(); rtas.args = args; - enter_rtas(__pa(&rtas.args)); + do_enter_rtas(__pa(&rtas.args)); args = rtas.args; /* A -1 return code indicates that the last command couldn't diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 9ded046edb0e..e600764a926c 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -214,7 +214,7 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka, regs->gpr[0] = __NR_restart_syscall; else regs->gpr[3] = regs->orig_gpr3; - regs->nip -= 4; + regs_add_return_ip(regs, -4); regs->result = 0; } else { if (trap_is_scv(regs)) { @@ -322,16 +322,16 @@ static unsigned long get_tm_stackpointer(struct task_struct *tsk) * For signals taken in non-TM or suspended mode, we use the * normal/non-checkpointed stack pointer. */ - - unsigned long ret = tsk->thread.regs->gpr[1]; + struct pt_regs *regs = tsk->thread.regs; + unsigned long ret = regs->gpr[1]; #ifdef CONFIG_PPC_TRANSACTIONAL_MEM BUG_ON(tsk != current); - if (MSR_TM_ACTIVE(tsk->thread.regs->msr)) { + if (MSR_TM_ACTIVE(regs->msr)) { preempt_disable(); tm_reclaim_current(TM_CAUSE_SIGNAL); - if (MSR_TM_TRANSACTIONAL(tsk->thread.regs->msr)) + if (MSR_TM_TRANSACTIONAL(regs->msr)) ret = tsk->thread.ckpt_regs.gpr[1]; /* @@ -341,7 +341,7 @@ static unsigned long get_tm_stackpointer(struct task_struct *tsk) * (tm_recheckpoint_new_task() would recheckpoint). Besides, we * enter the signal handler in non-transactional state. 
*/ - tsk->thread.regs->msr &= ~MSR_TS_MASK; + regs_set_return_msr(regs, regs->msr & ~MSR_TS_MASK); preempt_enable(); } #endif diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index f55ac65c7492..0608581967f0 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -479,14 +479,14 @@ static long restore_user_regs(struct pt_regs *regs, /* if doing signal return, restore the previous little-endian mode */ if (sig) - regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); + regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE)); #ifdef CONFIG_ALTIVEC /* * Force the process to reload the altivec registers from * current->thread when it next does altivec instructions */ - regs->msr &= ~MSR_VEC; + regs_set_return_msr(regs, regs->msr & ~MSR_VEC); if (msr & MSR_VEC) { /* restore altivec registers from the stack */ unsafe_copy_from_user(¤t->thread.vr_state, &sr->mc_vregs, @@ -508,7 +508,7 @@ static long restore_user_regs(struct pt_regs *regs, * Force the process to reload the VSX registers from * current->thread when it next does VSX instruction. */ - regs->msr &= ~MSR_VSX; + regs_set_return_msr(regs, regs->msr & ~MSR_VSX); if (msr & MSR_VSX) { /* * Restore altivec registers from the stack to a local @@ -524,12 +524,12 @@ static long restore_user_regs(struct pt_regs *regs, * force the process to reload the FP registers from * current->thread when it next does FP instructions */ - regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1); + regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1)); #ifdef CONFIG_SPE /* force the process to reload the spe registers from current->thread when it next does spe instructions */ - regs->msr &= ~MSR_SPE; + regs_set_return_msr(regs, regs->msr & ~MSR_SPE); if (msr & MSR_SPE) { /* restore spe registers from the stack */ unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs, @@ -580,9 +580,9 @@ static long restore_tm_user_regs(struct pt_regs *regs, unsafe_get_user(msr, &sr->mc_gregs[PT_MSR], failed); /* Restore the previous little-endian mode */ - regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); + regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE)); - regs->msr &= ~MSR_VEC; + regs_set_return_msr(regs, regs->msr & ~MSR_VEC); if (msr & MSR_VEC) { /* restore altivec registers from the stack */ unsafe_copy_from_user(¤t->thread.ckvr_state, &sr->mc_vregs, @@ -601,11 +601,11 @@ static long restore_tm_user_regs(struct pt_regs *regs, if (cpu_has_feature(CPU_FTR_ALTIVEC)) mtspr(SPRN_VRSAVE, current->thread.ckvrsave); - regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1); + regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1)); unsafe_copy_fpr_from_user(current, &sr->mc_fregs, failed); - regs->msr &= ~MSR_VSX; + regs_set_return_msr(regs, regs->msr & ~MSR_VSX); if (msr & MSR_VSX) { /* * Restore altivec registers from the stack to a local @@ -672,7 +672,7 @@ static long restore_tm_user_regs(struct pt_regs *regs, * * Pull in the MSR TM bits from the user context */ - regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr_hi & MSR_TS_MASK); + regs_set_return_msr(regs, (regs->msr & ~MSR_TS_MASK) | (msr_hi & MSR_TS_MASK)); /* Now, recheckpoint. This loads up all of the checkpointed (older) * registers, including FP and V[S]Rs. After recheckpointing, the * transactional versions should be loaded. 
@@ -687,11 +687,11 @@ static long restore_tm_user_regs(struct pt_regs *regs, msr_check_and_set(msr & (MSR_FP | MSR_VEC)); if (msr & MSR_FP) { load_fp_state(¤t->thread.fp_state); - regs->msr |= (MSR_FP | current->thread.fpexc_mode); + regs_set_return_msr(regs, regs->msr | (MSR_FP | current->thread.fpexc_mode)); } if (msr & MSR_VEC) { load_vr_state(¤t->thread.vr_state); - regs->msr |= MSR_VEC; + regs_set_return_msr(regs, regs->msr | MSR_VEC); } preempt_enable(); @@ -801,10 +801,10 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, regs->gpr[4] = (unsigned long)&frame->info; regs->gpr[5] = (unsigned long)&frame->uc; regs->gpr[6] = (unsigned long)frame; - regs->nip = (unsigned long) ksig->ka.sa.sa_handler; + regs_set_return_ip(regs, (unsigned long) ksig->ka.sa.sa_handler); /* enter the signal handler in native-endian mode */ - regs->msr &= ~MSR_LE; - regs->msr |= (MSR_KERNEL & MSR_LE); + regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (MSR_KERNEL & MSR_LE)); + return 0; failed: @@ -889,10 +889,10 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, regs->gpr[1] = newsp; regs->gpr[3] = ksig->sig; regs->gpr[4] = (unsigned long) sc; - regs->nip = (unsigned long)ksig->ka.sa.sa_handler; + regs_set_return_ip(regs, (unsigned long) ksig->ka.sa.sa_handler); /* enter the signal handler in native-endian mode */ - regs->msr &= ~MSR_LE; - regs->msr |= (MSR_KERNEL & MSR_LE); + regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (MSR_KERNEL & MSR_LE)); + return 0; failed: @@ -1142,7 +1142,7 @@ SYSCALL_DEFINE0(rt_sigreturn) * set, and recheckpoint was not called. This avoid * hitting a TM Bad thing at RFID */ - regs->msr &= ~MSR_TS_MASK; + regs_set_return_msr(regs, regs->msr & ~MSR_TS_MASK); } /* Fall through, for non-TM restore */ #endif @@ -1231,7 +1231,7 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx, affect the contents of these registers. After this point, failure is a problem, anyway, and it's very unlikely unless the user is really doing something wrong. */ - regs->msr = new_msr; + regs_set_return_msr(regs, new_msr); #ifdef CONFIG_PPC_ADV_DEBUG_REGS current->thread.debug.dbcr0 = new_dbcr0; #endif diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 16b41470ec92..1831bba0582e 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -354,7 +354,7 @@ static long notrace __unsafe_restore_sigcontext(struct task_struct *tsk, sigset_ /* get MSR separately, transfer the LE bit if doing signal return */ unsafe_get_user(msr, &sc->gp_regs[PT_MSR], efault_out); if (sig) - regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); + regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE)); unsafe_get_user(regs->orig_gpr3, &sc->gp_regs[PT_ORIG_R3], efault_out); unsafe_get_user(regs->ctr, &sc->gp_regs[PT_CTR], efault_out); unsafe_get_user(regs->link, &sc->gp_regs[PT_LNK], efault_out); @@ -376,7 +376,7 @@ static long notrace __unsafe_restore_sigcontext(struct task_struct *tsk, sigset_ * This has to be done before copying stuff into tsk->thread.fpr/vr * for the reasons explained in the previous comment. 
*/ - regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX); + regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX)); #ifdef CONFIG_ALTIVEC unsafe_get_user(v_regs, &sc->v_regs, efault_out); @@ -468,7 +468,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, return -EINVAL; /* pull in MSR LE from user context */ - regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); + regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE)); /* The following non-GPR non-FPR non-VR state is also checkpointed: */ err |= __get_user(regs->ctr, &tm_sc->gp_regs[PT_CTR]); @@ -495,7 +495,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, * This has to be done before copying stuff into tsk->thread.fpr/vr * for the reasons explained in the previous comment. */ - regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX); + regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX)); #ifdef CONFIG_ALTIVEC err |= __get_user(v_regs, &sc->v_regs); @@ -565,7 +565,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, preempt_disable(); /* pull in MSR TS bits from user context */ - regs->msr |= msr & MSR_TS_MASK; + regs_set_return_msr(regs, regs->msr | (msr & MSR_TS_MASK)); /* * Ensure that TM is enabled in regs->msr before we leave the signal @@ -583,7 +583,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, * to be de-scheduled with MSR[TS] set but without calling * tm_recheckpoint(). This can cause a bug. */ - regs->msr |= MSR_TM; + regs_set_return_msr(regs, regs->msr | MSR_TM); /* This loads the checkpointed FP/VEC state, if used */ tm_recheckpoint(&tsk->thread); @@ -591,11 +591,11 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, msr_check_and_set(msr & (MSR_FP | MSR_VEC)); if (msr & MSR_FP) { load_fp_state(&tsk->thread.fp_state); - regs->msr |= (MSR_FP | tsk->thread.fpexc_mode); + regs_set_return_msr(regs, regs->msr | (MSR_FP | tsk->thread.fpexc_mode)); } if (msr & MSR_VEC) { load_vr_state(&tsk->thread.vr_state); - regs->msr |= MSR_VEC; + regs_set_return_msr(regs, regs->msr | MSR_VEC); } preempt_enable(); @@ -717,6 +717,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, /* This returns like rt_sigreturn */ set_thread_flag(TIF_RESTOREALL); + return 0; efault_out: @@ -783,7 +784,7 @@ SYSCALL_DEFINE0(rt_sigreturn) * the MSR[TS] that came from user context later, at * restore_tm_sigcontexts. */ - regs->msr &= ~MSR_TS_MASK; + regs_set_return_msr(regs, regs->msr & ~MSR_TS_MASK); if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR])) goto badframe; @@ -815,7 +816,8 @@ SYSCALL_DEFINE0(rt_sigreturn) * MSR[TS] set, but without CPU in the proper state, * causing a TM bad thing. */ - current->thread.regs->msr &= ~MSR_TS_MASK; + regs_set_return_msr(current->thread.regs, + current->thread.regs->msr & ~MSR_TS_MASK); if (!user_read_access_begin(&uc->uc_mcontext, sizeof(uc->uc_mcontext))) goto badframe; @@ -829,6 +831,7 @@ SYSCALL_DEFINE0(rt_sigreturn) goto badframe; set_thread_flag(TIF_RESTOREALL); + return 0; badframe_block: @@ -908,12 +911,12 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, /* Set up to return from userspace. 
*/ if (tsk->mm->context.vdso) { - regs->nip = VDSO64_SYMBOL(tsk->mm->context.vdso, sigtramp_rt64); + regs_set_return_ip(regs, VDSO64_SYMBOL(tsk->mm->context.vdso, sigtramp_rt64)); } else { err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]); if (err) goto badframe; - regs->nip = (unsigned long) &frame->tramp[0]; + regs_set_return_ip(regs, (unsigned long) &frame->tramp[0]); } /* Allocate a dummy caller frame for the signal handler. */ @@ -938,8 +941,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, } /* enter the signal handler in native-endian mode */ - regs->msr &= ~MSR_LE; - regs->msr |= (MSR_KERNEL & MSR_LE); + regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (MSR_KERNEL & MSR_LE)); regs->gpr[1] = newsp; regs->gpr[3] = ksig->sig; regs->result = 0; diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index a552c9e68d7e..bf4ae0f0e36c 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -114,7 +114,8 @@ SYSCALL_DEFINE0(switch_endian) { struct thread_info *ti; - current->thread.regs->msr ^= MSR_LE; + regs_set_return_msr(current->thread.regs, + current->thread.regs->msr ^ MSR_LE); /* * Set TIF_RESTOREALL so that r3 isn't clobbered on return to diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index c929d93c35d0..dfbce527c98e 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -428,7 +428,7 @@ void hv_nmi_check_nonrecoverable(struct pt_regs *regs) return; nonrecoverable: - regs->msr &= ~MSR_RI; + regs_set_return_msr(regs, regs->msr & ~MSR_RI); #endif } DEFINE_INTERRUPT_HANDLER_NMI(system_reset_exception) @@ -550,8 +550,8 @@ static inline int check_io_access(struct pt_regs *regs) printk(KERN_DEBUG "%s bad port %lx at %p\n", (*nip & 0x100)? "OUT to": "IN from", regs->gpr[rb] - _IO_BASE, nip); - regs->msr |= MSR_RI; - regs->nip = extable_fixup(entry); + regs_set_return_msr(regs, regs->msr | MSR_RI); + regs_set_return_ip(regs, extable_fixup(entry)); return 1; } } @@ -587,8 +587,8 @@ static inline int check_io_access(struct pt_regs *regs) #define REASON_BOUNDARY SRR1_BOUNDARY #define single_stepping(regs) ((regs)->msr & MSR_SE) -#define clear_single_step(regs) ((regs)->msr &= ~MSR_SE) -#define clear_br_trace(regs) ((regs)->msr &= ~MSR_BE) +#define clear_single_step(regs) (regs_set_return_msr((regs), (regs)->msr & ~MSR_SE)) +#define clear_br_trace(regs) (regs_set_return_msr((regs), (regs)->msr & ~MSR_BE)) #endif #define inst_length(reason) (((reason) & REASON_PREFIXED) ? 
8 : 4) @@ -1032,7 +1032,7 @@ static void p9_hmi_special_emu(struct pt_regs *regs) #endif /* !__LITTLE_ENDIAN__ */ /* Go to next instruction */ - regs->nip += 4; + regs_add_return_ip(regs, 4); } #endif /* CONFIG_VSX */ @@ -1477,7 +1477,7 @@ static void do_program_check(struct pt_regs *regs) if (!(regs->msr & MSR_PR) && /* not user-mode */ report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) { - regs->nip += 4; + regs_add_return_ip(regs, 4); return; } _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); @@ -1539,7 +1539,7 @@ static void do_program_check(struct pt_regs *regs) if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) { switch (emulate_instruction(regs)) { case 0: - regs->nip += 4; + regs_add_return_ip(regs, 4); emulate_single_step(regs); return; case -EFAULT: @@ -1567,7 +1567,7 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception) */ DEFINE_INTERRUPT_HANDLER(emulation_assist_interrupt) { - regs->msr |= REASON_ILLEGAL; + regs_set_return_msr(regs, regs->msr | REASON_ILLEGAL); do_program_check(regs); } @@ -1594,7 +1594,7 @@ DEFINE_INTERRUPT_HANDLER(alignment_exception) if (fixed == 1) { /* skip over emulated instruction */ - regs->nip += inst_length(reason); + regs_add_return_ip(regs, inst_length(reason)); emulate_single_step(regs); return; } @@ -1660,7 +1660,7 @@ static void tm_unavailable(struct pt_regs *regs) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (user_mode(regs)) { current->thread.load_tm++; - regs->msr |= MSR_TM; + regs_set_return_msr(regs, regs->msr | MSR_TM); tm_enable(); tm_restore_sprs(¤t->thread); return; @@ -1752,7 +1752,7 @@ DEFINE_INTERRUPT_HANDLER(facility_unavailable_exception) pr_err("DSCR based mfspr emulation failed\n"); return; } - regs->nip += 4; + regs_add_return_ip(regs, 4); emulate_single_step(regs); } return; @@ -1949,7 +1949,7 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status) */ if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0, current->thread.debug.dbcr1)) - regs->msr |= MSR_DE; + regs_set_return_msr(regs, regs->msr | MSR_DE); else /* Make sure the IDM flag is off */ current->thread.debug.dbcr0 &= ~DBCR0_IDM; @@ -1970,7 +1970,7 @@ DEFINE_INTERRUPT_HANDLER(DebugException) * instead of stopping here when hitting a BT */ if (debug_status & DBSR_BT) { - regs->msr &= ~MSR_DE; + regs_set_return_msr(regs, regs->msr & ~MSR_DE); /* Disable BT */ mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_BT); @@ -1981,7 +1981,7 @@ DEFINE_INTERRUPT_HANDLER(DebugException) if (user_mode(regs)) { current->thread.debug.dbcr0 &= ~DBCR0_BT; current->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC; - regs->msr |= MSR_DE; + regs_set_return_msr(regs, regs->msr | MSR_DE); return; } @@ -1995,7 +1995,7 @@ DEFINE_INTERRUPT_HANDLER(DebugException) if (debugger_sstep(regs)) return; } else if (debug_status & DBSR_IC) { /* Instruction complete */ - regs->msr &= ~MSR_DE; + regs_set_return_msr(regs, regs->msr & ~MSR_DE); /* Disable instruction completion */ mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_IC); @@ -2017,7 +2017,7 @@ DEFINE_INTERRUPT_HANDLER(DebugException) current->thread.debug.dbcr0 &= ~DBCR0_IC; if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0, current->thread.debug.dbcr1)) - regs->msr |= MSR_DE; + regs_set_return_msr(regs, regs->msr | MSR_DE); else /* Make sure the IDM bit is off */ current->thread.debug.dbcr0 &= ~DBCR0_IDM; @@ -2045,7 +2045,7 @@ DEFINE_INTERRUPT_HANDLER(altivec_assist_exception) PPC_WARN_EMULATED(altivec, regs); err = emulate_altivec(regs); if (err == 0) { - regs->nip += 4; /* skip emulated instruction */ + regs_add_return_ip(regs, 4); /* skip 
emulated instruction */ emulate_single_step(regs); return; } @@ -2110,7 +2110,7 @@ DEFINE_INTERRUPT_HANDLER(SPEFloatingPointException) err = do_spe_mathemu(regs); if (err == 0) { - regs->nip += 4; /* skip emulated instruction */ + regs_add_return_ip(regs, 4); /* skip emulated instruction */ emulate_single_step(regs); return; } @@ -2141,10 +2141,10 @@ DEFINE_INTERRUPT_HANDLER(SPEFloatingPointRoundException) giveup_spe(current); preempt_enable(); - regs->nip -= 4; + regs_add_return_ip(regs, -4); err = speround_handler(regs); if (err == 0) { - regs->nip += 4; /* skip emulated instruction */ + regs_add_return_ip(regs, 4); /* skip emulated instruction */ emulate_single_step(regs); return; } diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c index 1aefd2ecbb8a..c6975467d9ff 100644 --- a/arch/powerpc/kernel/uprobes.c +++ b/arch/powerpc/kernel/uprobes.c @@ -62,7 +62,7 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) autask->saved_trap_nr = current->thread.trap_nr; current->thread.trap_nr = UPROBE_TRAP_NR; - regs->nip = current->utask->xol_vaddr; + regs_set_return_ip(regs, current->utask->xol_vaddr); user_enable_single_step(current); return 0; @@ -119,7 +119,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) * support doesn't exist and have to fix-up the next instruction * to be executed. */ - regs->nip = (unsigned long)ppc_inst_next((void *)utask->vaddr, auprobe->insn); + regs_set_return_ip(regs, (unsigned long)ppc_inst_next((void *)utask->vaddr, auprobe->insn)); user_disable_single_step(current); return 0; diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 54dbefcb4cde..fc120fac1910 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -73,6 +73,10 @@ _GLOBAL(load_up_altivec) addi r5,r4,THREAD /* Get THREAD */ oris r12,r12,MSR_VEC@h std r12,_MSR(r1) +#ifdef CONFIG_PPC_BOOK3S_64 + li r4,0 + stb r4,PACASRR_VALID(r13) +#endif #endif li r4,1 stb r4,THREAD_LOAD_VEC(r5) @@ -131,6 +135,8 @@ _GLOBAL(load_up_vsx) /* enable use of VSX after return */ oris r12,r12,MSR_VSX@h std r12,_MSR(r1) + li r4,0 + stb r4,PACASRR_VALID(r13) b fast_interrupt_return_srr #endif /* CONFIG_VSX */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 279eae8f9dbc..010fa75518ae 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4626,6 +4626,9 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) vcpu->arch.state = KVMPPC_VCPU_NOTREADY; atomic_dec(&kvm->arch.vcpus_running); + + srr_regs_clobbered(); + return r; } diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index d7733b07f489..1ed5ceee73eb 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -1848,6 +1849,7 @@ static int kvmppc_vcpu_run_pr(struct kvm_vcpu *vcpu) /* Make sure we save the guest TAR/EBB/DSCR state */ kvmppc_giveup_fac(vcpu, FSCR_TAR_LG); + srr_regs_clobbered(); out: vcpu->mode = OUTSIDE_GUEST_MODE; return ret; diff --git a/arch/powerpc/lib/error-inject.c b/arch/powerpc/lib/error-inject.c index 407b992fb02f..e834079d2b5c 100644 --- a/arch/powerpc/lib/error-inject.c +++ b/arch/powerpc/lib/error-inject.c @@ -11,6 +11,6 @@ void override_function_with_return(struct pt_regs *regs) * function in the kernel/module, captured on a kprobe. We don't need * to worry about 32-bit userspace on a 64-bit kernel. 
*/ - regs->nip = regs->link; + regs_set_return_ip(regs, regs->link); } NOKPROBE_SYMBOL(override_function_with_return); diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index aee42bcc775b..d8d5f901cee1 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -3225,7 +3225,7 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op) default: WARN_ON_ONCE(1); } - regs->nip = next_pc; + regs_set_return_ip(regs, next_pc); } NOKPROBE_SYMBOL(emulate_update_regs); @@ -3563,7 +3563,7 @@ int emulate_step(struct pt_regs *regs, struct ppc_inst instr) /* can't step mtmsr[d] that would clear MSR_RI */ return -1; /* here op.val is the mask of bits to change */ - regs->msr = (regs->msr & ~op.val) | (val & op.val); + regs_set_return_msr(regs, (regs->msr & ~op.val) | (val & op.val)); goto instr_done; #ifdef CONFIG_PPC64 @@ -3576,7 +3576,7 @@ int emulate_step(struct pt_regs *regs, struct ppc_inst instr) if (IS_ENABLED(CONFIG_PPC_FAST_ENDIAN_SWITCH) && cpu_has_feature(CPU_FTR_REAL_LE) && regs->gpr[0] == 0x1ebe) { - regs->msr ^= MSR_LE; + regs_set_return_msr(regs, regs->msr ^ MSR_LE); goto instr_done; } regs->gpr[9] = regs->gpr[13]; @@ -3584,8 +3584,8 @@ int emulate_step(struct pt_regs *regs, struct ppc_inst instr) regs->gpr[11] = regs->nip + 4; regs->gpr[12] = regs->msr & MSR_MASK; regs->gpr[13] = (unsigned long) get_paca(); - regs->nip = (unsigned long) &system_call_common; - regs->msr = MSR_KERNEL; + regs_set_return_ip(regs, (unsigned long) &system_call_common); + regs_set_return_msr(regs, MSR_KERNEL); return 1; #ifdef CONFIG_PPC_BOOK3S_64 @@ -3595,8 +3595,8 @@ int emulate_step(struct pt_regs *regs, struct ppc_inst instr) regs->gpr[11] = regs->nip + 4; regs->gpr[12] = regs->msr & MSR_MASK; regs->gpr[13] = (unsigned long) get_paca(); - regs->nip = (unsigned long) &system_call_vectored_emulate; - regs->msr = MSR_KERNEL; + regs_set_return_ip(regs, (unsigned long) &system_call_vectored_emulate); + regs_set_return_msr(regs, MSR_KERNEL); return 1; #endif @@ -3607,7 +3607,8 @@ int emulate_step(struct pt_regs *regs, struct ppc_inst instr) return 0; instr_done: - regs->nip = truncate_if_32bit(regs->msr, regs->nip + GETLENGTH(op.type)); + regs_set_return_ip(regs, + truncate_if_32bit(regs->msr, regs->nip + GETLENGTH(op.type))); return 1; } NOKPROBE_SYMBOL(emulate_step); diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c index 1cbfa468813b..8b4f6b3e96c4 100644 --- a/arch/powerpc/lib/test_emulate_step.c +++ b/arch/powerpc/lib/test_emulate_step.c @@ -1609,6 +1609,7 @@ static int __init emulate_compute_instr(struct pt_regs *regs, if (!regs || !ppc_inst_val(instr)) return -EINVAL; + /* This is not a return frame regs */ regs->nip = patch_site_addr(&patch__exec_instr); analysed = analyse_instr(&op, regs, instr); diff --git a/arch/powerpc/math-emu/math.c b/arch/powerpc/math-emu/math.c index 327165f26ca6..36761bd00f38 100644 --- a/arch/powerpc/math-emu/math.c +++ b/arch/powerpc/math-emu/math.c @@ -453,7 +453,7 @@ do_mathemu(struct pt_regs *regs) break; } - regs->nip += 4; + regs_add_return_ip(regs, 4); return 0; illegal: diff --git a/arch/powerpc/math-emu/math_efp.c b/arch/powerpc/math-emu/math_efp.c index 0a05e51964c1..39b84e7452e1 100644 --- a/arch/powerpc/math-emu/math_efp.c +++ b/arch/powerpc/math-emu/math_efp.c @@ -710,7 +710,7 @@ update_regs: illegal: if (have_e500_cpu_a005_erratum) { /* according to e500 cpu a005 erratum, reissue efp inst */ - regs->nip -= 4; + regs_add_return_ip(regs, -4); pr_debug("re-issue efp inst: %08lx\n", 
speinsn); return 0; } diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c index 53065d564161..85521b3e7098 100644 --- a/arch/powerpc/platforms/embedded6xx/holly.c +++ b/arch/powerpc/platforms/embedded6xx/holly.c @@ -251,8 +251,8 @@ static int ppc750_machine_check_exception(struct pt_regs *regs) /* Are we prepared to handle this fault */ if ((entry = search_exception_tables(regs->nip)) != NULL) { tsi108_clear_pci_cfg_error(); - regs->msr |= MSR_RI; - regs->nip = extable_fixup(entry); + regs_set_return_msr(regs, regs->msr | MSR_RI); + regs_set_return_ip(regs, extable_fixup(entry)); return 1; } return 0; diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c index 5565647dc879..d8da6a483e59 100644 --- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c +++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c @@ -173,8 +173,8 @@ static int mpc7448_machine_check_exception(struct pt_regs *regs) /* Are we prepared to handle this fault */ if ((entry = search_exception_tables(regs->nip)) != NULL) { tsi108_clear_pci_cfg_error(); - regs->msr |= MSR_RI; - regs->nip = extable_fixup(entry); + regs_set_return_msr(regs, regs->msr | MSR_RI); + regs_set_return_ip(regs, extable_fixup(entry)); return 1; } return 0; diff --git a/arch/powerpc/platforms/pasemi/idle.c b/arch/powerpc/platforms/pasemi/idle.c index 1c954c90b0f4..9b88e3cded7d 100644 --- a/arch/powerpc/platforms/pasemi/idle.c +++ b/arch/powerpc/platforms/pasemi/idle.c @@ -37,7 +37,7 @@ static int pasemi_system_reset_exception(struct pt_regs *regs) */ if (regs->msr & SRR1_WAKEMASK) - regs->nip = regs->link; + regs_set_return_ip(regs, regs->link); switch (regs->msr & SRR1_WAKEMASK) { case SRR1_WAKEDEC: @@ -58,7 +58,7 @@ static int pasemi_system_reset_exception(struct pt_regs *regs) restore_astate(hard_smp_processor_id()); /* everything handled */ - regs->msr |= MSR_RI; + regs_set_return_msr(regs, regs->msr | MSR_RI); return 1; } diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c index 01401e3da7ca..f812c74c61e5 100644 --- a/arch/powerpc/platforms/powernv/opal-call.c +++ b/arch/powerpc/platforms/powernv/opal-call.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include #include #include @@ -100,6 +101,9 @@ static int64_t opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3, bool mmu = (msr & (MSR_IR|MSR_DR)); int64_t ret; + /* OPAL call / firmware may use SRR and/or HSRR */ + srr_regs_clobbered(); + msr &= ~MSR_EE; if (unlikely(!mmu)) diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 303d7c775740..2835376e61a4 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -773,7 +773,7 @@ bool opal_mce_check_early_recovery(struct pt_regs *regs) * Setup regs->nip to rfi into fixup address. 
*/ if (recover_addr) - regs->nip = recover_addr; + regs_set_return_ip(regs, recover_addr); out: return !!recover_addr; diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index 8a2b8d64265b..ab9fc6506861 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -108,6 +108,10 @@ _GLOBAL_TOC(plpar_hcall_norets_notrace) mfcr r0 stw r0,8(r1) HVSC /* invoke the hypervisor */ + + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 blr /* return r3 = status */ @@ -120,6 +124,9 @@ _GLOBAL_TOC(plpar_hcall_norets) HCALL_BRANCH(plpar_hcall_norets_trace) HVSC /* invoke the hypervisor */ + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 blr /* return r3 = status */ @@ -129,6 +136,10 @@ plpar_hcall_norets_trace: HCALL_INST_PRECALL(R4) HVSC HCALL_INST_POSTCALL_NORETS + + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 blr @@ -159,6 +170,9 @@ _GLOBAL_TOC(plpar_hcall) std r6, 16(r12) std r7, 24(r12) + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 @@ -188,6 +202,9 @@ plpar_hcall_trace: HCALL_INST_POSTCALL(r12) + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 @@ -223,6 +240,9 @@ _GLOBAL(plpar_hcall_raw) std r6, 16(r12) std r7, 24(r12) + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 @@ -262,6 +282,9 @@ _GLOBAL_TOC(plpar_hcall9) std r11,56(r12) std r0, 64(r12) + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 @@ -300,6 +323,9 @@ plpar_hcall9_trace: HCALL_INST_POSTCALL(r12) + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 @@ -339,6 +365,9 @@ _GLOBAL(plpar_hcall9_raw) std r11,56(r12) std r0, 64(r12) + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 2f636308cf60..167f2e1b8d39 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -487,8 +487,8 @@ int pSeries_system_reset_exception(struct pt_regs *regs) if ((be64_to_cpu(regs->msr) & (MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR| MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) { - regs->nip = be64_to_cpu((__be64)regs->nip); - regs->msr = 0; + regs_set_return_ip(regs, be64_to_cpu((__be64)regs->nip)); + regs_set_return_msr(regs, 0); } #endif diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 69af73765783..b8f76f3fd994 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -1072,7 +1072,7 @@ int fsl_pci_mcheck_exception(struct pt_regs *regs) ret = get_kernel_nofault(inst, (void *)regs->nip); if (!ret && mcheck_handle_load(regs, inst)) { - regs->nip += 4; + regs_add_return_ip(regs, 4); return 1; } } diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index 07c164f7f8cf..5a95b8ea23d8 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -108,8 +108,8 @@ int fsl_rio_mcheck_exception(struct pt_regs *regs) __func__); out_be32((u32 *)(rio_regs_win + RIO_LTLEDCSR), 0); - regs->msr |= MSR_RI; - regs->nip = extable_fixup(entry); + regs_set_return_msr(regs, regs->msr | MSR_RI); + regs_set_return_ip(regs, extable_fixup(entry)); return 1; } } diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index bd7ee794be92..dbc38483473c 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -514,7 +514,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) bp = 
in_breakpoint_table(regs->nip, &offset); if (bp != NULL) { - regs->nip = bp->address + offset; + regs_set_return_ip(regs, bp->address + offset); atomic_dec(&bp->ref_count); } @@ -702,7 +702,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) if (regs->msr & MSR_DE) { bp = at_breakpoint(regs->nip); if (bp != NULL) { - regs->nip = (unsigned long) &bp->instr[0]; + regs_set_return_ip(regs, (unsigned long) &bp->instr[0]); atomic_inc(&bp->ref_count); } } @@ -712,7 +712,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) if (bp != NULL) { int stepped = emulate_step(regs, ppc_inst_read(bp->instr)); if (stepped == 0) { - regs->nip = (unsigned long) &bp->instr[0]; + regs_set_return_ip(regs, (unsigned long) &bp->instr[0]); atomic_inc(&bp->ref_count); } else if (stepped < 0) { printf("Couldn't single-step %s instruction\n", @@ -766,7 +766,7 @@ static int xmon_bpt(struct pt_regs *regs) /* Are we at the trap at bp->instr[1] for some bp? */ bp = in_breakpoint_table(regs->nip, &offset); if (bp != NULL && (offset == 4 || offset == 8)) { - regs->nip = bp->address + offset; + regs_set_return_ip(regs, bp->address + offset); atomic_dec(&bp->ref_count); return 1; } @@ -836,7 +836,7 @@ static int xmon_fault_handler(struct pt_regs *regs) if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT)) { bp = in_breakpoint_table(regs->nip, &offset); if (bp != NULL) { - regs->nip = bp->address + offset; + regs_set_return_ip(regs, bp->address + offset); atomic_dec(&bp->ref_count); } } @@ -1188,7 +1188,7 @@ cmds(struct pt_regs *excp) #ifdef CONFIG_BOOKE static int do_step(struct pt_regs *regs) { - regs->msr |= MSR_DE; + regs_set_return_msr(regs, regs->msr | MSR_DE); mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); return 1; } @@ -1221,7 +1221,7 @@ static int do_step(struct pt_regs *regs) } } } - regs->msr |= MSR_SE; + regs_set_return_msr(regs, regs->msr | MSR_SE); return 1; } #endif -- cgit v1.2.3 From e754f4d13e3919aafa485657599907aa63b9a40c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 18 Jun 2021 01:51:05 +1000 Subject: powerpc/64: move interrupt return asm to interrupt_64.S The next patch would like to move interrupt return assembly code to a low location before general text, so move it into its own file and include via head_64.S Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210617155116.2167984-7-npiggin@gmail.com --- arch/powerpc/include/asm/head-64.h | 2 +- arch/powerpc/kernel/entry_64.S | 623 ------------------------------------ arch/powerpc/kernel/head_64.S | 5 +- arch/powerpc/kernel/interrupt_64.S | 635 +++++++++++++++++++++++++++++++++++++ 4 files changed, 640 insertions(+), 625 deletions(-) create mode 100644 arch/powerpc/kernel/interrupt_64.S diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h index 4cb9efa2eb21..242204e12993 100644 --- a/arch/powerpc/include/asm/head-64.h +++ b/arch/powerpc/include/asm/head-64.h @@ -16,7 +16,7 @@ .section ".head.data.\name\()","a",@progbits .endm .macro use_ftsec name - .section ".head.text.\name\()" + .section ".head.text.\name\()","ax",@progbits .endm /* diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 9a1d5e5599d3..15720f8661a1 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -48,410 +47,7 @@ /* * System calls. 
*/ - .section ".toc","aw" -SYS_CALL_TABLE: - .tc sys_call_table[TC],sys_call_table - -#ifdef CONFIG_COMPAT -COMPAT_SYS_CALL_TABLE: - .tc compat_sys_call_table[TC],compat_sys_call_table -#endif - -/* This value is used to mark exception frames on the stack. */ -exception_marker: - .tc ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER - .section ".text" - .align 7 - -.macro DEBUG_SRR_VALID srr -#ifdef CONFIG_PPC_RFI_SRR_DEBUG - .ifc \srr,srr - mfspr r11,SPRN_SRR0 - ld r12,_NIP(r1) -100: tdne r11,r12 - EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) - mfspr r11,SPRN_SRR1 - ld r12,_MSR(r1) -100: tdne r11,r12 - EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) - .else - mfspr r11,SPRN_HSRR0 - ld r12,_NIP(r1) -100: tdne r11,r12 - EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) - mfspr r11,SPRN_HSRR1 - ld r12,_MSR(r1) -100: tdne r11,r12 - EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) - .endif -#endif -.endm - -#ifdef CONFIG_PPC_BOOK3S -.macro system_call_vectored name trapnr - .globl system_call_vectored_\name -system_call_vectored_\name: -_ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -BEGIN_FTR_SECTION - extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */ - bne .Ltabort_syscall -END_FTR_SECTION_IFSET(CPU_FTR_TM) -#endif - SCV_INTERRUPT_TO_KERNEL - mr r10,r1 - ld r1,PACAKSAVE(r13) - std r10,0(r1) - std r11,_NIP(r1) - std r12,_MSR(r1) - std r0,GPR0(r1) - std r10,GPR1(r1) - std r2,GPR2(r1) - ld r2,PACATOC(r13) - mfcr r12 - li r11,0 - /* Can we avoid saving r3-r8 in common case? */ - std r3,GPR3(r1) - std r4,GPR4(r1) - std r5,GPR5(r1) - std r6,GPR6(r1) - std r7,GPR7(r1) - std r8,GPR8(r1) - /* Zero r9-r12, this should only be required when restoring all GPRs */ - std r11,GPR9(r1) - std r11,GPR10(r1) - std r11,GPR11(r1) - std r11,GPR12(r1) - std r9,GPR13(r1) - SAVE_NVGPRS(r1) - std r11,_XER(r1) - std r11,_LINK(r1) - std r11,_CTR(r1) - - li r11,\trapnr - std r11,_TRAP(r1) - std r12,_CCR(r1) - addi r10,r1,STACK_FRAME_OVERHEAD - ld r11,exception_marker@toc(r2) - std r11,-16(r10) /* "regshere" marker */ - -BEGIN_FTR_SECTION - HMT_MEDIUM -END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) - - /* - * scv enters with MSR[EE]=1 and is immediately considered soft-masked. - * The entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED, - * and interrupts may be masked and pending already. - * system_call_exception() will call trace_hardirqs_off() which means - * interrupts could already have been blocked before trace_hardirqs_off, - * but this is the best we can do. - */ - - /* Calling convention has r9 = orig r0, r10 = regs */ - mr r9,r0 - bl system_call_exception - -.Lsyscall_vectored_\name\()_exit: - addi r4,r1,STACK_FRAME_OVERHEAD - li r5,1 /* scv */ - bl syscall_exit_prepare - - ld r2,_CCR(r1) - ld r4,_NIP(r1) - ld r5,_MSR(r1) - -BEGIN_FTR_SECTION - stdcx. r0,0,r1 /* to clear the reservation */ -END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) - -BEGIN_FTR_SECTION - HMT_MEDIUM_LOW -END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) - - cmpdi r3,0 - bne .Lsyscall_vectored_\name\()_restore_regs - - /* rfscv returns with LR->NIA and CTR->MSR */ - mtlr r4 - mtctr r5 - - /* Could zero these as per ABI, but we may consider a stricter ABI - * which preserves these if libc implementations can benefit, so - * restore them for now until further measurement is done. 
*/ - ld r0,GPR0(r1) - ld r4,GPR4(r1) - ld r5,GPR5(r1) - ld r6,GPR6(r1) - ld r7,GPR7(r1) - ld r8,GPR8(r1) - /* Zero volatile regs that may contain sensitive kernel data */ - li r9,0 - li r10,0 - li r11,0 - li r12,0 - mtspr SPRN_XER,r0 - - /* - * We don't need to restore AMR on the way back to userspace for KUAP. - * The value of AMR only matters while we're in the kernel. - */ - mtcr r2 - ld r2,GPR2(r1) - ld r3,GPR3(r1) - ld r13,GPR13(r1) - ld r1,GPR1(r1) - RFSCV_TO_USER - b . /* prevent speculative execution */ - -.Lsyscall_vectored_\name\()_restore_regs: - li r3,0 - mtmsrd r3,1 - mtspr SPRN_SRR0,r4 - mtspr SPRN_SRR1,r5 - - ld r3,_CTR(r1) - ld r4,_LINK(r1) - ld r5,_XER(r1) - - REST_NVGPRS(r1) - ld r0,GPR0(r1) - mtcr r2 - mtctr r3 - mtlr r4 - mtspr SPRN_XER,r5 - REST_10GPRS(2, r1) - REST_2GPRS(12, r1) - ld r1,GPR1(r1) - RFI_TO_USER -.endm - -system_call_vectored common 0x3000 -/* - * We instantiate another entry copy for the SIGILL variant, with TRAP=0x7ff0 - * which is tested by system_call_exception when r0 is -1 (as set by vector - * entry code). - */ -system_call_vectored sigill 0x7ff0 - - -/* - * Entered via kernel return set up by kernel/sstep.c, must match entry regs - */ - .globl system_call_vectored_emulate -system_call_vectored_emulate: -_ASM_NOKPROBE_SYMBOL(system_call_vectored_emulate) - li r10,IRQS_ALL_DISABLED - stb r10,PACAIRQSOFTMASK(r13) - b system_call_vectored_common -#endif - - .balign IFETCH_ALIGN_BYTES - .globl system_call_common_real -system_call_common_real: - ld r10,PACAKMSR(r13) /* get MSR value for kernel */ - mtmsrd r10 - - .balign IFETCH_ALIGN_BYTES - .globl system_call_common -system_call_common: -_ASM_NOKPROBE_SYMBOL(system_call_common) -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -BEGIN_FTR_SECTION - extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */ - bne .Ltabort_syscall -END_FTR_SECTION_IFSET(CPU_FTR_TM) -#endif - mr r10,r1 - ld r1,PACAKSAVE(r13) - std r10,0(r1) - std r11,_NIP(r1) - std r12,_MSR(r1) - std r0,GPR0(r1) - std r10,GPR1(r1) - std r2,GPR2(r1) -#ifdef CONFIG_PPC_FSL_BOOK3E -START_BTB_FLUSH_SECTION - BTB_FLUSH(r10) -END_BTB_FLUSH_SECTION -#endif - ld r2,PACATOC(r13) - mfcr r12 - li r11,0 - /* Can we avoid saving r3-r8 in common case? */ - std r3,GPR3(r1) - std r4,GPR4(r1) - std r5,GPR5(r1) - std r6,GPR6(r1) - std r7,GPR7(r1) - std r8,GPR8(r1) - /* Zero r9-r12, this should only be required when restoring all GPRs */ - std r11,GPR9(r1) - std r11,GPR10(r1) - std r11,GPR11(r1) - std r11,GPR12(r1) - std r9,GPR13(r1) - SAVE_NVGPRS(r1) - std r11,_XER(r1) - std r11,_CTR(r1) - mflr r10 - - /* - * This clears CR0.SO (bit 28), which is the error indication on - * return from this system call. - */ - rldimi r12,r11,28,(63-28) - li r11,0xc00 - std r10,_LINK(r1) - std r11,_TRAP(r1) - std r12,_CCR(r1) - addi r10,r1,STACK_FRAME_OVERHEAD - ld r11,exception_marker@toc(r2) - std r11,-16(r10) /* "regshere" marker */ - -#ifdef CONFIG_PPC_BOOK3S - li r11,1 - stb r11,PACASRR_VALID(r13) -#endif - - /* - * We always enter kernel from userspace with irq soft-mask enabled and - * nothing pending. system_call_exception() will call - * trace_hardirqs_off(). 
- */ - li r11,IRQS_ALL_DISABLED - li r12,PACA_IRQ_HARD_DIS - stb r11,PACAIRQSOFTMASK(r13) - stb r12,PACAIRQHAPPENED(r13) - - /* Calling convention has r9 = orig r0, r10 = regs */ - mr r9,r0 - bl system_call_exception - -.Lsyscall_exit: - addi r4,r1,STACK_FRAME_OVERHEAD - li r5,0 /* !scv */ - bl syscall_exit_prepare - - ld r2,_CCR(r1) - ld r6,_LINK(r1) - mtlr r6 - -#ifdef CONFIG_PPC_BOOK3S - lbz r4,PACASRR_VALID(r13) - cmpdi r4,0 - bne 1f - li r4,0 - stb r4,PACASRR_VALID(r13) -#endif - ld r4,_NIP(r1) - ld r5,_MSR(r1) - mtspr SPRN_SRR0,r4 - mtspr SPRN_SRR1,r5 -1: - DEBUG_SRR_VALID srr - -BEGIN_FTR_SECTION - stdcx. r0,0,r1 /* to clear the reservation */ -END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) - - cmpdi r3,0 - bne .Lsyscall_restore_regs - /* Zero volatile regs that may contain sensitive kernel data */ - li r0,0 - li r4,0 - li r5,0 - li r6,0 - li r7,0 - li r8,0 - li r9,0 - li r10,0 - li r11,0 - li r12,0 - mtctr r0 - mtspr SPRN_XER,r0 -.Lsyscall_restore_regs_cont: - -BEGIN_FTR_SECTION - HMT_MEDIUM_LOW -END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) - - /* - * We don't need to restore AMR on the way back to userspace for KUAP. - * The value of AMR only matters while we're in the kernel. - */ - mtcr r2 - ld r2,GPR2(r1) - ld r3,GPR3(r1) - ld r13,GPR13(r1) - ld r1,GPR1(r1) - RFI_TO_USER - b . /* prevent speculative execution */ - -.Lsyscall_restore_regs: - ld r3,_CTR(r1) - ld r4,_XER(r1) - REST_NVGPRS(r1) - mtctr r3 - mtspr SPRN_XER,r4 - ld r0,GPR0(r1) - REST_8GPRS(4, r1) - ld r12,GPR12(r1) - b .Lsyscall_restore_regs_cont - -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -.Ltabort_syscall: - /* Firstly we need to enable TM in the kernel */ - mfmsr r10 - li r9, 1 - rldimi r10, r9, MSR_TM_LG, 63-MSR_TM_LG - mtmsrd r10, 0 - - /* tabort, this dooms the transaction, nothing else */ - li r9, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT) - TABORT(R9) - - /* - * Return directly to userspace. We have corrupted user register state, - * but userspace will never see that register state. Execution will - * resume after the tbegin of the aborted transaction with the - * checkpointed register state. - */ - li r9, MSR_RI - andc r10, r10, r9 - mtmsrd r10, 1 - mtspr SPRN_SRR0, r11 - mtspr SPRN_SRR1, r12 - RFI_TO_USER - b . /* prevent speculative execution */ -#endif - -#ifdef CONFIG_PPC_BOOK3S -_GLOBAL(ret_from_fork_scv) - bl schedule_tail - REST_NVGPRS(r1) - li r3,0 /* fork() return value */ - b .Lsyscall_vectored_common_exit -#endif - -_GLOBAL(ret_from_fork) - bl schedule_tail - REST_NVGPRS(r1) - li r3,0 /* fork() return value */ - b .Lsyscall_exit - -_GLOBAL(ret_from_kernel_thread) - bl schedule_tail - REST_NVGPRS(r1) - mtctr r14 - mr r3,r15 -#ifdef PPC64_ELF_ABI_v2 - mr r12,r14 -#endif - bctrl - li r3,0 - b .Lsyscall_exit #ifdef CONFIG_PPC_BOOK3S_64 @@ -668,225 +264,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) addi r1,r1,SWITCH_FRAME_SIZE blr - /* - * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not - * touched, no exit work created, then this can be used. - */ - .balign IFETCH_ALIGN_BYTES - .globl fast_interrupt_return_srr -fast_interrupt_return_srr: -_ASM_NOKPROBE_SYMBOL(fast_interrupt_return_srr) - kuap_check_amr r3, r4 - ld r5,_MSR(r1) - andi. r0,r5,MSR_PR -#ifdef CONFIG_PPC_BOOK3S - bne .Lfast_user_interrupt_return_amr_srr - kuap_kernel_restore r3, r4 - andi. r0,r5,MSR_RI - li r3,0 /* 0 return value, no EMULATE_STACK_STORE */ - bne+ .Lfast_kernel_interrupt_return_srr - addi r3,r1,STACK_FRAME_OVERHEAD - bl unrecoverable_exception - b . 
/* should not get here */ -#else - bne .Lfast_user_interrupt_return_srr - b .Lfast_kernel_interrupt_return_srr -#endif - -.macro interrupt_return_macro srr - .balign IFETCH_ALIGN_BYTES - .globl interrupt_return_\srr -interrupt_return_\srr\(): -_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()) - ld r4,_MSR(r1) - andi. r0,r4,MSR_PR - beq .Lkernel_interrupt_return_\srr - addi r3,r1,STACK_FRAME_OVERHEAD - bl interrupt_exit_user_prepare - cmpdi r3,0 - bne- .Lrestore_nvgprs_\srr - -#ifdef CONFIG_PPC_BOOK3S -.Lfast_user_interrupt_return_amr_\srr\(): - kuap_user_restore r3, r4 -#endif -.Lfast_user_interrupt_return_\srr\(): - -BEGIN_FTR_SECTION - ld r10,_PPR(r1) - mtspr SPRN_PPR,r10 -END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) - -#ifdef CONFIG_PPC_BOOK3S - .ifc \srr,srr - lbz r4,PACASRR_VALID(r13) - .else - lbz r4,PACAHSRR_VALID(r13) - .endif - cmpdi r4,0 - li r4,0 - bne 1f -#endif - ld r11,_NIP(r1) - ld r12,_MSR(r1) - .ifc \srr,srr - mtspr SPRN_SRR0,r11 - mtspr SPRN_SRR1,r12 -1: -#ifdef CONFIG_PPC_BOOK3S - stb r4,PACASRR_VALID(r13) -#endif - .else - mtspr SPRN_HSRR0,r11 - mtspr SPRN_HSRR1,r12 -1: -#ifdef CONFIG_PPC_BOOK3S - stb r4,PACAHSRR_VALID(r13) -#endif - .endif - DEBUG_SRR_VALID \srr - -BEGIN_FTR_SECTION - stdcx. r0,0,r1 /* to clear the reservation */ -FTR_SECTION_ELSE - ldarx r0,0,r1 -ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) - - ld r3,_CCR(r1) - ld r4,_LINK(r1) - ld r5,_CTR(r1) - ld r6,_XER(r1) - li r0,0 - - REST_4GPRS(7, r1) - REST_2GPRS(11, r1) - REST_GPR(13, r1) - - mtcr r3 - mtlr r4 - mtctr r5 - mtspr SPRN_XER,r6 - - REST_4GPRS(2, r1) - REST_GPR(6, r1) - REST_GPR(0, r1) - REST_GPR(1, r1) - .ifc \srr,srr - RFI_TO_USER - .else - HRFI_TO_USER - .endif - b . /* prevent speculative execution */ - -.Lrestore_nvgprs_\srr\(): - REST_NVGPRS(r1) - b .Lfast_user_interrupt_return_\srr - - .balign IFETCH_ALIGN_BYTES -.Lkernel_interrupt_return_\srr\(): - addi r3,r1,STACK_FRAME_OVERHEAD - bl interrupt_exit_kernel_prepare - -.Lfast_kernel_interrupt_return_\srr\(): - cmpdi cr1,r3,0 -#ifdef CONFIG_PPC_BOOK3S - .ifc \srr,srr - lbz r4,PACASRR_VALID(r13) - .else - lbz r4,PACAHSRR_VALID(r13) - .endif - cmpdi r4,0 - li r4,0 - bne 1f -#endif - ld r11,_NIP(r1) - ld r12,_MSR(r1) - .ifc \srr,srr - mtspr SPRN_SRR0,r11 - mtspr SPRN_SRR1,r12 -1: -#ifdef CONFIG_PPC_BOOK3S - stb r4,PACASRR_VALID(r13) -#endif - .else - mtspr SPRN_HSRR0,r11 - mtspr SPRN_HSRR1,r12 -1: -#ifdef CONFIG_PPC_BOOK3S - stb r4,PACAHSRR_VALID(r13) -#endif - .endif - DEBUG_SRR_VALID \srr - -BEGIN_FTR_SECTION - stdcx. r0,0,r1 /* to clear the reservation */ -FTR_SECTION_ELSE - ldarx r0,0,r1 -ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) - - ld r3,_LINK(r1) - ld r4,_CTR(r1) - ld r5,_XER(r1) - ld r6,_CCR(r1) - li r0,0 - - REST_4GPRS(7, r1) - REST_2GPRS(11, r1) - - mtlr r3 - mtctr r4 - mtspr SPRN_XER,r5 - - /* - * Leaving a stale exception_marker on the stack can confuse - * the reliable stack unwinder later on. Clear it. - */ - std r0,STACK_FRAME_OVERHEAD-16(r1) - - REST_4GPRS(2, r1) - - bne- cr1,1f /* emulate stack store */ - mtcr r6 - REST_GPR(6, r1) - REST_GPR(0, r1) - REST_GPR(1, r1) - .ifc \srr,srr - RFI_TO_KERNEL - .else - HRFI_TO_KERNEL - .endif - b . /* prevent speculative execution */ - -1: /* - * Emulate stack store with update. New r1 value was already calculated - * and updated in our interrupt regs by emulate_loadstore, but we can't - * store the previous value of r1 to the stack before re-loading our - * registers from it, otherwise they could be clobbered. 
Use - * PACA_EXGEN as temporary storage to hold the store data, as - * interrupts are disabled here so it won't be clobbered. - */ - mtcr r6 - std r9,PACA_EXGEN+0(r13) - addi r9,r1,INT_FRAME_SIZE /* get original r1 */ - REST_GPR(6, r1) - REST_GPR(0, r1) - REST_GPR(1, r1) - std r9,0(r1) /* perform store component of stdu */ - ld r9,PACA_EXGEN+0(r13) - - .ifc \srr,srr - RFI_TO_KERNEL - .else - HRFI_TO_KERNEL - .endif - b . /* prevent speculative execution */ -.endm - -interrupt_return_macro srr -#ifdef CONFIG_PPC_BOOK3S -interrupt_return_macro hsrr -#endif - #ifdef CONFIG_PPC_RTAS /* * On CHRP, the Run-Time Abstraction Services (RTAS) have to be diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 79f2d1e61abd..79930b0bc781 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -194,8 +194,9 @@ CLOSE_FIXED_SECTION(first_256B) /* This value is used to mark exception frames on the stack. */ .section ".toc","aw" +/* This value is used to mark exception frames on the stack. */ exception_marker: - .tc ID_72656773_68657265[TC],0x7265677368657265 + .tc ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER .previous /* @@ -211,6 +212,8 @@ OPEN_TEXT_SECTION(0x100) USE_TEXT_SECTION() +#include "interrupt_64.S" + #ifdef CONFIG_PPC_BOOK3E /* * The booting_thread_hwid holds the thread id we want to boot in cpu diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S new file mode 100644 index 000000000000..e17a77a2c1a1 --- /dev/null +++ b/arch/powerpc/kernel/interrupt_64.S @@ -0,0 +1,635 @@ +#include +#include +#ifdef CONFIG_PPC_BOOK3S +#include +#else +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include + + .section ".toc","aw" +SYS_CALL_TABLE: + .tc sys_call_table[TC],sys_call_table + +#ifdef CONFIG_COMPAT +COMPAT_SYS_CALL_TABLE: + .tc compat_sys_call_table[TC],compat_sys_call_table +#endif + .previous + + .align 7 + +.macro DEBUG_SRR_VALID srr +#ifdef CONFIG_PPC_RFI_SRR_DEBUG + .ifc \srr,srr + mfspr r11,SPRN_SRR0 + ld r12,_NIP(r1) +100: tdne r11,r12 + EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) + mfspr r11,SPRN_SRR1 + ld r12,_MSR(r1) +100: tdne r11,r12 + EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) + .else + mfspr r11,SPRN_HSRR0 + ld r12,_NIP(r1) +100: tdne r11,r12 + EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) + mfspr r11,SPRN_HSRR1 + ld r12,_MSR(r1) +100: tdne r11,r12 + EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) + .endif +#endif +.endm + +#ifdef CONFIG_PPC_BOOK3S +.macro system_call_vectored name trapnr + .globl system_call_vectored_\name +system_call_vectored_\name: +_ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +BEGIN_FTR_SECTION + extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */ + bne .Ltabort_syscall +END_FTR_SECTION_IFSET(CPU_FTR_TM) +#endif + SCV_INTERRUPT_TO_KERNEL + mr r10,r1 + ld r1,PACAKSAVE(r13) + std r10,0(r1) + std r11,_NIP(r1) + std r12,_MSR(r1) + std r0,GPR0(r1) + std r10,GPR1(r1) + std r2,GPR2(r1) + ld r2,PACATOC(r13) + mfcr r12 + li r11,0 + /* Can we avoid saving r3-r8 in common case? 
*/ + std r3,GPR3(r1) + std r4,GPR4(r1) + std r5,GPR5(r1) + std r6,GPR6(r1) + std r7,GPR7(r1) + std r8,GPR8(r1) + /* Zero r9-r12, this should only be required when restoring all GPRs */ + std r11,GPR9(r1) + std r11,GPR10(r1) + std r11,GPR11(r1) + std r11,GPR12(r1) + std r9,GPR13(r1) + SAVE_NVGPRS(r1) + std r11,_XER(r1) + std r11,_LINK(r1) + std r11,_CTR(r1) + + li r11,\trapnr + std r11,_TRAP(r1) + std r12,_CCR(r1) + addi r10,r1,STACK_FRAME_OVERHEAD + ld r11,exception_marker@toc(r2) + std r11,-16(r10) /* "regshere" marker */ + +BEGIN_FTR_SECTION + HMT_MEDIUM +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + + /* + * scv enters with MSR[EE]=1 and is immediately considered soft-masked. + * The entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED, + * and interrupts may be masked and pending already. + * system_call_exception() will call trace_hardirqs_off() which means + * interrupts could already have been blocked before trace_hardirqs_off, + * but this is the best we can do. + */ + + /* Calling convention has r9 = orig r0, r10 = regs */ + mr r9,r0 + bl system_call_exception + +.Lsyscall_vectored_\name\()_exit: + addi r4,r1,STACK_FRAME_OVERHEAD + li r5,1 /* scv */ + bl syscall_exit_prepare + + ld r2,_CCR(r1) + ld r4,_NIP(r1) + ld r5,_MSR(r1) + +BEGIN_FTR_SECTION + stdcx. r0,0,r1 /* to clear the reservation */ +END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) + +BEGIN_FTR_SECTION + HMT_MEDIUM_LOW +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + + cmpdi r3,0 + bne .Lsyscall_vectored_\name\()_restore_regs + + /* rfscv returns with LR->NIA and CTR->MSR */ + mtlr r4 + mtctr r5 + + /* Could zero these as per ABI, but we may consider a stricter ABI + * which preserves these if libc implementations can benefit, so + * restore them for now until further measurement is done. */ + ld r0,GPR0(r1) + ld r4,GPR4(r1) + ld r5,GPR5(r1) + ld r6,GPR6(r1) + ld r7,GPR7(r1) + ld r8,GPR8(r1) + /* Zero volatile regs that may contain sensitive kernel data */ + li r9,0 + li r10,0 + li r11,0 + li r12,0 + mtspr SPRN_XER,r0 + + /* + * We don't need to restore AMR on the way back to userspace for KUAP. + * The value of AMR only matters while we're in the kernel. + */ + mtcr r2 + ld r2,GPR2(r1) + ld r3,GPR3(r1) + ld r13,GPR13(r1) + ld r1,GPR1(r1) + RFSCV_TO_USER + b . /* prevent speculative execution */ + +.Lsyscall_vectored_\name\()_restore_regs: + li r3,0 + mtmsrd r3,1 + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r5 + + ld r3,_CTR(r1) + ld r4,_LINK(r1) + ld r5,_XER(r1) + + REST_NVGPRS(r1) + ld r0,GPR0(r1) + mtcr r2 + mtctr r3 + mtlr r4 + mtspr SPRN_XER,r5 + REST_10GPRS(2, r1) + REST_2GPRS(12, r1) + ld r1,GPR1(r1) + RFI_TO_USER +.endm + +system_call_vectored common 0x3000 +/* + * We instantiate another entry copy for the SIGILL variant, with TRAP=0x7ff0 + * which is tested by system_call_exception when r0 is -1 (as set by vector + * entry code). + */ +system_call_vectored sigill 0x7ff0 + + +/* + * Entered via kernel return set up by kernel/sstep.c, must match entry regs + */ + .globl system_call_vectored_emulate +system_call_vectored_emulate: +_ASM_NOKPROBE_SYMBOL(system_call_vectored_emulate) + li r10,IRQS_ALL_DISABLED + stb r10,PACAIRQSOFTMASK(r13) + b system_call_vectored_common +#endif + + .balign IFETCH_ALIGN_BYTES + .globl system_call_common_real +system_call_common_real: + ld r10,PACAKMSR(r13) /* get MSR value for kernel */ + mtmsrd r10 + + .balign IFETCH_ALIGN_BYTES + .globl system_call_common +system_call_common: +_ASM_NOKPROBE_SYMBOL(system_call_common) +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +BEGIN_FTR_SECTION + extrdi. 
r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */ + bne .Ltabort_syscall +END_FTR_SECTION_IFSET(CPU_FTR_TM) +#endif + mr r10,r1 + ld r1,PACAKSAVE(r13) + std r10,0(r1) + std r11,_NIP(r1) + std r12,_MSR(r1) + std r0,GPR0(r1) + std r10,GPR1(r1) + std r2,GPR2(r1) +#ifdef CONFIG_PPC_FSL_BOOK3E +START_BTB_FLUSH_SECTION + BTB_FLUSH(r10) +END_BTB_FLUSH_SECTION +#endif + ld r2,PACATOC(r13) + mfcr r12 + li r11,0 + /* Can we avoid saving r3-r8 in common case? */ + std r3,GPR3(r1) + std r4,GPR4(r1) + std r5,GPR5(r1) + std r6,GPR6(r1) + std r7,GPR7(r1) + std r8,GPR8(r1) + /* Zero r9-r12, this should only be required when restoring all GPRs */ + std r11,GPR9(r1) + std r11,GPR10(r1) + std r11,GPR11(r1) + std r11,GPR12(r1) + std r9,GPR13(r1) + SAVE_NVGPRS(r1) + std r11,_XER(r1) + std r11,_CTR(r1) + mflr r10 + + /* + * This clears CR0.SO (bit 28), which is the error indication on + * return from this system call. + */ + rldimi r12,r11,28,(63-28) + li r11,0xc00 + std r10,_LINK(r1) + std r11,_TRAP(r1) + std r12,_CCR(r1) + addi r10,r1,STACK_FRAME_OVERHEAD + ld r11,exception_marker@toc(r2) + std r11,-16(r10) /* "regshere" marker */ + +#ifdef CONFIG_PPC_BOOK3S + li r11,1 + stb r11,PACASRR_VALID(r13) +#endif + + /* + * We always enter kernel from userspace with irq soft-mask enabled and + * nothing pending. system_call_exception() will call + * trace_hardirqs_off(). + */ + li r11,IRQS_ALL_DISABLED + li r12,PACA_IRQ_HARD_DIS + stb r11,PACAIRQSOFTMASK(r13) + stb r12,PACAIRQHAPPENED(r13) + + /* Calling convention has r9 = orig r0, r10 = regs */ + mr r9,r0 + bl system_call_exception + +.Lsyscall_exit: + addi r4,r1,STACK_FRAME_OVERHEAD + li r5,0 /* !scv */ + bl syscall_exit_prepare + + ld r2,_CCR(r1) + ld r6,_LINK(r1) + mtlr r6 + +#ifdef CONFIG_PPC_BOOK3S + lbz r4,PACASRR_VALID(r13) + cmpdi r4,0 + bne 1f + li r4,0 + stb r4,PACASRR_VALID(r13) +#endif + ld r4,_NIP(r1) + ld r5,_MSR(r1) + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r5 +1: + DEBUG_SRR_VALID srr + +BEGIN_FTR_SECTION + stdcx. r0,0,r1 /* to clear the reservation */ +END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) + + cmpdi r3,0 + bne .Lsyscall_restore_regs + /* Zero volatile regs that may contain sensitive kernel data */ + li r0,0 + li r4,0 + li r5,0 + li r6,0 + li r7,0 + li r8,0 + li r9,0 + li r10,0 + li r11,0 + li r12,0 + mtctr r0 + mtspr SPRN_XER,r0 +.Lsyscall_restore_regs_cont: + +BEGIN_FTR_SECTION + HMT_MEDIUM_LOW +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + + /* + * We don't need to restore AMR on the way back to userspace for KUAP. + * The value of AMR only matters while we're in the kernel. + */ + mtcr r2 + ld r2,GPR2(r1) + ld r3,GPR3(r1) + ld r13,GPR13(r1) + ld r1,GPR1(r1) + RFI_TO_USER + b . /* prevent speculative execution */ + +.Lsyscall_restore_regs: + ld r3,_CTR(r1) + ld r4,_XER(r1) + REST_NVGPRS(r1) + mtctr r3 + mtspr SPRN_XER,r4 + ld r0,GPR0(r1) + REST_8GPRS(4, r1) + ld r12,GPR12(r1) + b .Lsyscall_restore_regs_cont + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +.Ltabort_syscall: + /* Firstly we need to enable TM in the kernel */ + mfmsr r10 + li r9, 1 + rldimi r10, r9, MSR_TM_LG, 63-MSR_TM_LG + mtmsrd r10, 0 + + /* tabort, this dooms the transaction, nothing else */ + li r9, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT) + TABORT(R9) + + /* + * Return directly to userspace. We have corrupted user register state, + * but userspace will never see that register state. Execution will + * resume after the tbegin of the aborted transaction with the + * checkpointed register state. 
+ */ + li r9, MSR_RI + andc r10, r10, r9 + mtmsrd r10, 1 + mtspr SPRN_SRR0, r11 + mtspr SPRN_SRR1, r12 + RFI_TO_USER + b . /* prevent speculative execution */ +#endif + +#ifdef CONFIG_PPC_BOOK3S +_GLOBAL(ret_from_fork_scv) + bl schedule_tail + REST_NVGPRS(r1) + li r3,0 /* fork() return value */ + b .Lsyscall_vectored_common_exit +#endif + +_GLOBAL(ret_from_fork) + bl schedule_tail + REST_NVGPRS(r1) + li r3,0 /* fork() return value */ + b .Lsyscall_exit + +_GLOBAL(ret_from_kernel_thread) + bl schedule_tail + REST_NVGPRS(r1) + mtctr r14 + mr r3,r15 +#ifdef PPC64_ELF_ABI_v2 + mr r12,r14 +#endif + bctrl + li r3,0 + b .Lsyscall_exit + + /* + * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not + * touched, no exit work created, then this can be used. + */ + .balign IFETCH_ALIGN_BYTES + .globl fast_interrupt_return_srr +fast_interrupt_return_srr: +_ASM_NOKPROBE_SYMBOL(fast_interrupt_return_srr) + kuap_check_amr r3, r4 + ld r5,_MSR(r1) + andi. r0,r5,MSR_PR +#ifdef CONFIG_PPC_BOOK3S + bne .Lfast_user_interrupt_return_amr_srr + kuap_kernel_restore r3, r4 + andi. r0,r5,MSR_RI + li r3,0 /* 0 return value, no EMULATE_STACK_STORE */ + bne+ .Lfast_kernel_interrupt_return_srr + addi r3,r1,STACK_FRAME_OVERHEAD + bl unrecoverable_exception + b . /* should not get here */ +#else + bne .Lfast_user_interrupt_return_srr + b .Lfast_kernel_interrupt_return_srr +#endif + +.macro interrupt_return_macro srr + .balign IFETCH_ALIGN_BYTES + .globl interrupt_return_\srr +interrupt_return_\srr\(): +_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()) + ld r4,_MSR(r1) + andi. r0,r4,MSR_PR + beq .Lkernel_interrupt_return_\srr + addi r3,r1,STACK_FRAME_OVERHEAD + bl interrupt_exit_user_prepare + cmpdi r3,0 + bne- .Lrestore_nvgprs_\srr + +#ifdef CONFIG_PPC_BOOK3S +.Lfast_user_interrupt_return_amr_\srr\(): + kuap_user_restore r3, r4 +#endif +.Lfast_user_interrupt_return_\srr\(): + +BEGIN_FTR_SECTION + ld r10,_PPR(r1) + mtspr SPRN_PPR,r10 +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + +#ifdef CONFIG_PPC_BOOK3S + .ifc \srr,srr + lbz r4,PACASRR_VALID(r13) + .else + lbz r4,PACAHSRR_VALID(r13) + .endif + cmpdi r4,0 + li r4,0 + bne 1f +#endif + ld r11,_NIP(r1) + ld r12,_MSR(r1) + .ifc \srr,srr + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r12 +1: +#ifdef CONFIG_PPC_BOOK3S + stb r4,PACASRR_VALID(r13) +#endif + .else + mtspr SPRN_HSRR0,r11 + mtspr SPRN_HSRR1,r12 +1: +#ifdef CONFIG_PPC_BOOK3S + stb r4,PACAHSRR_VALID(r13) +#endif + .endif + DEBUG_SRR_VALID \srr + +BEGIN_FTR_SECTION + stdcx. r0,0,r1 /* to clear the reservation */ +FTR_SECTION_ELSE + ldarx r0,0,r1 +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) + + ld r3,_CCR(r1) + ld r4,_LINK(r1) + ld r5,_CTR(r1) + ld r6,_XER(r1) + li r0,0 + + REST_4GPRS(7, r1) + REST_2GPRS(11, r1) + REST_GPR(13, r1) + + mtcr r3 + mtlr r4 + mtctr r5 + mtspr SPRN_XER,r6 + + REST_4GPRS(2, r1) + REST_GPR(6, r1) + REST_GPR(0, r1) + REST_GPR(1, r1) + .ifc \srr,srr + RFI_TO_USER + .else + HRFI_TO_USER + .endif + b . 
/* prevent speculative execution */ + +.Lrestore_nvgprs_\srr\(): + REST_NVGPRS(r1) + b .Lfast_user_interrupt_return_\srr + + .balign IFETCH_ALIGN_BYTES +.Lkernel_interrupt_return_\srr\(): + addi r3,r1,STACK_FRAME_OVERHEAD + bl interrupt_exit_kernel_prepare + +.Lfast_kernel_interrupt_return_\srr\(): + cmpdi cr1,r3,0 +#ifdef CONFIG_PPC_BOOK3S + .ifc \srr,srr + lbz r4,PACASRR_VALID(r13) + .else + lbz r4,PACAHSRR_VALID(r13) + .endif + cmpdi r4,0 + li r4,0 + bne 1f +#endif + ld r11,_NIP(r1) + ld r12,_MSR(r1) + .ifc \srr,srr + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r12 +1: +#ifdef CONFIG_PPC_BOOK3S + stb r4,PACASRR_VALID(r13) +#endif + .else + mtspr SPRN_HSRR0,r11 + mtspr SPRN_HSRR1,r12 +1: +#ifdef CONFIG_PPC_BOOK3S + stb r4,PACAHSRR_VALID(r13) +#endif + .endif + DEBUG_SRR_VALID \srr + +BEGIN_FTR_SECTION + stdcx. r0,0,r1 /* to clear the reservation */ +FTR_SECTION_ELSE + ldarx r0,0,r1 +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) + + ld r3,_LINK(r1) + ld r4,_CTR(r1) + ld r5,_XER(r1) + ld r6,_CCR(r1) + li r0,0 + + REST_4GPRS(7, r1) + REST_2GPRS(11, r1) + + mtlr r3 + mtctr r4 + mtspr SPRN_XER,r5 + + /* + * Leaving a stale exception_marker on the stack can confuse + * the reliable stack unwinder later on. Clear it. + */ + std r0,STACK_FRAME_OVERHEAD-16(r1) + + REST_4GPRS(2, r1) + + bne- cr1,1f /* emulate stack store */ + mtcr r6 + REST_GPR(6, r1) + REST_GPR(0, r1) + REST_GPR(1, r1) + .ifc \srr,srr + RFI_TO_KERNEL + .else + HRFI_TO_KERNEL + .endif + b . /* prevent speculative execution */ + +1: /* + * Emulate stack store with update. New r1 value was already calculated + * and updated in our interrupt regs by emulate_loadstore, but we can't + * store the previous value of r1 to the stack before re-loading our + * registers from it, otherwise they could be clobbered. Use + * PACA_EXGEN as temporary storage to hold the store data, as + * interrupts are disabled here so it won't be clobbered. + */ + mtcr r6 + std r9,PACA_EXGEN+0(r13) + addi r9,r1,INT_FRAME_SIZE /* get original r1 */ + REST_GPR(6, r1) + REST_GPR(0, r1) + REST_GPR(1, r1) + std r9,0(r1) /* perform store component of stdu */ + ld r9,PACA_EXGEN+0(r13) + + .ifc \srr,srr + RFI_TO_KERNEL + .else + HRFI_TO_KERNEL + .endif + b . /* prevent speculative execution */ +.endm + +interrupt_return_macro srr +#ifdef CONFIG_PPC_BOOK3S +interrupt_return_macro hsrr +#endif -- cgit v1.2.3 From dd152f70bdc1b91445b10c65ac874b90c93fb3b5 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 18 Jun 2021 01:51:06 +1000 Subject: powerpc/64s: system call avoid setting MSR[RI] until we set MSR[EE] This extends the MSR[RI]=0 window a little further into the system call in order to pair RI and EE enabling with a single mtmsrd. 
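This works because the L=1 form of mtmsrd alters only MSR[EE] and MSR[RI], leaving every other MSR bit untouched, so loading a register of all-ones sets exactly those two bits with one serializing instruction. A rough C-with-inline-asm illustration; msr_enable_ee_ri() is a hypothetical name for this sketch, not an existing kernel helper:

	static inline void msr_enable_ee_ri(void)
	{
		/* mtmsrd rS,1 writes only MSR[EE] and MSR[RI]; all other bits
		 * of rS are ignored, so an all-ones source enables both at once. */
		unsigned long all_ones = ~0UL;
		asm volatile("mtmsrd %0,1" : : "r" (all_ones) : "memory");
	}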
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210617155116.2167984-8-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 2 -- arch/powerpc/kernel/interrupt_64.S | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 3d238a3b2a24..18c96df269c0 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1861,8 +1861,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) mtctr r10 bctr .else - li r10,MSR_RI - mtmsrd r10,1 /* Set RI (EE=0) */ #ifdef CONFIG_RELOCATABLE __LOAD_HANDLER(r10, system_call_common) mtctr r10 diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index e17a77a2c1a1..ab6b99609d0e 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -283,9 +283,9 @@ END_BTB_FLUSH_SECTION * trace_hardirqs_off(). */ li r11,IRQS_ALL_DISABLED - li r12,PACA_IRQ_HARD_DIS + li r12,-1 /* Set MSR_EE and MSR_RI */ stb r11,PACAIRQSOFTMASK(r13) - stb r12,PACAIRQHAPPENED(r13) + mtmsrd r12,1 /* Calling convention has r9 = orig r0, r10 = regs */ mr r9,r0 -- cgit v1.2.3 From 63e40806eea984f770c992120bbfd71b589ea580 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 18 Jun 2021 01:51:07 +1000 Subject: powerpc/64s: save one more register in the masked interrupt handler This frees up one more register (and takes advantage of that to clean things up a little bit). This register will be used in the following patch. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210617155116.2167984-9-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 18c96df269c0..ee4283d014a4 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -2633,7 +2633,6 @@ INT_DEFINE_END(soft_nmi) * and run it entirely with interrupts hard disabled. */ EXC_COMMON_BEGIN(soft_nmi_common) - mfspr r11,SPRN_SRR0 mr r10,r1 ld r1,PACAEMERGSP(r13) subi r1,r1,INT_FRAME_SIZE @@ -2668,19 +2667,24 @@ masked_Hinterrupt: .else masked_interrupt: .endif - lbz r11,PACAIRQHAPPENED(r13) - or r11,r11,r10 - stb r11,PACAIRQHAPPENED(r13) + stw r9,PACA_EXGEN+EX_CCR(r13) + lbz r9,PACAIRQHAPPENED(r13) + or r9,r9,r10 + stb r9,PACAIRQHAPPENED(r13) + + .if ! \hsrr cmpwi r10,PACA_IRQ_DEC bne 1f - lis r10,0x7fff - ori r10,r10,0xffff - mtspr SPRN_DEC,r10 + LOAD_REG_IMMEDIATE(r9, 0x7fffffff) + mtspr SPRN_DEC,r9 #ifdef CONFIG_PPC_WATCHDOG + lwz r9,PACA_EXGEN+EX_CCR(r13) b soft_nmi_common #else b 2f #endif + .endif + 1: andi. 
r10,r10,PACA_IRQ_MUST_HARD_MASK beq 2f xori r12,r12,MSR_EE /* clear MSR_EE */ @@ -2689,17 +2693,19 @@ masked_interrupt: .else mtspr SPRN_SRR1,r12 .endif - ori r11,r11,PACA_IRQ_HARD_DIS - stb r11,PACAIRQHAPPENED(r13) + ori r9,r9,PACA_IRQ_HARD_DIS + stb r9,PACAIRQHAPPENED(r13) 2: /* done */ - li r10,0 + li r9,0 .if \hsrr - stb r10,PACAHSRR_VALID(r13) + stb r9,PACAHSRR_VALID(r13) .else - stb r10,PACASRR_VALID(r13) + stb r9,PACASRR_VALID(r13) .endif - ld r10,PACA_EXGEN+EX_CTR(r13) - mtctr r10 + + ld r9,PACA_EXGEN+EX_CTR(r13) + mtctr r9 + lwz r9,PACA_EXGEN+EX_CCR(r13) mtcrf 0x80,r9 std r1,PACAR1(r13) ld r9,PACA_EXGEN+EX_R9(r13) -- cgit v1.2.3 From f23699c93becd746295aaa506537882a46a62219 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 18 Jun 2021 01:51:08 +1000 Subject: powerpc/64: allow alternate return locations for soft-masked interrupts The exception table fixup adjusts a failed page fault's interrupt return location if it was taken at an address specified in the exception table, to a corresponding fixup handler address. Introduce a variation of that idea which adds a fixup table for NMIs and soft-masked asynchronous interrupts. This will be used to protect certain critical sections that are sensitive to being clobbered by interrupts coming in (due to using the same SPRs and/or irq soft-mask state). Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210617155116.2167984-10-npiggin@gmail.com --- arch/powerpc/include/asm/interrupt.h | 13 ++++++++++++ arch/powerpc/include/asm/ppc_asm.h | 8 +++++++ arch/powerpc/kernel/exceptions-64e.S | 37 ++++++++++++++++++++++++++++++-- arch/powerpc/kernel/exceptions-64s.S | 41 ++++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/vmlinux.lds.S | 10 +++++++++ arch/powerpc/lib/Makefile | 2 +- arch/powerpc/lib/restart_table.c | 30 ++++++++++++++++++++++++++ 7 files changed, 138 insertions(+), 3 deletions(-) create mode 100644 arch/powerpc/lib/restart_table.c diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index de36fb5d9c51..a4bf6c0013bb 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -73,6 +73,11 @@ #include #include +#ifdef CONFIG_PPC64 +extern char __end_soft_masked[]; +unsigned long search_kernel_restart_table(unsigned long addr); +#endif + #ifdef CONFIG_PPC_BOOK3S_64 static inline void srr_regs_clobbered(void) { @@ -269,6 +274,14 @@ static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct inter * new work to do (must use irq_work for that). */ +#ifdef CONFIG_PPC64 + if (arch_irq_disabled_regs(regs)) { + unsigned long rst = search_kernel_restart_table(regs->nip); + if (rst) + regs_set_return_ip(regs, rst); + } +#endif + #ifdef CONFIG_PPC64 if (nmi_disables_ftrace(regs)) this_cpu_set_ftrace_enabled(state->ftrace_enabled); diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index d6739d700f0a..c9c2c36c1f8f 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -762,6 +762,14 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96) stringify_in_c(.long (_target) - . 
;) \ stringify_in_c(.previous) +#define RESTART_TABLE(_start, _end, _target) \ + stringify_in_c(.section __restart_table,"a";)\ + stringify_in_c(.balign 8;) \ + stringify_in_c(.llong (_start);) \ + stringify_in_c(.llong (_end);) \ + stringify_in_c(.llong (_target);) \ + stringify_in_c(.previous) + #ifdef CONFIG_PPC_FSL_BOOK3E #define BTB_FLUSH(reg) \ lis reg,BUCSR_INIT@h; \ diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index b35c97c7082f..1b79f8a75298 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -901,6 +901,28 @@ kernel_dbg_exc: bl unknown_exception b interrupt_return +.macro SEARCH_RESTART_TABLE + LOAD_REG_IMMEDIATE_SYM(r14, r11, __start___restart_table) + LOAD_REG_IMMEDIATE_SYM(r15, r11, __stop___restart_table) +300: + cmpd r14,r15 + beq 302f + ld r11,0(r14) + cmpld r10,r11 + blt 301f + ld r11,8(r14) + cmpld r10,r11 + bge 301f + ld r11,16(r14) + b 303f +301: + addi r14,r14,24 + b 300b +302: + li r11,0 +303: +.endm + /* * An interrupt came in while soft-disabled; We mark paca->irq_happened * accordingly and if the interrupt is level sensitive, we hard disable @@ -909,6 +931,9 @@ kernel_dbg_exc: */ .macro masked_interrupt_book3e paca_irq full_mask + std r14,PACA_EXGEN+EX_R14(r13) + std r15,PACA_EXGEN+EX_R15(r13) + lbz r10,PACAIRQHAPPENED(r13) .if \full_mask == 1 ori r10,r10,\paca_irq | PACA_IRQ_HARD_DIS @@ -918,15 +943,23 @@ kernel_dbg_exc: stb r10,PACAIRQHAPPENED(r13) .if \full_mask == 1 - rldicl r10,r11,48,1 /* clear MSR_EE */ - rotldi r11,r10,16 + xori r11,r11,MSR_EE /* clear MSR_EE */ mtspr SPRN_SRR1,r11 .endif + mfspr r10,SPRN_SRR0 + SEARCH_RESTART_TABLE + cmpdi r11,0 + beq 1f + mtspr SPRN_SRR0,r11 /* return to restart address */ +1: + lwz r11,PACA_EXGEN+EX_CR(r13) mtcr r11 ld r10,PACA_EXGEN+EX_R10(r13) ld r11,PACA_EXGEN+EX_R11(r13) + ld r14,PACA_EXGEN+EX_R14(r13) + ld r15,PACA_EXGEN+EX_R15(r13) mfspr r13,SPRN_SPRG_GEN_SCRATCH rfi b . diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index ee4283d014a4..b8ed1ce8c347 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -591,6 +591,36 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) __GEN_COMMON_BODY \name .endm +.macro SEARCH_RESTART_TABLE +#ifdef CONFIG_RELOCATABLE + mr r12,r2 + ld r2,PACATOC(r13) + LOAD_REG_ADDR(r9, __start___restart_table) + LOAD_REG_ADDR(r10, __stop___restart_table) + mr r2,r12 +#else + LOAD_REG_IMMEDIATE_SYM(r9, r12, __start___restart_table) + LOAD_REG_IMMEDIATE_SYM(r10, r12, __stop___restart_table) +#endif +300: + cmpd r9,r10 + beq 302f + ld r12,0(r9) + cmpld r11,r12 + blt 301f + ld r12,8(r9) + cmpld r11,r12 + bge 301f + ld r12,16(r9) + b 303f +301: + addi r9,r9,24 + b 300b +302: + li r12,0 +303: +.endm + /* * Restore all registers including H/SRR0/1 saved in a stack frame of a * standard exception. 
@@ -2646,6 +2676,7 @@ EXC_COMMON_BEGIN(soft_nmi_common) mtmsrd r9,1 kuap_kernel_restore r9, r10 + EXCEPTION_RESTORE_REGS hsrr=0 RFI_TO_KERNEL @@ -2703,6 +2734,16 @@ masked_interrupt: stb r9,PACASRR_VALID(r13) .endif + SEARCH_RESTART_TABLE + cmpdi r12,0 + beq 3f + .if \hsrr + mtspr SPRN_HSRR0,r12 + .else + mtspr SPRN_SRR0,r12 + .endif +3: + ld r9,PACA_EXGEN+EX_CTR(r13) mtctr r9 lwz r9,PACA_EXGEN+EX_CCR(r13) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 72fa3c00229a..16c5e13e00c4 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -9,6 +9,14 @@ #define EMITS_PT_NOTE #define RO_EXCEPTION_TABLE_ALIGN 0 +#define RESTART_TABLE(align) \ + . = ALIGN(align); \ + __restart_table : AT(ADDR(__restart_table) - LOAD_OFFSET) { \ + __start___restart_table = .; \ + KEEP(*(__restart_table)) \ + __stop___restart_table = .; \ + } + #include #include #include @@ -124,6 +132,8 @@ SECTIONS RO_DATA(PAGE_SIZE) #ifdef CONFIG_PPC64 + RESTART_TABLE(8) + . = ALIGN(8); __stf_entry_barrier_fixup : AT(ADDR(__stf_entry_barrier_fixup) - LOAD_OFFSET) { __start___stf_entry_barrier_fixup = .; diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index cc1a8a0f311e..4c92c80454f3 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -42,7 +42,7 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \ memcpy_power7.o obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \ - memcpy_64.o copy_mc_64.o + memcpy_64.o copy_mc_64.o restart_table.o ifndef CONFIG_PPC_QUEUED_SPINLOCKS obj64-$(CONFIG_SMP) += locks.o diff --git a/arch/powerpc/lib/restart_table.c b/arch/powerpc/lib/restart_table.c new file mode 100644 index 000000000000..7cd20757cc33 --- /dev/null +++ b/arch/powerpc/lib/restart_table.c @@ -0,0 +1,30 @@ +#include +#include + +struct restart_table_entry { + unsigned long start; + unsigned long end; + unsigned long fixup; +}; + +extern struct restart_table_entry __start___restart_table[]; +extern struct restart_table_entry __stop___restart_table[]; + +/* Given an address, look for it in the kernel exception table */ +unsigned long search_kernel_restart_table(unsigned long addr) +{ + struct restart_table_entry *rte = __start___restart_table; + + while (rte < __stop___restart_table) { + unsigned long start = rte->start; + unsigned long end = rte->end; + unsigned long fixup = rte->fixup; + + if (addr >= start && addr < end) + return fixup; + + rte++; + } + return 0; +} +NOKPROBE_SYMBOL(search_kernel_restart_table); -- cgit v1.2.3 From 862fa563524b9f92d7e89fe332732bd3421772db Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 18 Jun 2021 01:51:09 +1000 Subject: powerpc/64: interrupt soft-enable race fix Prevent interrupt restore from allowing racing hard interrupts going ahead of previous soft-pending ones, by using the soft-masked restart handler to allow a store to clear the soft-mask while knowing nothing is soft-pending. This probably doesn't matter much in practice, but it's a simple demonstrator / test case to exercise the restart table logic. 
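To see the race this closes, here is a plain-C model of the fast path (a sketch with made-up types; the real implementation is the asm goto sequence in the diff that follows):

	/* Without a restart sequence, the window between check (1) and
	 * store (2) can lose a soft-masked interrupt: it becomes pending
	 * after (1) observed nothing, then (2) unmasks without replaying.
	 */
	struct paca_model {
		unsigned char irq_happened;	/* pending soft-masked irqs */
		unsigned char irq_soft_mask;	/* 0 == irqs enabled */
	};

	static void irq_restore_fast_path(struct paca_model *paca)
	{
		if (paca->irq_happened)		/* (1) anything pending? */
			return;			/* caller takes the replay path */
		/* a masked interrupt landing here would go unreplayed... */
		paca->irq_soft_mask = 0;	/* (2) unmask */
	}

The restart table entry makes the pair atomic with respect to soft-masked interrupts: one that hits between (1) and (2) has its return address redirected back to (1), so the store never runs on a stale view of irq_happened.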
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210617155116.2167984-11-npiggin@gmail.com --- arch/powerpc/kernel/irq.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 72cb45393ef2..8428caf3194e 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -217,6 +217,100 @@ static inline void replay_soft_interrupts_irqrestore(void) #define replay_soft_interrupts_irqrestore() replay_soft_interrupts() #endif +#ifdef CONFIG_CC_HAS_ASM_GOTO +notrace void arch_local_irq_restore(unsigned long mask) +{ + unsigned char irq_happened; + + /* Write the new soft-enabled value if it is a disable */ + if (mask) { + irq_soft_mask_set(mask); + return; + } + + /* + * After the stb, interrupts are unmasked and there are no interrupts + * pending replay. The restart sequence makes this atomic with + * respect to soft-masked interrupts. If this was just a simple code + * sequence, a soft-masked interrupt could become pending right after + * the comparison and before the stb. + * + * This allows interrupts to be unmasked without hard disabling, and + * also without new hard interrupts coming in ahead of pending ones. + */ + asm_volatile_goto( +"1: \n" +" lbz 9,%0(13) \n" +" cmpwi 9,0 \n" +" bne %l[happened] \n" +" stb 9,%1(13) \n" +"2: \n" + RESTART_TABLE(1b, 2b, 1b) + : : "i" (offsetof(struct paca_struct, irq_happened)), + "i" (offsetof(struct paca_struct, irq_soft_mask)) + : "cr0", "r9" + : happened); + + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(!(mfmsr() & MSR_EE)); + + return; + +happened: + irq_happened = get_irq_happened(); + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(!irq_happened); + + if (irq_happened == PACA_IRQ_HARD_DIS) { + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(mfmsr() & MSR_EE); + irq_soft_mask_set(IRQS_ENABLED); + local_paca->irq_happened = 0; + __hard_irq_enable(); + return; + } + + /* Have interrupts to replay, need to hard disable first */ + if (!(irq_happened & PACA_IRQ_HARD_DIS)) { + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { + if (!(mfmsr() & MSR_EE)) { + /* + * An interrupt could have come in and cleared + * MSR[EE] and set IRQ_HARD_DIS, so check + * IRQ_HARD_DIS again and warn if it is still + * clear. + */ + irq_happened = get_irq_happened(); + WARN_ON_ONCE(!(irq_happened & PACA_IRQ_HARD_DIS)); + } + } + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + } else { + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { + if (WARN_ON_ONCE(mfmsr() & MSR_EE)) + __hard_irq_disable(); + } + } + + /* + * Disable preempt here, so that the below preempt_enable will + * perform resched if required (a replayed interrupt may set + * need_resched). 
+ */ + preempt_disable(); + irq_soft_mask_set(IRQS_ALL_DISABLED); + trace_hardirqs_off(); + + replay_soft_interrupts_irqrestore(); + local_paca->irq_happened = 0; + + trace_hardirqs_on(); + irq_soft_mask_set(IRQS_ENABLED); + __hard_irq_enable(); + preempt_enable(); +} +#else notrace void arch_local_irq_restore(unsigned long mask) { unsigned char irq_happened; @@ -288,6 +382,7 @@ notrace void arch_local_irq_restore(unsigned long mask) __hard_irq_enable(); preempt_enable(); } +#endif EXPORT_SYMBOL(arch_local_irq_restore); /* -- cgit v1.2.3 From 9d1988ca87dd90ecf80a0601c7fd13071fbb1a83 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 18 Jun 2021 01:51:10 +1000 Subject: powerpc/64: treat low kernel text as irqs soft-masked Treat code below __end_soft_masked as soft-masked for the purpose of alternate return. 64s already mostly does this for scv entry. This will be used to exit from interrupts without disabling MSR[EE]. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210617155116.2167984-12-npiggin@gmail.com --- arch/powerpc/include/asm/interrupt.h | 11 ++++++++--- arch/powerpc/kernel/exceptions-64e.S | 12 +++++++++++- arch/powerpc/kernel/exceptions-64s.S | 19 +++++++++++-------- arch/powerpc/kernel/interrupt_64.S | 6 +++++- 4 files changed, 35 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index a4bf6c0013bb..832079e824bd 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -146,8 +146,13 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup * CT_WARN_ON comes here via program_check_exception, * so avoid recursion. */ - if (TRAP(regs) != INTERRUPT_PROGRAM) + if (TRAP(regs) != INTERRUPT_PROGRAM) { CT_WARN_ON(ct_state() != CONTEXT_KERNEL); + BUG_ON(regs->nip < (unsigned long)__end_soft_masked); + } + /* Move this under a debugging check */ + if (arch_irq_disabled_regs(regs)) + BUG_ON(search_kernel_restart_table(regs->nip)); } #endif @@ -238,8 +243,8 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte local_paca->irq_happened |= PACA_IRQ_HARD_DIS; if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !(regs->msr & MSR_PR) && - regs->nip < (unsigned long)__end_interrupts) { - // Kernel code running below __end_interrupts is + regs->nip < (unsigned long)__end_soft_masked) { + // Kernel code running below __end_soft_masked is // implicitly soft-masked. regs->softe = IRQS_ALL_DISABLED; } diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 1b79f8a75298..22fcd95dd8dc 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -342,7 +342,17 @@ ret_from_mc_except: #define PROLOG_ADDITION_MASKABLE_GEN(n) \ lbz r10,PACAIRQSOFTMASK(r13); /* are irqs soft-masked? */ \ andi. r10,r10,IRQS_DISABLED; /* yes -> go out of line */ \ - bne masked_interrupt_book3e_##n + bne masked_interrupt_book3e_##n; \ + /* Kernel code below __end_soft_masked is implicitly masked */ \ + andi. 
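The rule itself is a simple address comparison; a hedged C rendering (the helper name is illustrative, the symbol is the one this series adds):

	/* Kernel text below __end_soft_masked is implicitly soft-masked:
	 * an interrupt taken there must act as if irqs were disabled, and
	 * an NMI may redirect the return address via the restart table.
	 */
	extern char __end_soft_masked[];

	static inline bool implicitly_soft_masked(unsigned long nip, bool is_user)
	{
		if (is_user)
			return false;
		return nip < (unsigned long)__end_soft_masked;
	}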
r10,r11,MSR_PR; \ + bne 1f; /* user -> not masked */ \ + std r14,PACA_EXGEN+EX_R14(r13); \ + LOAD_REG_IMMEDIATE_SYM(r14, r10, __end_soft_masked); \ + mfspr r10,SPRN_SRR0; \ + cmpld r10,r14; \ + ld r14,PACA_EXGEN+EX_R14(r13); \ + blt masked_interrupt_book3e_##n; \ +1: /* * Additional regs must be re-loaded from paca before EXCEPTION_COMMON* is diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index b8ed1ce8c347..ecd07bf604c5 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -430,10 +430,13 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) andi. r10,r12,MSR_PR bne 2f - /* Kernel code running below __end_interrupts is implicitly * soft-masked */ - LOAD_HANDLER(r10, __end_interrupts) + /* + * Kernel code running below __end_soft_masked is implicitly + * soft-masked + */ + LOAD_HANDLER(r10, __end_soft_masked) cmpld r11,r10 + li r10,IMASK blt- 1f @@ -751,17 +754,17 @@ __start_interrupts: * scv instructions enter the kernel without changing EE, RI, ME, or HV. * In particular, this means we can take a maskable interrupt at any point * in the scv handler, which is unlike any other interrupt. This is solved - * by treating the instruction addresses below __end_interrupts as being + * by treating the instruction addresses below __end_soft_masked as being * soft-masked. * * AIL-0 mode scv exceptions go to 0x17000-0x17fff, but we set AIL-3 and * ensure scv is never executed with relocation off, which means AIL-0 * should never happen. * - * Before leaving the below __end_interrupts text, at least of the following - * must be true: + * Before leaving the following inside-__end_soft_masked text, at least of the + * following must be true: * - MSR[PR]=1 (i.e., return to userspace) - * - MSR_EE|MSR_RI is set (no reentrant exceptions) + * - MSR_EE|MSR_RI is clear (no reentrant exceptions) * - Standard kernel environment is set up (stack, paca, etc) * * Call convention: @@ -2957,7 +2960,7 @@ MASKED_INTERRUPT hsrr=1 USE_FIXED_SECTION(virt_trampolines) /* - * All code below __end_interrupts is treated as soft-masked. If + * All code below __end_soft_masked is treated as soft-masked. If * any code runs here with MSR[EE]=1, it must then cope with pending * soft interrupt being raised (i.e., by ensuring it is replayed). * diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index ab6b99609d0e..33ee83791487 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -632,4 +632,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) interrupt_return_macro srr #ifdef CONFIG_PPC_BOOK3S interrupt_return_macro hsrr -#endif +#endif /* CONFIG_PPC_BOOK3S */ + + .globl __end_soft_masked +__end_soft_masked: +DEFINE_FIXED_SYMBOL(__end_soft_masked) -- cgit v1.2.3 From 13799748b957bc5659f97c036224b0f4b42172e2 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 18 Jun 2021 01:51:11 +1000 Subject: powerpc/64: use interrupt restart table to speed up return from interrupt Use the restart table facility to return from interrupt or system calls without disabling MSR[EE] or MSR[RI]. Interrupt return asm is put into the low soft-masked region, to prevent interrupts being processed here, although they are still taken as masked interrupts which causes SRRs to be clobbered, and a pending soft-masked interrupt to require replaying. The return code uses restart table regions to redirect to a fixup handler rather than continue with the exit, if such an interrupt happens.
In this case the interrupt return is redirected to a fixup handler which reloads r1 for the interrupt stack and reloads registers and sets state up to replay the soft-masked interrupt and try the exit again. Some types of security exit fallback flushes and barriers are currently unable to cope with reentrant interrupts, e.g., because they store some state in the scratch SPR which would be clobbered even by masked interrupts. For now the interrupts-enabled exits are disabled when these flushes are used. Signed-off-by: Nicholas Piggin [mpe: Guard unused exit_must_hard_disable() as reported by lkp] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210617155116.2167984-13-npiggin@gmail.com --- arch/powerpc/include/asm/asm-prototypes.h | 5 + arch/powerpc/include/asm/hw_irq.h | 13 +- arch/powerpc/include/asm/interrupt.h | 2 + arch/powerpc/include/asm/paca.h | 3 + arch/powerpc/include/asm/ptrace.h | 1 + arch/powerpc/kernel/asm-offsets.c | 3 + arch/powerpc/kernel/interrupt.c | 393 ++++++++++++++++++++---------- arch/powerpc/kernel/interrupt_64.S | 143 +++++++++-- arch/powerpc/lib/feature-fixups.c | 52 +++- 9 files changed, 457 insertions(+), 158 deletions(-) diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 8dc8dc006267..222823861a67 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -73,6 +73,11 @@ long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs, long scv); notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs); notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs); +#ifdef CONFIG_PPC64 +unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs); +unsigned long interrupt_exit_user_restart(struct pt_regs *regs); +unsigned long interrupt_exit_kernel_restart(struct pt_regs *regs); +#endif long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, u32 len_high, u32 len_low); diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 19bcef666cf6..21cc571ea9c2 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -18,8 +18,17 @@ * PACA flags in paca->irq_happened. * * This bits are set when interrupts occur while soft-disabled - * and allow a proper replay. Additionally, PACA_IRQ_HARD_DIS - * is set whenever we manually hard disable. + * and allow a proper replay. + * + * The PACA_IRQ_HARD_DIS is set whenever we hard disable. It is almost + * always in synch with the MSR[EE] state, except: + * - A window in interrupt entry, where hardware disables MSR[EE] and that + * must be "reconciled" with the soft mask state. + * - NMI interrupts that hit in awkward places, until they fix the state. + * - When local irqs are being enabled and state is being fixed up. + * - When returning from an interrupt there are some windows where this + * can become out of synch, but gets fixed before the RFI or before + * executing the next user instruction (see arch/powerpc/kernel/interrupt.c). 
*/ #define PACA_IRQ_HARD_DIS 0x01 #define PACA_IRQ_DBELL 0x02 diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 832079e824bd..8b4b1e84e110 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -79,6 +79,8 @@ unsigned long search_kernel_restart_table(unsigned long addr); #endif #ifdef CONFIG_PPC_BOOK3S_64 +DECLARE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant); + static inline void srr_regs_clobbered(void) { local_paca->srr_valid = 0; diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index f5f0f3408047..dc05a862e72a 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -165,6 +165,9 @@ struct paca_struct { u64 kstack; /* Saved Kernel stack addr */ u64 saved_r1; /* r1 save for RTAS calls or PM or EE=0 */ u64 saved_msr; /* MSR saved here by enter_rtas */ +#ifdef CONFIG_PPC64 + u64 exit_save_r1; /* Syscall/interrupt R1 save */ +#endif #ifdef CONFIG_PPC_BOOK3E u16 trap_save; /* Used when bad stack is encountered */ #endif diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 5d69dafd80ad..fcf63f559344 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -53,6 +53,7 @@ struct pt_regs struct { #ifdef CONFIG_PPC64 unsigned long ppr; + unsigned long exit_result; #endif union { #ifdef CONFIG_PPC_KUAP diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 69a2885adfe9..a47eefa09bcb 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -244,6 +244,9 @@ int main(void) OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id); OFFSET(PACAKEXECSTATE, paca_struct, kexec_state); OFFSET(PACA_DSCR_DEFAULT, paca_struct, dscr_default); +#ifdef CONFIG_PPC64 + OFFSET(PACA_EXIT_SAVE_R1, paca_struct, exit_save_r1); +#endif #ifdef CONFIG_PPC_BOOK3E OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save); #endif diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 05fa3ae56e25..acd6a3f5ec9d 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -26,6 +26,83 @@ unsigned long global_dbcr0[NR_CPUS]; typedef long (*syscall_fn)(long, long, long, long, long, long); +#ifdef CONFIG_PPC_BOOK3S_64 +DEFINE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant); +static inline bool exit_must_hard_disable(void) +{ + return static_branch_unlikely(&interrupt_exit_not_reentrant); +} +#elif defined(CONFIG_PPC64) +static inline bool exit_must_hard_disable(void) +{ + return false; +} +#endif + +/* + * local irqs must be disabled. Returns false if the caller must re-enable + * them, check for new work, and try again. + * + * This should be called with local irqs disabled, but if they were previously + * enabled when the interrupt handler returns (indicating a process-context / + * synchronous interrupt) then irqs_enabled should be true. 
+ */ +static notrace __always_inline bool prep_irq_for_user_exit(void) +{ + user_enter_irqoff(); + /* This must be done with RI=1 because tracing may touch vmaps */ + trace_hardirqs_on(); + +#ifdef CONFIG_PPC32 + __hard_EE_RI_disable(); +#else + if (exit_must_hard_disable()) + __hard_EE_RI_disable(); + + /* This pattern matches prep_irq_for_idle */ + if (unlikely(lazy_irq_pending_nocheck())) { + if (exit_must_hard_disable()) { + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + __hard_RI_enable(); + } + trace_hardirqs_off(); + user_exit_irqoff(); + + return false; + } +#endif + return true; +} + +/* + * restartable is true then EE/RI can be left on because interrupts are handled + * with a restart sequence. + */ +static notrace __always_inline bool prep_irq_for_kernel_enabled_exit(bool restartable) +{ + /* This must be done with RI=1 because tracing may touch vmaps */ + trace_hardirqs_on(); + +#ifdef CONFIG_PPC32 + __hard_EE_RI_disable(); +#else + if (exit_must_hard_disable() || !restartable) + __hard_EE_RI_disable(); + + /* This pattern matches prep_irq_for_idle */ + if (unlikely(lazy_irq_pending_nocheck())) { + if (exit_must_hard_disable() || !restartable) { + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + __hard_RI_enable(); + } + trace_hardirqs_off(); + + return false; + } +#endif + return true; +} + /* Has to run notrace because it is entered not completely "reconciled" */ notrace long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, @@ -144,71 +221,6 @@ notrace long system_call_exception(long r3, long r4, long r5, return f(r3, r4, r5, r6, r7, r8); } -/* - * local irqs must be disabled. Returns false if the caller must re-enable - * them, check for new work, and try again. - * - * This should be called with local irqs disabled, but if they were previously - * enabled when the interrupt handler returns (indicating a process-context / - * synchronous interrupt) then irqs_enabled should be true. - */ -static notrace __always_inline bool __prep_irq_for_enabled_exit(bool clear_ri) -{ - /* This must be done with RI=1 because tracing may touch vmaps */ - trace_hardirqs_on(); - - /* This pattern matches prep_irq_for_idle */ - if (clear_ri) - __hard_EE_RI_disable(); - else - __hard_irq_disable(); -#ifdef CONFIG_PPC64 - if (unlikely(lazy_irq_pending_nocheck())) { - /* Took an interrupt, may have more exit work to do. */ - if (clear_ri) - __hard_RI_enable(); - trace_hardirqs_off(); - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; - - return false; - } - local_paca->irq_happened = 0; - irq_soft_mask_set(IRQS_ENABLED); -#endif - return true; -} - -static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri, bool irqs_enabled) -{ - if (__prep_irq_for_enabled_exit(clear_ri)) - return true; - - /* - * Must replay pending soft-masked interrupts now. Don't just - * local_irq_enabe(); local_irq_disable(); because if we are - * returning from an asynchronous interrupt here, another one - * might hit after irqs are enabled, and it would exit via this - * same path allowing another to fire, and so on unbounded. 
- * - * If interrupts were enabled when this interrupt exited, - * indicating a process context (synchronous) interrupt, - * local_irq_enable/disable can be used, which will enable - * interrupts rather than keeping them masked (unclear how - * much benefit this is over just replaying for all cases, - * because we immediately disable again, so all we're really - * doing is allowing hard interrupts to execute directly for - * a very small time, rather than being masked and replayed). - */ - if (irqs_enabled) { - local_irq_enable(); - local_irq_disable(); - } else { - replay_soft_interrupts(); - } - - return false; -} - static notrace void booke_load_dbcr0(void) { #ifdef CONFIG_PPC_ADV_DEBUG_REGS @@ -231,57 +243,11 @@ static notrace void booke_load_dbcr0(void) #endif } -/* - * This should be called after a syscall returns, with r3 the return value - * from the syscall. If this function returns non-zero, the system call - * exit assembly should additionally load all GPR registers and CTR and XER - * from the interrupt frame. - * - * The function graph tracer can not trace the return side of this function, - * because RI=0 and soft mask state is "unreconciled", so it is marked notrace. - */ -notrace unsigned long syscall_exit_prepare(unsigned long r3, - struct pt_regs *regs, - long scv) +static notrace unsigned long syscall_exit_prepare_main(unsigned long r3, + struct pt_regs *regs) { unsigned long ti_flags; unsigned long ret = 0; - bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv; - - CT_WARN_ON(ct_state() == CONTEXT_USER); - - kuap_assert_locked(); - - regs->result = r3; - - /* Check whether the syscall is issued inside a restartable sequence */ - rseq_syscall(regs); - - ti_flags = current_thread_info()->flags; - - if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) { - if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) { - r3 = -r3; - regs->ccr |= 0x10000000; /* Set SO bit in CR */ - } - } - - if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) { - if (ti_flags & _TIF_RESTOREALL) - ret = _TIF_RESTOREALL; - else - regs->gpr[3] = r3; - clear_bits(_TIF_PERSYSCALL_MASK, &current_thread_info()->flags); - } else { - regs->gpr[3] = r3; - } - - if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) { - do_syscall_trace_leave(regs); - ret |= _TIF_RESTOREALL; - } - - local_irq_disable(); again: ti_flags = READ_ONCE(current_thread_info()->flags); @@ -327,11 +293,7 @@ again: } } - user_enter_irqoff(); - - /* scv need not set RI=0 because SRRs are not used */ - if (unlikely(!__prep_irq_for_enabled_exit(is_not_scv))) { - user_exit_irqoff(); + if (!prep_irq_for_user_exit()) { local_irq_enable(); local_irq_disable(); goto again; @@ -352,6 +314,95 @@ again: return ret; } +/* + * This should be called after a syscall returns, with r3 the return value + * from the syscall. If this function returns non-zero, the system call + * exit assembly should additionally load all GPR registers and CTR and XER + * from the interrupt frame. + * + * The function graph tracer can not trace the return side of this function, + * because RI=0 and soft mask state is "unreconciled", so it is marked notrace.
+ */ +notrace unsigned long syscall_exit_prepare(unsigned long r3, + struct pt_regs *regs, + long scv) +{ + unsigned long ti_flags; + unsigned long ret = 0; + bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv; + + CT_WARN_ON(ct_state() == CONTEXT_USER); + + kuap_assert_locked(); + + regs->result = r3; + + /* Check whether the syscall is issued inside a restartable sequence */ + rseq_syscall(regs); + + ti_flags = current_thread_info()->flags; + + if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) { + if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) { + r3 = -r3; + regs->ccr |= 0x10000000; /* Set SO bit in CR */ + } + } + + if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) { + if (ti_flags & _TIF_RESTOREALL) + ret = _TIF_RESTOREALL; + else + regs->gpr[3] = r3; + clear_bits(_TIF_PERSYSCALL_MASK, &current_thread_info()->flags); + } else { + regs->gpr[3] = r3; + } + + if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) { + do_syscall_trace_leave(regs); + ret |= _TIF_RESTOREALL; + } + + local_irq_disable(); + ret |= syscall_exit_prepare_main(r3, regs); + +#ifdef CONFIG_PPC64 + regs->exit_result = ret; +#endif + + return ret; +} + +#ifdef CONFIG_PPC64 +notrace unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs) +{ + /* + * This is called when detecting a soft-pending interrupt as well as + * an alternate-return interrupt. So we can't just have the alternate + * return path clear SRR1[MSR] and set PACA_IRQ_HARD_DIS (unless + * the soft-pending case were to fix things up as well). RI might be + * disabled, in which case it gets re-enabled by __hard_irq_disable(). + */ + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + +#ifdef CONFIG_PPC_BOOK3S_64 + set_kuap(AMR_KUAP_BLOCKED); +#endif + + trace_hardirqs_off(); + user_exit_irqoff(); + account_cpu_user_entry(); + + BUG_ON(!user_mode(regs)); + + regs->exit_result |= syscall_exit_prepare_main(r3, regs); + + return regs->exit_result; +} +#endif + notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs) { unsigned long ti_flags; @@ -405,10 +456,7 @@ again: } } - user_enter_irqoff(); - - if (unlikely(!__prep_irq_for_enabled_exit(true))) { - user_exit_irqoff(); + if (!prep_irq_for_user_exit()) { local_irq_enable(); local_irq_disable(); goto again; @@ -422,6 +470,10 @@ again: account_cpu_user_exit(); +#ifdef CONFIG_PPC64 + regs->exit_result = ret; +#endif + /* Restore user access locks last */ kuap_user_restore(regs); kuep_unlock(); @@ -436,6 +488,8 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs) unsigned long flags; unsigned long ret = 0; unsigned long kuap; + bool stack_store = current_thread_info()->flags & + _TIF_EMULATE_STACK_STORE; if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) && unlikely(!(regs->msr & MSR_RI))) @@ -450,11 +504,6 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs) kuap = kuap_get_and_assert_locked(); - if (unlikely(current_thread_info()->flags & _TIF_EMULATE_STACK_STORE)) { - clear_bits(_TIF_EMULATE_STACK_STORE, &current_thread_info()->flags); - ret = 1; - } - local_irq_save(flags); if (!arch_irq_disabled_regs(regs)) { @@ -469,17 +518,54 @@ again: } } - if (unlikely(!prep_irq_for_enabled_exit(true, !irqs_disabled_flags(flags)))) + /* + * Stack store exit can't be restarted because the interrupt + * stack frame might have been clobbered. + */ + if (!prep_irq_for_kernel_enabled_exit(unlikely(stack_store))) { + /* + * Replay pending soft-masked interrupts now.
Don't + * just local_irq_enable(); local_irq_disable(); because + * if we are returning from an asynchronous interrupt + * here, another one might hit after irqs are enabled, + * and it would exit via this same path allowing + * another to fire, and so on unbounded. + */ + hard_irq_disable(); + replay_soft_interrupts(); + /* Took an interrupt, may have more exit work to do. */ goto again; - } else { - /* Returning to a kernel context with local irqs disabled. */ - __hard_EE_RI_disable(); + } #ifdef CONFIG_PPC64 + /* + * An interrupt may clear MSR[EE] and set this concurrently, + * but it will be marked pending and the exit will be retried. + * This leaves a racy window where MSR[EE]=0 and HARD_DIS is + * clear, until interrupt_exit_kernel_restart() calls + * hard_irq_disable(), which will set HARD_DIS again. + */ + local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; + + } else { + if (unlikely(stack_store)) + __hard_EE_RI_disable(); + /* + * Returning to a kernel context with local irqs disabled. + * Here, if EE was enabled in the interrupted context, enable + * it on return as well. A problem exists here where a soft + * masked interrupt may have cleared MSR[EE] and set HARD_DIS + * here, and it will still exist on return to the caller. This + * will be resolved by the masked interrupt firing again. + */ if (regs->msr & MSR_EE) local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; -#endif +#endif /* CONFIG_PPC64 */ } + if (unlikely(stack_store)) { + clear_bits(_TIF_EMULATE_STACK_STORE, &current_thread_info()->flags); + ret = 1; + } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM local_paca->tm_scratch = regs->msr; @@ -494,3 +580,46 @@ again: return ret; } + +#ifdef CONFIG_PPC64 +notrace unsigned long interrupt_exit_user_restart(struct pt_regs *regs) +{ + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + +#ifdef CONFIG_PPC_BOOK3S_64 + set_kuap(AMR_KUAP_BLOCKED); +#endif + + trace_hardirqs_off(); + user_exit_irqoff(); + account_cpu_user_entry(); + + BUG_ON(!user_mode(regs)); + + regs->exit_result |= interrupt_exit_user_prepare(regs); + + return regs->exit_result; +} + +/* + * No real need to return a value here because the stack store case does not + * get restarted. + */ +notrace unsigned long interrupt_exit_kernel_restart(struct pt_regs *regs) +{ + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + +#ifdef CONFIG_PPC_BOOK3S_64 + set_kuap(AMR_KUAP_BLOCKED); +#endif + + if (regs->softe == IRQS_ENABLED) + trace_hardirqs_off(); + + BUG_ON(user_mode(regs)); + + return interrupt_exit_kernel_prepare(regs); +} +#endif diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index 33ee83791487..e7a50613a570 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -58,7 +58,7 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM BEGIN_FTR_SECTION extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */ - bne .Ltabort_syscall + bne tabort_syscall END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif SCV_INTERRUPT_TO_KERNEL @@ -116,9 +116,18 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) bl system_call_exception .Lsyscall_vectored_\name\()_exit: - addi r4,r1,STACK_FRAME_OVERHEAD + addi r4,r1,STACK_FRAME_OVERHEAD li r5,1 /* scv */ bl syscall_exit_prepare + std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ +.Lsyscall_vectored_\name\()_rst_start: + lbz r11,PACAIRQHAPPENED(r13) + andi.
r11,r11,(~PACA_IRQ_HARD_DIS)@l + bne- syscall_vectored_\name\()_restart + li r11,IRQS_ENABLED + stb r11,PACAIRQSOFTMASK(r13) + li r11,0 + stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS ld r2,_CCR(r1) ld r4,_NIP(r1) @@ -168,8 +177,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) b . /* prevent speculative execution */ .Lsyscall_vectored_\name\()_restore_regs: - li r3,0 - mtmsrd r3,1 mtspr SPRN_SRR0,r4 mtspr SPRN_SRR1,r5 @@ -187,9 +194,26 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) REST_2GPRS(12, r1) ld r1,GPR1(r1) RFI_TO_USER +.Lsyscall_vectored_\name\()_rst_end: + +syscall_vectored_\name\()_restart: + GET_PACA(r13) + ld r1,PACA_EXIT_SAVE_R1(r13) + ld r2,PACATOC(r13) + ld r3,RESULT(r1) + addi r4,r1,STACK_FRAME_OVERHEAD + li r11,IRQS_ALL_DISABLED + stb r11,PACAIRQSOFTMASK(r13) + bl syscall_exit_restart + std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ + b .Lsyscall_vectored_\name\()_rst_start + +RESTART_TABLE(.Lsyscall_vectored_\name\()_rst_start, .Lsyscall_vectored_\name\()_rst_end, syscall_vectored_\name\()_restart) + .endm system_call_vectored common 0x3000 + /* * We instantiate another entry copy for the SIGILL variant, with TRAP=0x7ff0 * which is tested by system_call_exception when r0 is -1 (as set by vector @@ -222,7 +246,7 @@ _ASM_NOKPROBE_SYMBOL(system_call_common) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM BEGIN_FTR_SECTION extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */ - bne .Ltabort_syscall + bne tabort_syscall END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif mr r10,r1 @@ -292,9 +316,18 @@ END_BTB_FLUSH_SECTION bl system_call_exception .Lsyscall_exit: - addi r4,r1,STACK_FRAME_OVERHEAD + addi r4,r1,STACK_FRAME_OVERHEAD li r5,0 /* !scv */ bl syscall_exit_prepare + std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ +.Lsyscall_rst_start: + lbz r11,PACAIRQHAPPENED(r13) + andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l + bne- syscall_restart + li r11,IRQS_ENABLED + stb r11,PACAIRQSOFTMASK(r13) + li r11,0 + stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS ld r2,_CCR(r1) ld r6,_LINK(r1) @@ -361,9 +394,24 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) REST_8GPRS(4, r1) ld r12,GPR12(r1) b .Lsyscall_restore_regs_cont +.Lsyscall_rst_end: + +syscall_restart: + GET_PACA(r13) + ld r1,PACA_EXIT_SAVE_R1(r13) + ld r2,PACATOC(r13) + ld r3,RESULT(r1) + addi r4,r1,STACK_FRAME_OVERHEAD + li r11,IRQS_ALL_DISABLED + stb r11,PACAIRQSOFTMASK(r13) + bl syscall_exit_restart + std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ + b .Lsyscall_rst_start + +RESTART_TABLE(.Lsyscall_rst_start, .Lsyscall_rst_end, syscall_restart) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM -.Ltabort_syscall: +tabort_syscall: /* Firstly we need to enable TM in the kernel */ mfmsr r10 li r9, 1 @@ -427,8 +475,10 @@ _ASM_NOKPROBE_SYMBOL(fast_interrupt_return_srr) ld r5,_MSR(r1) andi. r0,r5,MSR_PR #ifdef CONFIG_PPC_BOOK3S - bne .Lfast_user_interrupt_return_amr_srr - kuap_kernel_restore r3, r4 + beq 1f + kuap_user_restore r3, r4 + b .Lfast_user_interrupt_return_srr +1: kuap_kernel_restore r3, r4 andi. r0,r5,MSR_RI li r3,0 /* 0 return value, no EMULATE_STACK_STORE */ bne+ .Lfast_kernel_interrupt_return_srr @@ -452,18 +502,18 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()) bl interrupt_exit_user_prepare cmpdi r3,0 bne- .Lrestore_nvgprs_\srr +.Lrestore_nvgprs_\srr\()_cont: + std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ +.Linterrupt_return_\srr\()_user_rst_start: + lbz r11,PACAIRQHAPPENED(r13) + andi. 
r11,r11,(~PACA_IRQ_HARD_DIS)@l + bne- interrupt_return_\srr\()_user_restart + li r11,IRQS_ENABLED + stb r11,PACAIRQSOFTMASK(r13) + li r11,0 + stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS -#ifdef CONFIG_PPC_BOOK3S -.Lfast_user_interrupt_return_amr_\srr\(): - kuap_user_restore r3, r4 -#endif .Lfast_user_interrupt_return_\srr\(): - -BEGIN_FTR_SECTION - ld r10,_PPR(r1) - mtspr SPRN_PPR,r10 -END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) - #ifdef CONFIG_PPC_BOOK3S .ifc \srr,srr lbz r4,PACASRR_VALID(r13) @@ -493,6 +543,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) .endif DEBUG_SRR_VALID \srr +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG + lbz r4,PACAIRQSOFTMASK(r13) + tdnei r4,IRQS_ENABLED +#endif + +BEGIN_FTR_SECTION + ld r10,_PPR(r1) + mtspr SPRN_PPR,r10 +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + BEGIN_FTR_SECTION stdcx. r0,0,r1 /* to clear the reservation */ FTR_SECTION_ELSE @@ -524,16 +584,44 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) HRFI_TO_USER .endif b . /* prevent speculative execution */ +.Linterrupt_return_\srr\()_user_rst_end: .Lrestore_nvgprs_\srr\(): REST_NVGPRS(r1) - b .Lfast_user_interrupt_return_\srr + b .Lrestore_nvgprs_\srr\()_cont + +interrupt_return_\srr\()_user_restart: + GET_PACA(r13) + ld r1,PACA_EXIT_SAVE_R1(r13) + ld r2,PACATOC(r13) + addi r3,r1,STACK_FRAME_OVERHEAD + li r11,IRQS_ALL_DISABLED + stb r11,PACAIRQSOFTMASK(r13) + bl interrupt_exit_user_restart + std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ + b .Linterrupt_return_\srr\()_user_rst_start + +RESTART_TABLE(.Linterrupt_return_\srr\()_user_rst_start, .Linterrupt_return_\srr\()_user_rst_end, interrupt_return_\srr\()_user_restart) .balign IFETCH_ALIGN_BYTES .Lkernel_interrupt_return_\srr\(): +.Linterrupt_return_\srr\()_kernel: addi r3,r1,STACK_FRAME_OVERHEAD bl interrupt_exit_kernel_prepare + std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ +.Linterrupt_return_\srr\()_kernel_rst_start: + ld r11,SOFTE(r1) + cmpwi r11,IRQS_ENABLED + stb r11,PACAIRQSOFTMASK(r13) + bne 1f + lbz r11,PACAIRQHAPPENED(r13) + andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l + bne- interrupt_return_\srr\()_kernel_restart + li r11,0 + stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS +1: + .Lfast_kernel_interrupt_return_\srr\(): cmpdi cr1,r3,0 #ifdef CONFIG_PPC_BOOK3S @@ -627,6 +715,21 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) HRFI_TO_KERNEL .endif b . 
/* prevent speculative execution */ +.Linterrupt_return_\srr\()_kernel_rst_end: + +interrupt_return_\srr\()_kernel_restart: + GET_PACA(r13) + ld r1,PACA_EXIT_SAVE_R1(r13) + ld r2,PACATOC(r13) + addi r3,r1,STACK_FRAME_OVERHEAD + li r11,IRQS_ALL_DISABLED + stb r11,PACAIRQSOFTMASK(r13) + bl interrupt_exit_kernel_restart + std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ + b .Linterrupt_return_\srr\()_kernel_rst_start + +RESTART_TABLE(.Linterrupt_return_\srr\()_kernel_rst_start, .Linterrupt_return_\srr\()_kernel_rst_end, interrupt_return_\srr\()_kernel_restart) + .endm interrupt_return_macro srr diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 4537459d012f..cda17bee5afe 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -225,6 +226,9 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types) : "unknown"); } +static bool stf_exit_reentrant = false; +static bool rfi_exit_reentrant = false; + static int __do_stf_barrier_fixups(void *data) { enum stf_barrier_type *types = data; @@ -239,11 +243,27 @@ void do_stf_barrier_fixups(enum stf_barrier_type types) { /* * The call to the fallback entry flush, and the fallback/sync-ori exit - * flush can not be safely patched in/out while other CPUs are executing - * them. So call __do_stf_barrier_fixups() on one CPU while all other CPUs - * spin in the stop machine core with interrupts hard disabled. + * flush can not be safely patched in/out while other CPUs are + * executing them. So call __do_stf_barrier_fixups() on one CPU while + * all other CPUs spin in the stop machine core with interrupts hard + * disabled. + * + * The branch to mark interrupt exits non-reentrant is enabled first, + * then stop_machine runs which will ensure all CPUs are out of the + * low level interrupt exit code before patching. After the patching, + * if allowed, then flip the branch to allow fast exits. */ + static_branch_enable(&interrupt_exit_not_reentrant); + stop_machine(__do_stf_barrier_fixups, &types, NULL); + + if ((types & STF_BARRIER_FALLBACK) || (types & STF_BARRIER_SYNC_ORI)) + stf_exit_reentrant = false; + else + stf_exit_reentrant = true; + + if (stf_exit_reentrant && rfi_exit_reentrant) + static_branch_disable(&interrupt_exit_not_reentrant); } void do_uaccess_flush_fixups(enum l1d_flush_type types) @@ -409,8 +429,9 @@ void do_entry_flush_fixups(enum l1d_flush_type types) stop_machine(__do_entry_flush_fixups, &types, NULL); } -void do_rfi_flush_fixups(enum l1d_flush_type types) +static int __do_rfi_flush_fixups(void *data) { + enum l1d_flush_type types = *(enum l1d_flush_type *)data; unsigned int instrs[3], *dest; long *start, *end; int i; @@ -453,6 +474,29 @@ void do_rfi_flush_fixups(enum l1d_flush_type types) : "ori type" : (types & L1D_FLUSH_MTTRIG) ? "mttrig type" : "unknown"); + + return 0; +} + +void do_rfi_flush_fixups(enum l1d_flush_type types) +{ + /* + * stop_machine gets all CPUs out of the interrupt exit handler same + * as do_stf_barrier_fixups. do_rfi_flush_fixups patching can run + * without stop_machine, so this could be achieved with a broadcast + * IPI instead, but this matches the stf sequence. 
+ */ + static_branch_enable(&interrupt_exit_not_reentrant); + + stop_machine(__do_rfi_flush_fixups, &types, NULL); + + if (types & L1D_FLUSH_FALLBACK) + rfi_exit_reentrant = false; + else + rfi_exit_reentrant = true; + + if (stf_exit_reentrant && rfi_exit_reentrant) + static_branch_disable(&interrupt_exit_not_reentrant); } void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end) -- cgit v1.2.3 From f84aa284947f325c5697d35b92abd2047224f24b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 18 Jun 2021 01:51:12 +1000 Subject: powerpc/interrupt: Rename and lightly change syscall_exit_prepare_main() Rename syscall_exit_prepare_main() into interrupt_exit_prepare_main() Pass it the 'ret' so that it can 'or' it directly instead of oring twice, once inside the function and once outside. And remove 'r3' parameter which is not used. Signed-off-by: Christophe Leroy Signed-off-by: Nicholas Piggin Reviewed-by: Nicholas Piggin [np: split out some changes into other patches] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210617155116.2167984-14-npiggin@gmail.com --- arch/powerpc/kernel/interrupt.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index acd6a3f5ec9d..a579e1d5e6ed 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -243,11 +243,10 @@ static notrace void booke_load_dbcr0(void) #endif } -static notrace unsigned long syscall_exit_prepare_main(unsigned long r3, - struct pt_regs *regs) +static notrace unsigned long +interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs) { unsigned long ti_flags; - unsigned long ret = 0; again: ti_flags = READ_ONCE(current_thread_info()->flags); @@ -365,7 +364,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, } local_irq_disable(); - ret |= syscall_exit_prepare_main(r3, regs); + ret = interrupt_exit_user_prepare_main(ret, regs); #ifdef CONFIG_PPC64 regs->exit_result = ret; @@ -397,7 +396,7 @@ notrace unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *reg BUG_ON(!user_mode(regs)); - regs->exit_result |= syscall_exit_prepare_main(r3, regs); + regs->exit_result = interrupt_exit_user_prepare_main(regs->exit_result, regs); return regs->exit_result; } -- cgit v1.2.3 From a214ee8802adb864d175ea6ca4176223bcc11d2b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 18 Jun 2021 01:51:13 +1000 Subject: powerpc/interrupt: Refactor interrupt_exit_user_prepare() interrupt_exit_user_prepare() is a superset of interrupt_exit_user_prepare_main(). Refactor to avoid code duplication. 
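The resulting shape is a thin wrapper, roughly (condensed from the diff below, with the entry assertions elided):

	notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs)
	{
		unsigned long ret;

		/* MSR[RI], irq-state and kuap checks elided for brevity */
		local_irq_disable();
		ret = interrupt_exit_user_prepare_main(0, regs);
	#ifdef CONFIG_PPC64
		regs->exit_result = ret;
	#endif
		return ret;
	}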
Signed-off-by: Christophe Leroy Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210617155116.2167984-15-npiggin@gmail.com --- arch/powerpc/kernel/interrupt.c | 57 +++-------------------------------------- 1 file changed, 3 insertions(+), 54 deletions(-) diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index a579e1d5e6ed..33efa01ec63f 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -404,9 +404,7 @@ notrace unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *reg notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs) { - unsigned long ti_flags; - unsigned long flags; - unsigned long ret = 0; + unsigned long ret; if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x)) BUG_ON(!(regs->msr & MSR_RI)); @@ -420,63 +418,14 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs) */ kuap_assert_locked(); - local_irq_save(flags); - -again: - ti_flags = READ_ONCE(current_thread_info()->flags); - while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { - local_irq_enable(); /* returning to user: may enable */ - if (ti_flags & _TIF_NEED_RESCHED) { - schedule(); - } else { - if (ti_flags & _TIF_SIGPENDING) - ret |= _TIF_RESTOREALL; - do_notify_resume(regs, ti_flags); - } - local_irq_disable(); - ti_flags = READ_ONCE(current_thread_info()->flags); - } - - if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) { - if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && - unlikely((ti_flags & _TIF_RESTORE_TM))) { - restore_tm_state(regs); - } else { - unsigned long mathflags = MSR_FP; - - if (cpu_has_feature(CPU_FTR_VSX)) - mathflags |= MSR_VEC | MSR_VSX; - else if (cpu_has_feature(CPU_FTR_ALTIVEC)) - mathflags |= MSR_VEC; - - /* See above restore_math comment */ - if ((regs->msr & mathflags) != mathflags) - restore_math(regs); - } - } - - if (!prep_irq_for_user_exit()) { - local_irq_enable(); - local_irq_disable(); - goto again; - } - - booke_load_dbcr0(); - -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - local_paca->tm_scratch = regs->msr; -#endif + local_irq_disable(); - account_cpu_user_exit(); + ret = interrupt_exit_user_prepare_main(0, regs); #ifdef CONFIG_PPC64 regs->exit_result = ret; #endif - /* Restore user access locks last */ - kuap_user_restore(regs); - kuep_unlock(); - return ret; } -- cgit v1.2.3 From 99f98f849cf13e5fac532979ccdb77dff07665db Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 18 Jun 2021 01:51:14 +1000 Subject: powerpc/interrupt: Interchange prep_irq_for_{kernel_enabled/user}_exit() prep_irq_for_user_exit() is a superset of prep_irq_for_kernel_enabled_exit(). In order to allow refactoring in following patch, interchange the two. This will allow prep_irq_for_user_exit() to call a renamed version of prep_irq_for_kernel_enabled_exit(). 
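The interchange is mechanical, but it sets up the wrapper the next two patches arrive at; as a preview (simplified, using the post-rename name):

	static bool prep_irq_for_user_exit(void)
	{
		bool ok;

		user_enter_irqoff();		/* the user-only extra step */
		ok = prep_irq_for_enabled_exit(true);
		if (!ok)
			user_exit_irqoff();	/* undo on failure */
		return ok;
	}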
Signed-off-by: Christophe Leroy Signed-off-by: Nicholas Piggin Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210617155116.2167984-16-npiggin@gmail.com --- arch/powerpc/kernel/interrupt.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 33efa01ec63f..58c7466d8ab3 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -46,27 +46,28 @@ static inline bool exit_must_hard_disable(void) * This should be called with local irqs disabled, but if they were previously * enabled when the interrupt handler returns (indicating a process-context / * synchronous interrupt) then irqs_enabled should be true. + * + * restartable is true then EE/RI can be left on because interrupts are handled + * with a restart sequence. */ -static notrace __always_inline bool prep_irq_for_user_exit(void) +static notrace __always_inline bool prep_irq_for_kernel_enabled_exit(bool restartable) { - user_enter_irqoff(); /* This must be done with RI=1 because tracing may touch vmaps */ trace_hardirqs_on(); #ifdef CONFIG_PPC32 __hard_EE_RI_disable(); #else - if (exit_must_hard_disable()) + if (exit_must_hard_disable() || !restartable) __hard_EE_RI_disable(); /* This pattern matches prep_irq_for_idle */ if (unlikely(lazy_irq_pending_nocheck())) { - if (exit_must_hard_disable()) { + if (exit_must_hard_disable() || !restartable) { local_paca->irq_happened |= PACA_IRQ_HARD_DIS; __hard_RI_enable(); } trace_hardirqs_off(); - user_exit_irqoff(); return false; } @@ -74,28 +75,26 @@ static notrace __always_inline bool prep_irq_for_user_exit(void) return true; } -/* - * restartable is true then EE/RI can be left on because interrupts are handled - * with a restart sequence. - */ -static notrace __always_inline bool prep_irq_for_kernel_enabled_exit(bool restartable) +static notrace __always_inline bool prep_irq_for_user_exit(void) { + user_enter_irqoff(); /* This must be done with RI=1 because tracing may touch vmaps */ trace_hardirqs_on(); #ifdef CONFIG_PPC32 __hard_EE_RI_disable(); #else - if (exit_must_hard_disable() || !restartable) + if (exit_must_hard_disable()) __hard_EE_RI_disable(); /* This pattern matches prep_irq_for_idle */ if (unlikely(lazy_irq_pending_nocheck())) { - if (exit_must_hard_disable() || !restartable) { + if (exit_must_hard_disable()) { local_paca->irq_happened |= PACA_IRQ_HARD_DIS; __hard_RI_enable(); } trace_hardirqs_off(); + user_exit_irqoff(); return false; } -- cgit v1.2.3 From 61eece2d1707796fd45225ea3d20e9289251311c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 18 Jun 2021 01:51:15 +1000 Subject: powerpc/interrupt: Refactor prep_irq_for_{user/kernel_enabled}_exit() prep_irq_for_user_exit() is a superset of prep_irq_for_kernel_enabled_exit(). Rename prep_irq_for_kernel_enabled_exit() as prep_irq_for_enabled_exit() and have prep_irq_for_user_exit() use it. 
Signed-off-by: Christophe Leroy Signed-off-by: Nicholas Piggin Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210617155116.2167984-17-npiggin@gmail.com --- arch/powerpc/kernel/interrupt.c | 29 +++++++---------------------- 1 file changed, 7 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 58c7466d8ab3..f75eda2cba72 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -50,7 +50,7 @@ static inline bool exit_must_hard_disable(void) * restartable is true then EE/RI can be left on because interrupts are handled * with a restart sequence. */ -static notrace __always_inline bool prep_irq_for_kernel_enabled_exit(bool restartable) +static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable) { /* This must be done with RI=1 because tracing may touch vmaps */ trace_hardirqs_on(); @@ -77,29 +77,14 @@ static notrace __always_inline bool prep_irq_for_kernel_enabled_exit(bool restar static notrace __always_inline bool prep_irq_for_user_exit(void) { - user_enter_irqoff(); - /* This must be done with RI=1 because tracing may touch vmaps */ - trace_hardirqs_on(); - -#ifdef CONFIG_PPC32 - __hard_EE_RI_disable(); -#else - if (exit_must_hard_disable()) - __hard_EE_RI_disable(); + bool ret; - /* This pattern matches prep_irq_for_idle */ - if (unlikely(lazy_irq_pending_nocheck())) { - if (exit_must_hard_disable()) { - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; - __hard_RI_enable(); - } - trace_hardirqs_off(); + user_enter_irqoff(); + ret = prep_irq_for_enabled_exit(true); + if (!ret) user_exit_irqoff(); - return false; - } -#endif - return true; + return ret; } /* Has to run notrace because it is entered not completely "reconciled" */ @@ -469,7 +454,7 @@ again: * Stack store exit can't be restarted because the interrupt * stack frame might have been clobbered. */ - if (!prep_irq_for_kernel_enabled_exit(unlikely(stack_store))) { + if (!prep_irq_for_enabled_exit(unlikely(stack_store))) { /* * Replay pending soft-masked interrupts now. Don't * just local_irq_enabe(); local_irq_disable(); because -- cgit v1.2.3 From ae58b1c645895c28ca155843db6788d57ea99e11 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 18 Jun 2021 01:51:16 +1000 Subject: powerpc/interrupt: Remove prep_irq_for_user_exit() prep_irq_for_user_exit() has only one caller, squash it inside that caller. 
Signed-off-by: Christophe Leroy Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210617155116.2167984-18-npiggin@gmail.com --- arch/powerpc/kernel/interrupt.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index f75eda2cba72..cc945b27fc01 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -75,18 +75,6 @@ static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable) return true; } -static notrace __always_inline bool prep_irq_for_user_exit(void) -{ - bool ret; - - user_enter_irqoff(); - ret = prep_irq_for_enabled_exit(true); - if (!ret) - user_exit_irqoff(); - - return ret; -} - /* Has to run notrace because it is entered not completely "reconciled" */ notrace long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, @@ -276,7 +264,9 @@ again: } } - if (!prep_irq_for_user_exit()) { + user_enter_irqoff(); + if (!prep_irq_for_enabled_exit(true)) { + user_exit_irqoff(); local_irq_enable(); local_irq_disable(); goto again; -- cgit v1.2.3 From 6eaaf9de3599865ee3b339d90cb24f2153f40bf4 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 22 Jun 2021 22:16:28 +1000 Subject: powerpc/64s/interrupt: Check and fix srr_valid without crashing The PPC_RFI_SRR_DEBUG check added by patch "powerpc/64s: avoid reloading (H)SRR registers if they are still valid" has a few deficiencies. It does not fix the actual problem, it's not enabled by default, and it causes a program check interrupt which can cause more difficulties. However there are a lot of paths which may clobber SRRs or change return regs, and difficult to have a high confidence that all paths are covered without wider testing. Add a relatively low overhead always-enabled check that catches most such cases, reports once, and fixes it so the kernel can continue. 
Signed-off-by: Nicholas Piggin [mpe: Rebase, use switch & INT names, squash in race fix from Nick] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/interrupt.c | 89 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index cc945b27fc01..cee12f2fd459 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -3,6 +3,7 @@ #include #include #include +#include /* for show_regs */ #include #include @@ -215,6 +216,88 @@ static notrace void booke_load_dbcr0(void) #endif } +static void check_return_regs_valid(struct pt_regs *regs) +{ +#ifdef CONFIG_PPC_BOOK3S_64 + unsigned long trap, srr0, srr1; + static bool warned; + u8 *validp; + char *h; + + if (regs->trap == 0x3000) + return; + + trap = regs->trap; + // EE in HV mode sets HSRRs like 0xea0 + if (cpu_has_feature(CPU_FTR_HVMODE) && trap == 0x500) + trap = 0xea0; + + switch (trap) { + case 0x980: + case INTERRUPT_H_DATA_STORAGE: + case 0xe20: + case 0xe40: + case INTERRUPT_HMI: + case 0xe80: + case 0xea0: + case INTERRUPT_H_FAC_UNAVAIL: + case 0x1200: + case 0x1500: + case 0x1600: + case 0x1800: + validp = &local_paca->hsrr_valid; + if (!*validp) + return; + + srr0 = mfspr(SPRN_HSRR0); + srr1 = mfspr(SPRN_HSRR1); + h = "H"; + + break; + default: + validp = &local_paca->srr_valid; + if (!*validp) + return; + + srr0 = mfspr(SPRN_SRR0); + srr1 = mfspr(SPRN_SRR1); + h = ""; + break; + } + + if (srr0 == regs->nip && srr1 == regs->msr) + return; + + /* + * A NMI / soft-NMI interrupt may have come in after we found + * srr_valid and before the SRRs are loaded. The interrupt then + * comes in and clobbers SRRs and clears srr_valid. Then we load + * the SRRs here and test them above and find they don't match. + * + * Test validity again after that, to catch such false positives. + * + * This test in general will have some window for false negatives + * and may not catch and fix all such cases if an NMI comes in + * later and clobbers SRRs without clearing srr_valid, but hopefully + * such things will get caught most of the time, statistically + * enough to be able to get a warning out. + */ + barrier(); + + if (!*validp) + return; + + if (!warned) { + warned = true; + printk("%sSRR0 was: %lx should be: %lx\n", h, srr0, regs->nip); + printk("%sSRR1 was: %lx should be: %lx\n", h, srr1, regs->msr); + show_regs(regs); + } + + *validp = 0; /* fixup */ +#endif +} + static notrace unsigned long interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs) { @@ -264,6 +347,8 @@ again: } } + check_return_regs_valid(regs); + user_enter_irqoff(); if (!prep_irq_for_enabled_exit(true)) { user_exit_irqoff(); @@ -440,6 +525,8 @@ again: } } + check_return_regs_valid(regs); + /* * Stack store exit can't be restarted because the interrupt * stack frame might have been clobbered. @@ -469,6 +556,8 @@ again: local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; } else { + check_return_regs_valid(regs); + if (unlikely(stack_store)) __hard_EE_RI_disable(); /* -- cgit v1.2.3 From 633c8e9800f3884a26b2af59be8ce27696ad6ebf Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 23 Jun 2021 12:15:28 +1000 Subject: powerpc/pseries: Enable hardlockup watchdog for PowerVM partitions PowerVM will not arbitrarily oversubscribe or stop guests, page out the guest kernel text to a NFS volume connected by carrier pigeon to abacus based storage, etc., as a KVM host might. 
So PowerVM guests are not likely to be killed by the hard lockup watchdog in normal operation, even with shared processor LPARs which still get a minimum allotment of CPU time. Enable the hard lockup detector by default on !KVM guests, which we will assume is PowerVM. It has been useful in finding problems on bare metal kernels. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210623021528.702241-1-npiggin@gmail.com --- arch/powerpc/kernel/setup_64.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index e42b85e4f1aa..428058dc5114 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -33,6 +33,7 @@ #include #include +#include #include #include #include @@ -939,16 +940,20 @@ u64 hw_nmi_get_sample_period(int watchdog_thresh) * disable it by default. Book3S has a soft-nmi hardlockup detector based * on the decrementer interrupt, so it does not suffer from this problem. * - * It is likely to get false positives in VM guests, so disable it there - * by default too. + * It is likely to get false positives in KVM guests, so disable it there + * by default too. PowerVM will not stop or arbitrarily oversubscribe + * CPUs, but give a minimum regular allotment even with SPLPAR, so enable + * the detector for non-KVM guests, assume PowerVM. */ static int __init disable_hardlockup_detector(void) { #ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF hardlockup_detector_disable(); #else - if (firmware_has_feature(FW_FEATURE_LPAR)) - hardlockup_detector_disable(); + if (firmware_has_feature(FW_FEATURE_LPAR)) { + if (is_kvm_guest()) + hardlockup_detector_disable(); + } #endif return 0; -- cgit v1.2.3 From 0cdff98b395e5ab71b650c3df154217b1348e9b5 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 23 Jun 2021 12:29:24 +1000 Subject: powerpc/64s: Remove irq mask workaround in accumulate_stolen_time() The caller has been moved to C after irq soft-mask state has been reconciled, and Linux IRQs have been marked as disabled, so this no longer needs to play games with IRQ internals. Fixes: 68b34588e202 ("powerpc/64/sycall: Implement syscall entry/exit logic in C") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210623022924.704645-1-npiggin@gmail.com --- arch/powerpc/kernel/time.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index da995c5fb97d..e45ce427bffb 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -231,24 +231,13 @@ static u64 scan_dispatch_log(u64 stop_tb) void notrace accumulate_stolen_time(void) { u64 sst, ust; - unsigned long save_irq_soft_mask = irq_soft_mask_return(); struct cpu_accounting_data *acct = &local_paca->accounting; - /* We are called early in the exception entry, before - * soft/hard_enabled are sync'ed to the expected state - * for the exception. 
We are hard disabled but the PACA - * needs to reflect that so various debug stuff doesn't - * complain - */ - irq_soft_mask_set(IRQS_DISABLED); - sst = scan_dispatch_log(acct->starttime_user); ust = scan_dispatch_log(acct->starttime); acct->stime -= sst; acct->utime -= ust; acct->steal_time += ust + sst; - - irq_soft_mask_set(save_irq_soft_mask); } static inline u64 calculate_stolen_time(u64 stop_tb) -- cgit v1.2.3 From f5f48e8cb93f4acd77411df0327b61066985bea8 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 23 Jun 2021 13:29:09 +1000 Subject: powerpc: Make PPC_IRQ_SOFT_MASK_DEBUG depend on PPC64 32-bit platforms don't have irq soft masking. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210623032909.826010-1-npiggin@gmail.com --- arch/powerpc/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 3a8ce31a2ae6..205cd77f321f 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -84,6 +84,7 @@ config MSI_BITMAP_SELFTEST config PPC_IRQ_SOFT_MASK_DEBUG bool "Include extra checks for powerpc irq soft masking" + depends on PPC64 config PPC_RFI_SRR_DEBUG bool "Include extra checks for RFI SRR register validity" -- cgit v1.2.3 From bab26238bbd44d5a4687c0a64fd2c7f2755ea937 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 23 Jun 2021 14:12:45 +1000 Subject: powerpc: Offline CPU in stop_this_cpu() printk_safe_flush_on_panic() has special lock breaking code for the case where we panic()ed with the console lock held. It relies on panic IPI causing other CPUs to mark themselves offline. Do as most other architectures do. This effectively reverts commit de6e5d38417e ("powerpc: smp_send_stop do not offline stopped CPUs"), unfortunately it may result in some false positive warnings, but the alternative is more situations where we can crash without getting messages out. Fixes: de6e5d38417e ("powerpc: smp_send_stop do not offline stopped CPUs") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210623041245.865134-1-npiggin@gmail.com --- arch/powerpc/kernel/smp.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 820ae31e0231..1788fe6c47da 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -619,6 +619,8 @@ static void nmi_stop_this_cpu(struct pt_regs *regs) /* * IRQs are already hard disabled by the smp_handle_nmi_ipi. */ + set_cpu_online(smp_processor_id(), false); + spin_begin(); while (1) spin_cpu_relax(); @@ -634,6 +636,15 @@ void smp_send_stop(void) static void stop_this_cpu(void *dummy) { hard_irq_disable(); + + /* + * Offlining CPUs in stop_this_cpu can result in scheduler warnings, + * (see commit de6e5d38417e), but printk_safe_flush_on_panic() wants + * to know other CPUs are offline before it breaks locks to flush + * printk buffers, in case we panic()ed while holding the lock. + */ + set_cpu_online(smp_processor_id(), false); + spin_begin(); while (1) spin_cpu_relax(); -- cgit v1.2.3 From b3e3b4db7a9bafdd81735e6f8db0c5ee9b9d0b5f Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Tue, 22 Jun 2021 10:39:21 -0300 Subject: powerpc/pseries: skip reserved LMBs in dlpar_memory_add_by_count() The function is counting reserved LMBs as available to be added, but they aren't. This will cause the function to miscalculate the available LMBs and can trigger errors later on when executing dlpar_add_lmb(). 
Signed-off-by: Daniel Henrique Barboza Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210622133923.295373-2-danielhb413@gmail.com --- arch/powerpc/platforms/pseries/hotplug-memory.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 36f66556a7c6..28a7fd90232f 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -683,6 +683,9 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add) /* Validate that there are enough LMBs to satisfy the request */ for_each_drmem_lmb(lmb) { + if (lmb->flags & DRCONF_MEM_RESERVED) + continue; + if (!(lmb->flags & DRCONF_MEM_ASSIGNED)) lmbs_available++; -- cgit v1.2.3 From c2aaddcc65b343fad4ed184e625abd3e68f63b9b Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Tue, 22 Jun 2021 10:39:22 -0300 Subject: powerpc/pseries: break early in dlpar_memory_add_by_count() loops After a successful dlpar_add_lmb() call the LMB is marked as reserved. Later on, depending whether we added enough LMBs or not, we rely on the marked LMBs to see which ones might need to be removed, and we remove the reservation of all of them. These are done in for_each_drmem_lmb() loops without any break condition. This means that we're going to check all LMBs of the partition even after going through all the reserved ones. This patch adds break conditions in both loops to avoid this. The 'lmbs_added' variable was renamed to 'lmbs_reserved', and it's now being decremented each time a lmb reservation is removed, indicating if there are still marked LMBs to be processed. Signed-off-by: Daniel Henrique Barboza Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210622133923.295373-3-danielhb413@gmail.com --- arch/powerpc/platforms/pseries/hotplug-memory.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 28a7fd90232f..c0a03e1537cb 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -673,7 +673,7 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add) { struct drmem_lmb *lmb; int lmbs_available = 0; - int lmbs_added = 0; + int lmbs_reserved = 0; int rc; pr_info("Attempting to hot-add %d LMB(s)\n", lmbs_to_add); @@ -714,13 +714,12 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add) * requested LMBs cannot be added. 
*/ drmem_mark_lmb_reserved(lmb); - - lmbs_added++; - if (lmbs_added == lmbs_to_add) + lmbs_reserved++; + if (lmbs_reserved == lmbs_to_add) break; } - if (lmbs_added != lmbs_to_add) { + if (lmbs_reserved != lmbs_to_add) { pr_err("Memory hot-add failed, removing any added LMBs\n"); for_each_drmem_lmb(lmb) { @@ -735,6 +734,10 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add) dlpar_release_drc(lmb->drc_index); drmem_remove_lmb_reservation(lmb); + lmbs_reserved--; + + if (lmbs_reserved == 0) + break; } rc = -EINVAL; } else { @@ -745,6 +748,10 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add) pr_debug("Memory at %llx (drc index %x) was hot-added\n", lmb->base_addr, lmb->drc_index); drmem_remove_lmb_reservation(lmb); + lmbs_reserved--; + + if (lmbs_reserved == 0) + break; } rc = 0; } -- cgit v1.2.3 From 0e5962b2ec3d07204fd995ed07870bff3322525c Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Tue, 22 Jun 2021 10:39:23 -0300 Subject: powerpc/pseries: fail quicker in dlpar_memory_add_by_ic() The validation done at the start of dlpar_memory_add_by_ic() is an all of nothing scenario - if any LMBs in the range is marked as RESERVED we can fail right away. We then can remove the 'lmbs_available' var and its check with 'lmbs_to_add' since the whole LMB range was already validated in the previous step. Signed-off-by: Daniel Henrique Barboza Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210622133923.295373-4-danielhb413@gmail.com --- arch/powerpc/platforms/pseries/hotplug-memory.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index c0a03e1537cb..377d852f5a9a 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -796,7 +796,6 @@ static int dlpar_memory_add_by_index(u32 drc_index) static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index) { struct drmem_lmb *lmb, *start_lmb, *end_lmb; - int lmbs_available = 0; int rc; pr_info("Attempting to hot-add %u LMB(s) at index %x\n", @@ -811,15 +810,14 @@ static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index) /* Validate that the LMBs in this range are not reserved */ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { - if (lmb->flags & DRCONF_MEM_RESERVED) - break; - - lmbs_available++; + /* Fail immediately if the whole range can't be hot-added */ + if (lmb->flags & DRCONF_MEM_RESERVED) { + pr_err("Memory at %llx (drc index %x) is reserved\n", + lmb->base_addr, lmb->drc_index); + return -EINVAL; + } } - if (lmbs_available < lmbs_to_add) - return -EINVAL; - for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { if (lmb->flags & DRCONF_MEM_ASSIGNED) continue; -- cgit v1.2.3 From 0e8554b5d7801b0aebc6c348a0a9f7706aa17b3b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 16 Jun 2021 16:43:03 +0300 Subject: powerpc/papr_scm: Properly handle UUID types and API Parse to and export from UUID own type, before dereferencing. This also fixes wrong comment (Little Endian UUID is something else) and should eliminate the direct strict types assignments. 
Fixes: 43001c52b603 ("powerpc/papr_scm: Use ibm,unit-guid as the iset cookie") Fixes: 259a948c4ba1 ("powerpc/pseries/scm: Use a specific endian format for storing uuid from the device tree") Signed-off-by: Andy Shevchenko Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210616134303.58185-1-andriy.shevchenko@linux.intel.com --- arch/powerpc/platforms/pseries/papr_scm.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index e2b69cc3beaf..b43be41e8ff7 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -18,6 +18,7 @@ #include #include #include +#include #define BIND_ANY_ADDR (~0ul) @@ -1101,8 +1102,9 @@ static int papr_scm_probe(struct platform_device *pdev) u32 drc_index, metadata_size; u64 blocks, block_size; struct papr_scm_priv *p; + u8 uuid_raw[UUID_SIZE]; const char *uuid_str; - u64 uuid[2]; + uuid_t uuid; int rc; /* check we have all the required DT properties */ @@ -1145,16 +1147,23 @@ static int papr_scm_probe(struct platform_device *pdev) p->hcall_flush_required = of_property_read_bool(dn, "ibm,hcall-flush-required"); /* We just need to ensure that set cookies are unique across */ - uuid_parse(uuid_str, (uuid_t *) uuid); + uuid_parse(uuid_str, &uuid); + /* - * cookie1 and cookie2 are not really little endian - * we store a little endian representation of the - * uuid str so that we can compare this with the label - * area cookie irrespective of the endian config with which - * the kernel is built. + * The cookie1 and cookie2 are not really little endian. + * We store a raw buffer representation of the + * uuid string so that we can compare this with the label + * area cookie irrespective of the endian configuration + * with which the kernel is built. + * + * Historically we stored the cookie in the below format. + * for a uuid string 72511b67-0b3b-42fd-8d1d-5be3cae8bcaa + * cookie1 was 0xfd423b0b671b5172 + * cookie2 was 0xaabce8cae35b1d8d */ - p->nd_set.cookie1 = cpu_to_le64(uuid[0]); - p->nd_set.cookie2 = cpu_to_le64(uuid[1]); + export_uuid(uuid_raw, &uuid); + p->nd_set.cookie1 = get_unaligned_le64(&uuid_raw[0]); + p->nd_set.cookie2 = get_unaligned_le64(&uuid_raw[8]); /* might be zero */ p->metadata_size = metadata_size; -- cgit v1.2.3 From 57307f1b6edd781fba2bf9f7ec5f4d17a881ea54 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 19 Apr 2021 10:47:26 +0000 Subject: powerpc/mm: Leave a gap between early allocated IO areas Vmalloc system leaves a gap between allocated areas. It helps catching overflows. Do the same for IO areas which are allocated with early_ioremap_range() until slab_is_available(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c433e358190fb5d47650463ea1ab755fc7b73e6e.1618828806.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/ioremap_32.c | 4 ++-- arch/powerpc/mm/ioremap_64.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/mm/ioremap_32.c b/arch/powerpc/mm/ioremap_32.c index 743e11384dea..9d13143b8be4 100644 --- a/arch/powerpc/mm/ioremap_32.c +++ b/arch/powerpc/mm/ioremap_32.c @@ -70,10 +70,10 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *call */ pr_warn("ioremap() called early from %pS. 
Use early_ioremap() instead\n", caller); - err = early_ioremap_range(ioremap_bot - size, p, size, prot); + err = early_ioremap_range(ioremap_bot - size - PAGE_SIZE, p, size, prot); if (err) return NULL; - ioremap_bot -= size; + ioremap_bot -= size + PAGE_SIZE; return (void __iomem *)ioremap_bot + offset; } diff --git a/arch/powerpc/mm/ioremap_64.c b/arch/powerpc/mm/ioremap_64.c index ba5cbb0d66bd..3acece00b33e 100644 --- a/arch/powerpc/mm/ioremap_64.c +++ b/arch/powerpc/mm/ioremap_64.c @@ -38,7 +38,7 @@ void __iomem *__ioremap_caller(phys_addr_t addr, unsigned long size, return NULL; ret = (void __iomem *)ioremap_bot + offset; - ioremap_bot += size; + ioremap_bot += size + PAGE_SIZE; return ret; } -- cgit v1.2.3 From 6ca6512c716afd6e37281372c4c35aa6afd71d10 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 19 Apr 2021 10:47:27 +0000 Subject: powerpc/mm: Properly coalesce pages in ptdump Commit aaa229529244 ("powerpc/mm: Add physical address to Linux page table dump") changed range coalescing to only combine ranges that are both virtually and physically contiguous, in order to avoid erroneous combination of unrelated mappings in IOREMAP space. But in the VMALLOC space, mappings almost never have contiguous physical pages, so the commit mentionned above leads to dumping one line per page for vmalloc mappings. Taking into account the vmalloc always leave a gap between two areas, we never have two mappings dumped as a single combination even if they have the exact same flags. The only space that may have encountered such an issue was the early IOREMAP which is not using vmalloc engine. But previous commits added gaps between early IO mappings, so it is not an issue anymore. That commit created some difficulties with KASAN mappings, see commit cabe8138b23c ("powerpc: dump as a single line areas mapping a single physical page.") and with huge page, see commit b00ff6d8c1c3 ("powerpc/ptdump: Properly handle non standard page size"). So, almost revert commit aaa229529244 to properly coalesce pages mapped with the same flags as before, only keep the display of the first physical address of the range, as it can be usefull especially for IO mappings. It brings back powerpc at the same level as other architectures and simplifies the conversion to GENERIC PTDUMP. With the patch: ---[ kasan shadow mem start ]--- 0xf8000000-0xf8ffffff 0x07000000 16M huge rw present dirty accessed 0xf9000000-0xf91fffff 0x01434000 2M r present accessed 0xf9200000-0xf95affff 0x02104000 3776K rw present dirty accessed 0xfef5c000-0xfeffffff 0x01434000 656K r present accessed ---[ kasan shadow mem end ]--- Before: ---[ kasan shadow mem start ]--- 0xf8000000-0xf8ffffff 0x07000000 16M huge rw present dirty accessed 0xf9000000-0xf91fffff 0x01434000 16K r present accessed 0xf9200000-0xf9203fff 0x02104000 16K rw present dirty accessed 0xf9204000-0xf9207fff 0x0213c000 16K rw present dirty accessed 0xf9208000-0xf920bfff 0x02174000 16K rw present dirty accessed 0xf920c000-0xf920ffff 0x02188000 16K rw present dirty accessed 0xf9210000-0xf9213fff 0x021dc000 16K rw present dirty accessed 0xf9214000-0xf9217fff 0x02220000 16K rw present dirty accessed 0xf9218000-0xf921bfff 0x023c0000 16K rw present dirty accessed 0xf921c000-0xf921ffff 0x023d4000 16K rw present dirty accessed 0xf9220000-0xf9227fff 0x023ec000 32K rw present dirty accessed ... 
0xf93b8000-0xf93e3fff 0x02614000 176K rw present dirty accessed 0xf93e4000-0xf94c3fff 0x027c0000 896K rw present dirty accessed 0xf94c4000-0xf94c7fff 0x0236c000 16K rw present dirty accessed 0xf94c8000-0xf94cbfff 0x041f0000 16K rw present dirty accessed 0xf94cc000-0xf94cffff 0x029c0000 16K rw present dirty accessed 0xf94d0000-0xf94d3fff 0x041ec000 16K rw present dirty accessed 0xf94d4000-0xf94d7fff 0x0407c000 16K rw present dirty accessed 0xf94d8000-0xf94f7fff 0x041c0000 128K rw present dirty accessed ... 0xf95ac000-0xf95affff 0x042b0000 16K rw present dirty accessed 0xfef5c000-0xfeffffff 0x01434000 16K r present accessed ---[ kasan shadow mem end ]--- Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c56ce1f5c3c75adc9811b1a5f9c410fa74183a8d.1618828806.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/ptdump/ptdump.c | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index aca354fb670b..5062c58b1e5b 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -58,8 +58,6 @@ struct pg_state { const struct addr_marker *marker; unsigned long start_address; unsigned long start_pa; - unsigned long last_pa; - unsigned long page_size; unsigned int level; u64 current_flags; bool check_wx; @@ -163,8 +161,6 @@ static void dump_flag_info(struct pg_state *st, const struct flag_info static void dump_addr(struct pg_state *st, unsigned long addr) { - unsigned long delta; - #ifdef CONFIG_PPC64 #define REG "0x%016lx" #else @@ -172,14 +168,8 @@ static void dump_addr(struct pg_state *st, unsigned long addr) #endif pt_dump_seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1); - if (st->start_pa == st->last_pa && st->start_address + st->page_size != addr) { - pt_dump_seq_printf(st->seq, "[" REG "]", st->start_pa); - delta = st->page_size >> 10; - } else { - pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa); - delta = (addr - st->start_address) >> 10; - } - pt_dump_size(st->seq, delta); + pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa); + pt_dump_size(st->seq, (addr - st->start_address) >> 10); } static void note_prot_wx(struct pg_state *st, unsigned long addr) @@ -208,7 +198,6 @@ static void note_page_update_state(struct pg_state *st, unsigned long addr, st->current_flags = flag; st->start_address = addr; st->start_pa = pa; - st->page_size = page_size; while (addr >= st->marker[1].start_address) { st->marker++; @@ -220,7 +209,6 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned int level, u64 val, unsigned long page_size) { u64 flag = val & pg_level[level].mask; - u64 pa = val & PTE_RPN_MASK; /* At first no level is set */ if (!st->level) { @@ -232,12 +220,9 @@ static void note_page(struct pg_state *st, unsigned long addr, * - we change levels in the tree. * - the address is in a different section of memory and is thus * used for a different purpose, regardless of the flags. 
- * - the pa of this page is not adjacent to the last inspected page */ } else if (flag != st->current_flags || level != st->level || - addr >= st->marker[1].start_address || - (pa != st->last_pa + st->page_size && - (pa != st->start_pa || st->start_pa != st->last_pa))) { + addr >= st->marker[1].start_address) { /* Check the PTE flags */ if (st->current_flags) { @@ -259,7 +244,6 @@ static void note_page(struct pg_state *st, unsigned long addr, */ note_page_update_state(st, addr, level, val, page_size); } - st->last_pa = pa; } static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) -- cgit v1.2.3 From 56afad885228902d090558a0b1cf75e9809e831d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 4 Jun 2021 09:57:36 +0000 Subject: powerpc: Remove klimit klimit is a global variable initialised at build time with the value of _end. This variable is never modified, so _end symbol can be used directly. Remove klimit. Signed-off-by: Christophe Leroy Reviewed-by: Kefeng Wang Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/9fa9ba6807c17f93f35a582c199c646c4a8bfd9c.1622800638.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/setup.h | 1 - arch/powerpc/kernel/head_book3s_32.S | 6 ++---- arch/powerpc/kernel/prom.c | 2 +- arch/powerpc/kernel/setup-common.c | 4 +--- arch/powerpc/platforms/powermac/bootx_init.c | 2 +- 5 files changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index e89bfebd4e00..6c1a7d217d1a 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -10,7 +10,6 @@ extern void ppc_printk_progress(char *s, unsigned short hex); extern unsigned int rtas_data; extern unsigned long long memory_limit; extern bool init_mem_is_free; -extern unsigned long klimit; extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask); struct device_node; diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index e8d861e48c25..764edd860ed4 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -766,9 +766,6 @@ PerformanceMonitor: * the kernel image to physical address PHYSICAL_START. */ relocate_kernel: - addis r9,r26,klimit@ha /* fetch klimit */ - lwz r25,klimit@l(r9) - addis r25,r25,-KERNELBASE@h lis r3,PHYSICAL_START@h /* Destination base address */ li r6,0 /* Destination offset */ li r5,0x4000 /* # bytes of memory to copy */ @@ -776,7 +773,8 @@ relocate_kernel: addi r0,r3,4f@l /* jump to the address of 4f */ mtctr r0 /* in copy and do the rest. */ bctr /* jump to the copy */ -4: mr r5,r25 +4: lis r5,_end-KERNELBASE@h + ori r5,r5,_end-KERNELBASE@l bl copy_and_flush /* copy the rest */ b turn_on_mmu diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index fbe9deebc8e1..f620e04dc9bf 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -758,7 +758,7 @@ void __init early_init_devtree(void *params) first_memblock_size = min_t(u64, first_memblock_size, memory_limit); setup_initial_memory_limit(memstart_addr, first_memblock_size); /* Reserve MEMBLOCK regions used by kernel, initrd, dt, etc... */ - memblock_reserve(PHYSICAL_START, __pa(klimit) - PHYSICAL_START); + memblock_reserve(PHYSICAL_START, __pa(_end) - PHYSICAL_START); /* If relocatable, reserve first 32k for interrupt vectors etc. 
*/ if (PHYSICAL_START > MEMORY_START) memblock_reserve(MEMORY_START, 0x8000); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 74a98fff2c2f..138bb7f49ef9 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -91,8 +91,6 @@ EXPORT_SYMBOL_GPL(boot_cpuid); int dcache_bsize; int icache_bsize; -unsigned long klimit = (unsigned long) _end; - /* * This still seems to be needed... -- paulus */ @@ -930,7 +928,7 @@ void __init setup_arch(char **cmdline_p) init_mm.start_code = (unsigned long)_stext; init_mm.end_code = (unsigned long) _etext; init_mm.end_data = (unsigned long) _edata; - init_mm.brk = klimit; + init_mm.brk = (unsigned long)_end; mm_iommu_init(&init_mm); irqstack_early_init(); diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c index 9d4ecd292255..d20ef35e6d9d 100644 --- a/arch/powerpc/platforms/powermac/bootx_init.c +++ b/arch/powerpc/platforms/powermac/bootx_init.c @@ -433,7 +433,7 @@ static void __init btext_welcome(boot_infos_t *bi) bootx_printf("\nframe buffer at : 0x%x", bi->dispDeviceBase); bootx_printf(" (phys), 0x%x", bi->logicalDisplayBase); bootx_printf(" (log)"); - bootx_printf("\nklimit : 0x%x",(unsigned long)klimit); + bootx_printf("\nklimit : 0x%x",(unsigned long)_end); bootx_printf("\nboot_info at : 0x%x", bi); __asm__ __volatile__ ("mfmsr %0" : "=r" (flags)); bootx_printf("\nMSR : 0x%x", flags); -- cgit v1.2.3 From 10e9252f043ecda0dad7cde6ef87db5d10dff2c7 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 4 Jun 2021 14:54:12 +0000 Subject: powerpc/32: Interchange r10 and r12 in SYSCALL_ENTRY on non booke To better match booke version of SYSCALL_ENTRY macro, interchange r10 and r12 in the non booke version. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/5ab3a517bc883a2fc905fb2cb5ee9344f37b2cfa.1622818435.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_32.h | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index a8221ddcbd66..1e55bc054659 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -142,42 +142,42 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) .macro SYSCALL_ENTRY trapno mfspr r9, SPRN_SRR1 - mfspr r10, SPRN_SRR0 + mfspr r12, SPRN_SRR0 LOAD_REG_IMMEDIATE(r11, MSR_KERNEL) /* can take exceptions */ - lis r12, 1f@h - ori r12, r12, 1f@l + lis r10, 1f@h + ori r10, r10, 1f@l mtspr SPRN_SRR1, r11 - mtspr SPRN_SRR0, r12 - mfspr r12,SPRN_SPRG_THREAD + mtspr SPRN_SRR0, r10 + mfspr r10,SPRN_SPRG_THREAD mr r11, r1 - lwz r1,TASK_STACK-THREAD(r12) - tovirt(r12, r12) + lwz r1,TASK_STACK-THREAD(r10) + tovirt(r10, r10) addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE rfi 1: stw r11,GPR1(r1) stw r11,0(r1) mr r11, r1 - stw r10,_NIP(r11) - mflr r10 - stw r10, _LINK(r11) - mfcr r10 - rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */ - stw r10,_CCR(r11) /* save registers */ + stw r12,_NIP(r11) + mflr r12 + stw r12, _LINK(r11) + mfcr r12 + rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */ + stw r12,_CCR(r11) /* save registers */ #ifdef CONFIG_40x rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) 
*/ #endif - lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ + lis r12,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ stw r2,GPR2(r11) - addi r10,r10,STACK_FRAME_REGS_MARKER@l + addi r12,r12,STACK_FRAME_REGS_MARKER@l stw r9,_MSR(r11) li r2, \trapno - stw r10,8(r11) + stw r12,8(r11) stw r2,_TRAP(r11) SAVE_GPR(0, r11) SAVE_4GPRS(3, r11) SAVE_2GPRS(7, r11) - addi r2,r12,-THREAD + addi r2,r10,-THREAD b transfer_to_syscall /* jump to handler */ .endm -- cgit v1.2.3 From 275dcf24e253f4f5b200bc8cca5eac32a23b08c8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 4 Jun 2021 14:54:13 +0000 Subject: powerpc/32: Interchange r1 and r11 in SYSCALL_ENTRY on booke To better match non booke version of SYSCALL_ENTRY macro, interchange r1 and r11 in the booke version. While at it, in both versions use r1 instead of r11 to save _NIP and _CCR. All other uses of r11 will go away in next patch, so don't bother changing them for now. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1684c39724a069b0ce1aa82eaee6ec194e354e4e.1622818435.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_32.h | 4 ++-- arch/powerpc/kernel/head_booke.h | 17 +++++++++-------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 1e55bc054659..7ca25eb9bc75 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -158,12 +158,12 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) stw r11,GPR1(r1) stw r11,0(r1) mr r11, r1 - stw r12,_NIP(r11) + stw r12,_NIP(r1) mflr r12 stw r12, _LINK(r11) mfcr r12 rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */ - stw r12,_CCR(r11) /* save registers */ + stw r12,_CCR(r1) #ifdef CONFIG_40x rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ #endif diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index f82470091697..4a2fad9f225e 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -135,17 +135,18 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) #endif mfspr r9, SPRN_SRR1 BOOKE_CLEAR_BTB(r11) - lwz r11, TASK_STACK - THREAD(r10) + mr r11, r1 + lwz r1, TASK_STACK - THREAD(r10) rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */ - ALLOC_STACK_FRAME(r11, THREAD_SIZE - INT_FRAME_SIZE) - stw r12, _CCR(r11) /* save various registers */ + ALLOC_STACK_FRAME(r1, THREAD_SIZE - INT_FRAME_SIZE) + stw r12, _CCR(r1) mflr r12 - stw r12,_LINK(r11) + stw r12,_LINK(r1) mfspr r12,SPRN_SRR0 - stw r1, GPR1(r11) - stw r1, 0(r11) - mr r1, r11 - stw r12,_NIP(r11) + stw r11, GPR1(r1) + stw r11, 0(r1) + mr r11, r1 + stw r12,_NIP(r1) rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ lis r12, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ stw r2,GPR2(r11) -- cgit v1.2.3 From 4bd9e05ac7b8b1f7d0c28702cb684417501a5e39 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 4 Jun 2021 14:54:14 +0000 Subject: powerpc/32: Reduce code duplication of system call entry booke and non booke do pretty similar things in SYSCALL_ENTRY macro just before calling jumping to transfer_to_syscall(). Do them in transfer_to_syscall() instead. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/552e27fa09394a6bc70585fcdfa237f99a5d1267.1622818435.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/entry_32.S | 19 +++++++++++++++++++ arch/powerpc/kernel/head_32.h | 19 ------------------- arch/powerpc/kernel/head_booke.h | 18 ------------------ 3 files changed, 19 insertions(+), 37 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 6c09ebb853ec..0273a1349006 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -32,6 +32,7 @@ #include #include #include +#include #include "head_32.h" @@ -74,6 +75,24 @@ _ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler) .globl transfer_to_syscall transfer_to_syscall: + stw r11, GPR1(r1) + stw r11, 0(r1) + mflr r12 + stw r12, _LINK(r1) +#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) + rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ +#endif + lis r12,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ + SAVE_GPR(2, r1) + addi r12,r12,STACK_FRAME_REGS_MARKER@l + stw r9,_MSR(r1) + li r2, INTERRUPT_SYSCALL + stw r12,8(r1) + stw r2,_TRAP(r1) + SAVE_GPR(0, r1) + SAVE_4GPRS(3, r1) + SAVE_2GPRS(7, r1) + addi r2,r10,-THREAD SAVE_NVGPRS(r1) /* Calling convention has r9 = orig r0, r10 = regs */ diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 7ca25eb9bc75..6b1ec9e3541b 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -155,29 +155,10 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE rfi 1: - stw r11,GPR1(r1) - stw r11,0(r1) - mr r11, r1 stw r12,_NIP(r1) - mflr r12 - stw r12, _LINK(r11) mfcr r12 rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */ stw r12,_CCR(r1) -#ifdef CONFIG_40x - rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ -#endif - lis r12,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ - stw r2,GPR2(r11) - addi r12,r12,STACK_FRAME_REGS_MARKER@l - stw r9,_MSR(r11) - li r2, \trapno - stw r12,8(r11) - stw r2,_TRAP(r11) - SAVE_GPR(0, r11) - SAVE_4GPRS(3, r11) - SAVE_2GPRS(7, r11) - addi r2,r10,-THREAD b transfer_to_syscall /* jump to handler */ .endm diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 4a2fad9f225e..10f31146b472 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -140,26 +140,8 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */ ALLOC_STACK_FRAME(r1, THREAD_SIZE - INT_FRAME_SIZE) stw r12, _CCR(r1) - mflr r12 - stw r12,_LINK(r1) mfspr r12,SPRN_SRR0 - stw r11, GPR1(r1) - stw r11, 0(r1) - mr r11, r1 stw r12,_NIP(r1) - rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ - lis r12, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ - stw r2,GPR2(r11) - addi r12, r12, STACK_FRAME_REGS_MARKER@l - stw r9,_MSR(r11) - li r2, \trapno - stw r12, 8(r11) - stw r2,_TRAP(r11) - SAVE_GPR(0, r11) - SAVE_4GPRS(3, r11) - SAVE_2GPRS(7, r11) - - addi r2,r10,-THREAD b transfer_to_syscall /* jump to handler */ .endm -- cgit v1.2.3 From a27755d57e0b8c1109a6b1485e52a5f9d51bd4eb Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 4 Jun 2021 14:54:15 +0000 Subject: powerpc/32: Avoid #ifdef nested with FTR_SECTION on booke syscall entry On booke, SYSCALL_ENTRY macro nests an FTR_SECTION with a #ifdef CONFIG_KVM_BOOKE_HV. Duplicate the single instruction alternative to avoid nesting. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/33db61d5f85146262dbe26648f8f87eca3cae393.1622818435.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_booke.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 10f31146b472..87b806e8eded 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -128,10 +128,10 @@ BEGIN_FTR_SECTION mr r12, r13 lwz r13, THREAD_NORMSAVE(2)(r10) FTR_SECTION_ELSE -#endif mfcr r12 -#ifdef CONFIG_KVM_BOOKE_HV ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) +#else + mfcr r12 #endif mfspr r9, SPRN_SRR1 BOOKE_CLEAR_BTB(r11) -- cgit v1.2.3 From f35d2f249ef05b9671e7898f09ad89aa78f99122 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 22 Jun 2021 15:30:36 +1000 Subject: powerpc/64s: Fix copy-paste data exposure into newly created tasks copy-paste contains implicit "copy buffer" state that can contain arbitrary user data (if the user process executes a copy instruction). This could be snooped by another process if a context switch hits while the state is live. So cp_abort is executed on context switch to clear out possible sensitive data and prevent the leak. cp_abort is done after the low level _switch(), which means it is never reached by newly created tasks, so they could snoop on this buffer between their first and second context switch. Fix this by doing the cp_abort before calling _switch. Add some comments which should make the issue harder to miss. Fixes: 07d2a628bc000 ("powerpc/64s: Avoid cpabort in context switch when possible") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210622053036.474678-1-npiggin@gmail.com --- arch/powerpc/kernel/process.c | 48 ++++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 8ab1bfdfd31a..77facd9d41a6 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1223,6 +1223,19 @@ struct task_struct *__switch_to(struct task_struct *prev, __flush_tlb_pending(batch); batch->active = 0; } + + /* + * On POWER9 the copy-paste buffer can only paste into + * foreign real addresses, so unprivileged processes can not + * see the data or use it in any way unless they have + * foreign real mappings. If the new process has the foreign + * real address mappings, we must issue a cp_abort to clear + * any state and prevent snooping, corruption or a covert + * channel. ISA v3.1 supports paste into local memory. + */ + if (new->mm && (cpu_has_feature(CPU_FTR_ARCH_31) || + atomic_read(&new->mm->context.vas_windows))) + asm volatile(PPC_CP_ABORT); #endif /* CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_PPC_ADV_DEBUG_REGS @@ -1273,30 +1286,33 @@ struct task_struct *__switch_to(struct task_struct *prev, #endif last = _switch(old_thread, new_thread); + /* + * Nothing after _switch will be run for newly created tasks, + * because they switch directly to ret_from_fork/ret_from_kernel_thread + * etc. Code added here should have a comment explaining why that is + * okay. + */ + #ifdef CONFIG_PPC_BOOK3S_64 + /* + * This applies to a process that was context switched while inside + * arch_enter_lazy_mmu_mode(), to re-activate the batch that was + * deactivated above, before _switch(). This will never be the case + * for new tasks. 
+ */ if (current_thread_info()->local_flags & _TLF_LAZY_MMU) { current_thread_info()->local_flags &= ~_TLF_LAZY_MMU; batch = this_cpu_ptr(&ppc64_tlb_batch); batch->active = 1; } - if (current->thread.regs) { + /* + * Math facilities are masked out of the child MSR in copy_thread. + * A new task does not need to restore_math because it will + * demand fault them. + */ + if (current->thread.regs) restore_math(current->thread.regs); - - /* - * On POWER9 the copy-paste buffer can only paste into - * foreign real addresses, so unprivileged processes can not - * see the data or use it in any way unless they have - * foreign real mappings. If the new process has the foreign - * real address mappings, we must issue a cp_abort to clear - * any state and prevent snooping, corruption or a covert - * channel. ISA v3.1 supports paste into local memory. - */ - if (current->mm && - (cpu_has_feature(CPU_FTR_ARCH_31) || - atomic_read(¤t->mm->context.vas_windows))) - asm volatile(PPC_CP_ABORT); - } #endif /* CONFIG_PPC_BOOK3S_64 */ return last; -- cgit v1.2.3 From 511eea5e2ccdfdbf3d626bde0314e551f247dd18 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 23 Jun 2021 05:23:30 +0000 Subject: powerpc/kprobes: Fix Oops by passing ppc_inst as a pointer to emulate_step() on ppc32 Trying to use a kprobe on ppc32 results in the below splat: BUG: Unable to handle kernel data access on read at 0x7c0802a6 Faulting instruction address: 0xc002e9f0 Oops: Kernel access of bad area, sig: 11 [#1] BE PAGE_SIZE=4K PowerPC 44x Platform Modules linked in: CPU: 0 PID: 89 Comm: sh Not tainted 5.13.0-rc1-01824-g3a81c0495fdb #7 NIP: c002e9f0 LR: c0011858 CTR: 00008a47 REGS: c292fd50 TRAP: 0300 Not tainted (5.13.0-rc1-01824-g3a81c0495fdb) MSR: 00009000 CR: 24002002 XER: 20000000 DEAR: 7c0802a6 ESR: 00000000 NIP [c002e9f0] emulate_step+0x28/0x324 LR [c0011858] optinsn_slot+0x128/0x10000 Call Trace: opt_pre_handler+0x7c/0xb4 (unreliable) optinsn_slot+0x128/0x10000 ret_from_syscall+0x0/0x28 The offending instruction is: 81 24 00 00 lwz r9,0(r4) Here, we are trying to load the second argument to emulate_step(): struct ppc_inst, which is the instruction to be emulated. On ppc64, structures are passed in registers when passed by value. However, per the ppc32 ABI, structures are always passed to functions as pointers. This isn't being adhered to when setting up the call to emulate_step() in the optprobe trampoline. Fix the same. Fixes: eacf4c0202654a ("powerpc: Enable OPTPROBES on PPC32") Cc: stable@vger.kernel.org Signed-off-by: Naveen N. Rao Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/5bdc8cbc9a95d0779e27c9ddbf42b40f51f883c0.1624425798.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/optprobes.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index 8b9f82dc6ece..c79899abcec8 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -228,8 +228,12 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) /* * 3. load instruction to be emulated into relevant register, and */ - temp = ppc_inst_read(p->ainsn.insn); - patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX); + if (IS_ENABLED(CONFIG_PPC64)) { + temp = ppc_inst_read(p->ainsn.insn); + patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX); + } else { + patch_imm_load_insns((unsigned long)p->ainsn.insn, 4, buff + TMPL_INSN_IDX); + } /* * 4. 
branch back from trampoline -- cgit v1.2.3 From ed78f56e1271f108e8af61baeba383dcd77adbec Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Thu, 13 May 2021 14:53:49 +0530 Subject: powerpc/papr_scm: Make 'perf_stats' invisible if perf-stats unavailable In case performance stats for an nvdimm are not available, reading the 'perf_stats' sysfs file returns an -ENOENT error. A better approach is to make the 'perf_stats' file entirely invisible to indicate that performance stats for an nvdimm are unavailable. So this patch updates 'papr_nd_attribute_group' to add a 'is_visible' callback implemented as newly introduced 'papr_nd_attribute_visible()' that returns an appropriate mode in case performance stats aren't supported in a given nvdimm. Also the initialization of 'papr_scm_priv.stat_buffer_len' is moved from papr_scm_nvdimm_init() to papr_scm_probe() so that it value is available when 'papr_nd_attribute_visible()' is called during nvdimm initialization. Even though 'perf_stats' attribute is available since v5.9, there are no known user-space tools/scripts that are dependent on presence of its sysfs file. Hence I dont expect any user-space breakage with this patch. Fixes: 2d02bf835e57 ("powerpc/papr_scm: Fetch nvdimm performance stats from PHYP") Signed-off-by: Vaibhav Jain Reviewed-by: Dan Williams Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210513092349.285021-1-vaibhav@linux.ibm.com --- Documentation/ABI/testing/sysfs-bus-papr-pmem | 8 +++--- arch/powerpc/platforms/pseries/papr_scm.c | 35 ++++++++++++++++++--------- 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-bus-papr-pmem b/Documentation/ABI/testing/sysfs-bus-papr-pmem index 92e2db0e2d3d..95254cec92bf 100644 --- a/Documentation/ABI/testing/sysfs-bus-papr-pmem +++ b/Documentation/ABI/testing/sysfs-bus-papr-pmem @@ -39,9 +39,11 @@ KernelVersion: v5.9 Contact: linuxppc-dev , nvdimm@lists.linux.dev, Description: (RO) Report various performance stats related to papr-scm NVDIMM - device. Each stat is reported on a new line with each line - composed of a stat-identifier followed by it value. Below are - currently known dimm performance stats which are reported: + device. This attribute is only available for NVDIMM devices + that support reporting NVDIMM performance stats. Each stat is + reported on a new line with each line composed of a + stat-identifier followed by it value. 
Below are currently known + dimm performance stats which are reported: * "CtlResCt" : Controller Reset Count * "CtlResTm" : Controller Reset Elapsed Time diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index b43be41e8ff7..daa11600d34b 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -908,6 +908,20 @@ static ssize_t flags_show(struct device *dev, } DEVICE_ATTR_RO(flags); +static umode_t papr_nd_attribute_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct nvdimm *nvdimm = to_nvdimm(dev); + struct papr_scm_priv *p = nvdimm_provider_data(nvdimm); + + /* For if perf-stats not available remove perf_stats sysfs */ + if (attr == &dev_attr_perf_stats.attr && p->stat_buffer_len == 0) + return 0; + + return attr->mode; +} + /* papr_scm specific dimm attributes */ static struct attribute *papr_nd_attributes[] = { &dev_attr_flags.attr, @@ -917,6 +931,7 @@ static struct attribute *papr_nd_attributes[] = { static struct attribute_group papr_nd_attribute_group = { .name = "papr", + .is_visible = papr_nd_attribute_visible, .attrs = papr_nd_attributes, }; @@ -932,7 +947,6 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) struct nd_region_desc ndr_desc; unsigned long dimm_flags; int target_nid, online_nid; - ssize_t stat_size; p->bus_desc.ndctl = papr_scm_ndctl; p->bus_desc.module = THIS_MODULE; @@ -1017,16 +1031,6 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) list_add_tail(&p->region_list, &papr_nd_regions); mutex_unlock(&papr_ndr_lock); - /* Try retriving the stat buffer and see if its supported */ - stat_size = drc_pmem_query_stats(p, NULL, 0); - if (stat_size > 0) { - p->stat_buffer_len = stat_size; - dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n", - p->stat_buffer_len); - } else { - dev_info(&p->pdev->dev, "Dimm performance stats unavailable\n"); - } - return 0; err: nvdimm_bus_unregister(p->bus); @@ -1104,6 +1108,7 @@ static int papr_scm_probe(struct platform_device *pdev) struct papr_scm_priv *p; u8 uuid_raw[UUID_SIZE]; const char *uuid_str; + ssize_t stat_size; uuid_t uuid; int rc; @@ -1188,6 +1193,14 @@ static int papr_scm_probe(struct platform_device *pdev) p->res.name = pdev->name; p->res.flags = IORESOURCE_MEM; + /* Try retrieving the stat buffer and see if its supported */ + stat_size = drc_pmem_query_stats(p, NULL, 0); + if (stat_size > 0) { + p->stat_buffer_len = stat_size; + dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n", + p->stat_buffer_len); + } + rc = papr_scm_nvdimm_init(p); if (rc) goto err2; -- cgit v1.2.3 From de21e1377c4fe65bfd8d31e446482c1bc2232997 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Thu, 24 Jun 2021 13:36:21 +0530 Subject: powerpc/papr_scm: Add support for reporting dirty-shutdown-count Persistent memory devices like NVDIMMs can loose cached writes in case something prevents flush on power-fail. Such situations are termed as dirty shutdown and are exposed to applications as last-shutdown-state (LSS) flag and a dirty-shutdown-counter(DSC) as described at [1]. The latter being useful in conditions where multiple applications want to detect a dirty shutdown event without racing with one another. PAPR-NVDIMMs have so far only exposed LSS style flags to indicate a dirty-shutdown-state. This patch further adds support for DSC via the "ibm,persistence-failed-count" device tree property of an NVDIMM. 
This property is a monotonic increasing 64-bit counter thats an indication of number of times an NVDIMM has encountered a dirty-shutdown event causing persistence loss. Since this value is not expected to change after system-boot hence papr_scm reads & caches its value during NVDIMM probe and exposes it as a PAPR sysfs attributed named 'dirty_shutdown' to match the name of similarly named NFIT sysfs attribute. Also this value is available to libnvdimm via PAPR_PDSM_HEALTH payload. 'struct nd_papr_pdsm_health' has been extended to add a new member called 'dimm_dsc' presence of which is indicated by the newly introduced PDSM_DIMM_DSC_VALID flag. References: [1] https://pmem.io/documents/Dirty_Shutdown_Handling-V1.0.pdf Signed-off-by: Vaibhav Jain Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210624080621.252038-1-vaibhav@linux.ibm.com --- arch/powerpc/include/uapi/asm/papr_pdsm.h | 6 ++++++ arch/powerpc/platforms/pseries/papr_scm.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h index 50ef95e2f5b1..82488b1e7276 100644 --- a/arch/powerpc/include/uapi/asm/papr_pdsm.h +++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h @@ -77,6 +77,9 @@ /* Indicate that the 'dimm_fuel_gauge' field is valid */ #define PDSM_DIMM_HEALTH_RUN_GAUGE_VALID 1 +/* Indicate that the 'dimm_dsc' field is valid */ +#define PDSM_DIMM_DSC_VALID 2 + /* * Struct exchanged between kernel & ndctl in for PAPR_PDSM_HEALTH * Various flags indicate the health status of the dimm. @@ -105,6 +108,9 @@ struct nd_papr_pdsm_health { /* Extension flag PDSM_DIMM_HEALTH_RUN_GAUGE_VALID */ __u16 dimm_fuel_gauge; + + /* Extension flag PDSM_DIMM_DSC_VALID */ + __u64 dimm_dsc; }; __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; }; diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index daa11600d34b..4b93c20b24b5 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -115,6 +115,9 @@ struct papr_scm_priv { /* Health information for the dimm */ u64 health_bitmap; + /* Holds the last known dirty shutdown counter value */ + u64 dirty_shutdown_counter; + /* length of the stat buffer as expected by phyp */ size_t stat_buffer_len; }; @@ -604,6 +607,16 @@ free_stats: return rc; } +/* Add the dirty-shutdown-counter value to the pdsm */ +static int papr_pdsm_dsc(struct papr_scm_priv *p, + union nd_pdsm_payload *payload) +{ + payload->health.extension_flags |= PDSM_DIMM_DSC_VALID; + payload->health.dimm_dsc = p->dirty_shutdown_counter; + + return sizeof(struct nd_papr_pdsm_health); +} + /* Fetch the DIMM health info and populate it in provided package. 
*/ static int papr_pdsm_health(struct papr_scm_priv *p, union nd_pdsm_payload *payload) @@ -647,6 +660,8 @@ static int papr_pdsm_health(struct papr_scm_priv *p, /* Populate the fuel gauge meter in the payload */ papr_pdsm_fuel_gauge(p, payload); + /* Populate the dirty-shutdown-counter field */ + papr_pdsm_dsc(p, payload); rc = sizeof(struct nd_papr_pdsm_health); @@ -908,6 +923,16 @@ static ssize_t flags_show(struct device *dev, } DEVICE_ATTR_RO(flags); +static ssize_t dirty_shutdown_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *dimm = to_nvdimm(dev); + struct papr_scm_priv *p = nvdimm_provider_data(dimm); + + return sysfs_emit(buf, "%llu\n", p->dirty_shutdown_counter); +} +DEVICE_ATTR_RO(dirty_shutdown); + static umode_t papr_nd_attribute_visible(struct kobject *kobj, struct attribute *attr, int n) { @@ -926,6 +951,7 @@ static umode_t papr_nd_attribute_visible(struct kobject *kobj, static struct attribute *papr_nd_attributes[] = { &dev_attr_flags.attr, &dev_attr_perf_stats.attr, + &dev_attr_dirty_shutdown.attr, NULL, }; @@ -1151,6 +1177,10 @@ static int papr_scm_probe(struct platform_device *pdev) p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required"); p->hcall_flush_required = of_property_read_bool(dn, "ibm,hcall-flush-required"); + if (of_property_read_u64(dn, "ibm,persistence-failed-count", + &p->dirty_shutdown_counter)) + p->dirty_shutdown_counter = 0; + /* We just need to ensure that set cookies are unique across */ uuid_parse(uuid_str, &uuid); -- cgit v1.2.3 From 0566fa760d235c119cef92119efc3ab11486a08a Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 19 May 2021 16:17:18 +0530 Subject: powerpc/kprobes: Roll IS_RFI() macro into IS_RFID() In kprobes and xmon, we should exclude both 32-bit and 64-bit variants of mtmsr and rfi instructions from being stepped. Have IS_RFID() also detect an rfi instruction, similar to IS_MTMSRD(). Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/eee32e1b75dae85d471c89b4c0a123ad4b0aabf8.1621416666.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/include/asm/sstep.h | 7 +++---- arch/powerpc/kernel/kprobes.c | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index 972ed0df154d..1df867c2e054 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -13,12 +13,11 @@ struct pt_regs; * we don't allow putting a breakpoint on an mtmsrd instruction. * Similarly we don't allow breakpoints on rfid instructions. * These macros tell us if an instruction is a mtmsrd or rfid. - * Note that IS_MTMSRD returns true for both an mtmsr (32-bit) - * and an mtmsrd (64-bit). + * Note that these return true for both mtmsr/rfi (32-bit) + * and mtmsrd/rfid (64-bit). */ #define IS_MTMSRD(instr) ((ppc_inst_val(instr) & 0xfc0007be) == 0x7c000124) -#define IS_RFID(instr) ((ppc_inst_val(instr) & 0xfc0007fe) == 0x4c000024) -#define IS_RFI(instr) ((ppc_inst_val(instr) & 0xfc0007fe) == 0x4c000064) +#define IS_RFID(instr) ((ppc_inst_val(instr) & 0xfc0007be) == 0x4c000024) enum instruction_type { COMPUTE, /* arith/logical/CR op, etc.
*/ diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 04714895d3f9..2303511c7745 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -129,8 +129,8 @@ int arch_prepare_kprobe(struct kprobe *p) if ((unsigned long)p->addr & 0x03) { printk("Attempt to register kprobe at an unaligned address\n"); ret = -EINVAL; - } else if (IS_MTMSRD(insn) || IS_RFID(insn) || IS_RFI(insn)) { - printk("Cannot register a kprobe on rfi/rfid or mtmsr[d]\n"); + } else if (IS_MTMSRD(insn) || IS_RFID(insn)) { + printk("Cannot register a kprobe on mtmsr[d]/rfi[d]\n"); ret = -EINVAL; } else if ((unsigned long)p->addr & ~PAGE_MASK && ppc_inst_prefixed(ppc_inst_read(p->addr - 1))) { -- cgit v1.2.3 From 12b58492e60bf5a31d7f41e8a6f8ceb6f87e710e Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 19 May 2021 16:17:21 +0530 Subject: powerpc/kprobes: Warn if instruction patching failed When arming and disarming probes, we currently assume that instruction patching can never fail, and don't have a mechanism to surface errors. Add a warning in case instruction patching ever fails. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/18d7b1309f938c08ce07738100932b551bdd3a52.1621416666.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/kernel/kprobes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 2303511c7745..eeb6beade975 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -165,13 +165,13 @@ NOKPROBE_SYMBOL(arch_prepare_kprobe); void arch_arm_kprobe(struct kprobe *p) { - patch_instruction(p->addr, ppc_inst(BREAKPOINT_INSTRUCTION)); + WARN_ON_ONCE(patch_instruction(p->addr, ppc_inst(BREAKPOINT_INSTRUCTION))); } NOKPROBE_SYMBOL(arch_arm_kprobe); void arch_disarm_kprobe(struct kprobe *p) { - patch_instruction(p->addr, ppc_inst(p->opcode)); + WARN_ON_ONCE(patch_instruction(p->addr, ppc_inst(p->opcode))); } NOKPROBE_SYMBOL(arch_disarm_kprobe); -- cgit v1.2.3 From dcf57af201eb2ba401e62df374afb7543c81b523 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Mon, 24 May 2021 17:32:27 +0530 Subject: powerpc/configs: Enable STACK_TRACER and FTRACE_SYSCALLS in some of the configs Both of these config options are generally enabled in distro kernels. Enable them in a few powerpc64 configs to get better coverage and testing. Signed-off-by: Naveen N.
Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210524120227.3333208-1-naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/configs/powernv_defconfig | 1 + arch/powerpc/configs/ppc64_defconfig | 2 ++ arch/powerpc/configs/pseries_defconfig | 2 ++ 3 files changed, 5 insertions(+) diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index 2c87e856d839..8bfeea6c7de7 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -309,6 +309,7 @@ CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_HARDLOCKUP_DETECTOR=y CONFIG_FUNCTION_TRACER=y CONFIG_SCHED_TRACER=y +CONFIG_STACK_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_PPC_EMULATED_STATS=y diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 701811c91a6f..0ad2291337a7 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -368,7 +368,9 @@ CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_HARDLOCKUP_DETECTOR=y CONFIG_DEBUG_MUTEXES=y CONFIG_FUNCTION_TRACER=y +CONFIG_FTRACE_SYSCALLS=y CONFIG_SCHED_TRACER=y +CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_CODE_PATCHING_SELFTEST=y CONFIG_FTR_FIXUP_SELFTEST=y diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 50168dde4ea5..b183629f1bcf 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -289,7 +289,9 @@ CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_HARDLOCKUP_DETECTOR=y CONFIG_FUNCTION_TRACER=y +CONFIG_FTRACE_SYSCALLS=y CONFIG_SCHED_TRACER=y +CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_CODE_PATCHING_SELFTEST=y CONFIG_FTR_FIXUP_SELFTEST=y -- cgit v1.2.3 From b8ee3e6d6c5eec7e66de82449382f1a206db2046 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Tue, 1 Jun 2021 13:18:01 +0530 Subject: powerpc/xmon: Add support for running a command on all cpus in xmon It is sometimes desirable to run a command on all cpus in xmon. A typical scenario is to obtain the backtrace from all cpus in xmon if there is a soft lockup. Add rudimentary support for this. The command to be run on all cpus should be prefixed with 'c#'. As an example, 'c#t' will run the 't' command and produce a backtrace on all cpus in xmon. Since many xmon commands are not sensible for running in this manner, we only allow a predefined list of commands -- 'r', 'S' and 't' for now. Signed-off-by: Naveen N.
Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210601074801.617363-1-naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/xmon/xmon.c | 148 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 126 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index dbc38483473c..12dcfb5ef3b6 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -70,6 +70,9 @@ static cpumask_t cpus_in_xmon = CPU_MASK_NONE; static unsigned long xmon_taken = 1; static int xmon_owner; static int xmon_gate; +static int xmon_batch; +static unsigned long xmon_batch_start_cpu; +static cpumask_t xmon_batch_cpus = CPU_MASK_NONE; #else #define xmon_owner 0 #endif /* CONFIG_SMP */ @@ -133,6 +136,12 @@ static void prdump(unsigned long, long); static int ppc_inst_dump(unsigned long, long, int); static void dump_log_buf(void); +#ifdef CONFIG_SMP +static int xmon_switch_cpu(unsigned long); +static int xmon_batch_next_cpu(void); +static int batch_cmds(struct pt_regs *); +#endif + #ifdef CONFIG_PPC_POWERNV static void dump_opal_msglog(void); #else @@ -216,7 +225,8 @@ Commands:\n\ #ifdef CONFIG_SMP "\ c print cpus stopped in xmon\n\ - c# try to switch to cpu number h (in hex)\n" + c# try to switch to cpu number h (in hex)\n\ + c# $ run command '$' (one of 'r','S' or 't') on all cpus in xmon\n" #endif "\ C checksum\n\ @@ -644,7 +654,12 @@ static int xmon_core(struct pt_regs *regs, int fromipi) spin_cpu_relax(); touch_nmi_watchdog(); } else { - if (!locked_down) + cmd = 1; +#ifdef CONFIG_SMP + if (xmon_batch) + cmd = batch_cmds(regs); +#endif + if (!locked_down && cmd) cmd = cmds(regs); if (locked_down || cmd != 0) { /* exiting xmon */ @@ -1243,11 +1258,112 @@ static void bootcmds(void) } } +#ifdef CONFIG_SMP +static int xmon_switch_cpu(unsigned long cpu) +{ + int timeout; + + xmon_taken = 0; + mb(); + xmon_owner = cpu; + timeout = 10000000; + while (!xmon_taken) { + if (--timeout == 0) { + if (test_and_set_bit(0, &xmon_taken)) + break; + /* take control back */ + mb(); + xmon_owner = smp_processor_id(); + printf("cpu 0x%lx didn't take control\n", cpu); + return 0; + } + barrier(); + } + return 1; +} + +static int xmon_batch_next_cpu(void) +{ + unsigned long cpu; + + while (!cpumask_empty(&xmon_batch_cpus)) { + cpu = cpumask_next_wrap(smp_processor_id(), &xmon_batch_cpus, + xmon_batch_start_cpu, true); + if (cpu == nr_cpumask_bits) + break; + if (xmon_batch_start_cpu == -1) + xmon_batch_start_cpu = cpu; + if (xmon_switch_cpu(cpu)) + return 0; + cpumask_clear_cpu(cpu, &xmon_batch_cpus); + } + + xmon_batch = 0; + printf("%x:mon> \n", smp_processor_id()); + return 1; +} + +static int batch_cmds(struct pt_regs *excp) +{ + int cmd; + + /* simulate command entry */ + cmd = xmon_batch; + termch = '\n'; + + last_cmd = NULL; + xmon_regs = excp; + + printf("%x:", smp_processor_id()); + printf("mon> "); + printf("%c\n", (char)cmd); + + switch (cmd) { + case 'r': + prregs(excp); /* print regs */ + break; + case 'S': + super_regs(); + break; + case 't': + backtrace(excp); + break; + } + + cpumask_clear_cpu(smp_processor_id(), &xmon_batch_cpus); + + return xmon_batch_next_cpu(); +} + static int cpu_cmd(void) { -#ifdef CONFIG_SMP unsigned long cpu, first_cpu, last_cpu; - int timeout; + + cpu = skipbl(); + if (cpu == '#') { + xmon_batch = skipbl(); + if (xmon_batch) { + switch (xmon_batch) { + case 'r': + case 'S': + case 't': + cpumask_copy(&xmon_batch_cpus, &cpus_in_xmon); + if (cpumask_weight(&xmon_batch_cpus) <= 1) { + printf("There are no other cpus in 
xmon\n"); + break; + } + xmon_batch_start_cpu = -1; + if (!xmon_batch_next_cpu()) + return 1; + break; + default: + printf("c# only supports 'r', 'S' and 't' commands\n"); + } + xmon_batch = 0; + return 0; + } + } + termch = cpu; if (!scanhex(&cpu)) { /* print cpus waiting or in xmon */ @@ -1279,27 +1395,15 @@ static int cpu_cmd(void) #endif return 0; } - xmon_taken = 0; - mb(); - xmon_owner = cpu; - timeout = 10000000; - while (!xmon_taken) { - if (--timeout == 0) { - if (test_and_set_bit(0, &xmon_taken)) - break; - /* take control back */ - mb(); - xmon_owner = smp_processor_id(); - printf("cpu 0x%lx didn't take control\n", cpu); - return 0; - } - barrier(); - } - return 1; + + return xmon_switch_cpu(cpu); +} #else +static int cpu_cmd(void) +{ return 0; -#endif /* CONFIG_SMP */ } +#endif /* CONFIG_SMP */ static unsigned short fcstab[256] = { 0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf, -- cgit v1.2.3 From 20ccb004bad659c186f9091015a956da220d615d Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 9 Jun 2021 14:30:24 +0530 Subject: powerpc/bpf: Use bctrl for making function calls blrl corrupts the link stack. Instead use bctrl when making function calls from BPF programs. Reported-by: Anton Blanchard Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210609090024.1446800-1-naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit_comp32.c | 4 ++-- arch/powerpc/net/bpf_jit_comp64.c | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 92e17fbaaabb..cbe5b399ed86 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -195,8 +195,8 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 fun /* Load function address into r0 */ EMIT(PPC_RAW_LIS(_R0, IMM_H(func))); EMIT(PPC_RAW_ORI(_R0, _R0, IMM_L(func))); - EMIT(PPC_RAW_MTLR(_R0)); - EMIT(PPC_RAW_BLRL()); + EMIT(PPC_RAW_MTCTR(_R0)); + EMIT(PPC_RAW_BCTRL()); } } diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 7ec9c852f105..5cad5b5a7e97 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -153,8 +153,8 @@ static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, PPC_LI64(b2p[TMP_REG_2], func); /* Load actual entry point from function descriptor */ PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0); - /* ... and move it to LR */ - EMIT(PPC_RAW_MTLR(b2p[TMP_REG_1])); + /* ... and move it to CTR */ + EMIT(PPC_RAW_MTCTR(b2p[TMP_REG_1])); /* * Load TOC from function descriptor at offset 8. 
* We can clobber r2 since we get called through a @@ -165,9 +165,9 @@ static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, #else /* We can clobber r12 */ PPC_FUNC_ADDR(12, func); - EMIT(PPC_RAW_MTLR(12)); + EMIT(PPC_RAW_MTCTR(12)); #endif - EMIT(PPC_RAW_BLRL()); + EMIT(PPC_RAW_BCTRL()); } void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) @@ -202,8 +202,8 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 fun PPC_BPF_LL(12, 12, 0); #endif - EMIT(PPC_RAW_MTLR(12)); - EMIT(PPC_RAW_BLRL()); + EMIT(PPC_RAW_MTCTR(12)); + EMIT(PPC_RAW_BCTRL()); } static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) -- cgit v1.2.3 From c6c27e3d84648e6188fba314ae21a005e60bdcd4 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Tue, 22 Jun 2021 22:17:46 -0700 Subject: selftests/powerpc: Use req_max_processed_len from sysfs NX capabilities On PowerVM, the hypervisor defines the maximum buffer length for each NX request and the kernel exports this value via sysfs. This patch reads this value, if the sysfs entry is available, and uses it to limit the request length. Signed-off-by: Haren Myneni Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ed908341b1eb7ca0183c028a4ed4a0cf48bfe0f6.camel@linux.ibm.com --- tools/testing/selftests/powerpc/nx-gzip/Makefile | 4 ++-- tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c | 17 +++++++++++++++-- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/powerpc/nx-gzip/Makefile b/tools/testing/selftests/powerpc/nx-gzip/Makefile index 640fad6cc2c7..0785c2e99d40 100644 --- a/tools/testing/selftests/powerpc/nx-gzip/Makefile +++ b/tools/testing/selftests/powerpc/nx-gzip/Makefile @@ -1,8 +1,8 @@ -CFLAGS = -O3 -m64 -I./include +CFLAGS = -O3 -m64 -I./include -I../include TEST_GEN_FILES := gzfht_test gunz_test TEST_PROGS := nx-gzip-test.sh include ../../lib.mk -$(TEST_GEN_FILES): gzip_vas.c +$(TEST_GEN_FILES): gzip_vas.c ../utils.c diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c index b099753b50e4..095195a25687 100644 --- a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c +++ b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c @@ -60,6 +60,7 @@ #include #include #include +#include "utils.h" #include "nxu.h" #include "nx.h" @@ -70,6 +71,8 @@ FILE *nx_gzip_log; #define FNAME_MAX 1024 #define FEXT ".nx.gz" +#define SYSFS_MAX_REQ_BUF_PATH "devices/vio/ibm,compression-v1/nx_gzip_caps/req_max_processed_len" + /* * LZ counts returned in the user supplied nx_gzip_crb_cpb_t structure. */ @@ -244,6 +247,7 @@ int compress_file(int argc, char **argv, void *handle) struct nx_gzip_crb_cpb_t *cmdp; uint32_t pagelen = 65536; int fault_tries = NX_MAX_FAULTS; + char buf[32]; cmdp = (void *)(uintptr_t) aligned_alloc(sizeof(struct nx_gzip_crb_cpb_t), @@ -263,8 +267,17 @@ int compress_file(int argc, char **argv, void *handle) assert(NULL != (outbuf = (char *)malloc(outlen))); nxu_touch_pages(outbuf, outlen, pagelen, 1); - /* Compress piecemeal in smallish chunks */ - chunk = 1<<22; + /* + * On PowerVM, the hypervisor defines the maximum request buffer + * size, and this value is available via sysfs.
+ */ + if (!read_sysfs_file(SYSFS_MAX_REQ_BUF_PATH, buf, sizeof(buf))) { + chunk = atoi(buf); + } else { + /* sysfs entry is not available on PowerNV */ + /* Compress piecemeal in smallish chunks */ + chunk = 1<<22; + } /* Write the gzip header to the stream */ num_hdr_bytes = gzip_header_blank(outbuf); -- cgit v1.2.3 From 24d33ac5b8ffb7a0e697344fea8591376162548f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 23 Jun 2021 23:04:54 +1000 Subject: powerpc/64s: Make prom_init require RELOCATABLE When we boot from open firmware (OF) using PPC_OF_BOOT_TRAMPOLINE, a.k.a. prom_init, we run parts of the kernel at an address other than the link address. That happens because OF loads the kernel above zero (OF is at zero) and we run prom_init before copying the kernel down to zero. Currently that works even for non-relocatable kernels, because we do various fixups to the prom_init code to make it run where it's loaded. However those fixups are not sufficient if the kernel becomes large enough. In that case prom_init()'s final call to __start() can end up generating a plt branch: bl c000000002000018 <00000078.plt_branch.__start> That results in the kernel jumping to the linked address of __start, 0xc000000000000000, when really it needs to jump to 0xc000000000000000 plus the runtime offset, because the kernel is still running at the load address. We could do further shenanigans to handle that, see Jordan's patch for example: https://lore.kernel.org/linuxppc-dev/20210421021721.1539289-1-jniethe5@gmail.com However it is much simpler to just require a kernel with prom_init() to be built relocatable. The result works in all configurations without further work, and requires less code. This should have no effect on most people, as our defconfigs and essentially all distro configs already have RELOCATABLE enabled. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210623130454.2542945-1-mpe@ellerman.id.au --- arch/powerpc/kernel/prom_init.c | 58 ++--------------------------------------- arch/powerpc/platforms/Kconfig | 1 + 2 files changed, 3 insertions(+), 56 deletions(-) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 05ce15b854e2..a5bf355ce1d6 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -3243,54 +3243,6 @@ static void __init prom_check_initrd(unsigned long r3, unsigned long r4) #endif /* CONFIG_BLK_DEV_INITRD */ } -#ifdef CONFIG_PPC64 -#ifdef CONFIG_RELOCATABLE -static void reloc_toc(void) -{ -} - -static void unreloc_toc(void) -{ -} -#else -static void __reloc_toc(unsigned long offset, unsigned long nr_entries) -{ - unsigned long i; - unsigned long *toc_entry; - - /* Get the start of the TOC by using r2 directly. */ - asm volatile("addi %0,2,-0x8000" : "=b" (toc_entry)); - - for (i = 0; i < nr_entries; i++) { - *toc_entry = *toc_entry + offset; - toc_entry++; - } -} - -static void reloc_toc(void) -{ - unsigned long offset = reloc_offset(); - unsigned long nr_entries = - (__prom_init_toc_end - __prom_init_toc_start) / sizeof(long); - - __reloc_toc(offset, nr_entries); - - mb(); -} - -static void unreloc_toc(void) -{ - unsigned long offset = reloc_offset(); - unsigned long nr_entries = - (__prom_init_toc_end - __prom_init_toc_start) / sizeof(long); - - mb(); - - __reloc_toc(-offset, nr_entries); -} -#endif -#endif - #ifdef CONFIG_PPC_SVM /* * Perform the Enter Secure Mode ultracall.
@@ -3324,14 +3276,12 @@ static void __init setup_secure_guest(unsigned long kbase, unsigned long fdt) * relocated it so the check will fail. Restore the original image by * relocating it back to the kernel virtual base address. */ - if (IS_ENABLED(CONFIG_RELOCATABLE)) - relocate(KERNELBASE); + relocate(KERNELBASE); ret = enter_secure_mode(kbase, fdt); /* Relocate the kernel again. */ - if (IS_ENABLED(CONFIG_RELOCATABLE)) - relocate(kbase); + relocate(kbase); if (ret != U_SUCCESS) { prom_printf("Returned %d from switching to secure mode.\n", ret); @@ -3359,8 +3309,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, #ifdef CONFIG_PPC32 unsigned long offset = reloc_offset(); reloc_got2(offset); -#else - reloc_toc(); #endif /* @@ -3537,8 +3485,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, #ifdef CONFIG_PPC32 reloc_got2(-offset); -#else - unreloc_toc(); #endif /* Move to secure memory if we're supposed to be secure guests. */ diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 2f071fb9694c..e02d29a9d12f 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -51,6 +51,7 @@ config PPC_NATIVE config PPC_OF_BOOT_TRAMPOLINE bool "Support booting from Open Firmware or yaboot" depends on PPC_BOOK3S_32 || PPC64 + select RELOCATABLE if PPC64 default y help Support from booting from Open Firmware or yaboot using an -- cgit v1.2.3 From 95839225639ba7c3d8d7231b542728dcf222bf2d Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 23 Jun 2021 23:05:14 +1000 Subject: powerpc: Fix is_kvm_guest() / kvm_para_available() Commit a21d1becaa3f ("powerpc: Reintroduce is_kvm_guest() as a fast-path check") added is_kvm_guest() and changed kvm_para_available() to use it. is_kvm_guest() checks a static key, kvm_guest, and that static key is set in check_kvm_guest(). The problem is check_kvm_guest() is only called on pseries, and even then only in some configurations. That means is_kvm_guest() always returns false on all non-pseries machines, and on some pseries machines depending on configuration. That's a bug. For PR KVM guests this is noticeable because they no longer do live patching of themselves, which can be detected by the omission of a message in dmesg such as: KVM: Live patching for a fast VM worked To fix it, make check_kvm_guest() an initcall, to ensure it's always called at boot. It needs to be core so that it runs before kvm_guest_init(), which is postcore. To be an initcall it needs to return int, where 0 means success, so update that. We still call it manually in pSeries_smp_probe(), because that runs before init calls are run.
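For context, the ordering guarantee relied on here comes from initcall levels: core_initcall() functions run before postcore_initcall() ones. A simplified sketch of the pattern in C, not the actual kernel code (running_under_hypervisor() and do_guest_specific_setup() are hypothetical placeholders):

    #include <linux/init.h>
    #include <linux/jump_label.h>

    DEFINE_STATIC_KEY_FALSE(example_guest_key);

    static bool running_under_hypervisor(void);    /* hypothetical probe */
    static void do_guest_specific_setup(void);     /* hypothetical */

    /* "core" level: runs before all postcore initcalls. */
    static int __init example_detect_guest(void)
    {
            if (running_under_hypervisor())
                    static_branch_enable(&example_guest_key);
            return 0;                              /* 0 means success */
    }
    core_initcall(example_detect_guest);

    /* "postcore" level: by now the static key is settled. */
    static int __init example_guest_setup(void)
    {
            if (static_branch_unlikely(&example_guest_key))
                    do_guest_specific_setup();
            return 0;
    }
    postcore_initcall(example_guest_setup);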
Fixes: a21d1becaa3f ("powerpc: Reintroduce is_kvm_guest() as a fast-path check") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210623130514.2543232-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/kvm_guest.h | 4 ++-- arch/powerpc/kernel/firmware.c | 10 ++++++---- arch/powerpc/platforms/pseries/smp.c | 4 +++- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_guest.h b/arch/powerpc/include/asm/kvm_guest.h index 2fca299f7e19..c63105d2c9e7 100644 --- a/arch/powerpc/include/asm/kvm_guest.h +++ b/arch/powerpc/include/asm/kvm_guest.h @@ -16,10 +16,10 @@ static inline bool is_kvm_guest(void) return static_branch_unlikely(&kvm_guest); } -bool check_kvm_guest(void); +int check_kvm_guest(void); #else static inline bool is_kvm_guest(void) { return false; } -static inline bool check_kvm_guest(void) { return false; } +static inline int check_kvm_guest(void) { return 0; } #endif #endif /* _ASM_POWERPC_KVM_GUEST_H_ */ diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c index c9e2819b095a..c7022c41cc31 100644 --- a/arch/powerpc/kernel/firmware.c +++ b/arch/powerpc/kernel/firmware.c @@ -23,18 +23,20 @@ EXPORT_SYMBOL_GPL(powerpc_firmware_features); #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST) DEFINE_STATIC_KEY_FALSE(kvm_guest); -bool check_kvm_guest(void) +int __init check_kvm_guest(void) { struct device_node *hyper_node; hyper_node = of_find_node_by_path("/hypervisor"); if (!hyper_node) - return false; + return 0; if (!of_device_is_compatible(hyper_node, "linux,kvm")) - return false; + return 0; static_branch_enable(&kvm_guest); - return true; + + return 0; } +core_initcall(check_kvm_guest); // before kvm_guest_init() #endif diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index c70b4be9f0a5..096629f54576 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -211,7 +211,9 @@ static __init void pSeries_smp_probe(void) if (!cpu_has_feature(CPU_FTR_SMT)) return; - if (check_kvm_guest()) { + check_kvm_guest(); + + if (is_kvm_guest()) { /* * KVM emulates doorbells by disabling FSCR[MSGP] so msgsndp * faults to the hypervisor which then reads the instruction -- cgit v1.2.3 From d2827e5e2e0f0941a651f4b1ca5e9b778c4b5293 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Sun, 18 Apr 2021 13:10:03 +0530 Subject: powerpc/papr_scm: trivial: fix typo in a comment There is a spelling mistake "byes" -> "bytes" in a comment of function drc_pmem_query_stats(). Fix that typo. Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210418074003.6651-1-kjain@linux.ibm.com --- arch/powerpc/platforms/pseries/papr_scm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 4b93c20b24b5..f48e87ac89c9 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -264,7 +264,7 @@ err_out: * Query the Dimm performance stats from PHYP and copy them (if returned) to * provided struct papr_scm_perf_stats instance 'stats' that can hold atleast * (num_stats + header) bytes. - * - If buff_stats == NULL the return value is the size in byes of the buffer + * - If buff_stats == NULL the return value is the size in bytes of the buffer * needed to hold all supported performance-statistics. 
* - If buff_stats != NULL and num_stats == 0 then we copy all known * performance-statistics to 'buff_stat' and expect to be large enough to -- cgit v1.2.3 From 4bfa5ddff924c2d5b2427f752515ca594dade19f Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 3 May 2021 12:58:11 -0500 Subject: powerpc/rtas-rtc: remove unused constant RTAS_CLOCK_BUSY is unused, remove it. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210503175811.1528208-1-nathanl@linux.ibm.com --- arch/powerpc/kernel/rtas-rtc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/rtas-rtc.c b/arch/powerpc/kernel/rtas-rtc.c index a28239b8b0c0..33c07c8af6c8 100644 --- a/arch/powerpc/kernel/rtas-rtc.c +++ b/arch/powerpc/kernel/rtas-rtc.c @@ -12,7 +12,7 @@ #define MAX_RTC_WAIT 5000 /* 5 sec */ -#define RTAS_CLOCK_BUSY (-2) + time64_t __init rtas_get_boot_time(void) { int ret[8]; -- cgit v1.2.3 From bfb0c9fcf5870036e54081b28cae2af5f9ee7088 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 3 May 2021 21:53:29 -0500 Subject: powerpc/pseries/dlpar: use rtas_get_sensor() Instead of making bare calls to get-sensor-state, use rtas_get_sensor(), which correctly handles busy and extended delay statuses. Fixes: ab519a011caa ("powerpc/pseries: Kernel DLPAR Infrastructure") Signed-off-by: Nathan Lynch Reviewed-by: Laurent Dufour Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210504025329.1713878-1-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/dlpar.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 3ac70790ec7a..b1f01ac0c29e 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -289,8 +289,7 @@ int dlpar_acquire_drc(u32 drc_index) { int dr_status, rc; - rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status, - DR_ENTITY_SENSE, drc_index); + rc = rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &dr_status); if (rc || dr_status != DR_ENTITY_UNUSABLE) return -1; @@ -311,8 +310,7 @@ int dlpar_release_drc(u32 drc_index) { int dr_status, rc; - rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status, - DR_ENTITY_SENSE, drc_index); + rc = rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &dr_status); if (rc || dr_status != DR_ENTITY_PRESENT) return -1; @@ -333,8 +331,7 @@ int dlpar_unisolate_drc(u32 drc_index) { int dr_status, rc; - rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status, - DR_ENTITY_SENSE, drc_index); + rc = rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &dr_status); if (rc || dr_status != DR_ENTITY_PRESENT) return -1; -- cgit v1.2.3 From d40a82be2f79d16cc18c28c14d267da240659949 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 4 May 2021 17:43:43 +1000 Subject: powerpc/pmu: Make the generic compat PMU use the architected events This changes generic-compat-pmu.c so that it only uses architected events defined in Power ISA v3.0B, rather than event encodings which, while common to all the IBM Power Systems implementations, are nevertheless implementation-specific rather than architected. The intention is that any CPU implementation designed to conform to Power ISA v3.0B or later can use generic-compat-pmu.c. In addition to the existing events for cycles and instructions, this adds several other architected events, including alternative encodings for some events. 
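For readers wanting to use these architected events directly, they can be requested as raw hardware events from userspace. A minimal sketch in C using perf_event_open(), assuming the PM_CYC encoding 0x600f4 taken from the event table in the diff below; error handling is kept to a minimum:

    #include <linux/perf_event.h>
    #include <sys/syscall.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <string.h>
    #include <stdio.h>

    int main(void)
    {
            struct perf_event_attr attr;
            long long count;
            int fd;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            attr.type = PERF_TYPE_RAW;     /* raw PMU event code */
            attr.config = 0x600f4;         /* PM_CYC per this driver */
            attr.disabled = 1;
            attr.exclude_kernel = 1;

            /* Count on the current task, any CPU. */
            fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
            if (fd < 0) {
                    perror("perf_event_open");
                    return 1;
            }

            ioctl(fd, PERF_EVENT_IOC_RESET, 0);
            ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
            /* ... workload under measurement ... */
            ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

            if (read(fd, &count, sizeof(count)) == sizeof(count))
                    printf("cycles: %lld\n", count);
            close(fd);
            return 0;
    }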
In order to make it possible to measure cycles and instructions at the same time as each other, we set the CC5-6RUN bit in MMCR0, which makes PMC5 and PMC6 count instructions and cycles regardless of the run bit, so their events are now PM_CYC and PM_INST_CMPL rather than PM_RUN_CYC and PM_RUN_INST_CMPL (the latter are still available via other event codes). Note that POWER9 has an erratum where one architected event (PM_FLOP_CMPL, floating-point operations completed, code 0x100f4) does not work correctly. Given that there is a specific PMU driver for P9 which will be used in preference to generic-compat-pmu.c, that is not a real problem. Signed-off-by: Paul Mackerras Reviewed-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/YJD7L9yeoxvxqeYi@thinks.paulus.ozlabs.org --- arch/powerpc/perf/generic-compat-pmu.c | 170 ++++++++++++++++++++++++++------- 1 file changed, 134 insertions(+), 36 deletions(-) diff --git a/arch/powerpc/perf/generic-compat-pmu.c b/arch/powerpc/perf/generic-compat-pmu.c index eb8a6aaf4cc1..695975227e60 100644 --- a/arch/powerpc/perf/generic-compat-pmu.c +++ b/arch/powerpc/perf/generic-compat-pmu.c @@ -14,45 +14,119 @@ * * 28 24 20 16 12 8 4 0 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - * [ pmc ] [unit ] [ ] m [ pmcxsel ] - * | | - * | *- mark - * | - * | - * *- combine - * - * Below uses IBM bit numbering. - * - * MMCR1[x:y] = unit (PMCxUNIT) - * MMCR1[24] = pmc1combine[0] - * MMCR1[25] = pmc1combine[1] - * MMCR1[26] = pmc2combine[0] - * MMCR1[27] = pmc2combine[1] - * MMCR1[28] = pmc3combine[0] - * MMCR1[29] = pmc3combine[1] - * MMCR1[30] = pmc4combine[0] - * MMCR1[31] = pmc4combine[1] - * + * [ pmc ] [ pmcxsel ] */ /* - * Some power9 event codes. + * Event codes defined in ISA v3.0B */ #define EVENT(_name, _code) _name = _code, enum { -EVENT(PM_CYC, 0x0001e) -EVENT(PM_INST_CMPL, 0x00002) + /* Cycles, alternate code */ + EVENT(PM_CYC_ALT, 0x100f0) + /* One or more instructions completed in a cycle */ + EVENT(PM_CYC_INST_CMPL, 0x100f2) + /* Floating-point instruction completed */ + EVENT(PM_FLOP_CMPL, 0x100f4) + /* Instruction ERAT/L1-TLB miss */ + EVENT(PM_L1_ITLB_MISS, 0x100f6) + /* All instructions completed and none available */ + EVENT(PM_NO_INST_AVAIL, 0x100f8) + /* A load-type instruction completed (ISA v3.0+) */ + EVENT(PM_LD_CMPL, 0x100fc) + /* Instruction completed, alternate code (ISA v3.0+) */ + EVENT(PM_INST_CMPL_ALT, 0x100fe) + /* A store-type instruction completed */ + EVENT(PM_ST_CMPL, 0x200f0) + /* Instruction Dispatched */ + EVENT(PM_INST_DISP, 0x200f2) + /* Run_cycles */ + EVENT(PM_RUN_CYC, 0x200f4) + /* Data ERAT/L1-TLB miss/reload */ + EVENT(PM_L1_DTLB_RELOAD, 0x200f6) + /* Taken branch completed */ + EVENT(PM_BR_TAKEN_CMPL, 0x200fa) + /* Demand iCache Miss */ + EVENT(PM_L1_ICACHE_MISS, 0x200fc) + /* L1 Dcache reload from memory */ + EVENT(PM_L1_RELOAD_FROM_MEM, 0x200fe) + /* L1 Dcache store miss */ + EVENT(PM_ST_MISS_L1, 0x300f0) + /* Alternate code for PM_INST_DISP */ + EVENT(PM_INST_DISP_ALT, 0x300f2) + /* Branch direction or target mispredicted */ + EVENT(PM_BR_MISPREDICT, 0x300f6) + /* Data TLB miss/reload */ + EVENT(PM_DTLB_MISS, 0x300fc) + /* Demand LD - L3 Miss (not L2 hit and not L3 hit) */ + EVENT(PM_DATA_FROM_L3MISS, 0x300fe) + /* L1 Dcache load miss */ + EVENT(PM_LD_MISS_L1, 0x400f0) + /* Cycle when instruction(s) dispatched */ + EVENT(PM_CYC_INST_DISP, 0x400f2) + /* Branch or branch target mispredicted */ + EVENT(PM_BR_MPRED_CMPL, 0x400f6) + /* Instructions 
completed with run latch set */ + EVENT(PM_RUN_INST_CMPL, 0x400fa) + /* Instruction TLB miss/reload */ + EVENT(PM_ITLB_MISS, 0x400fc) + /* Load data not cached */ + EVENT(PM_LD_NOT_CACHED, 0x400fe) + /* Instructions */ + EVENT(PM_INST_CMPL, 0x500fa) + /* Cycles */ + EVENT(PM_CYC, 0x600f4) }; #undef EVENT +/* Table of alternatives, sorted in increasing order of column 0 */ +/* Note that in each row, column 0 must be the smallest */ +static const unsigned int generic_event_alternatives[][MAX_ALT] = { + { PM_CYC_ALT, PM_CYC }, + { PM_INST_CMPL_ALT, PM_INST_CMPL }, + { PM_INST_DISP, PM_INST_DISP_ALT }, +}; + +static int generic_get_alternatives(u64 event, unsigned int flags, u64 alt[]) +{ + int num_alt = 0; + + num_alt = isa207_get_alternatives(event, alt, + ARRAY_SIZE(generic_event_alternatives), flags, + generic_event_alternatives); + + return num_alt; +} + GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC); GENERIC_EVENT_ATTR(instructions, PM_INST_CMPL); +GENERIC_EVENT_ATTR(stalled-cycles-frontend, PM_NO_INST_AVAIL); +GENERIC_EVENT_ATTR(branch-misses, PM_BR_MPRED_CMPL); +GENERIC_EVENT_ATTR(cache-misses, PM_LD_MISS_L1); + +CACHE_EVENT_ATTR(L1-dcache-load-misses, PM_LD_MISS_L1); +CACHE_EVENT_ATTR(L1-dcache-store-misses, PM_ST_MISS_L1); +CACHE_EVENT_ATTR(L1-icache-load-misses, PM_L1_ICACHE_MISS); +CACHE_EVENT_ATTR(LLC-load-misses, PM_DATA_FROM_L3MISS); +CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL); +CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS); +CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS); static struct attribute *generic_compat_events_attr[] = { GENERIC_EVENT_PTR(PM_CYC), GENERIC_EVENT_PTR(PM_INST_CMPL), + GENERIC_EVENT_PTR(PM_NO_INST_AVAIL), + GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL), + GENERIC_EVENT_PTR(PM_LD_MISS_L1), + CACHE_EVENT_PTR(PM_LD_MISS_L1), + CACHE_EVENT_PTR(PM_ST_MISS_L1), + CACHE_EVENT_PTR(PM_L1_ICACHE_MISS), + CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS), + CACHE_EVENT_PTR(PM_BR_MPRED_CMPL), + CACHE_EVENT_PTR(PM_DTLB_MISS), + CACHE_EVENT_PTR(PM_ITLB_MISS), NULL }; @@ -63,17 +137,11 @@ static struct attribute_group generic_compat_pmu_events_group = { PMU_FORMAT_ATTR(event, "config:0-19"); PMU_FORMAT_ATTR(pmcxsel, "config:0-7"); -PMU_FORMAT_ATTR(mark, "config:8"); -PMU_FORMAT_ATTR(combine, "config:10-11"); -PMU_FORMAT_ATTR(unit, "config:12-15"); PMU_FORMAT_ATTR(pmc, "config:16-19"); static struct attribute *generic_compat_pmu_format_attr[] = { &format_attr_event.attr, &format_attr_pmcxsel.attr, - &format_attr_mark.attr, - &format_attr_combine.attr, - &format_attr_unit.attr, &format_attr_pmc.attr, NULL, }; @@ -92,6 +160,9 @@ static const struct attribute_group *generic_compat_pmu_attr_groups[] = { static int compat_generic_events[] = { [PERF_COUNT_HW_CPU_CYCLES] = PM_CYC, [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PM_NO_INST_AVAIL, + [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL, + [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1, }; #define C(x) PERF_COUNT_HW_CACHE_##x @@ -105,11 +176,11 @@ static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [ C(L1D) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, + [ C(RESULT_MISS) ] = PM_LD_MISS_L1, }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, + [ C(RESULT_MISS) ] = PM_ST_MISS_L1, }, [ C(OP_PREFETCH) ] = { [ C(RESULT_ACCESS) ] = 0, @@ -119,7 +190,7 @@ static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [ C(L1I) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, + [ 
C(RESULT_MISS) ] = PM_L1_ICACHE_MISS, }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = 0, @@ -133,7 +204,7 @@ static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [ C(LL) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, + [ C(RESULT_MISS) ] = PM_DATA_FROM_L3MISS, }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = 0, @@ -147,7 +218,7 @@ static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [ C(DTLB) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, + [ C(RESULT_MISS) ] = PM_DTLB_MISS, }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = -1, @@ -161,7 +232,7 @@ static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [ C(ITLB) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, + [ C(RESULT_MISS) ] = PM_ITLB_MISS, }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = -1, @@ -175,7 +246,7 @@ static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [ C(BPU) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, + [ C(RESULT_MISS) ] = PM_BR_MPRED_CMPL, }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = -1, @@ -204,13 +275,30 @@ static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { #undef C +/* + * We set MMCR0[CC5-6RUN] so we can use counters 5 and 6 for + * PM_INST_CMPL and PM_CYC. + */ +static int generic_compute_mmcr(u64 event[], int n_ev, + unsigned int hwc[], struct mmcr_regs *mmcr, + struct perf_event *pevents[], u32 flags) +{ + int ret; + + ret = isa207_compute_mmcr(event, n_ev, hwc, mmcr, pevents, flags); + if (!ret) + mmcr->mmcr0 |= MMCR0_C56RUN; + return ret; +} + static struct power_pmu generic_compat_pmu = { .name = "GENERIC_COMPAT", .n_counter = MAX_PMU_COUNTERS, .add_fields = ISA207_ADD_FIELDS, .test_adder = ISA207_TEST_ADDER, - .compute_mmcr = isa207_compute_mmcr, + .compute_mmcr = generic_compute_mmcr, .get_constraint = isa207_get_constraint, + .get_alternatives = generic_get_alternatives, .disable_pmc = isa207_disable_pmc, .flags = PPMU_HAS_SIER | PPMU_ARCH_207S, .n_generic = ARRAY_SIZE(compat_generic_events), @@ -223,6 +311,16 @@ int init_generic_compat_pmu(void) { int rc = 0; + /* + * From ISA v2.07 on, PMU features are architected; + * we require >= v3.0 because (a) that has PM_LD_CMPL and + * PM_INST_CMPL_ALT, which v2.07 doesn't have, and + * (b) we don't expect any non-IBM Power ISA + * implementations that conform to v2.07 but not v3.0. 
+ */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + return -ENODEV; + rc = register_power_pmu(&generic_compat_pmu); if (rc) return rc; -- cgit v1.2.3 From a2305e3de819394a7adf68078964a92d06f9db33 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 29 Apr 2021 10:06:38 +0200 Subject: powerpc: mark local variables around longjmp as volatile gcc-11 points out that modifying local variables next to a longjmp/setjmp may cause undefined behavior: arch/powerpc/kexec/crash.c: In function 'crash_kexec_prepare_cpus.constprop': arch/powerpc/kexec/crash.c:108:22: error: variable 'ncpus' might be clobbered by 'longjmp' or 'vfork' [-Werror=clobbere d] arch/powerpc/kexec/crash.c:109:13: error: variable 'tries' might be clobbered by 'longjmp' or 'vfork' [-Werror=clobbere d] arch/powerpc/xmon/xmon.c: In function 'xmon_print_symbol': arch/powerpc/xmon/xmon.c:3625:21: error: variable 'name' might be clobbered by 'longjmp' or 'vfork' [-Werror=clobbered] arch/powerpc/xmon/xmon.c: In function 'stop_spus': arch/powerpc/xmon/xmon.c:4057:13: error: variable 'i' might be clobbered by 'longjmp' or 'vfork' [-Werror=clobbered] arch/powerpc/xmon/xmon.c: In function 'restart_spus': arch/powerpc/xmon/xmon.c:4098:13: error: variable 'i' might be clobbered by 'longjmp' or 'vfork' [-Werror=clobbered] arch/powerpc/xmon/xmon.c: In function 'dump_opal_msglog': arch/powerpc/xmon/xmon.c:3008:16: error: variable 'pos' might be clobbered by 'longjmp' or 'vfork' [-Werror=clobbered] arch/powerpc/xmon/xmon.c: In function 'show_pte': arch/powerpc/xmon/xmon.c:3207:29: error: variable 'tsk' might be clobbered by 'longjmp' or 'vfork' [-Werror=clobbered] arch/powerpc/xmon/xmon.c: In function 'show_tasks': arch/powerpc/xmon/xmon.c:3302:29: error: variable 'tsk' might be clobbered by 'longjmp' or 'vfork' [-Werror=clobbered] arch/powerpc/xmon/xmon.c: In function 'xmon_core': arch/powerpc/xmon/xmon.c:494:13: error: variable 'cmd' might be clobbered by 'longjmp' or 'vfork' [-Werror=clobbered] arch/powerpc/xmon/xmon.c:860:21: error: variable 'bp' might be clobbered by 'longjmp' or 'vfork' [-Werror=clobbered] arch/powerpc/xmon/xmon.c:860:21: error: variable 'bp' might be clobbered by 'longjmp' or 'vfork' [-Werror=clobbered] arch/powerpc/xmon/xmon.c:492:48: error: argument 'fromipi' might be clobbered by 'longjmp' or 'vfork' [-Werror=clobbered] According to the documentation, marking these as 'volatile' is sufficient to avoid the problem, and it shuts up the warning. 
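The underlying C rule: after a longjmp(), the values of automatic objects that were modified after setjmp() and are not volatile-qualified are indeterminate (C11 7.13.2.1). A standalone illustration of the bug class, mirroring the fix applied in the diff below:

    #include <setjmp.h>
    #include <stdio.h>

    static jmp_buf env;

    static void fault_handler(void)
    {
            longjmp(env, 1);        /* unwind back to the setjmp() point */
    }

    int main(void)
    {
            /*
             * Without 'volatile', 'tries' may live in a register that
             * longjmp() does not restore, so its value after the jump
             * would be indeterminate.
             */
            volatile int tries = 0;

            if (setjmp(env)) {
                    /* longjmp() brought us back here */
                    if (tries >= 3)
                            return 1;       /* give up after three attempts */
            }

            tries++;
            printf("attempt %d\n", tries);
            fault_handler();                /* simulate the faulting path */
            return 0;
    }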
Signed-off-by: Arnd Bergmann Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210429080708.1520360-1-arnd@kernel.org --- arch/powerpc/kexec/crash.c | 4 ++-- arch/powerpc/xmon/xmon.c | 22 +++++++++++----------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c index 0196d0c211ac..10f997e6bb95 100644 --- a/arch/powerpc/kexec/crash.c +++ b/arch/powerpc/kexec/crash.c @@ -105,8 +105,8 @@ void crash_ipi_callback(struct pt_regs *regs) static void crash_kexec_prepare_cpus(int cpu) { unsigned int msecs; - unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ - int tries = 0; + volatile unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ + volatile int tries = 0; int (*old_handler)(struct pt_regs *regs); printk(KERN_EMERG "Sending IPI to other CPUs\n"); diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 12dcfb5ef3b6..fe34c4fdd24a 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -499,10 +499,10 @@ static void xmon_touch_watchdogs(void) touch_nmi_watchdog(); } -static int xmon_core(struct pt_regs *regs, int fromipi) +static int xmon_core(struct pt_regs *regs, volatile int fromipi) { - int cmd = 0; - struct bpt *bp; + volatile int cmd = 0; + struct bpt *volatile bp; long recurse_jmp[JMP_BUF_LEN]; bool locked_down; unsigned long offset; @@ -872,7 +872,7 @@ static inline void force_enable_xmon(void) static struct bpt *at_breakpoint(unsigned long pc) { int i; - struct bpt *bp; + struct bpt *volatile bp; bp = bpts; for (i = 0; i < NBPTS; ++i, ++bp) @@ -3109,7 +3109,7 @@ static void dump_opal_msglog(void) { unsigned char buf[128]; ssize_t res; - loff_t pos = 0; + volatile loff_t pos = 0; if (!firmware_has_feature(FW_FEATURE_OPAL)) { printf("Machine is not running OPAL firmware.\n"); @@ -3264,7 +3264,7 @@ memzcan(void) printf("%.8lx\n", a - mskip); } -static void show_task(struct task_struct *tsk) +static void show_task(struct task_struct *volatile tsk) { char state; @@ -3308,7 +3308,7 @@ static void format_pte(void *ptep, unsigned long pte) static void show_pte(unsigned long addr) { unsigned long tskv = 0; - struct task_struct *tsk = NULL; + struct task_struct *volatile tsk = NULL; struct mm_struct *mm; pgd_t *pgdp; p4d_t *p4dp; @@ -3403,7 +3403,7 @@ static void show_pte(unsigned long addr) static void show_tasks(void) { unsigned long tskv; - struct task_struct *tsk = NULL; + struct task_struct *volatile tsk = NULL; printf(" task_struct ->thread.ksp ->thread.regs PID PPID S P CMD\n"); @@ -3726,7 +3726,7 @@ static void xmon_print_symbol(unsigned long address, const char *mid, const char *after) { char *modname; - const char *name = NULL; + const char *volatile name = NULL; unsigned long offset, size; printf(REG, address); @@ -4158,7 +4158,7 @@ void xmon_register_spus(struct list_head *list) static void stop_spus(void) { struct spu *spu; - int i; + volatile int i; u64 tmp; for (i = 0; i < XMON_NUM_SPUS; i++) { @@ -4199,7 +4199,7 @@ static void stop_spus(void) static void restart_spus(void) { struct spu *spu; - int i; + volatile int i; for (i = 0; i < XMON_NUM_SPUS; i++) { if (!spu_info[i].spu) -- cgit v1.2.3 From c736fb97051c39c74bf5d4684bdac1766d43e175 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 25 Jun 2021 14:50:24 +1000 Subject: powerpc/pseries/vas: Include irqdomain.h There are patches in flight to break the dependency between asm/irq.h and linux/irqdomain.h, which would break compilation of vas.c because it needs the 
declaration of irq_create_mapping() etc. So add an explicit include of irqdomain.h to avoid that becoming a problem in future. Reported-by: Stephen Rothwell Signed-off-by: Stephen Rothwell Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210625045337.3197833-1-mpe@ellerman.id.au --- arch/powerpc/platforms/pseries/vas.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index 3385b5400cc6..b5c1cf1bc64d 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -12,6 +12,7 @@ #include #include #include +#include <linux/irqdomain.h> #include #include #include -- cgit v1.2.3 From 7c6986ade69e3c81bac831645bc72109cd798a80 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 25 Jun 2021 16:28:41 +1000 Subject: powerpc/stacktrace: Fix spurious "stale" traces in raise_backtrace_ipi() In raise_backtrace_ipi() we iterate through the cpumask of CPUs, sending each an IPI asking them to do a backtrace, but we don't wait for the backtrace to happen. We then iterate through the CPU mask again, and if any CPU hasn't done the backtrace and cleared itself from the mask, we print a trace on its behalf, noting that the trace may be "stale". This works well enough when a CPU is not responding, because in that case it doesn't receive the IPI and the sending CPU is left to print the trace. But when all CPUs are responding we are left with a race between the sending and receiving CPUs; if the sending CPU wins the race, it will erroneously print a trace. This leads to spurious "stale" traces from the sending CPU, which can then be interleaved messily with the receiving CPU's output; note the CPU numbers, e.g.: [ 1658.929157][ C7] rcu: Stack dump where RCU GP kthread last ran: [ 1658.929223][ C7] Sending NMI from CPU 7 to CPUs 1: [ 1658.929303][ C1] NMI backtrace for cpu 1 [ 1658.929303][ C7] CPU 1 didn't respond to backtrace IPI, inspecting paca. [ 1658.929362][ C1] CPU: 1 PID: 325 Comm: kworker/1:1H Tainted: G W E 5.13.0-rc2+ #46 [ 1658.929405][ C7] irq_soft_mask: 0x01 in_mce: 0 in_nmi: 0 current: 325 (kworker/1:1H) [ 1658.929465][ C1] Workqueue: events_highpri test_work_fn [test_lockup] [ 1658.929549][ C7] Back trace of paca->saved_r1 (0xc0000000057fb400) (possibly stale): [ 1658.929592][ C1] NIP: c00000000002cf50 LR: c008000000820178 CTR: c00000000002cfa0 To fix it, change the logic so that the sending CPU waits 5s for the receiving CPU to print its trace. If the receiving CPU prints its trace successfully then the sending CPU just continues, avoiding any spurious "stale" trace. This has the added benefit of allowing all CPUs to print their traces in order and avoids any interleaving of their output.
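The shape of the fix is a common IPI pattern: send, then poll for an acknowledgement with a bounded busy-wait rather than assuming completion. A simplified C sketch of just that wait loop, not the actual kernel code (see the real change in the diff below):

    #include <linux/cpumask.h>
    #include <linux/delay.h>
    #include <linux/printk.h>

    /* Simplified: the receiving CPU clears its own bit in 'mask' once it
     * has printed its trace; the sender polls that bit with a timeout. */
    static void wait_for_trace_ack(int cpu, cpumask_t *mask, u64 timeout_us)
    {
            while (cpumask_test_cpu(cpu, mask) && timeout_us) {
                    udelay(1);
                    timeout_us--;
            }

            if (cpumask_test_cpu(cpu, mask))
                    pr_warn("cpu %d did not print its trace in time\n", cpu);
    }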
Fixes: 5cc05910f26e ("powerpc/64s: Wire up arch_trigger_cpumask_backtrace()") Cc: stable@vger.kernel.org # v4.18+ Reported-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210625140408.3351173-1-mpe@ellerman.id.au --- arch/powerpc/kernel/stacktrace.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index 1961e6d5e33b..2b0d04a1b7d2 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -172,17 +172,31 @@ static void handle_backtrace_ipi(struct pt_regs *regs) static void raise_backtrace_ipi(cpumask_t *mask) { + struct paca_struct *p; unsigned int cpu; + u64 delay_us; for_each_cpu(cpu, mask) { - if (cpu == smp_processor_id()) + if (cpu == smp_processor_id()) { handle_backtrace_ipi(NULL); - else - smp_send_safe_nmi_ipi(cpu, handle_backtrace_ipi, 5 * USEC_PER_SEC); - } + continue; + } - for_each_cpu(cpu, mask) { - struct paca_struct *p = paca_ptrs[cpu]; + delay_us = 5 * USEC_PER_SEC; + + if (smp_send_safe_nmi_ipi(cpu, handle_backtrace_ipi, delay_us)) { + // Now wait up to 5s for the other CPU to do its backtrace + while (cpumask_test_cpu(cpu, mask) && delay_us) { + udelay(1); + delay_us--; + } + + // Other CPU cleared itself from the mask + if (delay_us) + continue; + } + + p = paca_ptrs[cpu]; cpumask_clear_cpu(cpu, mask); -- cgit v1.2.3 From 5f0f95f1e1b64fe55679059837dafb3439b57012 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 25 Jun 2021 11:13:16 +0000 Subject: powerpc/ptrace: Move set_return_regs_changed() before regs_set_return_{msr/ip} regs_set_return_msr() and regs_set_return_ip() have a copy of the code of set_return_regs_changed(). Move up set_return_regs_changed() so it can be reused by regs_set_return_{msr/ip} Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/49f4fb051a3e1cb69f7305d5b6768aec14727c32.1624619582.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ptrace.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index fcf63f559344..14b8105a1e27 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -135,26 +135,26 @@ extern unsigned long profile_pc(struct pt_regs *regs); long do_syscall_trace_enter(struct pt_regs *regs); void do_syscall_trace_leave(struct pt_regs *regs); -static inline void regs_set_return_ip(struct pt_regs *regs, unsigned long ip) +static inline void set_return_regs_changed(void) { - regs->nip = ip; #ifdef CONFIG_PPC_BOOK3S_64 local_paca->hsrr_valid = 0; local_paca->srr_valid = 0; #endif } -static inline void regs_set_return_msr(struct pt_regs *regs, unsigned long msr) +static inline void regs_set_return_ip(struct pt_regs *regs, unsigned long ip) { - regs->msr = msr; + regs->nip = ip; #ifdef CONFIG_PPC_BOOK3S_64 local_paca->hsrr_valid = 0; local_paca->srr_valid = 0; #endif } -static inline void set_return_regs_changed(void) +static inline void regs_set_return_msr(struct pt_regs *regs, unsigned long msr) { + regs->msr = msr; #ifdef CONFIG_PPC_BOOK3S_64 local_paca->hsrr_valid = 0; local_paca->srr_valid = 0; -- cgit v1.2.3 From cae4644673ec5f54c77deab67a57c41395a1539f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 25 Jun 2021 11:13:17 +0000 Subject: powerpc/ptrace: Refactor regs_set_return_{msr/ip} regs_set_return_msr() and regs_set_return_ip() have a copy of the code of 
set_return_regs_changed(). Call the latter instead. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/baf64a91557d3811c155616a6aa23ed7b3b21da4.1624619582.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ptrace.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 14b8105a1e27..3e5d470a6155 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -146,19 +146,13 @@ static inline void set_return_regs_changed(void) static inline void regs_set_return_ip(struct pt_regs *regs, unsigned long ip) { regs->nip = ip; -#ifdef CONFIG_PPC_BOOK3S_64 - local_paca->hsrr_valid = 0; - local_paca->srr_valid = 0; -#endif + set_return_regs_changed(); } static inline void regs_set_return_msr(struct pt_regs *regs, unsigned long msr) { regs->msr = msr; -#ifdef CONFIG_PPC_BOOK3S_64 - local_paca->hsrr_valid = 0; - local_paca->srr_valid = 0; -#endif + set_return_regs_changed(); } static inline void regs_add_return_ip(struct pt_regs *regs, long offset) -- cgit v1.2.3 From 590e1e4254458455477e4e857cd00733280a3d73 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 24 Jun 2021 14:36:32 +0800 Subject: powerpc/sysfs: Replace sizeof(arr)/sizeof(arr[0]) with ARRAY_SIZE The ARRAY_SIZE macro is more compact and more idiomatic in the kernel source. Signed-off-by: Jason Wang Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210624063632.25632-1-wangborong@cdjrlc.com --- arch/powerpc/kernel/sysfs.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 2e08640bb3b4..5ff0e55d0db1 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -843,14 +843,14 @@ static int register_cpu_online(unsigned int cpu) #ifdef HAS_PPC_PMC_IBM case PPC_PMC_IBM: attrs = ibm_common_attrs; - nattrs = sizeof(ibm_common_attrs) / sizeof(struct device_attribute); + nattrs = ARRAY_SIZE(ibm_common_attrs); pmc_attrs = classic_pmc_attrs; break; #endif /* HAS_PPC_PMC_IBM */ #ifdef HAS_PPC_PMC_G4 case PPC_PMC_G4: attrs = g4_common_attrs; - nattrs = sizeof(g4_common_attrs) / sizeof(struct device_attribute); + nattrs = ARRAY_SIZE(g4_common_attrs); pmc_attrs = classic_pmc_attrs; break; #endif /* HAS_PPC_PMC_G4 */ @@ -858,7 +858,7 @@ static int register_cpu_online(unsigned int cpu) case PPC_PMC_PA6T: /* PA Semi starts counting at PMC0 */ attrs = pa6t_attrs; - nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute); + nattrs = ARRAY_SIZE(pa6t_attrs); pmc_attrs = NULL; break; #endif @@ -940,14 +940,14 @@ static int unregister_cpu_online(unsigned int cpu) #ifdef HAS_PPC_PMC_IBM case PPC_PMC_IBM: attrs = ibm_common_attrs; - nattrs = sizeof(ibm_common_attrs) / sizeof(struct device_attribute); + nattrs = ARRAY_SIZE(ibm_common_attrs); pmc_attrs = classic_pmc_attrs; break; #endif /* HAS_PPC_PMC_IBM */ #ifdef HAS_PPC_PMC_G4 case PPC_PMC_G4: attrs = g4_common_attrs; - nattrs = sizeof(g4_common_attrs) / sizeof(struct device_attribute); + nattrs = ARRAY_SIZE(g4_common_attrs); pmc_attrs = classic_pmc_attrs; break; #endif /* HAS_PPC_PMC_G4 */ @@ -955,7 +955,7 @@ static int unregister_cpu_online(unsigned int cpu) case PPC_PMC_PA6T: /* PA Semi starts counting at PMC0 */ attrs = pa6t_attrs; - nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute); + nattrs = ARRAY_SIZE(pa6t_attrs); pmc_attrs = NULL; break; #endif -- cgit v1.2.3 From
767e6e7130b288d856e4f2be365554dc6147a80a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 25 Jun 2021 14:31:46 +0000 Subject: powerpc/interrupt: Also use exit_must_hard_disable() on PPC32 Reduce #ifdefs a bit by making exit_must_hard_disable() return true on PPC32. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/52531029563c1fc823b790058e799d0ca71b028c.1624631463.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/interrupt.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index cee12f2fd459..1b4a99ecb7e5 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -33,10 +33,10 @@ static inline bool exit_must_hard_disable(void) { return static_branch_unlikely(&interrupt_exit_not_reentrant); } -#elif defined(CONFIG_PPC64) +#else static inline bool exit_must_hard_disable(void) { - return false; + return IS_ENABLED(CONFIG_PPC32); } #endif @@ -56,12 +56,10 @@ static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable) /* This must be done with RI=1 because tracing may touch vmaps */ trace_hardirqs_on(); -#ifdef CONFIG_PPC32 - __hard_EE_RI_disable(); -#else if (exit_must_hard_disable() || !restartable) __hard_EE_RI_disable(); +#ifdef CONFIG_PPC64 /* This pattern matches prep_irq_for_idle */ if (unlikely(lazy_irq_pending_nocheck())) { if (exit_must_hard_disable() || !restartable) { -- cgit v1.2.3 From b064037ea4104120418ccbf39951a6d529a9d6d5 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 25 Jun 2021 14:49:09 +0000 Subject: powerpc/interrupt: Use names in check_return_regs_valid() regs->trap == 0x3000 is trap_is_scv() trap 0x500 is INTERRUPT_EXTERNAL Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d48bf0184a1de185eb0ed3282247f8a294710674.1624632537.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/interrupt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 1b4a99ecb7e5..0052702ee5ac 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -222,12 +222,12 @@ static void check_return_regs_valid(struct pt_regs *regs) u8 *validp; char *h; - if (regs->trap == 0x3000) + if (trap_is_scv(regs)) return; trap = regs->trap; // EE in HV mode sets HSRRs like 0xea0 - if (cpu_has_feature(CPU_FTR_HVMODE) && trap == 0x500) + if (cpu_has_feature(CPU_FTR_HVMODE) && trap == INTERRUPT_EXTERNAL) trap = 0xea0; switch (trap) { -- cgit v1.2.3 From c89e632658e793fbbdcbfbe80a6c13bbf7203e9b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 28 Jun 2021 06:56:11 +0000 Subject: powerpc/32s: Fix setup_{kuap/kuep}() on SMP On SMP, setup_kup() is also called from start_secondary(). start_secondary() is not an __init function. Remove the __init marker from setup_kuep() and setup_kuap().
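For context on why the __init marker is the bug: __init code is placed in a section that is freed once boot completes, so calling it later from secondary-CPU bringup jumps into freed memory. An illustrative C sketch of the pattern and a one-time guard (function names are hypothetical; the boot_cpuid check mirrors the 4xx fix below):

    #include <linux/init.h>
    #include <linux/smp.h>
    #include <asm/smp.h>   /* boot_cpuid */

    /* Before: in .init.text, freed after boot. Calling this from
     * start_secondary() after init memory is released is a bug. */
    void __init setup_feature_early(void)
    {
            /* ... */
    }

    /* After: normal text, safe to call at any time; one-time work
     * is guarded explicitly instead. */
    void setup_feature(void)
    {
            if (smp_processor_id() != boot_cpuid)
                    return;        /* boot CPU already did the work */
            /* ... one-time setup ... */
    }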
From c89e632658e793fbbdcbfbe80a6c13bbf7203e9b Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Mon, 28 Jun 2021 06:56:11 +0000
Subject: powerpc/32s: Fix setup_{kuap/kuep}() on SMP

On SMP, setup_kup() is also called from start_secondary().
start_secondary() is not an __init function.

Remove the __init marker from setup_kuep() and setup_kuap().

Fixes: 86f46f343272 ("powerpc/32s: Initialise KUAP and KUEP in C")
Reported-by: kernel test robot
Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/42f4bd12b476942e4d5dc81c0e839d8871b20b1c.1624863319.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/mm/book3s32/kuap.c | 2 +-
 arch/powerpc/mm/book3s32/kuep.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/book3s32/kuap.c b/arch/powerpc/mm/book3s32/kuap.c
index 9df6911b8fde..0f920f09af57 100644
--- a/arch/powerpc/mm/book3s32/kuap.c
+++ b/arch/powerpc/mm/book3s32/kuap.c
@@ -18,7 +18,7 @@ void kuap_unlock_all_ool(void)
 }
 EXPORT_SYMBOL(kuap_unlock_all_ool);
 
-void __init setup_kuap(bool disabled)
+void setup_kuap(bool disabled)
 {
 	if (!disabled)
 		kuap_lock_all_ool();
diff --git a/arch/powerpc/mm/book3s32/kuep.c b/arch/powerpc/mm/book3s32/kuep.c
index 3f6eb6e23fca..c20733d6e02c 100644
--- a/arch/powerpc/mm/book3s32/kuep.c
+++ b/arch/powerpc/mm/book3s32/kuep.c
@@ -5,7 +5,7 @@
 
 struct static_key_false disable_kuep_key;
 
-void __init setup_kuep(bool disabled)
+void setup_kuep(bool disabled)
 {
 	if (!disabled)
 		kuep_lock();
-- 
cgit v1.2.3

From fc4999864bca323f1b844fefe1b402632443c076 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Tue, 29 Jun 2021 12:24:21 +0000
Subject: powerpc/4xx: Fix setup_kuep() on SMP

On SMP, setup_kuep() is also called from start_secondary() since commit
86f46f343272 ("powerpc/32s: Initialise KUAP and KUEP in C").

start_secondary() is not an __init function.

Remove the __init marker from setup_kuep() and bail out when it is not
called on the first CPU, as the work is already done.

Fixes: 10248dcba120 ("powerpc/44x: Implement Kernel Userspace Exec Protection (KUEP)")
Fixes: 86f46f343272 ("powerpc/32s: Initialise KUAP and KUEP in C")
Reported-by: kernel test robot
Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/8ee05934288994a65743a987acb1558f12c0c8c1.1624969450.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/mm/nohash/44x.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/nohash/44x.c b/arch/powerpc/mm/nohash/44x.c
index 7da6d1e9fc9b..e079f26b267e 100644
--- a/arch/powerpc/mm/nohash/44x.c
+++ b/arch/powerpc/mm/nohash/44x.c
@@ -25,6 +25,7 @@
 #include
 #include
 #include
+#include
 
 #include
 
@@ -241,8 +242,11 @@ void __init mmu_init_secondary(int cpu)
 #endif /* CONFIG_SMP */
 
 #ifdef CONFIG_PPC_KUEP
-void __init setup_kuep(bool disabled)
+void setup_kuep(bool disabled)
 {
+	if (smp_processor_id() != boot_cpuid)
+		return;
+
 	if (disabled)
 		patch_instruction_site(&patch__tlb_44x_kuep, ppc_inst(PPC_RAW_NOP()));
 	else
-- 
cgit v1.2.3
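The boot-CPU check above is a common shape for initialisation that is invoked on every CPU but only needs to run once (here, patching an instruction site). A rough user-space analogue, with hypothetical names standing in for smp_processor_id() and boot_cpuid:

    #include <stdio.h>

    /* hypothetical stand-ins for smp_processor_id() and boot_cpuid */
    static int current_cpu;
    static const int boot_cpu = 0;

    static void setup_feature(void)
    {
        /* global, one-time work: only the boot CPU performs it */
        if (current_cpu != boot_cpu)
            return;
        printf("patched once, on CPU %d\n", current_cpu);
    }

    int main(void)
    {
        for (current_cpu = 0; current_cpu < 4; current_cpu++)
            setup_feature();    /* called on every CPU */
        return 0;
    }
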
From 5567b1ee29b7a83e8c01d99d34b5bbd306ce0bcf Mon Sep 17 00:00:00 2001
From: Nicholas Piggin
Date: Wed, 30 Jun 2021 17:46:13 +1000
Subject: powerpc/64s: fix hash page fault interrupt handler

The early bad fault or key fault test in do_hash_fault() ends up calling
into ___do_page_fault without having gone through an interrupt handler
wrapper (except the initial _RAW one). This can end up calling local irq
functions while the interrupt has not been reconciled, which will likely
cause crashes and it trips up on a later patch that adds more assertions.

pkey_exec_prot from selftests causes this path to be executed.

There is no real reason the in_nmi() test should be performed before the
key fault check. In fact, if a perf interrupt in the hash fault code did
a stack walk that somehow took a key fault, then running ___do_page_fault
could cause another hash fault and cause problems.

Move the in_nmi() test first, and then do everything else inside the
regular interrupt handler function.

Fixes: 3a96570ffceb ("powerpc: convert interrupt handlers to use wrappers")
Reported-by: Sachin Sant
Signed-off-by: Nicholas Piggin
Tested-by: Sachin Sant
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20210630074621.2109197-2-npiggin@gmail.com
---
 arch/powerpc/mm/book3s64/hash_utils.c | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 96d9aa164007..ac5720371c0d 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1522,8 +1522,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
 }
 EXPORT_SYMBOL_GPL(hash_page);
 
-DECLARE_INTERRUPT_HANDLER_RET(__do_hash_fault);
-DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
+DECLARE_INTERRUPT_HANDLER(__do_hash_fault);
+DEFINE_INTERRUPT_HANDLER(__do_hash_fault)
 {
 	unsigned long ea = regs->dar;
 	unsigned long dsisr = regs->dsisr;
@@ -1533,6 +1533,11 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
 	unsigned int region_id;
 	long err;
 
+	if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) {
+		hash__do_page_fault(regs);
+		return;
+	}
+
 	region_id = get_region_id(ea);
 	if ((region_id == VMALLOC_REGION_ID) || (region_id == IO_REGION_ID))
 		mm = &init_mm;
@@ -1571,9 +1576,10 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
 			bad_page_fault(regs, SIGBUS);
 		}
 		err = 0;
-	}
 
-	return err;
+	} else if (err) {
+		hash__do_page_fault(regs);
+	}
 }
 
 /*
@@ -1582,13 +1588,6 @@
 */
 DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault)
 {
-	unsigned long dsisr = regs->dsisr;
-
-	if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) {
-		hash__do_page_fault(regs);
-		return 0;
-	}
-
 	/*
 	 * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then
 	 * don't call hash_page, just fail the fault. This is required to
@@ -1607,8 +1606,7 @@ DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault)
 		return 0;
 	}
 
-	if (__do_hash_fault(regs))
-		hash__do_page_fault(regs);
+	__do_hash_fault(regs);
 
 	return 0;
 }
-- 
cgit v1.2.3
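The reordering above follows a general rule for raw interrupt entry: test whether any real work is permitted at all (here, the in_nmi() case) before entering paths that assume a reconciled interrupt state. A schematic C sketch of the resulting control flow, with hypothetical names; this is not the kernel's actual implementation:

    #include <stdbool.h>
    #include <stdio.h>

    /* hypothetical stand-ins for in_nmi() and the wrapped handler */
    static bool in_nmi(void) { return false; }

    static void do_fault_work(unsigned long dsisr)
    {
        /* the bad/key-fault special cases live in here too, so the
         * interrupt state is reconciled exactly once */
        printf("handling fault, dsisr=%#lx\n", dsisr);
    }

    /* raw entry point: the NMI test comes first */
    static int raw_fault_entry(unsigned long dsisr)
    {
        if (in_nmi())
            return 1;    /* refuse: may not take locks here */
        do_fault_work(dsisr);
        return 0;
    }

    int main(void)
    {
        return raw_fault_entry(0x40000000);
    }
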
From fce01acf830a697110ed72ecace4b0afdbcd53cb Mon Sep 17 00:00:00 2001
From: Nicholas Piggin
Date: Wed, 30 Jun 2021 17:46:14 +1000
Subject: powerpc/64e: fix CONFIG_RELOCATABLE build warnings

CONFIG_RELOCATABLE=y causes build warnings from unresolved relocations.
Fix these by using TOC addressing for these cases.

Commit 24d33ac5b8ff ("powerpc/64s: Make prom_init require RELOCATABLE")
caused some 64e configs to select RELOCATABLE, resulting in these
warnings, but the underlying issue was already there.

This passes basic qemu testing.

Signed-off-by: Nicholas Piggin
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20210630074621.2109197-3-npiggin@gmail.com
---
 arch/powerpc/kernel/exceptions-64e.S | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 22fcd95dd8dc..d634bfceed2c 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -912,8 +912,14 @@ kernel_dbg_exc:
 	b	interrupt_return
 
 .macro SEARCH_RESTART_TABLE
+#ifdef CONFIG_RELOCATABLE
+	ld	r11,PACATOC(r13)
+	ld	r14,__start___restart_table@got(r11)
+	ld	r15,__stop___restart_table@got(r11)
+#else
 	LOAD_REG_IMMEDIATE_SYM(r14, r11, __start___restart_table)
 	LOAD_REG_IMMEDIATE_SYM(r15, r11, __stop___restart_table)
+#endif
 300:
 	cmpd	r14,r15
 	beq	302f
@@ -1329,7 +1335,12 @@ a2_tlbinit_code_start:
 a2_tlbinit_after_linear_map:
 
 	/* Now we branch the new virtual address mapped by this entry */
+#ifdef CONFIG_RELOCATABLE
+	ld	r5,PACATOC(r13)
+	ld	r3,1f@got(r5)
+#else
 	LOAD_REG_IMMEDIATE_SYM(r3, r5, 1f)
+#endif
 	mtctr	r3
 	bctr
-- 
cgit v1.2.3

From 9b69d48c7516a29cdaacd18d8bf5f575014a42a1 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin
Date: Wed, 30 Jun 2021 17:46:15 +1000
Subject: powerpc/64e: remove implicit soft-masking and interrupt exit restart logic

The implicit soft-masking to speed up interrupt return was going to be
used by 64e as well, but it has not been extensively tested on that
platform and is not considered ready. It was intended to be disabled
before merge. Disable it for now.

Most of the restart code is common with 64s, so with more correctness
and performance testing this could be re-enabled by adding the extra
soft-mask checks to interrupt handlers and flipping
exit_must_hard_disable().
Fixes: 9d1988ca87dd ("powerpc/64: treat low kernel text as irqs soft-masked") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210630074621.2109197-4-npiggin@gmail.com --- arch/powerpc/include/asm/interrupt.h | 33 ++++++++++++++++++++++++--------- arch/powerpc/kernel/exceptions-64e.S | 12 +----------- arch/powerpc/kernel/interrupt.c | 2 +- arch/powerpc/kernel/interrupt_64.S | 16 ++++++++++++++-- 4 files changed, 40 insertions(+), 23 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 8b4b1e84e110..f13c93b033c7 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -73,20 +73,34 @@ #include #include -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC_BOOK3S_64 extern char __end_soft_masked[]; unsigned long search_kernel_restart_table(unsigned long addr); -#endif -#ifdef CONFIG_PPC_BOOK3S_64 DECLARE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant); +static inline bool is_implicit_soft_masked(struct pt_regs *regs) +{ + if (regs->msr & MSR_PR) + return false; + + if (regs->nip >= (unsigned long)__end_soft_masked) + return false; + + return true; +} + static inline void srr_regs_clobbered(void) { local_paca->srr_valid = 0; local_paca->hsrr_valid = 0; } #else +static inline bool is_implicit_soft_masked(struct pt_regs *regs) +{ + return false; +} + static inline void srr_regs_clobbered(void) { } @@ -150,11 +164,13 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup */ if (TRAP(regs) != INTERRUPT_PROGRAM) { CT_WARN_ON(ct_state() != CONTEXT_KERNEL); - BUG_ON(regs->nip < (unsigned long)__end_soft_masked); + BUG_ON(is_implicit_soft_masked(regs)); } +#ifdef CONFIG_PPC_BOOK3S /* Move this under a debugging check */ if (arch_irq_disabled_regs(regs)) BUG_ON(search_kernel_restart_table(regs->nip)); +#endif } #endif @@ -244,10 +260,9 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte local_paca->irq_soft_mask = IRQS_ALL_DISABLED; local_paca->irq_happened |= PACA_IRQ_HARD_DIS; - if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !(regs->msr & MSR_PR) && - regs->nip < (unsigned long)__end_soft_masked) { - // Kernel code running below __end_soft_masked is - // implicitly soft-masked. + if (is_implicit_soft_masked(regs)) { + // Adjust regs->softe soft implicit soft-mask, so + // arch_irq_disabled_regs(regs) behaves as expected. regs->softe = IRQS_ALL_DISABLED; } @@ -282,6 +297,7 @@ static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct inter */ #ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC_BOOK3S if (arch_irq_disabled_regs(regs)) { unsigned long rst = search_kernel_restart_table(regs->nip); if (rst) @@ -289,7 +305,6 @@ static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct inter } #endif -#ifdef CONFIG_PPC64 if (nmi_disables_ftrace(regs)) this_cpu_set_ftrace_enabled(state->ftrace_enabled); diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index d634bfceed2c..1401787b0b93 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -342,17 +342,7 @@ ret_from_mc_except: #define PROLOG_ADDITION_MASKABLE_GEN(n) \ lbz r10,PACAIRQSOFTMASK(r13); /* are irqs soft-masked? */ \ andi. r10,r10,IRQS_DISABLED; /* yes -> go out of line */ \ - bne masked_interrupt_book3e_##n; \ - /* Kernel code below __end_soft_masked is implicitly masked */ \ - andi. 
r10,r11,MSR_PR; \ - bne 1f; /* user -> not masked */ \ - std r14,PACA_EXGEN+EX_R14(r13); \ - LOAD_REG_IMMEDIATE_SYM(r14, r10, __end_soft_masked); \ - mfspr r10,SPRN_SRR0; \ - cmpld r10,r14; \ - ld r14,PACA_EXGEN+EX_R14(r13); \ - blt masked_interrupt_book3e_##n; \ -1: + bne masked_interrupt_book3e_##n /* * Additional regs must be re-loaded from paca before EXCEPTION_COMMON* is diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 0052702ee5ac..21bbd615ca41 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -36,7 +36,7 @@ static inline bool exit_must_hard_disable(void) #else static inline bool exit_must_hard_disable(void) { - return IS_ENABLED(CONFIG_PPC32); + return true; } #endif diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index e7a50613a570..09b8d8846c67 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -231,7 +231,7 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_emulate) li r10,IRQS_ALL_DISABLED stb r10,PACAIRQSOFTMASK(r13) b system_call_vectored_common -#endif +#endif /* CONFIG_PPC_BOOK3S */ .balign IFETCH_ALIGN_BYTES .globl system_call_common_real @@ -320,10 +320,12 @@ END_BTB_FLUSH_SECTION li r5,0 /* !scv */ bl syscall_exit_prepare std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ +#ifdef CONFIG_PPC_BOOK3S .Lsyscall_rst_start: lbz r11,PACAIRQHAPPENED(r13) andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l bne- syscall_restart +#endif li r11,IRQS_ENABLED stb r11,PACAIRQSOFTMASK(r13) li r11,0 @@ -396,6 +398,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) b .Lsyscall_restore_regs_cont .Lsyscall_rst_end: +#ifdef CONFIG_PPC_BOOK3S syscall_restart: GET_PACA(r13) ld r1,PACA_EXIT_SAVE_R1(r13) @@ -409,6 +412,7 @@ syscall_restart: b .Lsyscall_rst_start RESTART_TABLE(.Lsyscall_rst_start, .Lsyscall_rst_end, syscall_restart) +#endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM tabort_syscall: @@ -504,10 +508,12 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()) bne- .Lrestore_nvgprs_\srr .Lrestore_nvgprs_\srr\()_cont: std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ +#ifdef CONFIG_PPC_BOOK3S .Linterrupt_return_\srr\()_user_rst_start: lbz r11,PACAIRQHAPPENED(r13) andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l bne- interrupt_return_\srr\()_user_restart +#endif li r11,IRQS_ENABLED stb r11,PACAIRQSOFTMASK(r13) li r11,0 @@ -590,6 +596,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) REST_NVGPRS(r1) b .Lrestore_nvgprs_\srr\()_cont +#ifdef CONFIG_PPC_BOOK3S interrupt_return_\srr\()_user_restart: GET_PACA(r13) ld r1,PACA_EXIT_SAVE_R1(r13) @@ -602,6 +609,7 @@ interrupt_return_\srr\()_user_restart: b .Linterrupt_return_\srr\()_user_rst_start RESTART_TABLE(.Linterrupt_return_\srr\()_user_rst_start, .Linterrupt_return_\srr\()_user_rst_end, interrupt_return_\srr\()_user_restart) +#endif .balign IFETCH_ALIGN_BYTES .Lkernel_interrupt_return_\srr\(): @@ -615,9 +623,11 @@ RESTART_TABLE(.Linterrupt_return_\srr\()_user_rst_start, .Linterrupt_return_\srr cmpwi r11,IRQS_ENABLED stb r11,PACAIRQSOFTMASK(r13) bne 1f +#ifdef CONFIG_PPC_BOOK3S lbz r11,PACAIRQHAPPENED(r13) andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l bne- interrupt_return_\srr\()_kernel_restart +#endif li r11,0 stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS 1: @@ -717,6 +727,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) b . 
/* prevent speculative execution */
 .Linterrupt_return_\srr\()_kernel_rst_end:
 
+#ifdef CONFIG_PPC_BOOK3S
 interrupt_return_\srr\()_kernel_restart:
 	GET_PACA(r13)
 	ld	r1,PACA_EXIT_SAVE_R1(r13)
@@ -729,14 +740,15 @@ interrupt_return_\srr\()_kernel_restart:
 	b	.Linterrupt_return_\srr\()_kernel_rst_start
 
 RESTART_TABLE(.Linterrupt_return_\srr\()_kernel_rst_start, .Linterrupt_return_\srr\()_kernel_rst_end, interrupt_return_\srr\()_kernel_restart)
+#endif
 
 .endm
 
 interrupt_return_macro srr
 #ifdef CONFIG_PPC_BOOK3S
 interrupt_return_macro hsrr
-#endif /* CONFIG_PPC_BOOK3S */
 
 .globl __end_soft_masked
 __end_soft_masked:
 DEFINE_FIXED_SYMBOL(__end_soft_masked)
+#endif /* CONFIG_PPC_BOOK3S */
-- 
cgit v1.2.3

From 325678fd052259e7c05ef29060a73c705ea90432 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin
Date: Wed, 30 Jun 2021 17:46:16 +1000
Subject: powerpc/64s: add a table of implicit soft-masked addresses

Commit 9d1988ca87dd ("powerpc/64: treat low kernel text as irqs
soft-masked") ends up catching too much code, including ret_from_fork,
and parts of interrupt and syscall return that do not expect interrupts
to be soft-masked. If an interrupt gets marked pending, and then the
code proceeds out of the implicit soft-masked region, it will fail to
deal with the pending interrupt.

Fix this by adding a new table of addresses which explicitly marks the
regions of code that are soft masked. This table is only checked for
interrupts that hit below __end_soft_masked, so most kernel interrupts
will not have the overhead of the table search.

Fixes: 9d1988ca87dd ("powerpc/64: treat low kernel text as irqs soft-masked")
Reported-by: Sachin Sant
Signed-off-by: Nicholas Piggin
Tested-by: Sachin Sant
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20210630074621.2109197-5-npiggin@gmail.com
---
 arch/powerpc/include/asm/interrupt.h |  3 +-
 arch/powerpc/include/asm/ppc_asm.h   |  7 ++++
 arch/powerpc/kernel/exceptions-64s.S | 64 ++++++++++++++++++++++++++++++------
 arch/powerpc/kernel/interrupt_64.S   |  8 +++++
 arch/powerpc/kernel/vmlinux.lds.S    |  9 +++++
 arch/powerpc/lib/restart_table.c     | 26 +++++++++++++++
 6 files changed, 106 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index f13c93b033c7..d7df247a149c 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -75,6 +75,7 @@
 #ifdef CONFIG_PPC_BOOK3S_64
 extern char __end_soft_masked[];
+bool search_kernel_soft_mask_table(unsigned long addr);
 unsigned long search_kernel_restart_table(unsigned long addr);
 
 DECLARE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant);
@@ -87,7 +88,7 @@ static inline bool is_implicit_soft_masked(struct pt_regs *regs)
 	if (regs->nip >= (unsigned long)__end_soft_masked)
 		return false;
 
-	return true;
+	return search_kernel_soft_mask_table(regs->nip);
 }
 
 static inline void srr_regs_clobbered(void)
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index c9c2c36c1f8f..116c1519728a 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -762,6 +762,13 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96)
 	stringify_in_c(.long (_target) - . 
;) \ stringify_in_c(.previous) +#define SOFT_MASK_TABLE(_start, _end) \ + stringify_in_c(.section __soft_mask_table,"a";)\ + stringify_in_c(.balign 8;) \ + stringify_in_c(.llong (_start);) \ + stringify_in_c(.llong (_end);) \ + stringify_in_c(.previous) + #define RESTART_TABLE(_start, _end, _target) \ stringify_in_c(.section __restart_table,"a";)\ stringify_in_c(.balign 8;) \ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index ecd07bf604c5..4aec59a77d4c 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -428,21 +428,31 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) /* If coming from user, skip soft-mask tests. */ andi. r10,r12,MSR_PR - bne 2f + bne 3f /* - * Kernel code running below __end_soft_masked is implicitly - * soft-masked + * Kernel code running below __end_soft_masked may be + * implicitly soft-masked if it is within the regions + * in the soft mask table. */ LOAD_HANDLER(r10, __end_soft_masked) cmpld r11,r10 - + bge+ 1f + + /* SEARCH_SOFT_MASK_TABLE clobbers r9,r10,r12 */ + mtctr r12 + stw r9,PACA_EXGEN+EX_CCR(r13) + SEARCH_SOFT_MASK_TABLE + cmpdi r12,0 + mfctr r12 /* Restore r12 to SRR1 */ + lwz r9,PACA_EXGEN+EX_CCR(r13) + beq 1f /* Not in soft-mask table */ li r10,IMASK - blt- 1f + b 2f /* In soft-mask table, always mask */ /* Test the soft mask state against our interrupt's bit */ - lbz r10,PACAIRQSOFTMASK(r13) -1: andi. r10,r10,IMASK +1: lbz r10,PACAIRQSOFTMASK(r13) +2: andi. r10,r10,IMASK /* Associate vector numbers with bits in paca->irq_happened */ .if IVEC == 0x500 || IVEC == 0xea0 li r10,PACA_IRQ_EE @@ -473,7 +483,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) .if ISTACK andi. r10,r12,MSR_PR /* See if coming from user */ -2: mr r10,r1 /* Save r1 */ +3: mr r10,r1 /* Save r1 */ subi r1,r1,INT_FRAME_SIZE /* alloc frame on kernel stack */ beq- 100f ld r1,PACAKSAVE(r13) /* kernel stack to use */ @@ -624,6 +634,36 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) 303: .endm +.macro SEARCH_SOFT_MASK_TABLE +#ifdef CONFIG_RELOCATABLE + mr r12,r2 + ld r2,PACATOC(r13) + LOAD_REG_ADDR(r9, __start___soft_mask_table) + LOAD_REG_ADDR(r10, __stop___soft_mask_table) + mr r2,r12 +#else + LOAD_REG_IMMEDIATE_SYM(r9, r12, __start___soft_mask_table) + LOAD_REG_IMMEDIATE_SYM(r10, r12, __stop___soft_mask_table) +#endif +300: + cmpd r9,r10 + beq 302f + ld r12,0(r9) + cmpld r11,r12 + blt 301f + ld r12,8(r9) + cmpld r11,r12 + bge 301f + li r12,1 + b 303f +301: + addi r9,r9,16 + b 300b +302: + li r12,0 +303: +.endm + /* * Restore all registers including H/SRR0/1 saved in a stack frame of a * standard exception. @@ -754,8 +794,8 @@ __start_interrupts: * scv instructions enter the kernel without changing EE, RI, ME, or HV. * In particular, this means we can take a maskable interrupt at any point * in the scv handler, which is unlike any other interrupt. This is solved - * by treating the instruction addresses below __end_soft_masked as being - * soft-masked. + * by treating the instruction addresses in the handler as being soft-masked, + * by adding a SOFT_MASK_TABLE entry for them. 
* * AIL-0 mode scv exceptions go to 0x17000-0x17fff, but we set AIL-3 and * ensure scv is never executed with relocation off, which means AIL-0 @@ -772,6 +812,7 @@ __start_interrupts: * syscall register convention is in Documentation/powerpc/syscall64-abi.rst */ EXC_VIRT_BEGIN(system_call_vectored, 0x3000, 0x1000) +1: /* SCV 0 */ mr r9,r13 GET_PACA(r13) @@ -801,8 +842,11 @@ EXC_VIRT_BEGIN(system_call_vectored, 0x3000, 0x1000) b system_call_vectored_sigill #endif .endr +2: EXC_VIRT_END(system_call_vectored, 0x3000, 0x1000) +SOFT_MASK_TABLE(1b, 2b) // Treat scv vectors as soft-masked, see comment above. + #ifdef CONFIG_RELOCATABLE TRAMP_VIRT_BEGIN(system_call_vectored_tramp) __LOAD_HANDLER(r10, system_call_vectored_common) diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index 09b8d8846c67..2c92bbca02ca 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -207,7 +207,9 @@ syscall_vectored_\name\()_restart: bl syscall_exit_restart std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ b .Lsyscall_vectored_\name\()_rst_start +1: +SOFT_MASK_TABLE(.Lsyscall_vectored_\name\()_rst_start, 1b) RESTART_TABLE(.Lsyscall_vectored_\name\()_rst_start, .Lsyscall_vectored_\name\()_rst_end, syscall_vectored_\name\()_restart) .endm @@ -410,7 +412,9 @@ syscall_restart: bl syscall_exit_restart std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ b .Lsyscall_rst_start +1: +SOFT_MASK_TABLE(.Lsyscall_rst_start, 1b) RESTART_TABLE(.Lsyscall_rst_start, .Lsyscall_rst_end, syscall_restart) #endif @@ -607,7 +611,9 @@ interrupt_return_\srr\()_user_restart: bl interrupt_exit_user_restart std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ b .Linterrupt_return_\srr\()_user_rst_start +1: +SOFT_MASK_TABLE(.Linterrupt_return_\srr\()_user_rst_start, 1b) RESTART_TABLE(.Linterrupt_return_\srr\()_user_rst_start, .Linterrupt_return_\srr\()_user_rst_end, interrupt_return_\srr\()_user_restart) #endif @@ -738,7 +744,9 @@ interrupt_return_\srr\()_kernel_restart: bl interrupt_exit_kernel_restart std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ b .Linterrupt_return_\srr\()_kernel_rst_start +1: +SOFT_MASK_TABLE(.Linterrupt_return_\srr\()_kernel_rst_start, 1b) RESTART_TABLE(.Linterrupt_return_\srr\()_kernel_rst_start, .Linterrupt_return_\srr\()_kernel_rst_end, interrupt_return_\srr\()_kernel_restart) #endif diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 16c5e13e00c4..40bdefe9caa7 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -9,6 +9,14 @@ #define EMITS_PT_NOTE #define RO_EXCEPTION_TABLE_ALIGN 0 +#define SOFT_MASK_TABLE(align) \ + . = ALIGN(align); \ + __soft_mask_table : AT(ADDR(__soft_mask_table) - LOAD_OFFSET) { \ + __start___soft_mask_table = .; \ + KEEP(*(__soft_mask_table)) \ + __stop___soft_mask_table = .; \ + } + #define RESTART_TABLE(align) \ . = ALIGN(align); \ __restart_table : AT(ADDR(__restart_table) - LOAD_OFFSET) { \ @@ -132,6 +140,7 @@ SECTIONS RO_DATA(PAGE_SIZE) #ifdef CONFIG_PPC64 + SOFT_MASK_TABLE(8) RESTART_TABLE(8) . 
= ALIGN(8);
 
diff --git a/arch/powerpc/lib/restart_table.c b/arch/powerpc/lib/restart_table.c
index 7cd20757cc33..bccb662c1b7b 100644
--- a/arch/powerpc/lib/restart_table.c
+++ b/arch/powerpc/lib/restart_table.c
@@ -1,15 +1,41 @@
 #include
 #include
 
+struct soft_mask_table_entry {
+	unsigned long start;
+	unsigned long end;
+};
+
 struct restart_table_entry {
 	unsigned long start;
 	unsigned long end;
 	unsigned long fixup;
 };
 
+extern struct soft_mask_table_entry __start___soft_mask_table[];
+extern struct soft_mask_table_entry __stop___soft_mask_table[];
+
 extern struct restart_table_entry __start___restart_table[];
 extern struct restart_table_entry __stop___restart_table[];
 
+/* Given an address, look for it in the soft mask table */
+bool search_kernel_soft_mask_table(unsigned long addr)
+{
+	struct soft_mask_table_entry *smte = __start___soft_mask_table;
+
+	while (smte < __stop___soft_mask_table) {
+		unsigned long start = smte->start;
+		unsigned long end = smte->end;
+
+		if (addr >= start && addr < end)
+			return true;
+
+		smte++;
+	}
+	return false;
+}
+NOKPROBE_SYMBOL(search_kernel_soft_mask_table);
+
 /* Given an address, look for it in the kernel exception table */
 unsigned long search_kernel_restart_table(unsigned long addr)
 {
-- 
cgit v1.2.3

From 1b0482229c302a3c6afd00d6b3bf0169cf279b44 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin
Date: Wed, 30 Jun 2021 17:46:17 +1000
Subject: powerpc/64s/interrupt: preserve regs->softe for NMI interrupts

If an NMI interrupt hits in an implicit soft-masked region, regs->softe
is modified to reflect that. This may not be necessary for correctness
at the moment, but leaving it modified is surprising and unhelpful when
debugging or adding checks. Make sure it is changed back to how it was
found before returning.

Fixes: 4ec5feec1ad0 ("powerpc/64s: Make NMI record implicitly soft-masked code as irqs disabled")
Signed-off-by: Nicholas Piggin
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20210630074621.2109197-6-npiggin@gmail.com
---
 arch/powerpc/include/asm/interrupt.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index d7df247a149c..789311d1e283 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -227,6 +227,7 @@ struct interrupt_nmi_state {
 	u8 irq_soft_mask;
 	u8 irq_happened;
 	u8 ftrace_enabled;
+	u64 softe;
 #endif
 };
 
@@ -252,6 +253,7 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte
 #ifdef CONFIG_PPC64
 	state->irq_soft_mask = local_paca->irq_soft_mask;
 	state->irq_happened = local_paca->irq_happened;
+	state->softe = regs->softe;
 
 	/*
 	 * Set IRQS_ALL_DISABLED unconditionally so irqs_disabled() does
@@ -311,6 +313,7 @@ static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct inter
 	/* Check we didn't change the pending interrupt mask. */
 	WARN_ON_ONCE((state->irq_happened | PACA_IRQ_HARD_DIS) != local_paca->irq_happened);
 
+	regs->softe = state->softe;
 	local_paca->irq_happened = state->irq_happened;
 	local_paca->irq_soft_mask = state->irq_soft_mask;
 #endif
-- 
cgit v1.2.3

From 2b43dd7653cca47d297756980846ebbfe8887fa1 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin
Date: Wed, 30 Jun 2021 17:46:18 +1000
Subject: powerpc/64: enable MSR[EE] in irq replay pt_regs

Similar to commit 2b48e96be2f9f ("powerpc/64: fix irq replay
pt_regs->softe value"), enable MSR_EE in pt_regs->msr. This makes the
regs look more normal.
It also allows some extra debug checks to be added to interrupt handler entry. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210630074621.2109197-7-npiggin@gmail.com --- arch/powerpc/include/asm/interrupt.h | 4 ++++ arch/powerpc/kernel/irq.c | 1 + 2 files changed, 5 insertions(+) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 789311d1e283..d4bdf7d274ac 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -173,6 +173,8 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup BUG_ON(search_kernel_restart_table(regs->nip)); #endif } + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + BUG_ON(!arch_irq_disabled_regs(regs) && !(regs->msr & MSR_EE)); #endif booke_restore_dbcr0(); @@ -268,6 +270,8 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte // arch_irq_disabled_regs(regs) behaves as expected. regs->softe = IRQS_ALL_DISABLED; } + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + BUG_ON(!arch_irq_disabled_regs(regs) && !(regs->msr & MSR_EE)); /* Don't do any per-CPU operations until interrupt state is fixed */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 8428caf3194e..91e63eac4e8f 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -121,6 +121,7 @@ void replay_soft_interrupts(void) ppc_save_regs(®s); regs.softe = IRQS_ENABLED; + regs.msr |= MSR_EE; again: if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) -- cgit v1.2.3 From 98798f33c6be5a511ab61958b40835b3ef08def2 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 30 Jun 2021 17:46:19 +1000 Subject: powerpc/64/interrupt: add missing kprobe annotations on interrupt exit symbols If one interrupt exit symbol must not be kprobed, none of them can be, without more justification for why it's safe. Disallow kprobing on any of the (non-local) labels in the exit paths. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210630074621.2109197-8-npiggin@gmail.com --- arch/powerpc/kernel/interrupt_64.S | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index 2c92bbca02ca..5c18362693fe 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -197,6 +197,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) .Lsyscall_vectored_\name\()_rst_end: syscall_vectored_\name\()_restart: +_ASM_NOKPROBE_SYMBOL(syscall_vectored_\name\()_restart) GET_PACA(r13) ld r1,PACA_EXIT_SAVE_R1(r13) ld r2,PACATOC(r13) @@ -238,6 +239,7 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_emulate) .balign IFETCH_ALIGN_BYTES .globl system_call_common_real system_call_common_real: +_ASM_NOKPROBE_SYMBOL(system_call_common_real) ld r10,PACAKMSR(r13) /* get MSR value for kernel */ mtmsrd r10 @@ -402,6 +404,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) #ifdef CONFIG_PPC_BOOK3S syscall_restart: +_ASM_NOKPROBE_SYMBOL(syscall_restart) GET_PACA(r13) ld r1,PACA_EXIT_SAVE_R1(r13) ld r2,PACATOC(r13) @@ -420,6 +423,7 @@ RESTART_TABLE(.Lsyscall_rst_start, .Lsyscall_rst_end, syscall_restart) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM tabort_syscall: +_ASM_NOKPROBE_SYMBOL(tabort_syscall) /* Firstly we need to enable TM in the kernel */ mfmsr r10 li r9, 1 @@ -602,6 +606,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) #ifdef CONFIG_PPC_BOOK3S interrupt_return_\srr\()_user_restart: +_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user_restart) GET_PACA(r13) ld r1,PACA_EXIT_SAVE_R1(r13) ld r2,PACATOC(r13) @@ -735,6 +740,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) #ifdef CONFIG_PPC_BOOK3S interrupt_return_\srr\()_kernel_restart: +_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel_restart) GET_PACA(r13) ld r1,PACA_EXIT_SAVE_R1(r13) ld r2,PACATOC(r13) -- cgit v1.2.3 From c59458b00aec4ba580d9628d36d6c984af94d192 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 30 Jun 2021 17:46:20 +1000 Subject: powerpc/64s/interrupt: clean up interrupt return labels Normal kernel-interrupt exits can get interrupt_return_srr_user_restart in their backtrace, which is an unusual and notable function, and it is part of the user-interrupt exit path, which is doubly confusing. Add non-local labels for both user and kernel interrupt exit cases to address this and make the user and kernel cases more symmetric. Also get rid of an unused label. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210630074621.2109197-9-npiggin@gmail.com --- arch/powerpc/kernel/interrupt_64.S | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index 5c18362693fe..c4336e2e2ce8 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -509,7 +509,9 @@ interrupt_return_\srr\(): _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()) ld r4,_MSR(r1) andi. 
r0,r4,MSR_PR
-	beq	.Lkernel_interrupt_return_\srr
+	beq	interrupt_return_\srr\()_kernel
+interrupt_return_\srr\()_user: /* make backtraces match the _kernel variant */
+_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	interrupt_exit_user_prepare
 	cmpdi	r3,0
@@ -623,8 +625,8 @@ RESTART_TABLE(.Linterrupt_return_\srr\()_user_rst_start, .Linterrupt_return_\srr
 #endif
 
 	.balign IFETCH_ALIGN_BYTES
-.Lkernel_interrupt_return_\srr\():
-.Linterrupt_return_\srr\()_kernel:
+interrupt_return_\srr\()_kernel:
+_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	interrupt_exit_kernel_prepare
-- 
cgit v1.2.3

From 91fc46eced0f70526d74468ac6c932c90a8585b3 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin
Date: Wed, 30 Jun 2021 17:46:21 +1000
Subject: powerpc/64s: move ret_from_fork etc above __end_soft_masked

Code which runs with interrupts enabled should be moved above
__end_soft_masked where possible, because maskable interrupts that hit
below that symbol will need to consult the soft mask table, which is an
extra cost.

Signed-off-by: Nicholas Piggin
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20210630074621.2109197-10-npiggin@gmail.com
---
 arch/powerpc/kernel/interrupt_64.S | 52 +++++++++++++++++++-------------------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
index c4336e2e2ce8..4063e8a3f704 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -449,32 +449,6 @@ _ASM_NOKPROBE_SYMBOL(tabort_syscall)
 	b	.	/* prevent speculative execution */
 #endif
 
-#ifdef CONFIG_PPC_BOOK3S
-_GLOBAL(ret_from_fork_scv)
-	bl	schedule_tail
-	REST_NVGPRS(r1)
-	li	r3,0	/* fork() return value */
-	b	.Lsyscall_vectored_common_exit
-#endif
-
-_GLOBAL(ret_from_fork)
-	bl	schedule_tail
-	REST_NVGPRS(r1)
-	li	r3,0	/* fork() return value */
-	b	.Lsyscall_exit
-
-_GLOBAL(ret_from_kernel_thread)
-	bl	schedule_tail
-	REST_NVGPRS(r1)
-	mtctr	r14
-	mr	r3,r15
-#ifdef PPC64_ELF_ABI_v2
-	mr	r12,r14
-#endif
-	bctrl
-	li	r3,0
-	b	.Lsyscall_exit
-
 /*
  * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not
  * touched, no exit work created, then this can be used.
@@ -768,3 +742,29 @@ interrupt_return_macro hsrr
 __end_soft_masked:
 DEFINE_FIXED_SYMBOL(__end_soft_masked)
 #endif /* CONFIG_PPC_BOOK3S */
+
+#ifdef CONFIG_PPC_BOOK3S
+_GLOBAL(ret_from_fork_scv)
+	bl	schedule_tail
+	REST_NVGPRS(r1)
+	li	r3,0	/* fork() return value */
+	b	.Lsyscall_vectored_common_exit
+#endif
+
+_GLOBAL(ret_from_fork)
+	bl	schedule_tail
+	REST_NVGPRS(r1)
+	li	r3,0	/* fork() return value */
+	b	.Lsyscall_exit
+
+_GLOBAL(ret_from_kernel_thread)
+	bl	schedule_tail
+	REST_NVGPRS(r1)
+	mtctr	r14
+	mr	r3,r15
+#ifdef PPC64_ELF_ABI_v2
+	mr	r12,r14
+#endif
+	bctrl
+	li	r3,0
+	b	.Lsyscall_exit
-- 
cgit v1.2.3

From 4ebbbaa4ce8524b853dd6febf0176a6efa3482d7 Mon Sep 17 00:00:00 2001
From: Michael Ellerman
Date: Thu, 1 Jul 2021 21:18:30 +1000
Subject: powerpc: Only build restart_table.c for 64s

Commit 9b69d48c7516 ("powerpc/64e: remove implicit soft-masking and
interrupt exit restart logic") limited the implicit soft masking and
restart logic to 64-bit Book3S only. However we are still building
restart_table.c for all 64-bit, i.e. Book3E also.

There's no need to build it for 64e, and it also causes missing
prototype warnings for 64e builds, because the prototype is already
behind an #ifdef CONFIG_PPC_BOOK3S_64.
Fixes: 9b69d48c7516 ("powerpc/64e: remove implicit soft-masking and interrupt exit restart logic") Reported-by: kernel test robot Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701125026.292224-1-mpe@ellerman.id.au --- arch/powerpc/lib/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 4c92c80454f3..99a7c9132422 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -39,10 +39,10 @@ extra-$(CONFIG_PPC64) += crtsavres.o endif obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \ - memcpy_power7.o + memcpy_power7.o restart_table.o obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \ - memcpy_64.o copy_mc_64.o restart_table.o + memcpy_64.o copy_mc_64.o ifndef CONFIG_PPC_QUEUED_SPINLOCKS obj64-$(CONFIG_SMP) += locks.o -- cgit v1.2.3
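One footnote on the warning this last patch silences: with -Wmissing-prototypes, defining an external function whose declaration has been compiled out by a configuration guard triggers a warning. A minimal stand-alone illustration, with hypothetical file and symbol names rather than the kernel sources:

    /* table.h */
    #ifdef FEATURE_TABLE
    unsigned long search_table(unsigned long addr);  /* prototype guarded */
    #endif

    /* table.c -- if this file is still built with FEATURE_TABLE unset,
     * the definition below has no visible prototype and
     * gcc -Wmissing-prototypes warns; the fix taken above is to stop
     * compiling the file in those configurations */
    #include "table.h"

    unsigned long search_table(unsigned long addr)
    {
        return addr;
    }
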