From 67c3e59443f5fc77be39e2ce0db75fbfa78c7965 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Mon, 2 Jul 2018 11:08:16 +0200 Subject: powerpc/pseries: Fix missing of_node_put() in rng_init() The call to of_find_compatible_node() returns a node pointer with refcount incremented thus it must be explicitly decremented here before returning. Fixes: a489043f4626 ("powerpc/pseries: Implement arch_get_random_long() based on H_RANDOM") Signed-off-by: Nicholas Mc Guire Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1530522496-14816-1-git-send-email-hofrat@osadl.org --- arch/powerpc/platforms/pseries/rng.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/rng.c b/arch/powerpc/platforms/pseries/rng.c index bbb97169bf63..6268545947b8 100644 --- a/arch/powerpc/platforms/pseries/rng.c +++ b/arch/powerpc/platforms/pseries/rng.c @@ -36,6 +36,7 @@ static __init int rng_init(void) ppc_md.get_random_seed = pseries_get_random_long; + of_node_put(dn); return 0; } machine_subsys_initcall(pseries, rng_init); -- cgit v1.2.3 From d3e669f31ec35856f5e85df9224ede5bdbf1bc7b Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Wed, 4 Jul 2018 10:03:27 +0200 Subject: powerpc/icp-hv: Fix missing of_node_put() in success path Both of_find_compatible_node() and of_find_node_by_type() will return a refcounted node on success - thus for the success path the node must be explicitly released with a of_node_put(). Fixes: 0b05ac6e2480 ("powerpc/xics: Rewrite XICS driver") Signed-off-by: Nicholas Mc Guire Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1530691407-3991-1-git-send-email-hofrat@osadl.org --- arch/powerpc/sysdev/xics/icp-hv.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c index ad8117148ea3..21b9d1bf39ff 100644 --- a/arch/powerpc/sysdev/xics/icp-hv.c +++ b/arch/powerpc/sysdev/xics/icp-hv.c @@ -174,6 +174,7 @@ int icp_hv_init(void) icp_ops = &icp_hv_ops; + of_node_put(np); return 0; } -- cgit v1.2.3 From 374f6178f3483dcad151fc14b2fad92ed6652f07 Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Fri, 3 Apr 2020 17:38:38 +0200 Subject: ocxl: Remove custom service to allocate interrupts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We now allocate interrupts through xive directly. 
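For illustration, a minimal sketch of the kind of direct XIVE allocation callers can now use instead of the removed helper. The real ocxl call sites live outside arch/powerpc and are not part of this diff; the function names below are hypothetical, but the two XIVE calls are the same ones the removed pnv_ocxl_alloc_xive_irq()/pnv_ocxl_free_xive_irq() wrappers used:

    #include <asm/xive.h>

    static int demo_alloc_xive_irq(u32 *hwirq)
    {
            u32 irq = xive_native_alloc_irq();   /* same API the removed helper wrapped */

            if (!irq)
                    return -ENOENT;
            *hwirq = irq;
            return 0;
    }

    static void demo_free_xive_irq(u32 hwirq)
    {
            xive_native_free_irq(hwirq);
    }
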
Signed-off-by: Frederic Barrat Reviewed-by: Cédric Le Goater Reviewed-by: Greg Kurz Acked-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200403153838.29224-5-fbarrat@linux.ibm.com --- arch/powerpc/include/asm/pnv-ocxl.h | 3 --- arch/powerpc/platforms/powernv/ocxl.c | 30 ------------------------------ 2 files changed, 33 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h index ee79d2cd9fb6..d37ededca3ee 100644 --- a/arch/powerpc/include/asm/pnv-ocxl.h +++ b/arch/powerpc/include/asm/pnv-ocxl.h @@ -28,7 +28,4 @@ int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask, void **p void pnv_ocxl_spa_release(void *platform_data); int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle); -int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr); -void pnv_ocxl_free_xive_irq(u32 irq); - #endif /* _ASM_PNV_OCXL_H */ diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c index 8c65aacda9c8..ecdad219d704 100644 --- a/arch/powerpc/platforms/powernv/ocxl.c +++ b/arch/powerpc/platforms/powernv/ocxl.c @@ -2,7 +2,6 @@ // Copyright 2017 IBM Corp. #include #include -#include #include #include "pci.h" @@ -484,32 +483,3 @@ int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle) return rc; } EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache); - -int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr) -{ - __be64 flags, trigger_page; - s64 rc; - u32 hwirq; - - hwirq = xive_native_alloc_irq(); - if (!hwirq) - return -ENOENT; - - rc = opal_xive_get_irq_info(hwirq, &flags, NULL, &trigger_page, NULL, - NULL); - if (rc || !trigger_page) { - xive_native_free_irq(hwirq); - return -ENOENT; - } - *irq = hwirq; - *trigger_addr = be64_to_cpu(trigger_page); - return 0; - -} -EXPORT_SYMBOL_GPL(pnv_ocxl_alloc_xive_irq); - -void pnv_ocxl_free_xive_irq(u32 irq) -{ - xive_native_free_irq(irq); -} -EXPORT_SYMBOL_GPL(pnv_ocxl_free_xive_irq); -- cgit v1.2.3 From 8c7614d648037b0776e0b76cb62911be3b059ea4 Mon Sep 17 00:00:00 2001 From: Biwen Li Date: Wed, 27 May 2020 11:42:27 +0800 Subject: powerpc/dts/t4240rdb: remove interrupts property Since the interrupt pin for RTC DS1374 is not connected to the CPU on T4240RDB, remove the interrupt property from the device tree. This also fix the following warning for hwclock.util-linux: $ hwclock.util-linux hwclock.util-linux: select() to /dev/rtc0 to wait for clock tick timed out Signed-off-by: Biwen Li Acked-by: Li Yang Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200527034228.23793-1-biwen.li@oss.nxp.com --- arch/powerpc/boot/dts/fsl/t4240rdb.dts | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/boot/dts/fsl/t4240rdb.dts b/arch/powerpc/boot/dts/fsl/t4240rdb.dts index a56a705d41f7..145896f2eef6 100644 --- a/arch/powerpc/boot/dts/fsl/t4240rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t4240rdb.dts @@ -144,7 +144,6 @@ rtc@68 { compatible = "dallas,ds1374"; reg = <0x68>; - interrupts = <0x1 0x1 0 0>; }; }; -- cgit v1.2.3 From 843dc8ee23d1b353fa9cc24da3e52be0111d5931 Mon Sep 17 00:00:00 2001 From: Biwen Li Date: Wed, 27 May 2020 11:42:28 +0800 Subject: powerc/dtc/t1024rdb: remove interrupts property Since the interrupt pin for RTC DS1339 is not connected to the CPU on T1024RDB, remove the interrupt property from the device tree. 
This also fix the following warning for hwclock.util-linux: $ hwclock.util-linux hwclock.util-linux: select() to /dev/rtc0 to wait for clock tick timed out Signed-off-by: Biwen Li Acked-by: Li Yang Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200527034228.23793-2-biwen.li@oss.nxp.com --- arch/powerpc/boot/dts/fsl/t1024rdb.dts | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/boot/dts/fsl/t1024rdb.dts b/arch/powerpc/boot/dts/fsl/t1024rdb.dts index 73a645324bc1..dbcd31cc35dc 100644 --- a/arch/powerpc/boot/dts/fsl/t1024rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1024rdb.dts @@ -161,7 +161,6 @@ rtc@68 { compatible = "dallas,ds1339"; reg = <0x68>; - interrupts = <0x1 0x1 0 0>; }; }; -- cgit v1.2.3 From 738e6cad0ace88edec8f4ffa082749ad5df26409 Mon Sep 17 00:00:00 2001 From: zhengbin Date: Tue, 19 Nov 2019 14:14:30 +0800 Subject: powerpc/fadump: Remove set but not used variable 'elf' Fix gcc '-Wunused-but-set-variable' warning: arch/powerpc/kernel/fadump.c: In function fadump_update_elfcore_header: arch/powerpc/kernel/fadump.c:790:17: warning: variable elf set but not used [-Wunused-but-set-variable] It is introduced by commit ebaeb5ae2437 ("fadump: Convert firmware-assisted cpu state dump data into elf notes."), but never used, so remove it. Reported-by: Hulk Robot Signed-off-by: zhengbin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1574144074-142032-2-git-send-email-zhengbin13@huawei.com --- arch/powerpc/kernel/fadump.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 10ebb4bf71ad..d192067a112c 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -754,10 +754,8 @@ u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs) void fadump_update_elfcore_header(char *bufp) { - struct elfhdr *elf; struct elf_phdr *phdr; - elf = (struct elfhdr *)bufp; bufp += sizeof(struct elfhdr); /* First note is a place holder for cpu notes info. */ -- cgit v1.2.3 From ef23cf9a89a7aec19a29d548d1e219d436b23b6e Mon Sep 17 00:00:00 2001 From: zhengbin Date: Tue, 19 Nov 2019 14:14:31 +0800 Subject: powerpc/perf: Remove set but not used variable 'target' Fix gcc '-Wunused-but-set-variable' warning: arch/powerpc/perf/imc-pmu.c: In function trace_imc_event_init: arch/powerpc/perf/imc-pmu.c:1292:22: warning: variable target set but not used [-Wunused-but-set-variable] It is introduced by commit 012ae244845f ("powerpc/perf: Trace imc PMU functions"), but never used, so remove it. 
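For reference, a minimal stand-alone reproducer (hypothetical, not kernel code) of this warning class; the kernel's W=1 build enables -Wunused-but-set-variable:

    int demo(int x)
    {
            int target;             /* assigned below but never read */

            target = x + 1;         /* gcc: variable 'target' set but not used */
            return x;
    }
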
Reported-by: Hulk Robot Signed-off-by: zhengbin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1574144074-142032-3-git-send-email-zhengbin13@huawei.com --- arch/powerpc/perf/imc-pmu.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index a45d694a5d5d..00bb186caed9 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -1426,8 +1426,6 @@ static void trace_imc_event_del(struct perf_event *event, int flags) static int trace_imc_event_init(struct perf_event *event) { - struct task_struct *target; - if (event->attr.type != event->pmu->type) return -ENOENT; @@ -1458,7 +1456,6 @@ static int trace_imc_event_init(struct perf_event *event) mutex_unlock(&imc_global_refc.lock); event->hw.idx = -1; - target = event->hw.target; event->pmu->task_ctx_nr = perf_hw_context; event->destroy = reset_global_refc; -- cgit v1.2.3 From 18102e4bcc47f5b5ac70e2e4461d022c1ee6df24 Mon Sep 17 00:00:00 2001 From: zhengbin Date: Tue, 19 Nov 2019 14:14:34 +0800 Subject: powerpc/powernv: Remove set but not used variable 'parent' Fix gcc '-Wunused-but-set-variable' warning: arch/powerpc/platforms/powernv/pci-ioda.c: In function pnv_ioda_configure_pe: arch/powerpc/platforms/powernv/pci-ioda.c:867:18: warning: variable parent set but not used [-Wunused-but-set-variable] It is not used since commit b131a8425c34 ("powerpc/powernv: Set PELTV for compound PEs") Reported-by: Hulk Robot Signed-off-by: zhengbin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1574144074-142032-6-git-send-email-zhengbin13@huawei.com --- arch/powerpc/platforms/powernv/pci-ioda.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 023a4f987bb2..2b4ceb5e6ce4 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -894,7 +894,6 @@ int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) { - struct pci_dev *parent; uint8_t bcomp, dcomp, fcomp; long rc, rid_end, rid; @@ -904,7 +903,6 @@ int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER; fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER; - parent = pe->pbus->self; if (pe->flags & PNV_IODA_PE_BUS_ALL) count = resource_size(&pe->pbus->busn_res); else @@ -925,12 +923,6 @@ int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) } rid_end = pe->rid + (count << 8); } else { -#ifdef CONFIG_PCI_IOV - if (pe->flags & PNV_IODA_PE_VF) - parent = pe->parent_dev; - else -#endif /* CONFIG_PCI_IOV */ - parent = pe->pdev->bus->self; bcomp = OpalPciBusAll; dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER; fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER; -- cgit v1.2.3 From ccaea15296f9773abd43aaa17ee4b88848e4a505 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 31 Jul 2020 17:04:59 +0530 Subject: powerpc/vmemmap: Fix memory leak with vmemmap list allocation failures. If we fail to allocate vmemmap list, we don't keep track of allocated vmemmap block buf. Hence on section deactivate we skip vmemmap block buf free. This results in memory leak. 
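A condensed sketch of the fixed control flow, paraphrasing the patch below and ignoring the altmap case for brevity (the helper name is hypothetical):

    static int demo_populate_one(unsigned long start, unsigned long page_size, int node)
    {
            void *p = vmemmap_alloc_block_buf(page_size, node, NULL);

            if (!p)
                    return -ENOMEM;

            if (vmemmap_list_populate(__pa(p), start, node)) {
                    /*
                     * The block is only reachable through vmemmap_list, so a
                     * failed insertion must free it here; previously it was
                     * silently leaked.
                     */
                    free_pages((unsigned long)p, get_order(page_size));
                    return -ENOMEM;
            }
            return 0;
    }
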
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200731113500.248306-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/init_64.c | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 02e127fa5777..41b7da84030e 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -162,16 +162,16 @@ static __meminit struct vmemmap_backing * vmemmap_list_alloc(int node) return next++; } -static __meminit void vmemmap_list_populate(unsigned long phys, - unsigned long start, - int node) +static __meminit int vmemmap_list_populate(unsigned long phys, + unsigned long start, + int node) { struct vmemmap_backing *vmem_back; vmem_back = vmemmap_list_alloc(node); if (unlikely(!vmem_back)) { - WARN_ON(1); - return; + pr_debug("vmemap list allocation failed\n"); + return -ENOMEM; } vmem_back->phys = phys; @@ -179,6 +179,7 @@ static __meminit void vmemmap_list_populate(unsigned long phys, vmem_back->list = vmemmap_list; vmemmap_list = vmem_back; + return 0; } static bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start, @@ -199,6 +200,7 @@ static bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long star int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap) { + bool altmap_alloc; unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; /* Align to the page size of the linear mapping. */ @@ -228,13 +230,32 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, p = vmemmap_alloc_block_buf(page_size, node, altmap); if (!p) pr_debug("altmap block allocation failed, falling back to system memory"); + else + altmap_alloc = true; } - if (!p) + if (!p) { p = vmemmap_alloc_block_buf(page_size, node, NULL); + altmap_alloc = false; + } if (!p) return -ENOMEM; - vmemmap_list_populate(__pa(p), start, node); + if (vmemmap_list_populate(__pa(p), start, node)) { + /* + * If we don't populate vmemap list, we don't have + * the ability to free the allocated vmemmap + * pages in section_deactivate. Hence free them + * here. + */ + int nr_pfns = page_size >> PAGE_SHIFT; + unsigned long page_order = get_order(page_size); + + if (altmap_alloc) + vmem_altmap_free(altmap, nr_pfns); + else + free_pages((unsigned long)p, page_order); + return -ENOMEM; + } pr_debug(" * %016lx..%016lx allocated at %p\n", start, start + page_size, p); -- cgit v1.2.3 From 1c0a7ac0ec63ee626f669c9a4e278f6ae1dbfcf2 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 31 Jul 2020 17:05:00 +0530 Subject: powerpc/vmemmap: Don't warn if we don't find a mapping vmemmap list entry Now that we are handling vmemmap list allocation failure correctly, don't WARN in section deactivate when we don't find a mapping vmemmap list entry. 
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200731113500.248306-2-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/init_64.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 41b7da84030e..a8618f7d00a3 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -285,10 +285,8 @@ static unsigned long vmemmap_list_free(unsigned long start) vmem_back_prev = vmem_back; } - if (unlikely(!vmem_back)) { - WARN_ON(1); + if (unlikely(!vmem_back)) return 0; - } /* remove it from vmemmap_list */ if (vmem_back == vmemmap_list) /* remove head */ -- cgit v1.2.3 From 346427e668163e85cbbe14e4d9a2ddd49df1536c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 4 Aug 2020 18:43:16 +0100 Subject: powerpc/oprofile: fix spelling mistake "contex" -> "context" There is a spelling mistake in a pr_debug message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200804174316.402425-1-colin.king@canonical.com --- arch/powerpc/oprofile/cell/spu_task_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c index df59d0bb121f..489f993100d5 100644 --- a/arch/powerpc/oprofile/cell/spu_task_sync.c +++ b/arch/powerpc/oprofile/cell/spu_task_sync.c @@ -572,7 +572,7 @@ void spu_sync_buffer(int spu_num, unsigned int *samples, * samples are recorded. * No big deal -- so we just drop a few samples. */ - pr_debug("SPU_PROF: No cached SPU contex " + pr_debug("SPU_PROF: No cached SPU context " "for SPU #%d. Dropping samples.\n", spu_num); goto out; } -- cgit v1.2.3 From f6bac19cf65c5be21d14a0c9684c8f560f2096dd Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 4 Aug 2020 10:54:05 +1000 Subject: powerpc/powernv/smp: Fix spurious DBG() warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building with W=1 we get the following warning: arch/powerpc/platforms/powernv/smp.c: In function ‘pnv_smp_cpu_kill_self’: arch/powerpc/platforms/powernv/smp.c:276:16: error: suggest braces around empty body in an ‘if’ statement [-Werror=empty-body] 276 | cpu, srr1); | ^ cc1: all warnings being treated as errors The full context is this block: if (srr1 && !generic_check_cpu_restart(cpu)) DBG("CPU%d Unexpected exit while offline srr1=%lx!\n", cpu, srr1); When building with DEBUG undefined DBG() expands to nothing and GCC emits the warning due to the lack of braces around an empty statement. Signed-off-by: Oliver O'Halloran Reviewed-by: Joel Stanley Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200804005410.146094-2-oohall@gmail.com --- arch/powerpc/platforms/powernv/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index b2ba3e95bda7..bbf361f23ae8 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -43,7 +43,7 @@ #include #define DBG(fmt...) udbg_printf(fmt) #else -#define DBG(fmt...) +#define DBG(fmt...) 
do { } while (0) #endif static void pnv_smp_setup_cpu(int cpu) -- cgit v1.2.3 From 8471c1dd93de9a2278d41c527b76291e4ace8f1c Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 4 Aug 2020 10:54:06 +1000 Subject: powerpc/powernv: Include asm/powernv.h from the local powernv.h The asm/powernv.h header provides prototypes for functions which need to be called by non-powernv platform code. Also include it in the powernv.h that's local to the platform directory to squash some warnings about non-static functions missing prototypes. Also include powernv.h since from opal-memcons.c since it has the prototypes for the memcons wrangling functions which are used for the opal and ultravisor msglog. Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200804005410.146094-3-oohall@gmail.com --- arch/powerpc/platforms/powernv/opal-msglog.c | 2 ++ arch/powerpc/platforms/powernv/powernv.h | 7 +++++++ 2 files changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c index d26da19a611f..d3b6e135c18b 100644 --- a/arch/powerpc/platforms/powernv/opal-msglog.c +++ b/arch/powerpc/platforms/powernv/opal-msglog.c @@ -12,6 +12,8 @@ #include #include +#include "powernv.h" + /* OPAL in-memory console. Defined in OPAL source at core/console.c */ struct memcons { __be64 magic; diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index 1aa51c4fa904..11df4e16a1cc 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -2,6 +2,13 @@ #ifndef _POWERNV_H #define _POWERNV_H +/* + * There's various hacks scattered throughout the generic powerpc arch code + * that needs to call into powernv platform stuff. The prototypes for those + * functions are in asm/powernv.h + */ +#include + #ifdef CONFIG_SMP extern void pnv_smp_init(void); #else -- cgit v1.2.3 From 3b70464aa78917e88c1d4bfc2100c344c0eda8e0 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 4 Aug 2020 10:54:07 +1000 Subject: powerpc/powernv: Staticify functions without prototypes There's a few scattered in the powernv platform. 
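For reference, a minimal stand-alone illustration (hypothetical, not kernel code) of the missing-prototype warning class that W=1 enables and that marking file-local functions static silences:

    static int helper(int x)        /* file-local: no separate prototype needed */
    {
            return x * 2;
    }

    int public_entry(int x);        /* declared before the definition, e.g. in a header */
    int public_entry(int x)
    {
            /* without 'static' above, gcc -Wmissing-prototypes warns:
             * "no previous prototype for 'helper'" */
            return helper(x);
    }
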
Signed-off-by: Oliver O'Halloran Reviewed-by: Joel Stanley Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200804005410.146094-4-oohall@gmail.com --- arch/powerpc/platforms/powernv/eeh-powernv.c | 4 ++-- arch/powerpc/platforms/powernv/rng.c | 2 +- arch/powerpc/platforms/powernv/vas-window.c | 9 ++++----- 3 files changed, 7 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 9af8c3b98853..663bd69ac51b 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -38,7 +38,7 @@ static int eeh_event_irq = -EINVAL; -void pnv_pcibios_bus_add_device(struct pci_dev *pdev) +static void pnv_pcibios_bus_add_device(struct pci_dev *pdev) { dev_dbg(&pdev->dev, "EEH: Setting up device\n"); eeh_probe_device(pdev); @@ -190,7 +190,7 @@ PNV_EEH_DBGFS_ENTRY(inbB, 0xE10); #endif /* CONFIG_DEBUG_FS */ -void pnv_eeh_enable_phbs(void) +static void pnv_eeh_enable_phbs(void) { struct pci_controller *hose; struct pnv_phb *phb; diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index 8035caf6e297..72c25295c1c2 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -65,7 +65,7 @@ int powernv_get_random_real_mode(unsigned long *v) return 1; } -int powernv_get_random_darn(unsigned long *v) +static int powernv_get_random_darn(unsigned long *v) { unsigned long val; diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c index 6434f9cb5aed..5f5fe63a3d1c 100644 --- a/arch/powerpc/platforms/powernv/vas-window.c +++ b/arch/powerpc/platforms/powernv/vas-window.c @@ -186,7 +186,7 @@ static void unmap_winctx_mmio_bars(struct vas_window *window) * OS/User Window Context (UWC) MMIO Base Address Region for the given window. * Map these bus addresses and save the mapped kernel addresses in @window. */ -int map_winctx_mmio_bars(struct vas_window *window) +static int map_winctx_mmio_bars(struct vas_window *window) { int len; u64 start; @@ -214,7 +214,7 @@ int map_winctx_mmio_bars(struct vas_window *window) * registers are not sequential. And, we can only write to offsets * with valid registers. */ -void reset_window_regs(struct vas_window *window) +static void reset_window_regs(struct vas_window *window) { write_hvwc_reg(window, VREG(LPID), 0ULL); write_hvwc_reg(window, VREG(PID), 0ULL); @@ -357,7 +357,8 @@ static void init_rsvd_tx_buf_count(struct vas_window *txwin, * as a one-time task? That could work for NX but what about other * receivers? Let the receivers tell us the rx-fifo buffers for now. 
*/ -int init_winctx_regs(struct vas_window *window, struct vas_winctx *winctx) +static void init_winctx_regs(struct vas_window *window, + struct vas_winctx *winctx) { u64 val; int fifo_size; @@ -499,8 +500,6 @@ int init_winctx_regs(struct vas_window *window, struct vas_winctx *winctx) val = SET_FIELD(VAS_WINCTL_NX_WIN, val, winctx->nx_win); val = SET_FIELD(VAS_WINCTL_OPEN, val, 1); write_hvwc_reg(window, VREG(WINCTL), val); - - return 0; } static void vas_release_window_id(struct ida *ida, int winid) -- cgit v1.2.3 From fb248c3121af713f31736af359608491544cfc23 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 4 Aug 2020 10:54:08 +1000 Subject: powerpc/powernv: Fix spurious kerneldoc warnings in opal-prd.c Comments opening with /** are parsed by kerneldoc and this causes the following warning to be printed: arch/powerpc/platforms/powernv/opal-prd.c:31: warning: cannot understand function prototype: 'struct opal_prd_msg_queue_item ' opal_prd_mesg_queue_item is an internal data structure so there's no real need for it to be documented at all. Fix up the comment to squash the warning. Signed-off-by: Oliver O'Halloran Reviewed-by: Joel Stanley Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200804005410.146094-5-oohall@gmail.com --- arch/powerpc/platforms/powernv/opal-prd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c index 45f4223a790f..deddaebf8c14 100644 --- a/arch/powerpc/platforms/powernv/opal-prd.c +++ b/arch/powerpc/platforms/powernv/opal-prd.c @@ -24,7 +24,7 @@ #include -/** +/* * The msg member must be at the end of the struct, as it's followed by the * message data. */ -- cgit v1.2.3 From 3ced132a055c4e5046d21732393ae6848ff309e0 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 4 Aug 2020 10:54:10 +1000 Subject: powerpc/nx: Don't pack struct coprocessor_request_block MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building with W=1 results in the following warning: In file included from arch/powerpc/platforms/powernv/vas-fault.c:16: ./arch/powerpc/include/asm/icswx.h:159:1: error: alignment 1 of ‘struct coprocessor_request_block’ is less than 16 [-Werror=packed-not-aligned] 159 | } __packed; | ^ ./arch/powerpc/include/asm/icswx.h:159:1: error: alignment 1 of ‘struct coprocessor_request_block’ is less than 16 [-Werror=packed-not-aligned] ./arch/powerpc/include/asm/icswx.h:159:1: error: alignment 1 of ‘struct coprocessor_request_block’ is less than 16 [-Werror=packed-not-aligned] ./arch/powerpc/include/asm/icswx.h:159:1: error: alignment 1 of ‘struct coprocessor_request_block’ is less than 16 [-Werror=packed-not-aligned] cc1: all warnings being treated as errors This happens because coprocessor_request_block includes several sub-structures with an alignment specified using the __aligned(XX) attribute. The problem comes from coprocessor_request_block having the __packed attribute. Packing the structure causes the preferred alignment of the nested structures to be ignored and we get the warnings as a result. This isn't a problem in practice since the struct is defined with explicit padding in the form of reserved fields, but we'd like to get rid of the spurious warnings. The simplest solution is to remove the packed attribute and use a BUILD_BUG_ON() to ensure the struct is the correct (expected by HW) size compile time. 
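To make the interaction concrete, a stand-alone sketch (hypothetical types, not the kernel structs) of how packing an outer struct defeats a member's requested alignment, and how a compile-time size check can stand in for the packed attribute:

    struct inner {
            unsigned char data[16];
    } __attribute__((aligned(16)));

    struct outer_packed {
            unsigned char pad[8];
            struct inner member;            /* alignment drops to 1: -Wpacked-not-aligned */
    } __attribute__((packed));

    struct outer {
            unsigned char pad[8];
            unsigned char reserved[8];      /* explicit padding keeps the intended layout */
            struct inner member;            /* keeps its 16-byte alignment */
    };

    /* stand-in for the kernel's BUILD_BUG_ON() size check */
    _Static_assert(sizeof(struct outer) == 32, "unexpected struct size");
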
Also add a __aligned(128) to the request block structure since Book4 for P8 suggests the HW requires it to be aligned to a 128 byte boundary. There's a similar requirement for P9 since the COPY and PASTE instructions used to invoke VAS/NX accelerators operates on a cache line boundary. Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200804005410.146094-7-oohall@gmail.com --- arch/powerpc/include/asm/icswx.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/icswx.h b/arch/powerpc/include/asm/icswx.h index b0c70a35fd0e..f6599ccb3012 100644 --- a/arch/powerpc/include/asm/icswx.h +++ b/arch/powerpc/include/asm/icswx.h @@ -156,8 +156,7 @@ struct coprocessor_request_block { u8 reserved[32]; struct coprocessor_status_block csb; -} __packed; - +} __aligned(128); /* RFC02167 Initiate Coprocessor Instructions document * Chapter 8.2.1.1.1 RS @@ -188,6 +187,9 @@ static inline int icswx(__be32 ccw, struct coprocessor_request_block *crb) __be64 ccw_reg = ccw; u32 cr; + /* NB: the same structures are used by VAS-NX */ + BUILD_BUG_ON(sizeof(*crb) != 128); + __asm__ __volatile__( PPC_ICSWX(%1,0,%2) "\n" "mfcr %0\n" -- cgit v1.2.3 From b51ba4fe2e134b631f9c8f45423707aab71449b5 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 6 Aug 2020 06:01:42 +0000 Subject: powerpc/32s: Fix assembler warning about r0 The assembler says: arch/powerpc/kernel/head_32.S:1095: Warning: invalid register expression It's objecting to the use of r0 as the RA argument. That's because when RA = 0 the literal value 0 is used, rather than the content of r0, making the use of r0 in the source potentially confusing. Fix it to use a literal 0, the generated code is identical. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/2b69ac8e1cddff6f808fc7415907179eab4aae9e.1596693679.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index f3ab94d73936..5624db0e09a1 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -1092,7 +1092,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) */ lis r5, abatron_pteptrs@h ori r5, r5, abatron_pteptrs@l - stw r5, 0xf0(r0) /* This much match your Abatron config */ + stw r5, 0xf0(0) /* This much match your Abatron config */ lis r6, swapper_pg_dir@h ori r6, r6, swapper_pg_dir@l tophys(r5, r5) -- cgit v1.2.3 From 169b9afee572853522901b7cbf34842c0494a887 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 6 Aug 2020 12:19:06 +0000 Subject: powerpc/hwirq: Remove stale forward irq_chip declaration Since commit identified below, the forward declaration of struct irq_chip is useless (was struct hw_interrupt_type at that time) Remove it, together with the associated comment. 
Fixes: c0ad90a32fb6 ("[PATCH] genirq: add ->retrigger() irq op to consolidate hw_irq_resend()") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/fbe58d27cf128d5fe581e4510ded8701858f268e.1596716328.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/hw_irq.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 3a0db7b0b46e..538698facb80 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -372,12 +372,6 @@ static inline void may_hard_irq_enable(void) { } #define ARCH_IRQ_INIT_FLAGS IRQ_NOREQUEST -/* - * interrupt-retrigger: should we handle this via lost interrupts and IPIs - * or should we not care like we do now ? --BenH. - */ -struct irq_chip; - #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_HW_IRQ_H */ -- cgit v1.2.3 From b134cfc3e3276ccd5d29e39de5c848a45b08e410 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 6 Aug 2020 12:19:46 +0000 Subject: powerpc/irq: Drop forward declaration of struct irqaction Since the commit identified below, the forward declaration of struct irqaction is useless. Drop it. Fixes: b709c0832824 ("ppc64: move stack switching up in interrupt processing") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e0bcdabac45fcd26c02d7df273bd4a5827c6033d.1596716375.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/irq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index 814dfab7e392..4f983ca4030a 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -35,7 +35,6 @@ static __inline__ int irq_canonicalize(int irq) extern int distribute_irqs; -struct irqaction; struct pt_regs; #define __ARCH_HAS_DO_SOFTIRQ -- cgit v1.2.3 From 63442de4301188129e1fcff144fbfb966ad5eb19 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 6 Aug 2020 12:20:34 +0000 Subject: powerpc/fpu: Drop cvt_fd() and cvt_df() Those two functions have been unused since commit identified below. Drop them. 
Fixes: 31bfdb036f12 ("powerpc: Use instruction emulation infrastructure to handle alignment faults") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d5641ada199b8dd2af16ad00a66084cf974f2704.1596716418.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/processor.h | 2 -- arch/powerpc/kernel/fpu.S | 15 --------------- 2 files changed, 17 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index ed0d633ab5aa..2d68dbe56601 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -439,8 +439,6 @@ extern void flush_instruction_cache(void); extern void hard_reset_now(void); extern void poweroff_now(void); extern int fix_alignment(struct pt_regs *); -extern void cvt_fd(float *from, double *to); -extern void cvt_df(double *from, float *to); extern void _nmask_and_or_msr(unsigned long nmask, unsigned long or_val); #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 4ae39db70044..825893d4cb59 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -134,18 +134,3 @@ _GLOBAL(save_fpu) mffs fr0 stfd fr0,FPSTATE_FPSCR(r6) blr - -/* - * These are used in the alignment trap handler when emulating - * single-precision loads and stores. - */ - -_GLOBAL(cvt_fd) - lfs 0,0(r3) - stfd 0,0(r4) - blr - -_GLOBAL(cvt_df) - lfd 0,0(r3) - stfs 0,0(r4) - blr -- cgit v1.2.3 From 82eb1792426f8a171cdaa6cfccb63c39f55bc9bd Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 6 Aug 2020 12:20:35 +0000 Subject: powerpc: drop hard_reset_now() and poweroff_now() declaration Those function have never existed. Drop their declaration. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/edcdd72a36495d25213c0256c8022367458e0d19.1596716418.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/processor.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 2d68dbe56601..8184eb357f10 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -436,8 +436,6 @@ extern void power9_idle_type(unsigned long stop_psscr_val, unsigned long stop_psscr_mask); extern void flush_instruction_cache(void); -extern void hard_reset_now(void); -extern void poweroff_now(void); extern int fix_alignment(struct pt_regs *); extern void _nmask_and_or_msr(unsigned long nmask, unsigned long or_val); -- cgit v1.2.3 From 59562b5c33d6ff3685509ed58b2ed3c5b5712704 Mon Sep 17 00:00:00 2001 From: Scott Cheloha Date: Mon, 27 Jul 2020 13:46:04 -0500 Subject: powerpc/perf: consolidate GPCI hcall structs into asm/hvcall.h The H_GetPerformanceCounterInfo (GPCI) hypercall input/output structs are useful to modules outside of perf/, so move them into asm/hvcall.h to live alongside the other powerpc hypercall structs. Leave the perf-specific GPCI stuff in perf/hv-gpci.h. 
Signed-off-by: Scott Cheloha Acked-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200727184605.2945095-1-cheloha@linux.ibm.com --- arch/powerpc/include/asm/hvcall.h | 36 ++++++++++++++++++++++++++++++++++++ arch/powerpc/perf/hv-gpci.c | 9 --------- arch/powerpc/perf/hv-gpci.h | 27 --------------------------- 3 files changed, 36 insertions(+), 36 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index fbb377055471..e8f116a425f9 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -560,6 +560,42 @@ struct hv_guest_state { /* Latest version of hv_guest_state structure */ #define HV_GUEST_STATE_VERSION 1 +/* + * From the document "H_GetPerformanceCounterInfo Interface" v1.07 + * + * H_GET_PERF_COUNTER_INFO argument + */ +struct hv_get_perf_counter_info_params { + __be32 counter_request; /* I */ + __be32 starting_index; /* IO */ + __be16 secondary_index; /* IO */ + __be16 returned_values; /* O */ + __be32 detail_rc; /* O, only needed when called via *_norets() */ + + /* + * O, size each of counter_value element in bytes, only set for version + * >= 0x3 + */ + __be16 cv_element_size; + + /* I, 0 (zero) for versions < 0x3 */ + __u8 counter_info_version_in; + + /* O, 0 (zero) if version < 0x3. Must be set to 0 when making hcall */ + __u8 counter_info_version_out; + __u8 reserved[0xC]; + __u8 counter_value[]; +} __packed; + +#define HGPCI_REQ_BUFFER_SIZE 4096 +#define HGPCI_MAX_DATA_BYTES \ + (HGPCI_REQ_BUFFER_SIZE - sizeof(struct hv_get_perf_counter_info_params)) + +struct hv_gpci_request_buffer { + struct hv_get_perf_counter_info_params params; + uint8_t bytes[HGPCI_MAX_DATA_BYTES]; +} __packed; + #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_HVCALL_H */ diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index 6884d16ec19b..1667315b82e9 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -123,17 +123,8 @@ static const struct attribute_group *attr_groups[] = { NULL, }; -#define HGPCI_REQ_BUFFER_SIZE 4096 -#define HGPCI_MAX_DATA_BYTES \ - (HGPCI_REQ_BUFFER_SIZE - sizeof(struct hv_get_perf_counter_info_params)) - static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t)); -struct hv_gpci_request_buffer { - struct hv_get_perf_counter_info_params params; - uint8_t bytes[HGPCI_MAX_DATA_BYTES]; -} __packed; - static unsigned long single_gpci_request(u32 req, u32 starting_index, u16 secondary_index, u8 version_in, u32 offset, u8 length, u64 *value) diff --git a/arch/powerpc/perf/hv-gpci.h b/arch/powerpc/perf/hv-gpci.h index a3053eda5dcc..4d108262bed7 100644 --- a/arch/powerpc/perf/hv-gpci.h +++ b/arch/powerpc/perf/hv-gpci.h @@ -2,33 +2,6 @@ #ifndef LINUX_POWERPC_PERF_HV_GPCI_H_ #define LINUX_POWERPC_PERF_HV_GPCI_H_ -#include - -/* From the document "H_GetPerformanceCounterInfo Interface" v1.07 */ - -/* H_GET_PERF_COUNTER_INFO argument */ -struct hv_get_perf_counter_info_params { - __be32 counter_request; /* I */ - __be32 starting_index; /* IO */ - __be16 secondary_index; /* IO */ - __be16 returned_values; /* O */ - __be32 detail_rc; /* O, only needed when called via *_norets() */ - - /* - * O, size each of counter_value element in bytes, only set for version - * >= 0x3 - */ - __be16 cv_element_size; - - /* I, 0 (zero) for versions < 0x3 */ - __u8 counter_info_version_in; - - /* O, 0 (zero) if version < 0x3. 
Must be set to 0 when making hcall */ - __u8 counter_info_version_out; - __u8 reserved[0xC]; - __u8 counter_value[]; -} __packed; - /* * counter info version => fw version/reference (spec version) * -- cgit v1.2.3 From 5d1bc776428f34941a6237afb9454061b5b5e1e1 Mon Sep 17 00:00:00 2001 From: Scott Cheloha Date: Mon, 27 Jul 2020 13:46:05 -0500 Subject: powerpc/pseries: new lparcfg key/value pair: partition_affinity_score The H_GetPerformanceCounterInfo (GPCI) PHYP hypercall has a subcall, Affinity_Domain_Info_By_Partition, which returns, among other things, a "partition affinity score" for a given LPAR. This score, a value on [0-100], represents the processor-memory affinity for the LPAR in question. A score of 0 indicates the worst possible affinity while a score of 100 indicates perfect affinity. The score can be used to reason about performance. This patch adds the score for the local LPAR to the lparcfg procfile under a new 'partition_affinity_score' key. Signed-off-by: Scott Cheloha Reviewed-by: Tyrel Datwyler Acked-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200727184605.2945095-2-cheloha@linux.ibm.com --- arch/powerpc/platforms/pseries/lparcfg.c | 35 ++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index b8d28ab88178..e278390ab28d 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -136,6 +136,39 @@ static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data) return rc; } +static void show_gpci_data(struct seq_file *m) +{ + struct hv_gpci_request_buffer *buf; + unsigned int affinity_score; + long ret; + + buf = kmalloc(sizeof(*buf), GFP_KERNEL); + if (buf == NULL) + return; + + /* + * Show the local LPAR's affinity score. + * + * 0xB1 selects the Affinity_Domain_Info_By_Partition subcall. + * The score is at byte 0xB in the output buffer. + */ + memset(&buf->params, 0, sizeof(buf->params)); + buf->params.counter_request = cpu_to_be32(0xB1); + buf->params.starting_index = cpu_to_be32(-1); /* local LPAR */ + buf->params.counter_info_version_in = 0x5; /* v5+ for score */ + ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, virt_to_phys(buf), + sizeof(*buf)); + if (ret != H_SUCCESS) { + pr_debug("hcall failed: H_GET_PERF_COUNTER_INFO: %ld, %x\n", + ret, be32_to_cpu(buf->params.detail_rc)); + goto out; + } + affinity_score = buf->bytes[0xB]; + seq_printf(m, "partition_affinity_score=%u\n", affinity_score); +out: + kfree(buf); +} + static unsigned h_pic(unsigned long *pool_idle_time, unsigned long *num_procs) { @@ -487,6 +520,8 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) partition_active_processors * 100); } + show_gpci_data(m); + seq_printf(m, "partition_active_processors=%d\n", partition_active_processors); -- cgit v1.2.3 From 6c9100ea39d209e1625ba0fe06134192d9c4752a Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Fri, 7 Aug 2020 17:27:13 +0200 Subject: powerpc: Use simple i2c probe function The i2c probe functions here don't use the id information provided in their second argument, so the single-parameter i2c probe function ("probe_new") can be used instead. This avoids scanning the identifier tables during probes. 
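As a hedged sketch of the resulting driver shape (hypothetical names, match tables omitted), the single-argument callback is wired up through .probe_new while everything else stays the same:

    #include <linux/i2c.h>

    static int demo_probe(struct i2c_client *client)
    {
            /* the old second argument (const struct i2c_device_id *) is gone;
             * the drivers converted here never used it */
            return 0;
    }

    static struct i2c_driver demo_driver = {
            .driver         = { .name = "demo" },
            .probe_new      = demo_probe,
    };
    module_i2c_driver(demo_driver);
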
Signed-off-by: Stephen Kitt Acked-by: Wolfram Sang Reviewed-by: Luca Ceresoli Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200807152713.381588-1-steve@sk2.org --- arch/powerpc/platforms/44x/ppc476.c | 5 ++--- arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/44x/ppc476.c b/arch/powerpc/platforms/44x/ppc476.c index cba83eee685c..07f7e3ce67b5 100644 --- a/arch/powerpc/platforms/44x/ppc476.c +++ b/arch/powerpc/platforms/44x/ppc476.c @@ -86,8 +86,7 @@ static void __noreturn avr_reset_system(char *cmd) avr_halt_system(AVR_PWRCTL_RESET); } -static int avr_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int avr_probe(struct i2c_client *client) { avr_i2c_client = client; ppc_md.restart = avr_reset_system; @@ -104,7 +103,7 @@ static struct i2c_driver avr_driver = { .driver = { .name = "akebono-avr", }, - .probe = avr_probe, + .probe_new = avr_probe, .id_table = avr_id, }; diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c index 0967bdfb1691..409481016928 100644 --- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c +++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c @@ -142,7 +142,7 @@ static int mcu_gpiochip_remove(struct mcu *mcu) return 0; } -static int mcu_probe(struct i2c_client *client, const struct i2c_device_id *id) +static int mcu_probe(struct i2c_client *client) { struct mcu *mcu; int ret; @@ -221,7 +221,7 @@ static struct i2c_driver mcu_driver = { .name = "mcu-mpc8349emitx", .of_match_table = mcu_of_match_table, }, - .probe = mcu_probe, + .probe_new = mcu_probe, .remove = mcu_remove, .id_table = mcu_ids, }; -- cgit v1.2.3 From e53281bc21f061f96c9004f534bc3e807d70cb73 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 14 Aug 2020 06:54:49 +0000 Subject: powerpc: Drop _nmask_and_or_msr() _nmask_and_or_msr() is only used at two places to set MSR_IP. The SYNC is unnecessary as the users are not PowerPC 601. Can be easily writen in C. Do it, and drop _nmask_and_or_msr() Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c2d2b8dfb8dd677026b26dffc8d31070c38a6b89.1597388079.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/processor.h | 1 - arch/powerpc/kernel/misc_32.S | 13 ------------- arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c | 3 ++- arch/powerpc/platforms/embedded6xx/storcenter.c | 3 ++- 4 files changed, 4 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 8184eb357f10..8c7ad14d68de 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -437,7 +437,6 @@ extern void power9_idle_type(unsigned long stop_psscr_val, extern void flush_instruction_cache(void); extern int fix_alignment(struct pt_regs *); -extern void _nmask_and_or_msr(unsigned long nmask, unsigned long or_val); #ifdef CONFIG_PPC64 /* diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index b24f866fef81..8d9cb5df580e 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -215,19 +215,6 @@ _GLOBAL(low_choose_7447a_dfs) #endif /* CONFIG_CPU_FREQ_PMAC && CONFIG_PPC_BOOK3S_32 */ -/* - * complement mask on the msr then "or" some values on. 
- * _nmask_and_or_msr(nmask, value_to_or) - */ -_GLOBAL(_nmask_and_or_msr) - mfmsr r0 /* Get current msr */ - andc r0,r0,r3 /* And off the bits set in r3 (first parm) */ - or r0,r0,r4 /* Or on the bits in r4 (second parm) */ - SYNC /* Some chip revs have problems here... */ - mtmsr r0 /* Update machine state */ - isync - blr /* Done */ - #ifdef CONFIG_40x /* diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c index 15437abe1f6d..b95c3380d2b5 100644 --- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c +++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c @@ -147,7 +147,8 @@ static void __noreturn mpc7448_hpc2_restart(char *cmd) local_irq_disable(); /* Set exception prefix high - to the firmware */ - _nmask_and_or_msr(0, MSR_IP); + mtmsr(mfmsr() | MSR_IP); + isync(); for (;;) ; /* Spin until reset happens */ } diff --git a/arch/powerpc/platforms/embedded6xx/storcenter.c b/arch/powerpc/platforms/embedded6xx/storcenter.c index ed1914dd34bb..e346ddcef45e 100644 --- a/arch/powerpc/platforms/embedded6xx/storcenter.c +++ b/arch/powerpc/platforms/embedded6xx/storcenter.c @@ -101,7 +101,8 @@ static void __noreturn storcenter_restart(char *cmd) local_irq_disable(); /* Set exception prefix high - to the firmware */ - _nmask_and_or_msr(0, MSR_IP); + mtmsr(mfmsr() | MSR_IP); + isync(); /* Wait for reset to happen */ for (;;) ; -- cgit v1.2.3 From 9d6792ffe140240ae54c881cc4183f9acc24b4df Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Thu, 13 Aug 2020 10:11:31 -0500 Subject: powerpc/pseries: explicitly reschedule during drmem_lmb list traversal The drmem lmb list can have hundreds of thousands of entries, and unfortunately lookups take the form of linear searches. As long as this is the case, traversals have the potential to monopolize the CPU and provoke lockup reports, workqueue stalls, and the like unless they explicitly yield. Rather than placing cond_resched() calls within various for_each_drmem_lmb() loop blocks in the code, put it in the iteration expression of the loop macro itself so users can't omit it. Introduce a drmem_lmb_next() iteration helper function which calls cond_resched() at a regular interval during array traversal. Each iteration of the loop in DLPAR code paths can involve around ten RTAS calls which can each take up to 250us, so this ensures the check is performed at worst every few milliseconds. Fixes: 6c6ea53725b3 ("powerpc/mm: Separate ibm, dynamic-memory data from DT format") Signed-off-by: Nathan Lynch Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200813151131.2070161-1-nathanl@linux.ibm.com --- arch/powerpc/include/asm/drmem.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index 17ccc6474ab6..6fb928605ed1 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -8,6 +8,8 @@ #ifndef _ASM_POWERPC_LMB_H #define _ASM_POWERPC_LMB_H +#include + struct drmem_lmb { u64 base_addr; u32 drc_index; @@ -26,8 +28,22 @@ struct drmem_lmb_info { extern struct drmem_lmb_info *drmem_info; +static inline struct drmem_lmb *drmem_lmb_next(struct drmem_lmb *lmb, + const struct drmem_lmb *start) +{ + /* + * DLPAR code paths can take several milliseconds per element + * when interacting with firmware. Ensure that we don't + * unfairly monopolize the CPU. 
+ */ + if (((++lmb - start) % 16) == 0) + cond_resched(); + + return lmb; +} + #define for_each_drmem_lmb_in_range(lmb, start, end) \ - for ((lmb) = (start); (lmb) < (end); (lmb)++) + for ((lmb) = (start); (lmb) < (end); lmb = drmem_lmb_next(lmb, start)) #define for_each_drmem_lmb(lmb) \ for_each_drmem_lmb_in_range((lmb), \ -- cgit v1.2.3 From e426ab39f41045a4c163031272b2f48d944b69c0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 14 Aug 2020 05:56:24 +0000 Subject: powerpc: Remove flush_instruction_cache for book3s/32 The only callers of flush_instruction_cache() are: arch/powerpc/kernel/swsusp_booke.S: bl flush_instruction_cache arch/powerpc/mm/nohash/40x.c: flush_instruction_cache(); arch/powerpc/mm/nohash/44x.c: flush_instruction_cache(); arch/powerpc/mm/nohash/fsl_booke.c: flush_instruction_cache(); arch/powerpc/platforms/44x/machine_check.c: flush_instruction_cache(); arch/powerpc/platforms/44x/machine_check.c: flush_instruction_cache(); This function is not used by book3s/32, drop it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/50098f49877cea0f46730a9df82dcabf84160e4b.1597384512.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/misc_32.S | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 8d9cb5df580e..aa860b8d1dcc 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -258,9 +258,8 @@ _ASM_NOKPROBE_SYMBOL(real_writeb) /* * Flush instruction cache. - * This is a no-op on the 601. */ -#ifndef CONFIG_PPC_8xx +#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC_BOOK3S_32) _GLOBAL(flush_instruction_cache) #if defined(CONFIG_4xx) lis r3, KERNELBASE@h @@ -277,18 +276,11 @@ _GLOBAL(flush_instruction_cache) mfspr r3,SPRN_L1CSR1 ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR mtspr SPRN_L1CSR1,r3 -#elif defined(CONFIG_PPC_BOOK3S_601) - blr /* for 601, do nothing */ -#else - /* 603/604 processor - use invalidate-all bit in HID0 */ - mfspr r3,SPRN_HID0 - ori r3,r3,HID0_ICFI - mtspr SPRN_HID0,r3 #endif /* CONFIG_4xx */ isync blr EXPORT_SYMBOL(flush_instruction_cache) -#endif /* CONFIG_PPC_8xx */ +#endif /* * Copy a whole page. We use the dcbz instruction on the destination -- cgit v1.2.3 From f663f3312051402d32952c44d156a20c0b854753 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 14 Aug 2020 05:56:25 +0000 Subject: powerpc: Move flush_instruction_cache() prototype in asm/cacheflush.h flush_instruction_cache() belongs to the cache flushing function family. 
Move its prototype in asm/cacheflush.h Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/993445b5227e8ca2f0e38bcc9ea3dfea6e865920.1597384512.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/cacheflush.h | 2 ++ arch/powerpc/include/asm/processor.h | 1 - arch/powerpc/platforms/44x/machine_check.c | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index 54764c6e922d..481877879fec 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -98,6 +98,8 @@ static inline void invalidate_dcache_range(unsigned long start, mb(); /* sync */ } +void flush_instruction_cache(void); + #include #endif /* _ASM_POWERPC_CACHEFLUSH_H */ diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 8c7ad14d68de..36a71cd41f37 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -435,7 +435,6 @@ extern void power7_idle_type(unsigned long type); extern void power9_idle_type(unsigned long stop_psscr_val, unsigned long stop_psscr_mask); -extern void flush_instruction_cache(void); extern int fix_alignment(struct pt_regs *); #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/platforms/44x/machine_check.c b/arch/powerpc/platforms/44x/machine_check.c index 90ad6ac529d2..a5c898bb9bab 100644 --- a/arch/powerpc/platforms/44x/machine_check.c +++ b/arch/powerpc/platforms/44x/machine_check.c @@ -7,6 +7,7 @@ #include #include +#include int machine_check_440A(struct pt_regs *regs) { -- cgit v1.2.3 From de39b19452e784de5f90ae899851ab29a29bb42c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 14 Aug 2020 05:56:26 +0000 Subject: powerpc: Rewrite 4xx flush_cache_instruction() in C Nothing prevents flush_cache_instruction() from being writen in C. Do it to improve readability and maintainability. This function is very small and isn't called from assembly, make it static inline in asm/cacheflush.h Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/93d93fc69b4b3ad3ceba2fc0756333c0c0245bb7.1597384512.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/cacheflush.h | 8 ++++++++ arch/powerpc/kernel/misc_32.S | 7 +------ 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index 481877879fec..138e46d8c04e 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -98,7 +98,15 @@ static inline void invalidate_dcache_range(unsigned long start, mb(); /* sync */ } +#ifdef CONFIG_4xx +static inline void flush_instruction_cache(void) +{ + iccci((void *)KERNELBASE); + isync(); +} +#else void flush_instruction_cache(void); +#endif #include diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index aa860b8d1dcc..9e8f730fe04d 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -259,12 +259,8 @@ _ASM_NOKPROBE_SYMBOL(real_writeb) /* * Flush instruction cache. 
*/ -#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC_BOOK3S_32) +#ifdef CONFIG_FSL_BOOKE _GLOBAL(flush_instruction_cache) -#if defined(CONFIG_4xx) - lis r3, KERNELBASE@h - iccci 0,r3 -#elif defined(CONFIG_FSL_BOOKE) #ifdef CONFIG_E200 mfspr r3,SPRN_L1CSR0 ori r3,r3,L1CSR0_CFI|L1CSR0_CLFC @@ -276,7 +272,6 @@ _GLOBAL(flush_instruction_cache) mfspr r3,SPRN_L1CSR1 ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR mtspr SPRN_L1CSR1,r3 -#endif /* CONFIG_4xx */ isync blr EXPORT_SYMBOL(flush_instruction_cache) -- cgit v1.2.3 From 704dfe931df951895dea98bd1d9cacbb601b6451 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 14 Aug 2020 05:56:27 +0000 Subject: powerpc: Rewrite FSL_BOOKE flush_cache_instruction() in C Nothing prevents flush_cache_instruction() from being writen in C. Do it to improve readability and maintainability. This function is only use by low level callers, it is not intended to be used by module. Don't export it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f989eff8296800c427622c0985384148404e4f0b.1597384512.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/misc_32.S | 22 ---------------------- arch/powerpc/mm/nohash/fsl_booke.c | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 22 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 9e8f730fe04d..717e658b90fd 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -255,28 +255,6 @@ _ASM_NOKPROBE_SYMBOL(real_writeb) #endif /* CONFIG_40x */ - -/* - * Flush instruction cache. - */ -#ifdef CONFIG_FSL_BOOKE -_GLOBAL(flush_instruction_cache) -#ifdef CONFIG_E200 - mfspr r3,SPRN_L1CSR0 - ori r3,r3,L1CSR0_CFI|L1CSR0_CLFC - /* msync; isync recommended here */ - mtspr SPRN_L1CSR0,r3 - isync - blr -#endif - mfspr r3,SPRN_L1CSR1 - ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR - mtspr SPRN_L1CSR1,r3 - isync - blr -EXPORT_SYMBOL(flush_instruction_cache) -#endif - /* * Copy a whole page. We use the dcbz instruction on the destination * to reduce memory traffic (it eliminates the unnecessary reads of diff --git a/arch/powerpc/mm/nohash/fsl_booke.c b/arch/powerpc/mm/nohash/fsl_booke.c index 0c294827d6e5..36bda962d3b3 100644 --- a/arch/powerpc/mm/nohash/fsl_booke.c +++ b/arch/powerpc/mm/nohash/fsl_booke.c @@ -219,6 +219,22 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) return tlbcam_addrs[tlbcam_index - 1].limit - PAGE_OFFSET + 1; } +void flush_instruction_cache(void) +{ + unsigned long tmp; + + if (IS_ENABLED(CONFIG_E200)) { + tmp = mfspr(SPRN_L1CSR0); + tmp |= L1CSR0_CFI | L1CSR0_CLFC; + mtspr(SPRN_L1CSR0, tmp); + } else { + tmp = mfspr(SPRN_L1CSR1); + tmp |= L1CSR1_ICFI | L1CSR1_ICLFR; + mtspr(SPRN_L1CSR1, tmp); + } + isync(); +} + /* * MMU_init_hw does the chip-specific initialization of the MMU hardware. */ -- cgit v1.2.3 From e5e179aa3a39c818db8fbc2dce8d2cd24adaf657 Mon Sep 17 00:00:00 2001 From: Scott Cheloha Date: Mon, 10 Aug 2020 20:51:15 -0500 Subject: pseries/drmem: don't cache node id in drmem_lmb struct At memory hot-remove time we can retrieve an LMB's nid from its corresponding memory_block. There is no need to store the nid in multiple locations. Note that lmb_to_memblock() uses find_memory_block() to get the corresponding memory_block. As find_memory_block() runs in sub-linear time this approach is negligibly slower than what we do at present. 
In exchange for this lookup at hot-remove time we no longer need to call memory_add_physaddr_to_nid() during drmem_init() for each LMB. On powerpc, memory_add_physaddr_to_nid() is a linear search, so this spares us an O(n^2) initialization during boot. On systems with many LMBs that initialization overhead is palpable and disruptive. For example, on a box with 249854 LMBs we're seeing drmem_init() take upwards of 30 seconds to complete: [ 53.721639] drmem: initializing drmem v2 [ 80.604346] watchdog: BUG: soft lockup - CPU#65 stuck for 23s! [swapper/0:1] [ 80.604377] Modules linked in: [ 80.604389] CPU: 65 PID: 1 Comm: swapper/0 Not tainted 5.6.0-rc2+ #4 [ 80.604397] NIP: c0000000000a4980 LR: c0000000000a4940 CTR: 0000000000000000 [ 80.604407] REGS: c0002dbff8493830 TRAP: 0901 Not tainted (5.6.0-rc2+) [ 80.604412] MSR: 8000000002009033 CR: 44000248 XER: 0000000d [ 80.604431] CFAR: c0000000000a4a38 IRQMASK: 0 [ 80.604431] GPR00: c0000000000a4940 c0002dbff8493ac0 c000000001904400 c0003cfffffede30 [ 80.604431] GPR04: 0000000000000000 c000000000f4095a 000000000000002f 0000000010000000 [ 80.604431] GPR08: c0000bf7ecdb7fb8 c0000bf7ecc2d3c8 0000000000000008 c00c0002fdfb2001 [ 80.604431] GPR12: 0000000000000000 c00000001e8ec200 [ 80.604477] NIP [c0000000000a4980] hot_add_scn_to_nid+0xa0/0x3e0 [ 80.604486] LR [c0000000000a4940] hot_add_scn_to_nid+0x60/0x3e0 [ 80.604492] Call Trace: [ 80.604498] [c0002dbff8493ac0] [c0000000000a4940] hot_add_scn_to_nid+0x60/0x3e0 (unreliable) [ 80.604509] [c0002dbff8493b20] [c000000000087c10] memory_add_physaddr_to_nid+0x20/0x60 [ 80.604521] [c0002dbff8493b40] [c0000000010d4880] drmem_init+0x25c/0x2f0 [ 80.604530] [c0002dbff8493c10] [c000000000010154] do_one_initcall+0x64/0x2c0 [ 80.604540] [c0002dbff8493ce0] [c0000000010c4aa0] kernel_init_freeable+0x2d8/0x3a0 [ 80.604550] [c0002dbff8493db0] [c000000000010824] kernel_init+0x2c/0x148 [ 80.604560] [c0002dbff8493e20] [c00000000000b648] ret_from_kernel_thread+0x5c/0x74 [ 80.604567] Instruction dump: [ 80.604574] 392918e8 e9490000 e90a000a e92a0000 80ea000c 1d080018 3908ffe8 7d094214 [ 80.604586] 7fa94040 419d00dc e9490010 714a0088 <2faa0008> 409e00ac e9490000 7fbe5040 [ 89.047390] drmem: 249854 LMB(s) With a patched kernel on the same machine we're no longer seeing the soft lockup. drmem_init() now completes in negligible time, even when the LMB count is large. 
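For illustration, the hot-remove side of this change can be condensed into the following sketch (a simplified reading of the dlpar_remove_lmb() hunk in the patch below; the offline step and most error handling are trimmed, and the function name is invented for the sketch):

static int dlpar_remove_lmb_sketch(struct drmem_lmb *lmb)
{
        struct memory_block *mem_block;
        int nid;

        /* derive the node id from the memory_block instead of caching it in lmb->nid */
        mem_block = lmb_to_memblock(lmb);
        if (!mem_block)
                return -EINVAL;

        nid = mem_block->nid;
        put_device(&mem_block->dev);

        __remove_memory(nid, lmb->base_addr, pseries_memory_block_size());
        return 0;
}
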
Fixes: b2d3b5ee66f2 ("powerpc/pseries: Track LMB nid instead of using device tree") Signed-off-by: Scott Cheloha Reviewed-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200811015115.63677-1-cheloha@linux.ibm.com --- arch/powerpc/include/asm/drmem.h | 21 --------------------- arch/powerpc/mm/drmem.c | 6 +----- arch/powerpc/platforms/pseries/hotplug-memory.c | 24 ++++++++++++++++-------- 3 files changed, 17 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index 6fb928605ed1..030a19d92213 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -15,9 +15,6 @@ struct drmem_lmb { u32 drc_index; u32 aa_index; u32 flags; -#ifdef CONFIG_MEMORY_HOTPLUG - int nid; -#endif }; struct drmem_lmb_info { @@ -121,22 +118,4 @@ static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb) lmb->aa_index = 0xffffffff; } -#ifdef CONFIG_MEMORY_HOTPLUG -static inline void lmb_set_nid(struct drmem_lmb *lmb) -{ - lmb->nid = memory_add_physaddr_to_nid(lmb->base_addr); -} -static inline void lmb_clear_nid(struct drmem_lmb *lmb) -{ - lmb->nid = -1; -} -#else -static inline void lmb_set_nid(struct drmem_lmb *lmb) -{ -} -static inline void lmb_clear_nid(struct drmem_lmb *lmb) -{ -} -#endif - #endif /* _ASM_POWERPC_LMB_H */ diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c index b2eeea39684c..9af3832c9d8d 100644 --- a/arch/powerpc/mm/drmem.c +++ b/arch/powerpc/mm/drmem.c @@ -389,10 +389,8 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop) if (!drmem_info->lmbs) return; - for_each_drmem_lmb(lmb) { + for_each_drmem_lmb(lmb) read_drconf_v1_cell(lmb, &prop); - lmb_set_nid(lmb); - } } static void __init init_drmem_v2_lmbs(const __be32 *prop) @@ -437,8 +435,6 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop) lmb->aa_index = dr_cell.aa_index; lmb->flags = dr_cell.flags; - - lmb_set_nid(lmb); } } } diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 5d545b78111f..0ea976d1cac4 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -354,25 +354,32 @@ static int dlpar_add_lmb(struct drmem_lmb *); static int dlpar_remove_lmb(struct drmem_lmb *lmb) { + struct memory_block *mem_block; unsigned long block_sz; int rc; if (!lmb_is_removable(lmb)) return -EINVAL; + mem_block = lmb_to_memblock(lmb); + if (mem_block == NULL) + return -EINVAL; + rc = dlpar_offline_lmb(lmb); - if (rc) + if (rc) { + put_device(&mem_block->dev); return rc; + } block_sz = pseries_memory_block_size(); - __remove_memory(lmb->nid, lmb->base_addr, block_sz); + __remove_memory(mem_block->nid, lmb->base_addr, block_sz); + put_device(&mem_block->dev); /* Update memory regions for memory remove */ memblock_remove(lmb->base_addr, block_sz); invalidate_lmb_associativity_index(lmb); - lmb_clear_nid(lmb); lmb->flags &= ~DRCONF_MEM_ASSIGNED; return 0; @@ -591,7 +598,7 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) static int dlpar_add_lmb(struct drmem_lmb *lmb) { unsigned long block_sz; - int rc; + int nid, rc; if (lmb->flags & DRCONF_MEM_ASSIGNED) return -EINVAL; @@ -602,11 +609,13 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) return rc; } - lmb_set_nid(lmb); block_sz = memory_block_size_bytes(); + /* Find the node id for this address. 
*/ + nid = memory_add_physaddr_to_nid(lmb->base_addr); + /* Add the memory */ - rc = __add_memory(lmb->nid, lmb->base_addr, block_sz); + rc = __add_memory(nid, lmb->base_addr, block_sz); if (rc) { invalidate_lmb_associativity_index(lmb); return rc; @@ -614,9 +623,8 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) rc = dlpar_online_lmb(lmb); if (rc) { - __remove_memory(lmb->nid, lmb->base_addr, block_sz); + __remove_memory(nid, lmb->base_addr, block_sz); invalidate_lmb_associativity_index(lmb); - lmb_clear_nid(lmb); } else { lmb->flags |= DRCONF_MEM_ASSIGNED; } -- cgit v1.2.3 From d9de6b0da85c9f51734f7648f6e860b89f94c801 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 12 Aug 2020 00:04:28 +1000 Subject: powerpc: unrel_branch_check.sh: fix shellcheck complaints No functional change Signed-off-by: Stephen Rothwell Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200811140435.20957-2-sfr@canb.auug.org.au --- arch/powerpc/tools/unrel_branch_check.sh | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh index 6e6a30aea3ed..4c1e04ba5081 100755 --- a/arch/powerpc/tools/unrel_branch_check.sh +++ b/arch/powerpc/tools/unrel_branch_check.sh @@ -1,3 +1,4 @@ +#!/bin/bash # Copyright © 2016 IBM Corporation # # This program is free software; you can redistribute it and/or @@ -26,7 +27,7 @@ awk '{print $1}' BRANCHES=$( $objdump -R "$vmlinux" -D --start-address=0xc000000000000000 \ - --stop-address=${end_intr} | + --stop-address="$end_intr" | grep -e "^c[0-9a-f]*:[[:space:]]*\([0-9a-f][0-9a-f][[:space:]]\)\{4\}[[:space:]]*b" | grep -v '\<__start_initialization_multiplatform>' | grep -v -e 'b.\?.\?ctr' | @@ -40,12 +41,12 @@ awk '{ print $1 ":" $6 ":0x" $7 ":" $8 " "}' for tuple in $BRANCHES do - from=`echo $tuple | cut -d':' -f1` - branch=`echo $tuple | cut -d':' -f2` - to=`echo $tuple | cut -d':' -f3 | sed 's/cr[0-7],//'` - sym=`echo $tuple | cut -d':' -f4` + from=$(echo "$tuple" | cut -d':' -f1) + branch=$(echo "$tuple" | cut -d':' -f2) + to=$(echo "$tuple" | cut -d':' -f3 | sed 's/cr[0-7],//') + sym=$(echo "$tuple" | cut -d':' -f4) - if (( $to > $end_intr )) + if (( to > end_intr )) then if [ -z "$bad_branches" ]; then echo "WARNING: Unrelocated relative branches" -- cgit v1.2.3 From 20ff8ec182160df86571a8af5773ff1e52837d73 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 12 Aug 2020 00:04:29 +1000 Subject: powerpc: unrel_branch_check.sh: simplify and combine some executions Also some minor style changes. There should still be no change in behaviour. 
Signed-off-by: Stephen Rothwell Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200811140435.20957-3-sfr@canb.auug.org.au --- arch/powerpc/tools/unrel_branch_check.sh | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh index 4c1e04ba5081..d735e3875b5e 100755 --- a/arch/powerpc/tools/unrel_branch_check.sh +++ b/arch/powerpc/tools/unrel_branch_check.sh @@ -17,37 +17,34 @@ objdump="$1" vmlinux="$2" #__end_interrupts should be located within the first 64K +kstart=0xc000000000000000 +printf -v kend '0x%x' $(( kstart + 0x10000 )) end_intr=0x$( -$objdump -R "$vmlinux" -d --start-address=0xc000000000000000 \ - --stop-address=0xc000000000010000 | -grep '\<__end_interrupts>:' | -awk '{print $1}' +$objdump -R -d --start-address="$kstart" --stop-address="$kend" "$vmlinux" | +awk '$2 == "<__end_interrupts>:" { print $1 }' ) BRANCHES=$( -$objdump -R "$vmlinux" -D --start-address=0xc000000000000000 \ - --stop-address="$end_intr" | +$objdump -R -D --start-address="$kstart" --stop-address="$end_intr" "$vmlinux" | grep -e "^c[0-9a-f]*:[[:space:]]*\([0-9a-f][0-9a-f][[:space:]]\)\{4\}[[:space:]]*b" | -grep -v '\<__start_initialization_multiplatform>' | -grep -v -e 'b.\?.\?ctr' | -grep -v -e 'b.\?.\?lr' | -sed -e 's/\bbt.\?[[:space:]]*[[:digit:]][[:digit:]]*,/beq/' \ +sed -e '/\<__start_initialization_multiplatform>/d' \ + -e '/b.\?.\?ctr/d' \ + -e '/b.\?.\?lr/d' \ + -e 's/\bbt.\?[[:space:]]*[[:digit:]][[:digit:]]*,/beq/' \ -e 's/\bbf.\?[[:space:]]*[[:digit:]][[:digit:]]*,/bne/' \ -e 's/[[:space:]]0x/ /' \ -e 's/://' | awk '{ print $1 ":" $6 ":0x" $7 ":" $8 " "}' ) -for tuple in $BRANCHES -do +for tuple in $BRANCHES; do from=$(echo "$tuple" | cut -d':' -f1) branch=$(echo "$tuple" | cut -d':' -f2) to=$(echo "$tuple" | cut -d':' -f3 | sed 's/cr[0-7],//') sym=$(echo "$tuple" | cut -d':' -f4) - if (( to > end_intr )) - then + if (( to > end_intr )); then if [ -z "$bad_branches" ]; then echo "WARNING: Unrelocated relative branches" bad_branches="yes" -- cgit v1.2.3 From 4e71106c343c625c0bf72a65b244e35e7d2cd037 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 12 Aug 2020 00:04:30 +1000 Subject: powerpc: unrel_branch_check.sh: simplify objdump's asm output We don't use the raw hex instruction dump, so elide it and adjust the following expressions. Also use \s instead of [[:space:]] everywhere. 
Signed-off-by: Stephen Rothwell Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200811140435.20957-4-sfr@canb.auug.org.au --- arch/powerpc/tools/unrel_branch_check.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh index d735e3875b5e..7e936e2cf70d 100755 --- a/arch/powerpc/tools/unrel_branch_check.sh +++ b/arch/powerpc/tools/unrel_branch_check.sh @@ -26,16 +26,16 @@ awk '$2 == "<__end_interrupts>:" { print $1 }' ) BRANCHES=$( -$objdump -R -D --start-address="$kstart" --stop-address="$end_intr" "$vmlinux" | -grep -e "^c[0-9a-f]*:[[:space:]]*\([0-9a-f][0-9a-f][[:space:]]\)\{4\}[[:space:]]*b" | +$objdump -R -D --no-show-raw-insn --start-address="$kstart" --stop-address="$end_intr" "$vmlinux" | +grep -e "^c[0-9a-f]*:\s*b" | sed -e '/\<__start_initialization_multiplatform>/d' \ -e '/b.\?.\?ctr/d' \ -e '/b.\?.\?lr/d' \ - -e 's/\bbt.\?[[:space:]]*[[:digit:]][[:digit:]]*,/beq/' \ - -e 's/\bbf.\?[[:space:]]*[[:digit:]][[:digit:]]*,/bne/' \ - -e 's/[[:space:]]0x/ /' \ + -e 's/\bbt.\?\s*[[:digit:]][[:digit:]]*,/beq/' \ + -e 's/\bbf.\?\s*[[:digit:]][[:digit:]]*,/bne/' \ + -e 's/\s0x/ /' \ -e 's/://' | -awk '{ print $1 ":" $6 ":0x" $7 ":" $8 " "}' +awk '{ print $1 ":" $2 ":0x" $3 ":" $4 " "}' ) for tuple in $BRANCHES; do -- cgit v1.2.3 From 3d97abbc9f6fe90973551f3e3eef47ffef863114 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 12 Aug 2020 00:04:31 +1000 Subject: powerpc: unrel_branch_check.sh: convert grep | sed | awk to just sed Also start using sed -E and make all the separate expressions into a single one with comments. Pull the stripping of condition registers back into the sed command. Signed-off-by: Stephen Rothwell Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200811140435.20957-5-sfr@canb.auug.org.au --- arch/powerpc/tools/unrel_branch_check.sh | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh index 7e936e2cf70d..dc82289b2252 100755 --- a/arch/powerpc/tools/unrel_branch_check.sh +++ b/arch/powerpc/tools/unrel_branch_check.sh @@ -27,21 +27,31 @@ awk '$2 == "<__end_interrupts>:" { print $1 }' BRANCHES=$( $objdump -R -D --no-show-raw-insn --start-address="$kstart" --stop-address="$end_intr" "$vmlinux" | -grep -e "^c[0-9a-f]*:\s*b" | -sed -e '/\<__start_initialization_multiplatform>/d' \ - -e '/b.\?.\?ctr/d' \ - -e '/b.\?.\?lr/d' \ - -e 's/\bbt.\?\s*[[:digit:]][[:digit:]]*,/beq/' \ - -e 's/\bbf.\?\s*[[:digit:]][[:digit:]]*,/bne/' \ - -e 's/\s0x/ /' \ - -e 's/://' | -awk '{ print $1 ":" $2 ":0x" $3 ":" $4 " "}' +sed -E -n ' +# match lines that start with a kernel address +/^c[0-9a-f]*:\s*b/ { + # drop a target that we do not care about + /\<__start_initialization_multiplatform>/d + # drop branches via ctr or lr + /\ end_intr )); then -- cgit v1.2.3 From b84eaab6ede6477484edc043456cf7d7cfc7f8b3 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 12 Aug 2020 00:04:32 +1000 Subject: powerpc: unrel_branch_check.sh: simplify and tidy up the final loop Signed-off-by: Stephen Rothwell Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200811140435.20957-6-sfr@canb.auug.org.au --- arch/powerpc/tools/unrel_branch_check.sh | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) (limited to 'arch') diff --git 
a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh index dc82289b2252..54ebd05615d4 100755 --- a/arch/powerpc/tools/unrel_branch_check.sh +++ b/arch/powerpc/tools/unrel_branch_check.sh @@ -25,7 +25,6 @@ $objdump -R -d --start-address="$kstart" --stop-address="$kend" "$vmlinux" | awk '$2 == "<__end_interrupts>:" { print $1 }' ) -BRANCHES=$( $objdump -R -D --no-show-raw-insn --start-address="$kstart" --stop-address="$end_intr" "$vmlinux" | sed -E -n ' # match lines that start with a kernel address @@ -45,24 +44,19 @@ sed -E -n ' # strip out condition registers s/:0xcr[0-7],/:0x/ p -}' -) - -for tuple in $BRANCHES; do - from=$(echo "$tuple" | cut -d':' -f1) - branch=$(echo "$tuple" | cut -d':' -f2) - to=$(echo "$tuple" | cut -d':' -f3) - sym=$(echo "$tuple" | cut -d':' -f4) +}' | { +all_good=true +while IFS=: read -r from branch to sym; do if (( to > end_intr )); then - if [ -z "$bad_branches" ]; then - echo "WARNING: Unrelocated relative branches" - bad_branches="yes" + if $all_good; then + printf '%s\n' 'WARNING: Unrelocated relative branches' + all_good=false fi - echo "$from $branch-> $to $sym" + printf '%s %s-> %s %s\n' "$from" "$branch" "$to" "$sym" fi done -if [ -z "$bad_branches" ]; then - exit 0 -fi +$all_good + +} -- cgit v1.2.3 From 3745ae63b405b09c86718f95d96c4b2d2827b087 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 12 Aug 2020 00:04:33 +1000 Subject: powerpc: unrel_branch_check.sh: fix up the file header Signed-off-by: Stephen Rothwell Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200811140435.20957-7-sfr@canb.auug.org.au --- arch/powerpc/tools/unrel_branch_check.sh | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh index 54ebd05615d4..4489f16a443c 100755 --- a/arch/powerpc/tools/unrel_branch_check.sh +++ b/arch/powerpc/tools/unrel_branch_check.sh @@ -1,16 +1,9 @@ #!/bin/bash -# Copyright © 2016 IBM Corporation +# SPDX-License-Identifier: GPL-2.0+ +# Copyright © 2016,2020 IBM Corporation # -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version -# 2 of the License, or (at your option) any later version. -# -# This script checks the relocations of a vmlinux for "suspicious" -# branches from unrelocated code (head_64.S code). - -# Turn this on if you want more debug output: -# set -x +# This script checks the unrelocated code of a vmlinux for "suspicious" +# branches to relocated code (head_64.S code). # Have Kbuild supply the path to objdump so we handle cross compilation. objdump="$1" -- cgit v1.2.3 From af13a2244d59c4d63a25abd8257cbaef9d9ffebc Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 12 Aug 2020 00:04:34 +1000 Subject: powerpc: unrel_branch_check.sh: exit silently for early errors If we can't find the address of __end_interrupts, then we still exit successfully as that is the current behaviour. 
Signed-off-by: Stephen Rothwell Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200811140435.20957-8-sfr@canb.auug.org.au --- arch/powerpc/tools/unrel_branch_check.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh index 4489f16a443c..70da90270c78 100755 --- a/arch/powerpc/tools/unrel_branch_check.sh +++ b/arch/powerpc/tools/unrel_branch_check.sh @@ -14,9 +14,12 @@ kstart=0xc000000000000000 printf -v kend '0x%x' $(( kstart + 0x10000 )) end_intr=0x$( -$objdump -R -d --start-address="$kstart" --stop-address="$kend" "$vmlinux" | +$objdump -R -d --start-address="$kstart" --stop-address="$kend" "$vmlinux" 2>/dev/null | awk '$2 == "<__end_interrupts>:" { print $1 }' ) +if [ "$end_intr" = "0x" ]; then + exit 0 +fi $objdump -R -D --no-show-raw-insn --start-address="$kstart" --stop-address="$end_intr" "$vmlinux" | sed -E -n ' -- cgit v1.2.3 From b71dca9891b330d5c2d3ff5d41704aa6f64f8e32 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 12 Aug 2020 18:10:35 +1000 Subject: powerpc: unrel_branch_check.sh: use nm to find symbol value This is considerably faster then parsing the objdump asm output. It will also make the enabling of llvm-objdump a little easier. Signed-off-by: Stephen Rothwell Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200812081036.7969-2-sfr@canb.auug.org.au --- arch/powerpc/Makefile.postlink | 2 +- arch/powerpc/tools/unrel_branch_check.sh | 13 +++++-------- 2 files changed, 6 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/Makefile.postlink b/arch/powerpc/Makefile.postlink index 2268396ff4bb..a6c77f4d32b2 100644 --- a/arch/powerpc/Makefile.postlink +++ b/arch/powerpc/Makefile.postlink @@ -18,7 +18,7 @@ quiet_cmd_relocs_check = CHKREL $@ ifdef CONFIG_PPC_BOOK3S_64 cmd_relocs_check = \ $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@" ; \ - $(BASH) $(srctree)/arch/powerpc/tools/unrel_branch_check.sh "$(OBJDUMP)" "$@" + $(BASH) $(srctree)/arch/powerpc/tools/unrel_branch_check.sh "$(OBJDUMP)" "$(NM)" "$@" else cmd_relocs_check = \ $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@" diff --git a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh index 70da90270c78..0369eb2e7e4b 100755 --- a/arch/powerpc/tools/unrel_branch_check.sh +++ b/arch/powerpc/tools/unrel_branch_check.sh @@ -5,18 +5,15 @@ # This script checks the unrelocated code of a vmlinux for "suspicious" # branches to relocated code (head_64.S code). -# Have Kbuild supply the path to objdump so we handle cross compilation. +# Have Kbuild supply the path to objdump and nm so we handle cross compilation. 
objdump="$1" -vmlinux="$2" +nm="$2" +vmlinux="$3" -#__end_interrupts should be located within the first 64K kstart=0xc000000000000000 -printf -v kend '0x%x' $(( kstart + 0x10000 )) -end_intr=0x$( -$objdump -R -d --start-address="$kstart" --stop-address="$kend" "$vmlinux" 2>/dev/null | -awk '$2 == "<__end_interrupts>:" { print $1 }' -) +end_intr=0x$($nm -p "$vmlinux" | + sed -E -n '/\s+[[:alpha:]]\s+__end_interrupts\s*$/{s///p;q}') if [ "$end_intr" = "0x" ]; then exit 0 fi -- cgit v1.2.3 From 6b1992bcdee8b86a74362192d4d8906731918bcc Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 12 Aug 2020 18:10:36 +1000 Subject: powerpc: unrel_branch_check.sh: enable the use of llvm-objdump v9, 10 or 11 Currently, using llvm-objtool, this script just silently succeeds without actually do the intended checking. So this updates it to work properly. Firstly, llvm-objdump does not add target symbol names to the end of branches in its asm output, so we have to drop the branch to __start_initialization_multiplatform using its address. Secondly, v9 and 10 specify branch targets as .+, so we convert those to actual addresses. Thirdly, v10 and 11 error out on a vmlinux if given the -R option complaining that it is "not a dynamic object". The -R does not make any difference to the asm output, so remove it. Lastly, v11 produces asm that is very similar to Gnu objtool (at least as far as branches are concerned), so no further changes are necessary to make it work. Signed-off-by: Stephen Rothwell Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200812081036.7969-3-sfr@canb.auug.org.au --- arch/powerpc/tools/unrel_branch_check.sh | 34 +++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh index 0369eb2e7e4b..8301efee1e6c 100755 --- a/arch/powerpc/tools/unrel_branch_check.sh +++ b/arch/powerpc/tools/unrel_branch_check.sh @@ -18,12 +18,16 @@ if [ "$end_intr" = "0x" ]; then exit 0 fi -$objdump -R -D --no-show-raw-insn --start-address="$kstart" --stop-address="$end_intr" "$vmlinux" | +# we know that there is a correct branch to +# __start_initialization_multiplatform, so find its address +# so we can exclude it. +sim=0x$($nm -p "$vmlinux" | + sed -E -n '/\s+[[:alpha:]]\s+__start_initialization_multiplatform\s*$/{s///p;q}') + +$objdump -D --no-show-raw-insn --start-address="$kstart" --stop-address="$end_intr" "$vmlinux" | sed -E -n ' # match lines that start with a kernel address /^c[0-9a-f]*:\s*b/ { - # drop a target that we do not care about - /\<__start_initialization_multiplatform>/d # drop branches via ctr or lr /\= 0x2000000 )); then + to=$(( to - 0x4000000 )) + fi + elif (( to >= 0x8000 )); then + to=$(( to - 0x10000 )) + fi + printf -v to '0x%x' $(( "0x$from" + to )) + ;; + *) printf 'Unkown branch format\n' + ;; + esac + if [ "$to" = "$sim" ]; then + continue + fi if (( to > end_intr )); then if $all_good; then printf '%s\n' 'WARNING: Unrelocated relative branches' -- cgit v1.2.3 From 76d46a1e2fe2c35f24c07b7cc8a41afbf98b349e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 14 Aug 2020 05:49:29 +0000 Subject: powerpc: Remove flush_instruction_cache() on 8xx flush_instruction_cache() is never used on 8xx, remove it. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/245cabd8f291facac8c8c5fd370e361a69e02860.1597384145.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/nohash/8xx.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index d2b37146ae6c..231ca95f9ffb 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -244,13 +244,6 @@ void set_context(unsigned long id, pgd_t *pgd) mb(); } -void flush_instruction_cache(void) -{ - isync(); - mtspr(SPRN_IC_CST, IDC_INVALL); - isync(); -} - #ifdef CONFIG_PPC_KUEP void __init setup_kuep(bool disabled) { -- cgit v1.2.3 From c20beffeec3cb6f6f52d9aef27f91a3f453a91f4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 12 Aug 2020 12:25:16 +0000 Subject: powerpc/uaccess: Use flexible addressing with __put_user()/__get_user() At the time being, __put_user()/__get_user() and friends only use D-form addressing, with 0 offset. Ex: lwz reg1, 0(reg2) Give the compiler the opportunity to use other adressing modes whenever possible, to get more optimised code. Hereunder is a small exemple: struct test { u32 item1; u16 item2; u8 item3; u64 item4; }; int set_test_user(struct test __user *from, struct test __user *to) { int err; u32 item1; u16 item2; u8 item3; u64 item4; err = __get_user(item1, &from->item1); err |= __get_user(item2, &from->item2); err |= __get_user(item3, &from->item3); err |= __get_user(item4, &from->item4); err |= __put_user(item1, &to->item1); err |= __put_user(item2, &to->item2); err |= __put_user(item3, &to->item3); err |= __put_user(item4, &to->item4); return err; } Before the patch: 00000df0 : df0: 94 21 ff f0 stwu r1,-16(r1) df4: 39 40 00 00 li r10,0 df8: 93 c1 00 08 stw r30,8(r1) dfc: 93 e1 00 0c stw r31,12(r1) e00: 7d 49 53 78 mr r9,r10 e04: 80 a3 00 00 lwz r5,0(r3) e08: 38 e3 00 04 addi r7,r3,4 e0c: 7d 46 53 78 mr r6,r10 e10: a0 e7 00 00 lhz r7,0(r7) e14: 7d 29 33 78 or r9,r9,r6 e18: 39 03 00 06 addi r8,r3,6 e1c: 7d 46 53 78 mr r6,r10 e20: 89 08 00 00 lbz r8,0(r8) e24: 7d 29 33 78 or r9,r9,r6 e28: 38 63 00 08 addi r3,r3,8 e2c: 7d 46 53 78 mr r6,r10 e30: 83 c3 00 00 lwz r30,0(r3) e34: 83 e3 00 04 lwz r31,4(r3) e38: 7d 29 33 78 or r9,r9,r6 e3c: 7d 43 53 78 mr r3,r10 e40: 90 a4 00 00 stw r5,0(r4) e44: 7d 29 1b 78 or r9,r9,r3 e48: 38 c4 00 04 addi r6,r4,4 e4c: 7d 43 53 78 mr r3,r10 e50: b0 e6 00 00 sth r7,0(r6) e54: 7d 29 1b 78 or r9,r9,r3 e58: 38 e4 00 06 addi r7,r4,6 e5c: 7d 43 53 78 mr r3,r10 e60: 99 07 00 00 stb r8,0(r7) e64: 7d 23 1b 78 or r3,r9,r3 e68: 38 84 00 08 addi r4,r4,8 e6c: 93 c4 00 00 stw r30,0(r4) e70: 93 e4 00 04 stw r31,4(r4) e74: 7c 63 53 78 or r3,r3,r10 e78: 83 c1 00 08 lwz r30,8(r1) e7c: 83 e1 00 0c lwz r31,12(r1) e80: 38 21 00 10 addi r1,r1,16 e84: 4e 80 00 20 blr After the patch: 00000dbc : dbc: 39 40 00 00 li r10,0 dc0: 7d 49 53 78 mr r9,r10 dc4: 80 03 00 00 lwz r0,0(r3) dc8: 7d 48 53 78 mr r8,r10 dcc: a1 63 00 04 lhz r11,4(r3) dd0: 7d 29 43 78 or r9,r9,r8 dd4: 7d 48 53 78 mr r8,r10 dd8: 88 a3 00 06 lbz r5,6(r3) ddc: 7d 29 43 78 or r9,r9,r8 de0: 7d 48 53 78 mr r8,r10 de4: 80 c3 00 08 lwz r6,8(r3) de8: 80 e3 00 0c lwz r7,12(r3) dec: 7d 29 43 78 or r9,r9,r8 df0: 7d 43 53 78 mr r3,r10 df4: 90 04 00 00 stw r0,0(r4) df8: 7d 29 1b 78 or r9,r9,r3 dfc: 7d 43 53 78 mr r3,r10 e00: b1 64 00 04 sth r11,4(r4) e04: 7d 29 1b 78 or r9,r9,r3 e08: 7d 43 53 78 mr r3,r10 e0c: 98 a4 00 06 stb r5,6(r4) e10: 7d 23 1b 78 or r3,r9,r3 e14: 90 c4 00 08 stw r6,8(r4) e18: 90 e4 00 0c stw 
r7,12(r4) e1c: 7c 63 53 78 or r3,r3,r10 e20: 4e 80 00 20 blr Signed-off-by: Christophe Leroy Reviewed-by: Segher Boessenkool Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c27bc4e598daf3bbb225de7a1f5c52121cf1e279.1597235091.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/uaccess.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 00699903f1ef..b1d5e8b66b31 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -158,7 +158,7 @@ extern long __put_user_bad(void); */ #define __put_user_asm(x, addr, err, op) \ __asm__ __volatile__( \ - "1: " op " %1,0(%2) # put_user\n" \ + "1: " op "%X2 %1,%2 # put_user\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %0,%3\n" \ @@ -166,7 +166,7 @@ extern long __put_user_bad(void); ".previous\n" \ EX_TABLE(1b, 3b) \ : "=r" (err) \ - : "r" (x), "b" (addr), "i" (-EFAULT), "0" (err)) + : "r" (x), "m" (*addr), "i" (-EFAULT), "0" (err)) #ifdef __powerpc64__ #define __put_user_asm2(x, ptr, retval) \ @@ -174,8 +174,8 @@ extern long __put_user_bad(void); #else /* __powerpc64__ */ #define __put_user_asm2(x, addr, err) \ __asm__ __volatile__( \ - "1: stw %1,0(%2)\n" \ - "2: stw %1+1,4(%2)\n" \ + "1: stw%X2 %1,%2\n" \ + "2: stw%X2 %L1,%L2\n" \ "3:\n" \ ".section .fixup,\"ax\"\n" \ "4: li %0,%3\n" \ @@ -184,7 +184,7 @@ extern long __put_user_bad(void); EX_TABLE(1b, 4b) \ EX_TABLE(2b, 4b) \ : "=r" (err) \ - : "r" (x), "b" (addr), "i" (-EFAULT), "0" (err)) + : "r" (x), "m" (*addr), "i" (-EFAULT), "0" (err)) #endif /* __powerpc64__ */ #define __put_user_size_allowed(x, ptr, size, retval) \ @@ -316,7 +316,7 @@ extern long __get_user_bad(void); #define __get_user_asm(x, addr, err, op) \ __asm__ __volatile__( \ - "1: "op" %1,0(%2) # get_user\n" \ + "1: "op"%X2 %1, %2 # get_user\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %0,%3\n" \ @@ -325,7 +325,7 @@ extern long __get_user_bad(void); ".previous\n" \ EX_TABLE(1b, 3b) \ : "=r" (err), "=r" (x) \ - : "b" (addr), "i" (-EFAULT), "0" (err)) + : "m" (*addr), "i" (-EFAULT), "0" (err)) #ifdef __powerpc64__ #define __get_user_asm2(x, addr, err) \ @@ -333,8 +333,8 @@ extern long __get_user_bad(void); #else /* __powerpc64__ */ #define __get_user_asm2(x, addr, err) \ __asm__ __volatile__( \ - "1: lwz %1,0(%2)\n" \ - "2: lwz %1+1,4(%2)\n" \ + "1: lwz%X2 %1, %2\n" \ + "2: lwz%X2 %L1, %L2\n" \ "3:\n" \ ".section .fixup,\"ax\"\n" \ "4: li %0,%3\n" \ @@ -345,7 +345,7 @@ extern long __get_user_bad(void); EX_TABLE(1b, 4b) \ EX_TABLE(2b, 4b) \ : "=r" (err), "=&r" (x) \ - : "b" (addr), "i" (-EFAULT), "0" (err)) + : "m" (*addr), "i" (-EFAULT), "0" (err)) #endif /* __powerpc64__ */ #define __get_user_size_allowed(x, ptr, size, retval) \ @@ -355,10 +355,10 @@ do { \ if (size > sizeof(x)) \ (x) = __get_user_bad(); \ switch (size) { \ - case 1: __get_user_asm(x, ptr, retval, "lbz"); break; \ - case 2: __get_user_asm(x, ptr, retval, "lhz"); break; \ - case 4: __get_user_asm(x, ptr, retval, "lwz"); break; \ - case 8: __get_user_asm2(x, ptr, retval); break; \ + case 1: __get_user_asm(x, (u8 __user *)ptr, retval, "lbz"); break; \ + case 2: __get_user_asm(x, (u16 __user *)ptr, retval, "lhz"); break; \ + case 4: __get_user_asm(x, (u32 __user *)ptr, retval, "lwz"); break; \ + case 8: __get_user_asm2(x, (u64 __user *)ptr, retval); break; \ default: (x) = __get_user_bad(); \ } \ } while (0) -- cgit v1.2.3 From 
2f279eeb68b8eda43a95255db701b4faaeedbe0f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 12 Aug 2020 12:25:17 +0000 Subject: powerpc/uaccess: Add pre-update addressing to __get_user_asm() and __put_user_asm() Enable pre-update addressing mode in __get_user_asm() and __put_user_asm() Signed-off-by: Christophe Leroy Reviewed-by: Segher Boessenkool Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/13041c7df39e89ddf574ea0cdc6dedfdd9734140.1597235091.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/uaccess.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index b1d5e8b66b31..7c2427f237e1 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -158,7 +158,7 @@ extern long __put_user_bad(void); */ #define __put_user_asm(x, addr, err, op) \ __asm__ __volatile__( \ - "1: " op "%X2 %1,%2 # put_user\n" \ + "1: " op "%U2%X2 %1,%2 # put_user\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %0,%3\n" \ @@ -166,7 +166,7 @@ extern long __put_user_bad(void); ".previous\n" \ EX_TABLE(1b, 3b) \ : "=r" (err) \ - : "r" (x), "m" (*addr), "i" (-EFAULT), "0" (err)) + : "r" (x), "m<>" (*addr), "i" (-EFAULT), "0" (err)) #ifdef __powerpc64__ #define __put_user_asm2(x, ptr, retval) \ @@ -316,7 +316,7 @@ extern long __get_user_bad(void); #define __get_user_asm(x, addr, err, op) \ __asm__ __volatile__( \ - "1: "op"%X2 %1, %2 # get_user\n" \ + "1: "op"%U2%X2 %1, %2 # get_user\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %0,%3\n" \ @@ -325,7 +325,7 @@ extern long __get_user_bad(void); ".previous\n" \ EX_TABLE(1b, 3b) \ : "=r" (err), "=r" (x) \ - : "m" (*addr), "i" (-EFAULT), "0" (err)) + : "m<>" (*addr), "i" (-EFAULT), "0" (err)) #ifdef __powerpc64__ #define __get_user_asm2(x, addr, err) \ -- cgit v1.2.3 From 353bce211e00d183344f464ba1ee0e1ffb0e2a6c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 17 Aug 2020 05:46:39 +0000 Subject: powerpc/process: Remove unnecessary #ifdef CONFIG_FUNCTION_GRAPH_TRACER ftrace_graph_ret_addr() is always defined and returns 'ip' when CONFIG_FUNCTION GRAPH_TRACER is not set. So the #ifdef is not needed, remove it. Signed-off-by: Christophe Leroy Acked-by: Naveen N. 
Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/9d11143d4e27ba8274369a926968756917584868.1597643153.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/process.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 016bd831908e..febe9f7cda2f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -2096,10 +2096,8 @@ void show_stack(struct task_struct *tsk, unsigned long *stack, unsigned long sp, ip, lr, newsp; int count = 0; int firstframe = 1; -#ifdef CONFIG_FUNCTION_GRAPH_TRACER unsigned long ret_addr; int ftrace_idx = 0; -#endif if (tsk == NULL) tsk = current; @@ -2127,12 +2125,10 @@ void show_stack(struct task_struct *tsk, unsigned long *stack, if (!firstframe || ip != lr) { printk("%s["REG"] ["REG"] %pS", loglvl, sp, ip, (void *)ip); -#ifdef CONFIG_FUNCTION_GRAPH_TRACER ret_addr = ftrace_graph_ret_addr(current, &ftrace_idx, ip, stack); if (ret_addr != ip) pr_cont(" (%pS)", (void *)ret_addr); -#endif if (firstframe) pr_cont(" (unreliable)"); pr_cont("\n"); -- cgit v1.2.3 From 10bf59d923c2766ec8d6f0243481c865c7db9979 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 18 Aug 2020 14:45:57 +1000 Subject: powerpc/pseries/eeh: Fix dumb linebreaks These annoy me every time I see them. Why are they here? They're not even needed for 80cols compliance. Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200818044557.135497-1-oohall@gmail.com --- arch/powerpc/platforms/pseries/eeh_pseries.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index cb2d9a970b7b..1db74cec72bc 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -161,8 +161,7 @@ static int pseries_eeh_phb_reset(struct pci_controller *phb, int config_addr, in BUID_LO(phb->buid), option); /* If fundamental-reset not supported, try hot-reset */ - if (option == EEH_RESET_FUNDAMENTAL && - ret == -8) { + if (option == EEH_RESET_FUNDAMENTAL && ret == -8) { option = EEH_RESET_HOT; ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, config_addr, BUID_HI(phb->buid), @@ -170,8 +169,7 @@ static int pseries_eeh_phb_reset(struct pci_controller *phb, int config_addr, in } /* We need reset hold or settlement delay */ - if (option == EEH_RESET_FUNDAMENTAL || - option == EEH_RESET_HOT) + if (option == EEH_RESET_FUNDAMENTAL || option == EEH_RESET_HOT) msleep(EEH_PE_RST_HOLD_TIME); else msleep(EEH_PE_RST_SETTLE_TIME); @@ -621,8 +619,7 @@ static int pseries_eeh_set_option(struct eeh_pe *pe, int option) /* Not support */ return 0; default: - pr_err("%s: Invalid option %d\n", - __func__, option); + pr_err("%s: Invalid option %d\n", __func__, option); return -EINVAL; } @@ -954,8 +951,7 @@ static int pseries_notify_resume(struct eeh_dev *edev) if (!edev) return -EEXIST; - if (rtas_token("ibm,open-sriov-allow-unfreeze") - == RTAS_UNKNOWN_SERVICE) + if (rtas_token("ibm,open-sriov-allow-unfreeze") == RTAS_UNKNOWN_SERVICE) return -EINVAL; if (edev->pdev->is_physfn || edev->pdev->is_virtfn) -- cgit v1.2.3 From 529d2bd56ada4b8a4904909042792879868208cd Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 19 Aug 2020 11:57:04 +1000 Subject: powerpc/64: Remove unused generic_secondary_thread_init() The last caller was removed in 2014 in commit fb5a515704d7 ("powerpc: Remove 
platforms/wsp and associated pieces"). As Jordan noticed even though there are no callers, the code above in fsl_secondary_thread_init() falls through into generic_secondary_thread_init(). So we can remove the _GLOBAL but not the body of the function. However because fsl_secondary_thread_init() is inside #ifdef CONFIG_PPC_BOOK3E, we can never reach the body of generic_secondary_thread_init() unless CONFIG_PPC_BOOK3E is enabled, so we can wrap the whole thing in a single #ifdef. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200819015704.1976364-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/smp.h | 1 - arch/powerpc/kernel/head_64.S | 7 ++----- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 49a25e2400f2..81a49566ccd8 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -243,7 +243,6 @@ extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); * 64-bit but defining them all here doesn't harm */ extern void generic_secondary_smp_init(void); -extern void generic_secondary_thread_init(void); extern unsigned long __secondary_hold_spinloop; extern unsigned long __secondary_hold_acknowledge; extern char __secondary_hold; diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 0e05a9a47a4b..1510b2a56669 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -300,9 +300,6 @@ _GLOBAL(fsl_secondary_thread_init) rlwimi r3, r3, 30, 2, 30 mtspr SPRN_PIR, r3 1: -#endif - -_GLOBAL(generic_secondary_thread_init) mr r24,r3 /* turn on 64-bit mode */ @@ -312,13 +309,13 @@ _GLOBAL(generic_secondary_thread_init) bl relative_toc tovirt(r2,r2) -#ifdef CONFIG_PPC_BOOK3E /* Book3E initialization */ mr r3,r24 bl book3e_secondary_thread_init -#endif b generic_secondary_common_init +#endif /* CONFIG_PPC_BOOK3E */ + /* * On pSeries and most other platforms, secondary processors spin * in the following code. -- cgit v1.2.3 From 364b236a0b6e86439b9025d961da8602db23d5bf Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Tue, 25 Aug 2020 13:51:47 +1000 Subject: powerpc/boot: Update Makefile comment for 64bit wrapper As of commit 147c05168fc8 ("powerpc/boot: Add support for 64bit little endian wrapper") the comment in the Makefile is misleading. The wrapper packaging 64bit kernel may built as a 32 or 64 bit elf. Update the comment to reflect this. Signed-off-by: Jordan Niethe Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200825035147.3239-1-jniethe5@gmail.com --- arch/powerpc/boot/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index b88fd27a45f0..f8ce6d2dde7b 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -7,7 +7,7 @@ # Based on coffboot by Paul Mackerras # Simplified for ppc64 by Todd Inglett # -# NOTE: this code is built for 32 bit in ELF32 format even though +# NOTE: this code may be built for 32 bit in ELF32 format even though # it packages a 64 bit kernel. We do this to simplify the # bootloader and increase compatibility with OpenFirmware. 
# -- cgit v1.2.3 From 0fb4871bcc8997acbb8edf14b301fc150101d6c0 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Fri, 28 Aug 2020 12:05:42 +1000 Subject: powerpc/tools: Remove 90 line limit in checkpatch script As of commit bdc48fa11e46, scripts/checkpatch.pl now has a default line length warning of 100 characters. The powerpc wrapper script was using a length of 90 instead of 80 in order to make checkpatch less restrictive, but now it's making it more restrictive instead. I think it makes sense to just use the default value now. Signed-off-by: Russell Currey Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200828020542.393022-1-ruscur@russell.cc --- arch/powerpc/tools/checkpatch.sh | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/tools/checkpatch.sh b/arch/powerpc/tools/checkpatch.sh index 3ce5c093b19d..91c04802ec31 100755 --- a/arch/powerpc/tools/checkpatch.sh +++ b/arch/powerpc/tools/checkpatch.sh @@ -9,7 +9,6 @@ script_base=$(realpath $(dirname $0)) exec $script_base/../../../scripts/checkpatch.pl \ --subjective \ --no-summary \ - --max-line-length=90 \ --show-types \ --ignore ARCH_INCLUDE_LINUX \ --ignore BIT_MACRO \ -- cgit v1.2.3 From cac3e629086f1b2e31c87a6c9b0130d29843ae86 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Wed, 5 Aug 2020 00:04:52 -0300 Subject: powerpc/pseries/iommu: Create defines for operations in ibm, ddw-applicable Create defines to help handling ibm,ddw-applicable values, avoiding confusion about the index of given operations. Signed-off-by: Leonardo Bras Tested-by: David Dai Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200805030455.123024-2-leobras.c@gmail.com --- arch/powerpc/platforms/pseries/iommu.c | 43 ++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 6d47b4a3ce39..ac0d6376bdad 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -39,6 +39,14 @@ #include "pseries.h" +enum { + DDW_QUERY_PE_DMA_WIN = 0, + DDW_CREATE_PE_DMA_WIN = 1, + DDW_REMOVE_PE_DMA_WIN = 2, + + DDW_APPLICABLE_SIZE +}; + static struct iommu_table_group *iommu_pseries_alloc_group(int node) { struct iommu_table_group *table_group; @@ -771,12 +779,12 @@ static void remove_ddw(struct device_node *np, bool remove_prop) { struct dynamic_dma_window_prop *dwp; struct property *win64; - u32 ddw_avail[3]; + u32 ddw_avail[DDW_APPLICABLE_SIZE]; u64 liobn; int ret = 0; ret = of_property_read_u32_array(np, "ibm,ddw-applicable", - &ddw_avail[0], 3); + &ddw_avail[0], DDW_APPLICABLE_SIZE); win64 = of_find_property(np, DIRECT64_PROPNAME, NULL); if (!win64) @@ -798,15 +806,15 @@ static void remove_ddw(struct device_node *np, bool remove_prop) pr_debug("%pOF successfully cleared tces in window.\n", np); - ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn); + ret = rtas_call(ddw_avail[DDW_REMOVE_PE_DMA_WIN], 1, 1, NULL, liobn); if (ret) pr_warn("%pOF: failed to remove direct window: rtas returned " "%d to ibm,remove-pe-dma-window(%x) %llx\n", - np, ret, ddw_avail[2], liobn); + np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn); else pr_debug("%pOF: successfully removed direct window: rtas returned " "%d to ibm,remove-pe-dma-window(%x) %llx\n", - np, ret, ddw_avail[2], liobn); + np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn); delprop: if (remove_prop) @@ -889,11 +897,11 @@ static int query_ddw(struct 
pci_dev *dev, const u32 *ddw_avail, buid = pdn->phb->buid; cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8)); - ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query, - cfg_addr, BUID_HI(buid), BUID_LO(buid)); + ret = rtas_call(ddw_avail[DDW_QUERY_PE_DMA_WIN], 3, 5, (u32 *)query, + cfg_addr, BUID_HI(buid), BUID_LO(buid)); dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x" - " returned %d\n", ddw_avail[0], cfg_addr, BUID_HI(buid), - BUID_LO(buid), ret); + " returned %d\n", ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, + BUID_HI(buid), BUID_LO(buid), ret); return ret; } @@ -920,15 +928,16 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail, do { /* extra outputs are LIOBN and dma-addr (hi, lo) */ - ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create, - cfg_addr, BUID_HI(buid), BUID_LO(buid), - page_shift, window_shift); + ret = rtas_call(ddw_avail[DDW_CREATE_PE_DMA_WIN], 5, 4, + (u32 *)create, cfg_addr, BUID_HI(buid), + BUID_LO(buid), page_shift, window_shift); } while (rtas_busy_delay(ret)); dev_info(&dev->dev, "ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d " - "(liobn = 0x%x starting addr = %x %x)\n", ddw_avail[1], - cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift, - window_shift, ret, create->liobn, create->addr_hi, create->addr_lo); + "(liobn = 0x%x starting addr = %x %x)\n", + ddw_avail[DDW_CREATE_PE_DMA_WIN], cfg_addr, BUID_HI(buid), + BUID_LO(buid), page_shift, window_shift, ret, create->liobn, + create->addr_hi, create->addr_lo); return ret; } @@ -996,7 +1005,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) int page_shift; u64 dma_addr, max_addr; struct device_node *dn; - u32 ddw_avail[3]; + u32 ddw_avail[DDW_APPLICABLE_SIZE]; struct direct_window *window; struct property *win64; struct dynamic_dma_window_prop *ddwprop; @@ -1029,7 +1038,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) * the property is actually in the parent, not the PE */ ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable", - &ddw_avail[0], 3); + &ddw_avail[0], DDW_APPLICABLE_SIZE); if (ret) goto out_failed; -- cgit v1.2.3 From 80f0251231131d164eddab78d2b6c1b8e37d0093 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Wed, 5 Aug 2020 00:04:53 -0300 Subject: powerpc/pseries/iommu: Update call to ibm, query-pe-dma-windows >From LoPAR level 2.8, "ibm,ddw-extensions" index 3 can make the number of outputs from "ibm,query-pe-dma-windows" go from 5 to 6. This change of output size is meant to expand the address size of largest_available_block PE TCE from 32-bit to 64-bit, which ends up shifting page_size and migration_capable. This ends up requiring the update of ddw_query_response->largest_available_block from u32 to u64, and manually assigning the values from the buffer into this struct, according to output size. Also, a routine was created for helping reading the ddw extensions as suggested by LoPAR: First reading the size of the extension array from index 0, checking if the property exists, and then returning it's value. 
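Put differently, the extended call splits the 64-bit field across two 32-bit output cells and the caller reassembles it. A small sketch of the reassembly performed in query_ddw() below (the helper name is invented for illustration; out_sz is the negotiated number of outputs):

static u64 ddw_largest_block(const u32 *query_out, int out_sz)
{
        /* 5 outputs: largest_available_block is a single 32-bit cell */
        if (out_sz == 5)
                return query_out[1];

        /* 6 outputs: high word in cell 1, low word in cell 2 */
        return ((u64)query_out[1] << 32) | query_out[2];
}
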
Signed-off-by: Leonardo Bras Tested-by: David Dai Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200805030455.123024-3-leobras.c@gmail.com --- arch/powerpc/platforms/pseries/iommu.c | 91 ++++++++++++++++++++++++++++++---- 1 file changed, 81 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index ac0d6376bdad..1a933c4e8bba 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -47,6 +47,12 @@ enum { DDW_APPLICABLE_SIZE }; +enum { + DDW_EXT_SIZE = 0, + DDW_EXT_RESET_DMA_WIN = 1, + DDW_EXT_QUERY_OUT_SIZE = 2 +}; + static struct iommu_table_group *iommu_pseries_alloc_group(int node) { struct iommu_table_group *table_group; @@ -342,7 +348,7 @@ struct direct_window { /* Dynamic DMA Window support */ struct ddw_query_response { u32 windows_available; - u32 largest_available_block; + u64 largest_available_block; u32 page_size; u32 migration_capable; }; @@ -877,14 +883,62 @@ static int find_existing_ddw_windows(void) } machine_arch_initcall(pseries, find_existing_ddw_windows); +/** + * ddw_read_ext - Get the value of an DDW extension + * @np: device node from which the extension value is to be read. + * @extnum: index number of the extension. + * @value: pointer to return value, modified when extension is available. + * + * Checks if "ibm,ddw-extensions" exists for this node, and get the value + * on index 'extnum'. + * It can be used only to check if a property exists, passing value == NULL. + * + * Returns: + * 0 if extension successfully read + * -EINVAL if the "ibm,ddw-extensions" does not exist, + * -ENODATA if "ibm,ddw-extensions" does not have a value, and + * -EOVERFLOW if "ibm,ddw-extensions" does not contain this extension. + */ +static inline int ddw_read_ext(const struct device_node *np, int extnum, + u32 *value) +{ + static const char propname[] = "ibm,ddw-extensions"; + u32 count; + int ret; + + ret = of_property_read_u32_index(np, propname, DDW_EXT_SIZE, &count); + if (ret) + return ret; + + if (count < extnum) + return -EOVERFLOW; + + if (!value) + value = &count; + + return of_property_read_u32_index(np, propname, extnum, value); +} + static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail, - struct ddw_query_response *query) + struct ddw_query_response *query, + struct device_node *parent) { struct device_node *dn; struct pci_dn *pdn; - u32 cfg_addr; + u32 cfg_addr, ext_query, query_out[5]; u64 buid; - int ret; + int ret, out_sz; + + /* + * From LoPAR level 2.8, "ibm,ddw-extensions" index 3 can rule how many + * output parameters ibm,query-pe-dma-windows will have, ranging from + * 5 to 6. + */ + ret = ddw_read_ext(parent, DDW_EXT_QUERY_OUT_SIZE, &ext_query); + if (!ret && ext_query == 1) + out_sz = 6; + else + out_sz = 5; /* * Get the config address and phb buid of the PE window. 
@@ -897,11 +951,28 @@ static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail, buid = pdn->phb->buid; cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8)); - ret = rtas_call(ddw_avail[DDW_QUERY_PE_DMA_WIN], 3, 5, (u32 *)query, + ret = rtas_call(ddw_avail[DDW_QUERY_PE_DMA_WIN], 3, out_sz, query_out, cfg_addr, BUID_HI(buid), BUID_LO(buid)); - dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x" - " returned %d\n", ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, - BUID_HI(buid), BUID_LO(buid), ret); + dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x returned %d\n", + ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, BUID_HI(buid), + BUID_LO(buid), ret); + + switch (out_sz) { + case 5: + query->windows_available = query_out[0]; + query->largest_available_block = query_out[1]; + query->page_size = query_out[2]; + query->migration_capable = query_out[3]; + break; + case 6: + query->windows_available = query_out[0]; + query->largest_available_block = ((u64)query_out[1] << 32) | + query_out[2]; + query->page_size = query_out[3]; + query->migration_capable = query_out[4]; + break; + } + return ret; } @@ -1049,7 +1120,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) * of page sizes: supported and supported for migrate-dma. */ dn = pci_device_to_OF_node(dev); - ret = query_ddw(dev, ddw_avail, &query); + ret = query_ddw(dev, ddw_avail, &query, pdn); if (ret != 0) goto out_failed; @@ -1077,7 +1148,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) /* check largest block * page size > max memory hotplug addr */ max_addr = ddw_memory_hotplug_max(); if (query.largest_available_block < (max_addr >> page_shift)) { - dev_dbg(&dev->dev, "can't map partition max 0x%llx with %u " + dev_dbg(&dev->dev, "can't map partition max 0x%llx with %llu " "%llu-sized pages\n", max_addr, query.largest_available_block, 1ULL << page_shift); goto out_failed; -- cgit v1.2.3 From 74d0b3994e147a2b503170b5e02f1d07dc086586 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Wed, 5 Aug 2020 00:04:54 -0300 Subject: powerpc/pseries/iommu: Move window-removing part of remove_ddw into remove_dma_window Move the window-removing part of remove_ddw into a new function (remove_dma_window), so it can be used to remove other DMA windows. It's useful for removing DMA windows that don't create DIRECT64_PROPNAME property, like the default DMA window from the device, which uses "ibm,dma-window". 
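As a sketch, callers are then expected to pair of_find_property() with the new helper for whichever window property the node carries (property names as used in this series; error handling omitted):

        struct property *win;

        /* the 64-bit direct window created by enable_ddw() ... */
        win = of_find_property(np, DIRECT64_PROPNAME, NULL);
        if (win && win->length >= sizeof(struct dynamic_dma_window_prop))
                remove_dma_window(np, ddw_avail, win);

        /* ... or the default window described by the device tree */
        win = of_find_property(np, "ibm,dma-window", NULL);
        if (win)
                remove_dma_window(np, ddw_avail, win);
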
Signed-off-by: Leonardo Bras Tested-by: David Dai Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200805030455.123024-4-leobras.c@gmail.com --- arch/powerpc/platforms/pseries/iommu.c | 45 ++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 1a933c4e8bba..4e33147825cc 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -781,25 +781,14 @@ static int __init disable_ddw_setup(char *str) early_param("disable_ddw", disable_ddw_setup); -static void remove_ddw(struct device_node *np, bool remove_prop) +static void remove_dma_window(struct device_node *np, u32 *ddw_avail, + struct property *win) { struct dynamic_dma_window_prop *dwp; - struct property *win64; - u32 ddw_avail[DDW_APPLICABLE_SIZE]; u64 liobn; - int ret = 0; - - ret = of_property_read_u32_array(np, "ibm,ddw-applicable", - &ddw_avail[0], DDW_APPLICABLE_SIZE); - - win64 = of_find_property(np, DIRECT64_PROPNAME, NULL); - if (!win64) - return; - - if (ret || win64->length < sizeof(*dwp)) - goto delprop; + int ret; - dwp = win64->value; + dwp = win->value; liobn = (u64)be32_to_cpu(dwp->liobn); /* clear the whole window, note the arg is in kernel pages */ @@ -821,10 +810,30 @@ static void remove_ddw(struct device_node *np, bool remove_prop) pr_debug("%pOF: successfully removed direct window: rtas returned " "%d to ibm,remove-pe-dma-window(%x) %llx\n", np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn); +} + +static void remove_ddw(struct device_node *np, bool remove_prop) +{ + struct property *win; + u32 ddw_avail[DDW_APPLICABLE_SIZE]; + int ret = 0; + + ret = of_property_read_u32_array(np, "ibm,ddw-applicable", + &ddw_avail[0], DDW_APPLICABLE_SIZE); + if (ret) + return; + + win = of_find_property(np, DIRECT64_PROPNAME, NULL); + if (!win) + return; + + if (win->length >= sizeof(struct dynamic_dma_window_prop)) + remove_dma_window(np, ddw_avail, win); + + if (!remove_prop) + return; -delprop: - if (remove_prop) - ret = of_remove_property(np, win64); + ret = of_remove_property(np, win); if (ret) pr_warn("%pOF: failed to remove direct window property: %d\n", np, ret); -- cgit v1.2.3 From 8c0d51592f6f0123953633d1ecf21e843fce0bfd Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Wed, 5 Aug 2020 00:04:55 -0300 Subject: powerpc/pseries/iommu: Allow bigger 64bit window by removing default DMA window On LoPAR "DMA Window Manipulation Calls", it's recommended to remove the default DMA window for the device, before attempting to configure a DDW, in order to make the maximum resources available for the next DDW to be created. This is a requirement for using DDW on devices in which hypervisor allows only one DMA window. If setting up a new DDW fails anywhere after the removal of this default DMA window, it's needed to restore the default DMA window. For this, an implementation of ibm,reset-pe-dma-windows rtas call is needed: Platforms supporting the DDW option starting with LoPAR level 2.7 implement ibm,ddw-extensions. The first extension available (index 2) carries the token for ibm,reset-pe-dma-windows rtas call, which is used to restore the default DMA window for a device, if it has been deleted. It does so by resetting the TCE table allocation for the PE to it's boot time value, available in "ibm,dma-window" device tree node. 
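The resulting flow in enable_ddw() can be condensed to the following sketch (drawn from the hunks below; locking and the remaining failure paths are omitted):

        if (query.windows_available == 0) {
                default_win = of_find_property(pdn, "ibm,dma-window", NULL);

                /* only remove the default window if it can be restored afterwards */
                if (!default_win || ddw_read_ext(pdn, DDW_EXT_RESET_DMA_WIN, NULL))
                        goto out_failed;

                remove_dma_window(pdn, ddw_avail, default_win);
                default_win_removed = true;

                /* the freed resources may now allow a bigger window */
                ret = query_ddw(dev, ddw_avail, &query, pdn);
                if (ret || query.windows_available == 0)
                        goto out_failed;
        }
        /* ... window creation as before ... */
out_failed:
        if (default_win_removed)
                reset_dma_window(dev, pdn);     /* ibm,reset-pe-dma-windows */
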
Signed-off-by: Leonardo Bras Tested-by: David Dai Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200805030455.123024-5-leobras.c@gmail.com --- arch/powerpc/platforms/pseries/iommu.c | 73 ++++++++++++++++++++++++++++++---- 1 file changed, 66 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 4e33147825cc..e4198700ed1a 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -1066,6 +1066,38 @@ static phys_addr_t ddw_memory_hotplug_max(void) return max_addr; } +/* + * Platforms supporting the DDW option starting with LoPAR level 2.7 implement + * ibm,ddw-extensions, which carries the rtas token for + * ibm,reset-pe-dma-windows. + * That rtas-call can be used to restore the default DMA window for the device. + */ +static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn) +{ + int ret; + u32 cfg_addr, reset_dma_win; + u64 buid; + struct device_node *dn; + struct pci_dn *pdn; + + ret = ddw_read_ext(par_dn, DDW_EXT_RESET_DMA_WIN, &reset_dma_win); + if (ret) + return; + + dn = pci_device_to_OF_node(dev); + pdn = PCI_DN(dn); + buid = pdn->phb->buid; + cfg_addr = (pdn->busno << 16) | (pdn->devfn << 8); + + ret = rtas_call(reset_dma_win, 3, 1, NULL, cfg_addr, BUID_HI(buid), + BUID_LO(buid)); + if (ret) + dev_info(&dev->dev, + "ibm,reset-pe-dma-windows(%x) %x %x %x returned %d ", + reset_dma_win, cfg_addr, BUID_HI(buid), BUID_LO(buid), + ret); +} + /* * If the PE supports dynamic dma windows, and there is space for a table * that can map all pages in a linear offset, then setup such a table, @@ -1090,6 +1122,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) struct property *win64; struct dynamic_dma_window_prop *ddwprop; struct failed_ddw_pdn *fpdn; + bool default_win_removed = false; mutex_lock(&direct_window_init_mutex); @@ -1133,14 +1166,38 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) if (ret != 0) goto out_failed; + /* + * If there is no window available, remove the default DMA window, + * if it's present. This will make all the resources available to the + * new DDW window. + * If anything fails after this, we need to restore it, so also check + * for extensions presence. + */ if (query.windows_available == 0) { - /* - * no additional windows are available for this device. - * We might be able to reallocate the existing window, - * trading in for a larger page size. - */ - dev_dbg(&dev->dev, "no free dynamic windows"); - goto out_failed; + struct property *default_win; + int reset_win_ext; + + default_win = of_find_property(pdn, "ibm,dma-window", NULL); + if (!default_win) + goto out_failed; + + reset_win_ext = ddw_read_ext(pdn, DDW_EXT_RESET_DMA_WIN, NULL); + if (reset_win_ext) + goto out_failed; + + remove_dma_window(pdn, ddw_avail, default_win); + default_win_removed = true; + + /* Query again, to check if the window is available */ + ret = query_ddw(dev, ddw_avail, &query, pdn); + if (ret != 0) + goto out_failed; + + if (query.windows_available == 0) { + /* no windows are available for this device. 
*/ + dev_dbg(&dev->dev, "no free dynamic windows"); + goto out_failed; + } } if (query.page_size & 4) { page_shift = 24; /* 16MB */ @@ -1231,6 +1288,8 @@ out_free_prop: kfree(win64); out_failed: + if (default_win_removed) + reset_dma_window(dev, pdn); fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL); if (!fpdn) -- cgit v1.2.3 From 8f55984f530d7275531e17f36ea29229c2c410dd Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 1 Aug 2019 14:46:30 +0930 Subject: powerpc/powernv: Print helpful message when cores guarded Often the firmware will guard out cores after a crash. This often undesirable, and is not immediately noticeable. This adds an informative message when a CPU device tree nodes are marked bad in the device tree. Signed-off-by: Joel Stanley [mpe: Use an eye-catcher that's less likely to get us in trouble] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190801051630.5804-1-joel@jms.id.au --- arch/powerpc/platforms/powernv/setup.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 7fcb88623081..9acaa0f131b9 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -130,6 +130,28 @@ static void pnv_setup_rfi_flush(void) setup_count_cache_flush(); } +static void __init pnv_check_guarded_cores(void) +{ + struct device_node *dn; + int bad_count = 0; + + for_each_node_by_type(dn, "cpu") { + if (of_property_match_string(dn, "status", "bad") >= 0) + bad_count++; + }; + + if (bad_count) { + printk(" _ _______________\n"); + pr_cont(" | | / \\\n"); + pr_cont(" | | | WARNING! |\n"); + pr_cont(" | | | |\n"); + pr_cont(" | | | It looks like |\n"); + pr_cont(" |_| | you have %*d |\n", 3, bad_count); + pr_cont(" _ | guarded cores |\n"); + pr_cont(" (_) \\_______________/\n"); + } +} + static void __init pnv_setup_arch(void) { set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); @@ -150,6 +172,8 @@ static void __init pnv_setup_arch(void) /* Enable NAP mode */ powersave_nap = 1; + pnv_check_guarded_cores(); + /* XXX PMCS */ } -- cgit v1.2.3 From a02f6d42357acf6e5de6ffc728e6e77faf3ad217 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Wed, 2 Sep 2020 09:30:11 +0930 Subject: powerpc: Warn about use of smt_snooze_delay It's not done anything for a long time. Save the percpu variable, and emit a warning to remind users to not expect it to do anything. This uses pr_warn_once instead of pr_warn_ratelimit as testing 'ppc64_cpu --smt=off' on a 24 core / 4 SMT system showed the warning to be noisy, as the online/offline loop is slow. Fixes: 3fa8cad82b94 ("powerpc/pseries/cpuidle: smt-snooze-delay cleanup.") Cc: stable@vger.kernel.org # v3.14 Signed-off-by: Joel Stanley Acked-by: Gautham R. 
Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200902000012.3440389-1-joel@jms.id.au --- arch/powerpc/kernel/sysfs.c | 42 +++++++++++++++++------------------------- 1 file changed, 17 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 46b4ebc33db7..5dea98fa2f93 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -32,29 +32,27 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); -/* - * SMT snooze delay stuff, 64-bit only for now - */ - #ifdef CONFIG_PPC64 -/* Time in microseconds we delay before sleeping in the idle loop */ -static DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 }; +/* + * Snooze delay has not been hooked up since 3fa8cad82b94 ("powerpc/pseries/cpuidle: + * smt-snooze-delay cleanup.") and has been broken even longer. As was foretold in + * 2014: + * + * "ppc64_util currently utilises it. Once we fix ppc64_util, propose to clean + * up the kernel code." + * + * powerpc-utils stopped using it as of 1.3.8. At some point in the future this + * code should be removed. + */ static ssize_t store_smt_snooze_delay(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct cpu *cpu = container_of(dev, struct cpu, dev); - ssize_t ret; - long snooze; - - ret = sscanf(buf, "%ld", &snooze); - if (ret != 1) - return -EINVAL; - - per_cpu(smt_snooze_delay, cpu->dev.id) = snooze; + pr_warn_once("%s (%d) stored to unsupported smt_snooze_delay, which has no effect.\n", + current->comm, current->pid); return count; } @@ -62,9 +60,9 @@ static ssize_t show_smt_snooze_delay(struct device *dev, struct device_attribute *attr, char *buf) { - struct cpu *cpu = container_of(dev, struct cpu, dev); - - return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->dev.id)); + pr_warn_once("%s (%d) read from unsupported smt_snooze_delay\n", + current->comm, current->pid); + return sprintf(buf, "100\n"); } static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, @@ -72,16 +70,10 @@ static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, static int __init setup_smt_snooze_delay(char *str) { - unsigned int cpu; - long snooze; - if (!cpu_has_feature(CPU_FTR_SMT)) return 1; - snooze = simple_strtol(str, NULL, 10); - for_each_possible_cpu(cpu) - per_cpu(smt_snooze_delay, cpu) = snooze; - + pr_warn("smt-snooze-delay command line option has no effect\n"); return 1; } __setup("smt-snooze-delay=", setup_smt_snooze_delay); -- cgit v1.2.3 From dc462267d2d7aacffc3c1d99b02d7a7c59db7c66 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 25 Aug 2020 17:55:35 +1000 Subject: powerpc/64s: handle ISA v3.1 local copy-paste context switches The ISA v3.1 the copy-paste facility has a new memory move functionality which allows the copy buffer to be pasted to domestic memory (RAM) as opposed to foreign memory (accelerator). This means the POWER9 trick of avoiding the cp_abort on context switch if the process had not mapped foreign memory does not work on POWER10. Do the cp_abort unconditionally there. KVM must also cp_abort on guest exit to prevent copy buffer state leaking between contexts. 
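Reduced to its essence, the policy this patch applies (the real hunks are in __switch_to(), the nested-guest entry path and the rmhandlers exit path below) can be sketched as:

#include <linux/mm_types.h>
#include <linux/atomic.h>
#include <asm/cpu_has_feature.h>

/* Sketch only; the field and feature names mirror those used by the patch. */
static bool needs_cp_abort(struct mm_struct *next_mm)
{
	/* ISA v3.1: the copy buffer can be pasted to normal memory,
	 * so it must always be scrubbed on a context change. */
	if (cpu_has_feature(CPU_FTR_ARCH_31))
		return true;

	/* POWER9: only tasks owning foreign (VAS) windows can see it. */
	return next_mm && atomic_read(&next_mm->context.vas_windows);
}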
Signed-off-by: Nicholas Piggin Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200825075535.224536-1-npiggin@gmail.com --- arch/powerpc/kernel/process.c | 16 +++++++++------- arch/powerpc/kvm/book3s_hv.c | 7 +++++++ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 8 ++++++++ 3 files changed, 24 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index febe9f7cda2f..2a6fadde58b4 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1250,15 +1250,17 @@ struct task_struct *__switch_to(struct task_struct *prev, restore_math(current->thread.regs); /* - * The copy-paste buffer can only store into foreign real - * addresses, so unprivileged processes can not see the - * data or use it in any way unless they have foreign real - * mappings. If the new process has the foreign real address - * mappings, we must issue a cp_abort to clear any state and - * prevent snooping, corruption or a covert channel. + * On POWER9 the copy-paste buffer can only paste into + * foreign real addresses, so unprivileged processes can not + * see the data or use it in any way unless they have + * foreign real mappings. If the new process has the foreign + * real address mappings, we must issue a cp_abort to clear + * any state and prevent snooping, corruption or a covert + * channel. ISA v3.1 supports paste into local memory. */ if (current->mm && - atomic_read(¤t->mm->context.vas_windows)) + (cpu_has_feature(CPU_FTR_ARCH_31) || + atomic_read(¤t->mm->context.vas_windows))) asm volatile(PPC_CP_ABORT); } #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 4ba06a2a306c..3bd3118c7633 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3530,6 +3530,13 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, */ asm volatile("eieio; tlbsync; ptesync"); + /* + * cp_abort is required if the processor supports local copy-paste + * to clear the copy buffer that was under control of the guest. + */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + asm volatile(PPC_CP_ABORT); + mtspr(SPRN_LPID, vcpu->kvm->arch.host_lpid); /* restore host LPID */ isync(); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 799d6d0f4ead..cd9995ee8441 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1830,6 +1830,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_RADIX_PREFETCH_BUG) 2: #endif /* CONFIG_PPC_RADIX_MMU */ + /* + * cp_abort is required if the processor supports local copy-paste + * to clear the copy buffer that was under control of the guest. + */ +BEGIN_FTR_SECTION + PPC_CP_ABORT +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31) + /* * POWER7/POWER8 guest -> host partition switch code. * We don't have to lock against tlbies but we do -- cgit v1.2.3 From 231b232df8f67e7d37af01259c21f2a131c3911e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 27 Aug 2020 23:17:13 +1000 Subject: powerpc/64: Make VDSO32 track COMPAT on 64-bit When we added the VDSO32 kconfig symbol, which controls building of the 32-bit VDSO, we made it depend on CPU_BIG_ENDIAN (for 64-bit). That was because back then COMPAT was always enabled for 64-bit, so depending on it would have left the 32-bit VDSO always enabled, which we didn't want. 
But since then we have made COMPAT selectable, and off by default for ppc64le, so VDSO32 should really depend on that. For most people this makes no difference, none of the defconfigs change, it's only if someone is building ppc64le with COMPAT=y, they will now also get VDSO32. If they've enabled COMPAT in order to run 32-bit binaries they presumably also want the 32-bit VDSO. Signed-off-by: Michael Ellerman Reviewed-by: Christophe Leroy Link: https://lore.kernel.org/r/20200908125850.407939-1-mpe@ellerman.id.au --- arch/powerpc/platforms/Kconfig.cputype | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 1dc9d3c81872..e74ec220b5d6 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -490,13 +490,12 @@ endmenu config VDSO32 def_bool y - depends on PPC32 || CPU_BIG_ENDIAN + depends on PPC32 || COMPAT help This symbol controls whether we build the 32-bit VDSO. We obviously want to do that if we're building a 32-bit kernel. If we're building - a 64-bit kernel then we only want a 32-bit VDSO if we're building for - big endian. That is because the only little endian configuration we - support is ppc64le which is 64-bit only. + a 64-bit kernel then we only want a 32-bit VDSO if we're also enabling + COMPAT. choice prompt "Endianness selection" -- cgit v1.2.3 From eae9eec476d13fad9af6da1f44a054ee02b7b161 Mon Sep 17 00:00:00 2001 From: Thiago Jung Bauermann Date: Tue, 18 Aug 2020 19:11:26 -0300 Subject: powerpc/pseries/svm: Allocate SWIOTLB buffer anywhere in memory POWER secure guests (i.e., guests which use the Protected Execution Facility) need to use SWIOTLB to be able to do I/O with the hypervisor, but they don't need the SWIOTLB memory to be in low addresses since the hypervisor doesn't have any addressing limitation. This solves a SWIOTLB initialization problem we are seeing in secure guests with 128 GB of RAM: they are configured with 4 GB of crashkernel reserved memory, which leaves no space for SWIOTLB in low addresses. To do this, we use mostly the same code as swiotlb_init(), but allocate the buffer using memblock_alloc() instead of memblock_alloc_low(). Fixes: 2efbc58f157a ("powerpc/pseries/svm: Force SWIOTLB for secure guests") Signed-off-by: Thiago Jung Bauermann Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200818221126.391073-1-bauerman@linux.ibm.com --- arch/powerpc/include/asm/svm.h | 4 ++++ arch/powerpc/mm/mem.c | 6 +++++- arch/powerpc/platforms/pseries/svm.c | 26 ++++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/svm.h b/arch/powerpc/include/asm/svm.h index 85580b30aba4..7546402d796a 100644 --- a/arch/powerpc/include/asm/svm.h +++ b/arch/powerpc/include/asm/svm.h @@ -15,6 +15,8 @@ static inline bool is_secure_guest(void) return mfmsr() & MSR_S; } +void __init svm_swiotlb_init(void); + void dtl_cache_ctor(void *addr); #define get_dtl_cache_ctor() (is_secure_guest() ? 
dtl_cache_ctor : NULL) @@ -25,6 +27,8 @@ static inline bool is_secure_guest(void) return false; } +static inline void svm_swiotlb_init(void) {} + #define get_dtl_cache_ctor() NULL #endif /* CONFIG_PPC_SVM */ diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 42e25874f5a8..ddc32cc1b6cf 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -49,6 +49,7 @@ #include #include #include +#include #include @@ -282,7 +283,10 @@ void __init mem_init(void) * back to to-down. */ memblock_set_bottom_up(true); - swiotlb_init(0); + if (is_secure_guest()) + svm_swiotlb_init(); + else + swiotlb_init(0); #endif high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c index e6d7a344d9f2..7b739cc7a8a9 100644 --- a/arch/powerpc/platforms/pseries/svm.c +++ b/arch/powerpc/platforms/pseries/svm.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include @@ -35,6 +36,31 @@ static int __init init_svm(void) } machine_early_initcall(pseries, init_svm); +/* + * Initialize SWIOTLB. Essentially the same as swiotlb_init(), except that it + * can allocate the buffer anywhere in memory. Since the hypervisor doesn't have + * any addressing limitation, we don't need to allocate it in low addresses. + */ +void __init svm_swiotlb_init(void) +{ + unsigned char *vstart; + unsigned long bytes, io_tlb_nslabs; + + io_tlb_nslabs = (swiotlb_size_or_default() >> IO_TLB_SHIFT); + io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); + + bytes = io_tlb_nslabs << IO_TLB_SHIFT; + + vstart = memblock_alloc(PAGE_ALIGN(bytes), PAGE_SIZE); + if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, false)) + return; + + if (io_tlb_start) + memblock_free_early(io_tlb_start, + PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); + panic("SVM: Cannot allocate SWIOTLB buffer"); +} + int set_memory_encrypted(unsigned long addr, int numpages) { if (!PAGE_ALIGNED(addr)) -- cgit v1.2.3 From 4759c11ed20454b7b36db4ec15f7d5aa1519af4a Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 2 Sep 2020 09:59:38 +0530 Subject: powerpc/watchpoint: Fix quadword instruction handling on p10 predecessors On p10 predecessors, watchpoint with quadword access is compared at quadword length. If the watch range is doubleword or less than that in a first half of quadword aligned 16 bytes, and if there is any unaligned quadword access which will access only the 2nd half, the handler should consider it as extraneous and emulate/single-step it before continuing. 
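To make the corner case concrete, a self-contained worked example (all values invented) applying the same overlap checks the handler performs:

#include <stdbool.h>

/*
 * Worked example: an 8-byte ptrace watchpoint at 0x1000 and an unaligned
 * quadword (lq/stq) access with EA 0x1008 on a pre-POWER10 CPU.  The DAWR
 * compares at quadword granularity, so the hardware range covers the whole
 * 16-byte block even though the user asked for only the first half.
 */
static bool quadword_hit_is_extraneous(void)
{
	unsigned long wp_addr = 0x1000, wp_len = 8;	/* user's watch range  */
	unsigned long ea = 0x1008, size = 16;		/* the quadword access */

	unsigned long hw_start = wp_addr & ~0xfUL;			/* 0x1000 */
	unsigned long hw_end   = (wp_addr + wp_len + 0xf) & ~0xfUL;	/* 0x1010 */

	bool hw_overlap   = ea < hw_end && ea + size > hw_start;		/* true  */
	bool user_overlap = ea < wp_addr + wp_len && ea + size > wp_addr;	/* false */

	/* The hardware range matched but the user range did not: the handler
	 * must treat the event as extraneous and emulate/single-step the
	 * instruction before continuing. */
	return hw_overlap && !user_overlap;
}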
Fixes: 74c6881019b7 ("powerpc/watchpoint: Prepare handler to handle more than one watchpoint") Reported-by: Pedro Miraglia Franco de Carvalho Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200902042945.129369-2-ravi.bangoria@linux.ibm.com --- arch/powerpc/include/asm/hw_breakpoint.h | 1 + arch/powerpc/kernel/hw_breakpoint.c | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index db206a7f38e2..9b68eafebf43 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -42,6 +42,7 @@ struct arch_hw_breakpoint { #else #define HW_BREAKPOINT_SIZE 0x8 #endif +#define HW_BREAKPOINT_SIZE_QUADWORD 0x10 #define DABR_MAX_LEN 8 #define DAWR_MAX_LEN 512 diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 1f4a1efa0074..9f7df1c37233 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -520,9 +520,17 @@ static bool ea_hw_range_overlaps(unsigned long ea, int size, struct arch_hw_breakpoint *info) { unsigned long hw_start_addr, hw_end_addr; + unsigned long align_size = HW_BREAKPOINT_SIZE; - hw_start_addr = ALIGN_DOWN(info->address, HW_BREAKPOINT_SIZE); - hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE); + /* + * On p10 predecessors, quadword is handle differently then + * other instructions. + */ + if (!cpu_has_feature(CPU_FTR_ARCH_31) && size == 16) + align_size = HW_BREAKPOINT_SIZE_QUADWORD; + + hw_start_addr = ALIGN_DOWN(info->address, align_size); + hw_end_addr = ALIGN(info->address + info->len, align_size); return ((ea < hw_end_addr) && (ea + size > hw_start_addr)); } -- cgit v1.2.3 From 4441eb02333a9b46a0d919aa7a6d3b137b5f2562 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 2 Sep 2020 09:59:39 +0530 Subject: powerpc/watchpoint: Fix handling of vector instructions Vector load/store instructions are special because they are always aligned. Thus unaligned EA needs to be aligned down before comparing it with watch ranges. Otherwise we might consider valid event as invalid. Fixes: 74c6881019b7 ("powerpc/watchpoint: Prepare handler to handle more than one watchpoint") Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200902042945.129369-3-ravi.bangoria@linux.ibm.com --- arch/powerpc/kernel/hw_breakpoint.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 9f7df1c37233..f6b24838ca3c 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -644,6 +644,8 @@ static void get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, if (*type == CACHEOP) { *size = cache_op_size(); *ea &= ~(*size - 1); + } else if (*type == LOAD_VMX || *type == STORE_VMX) { + *ea &= ~(*size - 1); } } -- cgit v1.2.3 From 9b6b7c680cc20971444d9f836e49fc98848bcd0a Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 2 Sep 2020 09:59:40 +0530 Subject: powerpc/watchpoint/ptrace: Fix SETHWDEBUG when CONFIG_HAVE_HW_BREAKPOINT=N When kernel is compiled with CONFIG_HAVE_HW_BREAKPOINT=N, user can still create watchpoint using PPC_PTRACE_SETHWDEBUG, with limited functionalities. But, such watchpoints are never firing because of the missing privilege settings. Fix that. 
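For context, a hypothetical tracer-side sketch of the request affected; the helper name is invented, while the structure, request and flags are the existing ptrace ABI:

#include <sys/ptrace.h>
#include <sys/types.h>
#include <string.h>
#include <asm/ptrace.h>

/*
 * Hypothetical sketch: a write watchpoint set through PPC_PTRACE_SETHWDEBUG.
 * On a CONFIG_HAVE_HW_BREAKPOINT=N kernel this request used to succeed but
 * the breakpoint never fired, because no privilege bits were programmed
 * into the DABR/DAWR.
 */
static long set_write_watchpoint(pid_t child, unsigned long addr)
{
	struct ppc_hw_breakpoint bp;

	memset(&bp, 0, sizeof(bp));
	bp.version = 1;
	bp.trigger_type = PPC_BREAKPOINT_TRIGGER_WRITE;
	bp.addr_mode = PPC_BREAKPOINT_MODE_EXACT;
	bp.condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
	bp.addr = addr;

	return ptrace(PPC_PTRACE_SETHWDEBUG, child, 0, &bp);
}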
It's safe to set HW_BRK_TYPE_PRIV_ALL because we don't really leak any kernel address in signal info. Setting HW_BRK_TYPE_PRIV_ALL will also help to find scenarios when kernel accesses user memory. Reported-by: Pedro Miraglia Franco de Carvalho Suggested-by: Pedro Miraglia Franco de Carvalho Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200902042945.129369-4-ravi.bangoria@linux.ibm.com --- arch/powerpc/kernel/ptrace/ptrace-noadv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/ptrace/ptrace-noadv.c b/arch/powerpc/kernel/ptrace/ptrace-noadv.c index 697c7e4b5877..57a0ab822334 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-noadv.c +++ b/arch/powerpc/kernel/ptrace/ptrace-noadv.c @@ -217,7 +217,7 @@ long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_inf return -EIO; brk.address = ALIGN_DOWN(bp_info->addr, HW_BREAKPOINT_SIZE); - brk.type = HW_BRK_TYPE_TRANSLATE; + brk.type = HW_BRK_TYPE_TRANSLATE | HW_BRK_TYPE_PRIV_ALL; brk.len = DABR_MAX_LEN; if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) brk.type |= HW_BRK_TYPE_READ; -- cgit v1.2.3 From edc8dd99b29e4d705c45e2a3a6c01b096ee056db Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 2 Sep 2020 09:59:41 +0530 Subject: powerpc/watchpoint: Move DAWR detection logic outside of hw_breakpoint.c Power10 hw has multiple DAWRs but hw doesn't tell which DAWR caused the exception. So we have a sw logic to detect that in hw_breakpoint.c. But hw_breakpoint.c gets compiled only with CONFIG_HAVE_HW_BREAKPOINT=Y. Move DAWR detection logic outside of hw_breakpoint.c so that it can be reused when CONFIG_HAVE_HW_BREAKPOINT is not set. Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200902042945.129369-5-ravi.bangoria@linux.ibm.com --- arch/powerpc/include/asm/hw_breakpoint.h | 8 ++ arch/powerpc/kernel/Makefile | 3 +- arch/powerpc/kernel/hw_breakpoint.c | 159 +---------------------- arch/powerpc/kernel/hw_breakpoint_constraints.c | 162 ++++++++++++++++++++++++ 4 files changed, 174 insertions(+), 158 deletions(-) create mode 100644 arch/powerpc/kernel/hw_breakpoint_constraints.c (limited to 'arch') diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index 9b68eafebf43..81872c420476 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -10,6 +10,7 @@ #define _PPC_BOOK3S_64_HW_BREAKPOINT_H #include +#include #ifdef __KERNEL__ struct arch_hw_breakpoint { @@ -52,6 +53,13 @@ static inline int nr_wp_slots(void) return cpu_has_feature(CPU_FTR_DAWR1) ? 
2 : 1; } +bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr, + unsigned long ea, int type, int size, + struct arch_hw_breakpoint *info); + +void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, + int *type, int *size, unsigned long *ea); + #ifdef CONFIG_HAVE_HW_BREAKPOINT #include #include diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index cbf41fb4ee89..a5550c2b24c4 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -45,7 +45,8 @@ obj-y := cputable.o syscalls.o \ signal.o sysfs.o cacheinfo.o time.o \ prom.o traps.o setup-common.o \ udbg.o misc.o io.o misc_$(BITS).o \ - of_platform.o prom_parse.o firmware.o + of_platform.o prom_parse.o firmware.o \ + hw_breakpoint_constraints.o obj-y += ptrace/ obj-$(CONFIG_PPC64) += setup_64.o \ paca.o nvram_64.o note.o syscall_64.o diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index f6b24838ca3c..f4e8f21046f5 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -494,161 +494,6 @@ reset: } } -static bool dar_in_user_range(unsigned long dar, struct arch_hw_breakpoint *info) -{ - return ((info->address <= dar) && (dar - info->address < info->len)); -} - -static bool ea_user_range_overlaps(unsigned long ea, int size, - struct arch_hw_breakpoint *info) -{ - return ((ea < info->address + info->len) && - (ea + size > info->address)); -} - -static bool dar_in_hw_range(unsigned long dar, struct arch_hw_breakpoint *info) -{ - unsigned long hw_start_addr, hw_end_addr; - - hw_start_addr = ALIGN_DOWN(info->address, HW_BREAKPOINT_SIZE); - hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE); - - return ((hw_start_addr <= dar) && (hw_end_addr > dar)); -} - -static bool ea_hw_range_overlaps(unsigned long ea, int size, - struct arch_hw_breakpoint *info) -{ - unsigned long hw_start_addr, hw_end_addr; - unsigned long align_size = HW_BREAKPOINT_SIZE; - - /* - * On p10 predecessors, quadword is handle differently then - * other instructions. - */ - if (!cpu_has_feature(CPU_FTR_ARCH_31) && size == 16) - align_size = HW_BREAKPOINT_SIZE_QUADWORD; - - hw_start_addr = ALIGN_DOWN(info->address, align_size); - hw_end_addr = ALIGN(info->address + info->len, align_size); - - return ((ea < hw_end_addr) && (ea + size > hw_start_addr)); -} - -/* - * If hw has multiple DAWR registers, we also need to check all - * dawrx constraint bits to confirm this is _really_ a valid event. - * If type is UNKNOWN, but privilege level matches, consider it as - * a positive match. - */ -static bool check_dawrx_constraints(struct pt_regs *regs, int type, - struct arch_hw_breakpoint *info) -{ - if (OP_IS_LOAD(type) && !(info->type & HW_BRK_TYPE_READ)) - return false; - - /* - * The Cache Management instructions other than dcbz never - * cause a match. i.e. if type is CACHEOP, the instruction - * is dcbz, and dcbz is treated as Store. - */ - if ((OP_IS_STORE(type) || type == CACHEOP) && !(info->type & HW_BRK_TYPE_WRITE)) - return false; - - if (is_kernel_addr(regs->nip) && !(info->type & HW_BRK_TYPE_KERNEL)) - return false; - - if (user_mode(regs) && !(info->type & HW_BRK_TYPE_USER)) - return false; - - return true; -} - -/* - * Return true if the event is valid wrt dawr configuration, - * including extraneous exception. Otherwise return false. 
- */ -static bool check_constraints(struct pt_regs *regs, struct ppc_inst instr, - unsigned long ea, int type, int size, - struct arch_hw_breakpoint *info) -{ - bool in_user_range = dar_in_user_range(regs->dar, info); - bool dawrx_constraints; - - /* - * 8xx supports only one breakpoint and thus we can - * unconditionally return true. - */ - if (IS_ENABLED(CONFIG_PPC_8xx)) { - if (!in_user_range) - info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; - return true; - } - - if (unlikely(ppc_inst_equal(instr, ppc_inst(0)))) { - if (cpu_has_feature(CPU_FTR_ARCH_31) && - !dar_in_hw_range(regs->dar, info)) - return false; - - return true; - } - - dawrx_constraints = check_dawrx_constraints(regs, type, info); - - if (type == UNKNOWN) { - if (cpu_has_feature(CPU_FTR_ARCH_31) && - !dar_in_hw_range(regs->dar, info)) - return false; - - return dawrx_constraints; - } - - if (ea_user_range_overlaps(ea, size, info)) - return dawrx_constraints; - - if (ea_hw_range_overlaps(ea, size, info)) { - if (dawrx_constraints) { - info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; - return true; - } - } - return false; -} - -static int cache_op_size(void) -{ -#ifdef __powerpc64__ - return ppc64_caches.l1d.block_size; -#else - return L1_CACHE_BYTES; -#endif -} - -static void get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, - int *type, int *size, unsigned long *ea) -{ - struct instruction_op op; - - if (__get_user_instr_inatomic(*instr, (void __user *)regs->nip)) - return; - - analyse_instr(&op, regs, *instr); - *type = GETTYPE(op.type); - *ea = op.ea; -#ifdef __powerpc64__ - if (!(regs->msr & MSR_64BIT)) - *ea &= 0xffffffffUL; -#endif - - *size = GETSIZE(op.type); - if (*type == CACHEOP) { - *size = cache_op_size(); - *ea &= ~(*size - 1); - } else if (*type == LOAD_VMX || *type == STORE_VMX) { - *ea &= ~(*size - 1); - } -} - static bool is_larx_stcx_instr(int type) { return type == LARX || type == STCX; @@ -732,7 +577,7 @@ int hw_breakpoint_handler(struct die_args *args) rcu_read_lock(); if (!IS_ENABLED(CONFIG_PPC_8xx)) - get_instr_detail(regs, &instr, &type, &size, &ea); + wp_get_instr_detail(regs, &instr, &type, &size, &ea); for (i = 0; i < nr_wp_slots(); i++) { bp[i] = __this_cpu_read(bp_per_reg[i]); @@ -742,7 +587,7 @@ int hw_breakpoint_handler(struct die_args *args) info[i] = counter_arch_bp(bp[i]); info[i]->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ; - if (check_constraints(regs, instr, ea, type, size, info[i])) { + if (wp_check_constraints(regs, instr, ea, type, size, info[i])) { if (!IS_ENABLED(CONFIG_PPC_8xx) && ppc_inst_equal(instr, ppc_inst(0))) { handler_error(bp[i], info[i]); diff --git a/arch/powerpc/kernel/hw_breakpoint_constraints.c b/arch/powerpc/kernel/hw_breakpoint_constraints.c new file mode 100644 index 000000000000..867ee4aa026a --- /dev/null +++ b/arch/powerpc/kernel/hw_breakpoint_constraints.c @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: GPL-2.0+ +#include +#include +#include +#include +#include +#include + +static bool dar_in_user_range(unsigned long dar, struct arch_hw_breakpoint *info) +{ + return ((info->address <= dar) && (dar - info->address < info->len)); +} + +static bool ea_user_range_overlaps(unsigned long ea, int size, + struct arch_hw_breakpoint *info) +{ + return ((ea < info->address + info->len) && + (ea + size > info->address)); +} + +static bool dar_in_hw_range(unsigned long dar, struct arch_hw_breakpoint *info) +{ + unsigned long hw_start_addr, hw_end_addr; + + hw_start_addr = ALIGN_DOWN(info->address, HW_BREAKPOINT_SIZE); + hw_end_addr = ALIGN(info->address + info->len, 
HW_BREAKPOINT_SIZE); + + return ((hw_start_addr <= dar) && (hw_end_addr > dar)); +} + +static bool ea_hw_range_overlaps(unsigned long ea, int size, + struct arch_hw_breakpoint *info) +{ + unsigned long hw_start_addr, hw_end_addr; + unsigned long align_size = HW_BREAKPOINT_SIZE; + + /* + * On p10 predecessors, quadword is handle differently then + * other instructions. + */ + if (!cpu_has_feature(CPU_FTR_ARCH_31) && size == 16) + align_size = HW_BREAKPOINT_SIZE_QUADWORD; + + hw_start_addr = ALIGN_DOWN(info->address, align_size); + hw_end_addr = ALIGN(info->address + info->len, align_size); + + return ((ea < hw_end_addr) && (ea + size > hw_start_addr)); +} + +/* + * If hw has multiple DAWR registers, we also need to check all + * dawrx constraint bits to confirm this is _really_ a valid event. + * If type is UNKNOWN, but privilege level matches, consider it as + * a positive match. + */ +static bool check_dawrx_constraints(struct pt_regs *regs, int type, + struct arch_hw_breakpoint *info) +{ + if (OP_IS_LOAD(type) && !(info->type & HW_BRK_TYPE_READ)) + return false; + + /* + * The Cache Management instructions other than dcbz never + * cause a match. i.e. if type is CACHEOP, the instruction + * is dcbz, and dcbz is treated as Store. + */ + if ((OP_IS_STORE(type) || type == CACHEOP) && !(info->type & HW_BRK_TYPE_WRITE)) + return false; + + if (is_kernel_addr(regs->nip) && !(info->type & HW_BRK_TYPE_KERNEL)) + return false; + + if (user_mode(regs) && !(info->type & HW_BRK_TYPE_USER)) + return false; + + return true; +} + +/* + * Return true if the event is valid wrt dawr configuration, + * including extraneous exception. Otherwise return false. + */ +bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr, + unsigned long ea, int type, int size, + struct arch_hw_breakpoint *info) +{ + bool in_user_range = dar_in_user_range(regs->dar, info); + bool dawrx_constraints; + + /* + * 8xx supports only one breakpoint and thus we can + * unconditionally return true. 
+ */ + if (IS_ENABLED(CONFIG_PPC_8xx)) { + if (!in_user_range) + info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; + return true; + } + + if (unlikely(ppc_inst_equal(instr, ppc_inst(0)))) { + if (cpu_has_feature(CPU_FTR_ARCH_31) && + !dar_in_hw_range(regs->dar, info)) + return false; + + return true; + } + + dawrx_constraints = check_dawrx_constraints(regs, type, info); + + if (type == UNKNOWN) { + if (cpu_has_feature(CPU_FTR_ARCH_31) && + !dar_in_hw_range(regs->dar, info)) + return false; + + return dawrx_constraints; + } + + if (ea_user_range_overlaps(ea, size, info)) + return dawrx_constraints; + + if (ea_hw_range_overlaps(ea, size, info)) { + if (dawrx_constraints) { + info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; + return true; + } + } + return false; +} + +static int cache_op_size(void) +{ +#ifdef __powerpc64__ + return ppc64_caches.l1d.block_size; +#else + return L1_CACHE_BYTES; +#endif +} + +void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, + int *type, int *size, unsigned long *ea) +{ + struct instruction_op op; + + if (__get_user_instr_inatomic(*instr, (void __user *)regs->nip)) + return; + + analyse_instr(&op, regs, *instr); + *type = GETTYPE(op.type); + *ea = op.ea; +#ifdef __powerpc64__ + if (!(regs->msr & MSR_64BIT)) + *ea &= 0xffffffffUL; +#endif + + *size = GETSIZE(op.type); + if (*type == CACHEOP) { + *size = cache_op_size(); + *ea &= ~(*size - 1); + } else if (*type == LOAD_VMX || *type == STORE_VMX) { + *ea &= ~(*size - 1); + } +} -- cgit v1.2.3 From 5b905d77987de065bdd3a2906816b5f143df087b Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 2 Sep 2020 09:59:42 +0530 Subject: powerpc/watchpoint: Fix exception handling for CONFIG_HAVE_HW_BREAKPOINT=N On powerpc, ptrace watchpoint works in one-shot mode. i.e. kernel disables event every time it fires and user has to re-enable it. Also, in case of ptrace watchpoint, kernel notifies ptrace user before executing instruction. With CONFIG_HAVE_HW_BREAKPOINT=N, kernel is missing to disable ptrace event and thus it's causing infinite loop of exceptions. This is especially harmful when user watches on a data which is also read/written by kernel, eg syscall parameters. In such case, infinite exceptions happens in kernel mode which causes soft-lockup. 
Fixes: 9422de3e953d ("powerpc: Hardware breakpoints rewrite to handle non DABR breakpoint registers") Reported-by: Pedro Miraglia Franco de Carvalho Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200902042945.129369-6-ravi.bangoria@linux.ibm.com --- arch/powerpc/include/asm/hw_breakpoint.h | 3 ++ arch/powerpc/kernel/process.c | 48 +++++++++++++++++++++++++++++++ arch/powerpc/kernel/ptrace/ptrace-noadv.c | 4 ++- 3 files changed, 54 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index 81872c420476..abebfbee5b1c 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -18,6 +18,7 @@ struct arch_hw_breakpoint { u16 type; u16 len; /* length of the target data symbol */ u16 hw_len; /* length programmed in hw */ + u8 flags; }; /* Note: Don't change the first 6 bits below as they are in the same order @@ -37,6 +38,8 @@ struct arch_hw_breakpoint { #define HW_BRK_TYPE_PRIV_ALL (HW_BRK_TYPE_USER | HW_BRK_TYPE_KERNEL | \ HW_BRK_TYPE_HYP) +#define HW_BRK_FLAG_DISABLED 0x1 + /* Minimum granularity */ #ifdef CONFIG_PPC_8xx #define HW_BREAKPOINT_SIZE 0x4 diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 142680e885ad..483e36a42617 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -642,6 +642,44 @@ void do_send_trap(struct pt_regs *regs, unsigned long address, (void __user *)address); } #else /* !CONFIG_PPC_ADV_DEBUG_REGS */ + +static void do_break_handler(struct pt_regs *regs) +{ + struct arch_hw_breakpoint null_brk = {0}; + struct arch_hw_breakpoint *info; + struct ppc_inst instr = ppc_inst(0); + int type = 0; + int size = 0; + unsigned long ea; + int i; + + /* + * If underneath hw supports only one watchpoint, we know it + * caused exception. 8xx also falls into this category. + */ + if (nr_wp_slots() == 1) { + __set_breakpoint(0, &null_brk); + current->thread.hw_brk[0] = null_brk; + current->thread.hw_brk[0].flags |= HW_BRK_FLAG_DISABLED; + return; + } + + /* Otherwise findout which DAWR caused exception and disable it. */ + wp_get_instr_detail(regs, &instr, &type, &size, &ea); + + for (i = 0; i < nr_wp_slots(); i++) { + info = ¤t->thread.hw_brk[i]; + if (!info->address) + continue; + + if (wp_check_constraints(regs, instr, ea, type, size, info)) { + __set_breakpoint(i, &null_brk); + current->thread.hw_brk[i] = null_brk; + current->thread.hw_brk[i].flags |= HW_BRK_FLAG_DISABLED; + } + } +} + void do_break (struct pt_regs *regs, unsigned long address, unsigned long error_code) { @@ -653,6 +691,16 @@ void do_break (struct pt_regs *regs, unsigned long address, if (debugger_break_match(regs)) return; + /* + * We reach here only when watchpoint exception is generated by ptrace + * event (or hw is buggy!). Now if CONFIG_HAVE_HW_BREAKPOINT is set, + * watchpoint is already handled by hw_breakpoint_handler() so we don't + * have to do anything. But when CONFIG_HAVE_HW_BREAKPOINT is not set, + * we need to manually handle the watchpoint here. 
+ */ + if (!IS_ENABLED(CONFIG_HAVE_HW_BREAKPOINT)) + do_break_handler(regs); + /* Deliver the signal to userspace */ force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __user *)address); } diff --git a/arch/powerpc/kernel/ptrace/ptrace-noadv.c b/arch/powerpc/kernel/ptrace/ptrace-noadv.c index 57a0ab822334..c9122ed91340 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-noadv.c +++ b/arch/powerpc/kernel/ptrace/ptrace-noadv.c @@ -286,11 +286,13 @@ long ppc_del_hwdebug(struct task_struct *child, long data) } return ret; #else /* CONFIG_HAVE_HW_BREAKPOINT */ - if (child->thread.hw_brk[data - 1].address == 0) + if (!(child->thread.hw_brk[data - 1].flags & HW_BRK_FLAG_DISABLED) && + child->thread.hw_brk[data - 1].address == 0) return -ENOENT; child->thread.hw_brk[data - 1].address = 0; child->thread.hw_brk[data - 1].type = 0; + child->thread.hw_brk[data - 1].flags = 0; #endif /* CONFIG_HAVE_HW_BREAKPOINT */ return 0; -- cgit v1.2.3 From 58da5984d2ea6d95f3f9d9e8dd9f7e1b0dddfb3c Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 2 Sep 2020 09:59:43 +0530 Subject: powerpc/watchpoint: Add hw_len wherever missing There are couple of places where we set len but not hw_len. For ptrace/perf watchpoints, when CONFIG_HAVE_HW_BREAKPOINT=Y, hw_len will be calculated and set internally while parsing watchpoint. But when CONFIG_HAVE_HW_BREAKPOINT=N, we need to manually set 'hw_len'. Similarly for xmon as well, hw_len needs to be set directly. Fixes: b57aeab811db ("powerpc/watchpoint: Fix length calculation for unaligned target") Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200902042945.129369-7-ravi.bangoria@linux.ibm.com --- arch/powerpc/kernel/ptrace/ptrace-noadv.c | 1 + arch/powerpc/xmon/xmon.c | 1 + 2 files changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kernel/ptrace/ptrace-noadv.c b/arch/powerpc/kernel/ptrace/ptrace-noadv.c index c9122ed91340..48c52426af80 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-noadv.c +++ b/arch/powerpc/kernel/ptrace/ptrace-noadv.c @@ -219,6 +219,7 @@ long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_inf brk.address = ALIGN_DOWN(bp_info->addr, HW_BREAKPOINT_SIZE); brk.type = HW_BRK_TYPE_TRANSLATE | HW_BRK_TYPE_PRIV_ALL; brk.len = DABR_MAX_LEN; + brk.hw_len = DABR_MAX_LEN; if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) brk.type |= HW_BRK_TYPE_READ; if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index df7bca00f5ec..55c43a6c9111 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -969,6 +969,7 @@ static void insert_cpu_bpts(void) brk.address = dabr[i].address; brk.type = (dabr[i].enabled & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL; brk.len = 8; + brk.hw_len = 8; __set_breakpoint(i, &brk); } } -- cgit v1.2.3 From fa725cc53d353110f39a9e5b9f60d6acb2c7ff49 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 2 Sep 2020 09:59:44 +0530 Subject: powerpc/watchpoint/ptrace: Introduce PPC_DEBUG_FEATURE_DATA_BP_ARCH_31 PPC_DEBUG_FEATURE_DATA_BP_ARCH_31 can be used to determine whether we are running on an ISA 3.1 compliant machine. Which is needed to determine DAR behaviour, 512 byte boundary limit etc. This was requested by Pedro Miraglia Franco de Carvalho for extending watchpoint features in gdb. Note that availability of 2nd DAWR is independent of this flag and should be checked using ppc_debug_info->num_data_bps. 
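A hypothetical debugger-side usage sketch (the helper name is invented; the request, structure and flag come from the ABI described above):

#include <sys/ptrace.h>
#include <sys/types.h>
#include <stdbool.h>
#include <asm/ptrace.h>

/* Sketch: ask the kernel whether the tracee's CPU follows ISA v3.1
 * watchpoint semantics (DAR behaviour, 512 byte boundary limit etc.). */
static bool tracee_is_isa_v31(pid_t child)
{
	struct ppc_debug_info info;

	if (ptrace(PPC_PTRACE_GETHWDBGINFO, child, 0, &info) == -1)
		return false;

	/* Availability of the 2nd DAWR is still reported via num_data_bps. */
	return info.features & PPC_DEBUG_FEATURE_DATA_BP_ARCH_31;
}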
Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200902042945.129369-8-ravi.bangoria@linux.ibm.com --- Documentation/powerpc/ptrace.rst | 1 + arch/powerpc/include/uapi/asm/ptrace.h | 1 + arch/powerpc/kernel/ptrace/ptrace-noadv.c | 2 ++ 3 files changed, 4 insertions(+) (limited to 'arch') diff --git a/Documentation/powerpc/ptrace.rst b/Documentation/powerpc/ptrace.rst index 864d4b6dddd1..77725d69eb4a 100644 --- a/Documentation/powerpc/ptrace.rst +++ b/Documentation/powerpc/ptrace.rst @@ -46,6 +46,7 @@ features will have bits indicating whether there is support for:: #define PPC_DEBUG_FEATURE_DATA_BP_RANGE 0x4 #define PPC_DEBUG_FEATURE_DATA_BP_MASK 0x8 #define PPC_DEBUG_FEATURE_DATA_BP_DAWR 0x10 + #define PPC_DEBUG_FEATURE_DATA_BP_ARCH_31 0x20 2. PTRACE_SETHWDEBUG diff --git a/arch/powerpc/include/uapi/asm/ptrace.h b/arch/powerpc/include/uapi/asm/ptrace.h index f5f1ccc740fc..7004cfea3f5f 100644 --- a/arch/powerpc/include/uapi/asm/ptrace.h +++ b/arch/powerpc/include/uapi/asm/ptrace.h @@ -222,6 +222,7 @@ struct ppc_debug_info { #define PPC_DEBUG_FEATURE_DATA_BP_RANGE 0x0000000000000004 #define PPC_DEBUG_FEATURE_DATA_BP_MASK 0x0000000000000008 #define PPC_DEBUG_FEATURE_DATA_BP_DAWR 0x0000000000000010 +#define PPC_DEBUG_FEATURE_DATA_BP_ARCH_31 0x0000000000000020 #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/kernel/ptrace/ptrace-noadv.c b/arch/powerpc/kernel/ptrace/ptrace-noadv.c index 48c52426af80..aa36fcad36cd 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-noadv.c +++ b/arch/powerpc/kernel/ptrace/ptrace-noadv.c @@ -57,6 +57,8 @@ void ppc_gethwdinfo(struct ppc_debug_info *dbginfo) } else { dbginfo->features = 0; } + if (cpu_has_feature(CPU_FTR_ARCH_31)) + dbginfo->features |= PPC_DEBUG_FEATURE_DATA_BP_ARCH_31; } int ptrace_get_debugreg(struct task_struct *child, unsigned long addr, -- cgit v1.2.3 From 2a32abac8860aa1c3a1fc99973ce67179575b36c Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 8 Jun 2020 12:39:01 +0530 Subject: powerpc/percpu: Update percpu bootmem allocator This update the ppc64 version to be closer to x86/sparc. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200608070904.387440-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/kernel/setup_64.c | 45 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 6be430107c6f..55bbbf89ea82 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -756,17 +756,46 @@ void __init emergency_stack_init(void) } #ifdef CONFIG_SMP -#define PCPU_DYN_SIZE () - -static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) +/** + * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu + * @cpu: cpu to allocate for + * @size: size allocation in bytes + * @align: alignment + * + * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper + * does the right thing for NUMA regardless of the current + * configuration. + * + * RETURNS: + * Pointer to the allocated area on success, NULL on failure. 
+ */ +static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, + size_t align) { - return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS), - MEMBLOCK_ALLOC_ACCESSIBLE, - early_cpu_to_node(cpu)); + const unsigned long goal = __pa(MAX_DMA_ADDRESS); +#ifdef CONFIG_NEED_MULTIPLE_NODES + int node = early_cpu_to_node(cpu); + void *ptr; + if (!node_online(node) || !NODE_DATA(node)) { + ptr = memblock_alloc_from(size, align, goal); + pr_info("cpu %d has no node %d or node-local memory\n", + cpu, node); + pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n", + cpu, size, __pa(ptr)); + } else { + ptr = memblock_alloc_try_nid(size, align, goal, + MEMBLOCK_ALLOC_ACCESSIBLE, node); + pr_debug("per cpu data for cpu%d %lu bytes on node%d at " + "%016lx\n", cpu, size, node, __pa(ptr)); + } + return ptr; +#else + return memblock_alloc_from(size, align, goal); +#endif } -static void __init pcpu_fc_free(void *ptr, size_t size) +static void __init pcpu_free_bootmem(void *ptr, size_t size) { memblock_free(__pa(ptr), size); } @@ -801,7 +830,7 @@ void __init setup_per_cpu_areas(void) atom_size = 1 << 20; rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance, - pcpu_fc_alloc, pcpu_fc_free); + pcpu_alloc_bootmem, pcpu_free_bootmem); if (rc < 0) panic("cannot initialize percpu area (err=%d)", rc); -- cgit v1.2.3 From eb553f16973ade990d05946af9ae191394712c8a Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 8 Jun 2020 12:39:02 +0530 Subject: powerpc/64/mm: implement page mapping percpu first chunk allocator Implement page mapping percpu first chunk allocator as a fallback to the embedding allocator. With 4K hash translation we limit our page table range to 64TB and commit: 0034d395f89d ("powerpc/mm/hash64: Map all the kernel regions in the same 0xc range") moved all kernel mapping to that 64TB range. In-order to support sparse memory layout we need to increase our linear mapping space and reduce other mappings. With such a layout percpu embedded first chunk allocator will fail because of small vmalloc range. Add a fallback to page mapping percpu first chunk allocator for such failures. The below dmesg output can be observed in such case. 
percpu: max_distance=0x1ffffef00000 too large for vmalloc space 0x10000000000 PERCPU: auto allocator failed (-22), falling back to page size percpu: 40 4K pages/cpu s148816 r0 d15024 Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200608070904.387440-2-aneesh.kumar@linux.ibm.com --- arch/powerpc/Kconfig | 5 +++- arch/powerpc/kernel/setup_64.c | 62 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 63 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 787e829b6f25..4b33477dafb8 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -59,7 +59,10 @@ config HAVE_SETUP_PER_CPU_AREA def_bool PPC64 config NEED_PER_CPU_EMBED_FIRST_CHUNK - def_bool PPC64 + def_bool y if PPC64 + +config NEED_PER_CPU_PAGE_FIRST_CHUNK + def_bool y if PPC64 config NR_IRQS int "Number of virtual interrupt numbers" diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 55bbbf89ea82..bb9cab3641d7 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -66,6 +66,7 @@ #include #include #include +#include #include "setup.h" @@ -811,13 +812,58 @@ static int pcpu_cpu_distance(unsigned int from, unsigned int to) unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; EXPORT_SYMBOL(__per_cpu_offset); +static void __init pcpu_populate_pte(unsigned long addr) +{ + pgd_t *pgd = pgd_offset_k(addr); + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) { + pud_t *new; + + new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE); + if (!new) + goto err_alloc; + p4d_populate(&init_mm, p4d, new); + } + + pud = pud_offset(p4d, addr); + if (pud_none(*pud)) { + pmd_t *new; + + new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE); + if (!new) + goto err_alloc; + pud_populate(&init_mm, pud, new); + } + + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd)) { + pte_t *new; + + new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE); + if (!new) + goto err_alloc; + pmd_populate_kernel(&init_mm, pmd, new); + } + + return; + +err_alloc: + panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n", + __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); +} + + void __init setup_per_cpu_areas(void) { const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; size_t atom_size; unsigned long delta; unsigned int cpu; - int rc; + int rc = -EINVAL; /* * Linear mapping is one of 4K, 1M and 16M. 
For 4K, no need @@ -829,8 +875,18 @@ void __init setup_per_cpu_areas(void) else atom_size = 1 << 20; - rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance, - pcpu_alloc_bootmem, pcpu_free_bootmem); + if (pcpu_chosen_fc != PCPU_FC_PAGE) { + rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance, + pcpu_alloc_bootmem, pcpu_free_bootmem); + if (rc) + pr_warn("PERCPU: %s allocator failed (%d), " + "falling back to page size\n", + pcpu_fc_names[pcpu_chosen_fc], rc); + } + + if (rc < 0) + rc = pcpu_page_first_chunk(0, pcpu_alloc_bootmem, pcpu_free_bootmem, + pcpu_populate_pte); if (rc < 0) panic("cannot initialize percpu area (err=%d)", rc); -- cgit v1.2.3 From 7746406baa3bc9e23fdd7b7da2f04d86e25ab837 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 8 Jun 2020 12:39:03 +0530 Subject: powerpc/book3s64/hash/4k: Support large linear mapping range with 4K With commit: 0034d395f89d ("powerpc/mm/hash64: Map all the kernel regions in the same 0xc range"), we now split the 64TB address range into 4 contexts each of 16TB. That implies we can do only 16TB linear mapping. On some systems, eg. Power9, memory attached to nodes > 0 will appear above 16TB in the linear mapping. This resulted in kernel crash when we boot such systems in hash translation mode with 4K PAGE_SIZE. This patch updates the kernel mapping such that we now start supporting upto 61TB of memory with 4K. The kernel mapping now looks like below 4K PAGE_SIZE and hash translation. vmalloc start = 0xc0003d0000000000 IO start = 0xc0003e0000000000 vmemmap start = 0xc0003f0000000000 Our MAX_PHYSMEM_BITS for 4K is still 64TB even though we can only map 61TB. We prevent bolt mapping anything outside 61TB range by checking against H_VMALLOC_START. Fixes: 0034d395f89d ("powerpc/mm/hash64: Map all the kernel regions in the same 0xc range") Reported-by: Cameron Berkenpas Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200608070904.387440-3-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/book3s/64/hash-4k.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 082b98808701..b3ca542f871e 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -13,20 +13,19 @@ */ #define MAX_EA_BITS_PER_CONTEXT 46 -#define REGION_SHIFT (MAX_EA_BITS_PER_CONTEXT - 2) /* - * Our page table limit us to 64TB. Hence for the kernel mapping, - * each MAP area is limited to 16 TB. - * The four map areas are: linear mapping, vmap, IO and vmemmap + * Our page table limit us to 64TB. For 64TB physical memory, we only need 64GB + * of vmemmap space. To better support sparse memory layout, we use 61TB + * linear map range, 1TB of vmalloc, 1TB of I/O and 1TB of vmememmap. 
*/ +#define REGION_SHIFT (40) #define H_KERN_MAP_SIZE (ASM_CONST(1) << REGION_SHIFT) /* - * Define the address range of the kernel non-linear virtual area - * 16TB + * Define the address range of the kernel non-linear virtual area (61TB) */ -#define H_KERN_VIRT_START ASM_CONST(0xc000100000000000) +#define H_KERN_VIRT_START ASM_CONST(0xc0003d0000000000) #ifndef __ASSEMBLY__ #define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE) -- cgit v1.2.3 From b32d5d7e920a364287f6206af2d20179978a617d Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 8 Jun 2020 12:39:04 +0530 Subject: powerpc/mm/book3s: Split radix and hash MAX_PHYSMEM limit MAX_PHYSMEM #define is used along with sparsemem to determine the SECTION_SHIFT value. Powerpc also uses the same value to limit the max memory enabled on the system. With 4K PAGE_SIZE and hash translation mode, we want to limit the max memory enabled to 64TB due to page table size restrictions. However, with radix translation, we don't have these restrictions. Hence split the radix and hash MA_PHYSMEM limit and use different limit for each of them. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200608070904.387440-4-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/book3s/64/hash-4k.h | 5 +++++ arch/powerpc/include/asm/book3s/64/hash-64k.h | 13 +++++++++++++ arch/powerpc/include/asm/book3s/64/mmu-hash.h | 4 ++-- arch/powerpc/include/asm/book3s/64/mmu.h | 15 --------------- arch/powerpc/include/asm/book3s/64/pgtable.h | 7 +++++++ arch/powerpc/include/asm/book3s/64/radix.h | 16 ++++++++++++++++ arch/powerpc/kernel/prom.c | 5 +++++ arch/powerpc/mm/book3s64/slb.c | 4 ++-- 8 files changed, 50 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index b3ca542f871e..b6ac4f86c87b 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -22,6 +22,11 @@ #define REGION_SHIFT (40) #define H_KERN_MAP_SIZE (ASM_CONST(1) << REGION_SHIFT) +/* + * Limits the linear mapping range + */ +#define H_MAX_PHYSMEM_BITS 46 + /* * Define the address range of the kernel non-linear virtual area (61TB) */ diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index f20de1149ebe..338e62fbea0b 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -7,6 +7,19 @@ #define H_PUD_INDEX_SIZE 10 // size: 8B << 10 = 8KB, maps 2^10 x 16GB = 16TB #define H_PGD_INDEX_SIZE 8 // size: 8B << 8 = 2KB, maps 2^8 x 16TB = 4PB +/* + * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS + * if we increase SECTIONS_WIDTH we will not store node details in page->flags and + * page_to_nid does a page->section->node lookup + * Hence only increase for VMEMMAP. Further depending on SPARSEMEM_EXTREME reduce + * memory requirements with large number of sections. + * 51 bits is the max physical real address on POWER9 + */ +#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) +#define H_MAX_PHYSMEM_BITS 51 +#else +#define H_MAX_PHYSMEM_BITS 46 +#endif /* * Each context is 512TB size. 
SLB miss for first context/default context diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 93d18da5e7ec..683a9c7d1b03 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -577,8 +577,8 @@ extern void slb_set_size(u16 size); * For vmalloc and memmap, we use just one context with 512TB. With 64 byte * struct page size, we need ony 32 TB in memmap for 2PB (51 bits (MAX_PHYSMEM_BITS)). */ -#if (MAX_PHYSMEM_BITS > MAX_EA_BITS_PER_CONTEXT) -#define MAX_KERNEL_CTX_CNT (1UL << (MAX_PHYSMEM_BITS - MAX_EA_BITS_PER_CONTEXT)) +#if (H_MAX_PHYSMEM_BITS > MAX_EA_BITS_PER_CONTEXT) +#define MAX_KERNEL_CTX_CNT (1UL << (H_MAX_PHYSMEM_BITS - MAX_EA_BITS_PER_CONTEXT)) #else #define MAX_KERNEL_CTX_CNT 1 #endif diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index b392384a3b15..ddc414ab3c4d 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -27,21 +27,6 @@ struct mmu_psize_def { extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; #endif /* __ASSEMBLY__ */ -/* - * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS - * if we increase SECTIONS_WIDTH we will not store node details in page->flags and - * page_to_nid does a page->section->node lookup - * Hence only increase for VMEMMAP. Further depending on SPARSEMEM_EXTREME reduce - * memory requirements with large number of sections. - * 51 bits is the max physical real address on POWER9 - */ -#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \ - defined(CONFIG_PPC_64K_PAGES) -#define MAX_PHYSMEM_BITS 51 -#else -#define MAX_PHYSMEM_BITS 46 -#endif - /* 64-bit classic hash table MMU */ #include diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 495fc0ccb453..76880c7066fa 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -294,6 +294,13 @@ extern unsigned long pci_io_base; #include #include +#if H_MAX_PHYSMEM_BITS > R_MAX_PHYSMEM_BITS +#define MAX_PHYSMEM_BITS H_MAX_PHYSMEM_BITS +#else +#define MAX_PHYSMEM_BITS R_MAX_PHYSMEM_BITS +#endif + + #ifdef CONFIG_PPC_64K_PAGES #include #else diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 0cba794c4fb8..c7813dc628fc 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -91,6 +91,22 @@ * +------------------------------+ Kernel linear (0xc.....) */ + +/* + * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS + * if we increase SECTIONS_WIDTH we will not store node details in page->flags and + * page_to_nid does a page->section->node lookup + * Hence only increase for VMEMMAP. Further depending on SPARSEMEM_EXTREME reduce + * memory requirements with large number of sections. + * 51 bits is the max physical real address on POWER9 + */ + +#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) +#define R_MAX_PHYSMEM_BITS 51 +#else +#define R_MAX_PHYSMEM_BITS 46 +#endif + #define RADIX_KERN_VIRT_START ASM_CONST(0xc008000000000000) /* * 49 = MAX_EA_BITS_PER_CONTEXT (hash specific). 
To make sure we pick diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index d8a2fb87ba0c..c1545f22c077 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -776,6 +776,11 @@ void __init early_init_devtree(void *params) limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE); memblock_enforce_memory_limit(limit); +#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_4K_PAGES) + if (!early_radix_enabled()) + memblock_cap_memory_range(0, 1UL << (H_MAX_PHYSMEM_BITS)); +#endif + memblock_allow_resize(); memblock_dump_all(); diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index 156c38f89511..c30fcbfa0e32 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -765,8 +765,8 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id) if (id == LINEAR_MAP_REGION_ID) { - /* We only support upto MAX_PHYSMEM_BITS */ - if ((ea & EA_MASK) > (1UL << MAX_PHYSMEM_BITS)) + /* We only support upto H_MAX_PHYSMEM_BITS */ + if ((ea & EA_MASK) > (1UL << H_MAX_PHYSMEM_BITS)) return -EFAULT; flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp; -- cgit v1.2.3 From 66943005cc41f48e4d05614e8f76c0ca1812f0fd Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 5 Sep 2020 09:02:20 +1000 Subject: powerpc/tau: Use appropriate temperature sample interval According to the MPC750 Users Manual, the SITV value in Thermal Management Register 3 is 13 bits long. The present code calculates the SITV value as 60 * 500 cycles. This would overflow to give 10 us on a 500 MHz CPU rather than the intended 60 us. (But according to the Microprocessor Datasheet, there is also a factor of 266 that has to be applied to this value on certain parts i.e. speed sort above 266 MHz.) Always use the maximum cycle count, as recommended by the Datasheet. Fixes: 1da177e4c3f41 ("Linux-2.6.12-rc2") Signed-off-by: Finn Thain Tested-by: Stan Johnson Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/896f542e5f0f1d6cf8218524c2b67d79f3d69b3c.1599260540.git.fthain@telegraphics.com.au --- arch/powerpc/include/asm/reg.h | 2 +- arch/powerpc/kernel/tau_6xx.c | 12 ++++-------- 2 files changed, 5 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 88fb88491fe9..5647006ed373 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -817,7 +817,7 @@ #define THRM1_TIN (1 << 31) #define THRM1_TIV (1 << 30) #define THRM1_THRES(x) ((x&0x7f)<<23) -#define THRM3_SITV(x) ((x&0x3fff)<<1) +#define THRM3_SITV(x) ((x & 0x1fff) << 1) #define THRM1_TID (1<<2) #define THRM1_TIE (1<<1) #define THRM1_V (1<<0) diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index e2ab8a111b69..976d5bc1b517 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -178,15 +178,11 @@ static void tau_timeout(void * info) * complex sleep code needs to be added. One mtspr every time * tau_timeout is called is probably not a big deal. * - * Enable thermal sensor and set up sample interval timer - * need 20 us to do the compare.. until a nice 'cpu_speed' function - * call is implemented, just assume a 500 mhz clock. It doesn't really - * matter if we take too long for a compare since it's all interrupt - * driven anyway. - * - * use a extra long time.. 
(60 us @ 500 mhz) + * The "PowerPC 740 and PowerPC 750 Microprocessor Datasheet" + * recommends that "the maximum value be set in THRM3 under all + * conditions." */ - mtspr(SPRN_THRM3, THRM3_SITV(500*60) | THRM3_E); + mtspr(SPRN_THRM3, THRM3_SITV(0x1fff) | THRM3_E); local_irq_restore(flags); } -- cgit v1.2.3 From b1c6a0a10bfaf36ec82fde6f621da72407fa60a1 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 5 Sep 2020 09:02:20 +1000 Subject: powerpc/tau: Convert from timer to workqueue Since commit 19dbdcb8039cf ("smp: Warn on function calls from softirq context") the Thermal Assist Unit driver causes a warning like the following when CONFIG_SMP is enabled. ------------[ cut here ]------------ WARNING: CPU: 0 PID: 0 at kernel/smp.c:428 smp_call_function_many_cond+0xf4/0x38c Modules linked in: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.7.0-pmac #3 NIP: c00b37a8 LR: c00b3abc CTR: c001218c REGS: c0799c60 TRAP: 0700 Not tainted (5.7.0-pmac) MSR: 00029032 CR: 42000224 XER: 00000000 GPR00: c00b3abc c0799d18 c076e300 c079ef5c c0011fec 00000000 00000000 00000000 GPR08: 00000100 00000100 00008000 ffffffff 42000224 00000000 c079d040 c079d044 GPR16: 00000001 00000000 00000004 c0799da0 c079f054 c07a0000 c07a0000 00000000 GPR24: c0011fec 00000000 c079ef5c c079ef5c 00000000 00000000 00000000 00000000 NIP [c00b37a8] smp_call_function_many_cond+0xf4/0x38c LR [c00b3abc] on_each_cpu+0x38/0x68 Call Trace: [c0799d18] [ffffffff] 0xffffffff (unreliable) [c0799d68] [c00b3abc] on_each_cpu+0x38/0x68 [c0799d88] [c0096704] call_timer_fn.isra.26+0x20/0x7c [c0799d98] [c0096b40] run_timer_softirq+0x1d4/0x3fc [c0799df8] [c05b4368] __do_softirq+0x118/0x240 [c0799e58] [c0039c44] irq_exit+0xc4/0xcc [c0799e68] [c000ade8] timer_interrupt+0x1b0/0x230 [c0799ea8] [c0013520] ret_from_except+0x0/0x14 --- interrupt: 901 at arch_cpu_idle+0x24/0x6c LR = arch_cpu_idle+0x24/0x6c [c0799f70] [00000001] 0x1 (unreliable) [c0799f80] [c0060990] do_idle+0xd8/0x17c [c0799fa0] [c0060ba8] cpu_startup_entry+0x24/0x28 [c0799fb0] [c072d220] start_kernel+0x434/0x44c [c0799ff0] [00003860] 0x3860 Instruction dump: 8129f204 2f890000 40beff98 3d20c07a 8929eec4 2f890000 40beff88 0fe00000 81220000 552805de 550802ef 4182ff84 <0fe00000> 3860ffff 7f65db78 7f44d378 ---[ end trace 34a886e47819c2eb ]--- Don't call on_each_cpu() from a timer callback, call it from a worker thread instead. 
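In outline, the conversion replaces the self-rearming timer with a self-requeuing work item, so that the cross-CPU call runs from process context where on_each_cpu() is allowed. A minimal sketch of the pattern follows; the names, the 2000 ms interval and the empty per-CPU callback are placeholders for illustration, not the driver's own code:

#include <linux/workqueue.h>
#include <linux/delay.h>
#include <linux/smp.h>
#include <linux/errno.h>
#include <linux/init.h>

static struct workqueue_struct *poll_wq;

static void poll_one_cpu(void *info)
{
	/* per-CPU sampling goes here (tau_timeout() in the patch below) */
}

static void poll_work_func(struct work_struct *work)
{
	msleep(2000);				/* was: the timer period in jiffies */
	on_each_cpu(poll_one_cpu, NULL, 0);	/* legal here: process context */
	queue_work(poll_wq, work);		/* re-queue ourselves, as mod_timer() did */
}
static DECLARE_WORK(poll_work, poll_work_func);

static int __init poll_init(void)
{
	poll_wq = alloc_workqueue("poll", WQ_UNBOUND, 1);
	if (!poll_wq)
		return -ENOMEM;
	queue_work(poll_wq, &poll_work);
	return 0;
}

The patch below follows exactly this shape, keeping tau_timeout() as the per-CPU callback and turning the old timer period into an msleep() interval.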
Fixes: 1da177e4c3f41 ("Linux-2.6.12-rc2") Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/bb61650bea4f4c91fb8e24b9a6f130a1438651a7.1599260540.git.fthain@telegraphics.com.au --- arch/powerpc/kernel/tau_6xx.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index 976d5bc1b517..268205cc347d 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -13,13 +13,14 @@ */ #include -#include #include #include #include #include #include #include +#include +#include #include #include @@ -39,8 +40,6 @@ static struct tau_temp unsigned char grew; } tau[NR_CPUS]; -struct timer_list tau_timer; - #undef DEBUG /* TODO: put these in a /proc interface, with some sanity checks, and maybe @@ -50,7 +49,7 @@ struct timer_list tau_timer; #define step_size 2 /* step size when temp goes out of range */ #define window_expand 1 /* expand the window by this much */ /* configurable values for shrinking the window */ -#define shrink_timer 2*HZ /* period between shrinking the window */ +#define shrink_timer 2000 /* period between shrinking the window */ #define min_window 2 /* minimum window size, degrees C */ static void set_thresholds(unsigned long cpu) @@ -187,14 +186,18 @@ static void tau_timeout(void * info) local_irq_restore(flags); } -static void tau_timeout_smp(struct timer_list *unused) -{ +static struct workqueue_struct *tau_workq; - /* schedule ourselves to be run again */ - mod_timer(&tau_timer, jiffies + shrink_timer) ; +static void tau_work_func(struct work_struct *work) +{ + msleep(shrink_timer); on_each_cpu(tau_timeout, NULL, 0); + /* schedule ourselves to be run again */ + queue_work(tau_workq, work); } +DECLARE_WORK(tau_work, tau_work_func); + /* * setup the TAU * @@ -227,21 +230,16 @@ static int __init TAU_init(void) return 1; } - - /* first, set up the window shrinking timer */ - timer_setup(&tau_timer, tau_timeout_smp, 0); - tau_timer.expires = jiffies + shrink_timer; - add_timer(&tau_timer); + tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1, 0); + if (!tau_workq) + return -ENOMEM; on_each_cpu(TAU_init_smp, NULL, 0); - printk("Thermal assist unit "); -#ifdef CONFIG_TAU_INT - printk("using interrupts, "); -#else - printk("using timers, "); -#endif - printk("shrink_timer: %d jiffies\n", shrink_timer); + queue_work(tau_workq, &tau_work); + + pr_info("Thermal assist unit using %s, shrink_timer: %d ms\n", + IS_ENABLED(CONFIG_TAU_INT) ? "interrupts" : "workqueue", shrink_timer); tau_initialized = 1; return 0; -- cgit v1.2.3 From 420ab2bc7544d978a5d0762ee736412fe9c796ab Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 5 Sep 2020 09:02:20 +1000 Subject: powerpc/tau: Remove duplicated set_thresholds() call The commentary at the call site seems to disagree with the code. The conditional prevents calling set_thresholds() via the exception handler, which appears to crash. Perhaps that's because it immediately triggers another TAU exception. Anyway, calling set_thresholds() from TAUupdate() is redundant because tau_timeout() does so. 
Fixes: 1da177e4c3f41 ("Linux-2.6.12-rc2") Signed-off-by: Finn Thain Tested-by: Stan Johnson Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d7c7ee33232cf72a6a6bbb6ef05838b2e2b113c0.1599260540.git.fthain@telegraphics.com.au --- arch/powerpc/kernel/tau_6xx.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index 268205cc347d..b8d7e7d498e0 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -110,11 +110,6 @@ static void TAUupdate(int cpu) #ifdef DEBUG printk("grew = %d\n", tau[cpu].grew); #endif - -#ifndef CONFIG_TAU_INT /* tau_timeout will do this if not using interrupts */ - set_thresholds(cpu); -#endif - } #ifdef CONFIG_TAU_INT -- cgit v1.2.3 From 5e3119e15fed5b9a9a7e528665ff098a4a8dbdbc Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 5 Sep 2020 09:02:20 +1000 Subject: powerpc/tau: Check processor type before enabling TAU interrupt According to Freescale's documentation, MPC74XX processors have an erratum that prevents the TAU interrupt from working, so don't try to use it when running on those processors. Fixes: 1da177e4c3f41 ("Linux-2.6.12-rc2") Signed-off-by: Finn Thain Tested-by: Stan Johnson Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c281611544768e758bd58fe812cf702a5bd2d042.1599260540.git.fthain@telegraphics.com.au --- arch/powerpc/kernel/tau_6xx.c | 33 ++++++++++++++------------------- arch/powerpc/platforms/Kconfig | 5 ++--- 2 files changed, 16 insertions(+), 22 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index b8d7e7d498e0..614b5b272d9c 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -40,6 +40,8 @@ static struct tau_temp unsigned char grew; } tau[NR_CPUS]; +static bool tau_int_enable; + #undef DEBUG /* TODO: put these in a /proc interface, with some sanity checks, and maybe @@ -54,22 +56,13 @@ static struct tau_temp static void set_thresholds(unsigned long cpu) { -#ifdef CONFIG_TAU_INT - /* - * setup THRM1, - * threshold, valid bit, enable interrupts, interrupt when below threshold - */ - mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TIE | THRM1_TID); + u32 maybe_tie = tau_int_enable ? THRM1_TIE : 0; - /* setup THRM2, - * threshold, valid bit, enable interrupts, interrupt when above threshold - */ - mtspr (SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | THRM1_TIE); -#else - /* same thing but don't enable interrupts */ - mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TID); - mtspr(SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V); -#endif + /* setup THRM1, threshold, valid bit, interrupt when below threshold */ + mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | maybe_tie | THRM1_TID); + + /* setup THRM2, threshold, valid bit, interrupt when above threshold */ + mtspr(SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | maybe_tie); } static void TAUupdate(int cpu) @@ -142,9 +135,8 @@ static void tau_timeout(void * info) local_irq_save(flags); cpu = smp_processor_id(); -#ifndef CONFIG_TAU_INT - TAUupdate(cpu); -#endif + if (!tau_int_enable) + TAUupdate(cpu); size = tau[cpu].high - tau[cpu].low; if (size > min_window && ! 
tau[cpu].grew) { @@ -225,6 +217,9 @@ static int __init TAU_init(void) return 1; } + tau_int_enable = IS_ENABLED(CONFIG_TAU_INT) && + !strcmp(cur_cpu_spec->platform, "ppc750"); + tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1, 0); if (!tau_workq) return -ENOMEM; @@ -234,7 +229,7 @@ static int __init TAU_init(void) queue_work(tau_workq, &tau_work); pr_info("Thermal assist unit using %s, shrink_timer: %d ms\n", - IS_ENABLED(CONFIG_TAU_INT) ? "interrupts" : "workqueue", shrink_timer); + tau_int_enable ? "interrupts" : "workqueue", shrink_timer); tau_initialized = 1; return 0; diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index fb7515b4fa9c..9fe36f0b54c1 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -223,9 +223,8 @@ config TAU temperature within 2-4 degrees Celsius. This option shows the current on-die temperature in /proc/cpuinfo if the cpu supports it. - Unfortunately, on some chip revisions, this sensor is very inaccurate - and in many cases, does not work at all, so don't assume the cpu - temp is actually what /proc/cpuinfo says it is. + Unfortunately, this sensor is very inaccurate when uncalibrated, so + don't assume the cpu temp is actually what /proc/cpuinfo says it is. config TAU_INT bool "Interrupt driven TAU driver (DANGEROUS)" -- cgit v1.2.3 From e63d6fb5637e92725cf143559672a34b706bca4f Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 5 Sep 2020 09:02:20 +1000 Subject: powerpc/tau: Disable TAU between measurements Enabling CONFIG_TAU_INT causes random crashes: Unrecoverable exception 1700 at c0009414 (msr=1000) Oops: Unrecoverable exception, sig: 6 [#1] BE PAGE_SIZE=4K MMU=Hash SMP NR_CPUS=2 PowerMac Modules linked in: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.7.0-pmac-00043-gd5f545e1a8593 #5 NIP: c0009414 LR: c0009414 CTR: c00116fc REGS: c0799eb8 TRAP: 1700 Not tainted (5.7.0-pmac-00043-gd5f545e1a8593) MSR: 00001000 CR: 22000228 XER: 00000100 GPR00: 00000000 c0799f70 c076e300 00800000 0291c0ac 00e00000 c076e300 00049032 GPR08: 00000001 c00116fc 00000000 dfbd3200 ffffffff 007f80a8 00000000 00000000 GPR16: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 c075ce04 GPR24: c075ce04 dfff8880 c07b0000 c075ce04 00080000 00000001 c079ef98 c079ef5c NIP [c0009414] arch_cpu_idle+0x24/0x6c LR [c0009414] arch_cpu_idle+0x24/0x6c Call Trace: [c0799f70] [00000001] 0x1 (unreliable) [c0799f80] [c0060990] do_idle+0xd8/0x17c [c0799fa0] [c0060ba4] cpu_startup_entry+0x20/0x28 [c0799fb0] [c072d220] start_kernel+0x434/0x44c [c0799ff0] [00003860] 0x3860 Instruction dump: XXXXXXXX XXXXXXXX XXXXXXXX 3d20c07b XXXXXXXX XXXXXXXX XXXXXXXX 7c0802a6 XXXXXXXX XXXXXXXX XXXXXXXX 4e800421 XXXXXXXX XXXXXXXX XXXXXXXX 7d2000a6 ---[ end trace 3a0c9b5cb216db6b ]--- Resolve this problem by disabling each THRMn comparator when handling the associated THRMn interrupt and by disabling the TAU entirely when updating THRMn thresholds. 
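Reduced to its essentials, the fix is a quiesce / reprogram / re-enable sequence around the thermal comparators. The sketch below is illustrative only: tau_ack_comparator() and tau_reprogram() are made-up names, while the SPR and bit macros are the ones from reg.h shown earlier; the real changes are in the diff that follows.

static void tau_ack_comparator(void)
{
	u32 bits = THRM1_TIV | THRM1_TIN | THRM1_V;

	/* A comparator that has tripped is acknowledged by turning it off,
	 * otherwise it keeps re-raising the thermal exception. */
	if ((mfspr(SPRN_THRM1) & bits) == bits)
		mtspr(SPRN_THRM1, 0);
	if ((mfspr(SPRN_THRM2) & bits) == bits)
		mtspr(SPRN_THRM2, 0);
}

static void tau_reprogram(unsigned long cpu)
{
	mtspr(SPRN_THRM3, 0);		/* TAU fully off while thresholds change */
	set_thresholds(cpu);		/* rewrite THRM1/THRM2 for the new window */
	mtspr(SPRN_THRM3, THRM3_SITV(0x1fff) | THRM3_E);	/* restart sampling */
}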
Fixes: 1da177e4c3f41 ("Linux-2.6.12-rc2") Signed-off-by: Finn Thain Tested-by: Stan Johnson Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/5a0ba3dc5612c7aac596727331284a3676c08472.1599260540.git.fthain@telegraphics.com.au --- arch/powerpc/kernel/tau_6xx.c | 65 ++++++++++++++++-------------------------- arch/powerpc/platforms/Kconfig | 9 ++---- 2 files changed, 26 insertions(+), 48 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index 614b5b272d9c..0b4694b8d248 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -42,8 +42,6 @@ static struct tau_temp static bool tau_int_enable; -#undef DEBUG - /* TODO: put these in a /proc interface, with some sanity checks, and maybe * dynamic adjustment to minimize # of interrupts */ /* configurable values for step size and how much to expand the window when @@ -67,42 +65,33 @@ static void set_thresholds(unsigned long cpu) static void TAUupdate(int cpu) { - unsigned thrm; - -#ifdef DEBUG - printk("TAUupdate "); -#endif + u32 thrm; + u32 bits = THRM1_TIV | THRM1_TIN | THRM1_V; /* if both thresholds are crossed, the step_sizes cancel out * and the window winds up getting expanded twice. */ - if((thrm = mfspr(SPRN_THRM1)) & THRM1_TIV){ /* is valid? */ - if(thrm & THRM1_TIN){ /* crossed low threshold */ - if (tau[cpu].low >= step_size){ - tau[cpu].low -= step_size; - tau[cpu].high -= (step_size - window_expand); - } - tau[cpu].grew = 1; -#ifdef DEBUG - printk("low threshold crossed "); -#endif + thrm = mfspr(SPRN_THRM1); + if ((thrm & bits) == bits) { + mtspr(SPRN_THRM1, 0); + + if (tau[cpu].low >= step_size) { + tau[cpu].low -= step_size; + tau[cpu].high -= (step_size - window_expand); } + tau[cpu].grew = 1; + pr_debug("%s: low threshold crossed\n", __func__); } - if((thrm = mfspr(SPRN_THRM2)) & THRM1_TIV){ /* is valid? */ - if(thrm & THRM1_TIN){ /* crossed high threshold */ - if (tau[cpu].high <= 127-step_size){ - tau[cpu].low += (step_size - window_expand); - tau[cpu].high += step_size; - } - tau[cpu].grew = 1; -#ifdef DEBUG - printk("high threshold crossed "); -#endif + thrm = mfspr(SPRN_THRM2); + if ((thrm & bits) == bits) { + mtspr(SPRN_THRM2, 0); + + if (tau[cpu].high <= 127 - step_size) { + tau[cpu].low += (step_size - window_expand); + tau[cpu].high += step_size; } + tau[cpu].grew = 1; + pr_debug("%s: high threshold crossed\n", __func__); } - -#ifdef DEBUG - printk("grew = %d\n", tau[cpu].grew); -#endif } #ifdef CONFIG_TAU_INT @@ -127,17 +116,17 @@ void TAUException(struct pt_regs * regs) static void tau_timeout(void * info) { int cpu; - unsigned long flags; int size; int shrink; - /* disabling interrupts *should* be okay */ - local_irq_save(flags); cpu = smp_processor_id(); if (!tau_int_enable) TAUupdate(cpu); + /* Stop thermal sensor comparisons and interrupts */ + mtspr(SPRN_THRM3, 0); + size = tau[cpu].high - tau[cpu].low; if (size > min_window && ! tau[cpu].grew) { /* do an exponential shrink of half the amount currently over size */ @@ -159,18 +148,12 @@ static void tau_timeout(void * info) set_thresholds(cpu); - /* - * Do the enable every time, since otherwise a bunch of (relatively) - * complex sleep code needs to be added. One mtspr every time - * tau_timeout is called is probably not a big deal. - * + /* Restart thermal sensor comparisons and interrupts. * The "PowerPC 740 and PowerPC 750 Microprocessor Datasheet" * recommends that "the maximum value be set in THRM3 under all * conditions." 
*/ mtspr(SPRN_THRM3, THRM3_SITV(0x1fff) | THRM3_E); - - local_irq_restore(flags); } static struct workqueue_struct *tau_workq; diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 9fe36f0b54c1..b439b027a42f 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -227,7 +227,7 @@ config TAU don't assume the cpu temp is actually what /proc/cpuinfo says it is. config TAU_INT - bool "Interrupt driven TAU driver (DANGEROUS)" + bool "Interrupt driven TAU driver (EXPERIMENTAL)" depends on TAU help The TAU supports an interrupt driven mode which causes an interrupt @@ -235,12 +235,7 @@ config TAU_INT to get notified the temp has exceeded a range. With this option off, a timer is used to re-check the temperature periodically. - However, on some cpus it appears that the TAU interrupt hardware - is buggy and can cause a situation which would lead unexplained hard - lockups. - - Unless you are extending the TAU driver, or enjoy kernel/hardware - debugging, leave this option off. + If in doubt, say N here. config TAU_AVERAGE bool "Average high and low temp" -- cgit v1.2.3 From 542db12a9c42d1ce70c45091765e02f74c129f43 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 31 Aug 2020 07:58:19 +0000 Subject: powerpc: Fix random segfault when freeing hugetlb range The following random segfault is observed from time to time with map_hugetlb selftest: root@localhost:~# ./map_hugetlb 1 19 524288 kB hugepages Mapping 1 Mbytes Segmentation fault [ 31.219972] map_hugetlb[365]: segfault (11) at 117 nip 77974f8c lr 779a6834 code 1 in ld-2.23.so[77966000+21000] [ 31.220192] map_hugetlb[365]: code: 9421ffc0 480318d1 93410028 90010044 9361002c 93810030 93a10034 93c10038 [ 31.220307] map_hugetlb[365]: code: 93e1003c 93210024 8123007c 81430038 <80e90004> 814a0004 7f443a14 813a0004 [ 31.221911] BUG: Bad rss-counter state mm:(ptrval) type:MM_FILEPAGES val:33 [ 31.229362] BUG: Bad rss-counter state mm:(ptrval) type:MM_ANONPAGES val:5 This fault is due to hugetlb_free_pgd_range() freeing page tables that are also used by regular pages. As explain in the comment at the beginning of hugetlb_free_pgd_range(), the verification done in free_pgd_range() on floor and ceiling is not done here, which means hugetlb_free_pte_range() can free outside the expected range. As the verification cannot be done in hugetlb_free_pgd_range(), it must be done in hugetlb_free_pte_range(). 
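The guard that has to move into hugetlb_free_pte_range() is the same floor/ceiling check that free_pgd_range() applies at every level. Annotated in outline below (the variables are those of the function being patched; the exact change is in the diff that follows):

	start = addr & PMD_MASK;
	if (start < floor)
		return;			/* this PTE page also maps memory below 'floor' */
	if (ceiling) {
		ceiling &= PMD_MASK;
		if (!ceiling)
			return;		/* rounding left nothing above us */
	}
	if (end - 1 > ceiling - 1)
		return;			/* ... or above 'ceiling'; the "- 1" keeps
					 * ceiling == 0 meaning "no upper limit" */

	/* Only when the whole PMD-sized range lies inside [floor, ceiling)
	 * is it safe to clear the PMD and free the PTE page. */
	pmd_clear(pmd);
	pte_free_tlb(tlb, token, addr);
	mm_dec_nr_ptes(tlb->mm);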
Fixes: b250c8c08c79 ("powerpc/8xx: Manage 512k huge pages as standard pages.") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f0cb2a5477cd87d1eaadb128042e20aeb2bc2859.1598860677.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/hugetlbpage.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 26292544630f..e7ae2a2c4545 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -330,10 +330,24 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif get_hugepd_cache_index(pdshift - shift)); } -static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, unsigned long addr) +static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, + unsigned long addr, unsigned long end, + unsigned long floor, unsigned long ceiling) { + unsigned long start = addr; pgtable_t token = pmd_pgtable(*pmd); + start &= PMD_MASK; + if (start < floor) + return; + if (ceiling) { + ceiling &= PMD_MASK; + if (!ceiling) + return; + } + if (end - 1 > ceiling - 1) + return; + pmd_clear(pmd); pte_free_tlb(tlb, token, addr); mm_dec_nr_ptes(tlb->mm); @@ -363,7 +377,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, */ WARN_ON(!IS_ENABLED(CONFIG_PPC_8xx)); - hugetlb_free_pte_range(tlb, pmd, addr); + hugetlb_free_pte_range(tlb, pmd, addr, end, floor, ceiling); continue; } -- cgit v1.2.3 From 175a99991511fed16108dcb823f0af8e72325a1f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 31 Aug 2020 08:30:43 +0000 Subject: powerpc/8xx: Refactor calculation of number of entries per PTE in page tables On 8xx, the number of entries occupied by a PTE in the page tables depends on the size of the page. At the time being, this calculation is done in two places: in pte_update() and in set_huge_pte_at() Refactor this calculation into a helper called number_of_cells_per_pte(). For the time being, the val param is unused. It will be used by following patch. Instead of opencoding is_hugepd(), use hugepd_ok() with a forward declaration. 
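For orientation, the counts such a helper returns on the 8xx work out as follows (a worked summary derived from the constants used in the diff below, not new code):

/*  logical page size                  PTE cells written per update
 *  4k standard page (4k kernel)       PAGE_SIZE / SZ_4K = 1
 *  16k standard page (16k kernel)     PAGE_SIZE / SZ_4K = 4
 *  16k huge page on a 4k kernel       SZ_16K / SZ_4K    = 4   (added by the next patch)
 *  512k huge page                     SZ_512K / SZ_4K   = 128
 *  8M huge page                       1  (the level is mapped by a hugepd)
 */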
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f6ea2483c2c389567b007945948f704d18cfaeea.1598862623.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pgtable.h | 18 ++++++++++++------ arch/powerpc/mm/pgtable.c | 6 ++++-- 2 files changed, 16 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index b9e134d0f03a..80bbc21b87f0 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -227,6 +227,17 @@ static inline void pmd_clear(pmd_t *pmdp) */ #ifdef CONFIG_PPC_8xx static pmd_t *pmd_off(struct mm_struct *mm, unsigned long addr); +static int hugepd_ok(hugepd_t hpd); + +static int number_of_cells_per_pte(pmd_t *pmd, pte_basic_t val, int huge) +{ + if (!huge) + return PAGE_SIZE / SZ_4K; + else if (hugepd_ok(*((hugepd_t *)pmd))) + return 1; + else + return SZ_512K / SZ_4K; +} static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, unsigned long clr, unsigned long set, int huge) @@ -237,12 +248,7 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p int num, i; pmd_t *pmd = pmd_off(mm, addr); - if (!huge) - num = PAGE_SIZE / SZ_4K; - else if ((pmd_val(*pmd) & _PMD_PAGE_MASK) != _PMD_PAGE_8M) - num = SZ_512K / SZ_4K; - else - num = 1; + num = number_of_cells_per_pte(pmd, new, huge); for (i = 0; i < num; i++, entry++, new += SZ_4K) *entry = new; diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 9c0547d77af3..2dcad640b869 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -266,8 +266,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_ pmd_t *pmd = pmd_off(mm, addr); pte_basic_t val; pte_basic_t *entry = &ptep->pte; - int num = is_hugepd(*((hugepd_t *)pmd)) ? 1 : SZ_512K / SZ_4K; - int i; + int num, i; /* * Make sure hardware valid bit is not set. We don't do @@ -280,6 +279,9 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_ pte = set_pte_filter(pte); val = pte_val(pte); + + num = number_of_cells_per_pte(pmd, val, 1); + for (i = 0; i < num; i++, entry++, val += SZ_4K) *entry = val; } -- cgit v1.2.3 From e47168f3d1b14af5281cf50c59561d59d28201f9 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 31 Aug 2020 08:30:44 +0000 Subject: powerpc/8xx: Support 16k hugepages with 4k pages The 8xx has 4 page sizes: 4k, 16k, 512k and 8M 4k and 16k can be selected at build time as standard page sizes, and 512k and 8M are hugepages. When 4k standard pages are selected, 16k pages are not available. Allow 16k pages as hugepages when 4k pages are used. To allow that, implement arch_make_huge_pte() which receives the necessary arguments to allow setting the PTE in accordance with the page size: - 512 k pages must have _PAGE_HUGE and _PAGE_SPS. They are set by pte_mkhuge(). arch_make_huge_pte() does nothing. - 16 k pages must have only _PAGE_SPS. arch_make_huge_pte() clears _PAGE_HUGE. 
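Because the series also wires a 16KB encoding into the uapi headers (see the hugetlb_encode.h and mman.h hunks below), user space can then request such a mapping roughly as in this sketch; it assumes headers that already carry the new MAP_HUGE_16KB definition and a 16k hugepage pool reserved beforehand:

#include <sys/mman.h>
#include <stddef.h>

int main(void)
{
	/* one anonymous 16k huge page, read/write */
	void *p = mmap(NULL, 16 * 1024, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_16KB,
		       -1, 0);

	return p == MAP_FAILED;
}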
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a518abc29266a708dfbccc8fce9ae6694fe4c2c6.1598862623.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h | 14 ++++++++++++++ arch/powerpc/include/asm/nohash/32/pgtable.h | 2 ++ arch/powerpc/mm/hugetlbpage.c | 2 +- arch/powerpc/mm/nohash/tlb.c | 4 ---- arch/powerpc/mm/ptdump/8xx.c | 5 +++++ include/uapi/asm-generic/hugetlb_encode.h | 1 + include/uapi/linux/mman.h | 1 + 7 files changed, 24 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h index e752a5807a59..39be9aea86db 100644 --- a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h @@ -65,4 +65,18 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, pte_update(mm, addr, ptep, clr, set, 1); } +#ifdef CONFIG_PPC_4K_PAGES +static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, + struct page *page, int writable) +{ + size_t size = huge_page_size(hstate_vma(vma)); + + if (size == SZ_16K) + return __pte(pte_val(entry) & ~_PAGE_HUGE); + else + return entry; +} +#define arch_make_huge_pte arch_make_huge_pte +#endif + #endif /* _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H */ diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 80bbc21b87f0..ee2243ba96cf 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -235,6 +235,8 @@ static int number_of_cells_per_pte(pmd_t *pmd, pte_basic_t val, int huge) return PAGE_SIZE / SZ_4K; else if (hugepd_ok(*((hugepd_t *)pmd))) return 1; + else if (IS_ENABLED(CONFIG_PPC_4K_PAGES) && !(val & _PAGE_HUGE)) + return SZ_16K / SZ_4K; else return SZ_512K / SZ_4K; } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index e7ae2a2c4545..36c3800769fb 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -180,7 +180,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz if (!hpdp) return NULL; - if (IS_ENABLED(CONFIG_PPC_8xx) && sz == SZ_512K) + if (IS_ENABLED(CONFIG_PPC_8xx) && pshift < PMD_SHIFT) return pte_alloc_map(mm, (pmd_t *)hpdp, addr); BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp)); diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c index 14514585db98..5872f69141d5 100644 --- a/arch/powerpc/mm/nohash/tlb.c +++ b/arch/powerpc/mm/nohash/tlb.c @@ -83,16 +83,12 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { }; #elif defined(CONFIG_PPC_8xx) struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { - /* we only manage 4k and 16k pages as normal pages */ -#ifdef CONFIG_PPC_4K_PAGES [MMU_PAGE_4K] = { .shift = 12, }, -#else [MMU_PAGE_16K] = { .shift = 14, }, -#endif [MMU_PAGE_512K] = { .shift = 19, }, diff --git a/arch/powerpc/mm/ptdump/8xx.c b/arch/powerpc/mm/ptdump/8xx.c index 8a797dcbf475..86da2a669680 100644 --- a/arch/powerpc/mm/ptdump/8xx.c +++ b/arch/powerpc/mm/ptdump/8xx.c @@ -11,8 +11,13 @@ static const struct flag_info flag_array[] = { { +#ifdef CONFIG_PPC_16K_PAGES .mask = _PAGE_HUGE, .val = _PAGE_HUGE, +#else + .mask = _PAGE_SPS, + .val = _PAGE_SPS, +#endif .set = "huge", .clear = " ", }, { diff --git a/include/uapi/asm-generic/hugetlb_encode.h b/include/uapi/asm-generic/hugetlb_encode.h index b0f8e87235bd..4f3d5aaa11f5 100644 --- 
a/include/uapi/asm-generic/hugetlb_encode.h +++ b/include/uapi/asm-generic/hugetlb_encode.h @@ -20,6 +20,7 @@ #define HUGETLB_FLAG_ENCODE_SHIFT 26 #define HUGETLB_FLAG_ENCODE_MASK 0x3f +#define HUGETLB_FLAG_ENCODE_16KB (14 << HUGETLB_FLAG_ENCODE_SHIFT) #define HUGETLB_FLAG_ENCODE_64KB (16 << HUGETLB_FLAG_ENCODE_SHIFT) #define HUGETLB_FLAG_ENCODE_512KB (19 << HUGETLB_FLAG_ENCODE_SHIFT) #define HUGETLB_FLAG_ENCODE_1MB (20 << HUGETLB_FLAG_ENCODE_SHIFT) diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h index 923cc162609c..f55bc680b5b0 100644 --- a/include/uapi/linux/mman.h +++ b/include/uapi/linux/mman.h @@ -27,6 +27,7 @@ #define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT #define MAP_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK +#define MAP_HUGE_16KB HUGETLB_FLAG_ENCODE_16KB #define MAP_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB #define MAP_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB #define MAP_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB -- cgit v1.2.3 From fcf1f26895a4f14618b0dc04e0801b123c55e4a3 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 4 Sep 2020 10:46:47 +0000 Subject: powerpc/uaccess: Add pre-update addressing to __put_user_asm_goto() Enable pre-update addressing mode in __put_user_asm_goto() Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/346f65d677adb11865f7762c25a1ca3c64404ba5.1599216023.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 7c2427f237e1..a5cfe867fbdc 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -254,7 +254,7 @@ do { \ "1: " op "%U1%X1 %0,%1 # put_user\n" \ EX_TABLE(1b, %l2) \ : \ - : "r" (x), "m" (*addr) \ + : "r" (x), "m<>" (*addr) \ : \ : label) -- cgit v1.2.3 From ee0a49a6870ea75e25b4d4984c1bb6b3b7c65f2b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 4 Sep 2020 11:01:30 +0000 Subject: powerpc/uaccess: Switch __put_user_size_allowed() to __put_user_asm_goto() __put_user_asm_goto() provides more flexibility to GCC and avoids using a local variable to tell if the write succeeded or not. GCC can then avoid implementing a cmp in the fast path. 
See the difference for a small function like the PPC64 version of save_general_regs() in arch/powerpc/kernel/signal_32.c: Before the patch (unreachable nop removed): 0000000000000c10 <.save_general_regs>: c10: 39 20 00 2c li r9,44 c14: 39 40 00 00 li r10,0 c18: 7d 29 03 a6 mtctr r9 c1c: 38 c0 00 00 li r6,0 c20: 48 00 00 14 b c34 <.save_general_regs+0x24> c30: 42 40 00 40 bdz c70 <.save_general_regs+0x60> c34: 28 2a 00 27 cmpldi r10,39 c38: 7c c8 33 78 mr r8,r6 c3c: 79 47 1f 24 rldicr r7,r10,3,60 c40: 39 20 00 01 li r9,1 c44: 41 82 00 0c beq c50 <.save_general_regs+0x40> c48: 7d 23 38 2a ldx r9,r3,r7 c4c: 79 29 00 20 clrldi r9,r9,32 c50: 91 24 00 00 stw r9,0(r4) c54: 2c 28 00 00 cmpdi r8,0 c58: 39 4a 00 01 addi r10,r10,1 c5c: 38 84 00 04 addi r4,r4,4 c60: 41 82 ff d0 beq c30 <.save_general_regs+0x20> c64: 38 60 ff f2 li r3,-14 c68: 4e 80 00 20 blr c70: 38 60 00 00 li r3,0 c74: 4e 80 00 20 blr 0000000000000000 <.fixup>: cc: 39 00 ff f2 li r8,-14 d0: 48 00 00 00 b d0 <.fixup+0xd0> d0: R_PPC64_REL24 .text+0xc54 After the patch: 0000000000001490 <.save_general_regs>: 1490: 39 20 00 2c li r9,44 1494: 39 40 00 00 li r10,0 1498: 7d 29 03 a6 mtctr r9 149c: 60 00 00 00 nop 14a0: 28 2a 00 27 cmpldi r10,39 14a4: 79 48 1f 24 rldicr r8,r10,3,60 14a8: 39 20 00 01 li r9,1 14ac: 41 82 00 0c beq 14b8 <.save_general_regs+0x28> 14b0: 7d 23 40 2a ldx r9,r3,r8 14b4: 79 29 00 20 clrldi r9,r9,32 14b8: 91 24 00 00 stw r9,0(r4) 14bc: 39 4a 00 01 addi r10,r10,1 14c0: 38 84 00 04 addi r4,r4,4 14c4: 42 00 ff dc bdnz 14a0 <.save_general_regs+0x10> 14c8: 38 60 00 00 li r3,0 14cc: 4e 80 00 20 blr 14d0: 38 60 ff f2 li r3,-14 14d4: 4e 80 00 20 blr Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/94ba5a5138f99522e1562dbcdb38d31aa790dc89.1599216721.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/uaccess.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index a5cfe867fbdc..96d1c144f92b 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -189,14 +189,14 @@ extern long __put_user_bad(void); #define __put_user_size_allowed(x, ptr, size, retval) \ do { \ + __label__ __pu_failed; \ + \ retval = 0; \ - switch (size) { \ - case 1: __put_user_asm(x, ptr, retval, "stb"); break; \ - case 2: __put_user_asm(x, ptr, retval, "sth"); break; \ - case 4: __put_user_asm(x, ptr, retval, "stw"); break; \ - case 8: __put_user_asm2(x, ptr, retval); break; \ - default: __put_user_bad(); \ - } \ + __put_user_size_goto(x, ptr, size, __pu_failed); \ + break; \ + \ +__pu_failed: \ + retval = -EFAULT; \ } while (0) #define __put_user_size(x, ptr, size, retval) \ -- cgit v1.2.3 From e64ac41ab0c510b3f85199a585eb886cad92fb19 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 4 Sep 2020 11:01:31 +0000 Subject: powerpc/uaccess: Switch __patch_instruction() to __put_user_asm_goto() __patch_instruction() is the only user of __put_user_asm() outside of asm/uaccess.h Switch to the new __put_user_asm_goto() to enable retirement of __put_user_asm() in a later patch. 
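In reduced form the switch looks like the sketch below; put_word_old() and put_word_new() are made-up wrappers around the two macros as they exist in uaccess.h at this point, and the real change is in the diff that follows:

static int put_word_old(u32 val, u32 __user *addr)
{
	int err = 0;

	/* The asm writes 'err' on a fault, so even the success path has to
	 * test it afterwards: one extra compare and branch per store. */
	__put_user_asm(val, addr, err, "stw");
	return err;
}

static int put_word_new(u32 val, u32 __user *addr)
{
	/* A faulting store branches straight to the C label; the success
	 * path falls through with no error flag and no compare at all. */
	__put_user_asm_goto(val, addr, failed, "stw");
	return 0;

failed:
	return -EFAULT;
}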
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b9745b122f4a9ae72cef445c61320022ab8b77b7.1599216721.git.christophe.leroy@csgroup.eu --- arch/powerpc/lib/code-patching.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 8c3934ea6220..2333625b5e31 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -21,21 +21,18 @@ static int __patch_instruction(struct ppc_inst *exec_addr, struct ppc_inst instr, struct ppc_inst *patch_addr) { - int err = 0; - - if (!ppc_inst_prefixed(instr)) { - __put_user_asm(ppc_inst_val(instr), patch_addr, err, "stw"); - } else { - __put_user_asm(ppc_inst_as_u64(instr), patch_addr, err, "std"); - } - - if (err) - return err; + if (!ppc_inst_prefixed(instr)) + __put_user_asm_goto(ppc_inst_val(instr), patch_addr, failed, "stw"); + else + __put_user_asm_goto(ppc_inst_as_u64(instr), patch_addr, failed, "std"); asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr), "r" (exec_addr)); return 0; + +failed: + return -EFAULT; } int raw_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr) -- cgit v1.2.3 From 7fdf966bed155b214f4f1f9b67825a40b2e9b998 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 4 Sep 2020 11:01:32 +0000 Subject: powerpc/uaccess: Remove __put_user_asm() and __put_user_asm2() __put_user_asm() and __put_user_asm2() are not used anymore. Remove them. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d66c4a372738d2fbd81f433ca86e4295871ace6a.1599216721.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/uaccess.h | 41 +++++--------------------------------- 1 file changed, 5 insertions(+), 36 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 96d1c144f92b..26781b044932 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -151,42 +151,6 @@ static inline int __access_ok(unsigned long addr, unsigned long size, extern long __put_user_bad(void); -/* - * We don't tell gcc that we are accessing memory, but this is OK - * because we do not write to any memory gcc knows about, so there - * are no aliasing issues. - */ -#define __put_user_asm(x, addr, err, op) \ - __asm__ __volatile__( \ - "1: " op "%U2%X2 %1,%2 # put_user\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: li %0,%3\n" \ - " b 2b\n" \ - ".previous\n" \ - EX_TABLE(1b, 3b) \ - : "=r" (err) \ - : "r" (x), "m<>" (*addr), "i" (-EFAULT), "0" (err)) - -#ifdef __powerpc64__ -#define __put_user_asm2(x, ptr, retval) \ - __put_user_asm(x, ptr, retval, "std") -#else /* __powerpc64__ */ -#define __put_user_asm2(x, addr, err) \ - __asm__ __volatile__( \ - "1: stw%X2 %1,%2\n" \ - "2: stw%X2 %L1,%L2\n" \ - "3:\n" \ - ".section .fixup,\"ax\"\n" \ - "4: li %0,%3\n" \ - " b 3b\n" \ - ".previous\n" \ - EX_TABLE(1b, 4b) \ - EX_TABLE(2b, 4b) \ - : "=r" (err) \ - : "r" (x), "m" (*addr), "i" (-EFAULT), "0" (err)) -#endif /* __powerpc64__ */ - #define __put_user_size_allowed(x, ptr, size, retval) \ do { \ __label__ __pu_failed; \ @@ -249,6 +213,11 @@ do { \ }) +/* + * We don't tell gcc that we are accessing memory, but this is OK + * because we do not write to any memory gcc knows about, so there + * are no aliasing issues. 
+ */ #define __put_user_asm_goto(x, addr, label, op) \ asm volatile goto( \ "1: " op "%U1%X1 %0,%1 # put_user\n" \ -- cgit v1.2.3 From c118c7303ad528be8ff2aea8cd1ee15452c763f0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 7 Sep 2020 13:42:09 +0000 Subject: powerpc/32: Fix vmap stack - Do not activate MMU before reading task struct We need r1 to be properly set before activating MMU, so reading task_struct->stack must be done with MMU off. This means we need an additional register to play with MSR bits while r11 now points to the stack. For that, move r10 back to CR (As is already done for hash MMU) and use r10. We still don't have r1 correct yet when we activate MMU. It is done in following patch. Fixes: 028474876f47 ("powerpc/32: prepare for CONFIG_VMAP_STACK") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a027d447022a006c9c4958ac734128e577a3c5c1.1599486108.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_32.S | 6 ------ arch/powerpc/kernel/head_32.h | 31 ++++++------------------------- 2 files changed, 6 insertions(+), 31 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 5624db0e09a1..b0e184f795c0 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -274,14 +274,8 @@ __secondary_hold_acknowledge: DO_KVM 0x200 MachineCheck: EXCEPTION_PROLOG_0 -#ifdef CONFIG_VMAP_STACK - li r11, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ - mtmsr r11 - isync -#endif #ifdef CONFIG_PPC_CHRP mfspr r11, SPRN_SPRG_THREAD - tovirt_vmstack r11, r11 lwz r11, RTAS_SP(r11) cmpwi cr1, r11, 0 bne cr1, 7f diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 9abec6cd099c..21effebb9277 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -39,24 +39,13 @@ .endm .macro EXCEPTION_PROLOG_1 for_rtas=0 -#ifdef CONFIG_VMAP_STACK - .ifeq \for_rtas - li r11, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ - mtmsr r11 - isync - .endif subi r11, r1, INT_FRAME_SIZE /* use r1 if kernel */ -#else - tophys(r11,r1) /* use tophys(r1) if kernel */ - subi r11, r11, INT_FRAME_SIZE /* alloc exc. frame */ -#endif beq 1f mfspr r11,SPRN_SPRG_THREAD - tovirt_vmstack r11, r11 lwz r11,TASK_STACK-THREAD(r11) addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE - tophys_novmstack r11, r11 1: + tophys_novmstack r11, r11 #ifdef CONFIG_VMAP_STACK mtcrf 0x7f, r11 bt 32 - THREAD_ALIGN_SHIFT, stack_overflow @@ -64,12 +53,11 @@ .endm .macro EXCEPTION_PROLOG_2 handle_dar_dsisr=0 -#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S) -BEGIN_MMU_FTR_SECTION +#ifdef CONFIG_VMAP_STACK mtcr r10 -FTR_SECTION_ELSE - stw r10, _CCR(r11) -ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) + li r10, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ + mtmsr r10 + isync #else stw r10,_CCR(r11) /* save registers */ #endif @@ -77,11 +65,9 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) stw r12,GPR12(r11) stw r9,GPR9(r11) stw r10,GPR10(r11) -#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S) -BEGIN_MMU_FTR_SECTION +#ifdef CONFIG_VMAP_STACK mfcr r10 stw r10, _CCR(r11) -END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) #endif mfspr r12,SPRN_SPRG_SCRATCH1 stw r12,GPR11(r11) @@ -97,11 +83,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) stw r10, _DSISR(r11) .endif lwz r9, SRR1(r12) -#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S) -BEGIN_MMU_FTR_SECTION andi. 
r10, r9, MSR_PR -END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) -#endif lwz r12, SRR0(r12) #else mfspr r12,SPRN_SRR0 @@ -328,7 +310,6 @@ label: #ifdef CONFIG_VMAP_STACK #ifdef CONFIG_SMP mfspr r11, SPRN_SPRG_THREAD - tovirt(r11, r11) lwz r11, TASK_CPU - THREAD(r11) slwi r11, r11, 3 addis r11, r11, emergency_ctx@ha -- cgit v1.2.3 From da7bb43ab9da39bcfed0d146ce94e1f0cbae4ca0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 7 Sep 2020 13:42:10 +0000 Subject: powerpc/32: Fix vmap stack - Properly set r1 before activating MMU We need r1 to be properly set before activating MMU, otherwise any new exception taken while saving registers into the stack in exception prologs will use the user stack, which is wrong and will even lockup or crash when KUAP is selected. Do that by switching the meaning of r11 and r1 until we have saved r1 to the stack: copy r1 into r11 and setup the new stack pointer in r1. To avoid complicating and impacting all generic and specific prolog code (and more), copy back r1 into r11 once r11 is save onto the stack. We could get rid of copying r1 back and forth at the cost of rewriting everything to use r1 instead of r11 all the way when CONFIG_VMAP_STACK is set, but the effort is probably not worth it. Fixes: 028474876f47 ("powerpc/32: prepare for CONFIG_VMAP_STACK") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/8f85e8752ac5af602db7237ef53d634f4f3d3892.1599486108.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_32.h | 43 +++++++++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 21effebb9277..cc36998c5541 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -39,15 +39,24 @@ .endm .macro EXCEPTION_PROLOG_1 for_rtas=0 +#ifdef CONFIG_VMAP_STACK + mr r11, r1 + subi r1, r1, INT_FRAME_SIZE /* use r1 if kernel */ + beq 1f + mfspr r1,SPRN_SPRG_THREAD + lwz r1,TASK_STACK-THREAD(r1) + addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE +#else subi r11, r1, INT_FRAME_SIZE /* use r1 if kernel */ beq 1f mfspr r11,SPRN_SPRG_THREAD lwz r11,TASK_STACK-THREAD(r11) addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE +#endif 1: tophys_novmstack r11, r11 #ifdef CONFIG_VMAP_STACK - mtcrf 0x7f, r11 + mtcrf 0x7f, r1 bt 32 - THREAD_ALIGN_SHIFT, stack_overflow #endif .endm @@ -62,6 +71,15 @@ stw r10,_CCR(r11) /* save registers */ #endif mfspr r10, SPRN_SPRG_SCRATCH0 +#ifdef CONFIG_VMAP_STACK + stw r11,GPR1(r1) + stw r11,0(r1) + mr r11, r1 +#else + stw r1,GPR1(r11) + stw r1,0(r11) + tovirt(r1, r11) /* set new kernel sp */ +#endif stw r12,GPR12(r11) stw r9,GPR9(r11) stw r10,GPR10(r11) @@ -89,9 +107,6 @@ mfspr r12,SPRN_SRR0 mfspr r9,SPRN_SRR1 #endif - stw r1,GPR1(r11) - stw r1,0(r11) - tovirt_novmstack r1, r11 /* set new kernel sp */ #ifdef CONFIG_40x rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) 
*/ #else @@ -309,19 +324,19 @@ label: .macro vmap_stack_overflow_exception #ifdef CONFIG_VMAP_STACK #ifdef CONFIG_SMP - mfspr r11, SPRN_SPRG_THREAD - lwz r11, TASK_CPU - THREAD(r11) - slwi r11, r11, 3 - addis r11, r11, emergency_ctx@ha + mfspr r1, SPRN_SPRG_THREAD + lwz r1, TASK_CPU - THREAD(r1) + slwi r1, r1, 3 + addis r1, r1, emergency_ctx@ha #else - lis r11, emergency_ctx@ha + lis r1, emergency_ctx@ha #endif - lwz r11, emergency_ctx@l(r11) - cmpwi cr1, r11, 0 + lwz r1, emergency_ctx@l(r1) + cmpwi cr1, r1, 0 bne cr1, 1f - lis r11, init_thread_union@ha - addi r11, r11, init_thread_union@l -1: addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE + lis r1, init_thread_union@ha + addi r1, r1, init_thread_union@l +1: addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE EXCEPTION_PROLOG_2 SAVE_NVGPRS(r11) addi r3, r1, STACK_FRAME_OVERHEAD -- cgit v1.2.3 From 04d476bfbb06426fef2985c8e53578bb04596a6f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 17 Aug 2020 05:46:41 +0000 Subject: powerpc/process: Replace an #ifdef CONFIG_PPC_47x by IS_ENABLED() isync() is always defined, no need for an #ifdef. Replace it by IS_ENABLED(CONFIG_PPC_47x). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ac8da0e3baa91dda805e1e492fd65aecd90c1fb5.1597643156.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/process.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 483e36a42617..59dde9a6d3a0 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -831,9 +831,8 @@ static void switch_hw_breakpoint(struct task_struct *new) static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) { mtspr(SPRN_DAC1, dabr); -#ifdef CONFIG_PPC_47x - isync(); -#endif + if (IS_ENABLED(CONFIG_PPC_47x)) + isync(); return 0; } #elif defined(CONFIG_PPC_BOOK3S) -- cgit v1.2.3 From bfac2799301c19d81122af04a8a3ad5ecae3737e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 17 Aug 2020 05:46:42 +0000 Subject: powerpc/process: Replace an #ifdef CONFIG_PPC_BOOK3S_64 by IS_ENABLED() This #ifdef CONFIG_PPC_BOOK3S_64 calls preload_new_slb_context() when radix is not enabled. radix_enabled() is always defined, and the prototype for preload_new_slb_context() is always present, so the #ifdef is unneeded. Replace it by IS_ENABLED(). 
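The transformation is mechanical; in outline it is the hunk shown below, with the reasons spelled out as comments:

/* Before: the call is invisible to the compiler on every other config. */
#ifdef CONFIG_PPC_BOOK3S_64
	if (!radix_enabled())
		preload_new_slb_context(start, sp);
#endif

/* After: always parsed and type-checked, then constant-folded away when
 * IS_ENABLED(CONFIG_PPC_BOOK3S_64) evaluates to 0, so the generated code
 * is the same while build coverage improves. */
	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled())
		preload_new_slb_context(start, sp);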
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d31506ca9bac9def68cf7424eded63fdc4fb6660.1597643167.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/process.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 59dde9a6d3a0..47c401246c40 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1780,10 +1780,8 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) #ifdef CONFIG_PPC64 unsigned long load_addr = regs->gpr[2]; /* saved by ELF_PLAT_INIT */ -#ifdef CONFIG_PPC_BOOK3S_64 - if (!radix_enabled()) + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled()) preload_new_slb_context(start, sp); -#endif #endif /* -- cgit v1.2.3 From 2ec42996f5b12826466300a755413577b6913206 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 17 Aug 2020 05:46:43 +0000 Subject: powerpc/process: Replace an #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) by IS_ENABLED() The #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) encloses some printk which can be compiled in all cases. Replace by IS_ENABLED(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a1b6ef3d657c8f249193442f56868fc358ea5b6c.1597643160.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/process.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 47c401246c40..4c20136dbdf6 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1502,12 +1502,13 @@ void show_regs(struct pt_regs * regs) trap = TRAP(regs); if (!trap_is_syscall(regs) && cpu_has_feature(CPU_FTR_CFAR)) pr_cont("CFAR: "REG" ", regs->orig_gpr3); - if (trap == 0x200 || trap == 0x300 || trap == 0x600) -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) - pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr); -#else - pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr); -#endif + if (trap == 0x200 || trap == 0x300 || trap == 0x600) { + if (IS_ENABLED(CONFIG_4xx) || IS_ENABLED(CONFIG_BOOKE)) + pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr); + else + pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr); + } + #ifdef CONFIG_PPC64 pr_cont("IRQMASK: %lx ", regs->softe); #endif -- cgit v1.2.3 From 8f020c7ca300fd60374f0347814c92ea513c24da Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 17 Aug 2020 05:46:44 +0000 Subject: powerpc/process: Replace #ifdef CONFIG_KALLSYMS by IS_ENABLED() The #ifdef CONFIG_KALLSYMS encloses some printk which can compile in all cases. Replace by IS_ENABLED(). 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/2d89732a9062b2cf2651728804e4b8f6c9b9358e.1597643164.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/process.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 4c20136dbdf6..743afa368849 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1525,14 +1525,14 @@ void show_regs(struct pt_regs * regs) break; } pr_cont("\n"); -#ifdef CONFIG_KALLSYMS /* * Lookup NIP late so we have the best change of getting the * above info out without failing */ - printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip); - printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link); -#endif + if (IS_ENABLED(CONFIG_KALLSYMS)) { + printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip); + printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link); + } show_stack(current, (unsigned long *) regs->gpr[1], KERN_DEFAULT); if (!user_mode(regs)) show_instructions(regs); -- cgit v1.2.3 From 60d62bfd24efce1a595d259100b8a4e7a489e834 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 17 Aug 2020 05:46:45 +0000 Subject: powerpc/process: Tag an #endif to help locate the matching #ifdef. That #endif is more than 100 lines after the matching #ifdef, and there are several #ifdef/#else/#endif inbetween. Tag it as /* CONFIG_PPC_BOOK3S_64 */ to help locate the matching #ifdef. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3612a8f8aaca16de3fc414a7e66293319d6e213c.1597643147.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 743afa368849..3bc671df5b51 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -581,7 +581,7 @@ void notrace restore_math(struct pt_regs *regs) regs->msr |= new_msr | fpexc_mode; } } -#endif +#endif /* CONFIG_PPC_BOOK3S_64 */ static void save_all(struct task_struct *tsk) { -- cgit v1.2.3 From 80739c2bd29133715d6828e333649a55095f4747 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 17 Aug 2020 05:47:55 +0000 Subject: powerpc/process: Remove useless #ifdef CONFIG_VSX cpu_has_feature(CPU_FTR_VSX) returns false when CONFIG_VSX is not set. There is no need to enclose the test in an #ifdef CONFIG_VSX. Remove it. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0eb61cf0dc66d781d47deb2228498cd61d03a754.1597643221.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/process.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 3bc671df5b51..fb079f11a6df 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -124,10 +124,8 @@ unsigned long notrace msr_check_and_set(unsigned long bits) newmsr = oldmsr | bits; -#ifdef CONFIG_VSX if (cpu_has_feature(CPU_FTR_VSX) && (bits & MSR_FP)) newmsr |= MSR_VSX; -#endif if (oldmsr != newmsr) mtmsr_isync(newmsr); @@ -144,10 +142,8 @@ void notrace __msr_check_and_clear(unsigned long bits) newmsr = oldmsr & ~bits; -#ifdef CONFIG_VSX if (cpu_has_feature(CPU_FTR_VSX) && (bits & MSR_FP)) newmsr &= ~MSR_VSX; -#endif if (oldmsr != newmsr) mtmsr_isync(newmsr); @@ -162,10 +158,8 @@ static void __giveup_fpu(struct task_struct *tsk) save_fpu(tsk); msr = tsk->thread.regs->msr; msr &= ~(MSR_FP|MSR_FE0|MSR_FE1); -#ifdef CONFIG_VSX if (cpu_has_feature(CPU_FTR_VSX)) msr &= ~MSR_VSX; -#endif tsk->thread.regs->msr = msr; } @@ -245,10 +239,8 @@ static void __giveup_altivec(struct task_struct *tsk) save_altivec(tsk); msr = tsk->thread.regs->msr; msr &= ~MSR_VEC; -#ifdef CONFIG_VSX if (cpu_has_feature(CPU_FTR_VSX)) msr &= ~MSR_VSX; -#endif tsk->thread.regs->msr = msr; } @@ -421,10 +413,8 @@ static int __init init_msr_all_available(void) if (cpu_has_feature(CPU_FTR_ALTIVEC)) msr_all_available |= MSR_VEC; #endif -#ifdef CONFIG_VSX if (cpu_has_feature(CPU_FTR_VSX)) msr_all_available |= MSR_VSX; -#endif #ifdef CONFIG_SPE if (cpu_has_feature(CPU_FTR_SPE)) msr_all_available |= MSR_SPE; @@ -509,19 +499,18 @@ static bool should_restore_altivec(void) { return false; } static void do_restore_altivec(void) { } #endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_VSX static bool should_restore_vsx(void) { if (cpu_has_feature(CPU_FTR_VSX)) return true; return false; } +#ifdef CONFIG_VSX static void do_restore_vsx(void) { current->thread.used_vsr = 1; } #else -static bool should_restore_vsx(void) { return false; } static void do_restore_vsx(void) { } #endif /* CONFIG_VSX */ -- cgit v1.2.3 From e3667ee427e224f9951eb3940a97477285548134 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 17 Aug 2020 05:47:56 +0000 Subject: powerpc/process: Remove useless #ifdef CONFIG_ALTIVEC cpu_has_feature(CPU_FTR_ALTIVEC) returns false when CONFIG_ALTIVEC is not set. There is no need to enclose the test in an #ifdef CONFIG_ALTIVEC. Remove it. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/03ba6b52344ca7c336df2bc6e3d31d736c804ae2.1597643221.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/process.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index fb079f11a6df..52019b6962ba 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -409,10 +409,8 @@ static int __init init_msr_all_available(void) #ifdef CONFIG_PPC_FPU msr_all_available |= MSR_FP; #endif -#ifdef CONFIG_ALTIVEC if (cpu_has_feature(CPU_FTR_ALTIVEC)) msr_all_available |= MSR_VEC; -#endif if (cpu_has_feature(CPU_FTR_VSX)) msr_all_available |= MSR_VSX; #ifdef CONFIG_SPE @@ -446,10 +444,8 @@ void giveup_all(struct task_struct *tsk) if (usermsr & MSR_FP) __giveup_fpu(tsk); #endif -#ifdef CONFIG_ALTIVEC if (usermsr & MSR_VEC) __giveup_altivec(tsk); -#endif #ifdef CONFIG_SPE if (usermsr & MSR_SPE) __giveup_spe(tsk); -- cgit v1.2.3 From 532ed1900d37a47c821718a0d8d28eb05b2c4d28 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 17 Aug 2020 05:47:57 +0000 Subject: powerpc/process: Remove useless #ifdef CONFIG_SPE cpu_has_feature(CPU_FTR_SPE) returns false when CONFIG_SPE is not set. There is no need to enclose the test in an #ifdef CONFIG_SPE. Remove it. CPU_FTR_SPE only exists on 32 bits. Define it as 0 on 64 bits. We have a couple of places like: #ifdef CONFIG_SPE if (cpu_has_feature(CPU_FTR_SPE)) { do_something_that_requires_CONFIG_SPE } else { return -EINVAL; } #else return -EINVAL; #endif Replace them by a cleaner version: if (cpu_has_feature(CPU_FTR_SPE)) { #ifdef CONFIG_SPE do_something_that_requires_CONFIG_SPE #endif } else { return -EINVAL; } When CONFIG_SPE is not set, this resolves to an unconditional return of -EINVAL Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/698df8387555765b70ea42e4a7fa48141c309c1f.1597643221.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/cputable.h | 1 + arch/powerpc/kernel/process.c | 21 +++++++-------------- 2 files changed, 8 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 32a15dc49e8c..8ca5885bd5b9 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -170,6 +170,7 @@ static inline void cpu_feature_keys_init(void) { } #else /* CONFIG_PPC32 */ /* Define these to 0 for the sake of tests in common code */ #define CPU_FTR_PPC_LE (0) +#define CPU_FTR_SPE (0) #endif /* diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 52019b6962ba..348d4355bc00 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -413,10 +413,8 @@ static int __init init_msr_all_available(void) msr_all_available |= MSR_VEC; if (cpu_has_feature(CPU_FTR_VSX)) msr_all_available |= MSR_VSX; -#ifdef CONFIG_SPE if (cpu_has_feature(CPU_FTR_SPE)) msr_all_available |= MSR_SPE; -#endif return 0; } @@ -446,10 +444,8 @@ void giveup_all(struct task_struct *tsk) #endif if (usermsr & MSR_VEC) __giveup_altivec(tsk); -#ifdef CONFIG_SPE if (usermsr & MSR_SPE) __giveup_spe(tsk); -#endif msr_check_and_clear(msr_all_available); } @@ -1899,7 +1895,6 @@ int set_fpexc_mode(struct task_struct *tsk, unsigned int val) * fpexc_mode. fpexc_mode is also used for setting FP exception * mode (asyn, precise, disabled) for 'Classic' FP. 
*/ if (val & PR_FP_EXC_SW_ENABLE) { -#ifdef CONFIG_SPE if (cpu_has_feature(CPU_FTR_SPE)) { /* * When the sticky exception bits are set @@ -1913,16 +1908,15 @@ int set_fpexc_mode(struct task_struct *tsk, unsigned int val) * anyway to restore the prctl settings from * the saved environment. */ +#ifdef CONFIG_SPE tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR); tsk->thread.fpexc_mode = val & (PR_FP_EXC_SW_ENABLE | PR_FP_ALL_EXCEPT); +#endif return 0; } else { return -EINVAL; } -#else - return -EINVAL; -#endif } /* on a CONFIG_SPE this does not hurt us. The bits that @@ -1943,8 +1937,7 @@ int get_fpexc_mode(struct task_struct *tsk, unsigned long adr) { unsigned int val; - if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) -#ifdef CONFIG_SPE + if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) { if (cpu_has_feature(CPU_FTR_SPE)) { /* * When the sticky exception bits are set @@ -1958,15 +1951,15 @@ int get_fpexc_mode(struct task_struct *tsk, unsigned long adr) * anyway to restore the prctl settings from * the saved environment. */ +#ifdef CONFIG_SPE tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR); val = tsk->thread.fpexc_mode; +#endif } else return -EINVAL; -#else - return -EINVAL; -#endif - else + } else { val = __unpack_fe01(tsk->thread.fpexc_mode); + } return put_user(val, (unsigned int __user *) adr); } -- cgit v1.2.3 From c83c192a6fbb1d4db4144c40296ed059f5eca384 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 17 Aug 2020 05:47:58 +0000 Subject: powerpc/process: Remove useless #ifdef CONFIG_PPC_FPU Add a stub for __giveup_fpu() when CONFIG_PPC_FPU is not selected, as done for CONFIG_SPE and CONFIG_ALTIVEC. This allows to remove some #ifdef CONFIG_PPC_FPU. Also change one to IS_ENABLED(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/69c8b7954ceeccc6b849e52e1fa41b3a0f10f6c1.1597643221.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/process.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 348d4355bc00..14d5189b17d8 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -229,6 +229,8 @@ void enable_kernel_fp(void) } } EXPORT_SYMBOL(enable_kernel_fp); +#else +static inline void __giveup_fpu(struct task_struct *tsk) { } #endif /* CONFIG_PPC_FPU */ #ifdef CONFIG_ALTIVEC @@ -406,9 +408,8 @@ static unsigned long msr_all_available; static int __init init_msr_all_available(void) { -#ifdef CONFIG_PPC_FPU - msr_all_available |= MSR_FP; -#endif + if (IS_ENABLED(CONFIG_PPC_FPU)) + msr_all_available |= MSR_FP; if (cpu_has_feature(CPU_FTR_ALTIVEC)) msr_all_available |= MSR_VEC; if (cpu_has_feature(CPU_FTR_VSX)) @@ -438,10 +439,8 @@ void giveup_all(struct task_struct *tsk) WARN_ON((usermsr & MSR_VSX) && !((usermsr & MSR_FP) && (usermsr & MSR_VEC))); -#ifdef CONFIG_PPC_FPU if (usermsr & MSR_FP) __giveup_fpu(tsk); -#endif if (usermsr & MSR_VEC) __giveup_altivec(tsk); if (usermsr & MSR_SPE) -- cgit v1.2.3 From 2c637d2df4ee4830e9d3eb2bd5412250522ce96e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 11 Sep 2020 10:29:15 +0000 Subject: powerpc/powermac: Fix low_sleep_handler with KUAP and KUEP low_sleep_handler() has an hardcoded restore of segment registers that doesn't take KUAP and KUEP into account. Use head_32's load_segment_registers() routine instead. 
Fixes: a68c31fc01ef ("powerpc/32s: Implement Kernel Userspace Access Protection") Fixes: 31ed2b13c48d ("powerpc/32s: Implement Kernel Userspace Execution Prevention.") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/21b05f7298c1b18f73e6e5b4cd5005aafa24b6da.1599820109.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_32.S | 2 +- arch/powerpc/platforms/powermac/sleep.S | 9 +-------- 2 files changed, 2 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index b0e184f795c0..2bd0aa3a4cc7 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -996,7 +996,7 @@ BEGIN_MMU_FTR_SECTION END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) blr -load_segment_registers: +_GLOBAL(load_segment_registers) li r0, NUM_USER_SEGMENTS /* load up user segment register values */ mtctr r0 /* for context 0 */ li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */ diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S index f9a680fdd9c4..51bfdfe85058 100644 --- a/arch/powerpc/platforms/powermac/sleep.S +++ b/arch/powerpc/platforms/powermac/sleep.S @@ -294,14 +294,7 @@ grackle_wake_up: * we do any r1 memory access as we are not sure they * are in a sane state above the first 256Mb region */ - li r0,16 /* load up segment register values */ - mtctr r0 /* for context 0 */ - lis r3,0x2000 /* Ku = 1, VSID = 0 */ - li r4,0 -3: mtsrin r3,r4 - addi r3,r3,0x111 /* increment VSID */ - addis r4,r4,0x1000 /* address of next segment */ - bdnz 3b + bl load_segment_registers sync isync -- cgit v1.2.3 From 4c42dc5c69a8f24c467a6c997909d2f1d4efdc7f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 11 Sep 2020 05:05:38 +0000 Subject: powerpc/kasan: Fix CONFIG_KASAN_VMALLOC for 8xx Before the commit identified below, pages tables allocation was performed after the allocation of final shadow area for linear memory. But that commit switched the order, leading to page tables being already allocated at the time 8xx kasan_init_shadow_8M() is called. Due to this, kasan_init_shadow_8M() doesn't map the needed shadow entries because there are already page tables. kasan_init_shadow_8M() installs huge PMD entries instead of page tables. We could at that time free the page tables, but there is no point in creating page tables that get freed before being used. Only book3s/32 hash needs early allocation of page tables. For other variants, we can keep the initial order and create remaining page tables after the allocation of final shadow memory for linear mem. Move back the allocation of shadow page tables for CONFIG_KASAN_VMALLOC into kasan_init() after the loop which creates final shadow memory for linear mem. 
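The resulting 8xx ordering is: kasan_mmu_init() only pre-allocates shadow page tables for the hash-MMU case, kasan_init() then maps the final shadow for each memory block through kasan_init_region() (installing huge mappings where it can), and only afterwards, when CONFIG_KASAN_VMALLOC is set, builds the remaining shadow page tables, so the early mapping code never finds page tables already sitting in its way.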
Fixes: 41ea93cf7ba4 ("powerpc/kasan: Fix shadow pages allocation failure") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/8ae4554357da4882612644a74387ae05525b2aaa.1599800716.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/kasan/kasan_init_32.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index fb294046e00e..929716ea21e9 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -127,8 +127,7 @@ void __init kasan_mmu_init(void) { int ret; - if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE) || - IS_ENABLED(CONFIG_KASAN_VMALLOC)) { + if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) { ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END); if (ret) @@ -139,11 +138,11 @@ void __init kasan_mmu_init(void) void __init kasan_init(void) { struct memblock_region *reg; + int ret; for_each_memblock(memory, reg) { phys_addr_t base = reg->base; phys_addr_t top = min(base + reg->size, total_lowmem); - int ret; if (base >= top) continue; @@ -153,6 +152,13 @@ void __init kasan_init(void) panic("kasan: kasan_init_region() failed"); } + if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { + ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END); + + if (ret) + panic("kasan: kasan_init_shadow_page_tables() failed"); + } + kasan_remap_early_shadow_ro(); clear_page(kasan_early_shadow_page); -- cgit v1.2.3 From bbc4f40b5322b3e0b8678619f1c613dadc811669 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 11 Sep 2020 10:01:21 +0800 Subject: powerpc/ps3: make two symbols static This addresses the following sparse warning: arch/powerpc/platforms/ps3/spu.c:451:33: warning: symbol 'spu_management_ps3_ops' was not declared. Should it be static? arch/powerpc/platforms/ps3/spu.c:592:28: warning: symbol 'spu_priv1_ps3_ops' was not declared. Should it be static? Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200911020121.1464585-1-yanaijie@huawei.com --- arch/powerpc/platforms/ps3/spu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c index 1193c294b8d0..0c252478e556 100644 --- a/arch/powerpc/platforms/ps3/spu.c +++ b/arch/powerpc/platforms/ps3/spu.c @@ -448,7 +448,7 @@ static void ps3_disable_spu(struct spu_context *ctx) ctx->ops->runcntl_stop(ctx); } -const struct spu_management_ops spu_management_ps3_ops = { +static const struct spu_management_ops spu_management_ps3_ops = { .enumerate_spus = ps3_enumerate_spus, .create_spu = ps3_create_spu, .destroy_spu = ps3_destroy_spu, @@ -589,7 +589,7 @@ static u64 resource_allocation_enable_get(struct spu *spu) return 0; /* No support. */ } -const struct spu_priv1_ops spu_priv1_ps3_ops = { +static const struct spu_priv1_ops spu_priv1_ps3_ops = { .int_mask_and = int_mask_and, .int_mask_or = int_mask_or, .int_mask_set = int_mask_set, -- cgit v1.2.3 From 79b123cdf9cf0d4a1620baa8c611962626323a08 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 7 Sep 2020 12:55:39 +0530 Subject: powerepc/book3s64/hash: Align start/end address correctly with bolt mapping This ensures we don't do a partial mapping of memory. With nvdimm, when creating namespaces with size not aligned to 16MB, the kernel ends up partially mapping the pages. 
This can result in kernel adding multiple hash page table entries for the same range. A new namespace will result in create_section_mapping() with start and end overlapping an already existing bolted hash page table entry. commit: 6acd7d5ef264 ("libnvdimm/namespace: Enforce memremap_compat_align()") made sure that we always create namespaces aligned to 16MB. But we can do better by avoiding mapping pages that are not aligned. This helps to catch access to these partially mapped pages early. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200907072539.67310-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/book3s64/hash_utils.c | 12 +++++++++--- arch/powerpc/mm/book3s64/radix_pgtable.c | 1 + 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index c663e7ba801f..7185bc43b24f 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -260,8 +260,12 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, DBG("htab_bolt_mapping(%lx..%lx -> %lx (%lx,%d,%d)\n", vstart, vend, pstart, prot, psize, ssize); - for (vaddr = vstart, paddr = pstart; vaddr < vend; - vaddr += step, paddr += step) { + /* Carefully map only the possible range */ + vaddr = ALIGN(vstart, step); + paddr = ALIGN(pstart, step); + vend = ALIGN_DOWN(vend, step); + + for (; vaddr < vend; vaddr += step, paddr += step) { unsigned long hash, hpteg; unsigned long vsid = get_kernel_vsid(vaddr, ssize); unsigned long vpn = hpt_vpn(vaddr, vsid, ssize); @@ -343,7 +347,9 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend, if (!mmu_hash_ops.hpte_removebolted) return -ENODEV; - for (vaddr = vstart; vaddr < vend; vaddr += step) { + /* Unmap the full range specificied */ + vaddr = ALIGN_DOWN(vstart, step); + for (;vaddr < vend; vaddr += step) { rc = mmu_hash_ops.hpte_removebolted(vaddr, psize, ssize); if (rc == -ENOENT) { ret = -ENOENT; diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index d5f0c10d752a..5c8adeb8c955 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -276,6 +276,7 @@ static int __meminit create_physical_mapping(unsigned long start, int psize; start = ALIGN(start, PAGE_SIZE); + end = ALIGN_DOWN(end, PAGE_SIZE); for (addr = start; addr < end; addr += mapping_size) { unsigned long gap, previous_size; int rc; -- cgit v1.2.3 From ffd2961bb41f797eb00b58e019b707555197275e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 19 Aug 2020 19:47:00 +1000 Subject: powerpc/powernv/idle: add a basic stop 0-3 driver for POWER10 This driver does not restore stop > 3 state, so it limits itself to states which do not lose full state or TB. The POWER10 SPRs are sufficiently different from P9 that it seems easier to split out the P10 code. The POWER10 deep sleep code (e.g., the BHRB restore) has been taken out, but it can be re-added when stop > 3 support is added. Signed-off-by: Nicholas Piggin Tested-by: Pratik Rajesh Sampat Tested-by: Vaidyanathan Srinivasan Reviewed-by: Pratik Rajesh Sampat Reviewed-by: Gautham R. 
Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200819094700.493399-1-npiggin@gmail.com --- arch/powerpc/include/asm/machdep.h | 2 - arch/powerpc/include/asm/processor.h | 2 +- arch/powerpc/include/asm/reg.h | 1 + arch/powerpc/platforms/powernv/idle.c | 302 +++++++++++++++++++++++----------- drivers/cpuidle/cpuidle-powernv.c | 2 +- 5 files changed, 212 insertions(+), 97 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index a90b892f0bfe..5082cd496190 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -222,8 +222,6 @@ struct machdep_calls { extern void e500_idle(void); extern void power4_idle(void); -extern void power7_idle(void); -extern void power9_idle(void); extern void ppc6xx_idle(void); extern void book3e_idle(void); diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 36a71cd41f37..22ffe85a91b8 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -432,7 +432,7 @@ enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF}; extern int powersave_nap; /* set if nap mode can be used in idle loop */ extern void power7_idle_type(unsigned long type); -extern void power9_idle_type(unsigned long stop_psscr_val, +extern void arch300_idle_type(unsigned long stop_psscr_val, unsigned long stop_psscr_mask); extern int fix_alignment(struct pt_regs *); diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 5647006ed373..d25c357a873c 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1353,6 +1353,7 @@ #define PVR_POWER8NVL 0x004C #define PVR_POWER8 0x004D #define PVR_POWER9 0x004E +#define PVR_POWER10 0x0080 #define PVR_BE 0x0070 #define PVR_PA6T 0x0090 diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 345ab062b21a..1ed7c5286487 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -565,7 +565,7 @@ void power7_idle_type(unsigned long type) irq_set_pending_from_srr1(srr1); } -void power7_idle(void) +static void power7_idle(void) { if (!powersave_nap) return; @@ -659,20 +659,6 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) mmcr0 = mfspr(SPRN_MMCR0); } - if (cpu_has_feature(CPU_FTR_ARCH_31)) { - /* - * POWER10 uses MMCRA (BHRBRD) as BHRB disable bit. - * If the user hasn't asked for the BHRB to be - * written, the value of MMCRA[BHRBRD] is 1. - * On wakeup from stop, MMCRA[BHRBD] will be 0, - * since it is previleged resource and will be lost. - * Thus, if we do not save and restore the MMCRA[BHRBD], - * hardware will be needlessly writing to the BHRB - * in problem mode. - */ - mmcra = mfspr(SPRN_MMCRA); - } - if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) { sprs.lpcr = mfspr(SPRN_LPCR); sprs.hfscr = mfspr(SPRN_HFSCR); @@ -735,10 +721,6 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) mtspr(SPRN_MMCR0, mmcr0); } - /* Reload MMCRA to restore BHRB disable bit for POWER10 */ - if (cpu_has_feature(CPU_FTR_ARCH_31)) - mtspr(SPRN_MMCRA, mmcra); - /* * DD2.2 and earlier need to set then clear bit 60 in MMCRA * to ensure the PMU starts running. 
@@ -823,73 +805,6 @@ out: return srr1; } -#ifdef CONFIG_HOTPLUG_CPU -static unsigned long power9_offline_stop(unsigned long psscr) -{ - unsigned long srr1; - -#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE - __ppc64_runlatch_off(); - srr1 = power9_idle_stop(psscr, true); - __ppc64_runlatch_on(); -#else - /* - * Tell KVM we're entering idle. - * This does not have to be done in real mode because the P9 MMU - * is independent per-thread. Some steppings share radix/hash mode - * between threads, but in that case KVM has a barrier sync in real - * mode before and after switching between radix and hash. - * - * kvm_start_guest must still be called in real mode though, hence - * the false argument. - */ - local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE; - - __ppc64_runlatch_off(); - srr1 = power9_idle_stop(psscr, false); - __ppc64_runlatch_on(); - - local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL; - /* Order setting hwthread_state vs. testing hwthread_req */ - smp_mb(); - if (local_paca->kvm_hstate.hwthread_req) - srr1 = idle_kvm_start_guest(srr1); - mtmsr(MSR_KERNEL); -#endif - - return srr1; -} -#endif - -void power9_idle_type(unsigned long stop_psscr_val, - unsigned long stop_psscr_mask) -{ - unsigned long psscr; - unsigned long srr1; - - if (!prep_irq_for_idle_irqsoff()) - return; - - psscr = mfspr(SPRN_PSSCR); - psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val; - - __ppc64_runlatch_off(); - srr1 = power9_idle_stop(psscr, true); - __ppc64_runlatch_on(); - - fini_irq_for_idle_irqsoff(); - - irq_set_pending_from_srr1(srr1); -} - -/* - * Used for ppc_md.power_save which needs a function with no parameters - */ -void power9_idle(void) -{ - power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask); -} - #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE /* * This is used in working around bugs in thread reconfiguration @@ -962,6 +877,198 @@ void pnv_power9_force_smt4_release(void) EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release); #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ +struct p10_sprs { + /* + * SPRs that get lost in shallow states: + * + * P10 loses CR, LR, CTR, FPSCR, VSCR, XER, TAR, SPRG2, and HSPRG1 + * isa300 idle routines restore CR, LR. + * CTR is volatile + * idle thread doesn't use FP or VEC + * kernel doesn't use TAR + * HSPRG1 is only live in HV interrupt entry + * SPRG2 is only live in KVM guests, KVM handles it. + */ +}; + +static unsigned long power10_idle_stop(unsigned long psscr, bool mmu_on) +{ + int cpu = raw_smp_processor_id(); + int first = cpu_first_thread_sibling(cpu); + unsigned long *state = &paca_ptrs[first]->idle_state; + unsigned long core_thread_mask = (1UL << threads_per_core) - 1; + unsigned long srr1; + unsigned long pls; +// struct p10_sprs sprs = {}; /* avoid false used-uninitialised */ + bool sprs_saved = false; + + if (!(psscr & (PSSCR_EC|PSSCR_ESL))) { + /* EC=ESL=0 case */ + + BUG_ON(!mmu_on); + + /* + * Wake synchronously. SRESET via xscom may still cause + * a 0x100 powersave wakeup with SRR1 reason! + */ + srr1 = isa300_idle_stop_noloss(psscr); /* go idle */ + if (likely(!srr1)) + return 0; + + /* + * Registers not saved, can't recover! + * This would be a hardware bug + */ + BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS); + + goto out; + } + + /* EC=ESL=1 case */ + if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) { + /* XXX: save SPRs for deep state loss here. 
*/ + + sprs_saved = true; + + atomic_start_thread_idle(); + } + + srr1 = isa300_idle_stop_mayloss(psscr); /* go idle */ + + psscr = mfspr(SPRN_PSSCR); + + WARN_ON_ONCE(!srr1); + WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR)); + + if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI)) + hmi_exception_realmode(NULL); + + /* + * On POWER10, SRR1 bits do not match exactly as expected. + * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so + * just always test PSSCR for SPR/TB state loss. + */ + pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT; + if (likely(pls < deep_spr_loss_state)) { + if (sprs_saved) + atomic_stop_thread_idle(); + goto out; + } + + /* HV state loss */ + BUG_ON(!sprs_saved); + + atomic_lock_thread_idle(); + + if ((*state & core_thread_mask) != 0) + goto core_woken; + + /* XXX: restore per-core SPRs here */ + + if (pls >= pnv_first_tb_loss_level) { + /* TB loss */ + if (opal_resync_timebase() != OPAL_SUCCESS) + BUG(); + } + + /* + * isync after restoring shared SPRs and before unlocking. Unlock + * only contains hwsync which does not necessarily do the right + * thing for SPRs. + */ + isync(); + +core_woken: + atomic_unlock_and_stop_thread_idle(); + + /* XXX: restore per-thread SPRs here */ + + if (!radix_enabled()) + __slb_restore_bolted_realmode(); + +out: + if (mmu_on) + mtmsr(MSR_KERNEL); + + return srr1; +} + +#ifdef CONFIG_HOTPLUG_CPU +static unsigned long arch300_offline_stop(unsigned long psscr) +{ + unsigned long srr1; + +#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE + __ppc64_runlatch_off(); + if (cpu_has_feature(CPU_FTR_ARCH_31)) + srr1 = power10_idle_stop(psscr, true); + else + srr1 = power9_idle_stop(psscr, true); + __ppc64_runlatch_on(); +#else + /* + * Tell KVM we're entering idle. + * This does not have to be done in real mode because the P9 MMU + * is independent per-thread. Some steppings share radix/hash mode + * between threads, but in that case KVM has a barrier sync in real + * mode before and after switching between radix and hash. + * + * kvm_start_guest must still be called in real mode though, hence + * the false argument. + */ + local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE; + + __ppc64_runlatch_off(); + if (cpu_has_feature(CPU_FTR_ARCH_31)) + srr1 = power10_idle_stop(psscr, false); + else + srr1 = power9_idle_stop(psscr, false); + __ppc64_runlatch_on(); + + local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL; + /* Order setting hwthread_state vs. 
testing hwthread_req */ + smp_mb(); + if (local_paca->kvm_hstate.hwthread_req) + srr1 = idle_kvm_start_guest(srr1); + mtmsr(MSR_KERNEL); +#endif + + return srr1; +} +#endif + +void arch300_idle_type(unsigned long stop_psscr_val, + unsigned long stop_psscr_mask) +{ + unsigned long psscr; + unsigned long srr1; + + if (!prep_irq_for_idle_irqsoff()) + return; + + psscr = mfspr(SPRN_PSSCR); + psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val; + + __ppc64_runlatch_off(); + if (cpu_has_feature(CPU_FTR_ARCH_31)) + srr1 = power10_idle_stop(psscr, true); + else + srr1 = power9_idle_stop(psscr, true); + __ppc64_runlatch_on(); + + fini_irq_for_idle_irqsoff(); + + irq_set_pending_from_srr1(srr1); +} + +/* + * Used for ppc_md.power_save which needs a function with no parameters + */ +static void arch300_idle(void) +{ + arch300_idle_type(pnv_default_stop_val, pnv_default_stop_mask); +} + #ifdef CONFIG_HOTPLUG_CPU void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val) @@ -995,7 +1102,7 @@ unsigned long pnv_cpu_offline(unsigned int cpu) psscr = mfspr(SPRN_PSSCR); psscr = (psscr & ~pnv_deepest_stop_psscr_mask) | pnv_deepest_stop_psscr_val; - srr1 = power9_offline_stop(psscr); + srr1 = arch300_offline_stop(psscr); } else if (cpu_has_feature(CPU_FTR_ARCH_206) && power7_offline_type) { srr1 = power7_offline(); } else { @@ -1093,11 +1200,15 @@ int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags) * @dt_idle_states: Number of idle state entries * Returns 0 on success */ -static void __init pnv_power9_idle_init(void) +static void __init pnv_arch300_idle_init(void) { u64 max_residency_ns = 0; int i; + /* stop is not really architected, we only have p9,p10 drivers */ + if (!pvr_version_is(PVR_POWER10) && !pvr_version_is(PVR_POWER9)) + return; + /* * pnv_deepest_stop_{val,mask} should be set to values corresponding to * the deepest stop state. @@ -1112,6 +1223,11 @@ static void __init pnv_power9_idle_init(void) struct pnv_idle_states_t *state = &pnv_idle_states[i]; u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK; + /* No deep loss driver implemented for POWER10 yet */ + if (pvr_version_is(PVR_POWER10) && + state->flags & (OPAL_PM_TIMEBASE_STOP|OPAL_PM_LOSE_FULL_CONTEXT)) + continue; + if ((state->flags & OPAL_PM_TIMEBASE_STOP) && (pnv_first_tb_loss_level > psscr_rl)) pnv_first_tb_loss_level = psscr_rl; @@ -1162,7 +1278,7 @@ static void __init pnv_power9_idle_init(void) if (unlikely(!default_stop_found)) { pr_warn("cpuidle-powernv: No suitable default stop state found. 
Disabling platform idle.\n"); } else { - ppc_md.power_save = power9_idle; + ppc_md.power_save = arch300_idle; pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n", pnv_default_stop_val, pnv_default_stop_mask); } @@ -1224,7 +1340,7 @@ static void __init pnv_probe_idle_states(void) } if (cpu_has_feature(CPU_FTR_ARCH_300)) - pnv_power9_idle_init(); + pnv_arch300_idle_init(); for (i = 0; i < nr_pnv_idle_states; i++) supported_cpuidle_states |= pnv_idle_states[i].flags; @@ -1295,7 +1411,7 @@ static int pnv_parse_cpuidle_dt(void) for (i = 0; i < nr_idle_states; i++) pnv_idle_states[i].residency_ns = temp_u32[i]; - /* For power9 */ + /* For power9 and later */ if (cpu_has_feature(CPU_FTR_ARCH_300)) { /* Read pm_crtl_val */ if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr", @@ -1358,8 +1474,8 @@ static int __init pnv_init_idle_states(void) if (!cpu_has_feature(CPU_FTR_ARCH_300)) { /* P7/P8 nap */ p->thread_idle_state = PNV_THREAD_RUNNING; - } else { - /* P9 stop */ + } else if (pvr_version_is(PVR_POWER9)) { + /* P9 stop workarounds */ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE p->requested_psscr = 0; atomic_set(&p->dont_stop, 0); diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index addaa6e6718b..c32c600b3cf8 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -141,7 +141,7 @@ static int stop_loop(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { - power9_idle_type(stop_psscr_table[index].val, + arch300_idle_type(stop_psscr_table[index].val, stop_psscr_table[index].mask); return index; } -- cgit v1.2.3 From 3a3181e16fbde752007759f8759d25e0ff1fc425 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Fri, 7 Aug 2020 12:18:54 +0200 Subject: powerpc/pci: unmap legacy INTx interrupts when a PHB is removed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a passthrough IO adapter is removed from a pseries machine using hash MMU and the XIVE interrupt mode, the POWER hypervisor expects the guest OS to clear all page table entries related to the adapter. If some are still present, the RTAS call which isolates the PCI slot returns error 9001 "valid outstanding translations" and the removal of the IO adapter fails. This is because when the PHBs are scanned, Linux maps automatically the INTx interrupts in the Linux interrupt number space but these are never removed. To solve this problem, we introduce a PPC platform specific pcibios_remove_bus() routine which clears all interrupt mappings when the bus is removed. This also clears the associated page table entries of the ESB pages when using XIVE. For this purpose, we record the logical interrupt numbers of the mapped interrupt under the PHB structure and let pcibios_remove_bus() do the clean up. Since some PCI adapters, like GPUs, use the "interrupt-map" property to describe interrupt mappings other than the legacy INTx interrupts, we can not restrict the size of the mapping array to PCI_NUM_INTX. The number of interrupt mappings is computed from the "interrupt-map" property and the mapping array is allocated accordingly. 
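As a purely illustrative sizing example: a PHB node with #address-cells = 3 and #interrupt-cells = 1, whose interrupt parent reports #interrupt-cells = 2 and no #address-cells, needs 3 + 1 + 1 + 0 + 2 = 7 cells per "interrupt-map" entry (child unit address, child interrupt specifier, parent phandle, parent unit address, parent interrupt specifier). A map covering only the four legacy INTx lines is then 28 cells and yields 4 mappings, while an adapter with extra entries simply produces a proportionally larger array, which is why the array is sized from the property instead of being fixed at PCI_NUM_INTX.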
Signed-off-by: Cédric Le Goater Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200807101854.844619-1-clg@kaod.org --- arch/powerpc/include/asm/pci-bridge.h | 6 ++ arch/powerpc/kernel/pci-common.c | 114 ++++++++++++++++++++++++++++++++++ 2 files changed, 120 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index d2a2a14e56f9..d21e070352dc 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -48,6 +48,9 @@ struct pci_controller_ops { /* * Structure of a PCI controller (host bridge) + * + * @irq_count: number of interrupt mappings + * @irq_map: interrupt mappings */ struct pci_controller { struct pci_bus *bus; @@ -127,6 +130,9 @@ struct pci_controller { void *private_data; struct npu *npu; + + unsigned int irq_count; + unsigned int *irq_map; }; /* These are used for config access before all the PCI probing diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index be108616a721..deb831f0ae13 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -353,6 +353,115 @@ struct pci_controller *pci_find_controller_for_domain(int domain_nr) return NULL; } +/* + * Assumption is made on the interrupt parent. All interrupt-map + * entries are considered to have the same parent. + */ +static int pcibios_irq_map_count(struct pci_controller *phb) +{ + const __be32 *imap; + int imaplen; + struct device_node *parent; + u32 intsize, addrsize, parintsize, paraddrsize; + + if (of_property_read_u32(phb->dn, "#interrupt-cells", &intsize)) + return 0; + if (of_property_read_u32(phb->dn, "#address-cells", &addrsize)) + return 0; + + imap = of_get_property(phb->dn, "interrupt-map", &imaplen); + if (!imap) { + pr_debug("%pOF : no interrupt-map\n", phb->dn); + return 0; + } + imaplen /= sizeof(u32); + pr_debug("%pOF : imaplen=%d\n", phb->dn, imaplen); + + if (imaplen < (addrsize + intsize + 1)) + return 0; + + imap += intsize + addrsize; + parent = of_find_node_by_phandle(be32_to_cpup(imap)); + if (!parent) { + pr_debug("%pOF : no imap parent found !\n", phb->dn); + return 0; + } + + if (of_property_read_u32(parent, "#interrupt-cells", &parintsize)) { + pr_debug("%pOF : parent lacks #interrupt-cells!\n", phb->dn); + return 0; + } + + if (of_property_read_u32(parent, "#address-cells", &paraddrsize)) + paraddrsize = 0; + + return imaplen / (addrsize + intsize + 1 + paraddrsize + parintsize); +} + +static void pcibios_irq_map_init(struct pci_controller *phb) +{ + phb->irq_count = pcibios_irq_map_count(phb); + if (phb->irq_count < PCI_NUM_INTX) + phb->irq_count = PCI_NUM_INTX; + + pr_debug("%pOF : interrupt map #%d\n", phb->dn, phb->irq_count); + + phb->irq_map = kcalloc(phb->irq_count, sizeof(unsigned int), + GFP_KERNEL); +} + +static void pci_irq_map_register(struct pci_dev *pdev, unsigned int virq) +{ + struct pci_controller *phb = pci_bus_to_host(pdev->bus); + int i; + + if (!phb->irq_map) + return; + + for (i = 0; i < phb->irq_count; i++) { + /* + * Look for an empty or an equivalent slot, as INTx + * interrupts can be shared between adapters. + */ + if (phb->irq_map[i] == virq || !phb->irq_map[i]) { + phb->irq_map[i] = virq; + break; + } + } + + if (i == phb->irq_count) + pr_err("PCI:%s all platform interrupts mapped\n", + pci_name(pdev)); +} + +/* + * Clearing the mapped interrupts will also clear the underlying + * mappings of the ESB pages of the interrupts when under XIVE. 
It is + * a requirement of PowerVM to clear all memory mappings before + * removing a PHB. + */ +static void pci_irq_map_dispose(struct pci_bus *bus) +{ + struct pci_controller *phb = pci_bus_to_host(bus); + int i; + + if (!phb->irq_map) + return; + + pr_debug("PCI: Clearing interrupt mappings for PHB %04x:%02x...\n", + pci_domain_nr(bus), bus->number); + for (i = 0; i < phb->irq_count; i++) + irq_dispose_mapping(phb->irq_map[i]); + + kfree(phb->irq_map); +} + +void pcibios_remove_bus(struct pci_bus *bus) +{ + pci_irq_map_dispose(bus); +} +EXPORT_SYMBOL_GPL(pcibios_remove_bus); + /* * Reads the interrupt pin to determine if interrupt is use by card. * If the interrupt is used, then gets the interrupt line from the @@ -401,6 +510,8 @@ static int pci_read_irq_line(struct pci_dev *pci_dev) pci_dev->irq = virq; + /* Record all interrut mappings for later removal of a PHB */ + pci_irq_map_register(pci_dev, virq); return 0; } @@ -1554,6 +1665,9 @@ void pcibios_scan_phb(struct pci_controller *hose) pr_debug("PCI: Scanning PHB %pOF\n", node); + /* Allocate interrupt mappings array */ + pcibios_irq_map_init(hose); + /* Get some IO space for the new PHB */ pcibios_setup_phb_io_space(hose); -- cgit v1.2.3 From d53c3dfb23c45f7d4f910c3a3ca84bf0a99c6143 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 14 Sep 2020 14:52:16 +1000 Subject: mm: fix exec activate_mm vs TLB shootdown and lazy tlb switching race Reading and modifying current->mm and current->active_mm and switching mm should be done with irqs off, to prevent races seeing an intermediate state. This is similar to commit 38cf307c1f20 ("mm: fix kthread_use_mm() vs TLB invalidate"). At exec-time when the new mm is activated, the old one should usually be single-threaded and no longer used, unless something else is holding an mm_users reference (which may be possible). Absent other mm_users, there is also a race with preemption and lazy tlb switching. Consider the kernel_execve case where the current thread is using a lazy tlb active mm: call_usermodehelper() kernel_execve() old_mm = current->mm; active_mm = current->active_mm; *** preempt *** --------------------> schedule() prev->active_mm = NULL; mmdrop(prev active_mm); ... <-------------------- schedule() current->mm = mm; current->active_mm = mm; if (!old_mm) mmdrop(active_mm); If we switch back to the kernel thread from a different mm, there is a double free of the old active_mm, and a missing free of the new one. Closing this race only requires interrupts to be disabled while ->mm and ->active_mm are being switched, but the TLB problem requires also holding interrupts off over activate_mm. Unfortunately not all archs can do that yet, e.g., arm defers the switch if irqs are disabled and expects finish_arch_post_lock_switch() to be called to complete the flush; um takes a blocking lock in activate_mm(). So as a first step, disable interrupts across the mm/active_mm updates to close the lazy tlb preempt race, and provide an arch option to extend that to activate_mm which allows architectures doing IPI based TLB shootdowns to close the second race. This is a bit ugly, but in the interest of fixing the bug and backporting before all architectures are converted this is a compromise. 
Signed-off-by: Nicholas Piggin Acked-by: Peter Zijlstra (Intel) Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200914045219.3736466-2-npiggin@gmail.com --- arch/Kconfig | 7 +++++++ fs/exec.c | 17 +++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index af14a567b493..94821e3f94d1 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -414,6 +414,13 @@ config MMU_GATHER_NO_GATHER bool depends on MMU_GATHER_TABLE_FREE +config ARCH_WANT_IRQS_OFF_ACTIVATE_MM + bool + help + Temporary select until all architectures can be converted to have + irqs disabled over activate_mm. Architectures that do IPI based TLB + shootdowns should enable this. + config ARCH_HAVE_NMI_SAFE_CMPXCHG bool diff --git a/fs/exec.c b/fs/exec.c index a91003e28eaa..d4fb18baf1fb 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1130,11 +1130,24 @@ static int exec_mmap(struct mm_struct *mm) } task_lock(tsk); - active_mm = tsk->active_mm; membarrier_exec_mmap(mm); - tsk->mm = mm; + + local_irq_disable(); + active_mm = tsk->active_mm; tsk->active_mm = mm; + tsk->mm = mm; + /* + * This prevents preemption while active_mm is being loaded and + * it and mm are being updated, which could cause problems for + * lazy tlb mm refcounting when these are updated by context + * switches. Not all architectures can handle irqs off over + * activate_mm yet. + */ + if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) + local_irq_enable(); activate_mm(active_mm, mm); + if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) + local_irq_enable(); tsk->mm->vmacache_seqnum = 0; vmacache_flush(tsk); task_unlock(tsk); -- cgit v1.2.3 From 66acd46080bd9e5ad2be4b0eb1d498d5145d058e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 14 Sep 2020 14:52:17 +1000 Subject: powerpc: select ARCH_WANT_IRQS_OFF_ACTIVATE_MM powerpc uses IPIs in some situations to switch a kernel thread away from a lazy tlb mm, which is subject to the TLB flushing race described in the changelog introducing ARCH_WANT_IRQS_OFF_ACTIVATE_MM. 
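Selecting the option is a promise that the architecture's activate_mm() is safe to call with interrupts hard-disabled; exec_mmap() then keeps irqs off across the ->active_mm/->mm updates and the activate_mm() call itself. For an IPI-flush architecture the conversion is typically just the Kconfig select plus an activate_mm() that goes through the irqs-off switch path, roughly (sketch only, mirroring what the hunk below does for powerpc):

	static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
	{
		/* exec_mmap() now guarantees interrupts are disabled here */
		switch_mm_irqs_off(prev, next, current);
	}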
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200914045219.3736466-3-npiggin@gmail.com --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/mmu_context.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 1f48bbfb3ce9..65cb32211574 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -149,6 +149,7 @@ config PPC select ARCH_USE_QUEUED_RWLOCKS if PPC_QUEUED_SPINLOCKS select ARCH_USE_QUEUED_SPINLOCKS if PPC_QUEUED_SPINLOCKS select ARCH_WANT_IPC_PARSE_VERSION + select ARCH_WANT_IRQS_OFF_ACTIVATE_MM select ARCH_WEAK_RELEASE_ACQUIRE select BINFMT_ELF select BUILDTIME_TABLE_SORT diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 7f3658a97384..e02aa793420b 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -244,7 +244,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, */ static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) { - switch_mm(prev, next, current); + switch_mm_irqs_off(prev, next, current); } /* We don't currently use enter_lazy_tlb() for anything */ -- cgit v1.2.3 From bafb056ce27940c9994ea905336aa8f27b4f7275 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 14 Sep 2020 14:52:18 +1000 Subject: sparc64: remove mm_cpumask clearing to fix kthread_use_mm race The de facto (and apparently uncommented) standard for using an mm had, thanks to this code in sparc if nothing else, been that you must have a reference on mm_users *and that reference must have been obtained with mmget()*, i.e., from a thread with a reference to mm_users that had used the mm. The introduction of mmget_not_zero() in commit d2005e3f41d4 ("userfaultfd: don't pin the user memory in userfaultfd_file_create()") allowed mm_count holders to aoperate on user mappings asynchronously from the actual threads using the mm, but they were not to load those mappings into their TLB (i.e., walking vmas and page tables is okay, kthread_use_mm() is not). io_uring 2b188cc1bb857 ("Add io_uring IO interface") added code which does a kthread_use_mm() from a mmget_not_zero() refcount. The problem with this is code which previously assumed mm == current->mm and mm->mm_users == 1 implies the mm will remain single-threaded at least until this thread creates another mm_users reference, has now broken. arch/sparc/kernel/smp_64.c: if (atomic_read(&mm->mm_users) == 1) { cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); goto local_flush_and_out; } vs fs/io_uring.c if (unlikely(!(ctx->flags & IORING_SETUP_SQPOLL) || !mmget_not_zero(ctx->sqo_mm))) return -EFAULT; kthread_use_mm(ctx->sqo_mm); mmget_not_zero() could come in right after the mm_users == 1 test, then kthread_use_mm() which sets its CPU in the mm_cpumask. That update could be lost if cpumask_copy() occurs afterward. I propose we fix this by allowing mmget_not_zero() to be a first-class reference, and not have this obscure undocumented and unchecked restriction. The basic fix for sparc64 is to remove its mm_cpumask clearing code. The optimisation could be effectively restored by sending IPIs to mm_cpumask members and having them remove themselves from mm_cpumask. This is more tricky so I leave it as an exercise for someone with a sparc64 SMP. powerpc has a (currently similarly broken) example. Signed-off-by: Nicholas Piggin Acked-by: David S. 
Miller Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200914045219.3736466-4-npiggin@gmail.com --- arch/sparc/kernel/smp_64.c | 65 ++++++++++------------------------------------ 1 file changed, 14 insertions(+), 51 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index e286e2badc8a..e38d8bf454e8 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1039,38 +1039,9 @@ void smp_fetch_global_pmu(void) * are flush_tlb_*() routines, and these run after flush_cache_*() * which performs the flushw. * - * The SMP TLB coherency scheme we use works as follows: - * - * 1) mm->cpu_vm_mask is a bit mask of which cpus an address - * space has (potentially) executed on, this is the heuristic - * we use to avoid doing cross calls. - * - * Also, for flushing from kswapd and also for clones, we - * use cpu_vm_mask as the list of cpus to make run the TLB. - * - * 2) TLB context numbers are shared globally across all processors - * in the system, this allows us to play several games to avoid - * cross calls. - * - * One invariant is that when a cpu switches to a process, and - * that processes tsk->active_mm->cpu_vm_mask does not have the - * current cpu's bit set, that tlb context is flushed locally. - * - * If the address space is non-shared (ie. mm->count == 1) we avoid - * cross calls when we want to flush the currently running process's - * tlb state. This is done by clearing all cpu bits except the current - * processor's in current->mm->cpu_vm_mask and performing the - * flush locally only. This will force any subsequent cpus which run - * this task to flush the context from the local tlb if the process - * migrates to another cpu (again). - * - * 3) For shared address spaces (threads) and swapping we bite the - * bullet for most cases and perform the cross call (but only to - * the cpus listed in cpu_vm_mask). - * - * The performance gain from "optimizing" away the cross call for threads is - * questionable (in theory the big win for threads is the massive sharing of - * address space state across processors). + * mm->cpu_vm_mask is a bit mask of which cpus an address + * space has (potentially) executed on, this is the heuristic + * we use to limit cross calls. 
*/ /* This currently is only used by the hugetlb arch pre-fault @@ -1080,18 +1051,13 @@ void smp_fetch_global_pmu(void) void smp_flush_tlb_mm(struct mm_struct *mm) { u32 ctx = CTX_HWBITS(mm->context); - int cpu = get_cpu(); - if (atomic_read(&mm->mm_users) == 1) { - cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); - goto local_flush_and_out; - } + get_cpu(); smp_cross_call_masked(&xcall_flush_tlb_mm, ctx, 0, 0, mm_cpumask(mm)); -local_flush_and_out: __flush_tlb_mm(ctx, SECONDARY_CONTEXT); put_cpu(); @@ -1114,17 +1080,15 @@ void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long { u32 ctx = CTX_HWBITS(mm->context); struct tlb_pending_info info; - int cpu = get_cpu(); + + get_cpu(); info.ctx = ctx; info.nr = nr; info.vaddrs = vaddrs; - if (mm == current->mm && atomic_read(&mm->mm_users) == 1) - cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); - else - smp_call_function_many(mm_cpumask(mm), tlb_pending_func, - &info, 1); + smp_call_function_many(mm_cpumask(mm), tlb_pending_func, + &info, 1); __flush_tlb_pending(ctx, nr, vaddrs); @@ -1134,14 +1098,13 @@ void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long void smp_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr) { unsigned long context = CTX_HWBITS(mm->context); - int cpu = get_cpu(); - if (mm == current->mm && atomic_read(&mm->mm_users) == 1) - cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); - else - smp_cross_call_masked(&xcall_flush_tlb_page, - context, vaddr, 0, - mm_cpumask(mm)); + get_cpu(); + + smp_cross_call_masked(&xcall_flush_tlb_page, + context, vaddr, 0, + mm_cpumask(mm)); + __flush_tlb_page(context, vaddr); put_cpu(); -- cgit v1.2.3 From a665eec0a22e11cdde708c1c256a465ebe768047 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 14 Sep 2020 14:52:19 +1000 Subject: powerpc/64s/radix: Fix mm_cpumask trimming race vs kthread_use_mm Commit 0cef77c7798a7 ("powerpc/64s/radix: flush remote CPUs out of single-threaded mm_cpumask") added a mechanism to trim the mm_cpumask of a process under certain conditions. One of the assumptions is that mm_users would not be incremented via a reference outside the process context with mmget_not_zero() then go on to kthread_use_mm() via that reference. That invariant was broken by io_uring code (see previous sparc64 fix), but I'll point Fixes: to the original powerpc commit because we are changing that assumption going forward, so this will make backports match up. Fix this by no longer relying on that assumption, but by having each CPU check the mm is not being used, and clearing their own bit from the mask only if it hasn't been switched-to by the time the IPI is processed. This relies on commit 38cf307c1f20 ("mm: fix kthread_use_mm() vs TLB invalidate") and ARCH_WANT_IRQS_OFF_ACTIVATE_MM to disable irqs over mm switch sequences. 
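The window being closed looks roughly like this (CPU numbers are illustrative):

  CPU0: radix__flush_tlb_mm()           CPU1: io_uring worker
    mm_is_singlethreaded(mm) is true
                                          mmget_not_zero(mm)
                                          kthread_use_mm(mm)
                                            sets CPU1 in mm_cpumask(mm)
    exit_flush_lazy_tlbs(mm)
      IPIs the cpumask
      mm_reset_thread_local(mm)
        rewrites mm_cpumask(mm) to CPU0
        only, losing CPU1's bit

After this patch the initiating CPU never rewrites the mask. Each IPI target always flushes; a target that merely had mm as its lazy active_mm switches itself to init_mm and then clears its own bit and active_cpus count, while a target that is genuinely using the mm (current->mm == mm, as a racing kthread_use_mm() would be) keeps its bit and keeps receiving invalidations.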
Fixes: 0cef77c7798a7 ("powerpc/64s/radix: flush remote CPUs out of single-threaded mm_cpumask") Signed-off-by: Nicholas Piggin Reviewed-by: Michael Ellerman Depends-on: 38cf307c1f20 ("mm: fix kthread_use_mm() vs TLB invalidate") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200914045219.3736466-5-npiggin@gmail.com --- arch/powerpc/include/asm/tlb.h | 13 ------------- arch/powerpc/mm/book3s64/radix_tlb.c | 23 ++++++++++++++++------- 2 files changed, 16 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index fbc6f3002f23..d97f061fecac 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -66,19 +66,6 @@ static inline int mm_is_thread_local(struct mm_struct *mm) return false; return cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm)); } -static inline void mm_reset_thread_local(struct mm_struct *mm) -{ - WARN_ON(atomic_read(&mm->context.copros) > 0); - /* - * It's possible for mm_access to take a reference on mm_users to - * access the remote mm from another thread, but it's not allowed - * to set mm_cpumask, so mm_users may be > 1 here. - */ - WARN_ON(current->mm != mm); - atomic_set(&mm->context.active_cpus, 1); - cpumask_clear(mm_cpumask(mm)); - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); -} #else /* CONFIG_PPC_BOOK3S_64 */ static inline int mm_is_thread_local(struct mm_struct *mm) { diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 0d233763441f..143b4fd396f0 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -645,19 +645,29 @@ static void do_exit_flush_lazy_tlb(void *arg) struct mm_struct *mm = arg; unsigned long pid = mm->context.id; + /* + * A kthread could have done a mmget_not_zero() after the flushing CPU + * checked mm_is_singlethreaded, and be in the process of + * kthread_use_mm when interrupted here. In that case, current->mm will + * be set to mm, because kthread_use_mm() setting ->mm and switching to + * the mm is done with interrupts off. + */ if (current->mm == mm) - return; /* Local CPU */ + goto out_flush; if (current->active_mm == mm) { - /* - * Must be a kernel thread because sender is single-threaded. - */ - BUG_ON(current->mm); + WARN_ON_ONCE(current->mm != NULL); + /* Is a kernel thread and is using mm as the lazy tlb */ mmgrab(&init_mm); - switch_mm(mm, &init_mm, current); current->active_mm = &init_mm; + switch_mm_irqs_off(mm, &init_mm, current); mmdrop(mm); } + + atomic_dec(&mm->context.active_cpus); + cpumask_clear_cpu(smp_processor_id(), mm_cpumask(mm)); + +out_flush: _tlbiel_pid(pid, RIC_FLUSH_ALL); } @@ -672,7 +682,6 @@ static void exit_flush_lazy_tlbs(struct mm_struct *mm) */ smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb, (void *)mm, 1); - mm_reset_thread_local(mm); } void radix__flush_tlb_mm(struct mm_struct *mm) -- cgit v1.2.3 From ca78ef2f08ccfa29b711d644964cdf9d7ace15e5 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Sat, 12 Sep 2020 13:44:51 +0530 Subject: powerpc/papr_scm: Fix warning triggered by perf_stats_show() A warning is reported by the kernel in case perf_stats_show() returns an error code. 
The warning is of the form below: papr_scm ibm,persistent-memory:ibm,pmemory@44100001: Failed to query performance stats, Err:-10 dev_attr_show: perf_stats_show+0x0/0x1c0 [papr_scm] returned bad count fill_read_buffer: dev_attr_show+0x0/0xb0 returned bad count On investigation it looks like that the compiler is silently truncating the return value of drc_pmem_query_stats() from 'long' to 'int', since the variable used to store the return code 'rc' is an 'int'. This truncated value is then returned back as a 'ssize_t' back from perf_stats_show() to 'dev_attr_show()' which thinks of it as a large unsigned number and triggers this warning.. To fix this we update the type of variable 'rc' from 'int' to 'ssize_t' that prevents the compiler from truncating the return value of drc_pmem_query_stats() and returning correct signed value back from perf_stats_show(). Fixes: 2d02bf835e57 ("powerpc/papr_scm: Fetch nvdimm performance stats from PHYP") Signed-off-by: Vaibhav Jain Reviewed-by: Ira Weiny Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200912081451.66225-1-vaibhav@linux.ibm.com --- arch/powerpc/platforms/pseries/papr_scm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index a88a707a608a..5493bc847bd0 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -785,7 +785,8 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, static ssize_t perf_stats_show(struct device *dev, struct device_attribute *attr, char *buf) { - int index, rc; + int index; + ssize_t rc; struct seq_buf s; struct papr_scm_perf_stat *stat; struct papr_scm_perf_stats *stats; @@ -820,7 +821,7 @@ static ssize_t perf_stats_show(struct device *dev, free_stats: kfree(stats); - return rc ? rc : seq_buf_used(&s); + return rc ? rc : (ssize_t)seq_buf_used(&s); } DEVICE_ATTR_ADMIN_RO(perf_stats); -- cgit v1.2.3 From f3232321db58480804f80d59aeb651a5c859a200 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Fri, 7 Aug 2020 13:15:17 +0530 Subject: powerpc/topology: Override cpu_smt_mask On Power9, a pair of SMT4 cores can be presented by the firmware as a SMT8 core for backward compatibility reasons, with the fusion of two SMT4 cores. Powerpc allows LPARs to be live migrated from Power8 to Power9. Existing software developed/configured for Power8, expects to see a SMT8 core. In order to maintain userspace backward compatibility (with Power8 chips in case of Power9) in enterprise Linux systems, the topology_sibling_cpumask has to be set to SMT8 core. cpu_smt_mask() should generally point to the cpu mask of the SMT4 core. Hence override the default cpu_smt_mask() to be powerpc specific allowing for better scheduling behaviour on Power. 
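To make the distinction concrete (thread numbering shown only as an example of the usual big-core layout): on one fused core made of CPUs 0-7, topology_sibling_cpumask(0) keeps reporting the full SMT8 set 0-7, preserving what Power8-era userspace expects, while cpu_smt_mask(0) now returns just the SMT4 small core, e.g. 0,2,4,6, and it is this smaller mask that the scheduler uses to build its SMT domain.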
schbench (latency measured in usecs, so lesser is better) Without patch With patch Latency percentiles (usec) Latency percentiles (usec) 50.0000th: 34 50.0000th: 38 75.0000th: 47 75.0000th: 52 90.0000th: 54 90.0000th: 60 95.0000th: 57 95.0000th: 64 *99.0000th: 62 *99.0000th: 72 99.5000th: 65 99.5000th: 75 99.9000th: 76 99.9000th: 3452 min=0, max=9205 min=0, max=9344 schbench (With Cede disabled) Without patch With patch Latency percentiles (usec) Latency percentiles (usec) 50.0000th: 20 50.0000th: 21 75.0000th: 28 75.0000th: 29 90.0000th: 33 90.0000th: 34 95.0000th: 35 95.0000th: 37 *99.0000th: 40 *99.0000th: 40 99.5000th: 48 99.5000th: 42 99.9000th: 94 99.9000th: 79 min=0, max=791 min=0, max=791 perf bench sched pipe usec/ops : lesser is better Without patch N Min Max Median Avg Stddev 101 5.095113 5.595269 5.204842 5.2298776 0.10762713 5.10 - 5.15 : ################################################## 23% (24) 5.15 - 5.20 : ############################################# 21% (22) 5.20 - 5.25 : ################################################## 23% (24) 5.25 - 5.30 : ######################### 11% (12) 5.30 - 5.35 : ########## 4% (5) 5.35 - 5.40 : ######## 3% (4) 5.40 - 5.45 : ######## 3% (4) 5.45 - 5.50 : #### 1% (2) 5.50 - 5.55 : ## 0% (1) 5.55 - 5.60 : #### 1% (2) With patch N Min Max Median Avg Stddev 101 5.134675 8.524719 5.207658 5.2780985 0.34911969 5.1 - 5.5 : ################################################## 94% (95) 5.5 - 5.8 : ## 3% (4) 5.8 - 6.2 : 0% (1) 6.2 - 6.5 : 6.5 - 6.8 : 6.8 - 7.2 : 7.2 - 7.5 : 7.5 - 7.8 : 7.8 - 8.2 : 8.2 - 8.5 : perf bench sched pipe (cede disabled) usec/ops : lesser is better Without patch N Min Max Median Avg Stddev 101 7.884227 12.576538 7.956474 8.0170722 0.46159054 7.9 - 8.4 : ################################################## 99% (100) 8.4 - 8.8 : 8.8 - 9.3 : 9.3 - 9.8 : 9.8 - 10.2 : 10.2 - 10.7 : 10.7 - 11.2 : 11.2 - 11.6 : 11.6 - 12.1 : 12.1 - 12.6 : With patch N Min Max Median Avg Stddev 101 7.956021 8.217284 8.015615 8.0283866 0.049844967 7.96 - 7.98 : ###################### 12% (13) 7.98 - 8.01 : ################################################## 28% (29) 8.01 - 8.03 : #################################### 20% (21) 8.03 - 8.06 : ######################### 14% (15) 8.06 - 8.09 : ###################### 12% (13) 8.09 - 8.11 : ###### 3% (4) 8.11 - 8.14 : ### 1% (2) 8.14 - 8.17 : ### 1% (2) 8.17 - 8.19 : 8.19 - 8.22 : # 0% (1) Observations: With the patch, the initial run/iteration takes a slight longer time. This can be attributed to the fact that now we pick a CPU from a idle core which could be sleep mode. Once we remove the cede, state the numbers improve in favour of the patch. 
ebizzy: transactions per second (higher is better) without patch N Min Max Median Avg Stddev 100 1018433 1304470 1193208 1182315.7 60018.733 1018433 - 1047037 : ###### 3% (3) 1047037 - 1075640 : ######## 4% (4) 1075640 - 1104244 : ######## 4% (4) 1104244 - 1132848 : ############### 7% (7) 1132848 - 1161452 : #################################### 17% (17) 1161452 - 1190055 : ########################## 12% (12) 1190055 - 1218659 : ############################################# 21% (21) 1218659 - 1247263 : ################################################## 23% (23) 1247263 - 1275866 : ######## 4% (4) 1275866 - 1304470 : ######## 4% (4) with patch N Min Max Median Avg Stddev 100 967014 1292938 1208819 1185281.8 69815.851 967014 - 999606 : ## 1% (1) 999606 - 1032199 : ## 1% (1) 1032199 - 1064791 : ############ 6% (6) 1064791 - 1097384 : ########## 5% (5) 1097384 - 1129976 : ################## 9% (9) 1129976 - 1162568 : #################### 10% (10) 1162568 - 1195161 : ########################## 13% (13) 1195161 - 1227753 : ############################################ 22% (22) 1227753 - 1260346 : ################################################## 25% (25) 1260346 - 1292938 : ############## 7% (7) Observations: Not much changes, ebizzy is not much impacted. Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200807074517.27957-2-srikar@linux.vnet.ibm.com --- arch/powerpc/include/asm/cputhreads.h | 1 - arch/powerpc/include/asm/smp.h | 13 +++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h index deb99fd6e060..98c8bd155bf9 100644 --- a/arch/powerpc/include/asm/cputhreads.h +++ b/arch/powerpc/include/asm/cputhreads.h @@ -23,7 +23,6 @@ extern int threads_per_core; extern int threads_per_subcore; extern int threads_shift; -extern bool has_big_cores; extern cpumask_t threads_core_mask; #else #define threads_per_core 1 diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 81a49566ccd8..b727f5f7b8f9 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -135,6 +135,19 @@ static inline struct cpumask *cpu_smallcore_mask(int cpu) extern int cpu_to_core_id(int cpu); +extern bool has_big_cores; + +#define cpu_smt_mask cpu_smt_mask +#ifdef CONFIG_SCHED_SMT +static inline const struct cpumask *cpu_smt_mask(int cpu) +{ + if (has_big_cores) + return per_cpu(cpu_smallcore_map, cpu); + + return per_cpu(cpu_sibling_map, cpu); +} +#endif /* CONFIG_SCHED_SMT */ + /* Since OpenPIC has only 4 IPIs, we use slightly different message numbers. * * Make sure this matches openpic_request_IPIs in open_pic.c, or what shows up -- cgit v1.2.3 From 67df77845c181166d4bc324cbb0382f7e81c7631 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 17 Aug 2020 11:22:57 +0530 Subject: powerpc/numa: Restrict possible nodes based on platform As per draft LoPAPR (Revision 2.9_pre7), section B.5.3 "Run Time Abstraction Services (RTAS) Node" available at: https://openpowerfoundation.org/wp-content/uploads/2020/07/LoPAR-20200611.pdf ... there are 2 device tree properties: "ibm,max-associativity-domains" which defines the maximum number of domains that the firmware i.e PowerVM can support. and: "ibm,current-associativity-domains" which defines the maximum number of domains that the current platform can support. 
The value of "ibm,max-associativity-domains" is always greater than or equal to "ibm,current-associativity-domains" property. If the latter property is not available, use "ibm,max-associativity-domain" as a fallback. In this yet to be released LoPAPR, "ibm,current-associativity-domains" is mentioned in page 833 / B.5.3 which is covered under under "Appendix B. System Binding" section Currently powerpc uses the "ibm,max-associativity-domains" property while setting the possible number of nodes. This is currently set at 32. However the possible number of nodes for a platform may be significantly less. Hence set the possible number of nodes based on "ibm,current-associativity-domains" property. Nathan Lynch had raised a valid concern that post LPM (Live Partition Migration), a user could DLPAR add processors and memory after LPM with "new" associativity properties: https://lore.kernel.org/linuxppc-dev/871rljfet9.fsf@linux.ibm.com/t/#u He also pointed out that "ibm,max-associativity-domains" has the same contents on all currently available PowerVM systems, unlike "ibm,current-associativity-domains" and hence may be better able to handle the new NUMA associativity properties. However with the recent commit dbce45628085 ("powerpc/numa: Limit possible nodes to within num_possible_nodes"), all new NUMA associativity properties are capped to initially set nr_node_ids. Hence this commit should be safe with any new DLPAR add post LPM. $ lsprop /proc/device-tree/rtas/ibm,*associ*-domains /proc/device-tree/rtas/ibm,current-associativity-domains 00000005 00000001 00000002 00000002 00000002 00000010 /proc/device-tree/rtas/ibm,max-associativity-domains 00000005 00000001 00000008 00000020 00000020 00000100 $ cat /sys/devices/system/node/possible ##Before patch 0-31 $ cat /sys/devices/system/node/possible ##After patch 0-1 Note the maximum nodes this platform can support is only 2 but the possible nodes is set to 32. This is important because lot of kernel and user space code allocate structures for all possible nodes leading to a lot of memory that is allocated but not used. I ran a simple experiment to create and destroy 100 memory cgroups on boot on a 8 node machine (Power8 Alpine). Before patch: free -k at boot total used free shared buff/cache available Mem: 523498176 4106816 518820608 22272 570752 516606720 Swap: 4194240 0 4194240 free -k after creating 100 memory cgroups total used free shared buff/cache available Mem: 523498176 4628416 518246464 22336 623296 516058688 Swap: 4194240 0 4194240 free -k after destroying 100 memory cgroups total used free shared buff/cache available Mem: 523498176 4697408 518173760 22400 627008 515987904 Swap: 4194240 0 4194240 After patch: free -k at boot total used free shared buff/cache available Mem: 523498176 3969472 518933888 22272 594816 516731776 Swap: 4194240 0 4194240 free -k after creating 100 memory cgroups total used free shared buff/cache available Mem: 523498176 4181888 518676096 22208 640192 516496448 Swap: 4194240 0 4194240 free -k after destroying 100 memory cgroups total used free shared buff/cache available Mem: 523498176 4232320 518619904 22272 645952 516443264 Swap: 4194240 0 4194240 Observations: Fixed kernel takes 137344 kb (4106816-3969472) less to boot. Fixed kernel takes 309184 kb (4628416-4181888-137344) less to create 100 memcgs. 
Signed-off-by: Srikar Dronamraju [mpe: Reformat change log a bit for readability] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200817055257.110873-1-srikar@linux.vnet.ibm.com --- arch/powerpc/mm/numa.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 1f61fa2148b5..5ddc83ba20f4 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -900,10 +900,19 @@ static void __init find_possible_nodes(void) if (!rtas) return; - if (of_property_read_u32_index(rtas, - "ibm,max-associativity-domains", + if (of_property_read_u32_index(rtas, "ibm,current-associativity-domains", + min_common_depth, &numnodes)) { + /* + * ibm,current-associativity-domains is a fairly recent + * property. If it doesn't exist, then fallback on + * ibm,max-associativity-domains. Current denotes what the + * platform can support compared to max which denotes what the + * Hypervisor can support. + */ + if (of_property_read_u32_index(rtas, "ibm,max-associativity-domains", min_common_depth, &numnodes)) - goto out; + goto out; + } for (i = 0; i < numnodes; i++) { if (!node_possible(i)) -- cgit v1.2.3 From a874f1005ef5dfe53dfd8cda59a6600e89986ecd Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Tue, 18 Aug 2020 13:41:02 +0530 Subject: powerpc/numa: Set numa_node for all possible cpus A Powerpc system with multiple possible nodes and with CONFIG_NUMA enabled always used to have a node 0, even if node 0 does not any cpus or memory attached to it. As per PAPR, node affinity of a cpu is only available once its present / online. For all cpus that are possible but not present, cpu_to_node() would point to node 0. To ensure a cpuless, memoryless dummy node is not online, powerpc need to make sure all possible but not present cpu_to_node are set to a proper node. Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200818081104.57888-2-srikar@linux.vnet.ibm.com --- arch/powerpc/mm/numa.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 5ddc83ba20f4..f63e1a41402f 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -507,6 +507,11 @@ static int numa_setup_cpu(unsigned long lcpu) int fcpu = cpu_first_thread_sibling(lcpu); int nid = NUMA_NO_NODE; + if (!cpu_present(lcpu)) { + set_cpu_numa_node(lcpu, first_online_node); + return first_online_node; + } + /* * If a valid cpu-to-node mapping is already available, use it * directly instead of querying the firmware, since it represents @@ -944,8 +949,17 @@ void __init mem_topology_setup(void) reset_numa_cpu_lookup_table(); - for_each_present_cpu(cpu) + for_each_possible_cpu(cpu) { + /* + * Powerpc with CONFIG_NUMA always used to have a node 0, + * even if it was memoryless or cpuless. For all cpus that + * are possible but not present, cpu_to_node() would point + * to node 0. To remove a cpuless, memoryless dummy node, + * powerpc need to make sure all possible but not present + * cpu_to_node are set to a proper node. + */ numa_setup_cpu(cpu); + } } void __init initmem_init(void) -- cgit v1.2.3 From 6398eaa268168b528dd1d3d0e70e61e9c13bea23 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Tue, 18 Aug 2020 13:41:03 +0530 Subject: powerpc/numa: Prefer node id queried from vphn Node id queried from the static device tree may not be correct. 
For example: it may always show 0 on a shared processor. Hence prefer the node id queried from vphn and fallback on the device tree based node id if vphn query fails. Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200818081104.57888-3-srikar@linux.vnet.ibm.com --- arch/powerpc/mm/numa.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index f63e1a41402f..9f0127cfb978 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -728,21 +728,22 @@ static int __init parse_numa_properties(void) */ for_each_present_cpu(i) { struct device_node *cpu; - int nid; - - cpu = of_get_cpu_node(i, NULL); - BUG_ON(!cpu); - nid = of_node_to_nid_single(cpu); - of_node_put(cpu); + int nid = vphn_get_nid(i); /* * Don't fall back to default_nid yet -- we will plug * cpus into nodes once the memory scan has discovered * the topology. */ - if (nid < 0) - continue; - node_set_online(nid); + if (nid == NUMA_NO_NODE) { + cpu = of_get_cpu_node(i, NULL); + BUG_ON(!cpu); + nid = of_node_to_nid_single(cpu); + of_node_put(cpu); + } + + if (likely(nid > 0)) + node_set_online(nid); } get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); -- cgit v1.2.3 From e75130f20b1f48e04ccc806aea01f0a361f9cb6b Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Tue, 18 Aug 2020 13:41:04 +0530 Subject: powerpc/numa: Offline memoryless cpuless node 0 Currently Linux kernel with CONFIG_NUMA on a system with multiple possible nodes, marks node 0 as online at boot. However in practice, there are systems which have node 0 as memoryless and cpuless. This can cause numa_balancing to be enabled on systems with only one node with memory and CPUs. The existence of this dummy node which is cpuless and memoryless node can confuse users/scripts looking at output of lscpu / numactl. By marking, node 0 as offline, lets stop assuming that node 0 is always online. If node 0 has CPU or memory that are online, node 0 will again be set as online. v5.8 available: 2 nodes (0,2) node 0 cpus: node 0 size: 0 MB node 0 free: 0 MB node 2 cpus: 0 1 2 3 4 5 6 7 node 2 size: 32625 MB node 2 free: 31490 MB node distances: node 0 2 0: 10 20 2: 20 10 proc and sys files ------------------ /sys/devices/system/node/online: 0,2 /proc/sys/kernel/numa_balancing: 1 /sys/devices/system/node/has_cpu: 2 /sys/devices/system/node/has_memory: 2 /sys/devices/system/node/has_normal_memory: 2 /sys/devices/system/node/possible: 0-31 v5.8 + patch ------------------ available: 1 nodes (2) node 2 cpus: 0 1 2 3 4 5 6 7 node 2 size: 32625 MB node 2 free: 31487 MB node distances: node 2 2: 10 proc and sys files ------------------ /sys/devices/system/node/online: 2 /proc/sys/kernel/numa_balancing: 0 /sys/devices/system/node/has_cpu: 2 /sys/devices/system/node/has_memory: 2 /sys/devices/system/node/has_normal_memory: 2 /sys/devices/system/node/possible: 0-31 Example of a node with online CPUs/memory on node 0. 
(Same o/p with and without patch) numactl -H available: 4 nodes (0-3) node 0 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 node 0 size: 32482 MB node 0 free: 22994 MB node 1 cpus: 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 node 1 size: 0 MB node 1 free: 0 MB node 2 cpus: 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 node 2 size: 0 MB node 2 free: 0 MB node 3 cpus: 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 node 3 size: 0 MB node 3 free: 0 MB node distances: node 0 1 2 3 0: 10 20 40 40 1: 20 10 40 40 2: 40 40 10 20 3: 40 40 20 10 Note: On Powerpc, cpu_to_node of possible but not present cpus would previously return 0. Hence this commit depends on commit ("powerpc/numa: Set numa_node for all possible cpus") and commit ("powerpc/numa: Prefer node id queried from vphn"). Without the 2 commits, Powerpc system might crash. 1. User space applications like Numactl, lscpu, that parse the sysfs tend to believe there is an extra online node. This tends to confuse users and applications. Other user space applications start believing that system was not able to use all the resources (i.e missing resources) or the system was not setup correctly. 2. Also existence of dummy node also leads to inconsistent information. The number of online nodes is inconsistent with the information in the device-tree and resource-dump 3. When the dummy node is present, single node non-Numa systems end up showing up as NUMA systems and numa_balancing gets enabled. This will mean we take the hit from the unnecessary numa hinting faults. Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200818081104.57888-4-srikar@linux.vnet.ibm.com --- arch/powerpc/mm/numa.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 9f0127cfb978..481951ac3e55 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -933,6 +933,16 @@ void __init mem_topology_setup(void) { int cpu; + /* + * Linux/mm assumes node 0 to be online at boot. However this is not + * true on PowerPC, where node 0 is similar to any other node, it + * could be cpuless, memoryless node. So force node 0 to be offline + * for now. This will prevent cpuless, memoryless node 0 showing up + * unnecessarily as online. If a node has cpus or memory that need + * to be online, then node will anyway be marked online. + */ + node_set_offline(0); + if (parse_numa_properties()) setup_nonnuma(); -- cgit v1.2.3 From d0fd24bbd27619d7b8d4da26a19a2027931ae9fc Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 10 Aug 2020 12:48:25 +0530 Subject: powerpc/smp: Fix a warning under !NEED_MULTIPLE_NODES Fix a build warning in a non CONFIG_NEED_MULTIPLE_NODES "error: _numa_cpu_lookup_table_ undeclared" Signed-off-by: Srikar Dronamraju Reviewed-by: Gautham R. 
Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200810071834.92514-2-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 8261999c7d52..d511bf73ade9 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -861,6 +861,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) GFP_KERNEL, cpu_to_node(cpu)); zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu), GFP_KERNEL, cpu_to_node(cpu)); +#ifdef CONFIG_NEED_MULTIPLE_NODES /* * numa_node_id() works after this. */ @@ -869,6 +870,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) set_cpu_numa_mem(cpu, local_memory_node(numa_cpu_lookup_table[cpu])); } +#endif } /* Init the cpumasks so the boot CPU is related to itself */ -- cgit v1.2.3 From 2ef0ca54d97f40f7621d595ac5479bd7fa076bfa Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 10 Aug 2020 12:48:26 +0530 Subject: powerpc/smp: Merge Power9 topology with Power topology A new sched_domain_topology_level was added just for Power9. However the same can be achieved by merging powerpc_topology with power9_topology and makes the code more simpler especially when adding a new sched domain. Signed-off-by: Srikar Dronamraju Reviewed-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200810071834.92514-3-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 25 +++---------------------- 1 file changed, 3 insertions(+), 22 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index d511bf73ade9..1fb98b255b4c 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1314,7 +1314,7 @@ int setup_profiling_timer(unsigned int multiplier) } #ifdef CONFIG_SCHED_SMT -/* cpumask of CPUs with asymetric SMT dependancy */ +/* cpumask of CPUs with asymmetric SMT dependency */ static int powerpc_smt_flags(void) { int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; @@ -1327,14 +1327,6 @@ static int powerpc_smt_flags(void) } #endif -static struct sched_domain_topology_level powerpc_topology[] = { -#ifdef CONFIG_SCHED_SMT - { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) }, -#endif - { cpu_cpu_mask, SD_INIT_NAME(DIE) }, - { NULL, }, -}; - /* * P9 has a slightly odd architecture where pairs of cores share an L2 cache. * This topology makes it *much* cheaper to migrate tasks between adjacent cores @@ -1362,7 +1354,7 @@ static const struct cpumask *smallcore_smt_mask(int cpu) } #endif -static struct sched_domain_topology_level power9_topology[] = { +static struct sched_domain_topology_level powerpc_topology[] = { #ifdef CONFIG_SCHED_SMT { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) }, #endif @@ -1387,21 +1379,10 @@ void __init smp_cpus_done(unsigned int max_cpus) #ifdef CONFIG_SCHED_SMT if (has_big_cores) { pr_info("Big cores detected but using small core scheduling\n"); - power9_topology[0].mask = smallcore_smt_mask; powerpc_topology[0].mask = smallcore_smt_mask; } #endif - /* - * If any CPU detects that it's sharing a cache with another CPU then - * use the deeper topology that is aware of this sharing. 
- */ - if (shared_caches) { - pr_info("Using shared cache scheduler topology\n"); - set_sched_topology(power9_topology); - } else { - pr_info("Using standard scheduler topology\n"); - set_sched_topology(powerpc_topology); - } + set_sched_topology(powerpc_topology); } #ifdef CONFIG_HOTPLUG_CPU -- cgit v1.2.3 From 5e93f16ae48b728775496429c6db53d0bf8cdd9b Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 10 Aug 2020 12:48:27 +0530 Subject: powerpc/smp: Move powerpc_topology above Just moving the powerpc_topology description above. This will help in using functions in this file and avoid declarations. No other functional changes Signed-off-by: Srikar Dronamraju Reviewed-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200810071834.92514-4-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 104 +++++++++++++++++++++++----------------------- 1 file changed, 52 insertions(+), 52 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 1fb98b255b4c..b12d143c7104 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -819,6 +819,58 @@ out: return err; } +static bool shared_caches; + +#ifdef CONFIG_SCHED_SMT +/* cpumask of CPUs with asymmetric SMT dependency */ +static int powerpc_smt_flags(void) +{ + int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; + + if (cpu_has_feature(CPU_FTR_ASYM_SMT)) { + printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n"); + flags |= SD_ASYM_PACKING; + } + return flags; +} +#endif + +/* + * P9 has a slightly odd architecture where pairs of cores share an L2 cache. + * This topology makes it *much* cheaper to migrate tasks between adjacent cores + * since the migrated task remains cache hot. We want to take advantage of this + * at the scheduler level so an extra topology level is required. + */ +static int powerpc_shared_cache_flags(void) +{ + return SD_SHARE_PKG_RESOURCES; +} + +/* + * We can't just pass cpu_l2_cache_mask() directly because + * returns a non-const pointer and the compiler barfs on that. + */ +static const struct cpumask *shared_cache_mask(int cpu) +{ + return cpu_l2_cache_mask(cpu); +} + +#ifdef CONFIG_SCHED_SMT +static const struct cpumask *smallcore_smt_mask(int cpu) +{ + return cpu_smallcore_mask(cpu); +} +#endif + +static struct sched_domain_topology_level powerpc_topology[] = { +#ifdef CONFIG_SCHED_SMT + { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) }, +#endif + { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) }, + { cpu_cpu_mask, SD_INIT_NAME(DIE) }, + { NULL, }, +}; + static int init_big_cores(void) { int cpu; @@ -1248,8 +1300,6 @@ static void add_cpu_to_masks(int cpu) set_cpus_related(cpu, i, cpu_core_mask); } -static bool shared_caches; - /* Activate a secondary processor. */ void start_secondary(void *unused) { @@ -1313,56 +1363,6 @@ int setup_profiling_timer(unsigned int multiplier) return 0; } -#ifdef CONFIG_SCHED_SMT -/* cpumask of CPUs with asymmetric SMT dependency */ -static int powerpc_smt_flags(void) -{ - int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; - - if (cpu_has_feature(CPU_FTR_ASYM_SMT)) { - printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n"); - flags |= SD_ASYM_PACKING; - } - return flags; -} -#endif - -/* - * P9 has a slightly odd architecture where pairs of cores share an L2 cache. - * This topology makes it *much* cheaper to migrate tasks between adjacent cores - * since the migrated task remains cache hot. 
We want to take advantage of this - * at the scheduler level so an extra topology level is required. - */ -static int powerpc_shared_cache_flags(void) -{ - return SD_SHARE_PKG_RESOURCES; -} - -/* - * We can't just pass cpu_l2_cache_mask() directly because - * returns a non-const pointer and the compiler barfs on that. - */ -static const struct cpumask *shared_cache_mask(int cpu) -{ - return cpu_l2_cache_mask(cpu); -} - -#ifdef CONFIG_SCHED_SMT -static const struct cpumask *smallcore_smt_mask(int cpu) -{ - return cpu_smallcore_mask(cpu); -} -#endif - -static struct sched_domain_topology_level powerpc_topology[] = { -#ifdef CONFIG_SCHED_SMT - { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) }, -#endif - { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) }, - { cpu_cpu_mask, SD_INIT_NAME(DIE) }, - { NULL, }, -}; - void __init smp_cpus_done(unsigned int max_cpus) { /* -- cgit v1.2.3 From 3c6032a8fe99547d31b2b57715e303a67d1b0c66 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 10 Aug 2020 12:48:28 +0530 Subject: powerpc/smp: Move topology fixups into a new function Move topology fixup based on the platform attributes into its own function which is called just before set_sched_topology. Signed-off-by: Srikar Dronamraju Reviewed-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200810071834.92514-5-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index b12d143c7104..9f4333d0748b 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1363,6 +1363,16 @@ int setup_profiling_timer(unsigned int multiplier) return 0; } +static void fixup_topology(void) +{ +#ifdef CONFIG_SCHED_SMT + if (has_big_cores) { + pr_info("Big cores detected but using small core scheduling\n"); + powerpc_topology[0].mask = smallcore_smt_mask; + } +#endif +} + void __init smp_cpus_done(unsigned int max_cpus) { /* @@ -1376,12 +1386,7 @@ void __init smp_cpus_done(unsigned int max_cpus) dump_numa_cpu_topology(); -#ifdef CONFIG_SCHED_SMT - if (has_big_cores) { - pr_info("Big cores detected but using small core scheduling\n"); - powerpc_topology[0].mask = smallcore_smt_mask; - } -#endif + fixup_topology(); set_sched_topology(powerpc_topology); } -- cgit v1.2.3 From f6606cfdfbcda00ce8a6e63c8fc13c93e73ac059 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Sun, 13 Sep 2020 22:40:38 +0530 Subject: powerpc/smp: Dont assume l2-cache to be superset of sibling Current code assumes that cpumask of cpus sharing a l2-cache mask will always be a superset of cpu_sibling_mask. Lets stop that assumption. cpu_l2_cache_mask is a superset of cpu_sibling_mask if and only if shared_caches is set. Reviewed-by: Gautham R. 
Shenoy Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200913171038.GB11808@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 43 +++++++++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 9f4333d0748b..168532e37305 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1186,9 +1186,23 @@ static bool update_mask_by_l2(int cpu, struct cpumask *(*mask_fn)(int)) int i; l2_cache = cpu_to_l2cache(cpu); - if (!l2_cache) + if (!l2_cache) { + struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask; + + /* + * If no l2cache for this CPU, assume all siblings to share + * cache with this CPU. + */ + if (has_big_cores) + sibling_mask = cpu_smallcore_mask; + + for_each_cpu(i, sibling_mask(cpu)) + set_cpus_related(cpu, i, cpu_l2_cache_mask); + return false; + } + cpumask_set_cpu(cpu, mask_fn(cpu)); for_each_cpu(i, cpu_online_mask) { /* * when updating the marks the current CPU has not been marked @@ -1271,29 +1285,30 @@ static void add_cpu_to_masks(int cpu) * add it to it's own thread sibling mask. */ cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); + cpumask_set_cpu(cpu, cpu_core_mask(cpu)); for (i = first_thread; i < first_thread + threads_per_core; i++) if (cpu_online(i)) set_cpus_related(i, cpu, cpu_sibling_mask); add_cpu_to_smallcore_masks(cpu); - /* - * Copy the thread sibling mask into the cache sibling mask - * and mark any CPUs that share an L2 with this CPU. - */ - for_each_cpu(i, cpu_sibling_mask(cpu)) - set_cpus_related(cpu, i, cpu_l2_cache_mask); update_mask_by_l2(cpu, cpu_l2_cache_mask); - /* - * Copy the cache sibling mask into core sibling mask and mark - * any CPUs on the same chip as this CPU. - */ - for_each_cpu(i, cpu_l2_cache_mask(cpu)) - set_cpus_related(cpu, i, cpu_core_mask); + if (pkg_id == -1) { + struct cpumask *(*mask)(int) = cpu_sibling_mask; + + /* + * Copy the sibling mask into core sibling mask and + * mark any CPUs on the same chip as this CPU. + */ + if (shared_caches) + mask = cpu_l2_cache_mask; + + for_each_cpu(i, mask(cpu)) + set_cpus_related(cpu, i, cpu_core_mask); - if (pkg_id == -1) return; + } for_each_cpu(i, cpu_online_mask) if (get_physical_package_id(i) == pkg_id) -- cgit v1.2.3 From caa8e29da59926bef099b46ab6280333d583e944 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 10 Aug 2020 12:48:30 +0530 Subject: powerpc/smp: Optimize start_secondary In start_secondary, even if shared_cache was already set, system does a redundant match for cpumask. This redundant check can be removed by checking if shared_cache is already set. While here, localize the sibling_mask variable to within the if condition. Signed-off-by: Srikar Dronamraju Reviewed-by: Gautham R. 
Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200810071834.92514-7-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 168532e37305..016a822eb8c4 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -852,7 +852,7 @@ static int powerpc_shared_cache_flags(void) */ static const struct cpumask *shared_cache_mask(int cpu) { - return cpu_l2_cache_mask(cpu); + return per_cpu(cpu_l2_cache_map, cpu); } #ifdef CONFIG_SCHED_SMT @@ -1319,7 +1319,6 @@ static void add_cpu_to_masks(int cpu) void start_secondary(void *unused) { unsigned int cpu = smp_processor_id(); - struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask; mmgrab(&init_mm); current->active_mm = &init_mm; @@ -1345,14 +1344,20 @@ void start_secondary(void *unused) /* Update topology CPU masks */ add_cpu_to_masks(cpu); - if (has_big_cores) - sibling_mask = cpu_smallcore_mask; /* * Check for any shared caches. Note that this must be done on a * per-core basis because one core in the pair might be disabled. */ - if (!cpumask_equal(cpu_l2_cache_mask(cpu), sibling_mask(cpu))) - shared_caches = true; + if (!shared_caches) { + struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask; + struct cpumask *mask = cpu_l2_cache_mask(cpu); + + if (has_big_cores) + sibling_mask = cpu_smallcore_mask; + + if (cpumask_weight(mask) > cpumask_weight(sibling_mask(cpu))) + shared_caches = true; + } set_numa_node(numa_cpu_lookup_table[cpu]); set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu])); -- cgit v1.2.3 From f9f130ff2ec93c5949576bbfb168cc9530c23649 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 10 Aug 2020 12:48:31 +0530 Subject: powerpc/numa: Detect support for coregroup Add support for grouping cores based on the device-tree classification. - The last domain in the associativity domains always refers to the core. - If primary reference domain happens to be the penultimate domain in the associativity domains device-tree property, then there are no coregroups. However if its not a penultimate domain, then there are coregroups. There can be more than one coregroup. For now we would be interested in the last or the smallest coregroups, i.e one sub-group per DIE. Currently there are no firmwares that are exposing this grouping. Hence allow the basis for grouping to be abstract. Once the firmware starts using this grouping, code would be added to detect the type of grouping and adjust the sd domain flags accordingly. Signed-off-by: Srikar Dronamraju Reviewed-by: Gautham R. 
Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200810071834.92514-8-srikar@linux.vnet.ibm.com --- arch/powerpc/include/asm/smp.h | 1 + arch/powerpc/kernel/smp.c | 1 + arch/powerpc/mm/numa.c | 34 +++++++++++++++++++++------------- 3 files changed, 23 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index b727f5f7b8f9..041f0b97c45b 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -28,6 +28,7 @@ extern int boot_cpuid; extern int spinning_secondaries; extern u32 *cpu_to_phys_id; +extern bool coregroup_enabled; extern void cpu_die(void); extern int cpu_to_chip_id(int cpu); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 016a822eb8c4..a44b9350d2ef 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -75,6 +75,7 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 }; struct task_struct *secondary_current; bool has_big_cores; +bool coregroup_enabled; DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 481951ac3e55..b2c44c5a81fb 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -897,7 +897,9 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) static void __init find_possible_nodes(void) { struct device_node *rtas; - u32 numnodes, i; + const __be32 *domains; + int prop_length, max_nodes; + u32 i; if (!numa_enabled) return; @@ -906,25 +908,31 @@ static void __init find_possible_nodes(void) if (!rtas) return; - if (of_property_read_u32_index(rtas, "ibm,current-associativity-domains", - min_common_depth, &numnodes)) { - /* - * ibm,current-associativity-domains is a fairly recent - * property. If it doesn't exist, then fallback on - * ibm,max-associativity-domains. Current denotes what the - * platform can support compared to max which denotes what the - * Hypervisor can support. - */ - if (of_property_read_u32_index(rtas, "ibm,max-associativity-domains", - min_common_depth, &numnodes)) + /* + * ibm,current-associativity-domains is a fairly recent property. If + * it doesn't exist, then fallback on ibm,max-associativity-domains. + * Current denotes what the platform can support compared to max + * which denotes what the Hypervisor can support. + */ + domains = of_get_property(rtas, "ibm,current-associativity-domains", + &prop_length); + if (!domains) { + domains = of_get_property(rtas, "ibm,max-associativity-domains", + &prop_length); + if (!domains) goto out; } - for (i = 0; i < numnodes; i++) { + max_nodes = of_read_number(&domains[min_common_depth], 1); + for (i = 0; i < max_nodes; i++) { if (!node_possible(i)) node_set(i, node_possible_map); } + prop_length /= sizeof(int); + if (prop_length > min_common_depth + 2) + coregroup_enabled = 1; + out: of_node_put(rtas); } -- cgit v1.2.3 From 6e086302816b2ced602bc99641eb0189c05f018a Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 10 Aug 2020 12:48:32 +0530 Subject: powerpc/smp: Allocate cpumask only after searching thread group If allocated earlier and the search fails, then cpu_l1_cache_map cpumask is unnecessarily cleared. However cpu_l1_cache_map can be allocated / cleared after we search thread group. Please note CONFIG_CPUMASK_OFFSTACK is not set on Powerpc. Hence cpumask allocated by zalloc_cpumask_var_node is never freed. Signed-off-by: Srikar Dronamraju Reviewed-by: Gautham R. 
Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200810071834.92514-9-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index a44b9350d2ef..41f76c8cd024 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -790,10 +790,6 @@ static int init_cpu_l1_cache_map(int cpu) if (err) goto out; - zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu), - GFP_KERNEL, - cpu_to_node(cpu)); - cpu_group_start = get_cpu_thread_group_start(cpu, &tg); if (unlikely(cpu_group_start == -1)) { @@ -802,6 +798,9 @@ static int init_cpu_l1_cache_map(int cpu) goto out; } + zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu), + GFP_KERNEL, cpu_to_node(cpu)); + for (i = first_thread; i < first_thread + threads_per_core; i++) { int i_group_start = get_cpu_thread_group_start(i, &tg); -- cgit v1.2.3 From 72730bfc2a2b91a525f38dfc830f598bdb95f216 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 10 Aug 2020 12:48:33 +0530 Subject: powerpc/smp: Create coregroup domain Add percpu coregroup maps and masks to create coregroup domain. If a coregroup doesn't exist, the coregroup domain will be degenerated in favour of SMT/CACHE domain. Do note this patch is only creating stubs for cpu_to_coregroup_id. The actual cpu_to_coregroup_id implementation would be in a subsequent patch. Signed-off-by: Srikar Dronamraju Reviewed-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200810071834.92514-10-srikar@linux.vnet.ibm.com --- arch/powerpc/include/asm/topology.h | 10 +++++++ arch/powerpc/kernel/smp.c | 54 ++++++++++++++++++++++++++++++++++++- arch/powerpc/mm/numa.c | 5 ++++ 3 files changed, 68 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index f0b6300e7dd3..6609174918ab 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -88,12 +88,22 @@ static inline int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR) extern int find_and_online_cpu_nid(int cpu); +extern int cpu_to_coregroup_id(int cpu); #else static inline int find_and_online_cpu_nid(int cpu) { return 0; } +static inline int cpu_to_coregroup_id(int cpu) +{ +#ifdef CONFIG_SMP + return cpu_to_core_id(cpu); +#else + return 0; +#endif +} + #endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */ #include diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 41f76c8cd024..3d96752d6570 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -81,12 +81,22 @@ DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map); DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map); DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); +DEFINE_PER_CPU(cpumask_var_t, cpu_coregroup_map); EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map); EXPORT_PER_CPU_SYMBOL(cpu_core_map); EXPORT_SYMBOL_GPL(has_big_cores); +enum { +#ifdef CONFIG_SCHED_SMT + smt_idx, +#endif + cache_idx, + mc_idx, + die_idx, +}; + #define MAX_THREAD_LIST_SIZE 8 #define THREAD_GROUP_SHARE_L1 1 struct thread_groups { @@ -862,11 +872,27 @@ static const struct cpumask *smallcore_smt_mask(int cpu) } #endif +static struct cpumask *cpu_coregroup_mask(int cpu) +{ + return per_cpu(cpu_coregroup_map, cpu); +} + +static bool 
has_coregroup_support(void) +{ + return coregroup_enabled; +} + +static const struct cpumask *cpu_mc_mask(int cpu) +{ + return cpu_coregroup_mask(cpu); +} + static struct sched_domain_topology_level powerpc_topology[] = { #ifdef CONFIG_SCHED_SMT { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) }, #endif { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) }, + { cpu_mc_mask, SD_INIT_NAME(MC) }, { cpu_cpu_mask, SD_INIT_NAME(DIE) }, { NULL, }, }; @@ -913,6 +939,10 @@ void __init smp_prepare_cpus(unsigned int max_cpus) GFP_KERNEL, cpu_to_node(cpu)); zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu), GFP_KERNEL, cpu_to_node(cpu)); + if (has_coregroup_support()) + zalloc_cpumask_var_node(&per_cpu(cpu_coregroup_map, cpu), + GFP_KERNEL, cpu_to_node(cpu)); + #ifdef CONFIG_NEED_MULTIPLE_NODES /* * numa_node_id() works after this. @@ -930,6 +960,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus) cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid)); cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid)); + if (has_coregroup_support()) + cpumask_set_cpu(boot_cpuid, cpu_coregroup_mask(boot_cpuid)); + init_big_cores(); if (has_big_cores) { cpumask_set_cpu(boot_cpuid, @@ -1234,6 +1267,8 @@ static void remove_cpu_from_masks(int cpu) set_cpus_unrelated(cpu, i, cpu_sibling_mask); if (has_big_cores) set_cpus_unrelated(cpu, i, cpu_smallcore_mask); + if (has_coregroup_support()) + set_cpus_unrelated(cpu, i, cpu_coregroup_mask); } } #endif @@ -1294,6 +1329,20 @@ static void add_cpu_to_masks(int cpu) add_cpu_to_smallcore_masks(cpu); update_mask_by_l2(cpu, cpu_l2_cache_mask); + if (has_coregroup_support()) { + int coregroup_id = cpu_to_coregroup_id(cpu); + + cpumask_set_cpu(cpu, cpu_coregroup_mask(cpu)); + for_each_cpu_and(i, cpu_online_mask, cpu_cpu_mask(cpu)) { + int fcpu = cpu_first_thread_sibling(i); + + if (fcpu == first_thread) + set_cpus_related(cpu, i, cpu_coregroup_mask); + else if (coregroup_id == cpu_to_coregroup_id(i)) + set_cpus_related(cpu, i, cpu_coregroup_mask); + } + } + if (pkg_id == -1) { struct cpumask *(*mask)(int) = cpu_sibling_mask; @@ -1388,9 +1437,12 @@ static void fixup_topology(void) #ifdef CONFIG_SCHED_SMT if (has_big_cores) { pr_info("Big cores detected but using small core scheduling\n"); - powerpc_topology[0].mask = smallcore_smt_mask; + powerpc_topology[smt_idx].mask = smallcore_smt_mask; } #endif + + if (!has_coregroup_support()) + powerpc_topology[mc_idx].mask = powerpc_topology[cache_idx].mask; } void __init smp_cpus_done(unsigned int max_cpus) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index b2c44c5a81fb..dfebca905acb 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1245,6 +1245,11 @@ int find_and_online_cpu_nid(int cpu) return new_nid; } +int cpu_to_coregroup_id(int cpu) +{ + return cpu_to_core_id(cpu); +} + static int topology_update_init(void) { topology_inited = 1; -- cgit v1.2.3 From fa35e868f9ddcbb7984fd5ab7f91aef924fa8543 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 10 Aug 2020 12:48:34 +0530 Subject: powerpc/smp: Implement cpu_to_coregroup_id Lookup the coregroup id from the associativity array. If unable to detect the coregroup id, fallback on the core id. This way, ensure sched_domain degenerates and an extra sched domain is not created. Ideally this function should have been implemented in arch/powerpc/kernel/smp.c. However if its implemented in mm/numa.c, we don't need to find the primary domain again. 
If the device-tree mentions more than one coregroup, then kernel implements only the last or the smallest coregroup, which currently corresponds to the penultimate domain in the device-tree. Signed-off-by: Srikar Dronamraju Reviewed-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200810071834.92514-11-srikar@linux.vnet.ibm.com --- arch/powerpc/mm/numa.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index dfebca905acb..b725fb66e913 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1247,6 +1247,26 @@ int find_and_online_cpu_nid(int cpu) int cpu_to_coregroup_id(int cpu) { + __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0}; + int index; + + if (cpu < 0 || cpu > nr_cpu_ids) + return -1; + + if (!coregroup_enabled) + goto out; + + if (!firmware_has_feature(FW_FEATURE_VPHN)) + goto out; + + if (vphn_get_associativity(cpu, associativity)) + goto out; + + index = of_read_number(associativity, 1); + if (index > min_common_depth + 1) + return of_read_number(&associativity[index - 1], 1); + +out: return cpu_to_core_id(cpu); } -- cgit v1.2.3 From d208e13c6a2277d9fb71fad6a1394c70bdd7b634 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 17 Sep 2020 12:20:16 +1000 Subject: powerpc/process: Fix uninitialised variable error Clang, and GCC with -Wmaybe-uninitialized, can't see that val is unused in get_fpexec_mode(): arch/powerpc/kernel/process.c:1940:7: error: variable 'val' is used uninitialized whenever 'if' condition is true if (cpu_has_feature(CPU_FTR_SPE)) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ We know that CPU_FTR_SPE will only be true iff CONFIG_SPE is also true, but the compiler doesn't. Avoid it by initialising val to zero. 
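To make the reasoning concrete, the pattern can be reduced to a standalone sketch (illustrative only, not the kernel code; the function name, variable and value below are made up):

	/*
	 * Sketch only: 'cond' stands in for cpu_has_feature(CPU_FTR_SPE) and the
	 * CONFIG_SPE relationship the compiler cannot see. The value is written
	 * only when 'cond' is true and read only when 'cond' is true, but a
	 * compiler that does not prove the two tests equivalent reports a
	 * maybe-uninitialized use of 'val'.
	 */
	unsigned int example(int cond)
	{
		unsigned int val;	/* initialising this to 0 avoids the warning */

		if (cond)
			val = 3;	/* only written when cond is true */

		return cond ? val : 0;	/* only read when cond is true */
	}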
Reported-by: kernel test robot Fixes: 532ed1900d37 ("powerpc/process: Remove useless #ifdef CONFIG_SPE") Signed-off-by: Michael Ellerman Reviewed-by: Nick Desaulniers Tested-by: Nick Desaulniers Link: https://lore.kernel.org/r/20200917024509.3253837-1-mpe@ellerman.id.au --- arch/powerpc/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 14d5189b17d8..d421a2c7f822 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1934,7 +1934,7 @@ int set_fpexc_mode(struct task_struct *tsk, unsigned int val) int get_fpexc_mode(struct task_struct *tsk, unsigned long adr) { - unsigned int val; + unsigned int val = 0; if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) { if (cpu_has_feature(CPU_FTR_SPE)) { -- cgit v1.2.3 From bda7673d64b6c2e92423363a756caa657464e096 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 17 Sep 2020 10:06:43 +0800 Subject: powerpc/book3s64: fix link error with CONFIG_PPC_RADIX_MMU=n Fix link error when CONFIG_PPC_RADIX_MMU is disabled: powerpc64-linux-gnu-ld: arch/powerpc/platforms/pseries/lpar.o:(.toc+0x0): undefined reference to `mmu_pid_bits' Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200917020643.90375-1-yangyingliang@huawei.com --- arch/powerpc/platforms/pseries/lpar.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index baf24eacd268..764170fdb0f7 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -1724,6 +1724,7 @@ void __init hpte_init_pseries(void) pseries_lpar_register_process_table(0, 0, 0); } +#ifdef CONFIG_PPC_RADIX_MMU void radix_init_pseries(void) { pr_info("Using radix MMU under hypervisor\n"); @@ -1731,6 +1732,7 @@ void radix_init_pseries(void) pseries_lpar_register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12); } +#endif #ifdef CONFIG_PPC_SMLPAR #define CMO_FREE_HINT_DEFAULT 1 -- cgit v1.2.3 From 96543e7352bded5d6d1a0e0022376ebdd6c1b8ab Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Wed, 16 Sep 2020 10:50:26 +0800 Subject: powerpc/pseries: convert to use DEFINE_SEQ_ATTRIBUTE macro Use DEFINE_SEQ_ATTRIBUTE macro to simplify the code. 
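For a seq_file user named "hcall_inst", DEFINE_SEQ_ATTRIBUTE(hcall_inst) provides roughly the following (a simplified sketch of the helper in <linux/seq_file.h>, not a verbatim expansion; the generated names follow the macro's <name>_open / <name>_fops convention), which is why the hand-rolled open helper and hcall_inst_seq_fops can be dropped and the file_operations becomes hcall_inst_fops:

	/* Simplified sketch of what the macro generates for "hcall_inst" */
	static int hcall_inst_open(struct inode *inode, struct file *file)
	{
		int ret = seq_open(file, &hcall_inst_sops);

		if (!ret && inode->i_private) {
			struct seq_file *seq = file->private_data;

			seq->private = inode->i_private;	/* per-cpu stats pointer */
		}
		return ret;
	}

	static const struct file_operations hcall_inst_fops = {
		.owner		= THIS_MODULE,
		.open		= hcall_inst_open,
		.read		= seq_read,
		.llseek		= seq_lseek,
		.release	= seq_release,
	};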
Signed-off-by: Liu Shixin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200916025026.3992835-1-liushixin2@huawei.com --- arch/powerpc/platforms/pseries/hvCall_inst.c | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c index c40c62ec432e..2c59b4986ea5 100644 --- a/arch/powerpc/platforms/pseries/hvCall_inst.c +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c @@ -70,31 +70,14 @@ static int hc_show(struct seq_file *m, void *p) return 0; } -static const struct seq_operations hcall_inst_seq_ops = { +static const struct seq_operations hcall_inst_sops = { .start = hc_start, .next = hc_next, .stop = hc_stop, .show = hc_show }; -static int hcall_inst_seq_open(struct inode *inode, struct file *file) -{ - int rc; - struct seq_file *seq; - - rc = seq_open(file, &hcall_inst_seq_ops); - seq = file->private_data; - seq->private = file_inode(file)->i_private; - - return rc; -} - -static const struct file_operations hcall_inst_seq_fops = { - .open = hcall_inst_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; +DEFINE_SEQ_ATTRIBUTE(hcall_inst); #define HCALL_ROOT_DIR "hcall_inst" #define CPU_NAME_BUF_SIZE 32 @@ -149,7 +132,7 @@ static int __init hcall_inst_init(void) snprintf(cpu_name_buf, CPU_NAME_BUF_SIZE, "cpu%d", cpu); debugfs_create_file(cpu_name_buf, 0444, hcall_root, per_cpu(hcall_stats, cpu), - &hcall_inst_seq_fops); + &hcall_inst_fops); } return 0; -- cgit v1.2.3 From ef1edbba52883907caf02ab85e0d00a2e4648f05 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 16 Sep 2020 21:56:36 +1000 Subject: powerpc/mm/64s: Fix slb_setup_new_exec() sparse warning Sparse says: symbol slb_setup_new_exec was not declared. Should it be static? No, it should have a declaration in a header, add one. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200916115637.3100484-1-mpe@ellerman.id.au --- arch/powerpc/mm/book3s64/internal.h | 2 ++ arch/powerpc/mm/book3s64/mmu_context.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/book3s64/internal.h b/arch/powerpc/mm/book3s64/internal.h index 7eda0d30d765..c12d78ee42f5 100644 --- a/arch/powerpc/mm/book3s64/internal.h +++ b/arch/powerpc/mm/book3s64/internal.h @@ -13,4 +13,6 @@ static inline bool stress_slb(void) return static_branch_unlikely(&stress_slb_key); } +void slb_setup_new_exec(void); + #endif /* ARCH_POWERPC_MM_BOOK3S64_INTERNAL_H */ diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c index 0ba30b8b935b..1c54821de7bf 100644 --- a/arch/powerpc/mm/book3s64/mmu_context.c +++ b/arch/powerpc/mm/book3s64/mmu_context.c @@ -21,6 +21,8 @@ #include #include +#include "internal.h" + static DEFINE_IDA(mmu_context_ida); static int alloc_context_id(int min_id, int max_id) @@ -48,8 +50,6 @@ int hash__alloc_context_id(void) } EXPORT_SYMBOL_GPL(hash__alloc_context_id); -void slb_setup_new_exec(void); - static int realloc_context_ids(mm_context_t *ctx) { int i, id; -- cgit v1.2.3 From d10ebe79dfae7dc59b6cf77ffa615f0b8dae21bf Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 16 Sep 2020 21:56:37 +1000 Subject: powerpc/perf: Add declarations to fix sparse warnings Sparse warns about all the init functions: symbol init_ppc970_pmu was not declared. Should it be static? symbol init_power5p_pmu was not declared. Should it be static? 
symbol init_power5_pmu was not declared. Should it be static? symbol init_power6_pmu was not declared. Should it be static? symbol init_power7_pmu was not declared. Should it be static? symbol init_power9_pmu was not declared. Should it be static? symbol init_power8_pmu was not declared. Should it be static? symbol init_generic_compat_pmu was not declared. Should it be static? They're already declared in internal.h, so just make sure all the C files include that directly or indirectly. Signed-off-by: Michael Ellerman Reviewed-by: Madhavan Srinivasan Link: https://lore.kernel.org/r/20200916115637.3100484-2-mpe@ellerman.id.au --- arch/powerpc/perf/isa207-common.h | 2 ++ arch/powerpc/perf/power10-pmu.c | 1 - arch/powerpc/perf/power5+-pmu.c | 2 ++ arch/powerpc/perf/power5-pmu.c | 2 ++ arch/powerpc/perf/power6-pmu.c | 2 ++ arch/powerpc/perf/power7-pmu.c | 2 ++ arch/powerpc/perf/ppc970-pmu.c | 2 ++ 7 files changed, 12 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index 044de65e96b9..7025de5e60e7 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -13,6 +13,8 @@ #include #include +#include "internal.h" + #define EVENT_EBB_MASK 1ull #define EVENT_EBB_SHIFT PERF_EVENT_CONFIG_EBB_SHIFT #define EVENT_BHRB_MASK 1ull diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c index 83148656b524..9dbe8f9b89b4 100644 --- a/arch/powerpc/perf/power10-pmu.c +++ b/arch/powerpc/perf/power10-pmu.c @@ -9,7 +9,6 @@ #define pr_fmt(fmt) "power10-pmu: " fmt #include "isa207-common.h" -#include "internal.h" /* * Raw event encoding for Power10: diff --git a/arch/powerpc/perf/power5+-pmu.c b/arch/powerpc/perf/power5+-pmu.c index a62b2cd7914f..3e64b4a1511f 100644 --- a/arch/powerpc/perf/power5+-pmu.c +++ b/arch/powerpc/perf/power5+-pmu.c @@ -10,6 +10,8 @@ #include #include +#include "internal.h" + /* * Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3) */ diff --git a/arch/powerpc/perf/power5-pmu.c b/arch/powerpc/perf/power5-pmu.c index 8732b587cf71..017bb19b73fb 100644 --- a/arch/powerpc/perf/power5-pmu.c +++ b/arch/powerpc/perf/power5-pmu.c @@ -10,6 +10,8 @@ #include #include +#include "internal.h" + /* * Bits in event code for POWER5 (not POWER5++) */ diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c index 0e318cf87129..189974478e9f 100644 --- a/arch/powerpc/perf/power6-pmu.c +++ b/arch/powerpc/perf/power6-pmu.c @@ -10,6 +10,8 @@ #include #include +#include "internal.h" + /* * Bits in event code for POWER6 */ diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index 5e0bf09cf077..bacfab104a1a 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -10,6 +10,8 @@ #include #include +#include "internal.h" + /* * Bits in event code for POWER7 */ diff --git a/arch/powerpc/perf/ppc970-pmu.c b/arch/powerpc/perf/ppc970-pmu.c index d35223fb112c..7d78df97f272 100644 --- a/arch/powerpc/perf/ppc970-pmu.c +++ b/arch/powerpc/perf/ppc970-pmu.c @@ -9,6 +9,8 @@ #include #include +#include "internal.h" + /* * Bits in event code for PPC970 */ -- cgit v1.2.3 From 1ea21ba231f248034e8c794aa675869ca2b97d42 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 19 Aug 2020 11:56:32 +1000 Subject: powerpc: Move arch_cpu_idle_dead() into smp.c arch_cpu_idle_dead() is in idle.c, which makes sense, but it's inside a CONFIG_HOTPLUG_CPU block. 
It would be more at home in smp.c, inside the existing CONFIG_HOTPLUG_CPU block. Note that CONFIG_HOTPLUG_CPU depends on CONFIG_SMP so even though smp.c is not built for SMP=n builds, that's fine. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200819015634.1974478-1-mpe@ellerman.id.au --- arch/powerpc/kernel/idle.c | 8 -------- arch/powerpc/kernel/smp.c | 6 ++++++ 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 422e31d2f5a2..ae0e2632393d 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -41,14 +41,6 @@ static int __init powersave_off(char *arg) } __setup("powersave=off", powersave_off); -#ifdef CONFIG_HOTPLUG_CPU -void arch_cpu_idle_dead(void) -{ - sched_preempt_enable_no_resched(); - cpu_die(); -} -#endif - void arch_cpu_idle(void) { ppc64_runlatch_off(); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 3d96752d6570..24b0476c1d4f 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1489,6 +1489,12 @@ void __cpu_die(unsigned int cpu) smp_ops->cpu_die(cpu); } +void arch_cpu_idle_dead(void) +{ + sched_preempt_enable_no_resched(); + cpu_die(); +} + void cpu_die(void) { /* -- cgit v1.2.3 From bf3c1464db883a953ad7bbed64924480b8b2b244 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 19 Aug 2020 11:56:33 +1000 Subject: powerpc/smp: Fold cpu_die() into its only caller Avoid the eternal confusion between cpu_die() and __cpu_die() by removing the former, folding it into its only caller. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200819015634.1974478-2-mpe@ellerman.id.au --- arch/powerpc/include/asm/smp.h | 1 - arch/powerpc/kernel/smp.c | 4 ---- 2 files changed, 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 041f0b97c45b..0bf80c5b440a 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -30,7 +30,6 @@ extern int spinning_secondaries; extern u32 *cpu_to_phys_id; extern bool coregroup_enabled; -extern void cpu_die(void); extern int cpu_to_chip_id(int cpu); #ifdef CONFIG_SMP diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 24b0476c1d4f..4ae767c455b8 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1492,11 +1492,7 @@ void __cpu_die(unsigned int cpu) void arch_cpu_idle_dead(void) { sched_preempt_enable_no_resched(); - cpu_die(); -} -void cpu_die(void) -{ /* * Disable on the down path. This will be re-enabled by * start_secondary() via start_secondary_resume() below -- cgit v1.2.3 From 39f87561454dc33efb2a3d8354d066207acac8a6 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 19 Aug 2020 11:56:34 +1000 Subject: powerpc/smp: Move ppc_md.cpu_die() to smp_ops.cpu_offline_self() We have smp_ops->cpu_die() and ppc_md.cpu_die(). One of them offlines the current CPU and one offlines another CPU, can you guess which is which? Also one is in smp_ops and one is in ppc_md? So rename ppc_md.cpu_die(), to cpu_offline_self(), because that's what it does. And move it into smp_ops where it belongs. 
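After the rename the two hooks are easier to tell apart. A condensed sketch of the two paths (simplified from the diffs below, with unrelated details elided):

	void arch_cpu_idle_dead(void)		/* runs on the CPU going offline */
	{
		/* preemption and ftrace details elided */
		if (smp_ops->cpu_offline_self)
			smp_ops->cpu_offline_self();	/* e.g. pseries_cpu_offline_self() */

		/* if we return, we re-enter start_secondary */
		start_secondary_resume();
	}

	void __cpu_die(unsigned int cpu)	/* runs on the CPU driving the unplug */
	{
		if (smp_ops->cpu_die)
			smp_ops->cpu_die(cpu);	/* e.g. pseries_cpu_die(): reaps the dying CPU */
	}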
Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200819015634.1974478-3-mpe@ellerman.id.au --- arch/powerpc/include/asm/machdep.h | 1 - arch/powerpc/include/asm/smp.h | 3 +++ arch/powerpc/kernel/smp.c | 4 ++-- arch/powerpc/kernel/sysfs.c | 4 +++- arch/powerpc/platforms/85xx/smp.c | 4 ++-- arch/powerpc/platforms/powermac/pmac.h | 2 +- arch/powerpc/platforms/powermac/sleep.S | 6 +++--- arch/powerpc/platforms/powermac/smp.c | 8 ++++---- arch/powerpc/platforms/powernv/smp.c | 4 ++-- arch/powerpc/platforms/pseries/hotplug-cpu.c | 6 +++--- 10 files changed, 23 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 5082cd496190..95081078aa8a 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -65,7 +65,6 @@ struct machdep_calls { void __noreturn (*restart)(char *cmd); void __noreturn (*halt)(void); void (*panic)(char *str); - void (*cpu_die)(void); long (*time_init)(void); /* Optional, may be NULL */ diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 0bf80c5b440a..635bdf947105 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -50,6 +50,9 @@ struct smp_ops_t { int (*cpu_disable)(void); void (*cpu_die)(unsigned int nr); int (*cpu_bootable)(unsigned int nr); +#ifdef CONFIG_HOTPLUG_CPU + void (*cpu_offline_self)(void); +#endif }; extern int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 4ae767c455b8..58990baa5182 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1499,8 +1499,8 @@ void arch_cpu_idle_dead(void) */ this_cpu_disable_ftrace(); - if (ppc_md.cpu_die) - ppc_md.cpu_die(); + if (smp_ops->cpu_offline_self) + smp_ops->cpu_offline_self(); /* If we return, we re-enter start_secondary */ start_secondary_resume(); diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 5dea98fa2f93..928b29e0ee83 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -1160,6 +1160,7 @@ static int __init topology_init(void) for_each_possible_cpu(cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); +#ifdef CONFIG_HOTPLUG_CPU /* * For now, we just see if the system supports making * the RTAS calls for CPU hotplug. But, there may be a @@ -1167,8 +1168,9 @@ static int __init topology_init(void) * CPU. For instance, the boot cpu might never be valid * for hotplugging. 
*/ - if (ppc_md.cpu_die) + if (smp_ops->cpu_offline_self) c->hotpluggable = 1; +#endif if (cpu_online(cpu) || c->hotpluggable) { register_cpu(c, cpu); diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index fda108bae95f..c6df294054fe 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -112,7 +112,7 @@ static void mpc85xx_take_timebase(void) local_irq_restore(flags); } -static void smp_85xx_mach_cpu_die(void) +static void smp_85xx_cpu_offline_self(void) { unsigned int cpu = smp_processor_id(); @@ -506,7 +506,7 @@ void __init mpc85xx_smp_init(void) if (qoriq_pm_ops) { smp_85xx_ops.give_timebase = mpc85xx_give_timebase; smp_85xx_ops.take_timebase = mpc85xx_take_timebase; - ppc_md.cpu_die = smp_85xx_mach_cpu_die; + smp_85xx_ops.cpu_offline_self = smp_85xx_cpu_offline_self; smp_85xx_ops.cpu_die = qoriq_cpu_kill; } #endif diff --git a/arch/powerpc/platforms/powermac/pmac.h b/arch/powerpc/platforms/powermac/pmac.h index 16a52afdb76e..0d715db434dc 100644 --- a/arch/powerpc/platforms/powermac/pmac.h +++ b/arch/powerpc/platforms/powermac/pmac.h @@ -34,7 +34,7 @@ extern void pmac_check_ht_link(void); extern void pmac_setup_smp(void); extern int psurge_secondary_virq; -extern void low_cpu_die(void) __attribute__((noreturn)); +extern void low_cpu_offline_self(void) __attribute__((noreturn)); extern int pmac_nvram_init(void); extern void pmac_pic_init(void); diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S index 51bfdfe85058..7e0f8ba6e54a 100644 --- a/arch/powerpc/platforms/powermac/sleep.S +++ b/arch/powerpc/platforms/powermac/sleep.S @@ -201,8 +201,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) addi r3,r3,sleep_storage@l stw r5,0(r3) - .globl low_cpu_die -low_cpu_die: + .globl low_cpu_offline_self +low_cpu_offline_self: /* Flush & disable all caches */ bl flush_disable_caches @@ -244,7 +244,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450) mtmsr r2 isync b 1b -_ASM_NOKPROBE_SYMBOL(low_cpu_die) +_ASM_NOKPROBE_SYMBOL(low_cpu_offline_self) /* * Here is the resume code. 
*/ diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index eb23264910e1..a6fedcfb714f 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -920,7 +920,7 @@ static int smp_core99_cpu_disable(void) #ifdef CONFIG_PPC32 -static void pmac_cpu_die(void) +static void pmac_cpu_offline_self(void) { int cpu = smp_processor_id(); @@ -930,12 +930,12 @@ static void pmac_cpu_die(void) generic_set_cpu_dead(cpu); smp_wmb(); mb(); - low_cpu_die(); + low_cpu_offline_self(); } #else /* CONFIG_PPC32 */ -static void pmac_cpu_die(void) +static void pmac_cpu_offline_self(void) { int cpu = smp_processor_id(); @@ -1020,7 +1020,7 @@ void __init pmac_setup_smp(void) #endif /* CONFIG_PPC_PMAC32_PSURGE */ #ifdef CONFIG_HOTPLUG_CPU - ppc_md.cpu_die = pmac_cpu_die; + smp_ops->cpu_offline_self = pmac_cpu_offline_self; #endif } diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index bbf361f23ae8..54c4ba45c7ce 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -158,7 +158,7 @@ static void pnv_flush_interrupts(void) } } -static void pnv_smp_cpu_kill_self(void) +static void pnv_cpu_offline_self(void) { unsigned long srr1, unexpected_mask, wmask; unsigned int cpu; @@ -417,6 +417,7 @@ static struct smp_ops_t pnv_smp_ops = { #ifdef CONFIG_HOTPLUG_CPU .cpu_disable = pnv_smp_cpu_disable, .cpu_die = generic_cpu_die, + .cpu_offline_self = pnv_cpu_offline_self, #endif /* CONFIG_HOTPLUG_CPU */ }; @@ -430,7 +431,6 @@ void __init pnv_smp_init(void) smp_ops = &pnv_smp_ops; #ifdef CONFIG_HOTPLUG_CPU - ppc_md.cpu_die = pnv_smp_cpu_kill_self; #ifdef CONFIG_KEXEC_CORE crash_wake_offline = 1; #endif diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 7a974ed6b240..f2837e33bf5d 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -55,7 +55,7 @@ static void rtas_stop_self(void) panic("Alas, I survived.\n"); } -static void pseries_mach_cpu_die(void) +static void pseries_cpu_offline_self(void) { unsigned int hwcpu = hard_smp_processor_id(); @@ -102,7 +102,7 @@ static int pseries_cpu_disable(void) * to self-destroy so that the cpu-offline thread can send the CPU_DEAD * notifications. * - * OTOH, pseries_mach_cpu_die() is called by the @cpu when it wants to + * OTOH, pseries_cpu_offline_self() is called by the @cpu when it wants to * self-destruct. */ static void pseries_cpu_die(unsigned int cpu) @@ -901,7 +901,7 @@ static int __init pseries_cpu_hotplug_init(void) return 0; } - ppc_md.cpu_die = pseries_mach_cpu_die; + smp_ops->cpu_offline_self = pseries_cpu_offline_self; smp_ops->cpu_disable = pseries_cpu_disable; smp_ops->cpu_die = pseries_cpu_die; -- cgit v1.2.3 From 6c71cfcc01685ef495ca7886471a76e73446424e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 21 Aug 2020 20:34:07 +1000 Subject: powerpc/prom_init: Check display props exist before enabling btext It's possible to enable CONFIG_PPC_EARLY_DEBUG_BOOTX for a pseries kernel (maybe it shouldn't be), which is then booted with qemu/slof. But if you do that the kernel crashes in draw_byte(), with a DAR pointing somewhere near INT_MAX. Adding some debug to prom_init we see that we're not able to read the "address" property from OF, so we're just using whatever junk value was on the stack. 
So check the properties can be read properly from OF, if not we bail out before initialising btext, which avoids the crash. Signed-off-by: Michael Ellerman Reviewed-by: Alexey Kardashevskiy Link: https://lore.kernel.org/r/20200821103407.3362149-1-mpe@ellerman.id.au --- arch/powerpc/kernel/prom_init.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index ae7ec9903191..5090a5ab54e5 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2422,10 +2422,19 @@ static void __init prom_check_displays(void) u32 width, height, pitch, addr; prom_printf("Setting btext !\n"); - prom_getprop(node, "width", &width, 4); - prom_getprop(node, "height", &height, 4); - prom_getprop(node, "linebytes", &pitch, 4); - prom_getprop(node, "address", &addr, 4); + + if (prom_getprop(node, "width", &width, 4) == PROM_ERROR) + return; + + if (prom_getprop(node, "height", &height, 4) == PROM_ERROR) + return; + + if (prom_getprop(node, "linebytes", &pitch, 4) == PROM_ERROR) + return; + + if (prom_getprop(node, "address", &addr, 4) == PROM_ERROR) + return; + prom_printf("W=%d H=%d LB=%d addr=0x%x\n", width, height, pitch, addr); btext_setup_display(width, height, 8, pitch, addr); -- cgit v1.2.3 From 8ec5cb12cd957e59b0470b75d26c901aaf645bc3 Mon Sep 17 00:00:00 2001 From: Qinglang Miao Date: Wed, 16 Sep 2020 14:21:29 +0800 Subject: powerpc/powernv: fix wrong warning message in opalcore_config_init() The logic of the warn output is incorrect. The two args should be exchanged. Signed-off-by: Qinglang Miao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200916062129.190864-1-miaoqinglang@huawei.com --- arch/powerpc/platforms/powernv/opal-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c index 6dba3b62269f..23571f0b555a 100644 --- a/arch/powerpc/platforms/powernv/opal-core.c +++ b/arch/powerpc/platforms/powernv/opal-core.c @@ -510,7 +510,7 @@ static void __init opalcore_config_init(void) idx = be32_to_cpu(opalc_metadata->region_cnt); if (idx > MAX_PT_LOAD_CNT) { pr_warn("WARNING: OPAL regions count (%d) adjusted to limit (%d)", - MAX_PT_LOAD_CNT, idx); + idx, MAX_PT_LOAD_CNT); idx = MAX_PT_LOAD_CNT; } for (i = 0; i < idx; i++) { -- cgit v1.2.3 From 7b2aab5f22f0f7cc9e2f8672c9e65e2e88d30102 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Mon, 14 Sep 2020 23:10:01 +0200 Subject: powerpc/sysfs: Remove unused 'err' variable in sysfs_create_dscr_default() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes a compile error with W=1. 
arch/powerpc/kernel/sysfs.c: In function ‘sysfs_create_dscr_default’: arch/powerpc/kernel/sysfs.c:228:7: error: variable ‘err’ set but not used [-Werror=unused-but-set-variable] int err = 0; ^~~ cc1: all warnings being treated as errors Signed-off-by: Cédric Le Goater Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200914211007.2285999-2-clg@kaod.org --- arch/powerpc/kernel/sysfs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 928b29e0ee83..2e08640bb3b4 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -217,14 +217,13 @@ static DEVICE_ATTR(dscr_default, 0600, static void sysfs_create_dscr_default(void) { if (cpu_has_feature(CPU_FTR_DSCR)) { - int err = 0; int cpu; dscr_default = spr_default_dscr; for_each_possible_cpu(cpu) paca_ptrs[cpu]->dscr_default = dscr_default; - err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default); + device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default); } } #endif /* CONFIG_PPC64 */ -- cgit v1.2.3 From 5ab187e01a5310e1f9cd2f6b192b2343b8bd14cb Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Mon, 14 Sep 2020 23:10:03 +0200 Subject: powerpc/sstep: Remove empty if statement checking for invalid form MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The check should be performed by the caller. This fixes a compile error with W=1. ../arch/powerpc/lib/sstep.c: In function ‘mlsd_8lsd_ea’: ../arch/powerpc/lib/sstep.c:225:3: error: suggest braces around empty body in an ‘if’ statement [-Werror=empty-body] ; /* Invalid form. Should already be checked for by caller! */ ^ Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200914211007.2285999-4-clg@kaod.org --- arch/powerpc/lib/sstep.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index caee8cc77e19..e9dcaba9a4f8 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -219,10 +219,13 @@ static nokprobe_inline unsigned long mlsd_8lsd_ea(unsigned int instr, ea += regs->gpr[ra]; else if (!prefix_r && !ra) ; /* Leave ea as is */ - else if (prefix_r && !ra) + else if (prefix_r) ea += regs->nip; - else if (prefix_r && ra) - ; /* Invalid form. Should already be checked for by caller! */ + + /* + * (prefix_r && ra) is an invalid form. Should already be + * checked for by caller! + */ return ea; } -- cgit v1.2.3 From 2228f19cf90ef796c8d84f54f3a5db2dcc85c83f Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Mon, 14 Sep 2020 23:10:04 +0200 Subject: powerpc/xive: Make debug routines static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes a compile error with W=1. 
CC arch/powerpc/sysdev/xive/common.o ../arch/powerpc/sysdev/xive/common.c:1568:6: error: no previous prototype for ‘xive_debug_show_cpu’ [-Werror=missing-prototypes] void xive_debug_show_cpu(struct seq_file *m, int cpu) ^~~~~~~~~~~~~~~~~~~ ../arch/powerpc/sysdev/xive/common.c:1602:6: error: no previous prototype for ‘xive_debug_show_irq’ [-Werror=missing-prototypes] void xive_debug_show_irq(struct seq_file *m, u32 hw_irq, struct irq_data *d) ^~~~~~~~~~~~~~~~~~~ Fixes: 930914b7d528 ("powerpc/xive: Add a debugfs file to dump internal XIVE state") Signed-off-by: Cédric Le Goater Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200914211007.2285999-5-clg@kaod.org --- arch/powerpc/sysdev/xive/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index f591be9f01f4..a80440af491a 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1565,7 +1565,7 @@ static int __init xive_off(char *arg) } __setup("xive=off", xive_off); -void xive_debug_show_cpu(struct seq_file *m, int cpu) +static void xive_debug_show_cpu(struct seq_file *m, int cpu) { struct xive_cpu *xc = per_cpu(xive_cpu, cpu); @@ -1599,7 +1599,7 @@ void xive_debug_show_cpu(struct seq_file *m, int cpu) seq_puts(m, "\n"); } -void xive_debug_show_irq(struct seq_file *m, u32 hw_irq, struct irq_data *d) +static void xive_debug_show_irq(struct seq_file *m, u32 hw_irq, struct irq_data *d) { struct irq_chip *chip = irq_data_get_irq_chip(d); int rc; -- cgit v1.2.3 From ebbfeef0d8093a06ff39c60105b6650be3344cbe Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Mon, 14 Sep 2020 23:10:07 +0200 Subject: powerpc/32: Declare stack_overflow_exception() prototype MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes a compile error with W=1. CC arch/powerpc/kernel/traps.o ../arch/powerpc/kernel/traps.c:1663:6: error: no previous prototype for ‘stack_overflow_exception’ [-Werror=missing-prototypes] void stack_overflow_exception(struct pt_regs *regs) ^~~~~~~~~~~~~~~~~~~~~~~~ Fixes: 3978eb78517c ("powerpc/32: Add early stack overflow detection with VMAP stack.") Signed-off-by: Cédric Le Goater Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200914211007.2285999-8-clg@kaod.org --- arch/powerpc/include/asm/asm-prototypes.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index de14b1a34d56..4957119604c7 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -67,6 +67,7 @@ void single_step_exception(struct pt_regs *regs); void program_check_exception(struct pt_regs *regs); void alignment_exception(struct pt_regs *regs); void StackOverflow(struct pt_regs *regs); +void stack_overflow_exception(struct pt_regs *regs); void kernel_fp_unavailable_exception(struct pt_regs *regs); void altivec_unavailable_exception(struct pt_regs *regs); void vsx_unavailable_exception(struct pt_regs *regs); -- cgit v1.2.3 From aea948bb80b478ddc2448f7359d574387521a52d Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Tue, 6 Oct 2020 13:02:18 +0530 Subject: powerpc/powernv/elog: Fix race while processing OPAL error log event. 
Every error log reported by OPAL is exported to userspace through a sysfs interface and notified using kobject_uevent(). The userspace daemon (opal_errd) then reads the error log and acknowledges the error log is saved safely to disk. Once acknowledged the kernel removes the respective sysfs file entry causing respective resources to be released including kobject. However it's possible the userspace daemon may already be scanning elog entries when a new sysfs elog entry is created by the kernel. User daemon may read this new entry and ack it even before kernel can notify userspace about it through kobject_uevent() call. If that happens then we have a potential race between elog_ack_store->kobject_put() and kobject_uevent which can lead to use-after-free of a kernfs object resulting in a kernel crash. eg: BUG: Unable to handle kernel data access on read at 0x6b6b6b6b6b6b6bfb Faulting instruction address: 0xc0000000008ff2a0 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV CPU: 27 PID: 805 Comm: irq/29-opal-elo Not tainted 5.9.0-rc2-gcc-8.2.0-00214-g6f56a67bcbb5-dirty #363 ... NIP kobject_uevent_env+0xa0/0x910 LR elog_event+0x1f4/0x2d0 Call Trace: 0x5deadbeef0000122 (unreliable) elog_event+0x1f4/0x2d0 irq_thread_fn+0x4c/0xc0 irq_thread+0x1c0/0x2b0 kthread+0x1c4/0x1d0 ret_from_kernel_thread+0x5c/0x6c This patch fixes this race by protecting the sysfs file creation/notification by holding a reference count on kobject until we safely send kobject_uevent(). The function create_elog_obj() returns the elog object which if used by caller function will end up in use-after-free problem again. However, the return value of create_elog_obj() function isn't being used today and there is no need as well. Hence change it to return void to make this fix complete. Fixes: 774fea1a38c6 ("powerpc/powernv: Read OPAL error log and export it through sysfs") Cc: stable@vger.kernel.org # v3.15+ Reported-by: Oliver O'Halloran Signed-off-by: Mahesh Salgaonkar Signed-off-by: Aneesh Kumar K.V Reviewed-by: Oliver O'Halloran Reviewed-by: Vasant Hegde [mpe: Rework the logic to use a single return, reword comments, add oops] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201006122051.190176-1-mpe@ellerman.id.au --- arch/powerpc/platforms/powernv/opal-elog.c | 33 +++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 62ef7ad995da..5e33b1fc67c2 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -179,14 +179,14 @@ static ssize_t raw_attr_read(struct file *filep, struct kobject *kobj, return count; } -static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type) +static void create_elog_obj(uint64_t id, size_t size, uint64_t type) { struct elog_obj *elog; int rc; elog = kzalloc(sizeof(*elog), GFP_KERNEL); if (!elog) - return NULL; + return; elog->kobj.kset = elog_kset; @@ -219,18 +219,37 @@ static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type) rc = kobject_add(&elog->kobj, NULL, "0x%llx", id); if (rc) { kobject_put(&elog->kobj); - return NULL; + return; } + /* + * As soon as the sysfs file for this elog is created/activated there is + * a chance the opal_errd daemon (or any userspace) might read and + * acknowledge the elog before kobject_uevent() is called. 
If that + * happens then there is a potential race between + * elog_ack_store->kobject_put() and kobject_uevent() which leads to a + * use-after-free of a kernfs object resulting in a kernel crash. + * + * To avoid that, we need to take a reference on behalf of the bin file, + * so that our reference remains valid while we call kobject_uevent(). + * We then drop our reference before exiting the function, leaving the + * bin file to drop the last reference (if it hasn't already). + */ + + /* Take a reference for the bin file */ + kobject_get(&elog->kobj); rc = sysfs_create_bin_file(&elog->kobj, &elog->raw_attr); - if (rc) { + if (rc == 0) { + kobject_uevent(&elog->kobj, KOBJ_ADD); + } else { + /* Drop the reference taken for the bin file */ kobject_put(&elog->kobj); - return NULL; } - kobject_uevent(&elog->kobj, KOBJ_ADD); + /* Drop our reference */ + kobject_put(&elog->kobj); - return elog; + return; } static irqreturn_t elog_event(int irq, void *data) -- cgit v1.2.3 From 9983efa83b0a98da33807ccf5c047e204fdcac4c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 16 Sep 2020 13:02:33 +1000 Subject: powerpc: untangle cputable mce include Having cputable.h include mce.h means it pulls in a bunch of low level headers (e.g., synch.h) which then can't use CPU_FTR_ definitions. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200916030234.4110379-1-npiggin@gmail.com --- arch/powerpc/include/asm/cputable.h | 5 ----- arch/powerpc/kernel/cputable.c | 1 + arch/powerpc/kernel/dt_cpu_ftrs.c | 1 + 3 files changed, 2 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 8ca5885bd5b9..a1a300c1a20e 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -9,11 +9,6 @@ #ifndef __ASSEMBLY__ -/* - * Added to include __machine_check_early_realmode_* functions - */ -#include - /* This structure can grow, it's real size is used by head.S code * via the mkdefs mechanism. */ diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 2aa89c6b2896..b5bc2edef440 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -16,6 +16,7 @@ #include #include #include /* for PTRRELOC on ARCH=ppc */ +#include #include #include diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index f204ad79b6b5..1098863e17ee 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include -- cgit v1.2.3 From 05504b42562066ae27ce3e7dcec37f81dea476cb Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 16 Sep 2020 13:02:34 +1000 Subject: powerpc/64s: Add cp_abort after tlbiel to invalidate copy-buffer address The copy buffer is implemented as a real address in the nest which is translated from EA by copy, and used for memory access by paste. This requires that it be invalidated by TLB invalidation. TLBIE does invalidate the copy buffer, but TLBIEL does not. Add cp_abort to the tlbiel sequence. 
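As a rough illustration of the hazard (a pseudo-sequence, not code from this patch; operands and surrounding locking are elided):

    /*
     * copy   src_ea         ; the nest translates src_ea and latches its RA
     * ...the mapping for src_ea is changed or torn down...
     * ptesync ; tlbiel      ; the local TLB entry is gone, but the latched
     *                       ; copy-buffer RA is not
     * paste  dst_ea         ; data is still taken from the RA latched at
     *                       ; copy time -- i.e. the old physical page
     *
     * Issuing cp_abort after the tlbiel sequence discards the latched copy,
     * so a later paste cannot consume the stale real address.
     */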
Signed-off-by: Nicholas Piggin [mpe: Fixup whitespace and comment formatting] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200916030234.4110379-2-npiggin@gmail.com --- arch/powerpc/include/asm/synch.h | 19 ++++++++++++++++++- arch/powerpc/mm/book3s64/hash_native.c | 8 ++++---- arch/powerpc/mm/book3s64/radix_tlb.c | 12 ++++++------ 3 files changed, 28 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/synch.h b/arch/powerpc/include/asm/synch.h index aca70fb43147..1d67bc8d7bc6 100644 --- a/arch/powerpc/include/asm/synch.h +++ b/arch/powerpc/include/asm/synch.h @@ -3,8 +3,9 @@ #define _ASM_POWERPC_SYNCH_H #ifdef __KERNEL__ +#include #include -#include +#include #ifndef __ASSEMBLY__ extern unsigned int __start___lwsync_fixup, __stop___lwsync_fixup; @@ -20,6 +21,22 @@ static inline void isync(void) { __asm__ __volatile__ ("isync" : : : "memory"); } + +static inline void ppc_after_tlbiel_barrier(void) +{ + asm volatile("ptesync": : :"memory"); + /* + * POWER9, POWER10 need a cp_abort after tlbiel to ensure the copy is + * invalidated correctly. If this is not done, the paste can take data + * from the physical address that was translated at copy time. + * + * POWER9 in practice does not need this, because address spaces with + * accelerators mapped will use tlbie (which does invalidate the copy) + * to invalidate translations. It's not possible to limit POWER10 this + * way due to local copy-paste. + */ + asm volatile(ASM_FTR_IFSET(PPC_CP_ABORT, "", %0) : : "i" (CPU_FTR_ARCH_31) : "memory"); +} #endif /* __ASSEMBLY__ */ #if defined(__powerpc64__) diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c index cf20e5229ce1..0203cdf48c54 100644 --- a/arch/powerpc/mm/book3s64/hash_native.c +++ b/arch/powerpc/mm/book3s64/hash_native.c @@ -82,7 +82,7 @@ static void tlbiel_all_isa206(unsigned int num_sets, unsigned int is) for (set = 0; set < num_sets; set++) tlbiel_hash_set_isa206(set, is); - asm volatile("ptesync": : :"memory"); + ppc_after_tlbiel_barrier(); } static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) @@ -110,7 +110,7 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) */ tlbiel_hash_set_isa300(0, is, 0, 2, 1); - asm volatile("ptesync": : :"memory"); + ppc_after_tlbiel_barrier(); asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory"); } @@ -303,7 +303,7 @@ static inline void tlbie(unsigned long vpn, int psize, int apsize, asm volatile("ptesync": : :"memory"); if (use_local) { __tlbiel(vpn, psize, apsize, ssize); - asm volatile("ptesync": : :"memory"); + ppc_after_tlbiel_barrier(); } else { __tlbie(vpn, psize, apsize, ssize); fixup_tlbie_vpn(vpn, psize, apsize, ssize); @@ -879,7 +879,7 @@ static void native_flush_hash_range(unsigned long number, int local) __tlbiel(vpn, psize, psize, ssize); } pte_iterate_hashed_end(); } - asm volatile("ptesync":::"memory"); + ppc_after_tlbiel_barrier(); } else { int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 143b4fd396f0..b487b489d4b6 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -65,7 +65,7 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) for (set = 1; set < num_sets; set++) tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1); - asm volatile("ptesync": : :"memory"); + ppc_after_tlbiel_barrier(); } void radix__tlbiel_all(unsigned int 
action) @@ -296,7 +296,7 @@ static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric) /* For PWC, only one flush is needed */ if (ric == RIC_FLUSH_PWC) { - asm volatile("ptesync": : :"memory"); + ppc_after_tlbiel_barrier(); return; } @@ -304,7 +304,7 @@ static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric) for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++) __tlbiel_pid(pid, set, RIC_FLUSH_TLB); - asm volatile("ptesync": : :"memory"); + ppc_after_tlbiel_barrier(); asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory"); } @@ -431,7 +431,7 @@ static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid, asm volatile("ptesync": : :"memory"); __tlbiel_va(va, pid, ap, ric); - asm volatile("ptesync": : :"memory"); + ppc_after_tlbiel_barrier(); } static inline void _tlbiel_va_range(unsigned long start, unsigned long end, @@ -442,7 +442,7 @@ static inline void _tlbiel_va_range(unsigned long start, unsigned long end, if (also_pwc) __tlbiel_pid(pid, 0, RIC_FLUSH_PWC); __tlbiel_va_range(start, end, pid, page_size, psize); - asm volatile("ptesync": : :"memory"); + ppc_after_tlbiel_barrier(); } static inline void __tlbie_va_range(unsigned long start, unsigned long end, @@ -949,7 +949,7 @@ is_local: if (hflush) __tlbiel_va_range(hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M); - asm volatile("ptesync": : :"memory"); + ppc_after_tlbiel_barrier(); } else if (cputlb_use_tlbie()) { asm volatile("ptesync": : :"memory"); __tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize); -- cgit v1.2.3 From cdb1ea0276bd6a225aa1203b4829b8c3c0d4d069 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 25 Aug 2020 17:56:12 +1000 Subject: powerpc/pseries: add new branch prediction security bits for link stack The hypervisor interface has defined branch prediction security bits for handling the link stack. Wire them up. 
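A rough sketch of how the resulting SEC_FTR_* flags are typically consumed (modelled on the existing count-cache flush handling; the wrapper function and the two enable_* helpers below are illustrative only and not part of this patch):

    static void update_link_stack_flush(void)
    {
            if (!security_ftr_enabled(SEC_FTR_FLUSH_LINK_STACK))
                    return;         /* firmware did not request a link stack flush */

            if (security_ftr_enabled(SEC_FTR_BCCTR_LINK_FLUSH_ASSIST))
                    enable_link_stack_flush_hw();   /* hypothetical: hardware-assisted flush */
            else
                    enable_link_stack_flush_sw();   /* hypothetical: software flush sequence */
    }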
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200825075612.224656-1-npiggin@gmail.com --- arch/powerpc/include/asm/hvcall.h | 2 ++ arch/powerpc/platforms/pseries/setup.c | 6 ++++++ 2 files changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index e8f116a425f9..c1fbccb04390 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -375,11 +375,13 @@ #define H_CPU_CHAR_THREAD_RECONFIG_CTRL (1ull << 57) // IBM bit 6 #define H_CPU_CHAR_COUNT_CACHE_DISABLED (1ull << 56) // IBM bit 7 #define H_CPU_CHAR_BCCTR_FLUSH_ASSIST (1ull << 54) // IBM bit 9 +#define H_CPU_CHAR_BCCTR_LINK_FLUSH_ASSIST (1ull << 52) // IBM bit 11 #define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0 #define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1 #define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2 #define H_CPU_BEHAV_FLUSH_COUNT_CACHE (1ull << 58) // IBM bit 5 +#define H_CPU_BEHAV_FLUSH_LINK_STACK (1ull << 57) // IBM bit 6 /* Flag values used in H_REGISTER_PROC_TBL hcall */ #define PROC_TABLE_OP_MASK 0x18 diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 2f4ee0a90284..633c45ec406d 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -519,9 +519,15 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result) if (result->character & H_CPU_CHAR_BCCTR_FLUSH_ASSIST) security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST); + if (result->character & H_CPU_CHAR_BCCTR_LINK_FLUSH_ASSIST) + security_ftr_set(SEC_FTR_BCCTR_LINK_FLUSH_ASSIST); + if (result->behaviour & H_CPU_BEHAV_FLUSH_COUNT_CACHE) security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE); + if (result->behaviour & H_CPU_BEHAV_FLUSH_LINK_STACK) + security_ftr_set(SEC_FTR_FLUSH_LINK_STACK); + /* * The features below are enabled by default, so we instead look to see * if firmware has *disabled* them, and clear them if so. -- cgit v1.2.3 From 903fd31d3212ab72d564c68f6cfb5d04db68773e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 15 Sep 2020 21:46:45 +1000 Subject: powerpc/64: fix irq replay missing preempt Prior to commit 3282a3da25bd ("powerpc/64: Implement soft interrupt replay in C"), replayed interrupts returned by the regular interrupt exit code, which performs preemption in case an interrupt had set need_resched. This logic was missed by the conversion. Adding preempt_disable/enable around the interrupt replay and final irq enable will reschedule if needed. Fixes: 3282a3da25bd ("powerpc/64: Implement soft interrupt replay in C") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200915114650.3980244-1-npiggin@gmail.com --- arch/powerpc/kernel/irq.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index bf21ebd36190..77019699606a 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -368,6 +368,12 @@ notrace void arch_local_irq_restore(unsigned long mask) } } + /* + * Disable preempt here, so that the below preempt_enable will + * perform resched if required (a replayed interrupt may set + * need_resched). 
+ */ + preempt_disable(); irq_soft_mask_set(IRQS_ALL_DISABLED); trace_hardirqs_off(); @@ -377,6 +383,7 @@ notrace void arch_local_irq_restore(unsigned long mask) trace_hardirqs_on(); irq_soft_mask_set(IRQS_ENABLED); __hard_irq_enable(); + preempt_enable(); } EXPORT_SYMBOL(arch_local_irq_restore); -- cgit v1.2.3 From 2b48e96be2f9f7151197fd25dc41487054bc6f5b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 15 Sep 2020 21:46:46 +1000 Subject: powerpc/64: fix irq replay pt_regs->softe value Replayed interrupts get an "artificial" struct pt_regs constructed to pass to interrupt handler functions. This did not get the softe field set correctly, it's as though the interrupt has hit while irqs are disabled. It should be IRQS_ENABLED. This is possibly harmless, asynchronous handlers should not be testing if irqs were disabled, but it might be possible for example some code is shared with synchronous or NMI handlers, and it makes more sense if debug output looks at this. Fixes: 3282a3da25bd ("powerpc/64: Implement soft interrupt replay in C") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200915114650.3980244-2-npiggin@gmail.com --- arch/powerpc/kernel/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 77019699606a..3fdad9336885 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -214,7 +214,7 @@ void replay_soft_interrupts(void) struct pt_regs regs; ppc_save_regs(®s); - regs.softe = IRQS_ALL_DISABLED; + regs.softe = IRQS_ENABLED; again: if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) -- cgit v1.2.3 From 012a9a97a8fd6c96d5ec64eb0583220490d95e73 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 15 Sep 2020 21:46:47 +1000 Subject: powerpc/64e: remove PACA_IRQ_EE_EDGE This is not used anywhere. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200915114650.3980244-3-npiggin@gmail.com --- arch/powerpc/include/asm/hw_irq.h | 5 ++--- arch/powerpc/kernel/exceptions-64e.S | 1 - arch/powerpc/kernel/irq.c | 23 ----------------------- 3 files changed, 2 insertions(+), 27 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 538698facb80..2034f9590a8c 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -25,9 +25,8 @@ #define PACA_IRQ_DBELL 0x02 #define PACA_IRQ_EE 0x04 #define PACA_IRQ_DEC 0x08 /* Or FIT */ -#define PACA_IRQ_EE_EDGE 0x10 /* BookE only */ -#define PACA_IRQ_HMI 0x20 -#define PACA_IRQ_PMI 0x40 +#define PACA_IRQ_HMI 0x10 +#define PACA_IRQ_PMI 0x20 /* * Some soft-masked interrupts must be hard masked until they are replayed diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index d9ed79415100..ca444ca82b8d 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -988,7 +988,6 @@ kernel_dbg_exc: .endm masked_interrupt_book3e_0x500: - // XXX When adding support for EPR, use PACA_IRQ_EE_EDGE masked_interrupt_book3e PACA_IRQ_EE 1 masked_interrupt_book3e_0x900: diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 3fdad9336885..736a6b56e7d6 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -181,16 +181,6 @@ notrace unsigned int __check_irq_replay(void) return 0x500; } - /* - * Check if an EPR external interrupt happened this bit is typically - * set if we need to handle another "edge" interrupt from within the - * MPIC "EPR" handler. - */ - if (happened & PACA_IRQ_EE_EDGE) { - local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE; - return 0x500; - } - if (happened & PACA_IRQ_DBELL) { local_paca->irq_happened &= ~PACA_IRQ_DBELL; return 0x280; @@ -270,19 +260,6 @@ again: hard_irq_disable(); } - /* - * Check if an EPR external interrupt happened this bit is typically - * set if we need to handle another "edge" interrupt from within the - * MPIC "EPR" handler. - */ - if (IS_ENABLED(CONFIG_PPC_BOOK3E) && (happened & PACA_IRQ_EE_EDGE)) { - local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE; - regs.trap = 0x500; - do_IRQ(®s); - if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) - hard_irq_disable(); - } - if (IS_ENABLED(CONFIG_PPC_DOORBELL) && (happened & PACA_IRQ_DBELL)) { local_paca->irq_happened &= ~PACA_IRQ_DBELL; if (IS_ENABLED(CONFIG_PPC_BOOK3E)) -- cgit v1.2.3 From 903dd1ff453e458fc7608ee4df42a6df16d3d1a0 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 15 Sep 2020 21:46:48 +1000 Subject: powerpc/64e: remove 64s specific interrupt soft-mask code Since the assembly soft-masking code was moved to 64e specific, there are some 64s specific interrupt types still there. Remove them. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200915114650.3980244-4-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64e.S | 10 ---------- arch/powerpc/kernel/irq.c | 2 +- 2 files changed, 1 insertion(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index ca444ca82b8d..f579ce46eef2 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -1302,16 +1302,6 @@ fast_exception_return: addi r3,r1,STACK_FRAME_OVERHEAD; bl do_IRQ b ret_from_except -1: cmpwi cr0,r3,0xf00 - bne 1f - addi r3,r1,STACK_FRAME_OVERHEAD; - bl performance_monitor_exception - b ret_from_except -1: cmpwi cr0,r3,0xe60 - bne 1f - addi r3,r1,STACK_FRAME_OVERHEAD; - bl handle_hmi_exception - b ret_from_except 1: cmpwi cr0,r3,0x900 bne 1f addi r3,r1,STACK_FRAME_OVERHEAD; diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 736a6b56e7d6..b725509f9073 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -113,7 +113,7 @@ static inline notrace int decrementer_check_overflow(void) #ifdef CONFIG_PPC_BOOK3E /* This is called whenever we are re-enabling interrupts - * and returns either 0 (nothing to do) or 500/900/280/a00/e80 if + * and returns either 0 (nothing to do) or 500/900/280 if * there's an EE, DEC or DBELL to generate. * * This is called in two contexts: From arch_local_irq_restore() -- cgit v1.2.3 From 455575533c7aa294d3c0284d59a77ae9a60c0537 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 15 Sep 2020 21:46:49 +1000 Subject: powerpc/64: make restore_interrupts 64e only This is not used by 64s. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200915114650.3980244-5-npiggin@gmail.com --- arch/powerpc/kernel/irq.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index b725509f9073..631e6d236c97 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -191,6 +191,25 @@ notrace unsigned int __check_irq_replay(void) return 0; } + +/* + * This is specifically called by assembly code to re-enable interrupts + * if they are currently disabled. This is typically called before + * schedule() or do_signal() when returning to userspace. We do it + * in C to avoid the burden of dealing with lockdep etc... + * + * NOTE: This is called with interrupts hard disabled but not marked + * as such in paca->irq_happened, so we need to resync this. + */ +void notrace restore_interrupts(void) +{ + if (irqs_disabled()) { + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + local_irq_enable(); + } else + __hard_irq_enable(); +} + #endif /* CONFIG_PPC_BOOK3E */ void replay_soft_interrupts(void) @@ -364,24 +383,6 @@ notrace void arch_local_irq_restore(unsigned long mask) } EXPORT_SYMBOL(arch_local_irq_restore); -/* - * This is specifically called by assembly code to re-enable interrupts - * if they are currently disabled. This is typically called before - * schedule() or do_signal() when returning to userspace. We do it - * in C to avoid the burden of dealing with lockdep etc... - * - * NOTE: This is called with interrupts hard disabled but not marked - * as such in paca->irq_happened, so we need to resync this. 
- */ -void notrace restore_interrupts(void) -{ - if (irqs_disabled()) { - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; - local_irq_enable(); - } else - __hard_irq_enable(); -} - /* * This is a helper to use when about to go into idle low-power * when the latter has the side effect of re-enabling interrupts -- cgit v1.2.3 From 4366337490cbe5a35b50e83755be629a502ec7e2 Mon Sep 17 00:00:00 2001 From: Wang Wensheng Date: Fri, 18 Sep 2020 08:59:51 +0000 Subject: powerpc/papr_scm: Fix warnings about undeclared variable Build the kernel with 'make C=2': arch/powerpc/platforms/pseries/papr_scm.c:825:1: warning: symbol 'dev_attr_perf_stats' was not declared. Should it be static? Signed-off-by: Wang Wensheng Reviewed-by: Vaibhav Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200918085951.44983-1-wangwensheng4@huawei.com --- arch/powerpc/platforms/pseries/papr_scm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 5493bc847bd0..a95aa425e7d4 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -823,7 +823,7 @@ free_stats: kfree(stats); return rc ? rc : (ssize_t)seq_buf_used(&s); } -DEVICE_ATTR_ADMIN_RO(perf_stats); +static DEVICE_ATTR_ADMIN_RO(perf_stats); static ssize_t flags_show(struct device *dev, struct device_attribute *attr, char *buf) -- cgit v1.2.3 From 5c5e46dad939b2bf4df04293ab9ac68abd7c1f55 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Thu, 24 Sep 2020 11:49:22 +1000 Subject: powerpc: PPC_SECURE_BOOT should not require PowerNV In commit 61f879d97ce4 ("powerpc/pseries: Detect secure and trusted boot state of the system.") we taught the kernel how to understand the secure-boot parameters used by a pseries guest. However, CONFIG_PPC_SECURE_BOOT still requires PowerNV. I didn't catch this because pseries_le_defconfig includes support for PowerNV and so everything still worked. Indeed, most configs will. Nonetheless, technically PPC_SECURE_BOOT doesn't require PowerNV any more. The secure variables support (PPC_SECVAR_SYSFS) doesn't do anything on pSeries yet, but I don't think it's worth adding a new condition - at some stage we'll want to add a backend for pSeries anyway. Fixes: 61f879d97ce4 ("powerpc/pseries: Detect secure and trusted boot state of the system.") Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200924014922.172914-1-dja@axtens.net --- arch/powerpc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index d02b51174fe4..fe0e6b317cc2 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -984,7 +984,7 @@ config PPC_MEM_KEYS config PPC_SECURE_BOOT prompt "Enable secure boot support" bool - depends on PPC_POWERNV + depends on PPC_POWERNV || PPC_PSERIES depends on IMA_ARCH_POLICY imply IMA_SECURE_AND_OR_TRUSTED_BOOT help -- cgit v1.2.3 From 874dc62f548f28649ac3d7e31025b1e8cec3868a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Sep 2020 06:13:10 +0200 Subject: powerpc: switch 85xx defconfigs from legacy ide to libata Switch the 85xx defconfigs from the soon to be removed legacy ide driver to libata. 
Signed-off-by: Christoph Hellwig Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200924041310.520970-1-hch@lst.de --- arch/powerpc/configs/85xx/mpc85xx_cds_defconfig | 6 +++--- arch/powerpc/configs/85xx/tqm8540_defconfig | 6 +++--- arch/powerpc/configs/85xx/tqm8541_defconfig | 6 +++--- arch/powerpc/configs/85xx/tqm8555_defconfig | 6 +++--- arch/powerpc/configs/85xx/tqm8560_defconfig | 6 +++--- 5 files changed, 15 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig index 0683d8c292a8..cea72e85ed26 100644 --- a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig +++ b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig @@ -29,9 +29,9 @@ CONFIG_SYN_COOKIES=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 -CONFIG_IDE=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_VIA82CXXX=y +CONFIG_ATA=y +CONFIG_ATA_GENERIC=y +CONFIG_PATA_VIA=y CONFIG_NETDEVICES=y CONFIG_GIANFAR=y CONFIG_E1000=y diff --git a/arch/powerpc/configs/85xx/tqm8540_defconfig b/arch/powerpc/configs/85xx/tqm8540_defconfig index 98982a0e82d8..bbf040aa1f9a 100644 --- a/arch/powerpc/configs/85xx/tqm8540_defconfig +++ b/arch/powerpc/configs/85xx/tqm8540_defconfig @@ -30,9 +30,9 @@ CONFIG_MTD_CFI_AMDSTD=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 -CONFIG_IDE=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_VIA82CXXX=y +CONFIG_ATA=y +CONFIG_ATA_GENERIC=y +CONFIG_PATA_VIA=y CONFIG_NETDEVICES=y CONFIG_GIANFAR=y CONFIG_E100=y diff --git a/arch/powerpc/configs/85xx/tqm8541_defconfig b/arch/powerpc/configs/85xx/tqm8541_defconfig index a6e21db1dafe..523ad8dcfd9d 100644 --- a/arch/powerpc/configs/85xx/tqm8541_defconfig +++ b/arch/powerpc/configs/85xx/tqm8541_defconfig @@ -30,9 +30,9 @@ CONFIG_MTD_CFI_AMDSTD=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 -CONFIG_IDE=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_VIA82CXXX=y +CONFIG_ATA=y +CONFIG_ATA_GENERIC=y +CONFIG_PATA_VIA=y CONFIG_NETDEVICES=y CONFIG_GIANFAR=y CONFIG_E100=y diff --git a/arch/powerpc/configs/85xx/tqm8555_defconfig b/arch/powerpc/configs/85xx/tqm8555_defconfig index ca1de3979474..0032ce1e8c9c 100644 --- a/arch/powerpc/configs/85xx/tqm8555_defconfig +++ b/arch/powerpc/configs/85xx/tqm8555_defconfig @@ -30,9 +30,9 @@ CONFIG_MTD_CFI_AMDSTD=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 -CONFIG_IDE=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_VIA82CXXX=y +CONFIG_ATA=y +CONFIG_ATA_GENERIC=y +CONFIG_PATA_VIA=y CONFIG_NETDEVICES=y CONFIG_GIANFAR=y CONFIG_E100=y diff --git a/arch/powerpc/configs/85xx/tqm8560_defconfig b/arch/powerpc/configs/85xx/tqm8560_defconfig index ca3b8c8ef30f..a80b971f7d6e 100644 --- a/arch/powerpc/configs/85xx/tqm8560_defconfig +++ b/arch/powerpc/configs/85xx/tqm8560_defconfig @@ -30,9 +30,9 @@ CONFIG_MTD_CFI_AMDSTD=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 -CONFIG_IDE=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_VIA82CXXX=y +CONFIG_ATA=y +CONFIG_ATA_GENERIC=y +CONFIG_PATA_VIA=y CONFIG_NETDEVICES=y CONFIG_GIANFAR=y CONFIG_E100=y -- cgit v1.2.3 From d125aedb404204de0579b16028096b2cc09e4deb Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 18 Sep 2020 19:30:42 +1000 Subject: powerpc/eeh: Rework EEH initialisation Drop the EEH register / unregister ops thing and have the platform pass the ops structure into eeh_init() directly. 
This takes one initcall out of the EEH setup path and it means we're only doing EEH setup on the platforms which actually support it. It's also less code and generally easier to follow. No functional changes. Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200918093050.37344-1-oohall@gmail.com --- arch/powerpc/include/asm/eeh.h | 3 +- arch/powerpc/kernel/eeh.c | 87 +++++----------------------- arch/powerpc/platforms/powernv/eeh-powernv.c | 4 +- arch/powerpc/platforms/pseries/eeh_pseries.c | 5 +- 4 files changed, 21 insertions(+), 78 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index d5f369bcd130..765bcf63edea 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -295,8 +295,7 @@ const char *eeh_pe_loc_get(struct eeh_pe *pe); struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe); void eeh_show_enabled(void); -int __init eeh_ops_register(struct eeh_ops *ops); -int __exit eeh_ops_unregister(const char *name); +int __init eeh_init(struct eeh_ops *ops); int eeh_check_failure(const volatile void __iomem *token); int eeh_dev_check_failure(struct eeh_dev *edev); void eeh_addr_cache_init(void); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 94682382fc8c..28a0ea5d9faa 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -929,56 +929,6 @@ void eeh_save_bars(struct eeh_dev *edev) edev->config_space[1] |= PCI_COMMAND_MASTER; } -/** - * eeh_ops_register - Register platform dependent EEH operations - * @ops: platform dependent EEH operations - * - * Register the platform dependent EEH operation callback - * functions. The platform should call this function before - * any other EEH operations. - */ -int __init eeh_ops_register(struct eeh_ops *ops) -{ - if (!ops->name) { - pr_warn("%s: Invalid EEH ops name for %p\n", - __func__, ops); - return -EINVAL; - } - - if (eeh_ops && eeh_ops != ops) { - pr_warn("%s: EEH ops of platform %s already existing (%s)\n", - __func__, eeh_ops->name, ops->name); - return -EEXIST; - } - - eeh_ops = ops; - - return 0; -} - -/** - * eeh_ops_unregister - Unreigster platform dependent EEH operations - * @name: name of EEH platform operations - * - * Unregister the platform dependent EEH operation callback - * functions. - */ -int __exit eeh_ops_unregister(const char *name) -{ - if (!name || !strlen(name)) { - pr_warn("%s: Invalid EEH ops name\n", - __func__); - return -EINVAL; - } - - if (eeh_ops && !strcmp(eeh_ops->name, name)) { - eeh_ops = NULL; - return 0; - } - - return -EEXIST; -} - static int eeh_reboot_notifier(struct notifier_block *nb, unsigned long action, void *unused) { @@ -991,25 +941,22 @@ static struct notifier_block eeh_reboot_nb = { }; /** - * eeh_init - EEH initialization - * - * Initialize EEH by trying to enable it for all of the adapters in the system. - * As a side effect we can determine here if eeh is supported at all. - * Note that we leave EEH on so failed config cycles won't cause a machine - * check. If a user turns off EEH for a particular adapter they are really - * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't - * grant access to a slot if EEH isn't enabled, and so we always enable - * EEH for all slots/all devices. + * eeh_init - System wide EEH initialization * - * The eeh-force-off option disables EEH checking globally, for all slots. - * Even if force-off is set, the EEH hardware is still enabled, so that - * newer systems can boot. 
+ * It's the platform's job to call this from an arch_initcall(). */ -static int eeh_init(void) +int eeh_init(struct eeh_ops *ops) { struct pci_controller *hose, *tmp; int ret = 0; + /* the platform should only initialise EEH once */ + if (WARN_ON(eeh_ops)) + return -EEXIST; + if (WARN_ON(!ops)) + return -ENOENT; + eeh_ops = ops; + /* Register reboot notifier */ ret = register_reboot_notifier(&eeh_reboot_nb); if (ret) { @@ -1018,13 +965,13 @@ static int eeh_init(void) return ret; } - /* call platform initialization function */ - if (!eeh_ops) { - pr_warn("%s: Platform EEH operation not found\n", - __func__); - return -EEXIST; - } else if ((ret = eeh_ops->init())) + if (eeh_ops->init) + ret = eeh_ops->init(); + if (ret) { + pr_warn("%s: platform EEH init failed (%d)\n", + __func__, ret); return ret; + } /* Initialize PHB PEs */ list_for_each_entry_safe(hose, tmp, &hose_list, list_node) @@ -1036,8 +983,6 @@ static int eeh_init(void) return eeh_event_init(); } -core_initcall_sync(eeh_init); - static int eeh_device_notifier(struct notifier_block *nb, unsigned long action, void *data) { diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 663bd69ac51b..33938f8a3fba 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -1717,7 +1717,7 @@ static int __init eeh_powernv_init(void) { int ret = -EINVAL; - ret = eeh_ops_register(&pnv_eeh_ops); + ret = eeh_init(&pnv_eeh_ops); if (!ret) pr_info("EEH: PowerNV platform initialized\n"); else @@ -1725,4 +1725,4 @@ static int __init eeh_powernv_init(void) return ret; } -machine_early_initcall(powernv, eeh_powernv_init); +machine_core_initcall_sync(powernv, eeh_powernv_init); diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 1db74cec72bc..df32b8ccbd59 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -990,13 +990,12 @@ static int __init eeh_pseries_init(void) { int ret; - ret = eeh_ops_register(&pseries_eeh_ops); + ret = eeh_init(&pseries_eeh_ops); if (!ret) pr_info("EEH: pSeries platform initialized\n"); else pr_info("EEH: pSeries platform initialization failure (%d)\n", ret); - return ret; } -machine_early_initcall(pseries, eeh_pseries_init); +machine_core_initcall_sync(pseries, eeh_pseries_init); -- cgit v1.2.3 From 82a1ea21f1bac42eb8e3f77d5d249201855f2c85 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 18 Sep 2020 19:30:43 +1000 Subject: powerpc/powernv: Stop using eeh_ops->init() Fold pnv_eeh_init() into eeh_powernv_init() rather than having eeh_init() call it via eeh_ops->init(). It's simpler and it'll let us delete eeh_ops.init. 
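Taken together with the following patches in this series, the per-platform setup ends up looking roughly like this (a condensed sketch of the powernv case; the feature-flag and diag-buffer sizing done before eeh_init() is elided):

    static int __init eeh_powernv_init(void)
    {
            /* platform flags, diag buffer sizing, etc. elided */
            return eeh_init(&pnv_eeh_ops);
    }
    machine_arch_initcall(powernv, eeh_powernv_init);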
Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200918093050.37344-2-oohall@gmail.com --- arch/powerpc/platforms/powernv/eeh-powernv.c | 94 +++++++++++++--------------- 1 file changed, 45 insertions(+), 49 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 33938f8a3fba..5db92f39887a 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -44,54 +44,6 @@ static void pnv_pcibios_bus_add_device(struct pci_dev *pdev) eeh_probe_device(pdev); } -static int pnv_eeh_init(void) -{ - struct pci_controller *hose; - struct pnv_phb *phb; - int max_diag_size = PNV_PCI_DIAG_BUF_SIZE; - - if (!firmware_has_feature(FW_FEATURE_OPAL)) { - pr_warn("%s: OPAL is required !\n", - __func__); - return -EINVAL; - } - - /* Set probe mode */ - eeh_add_flag(EEH_PROBE_MODE_DEV); - - /* - * P7IOC blocks PCI config access to frozen PE, but PHB3 - * doesn't do that. So we have to selectively enable I/O - * prior to collecting error log. - */ - list_for_each_entry(hose, &hose_list, list_node) { - phb = hose->private_data; - - if (phb->model == PNV_PHB_MODEL_P7IOC) - eeh_add_flag(EEH_ENABLE_IO_FOR_LOG); - - if (phb->diag_data_size > max_diag_size) - max_diag_size = phb->diag_data_size; - - /* - * PE#0 should be regarded as valid by EEH core - * if it's not the reserved one. Currently, we - * have the reserved PE#255 and PE#127 for PHB3 - * and P7IOC separately. So we should regard - * PE#0 as valid for PHB3 and P7IOC. - */ - if (phb->ioda.reserved_pe_idx != 0) - eeh_add_flag(EEH_VALID_PE_ZERO); - - break; - } - - eeh_set_pe_aux_size(max_diag_size); - ppc_md.pcibios_bus_add_device = pnv_pcibios_bus_add_device; - - return 0; -} - static irqreturn_t pnv_eeh_event(int irq, void *data) { /* @@ -1674,7 +1626,6 @@ static int pnv_eeh_restore_config(struct eeh_dev *edev) static struct eeh_ops pnv_eeh_ops = { .name = "powernv", - .init = pnv_eeh_init, .probe = pnv_eeh_probe, .set_option = pnv_eeh_set_option, .get_state = pnv_eeh_get_state, @@ -1715,8 +1666,53 @@ DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_pci_fixup_vf_mps); */ static int __init eeh_powernv_init(void) { + int max_diag_size = PNV_PCI_DIAG_BUF_SIZE; + struct pci_controller *hose; + struct pnv_phb *phb; int ret = -EINVAL; + if (!firmware_has_feature(FW_FEATURE_OPAL)) { + pr_warn("%s: OPAL is required !\n", __func__); + return -EINVAL; + } + + /* Set probe mode */ + eeh_add_flag(EEH_PROBE_MODE_DEV); + + /* + * P7IOC blocks PCI config access to frozen PE, but PHB3 + * doesn't do that. So we have to selectively enable I/O + * prior to collecting error log. + */ + list_for_each_entry(hose, &hose_list, list_node) { + phb = hose->private_data; + + if (phb->model == PNV_PHB_MODEL_P7IOC) + eeh_add_flag(EEH_ENABLE_IO_FOR_LOG); + + if (phb->diag_data_size > max_diag_size) + max_diag_size = phb->diag_data_size; + + /* + * PE#0 should be regarded as valid by EEH core + * if it's not the reserved one. Currently, we + * have the reserved PE#255 and PE#127 for PHB3 + * and P7IOC separately. So we should regard + * PE#0 as valid for PHB3 and P7IOC. + */ + if (phb->ioda.reserved_pe_idx != 0) + eeh_add_flag(EEH_VALID_PE_ZERO); + + break; + } + + /* + * eeh_init() allocates the eeh_pe and its aux data buf so the + * size needs to be set before calling eeh_init(). 
+ */ + eeh_set_pe_aux_size(max_diag_size); + ppc_md.pcibios_bus_add_device = pnv_pcibios_bus_add_device; + ret = eeh_init(&pnv_eeh_ops); if (!ret) pr_info("EEH: PowerNV platform initialized\n"); -- cgit v1.2.3 From 1f8fa0cd6a848ff072bffe0ee776554387128f60 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 18 Sep 2020 19:30:44 +1000 Subject: powerpc/pseries: Stop using eeh_ops->init() Fold pseries_eeh_init() into eeh_pseries_init() rather than having eeh_init() call it via eeh_ops->init(). It's simpler and it'll let us delete eeh_ops.init. Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200918093050.37344-3-oohall@gmail.com --- arch/powerpc/platforms/pseries/eeh_pseries.c | 155 ++++++++++++--------------- 1 file changed, 71 insertions(+), 84 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index df32b8ccbd59..a3ed6a0507da 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -237,88 +237,6 @@ static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX]; static DEFINE_SPINLOCK(slot_errbuf_lock); static int eeh_error_buf_size; -/** - * pseries_eeh_init - EEH platform dependent initialization - * - * EEH platform dependent initialization on pseries. - */ -static int pseries_eeh_init(void) -{ - struct pci_controller *phb; - struct pci_dn *pdn; - int addr, config_addr; - - /* figure out EEH RTAS function call tokens */ - ibm_set_eeh_option = rtas_token("ibm,set-eeh-option"); - ibm_set_slot_reset = rtas_token("ibm,set-slot-reset"); - ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2"); - ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state"); - ibm_slot_error_detail = rtas_token("ibm,slot-error-detail"); - ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2"); - ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info"); - ibm_configure_pe = rtas_token("ibm,configure-pe"); - - /* - * ibm,configure-pe and ibm,configure-bridge have the same semantics, - * however ibm,configure-pe can be faster. If we can't find - * ibm,configure-pe then fall back to using ibm,configure-bridge. - */ - if (ibm_configure_pe == RTAS_UNKNOWN_SERVICE) - ibm_configure_pe = rtas_token("ibm,configure-bridge"); - - /* - * Necessary sanity check. We needn't check "get-config-addr-info" - * and its variant since the old firmware probably support address - * of domain/bus/slot/function for EEH RTAS operations. 
- */ - if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE || - ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE || - (ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE && - ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE) || - ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE || - ibm_configure_pe == RTAS_UNKNOWN_SERVICE) { - pr_info("EEH functionality not supported\n"); - return -EINVAL; - } - - /* Initialize error log lock and size */ - spin_lock_init(&slot_errbuf_lock); - eeh_error_buf_size = rtas_token("rtas-error-log-max"); - if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) { - pr_info("%s: unknown EEH error log size\n", - __func__); - eeh_error_buf_size = 1024; - } else if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) { - pr_info("%s: EEH error log size %d exceeds the maximal %d\n", - __func__, eeh_error_buf_size, RTAS_ERROR_LOG_MAX); - eeh_error_buf_size = RTAS_ERROR_LOG_MAX; - } - - /* Set EEH probe mode */ - eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG); - - /* Set EEH machine dependent code */ - ppc_md.pcibios_bus_add_device = pseries_pcibios_bus_add_device; - - if (is_kdump_kernel() || reset_devices) { - pr_info("Issue PHB reset ...\n"); - list_for_each_entry(phb, &hose_list, list_node) { - pdn = list_first_entry(&PCI_DN(phb->dn)->child_list, struct pci_dn, list); - addr = (pdn->busno << 16) | (pdn->devfn << 8); - config_addr = pseries_eeh_get_config_addr(phb, addr); - /* invalid PE config addr */ - if (config_addr == 0) - continue; - - pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_FUNDAMENTAL); - pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_DEACTIVATE); - pseries_eeh_phb_configure_bridge(phb, config_addr); - } - } - - return 0; -} - static int pseries_eeh_cap_start(struct pci_dn *pdn) { u32 status; @@ -963,7 +881,6 @@ static int pseries_notify_resume(struct eeh_dev *edev) static struct eeh_ops pseries_eeh_ops = { .name = "pseries", - .init = pseries_eeh_init, .probe = pseries_eeh_probe, .set_option = pseries_eeh_set_option, .get_state = pseries_eeh_get_state, @@ -988,7 +905,77 @@ static struct eeh_ops pseries_eeh_ops = { */ static int __init eeh_pseries_init(void) { - int ret; + struct pci_controller *phb; + struct pci_dn *pdn; + int ret, addr, config_addr; + + /* figure out EEH RTAS function call tokens */ + ibm_set_eeh_option = rtas_token("ibm,set-eeh-option"); + ibm_set_slot_reset = rtas_token("ibm,set-slot-reset"); + ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2"); + ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state"); + ibm_slot_error_detail = rtas_token("ibm,slot-error-detail"); + ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2"); + ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info"); + ibm_configure_pe = rtas_token("ibm,configure-pe"); + + /* + * ibm,configure-pe and ibm,configure-bridge have the same semantics, + * however ibm,configure-pe can be faster. If we can't find + * ibm,configure-pe then fall back to using ibm,configure-bridge. + */ + if (ibm_configure_pe == RTAS_UNKNOWN_SERVICE) + ibm_configure_pe = rtas_token("ibm,configure-bridge"); + + /* + * Necessary sanity check. We needn't check "get-config-addr-info" + * and its variant since the old firmware probably support address + * of domain/bus/slot/function for EEH RTAS operations. 
+ */ + if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE || + ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE || + (ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE && + ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE) || + ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE || + ibm_configure_pe == RTAS_UNKNOWN_SERVICE) { + pr_info("EEH functionality not supported\n"); + return -EINVAL; + } + + /* Initialize error log lock and size */ + spin_lock_init(&slot_errbuf_lock); + eeh_error_buf_size = rtas_token("rtas-error-log-max"); + if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) { + pr_info("%s: unknown EEH error log size\n", + __func__); + eeh_error_buf_size = 1024; + } else if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) { + pr_info("%s: EEH error log size %d exceeds the maximal %d\n", + __func__, eeh_error_buf_size, RTAS_ERROR_LOG_MAX); + eeh_error_buf_size = RTAS_ERROR_LOG_MAX; + } + + /* Set EEH probe mode */ + eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG); + + /* Set EEH machine dependent code */ + ppc_md.pcibios_bus_add_device = pseries_pcibios_bus_add_device; + + if (is_kdump_kernel() || reset_devices) { + pr_info("Issue PHB reset ...\n"); + list_for_each_entry(phb, &hose_list, list_node) { + pdn = list_first_entry(&PCI_DN(phb->dn)->child_list, struct pci_dn, list); + addr = (pdn->busno << 16) | (pdn->devfn << 8); + config_addr = pseries_eeh_get_config_addr(phb, addr); + /* invalid PE config addr */ + if (config_addr == 0) + continue; + + pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_FUNDAMENTAL); + pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_DEACTIVATE); + pseries_eeh_phb_configure_bridge(phb, config_addr); + } + } ret = eeh_init(&pseries_eeh_ops); if (!ret) -- cgit v1.2.3 From 5d69e46a2104050c0a458c6bf6abba5f58f56e4c Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 18 Sep 2020 19:30:45 +1000 Subject: powerpc/eeh: Delete eeh_ops->init No longer used since the platforms perform their EEH initialisation before calling eeh_init(). Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200918093050.37344-4-oohall@gmail.com --- arch/powerpc/include/asm/eeh.h | 1 - arch/powerpc/kernel/eeh.c | 8 -------- 2 files changed, 9 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 765bcf63edea..85030c05e67e 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -216,7 +216,6 @@ enum { struct eeh_ops { char *name; - int (*init)(void); struct eeh_dev *(*probe)(struct pci_dev *pdev); int (*set_option)(struct eeh_pe *pe, int option); int (*get_state)(struct eeh_pe *pe, int *delay); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 28a0ea5d9faa..98faf139e676 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -965,14 +965,6 @@ int eeh_init(struct eeh_ops *ops) return ret; } - if (eeh_ops->init) - ret = eeh_ops->init(); - if (ret) { - pr_warn("%s: platform EEH init failed (%d)\n", - __func__, ret); - return ret; - } - /* Initialize PHB PEs */ list_for_each_entry_safe(hose, tmp, &hose_list, list_node) eeh_phb_pe_create(hose); -- cgit v1.2.3 From 395ee2a2a15ba1c4c7c414db24dc3082ba8feab8 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 18 Sep 2020 19:30:46 +1000 Subject: powerpc/eeh: Move EEH initialisation to an arch initcall The initialisation of EEH mostly happens in a core_initcall_sync initcall, followed by registering a bus notifier later on in an arch_initcall. 
Anything involving initcall dependecies is mostly incomprehensible unless you've spent a while staring at code so here's the full sequence: ppc_md.setup_arch <-- pci_controllers are created here ...time passes... core_initcall <-- pci_dns are created from DT nodes core_initcall_sync <-- platforms call eeh_init() postcore_initcall <-- PCI bus type is registered postcore_initcall_sync arch_initcall <-- EEH pci_bus notifier registered subsys_initcall <-- PHBs are scanned here There's no real requirement to do the EEH setup at the core_initcall_sync level. It just needs to be done after pci_dn's are created and before we start scanning PHBs. Simplify the flow a bit by moving the platform EEH inititalisation to an arch_initcall so we can fold the bus notifier registration into eeh_init(). Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200918093050.37344-5-oohall@gmail.com --- arch/powerpc/kernel/eeh.c | 64 ++++++++++++++-------------- arch/powerpc/platforms/powernv/eeh-powernv.c | 2 +- arch/powerpc/platforms/pseries/eeh_pseries.c | 2 +- 3 files changed, 34 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 98faf139e676..c9e25cfce8f0 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -940,6 +940,30 @@ static struct notifier_block eeh_reboot_nb = { .notifier_call = eeh_reboot_notifier, }; +static int eeh_device_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + + switch (action) { + /* + * Note: It's not possible to perform EEH device addition (i.e. + * {pseries,pnv}_pcibios_bus_add_device()) here because it depends on + * the device's resources, which have not yet been set up. + */ + case BUS_NOTIFY_DEL_DEVICE: + eeh_remove_device(to_pci_dev(dev)); + break; + default: + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block eeh_device_nb = { + .notifier_call = eeh_device_notifier, +}; + /** * eeh_init - System wide EEH initialization * @@ -960,7 +984,14 @@ int eeh_init(struct eeh_ops *ops) /* Register reboot notifier */ ret = register_reboot_notifier(&eeh_reboot_nb); if (ret) { - pr_warn("%s: Failed to register notifier (%d)\n", + pr_warn("%s: Failed to register reboot notifier (%d)\n", + __func__, ret); + return ret; + } + + ret = bus_register_notifier(&pci_bus_type, &eeh_device_nb); + if (ret) { + pr_warn("%s: Failed to register bus notifier (%d)\n", __func__, ret); return ret; } @@ -975,37 +1006,6 @@ int eeh_init(struct eeh_ops *ops) return eeh_event_init(); } -static int eeh_device_notifier(struct notifier_block *nb, - unsigned long action, void *data) -{ - struct device *dev = data; - - switch (action) { - /* - * Note: It's not possible to perform EEH device addition (i.e. - * {pseries,pnv}_pcibios_bus_add_device()) here because it depends on - * the device's resources, which have not yet been set up. 
- */ - case BUS_NOTIFY_DEL_DEVICE: - eeh_remove_device(to_pci_dev(dev)); - break; - default: - break; - } - return NOTIFY_DONE; -} - -static struct notifier_block eeh_device_nb = { - .notifier_call = eeh_device_notifier, -}; - -static __init int eeh_set_bus_notifier(void) -{ - bus_register_notifier(&pci_bus_type, &eeh_device_nb); - return 0; -} -arch_initcall(eeh_set_bus_notifier); - /** * eeh_probe_device() - Perform EEH initialization for the indicated pci device * @dev: pci device for which to set up EEH diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 5db92f39887a..b97ec796dd41 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -1721,4 +1721,4 @@ static int __init eeh_powernv_init(void) return ret; } -machine_core_initcall_sync(powernv, eeh_powernv_init); +machine_arch_initcall(powernv, eeh_powernv_init); diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index a3ed6a0507da..691b9a38b683 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -985,4 +985,4 @@ static int __init eeh_pseries_init(void) ret); return ret; } -machine_core_initcall_sync(pseries, eeh_pseries_init); +machine_arch_initcall(pseries, eeh_pseries_init); -- cgit v1.2.3 From f61c859feb5d19787c93d6b2b3d4beeca7260034 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 18 Sep 2020 19:30:47 +1000 Subject: powerpc/pseries/eeh: Clean up pe_config_addr lookups De-duplicate, and fix up the comments, and make the prototype just take a pci_dn since the job of the function is to return the pe_config_addr of the PE which contains a given device. Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200918093050.37344-6-oohall@gmail.com --- arch/powerpc/platforms/pseries/eeh_pseries.c | 80 ++++------------------------ 1 file changed, 11 insertions(+), 69 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 691b9a38b683..c96ee45a4689 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -33,8 +33,6 @@ #include #include -static int pseries_eeh_get_pe_addr(struct pci_dn *pdn); - /* RTAS tokens */ static int ibm_set_eeh_option; static int ibm_set_slot_reset; @@ -86,7 +84,8 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev) /** - * pseries_eeh_get_config_addr - Retrieve config address + * pseries_eeh_get_pe_config_addr - Find the pe_config_addr for a device + * @pdn: pci_dn of the input device * * Retrieve the assocated config address. Actually, there're 2 RTAS * function calls dedicated for the purpose. We need implement @@ -97,16 +96,17 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev) * It's notable that zero'ed return value means invalid PE config * address. */ -static int pseries_eeh_get_config_addr(struct pci_controller *phb, int config_addr) +static int pseries_eeh_get_pe_config_addr(struct pci_dn *pdn) { + int config_addr = rtas_config_addr(pdn->busno, pdn->devfn, 0); + struct pci_controller *phb = pdn->phb; int ret = 0; int rets[3]; if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) { /* - * First of all, we need to make sure there has one PE - * associated with the device. Otherwise, PE address is - * meaningless. 
+ * First of all, use function 1 to determine if this device is + * part of a PE or not. ret[0] being zero indicates it's not. */ ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, config_addr, BUID_HI(phb->buid), @@ -429,7 +429,7 @@ void pseries_eeh_init_edev(struct pci_dn *pdn) struct eeh_pe *parent; /* Retrieve PE address */ - edev->pe_config_addr = pseries_eeh_get_pe_addr(pdn); + edev->pe_config_addr = pseries_eeh_get_pe_config_addr(pdn); pe.addr = edev->pe_config_addr; /* Some older systems (Power4) allow the ibm,set-eeh-option @@ -548,64 +548,6 @@ static int pseries_eeh_set_option(struct eeh_pe *pe, int option) return ret; } -/** - * pseries_eeh_get_pe_addr - Retrieve PE address - * @pe: EEH PE - * - * Retrieve the assocated PE address. Actually, there're 2 RTAS - * function calls dedicated for the purpose. We need implement - * it through the new function and then the old one. Besides, - * you should make sure the config address is figured out from - * FDT node before calling the function. - * - * It's notable that zero'ed return value means invalid PE config - * address. - */ -static int pseries_eeh_get_pe_addr(struct pci_dn *pdn) -{ - int config_addr = rtas_config_addr(pdn->busno, pdn->devfn, 0); - unsigned long buid = pdn->phb->buid; - int ret = 0; - int rets[3]; - - if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) { - /* - * First of all, we need to make sure there has one PE - * associated with the device. Otherwise, PE address is - * meaningless. - */ - ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, - config_addr, BUID_HI(buid), BUID_LO(buid), 1); - if (ret || (rets[0] == 0)) - return 0; - - /* Retrieve the associated PE config address */ - ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, - config_addr, BUID_HI(buid), BUID_LO(buid), 0); - if (ret) { - pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n", - __func__, pdn->phb->global_number, config_addr); - return 0; - } - - return rets[0]; - } - - if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) { - ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets, - config_addr, BUID_HI(buid), BUID_LO(buid), 0); - if (ret) { - pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n", - __func__, pdn->phb->global_number, config_addr); - return 0; - } - - return rets[0]; - } - - return ret; -} - /** * pseries_eeh_get_state - Retrieve PE state * @pe: EEH PE @@ -907,7 +849,7 @@ static int __init eeh_pseries_init(void) { struct pci_controller *phb; struct pci_dn *pdn; - int ret, addr, config_addr; + int ret, config_addr; /* figure out EEH RTAS function call tokens */ ibm_set_eeh_option = rtas_token("ibm,set-eeh-option"); @@ -965,8 +907,8 @@ static int __init eeh_pseries_init(void) pr_info("Issue PHB reset ...\n"); list_for_each_entry(phb, &hose_list, list_node) { pdn = list_first_entry(&PCI_DN(phb->dn)->child_list, struct pci_dn, list); - addr = (pdn->busno << 16) | (pdn->devfn << 8); - config_addr = pseries_eeh_get_config_addr(phb, addr); + config_addr = pseries_eeh_get_pe_config_addr(pdn); + /* invalid PE config addr */ if (config_addr == 0) continue; -- cgit v1.2.3 From 98ba956f6a3891b233466b8da064f17d16dc2090 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 18 Sep 2020 19:30:48 +1000 Subject: powerpc/pseries/eeh: Rework device EEH PE determination The process Linux uses for determining if a device supports EEH or not appears to be at odds with what PAPR says the OS should be doing. The current flow is something like: 1. Assume pe_config_addr is equal the the device's config_addr. 2. 
Attempt to enable EEH on that PE 3. Verify EEH was enabled (POWER4 bug workaround) 4. Try to find the pe_config_addr using the ibm,get-config-addr-info2 RTAS call. 5. If that fails, walk the pci_dn tree upwards trying to find a parent device with EEH support. If we find one then add the device to that PE.

The first major problem with this process is that we need the PE config address in step 2) since it needs to be passed to the ibm,set-eeh-option RTAS call when enabling EEH for the PE. We hack around this requirement by making the assumption in 1) and delaying finding the actual PE address until 4). This is fine if: a) The PCI device is the 0th function, and b) The device is on the PE's root bus.

Granted, the current sequence does appear to work on most systems even when these conditions are false. At a guess PowerVM's RTAS has workarounds to accommodate Linux's quirks, or the RTAS call to enable EEH is treated as a no-op on most platforms since EEH is usually enabled by default. However, what is currently implemented is a bit sketchy and downright confusing since it doesn't match up with what PAPR suggests we should be doing.

This patch re-works how we handle EEH init so that we find the PE config address using the ibm,get-config-addr-info2 RTAS call first, then use the found address to finish the EEH init process. It also drops the POWER4 workaround since, as of commit 471d7ff8b51b ("powerpc/64s: Remove POWER4 support"), the kernel does not support running on a POWER4 CPU, so there's no need to support the POWER4 platform's quirks either. With the patch applied the sequence is now: 1. Find the pe_config_addr from the device using the RTAS call. 2. Enable the PE. 3. Insert the edev into the tree and create an eeh_pe if needed.

The other change made here is ignoring unsupported devices entirely. Currently the device's BARs are saved to the eeh_dev even if the device is not part of an EEH PE. Not being part of a PE means that an EEH recovery pass will never see that device, so saving the BARs is pointless.
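To make the ordering problem concrete, the call that enables EEH on a PE looks roughly like the sketch below (assembled from the RTAS tokens and helpers used elsewhere in this series, not quoted from the patch; the variable names are illustrative) and it cannot be issued without a pe_config_addr in hand:

	/* Sketch only: enable EEH for the PE identified by pe_config_addr */
	ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
			pe_config_addr,
			BUID_HI(phb->buid), BUID_LO(phb->buid),
			EEH_OPT_ENABLE);

This is why finding the PE address has to move ahead of the enable step.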
Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200918093050.37344-7-oohall@gmail.com --- arch/powerpc/platforms/pseries/eeh_pseries.c | 57 +++++++++++----------------- 1 file changed, 22 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index c96ee45a4689..4b88b482ef16 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -355,10 +355,10 @@ static struct eeh_pe *pseries_eeh_pe_get_parent(struct eeh_dev *edev) */ void pseries_eeh_init_edev(struct pci_dn *pdn) { + struct eeh_pe pe, *parent; struct eeh_dev *edev; - struct eeh_pe pe; + int addr; u32 pcie_flags; - int enable = 0; int ret; if (WARN_ON_ONCE(!eeh_has_flag(EEH_PROBE_MODE_DEVTREE))) @@ -415,51 +415,38 @@ void pseries_eeh_init_edev(struct pci_dn *pdn) } } - /* Initialize the fake PE */ + /* first up, find the pe_config_addr for the PE containing the device */ + addr = pseries_eeh_get_pe_config_addr(pdn); + if (addr == 0) { + eeh_edev_dbg(edev, "Unable to find pe_config_addr\n"); + goto err; + } + + /* Try enable EEH on the fake PE */ memset(&pe, 0, sizeof(struct eeh_pe)); pe.phb = pdn->phb; - pe.config_addr = (pdn->busno << 16) | (pdn->devfn << 8); + pe.addr = addr; - /* Enable EEH on the device */ eeh_edev_dbg(edev, "Enabling EEH on device\n"); ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE); if (ret) { eeh_edev_dbg(edev, "EEH failed to enable on device (code %d)\n", ret); - } else { - struct eeh_pe *parent; + goto err; + } - /* Retrieve PE address */ - edev->pe_config_addr = pseries_eeh_get_pe_config_addr(pdn); - pe.addr = edev->pe_config_addr; + edev->pe_config_addr = addr; - /* Some older systems (Power4) allow the ibm,set-eeh-option - * call to succeed even on nodes where EEH is not supported. - * Verify support explicitly. - */ - ret = eeh_ops->get_state(&pe, NULL); - if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT) - enable = 1; + eeh_add_flag(EEH_ENABLED); - /* - * This device doesn't support EEH, but it may have an - * EEH parent. In this case any error on the device will - * freeze the PE of it's upstream bridge, so added it to - * the upstream PE. - */ - parent = pseries_eeh_pe_get_parent(edev); - if (parent && !enable) - edev->pe_config_addr = parent->addr; + parent = pseries_eeh_pe_get_parent(edev); + eeh_pe_tree_insert(edev, parent); + eeh_save_bars(edev); + eeh_edev_dbg(edev, "EEH enabled for device"); - if (enable || parent) { - eeh_add_flag(EEH_ENABLED); - eeh_pe_tree_insert(edev, parent); - } - eeh_edev_dbg(edev, "EEH is %s on device (code %d)\n", - (enable ? "enabled" : "unsupported"), ret); - } + return; - /* Save memory bars */ - eeh_save_bars(edev); +err: + eeh_edev_dbg(edev, "EEH is unsupported on device (code = %d)\n", ret); } static struct eeh_dev *pseries_eeh_probe(struct pci_dev *pdev) -- cgit v1.2.3 From 42de19d5ef71b91765266557705394e52954adb3 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 18 Sep 2020 19:30:49 +1000 Subject: powerpc/pseries/eeh: Allow zero to be a valid PE configuration address There's no real reason why zero can't be a valid PE configuration address. Under qemu each sPAPR PHB (i.e. EEH supporting) has the passed-though devices on bus zero, so the PE address of bus :00 should be zero. However, all previous versions of Linux will reject that, so Qemu at least goes out of it's way to avoid it. 
The Qemu implementation of ibm,get-config-addr-info2 RTAS has the following comment: > /* > * We always have PE address of form "00BB0001". "BB" > * represents the bus number of PE's primary bus. > */ So qemu puts a one into the register portion of the PE's config_addr to avoid it being zero. The whole is pretty silly considering that RTAS will return a negative error code if it can't map the device's config_addr to a PE. This patch fixes Linux to treat zero as a valid PE address. This shouldn't have any real effects due to the Qemu hack mentioned above. And the fact that Linux EEH has worked historically on PowerVM means they never pass through devices on bus zero so we would never see the problem there either. Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200918093050.37344-8-oohall@gmail.com --- arch/powerpc/platforms/pseries/eeh_pseries.c | 38 ++++++++++++++++------------ 1 file changed, 22 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 4b88b482ef16..f8b546135bba 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -87,21 +87,20 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev) * pseries_eeh_get_pe_config_addr - Find the pe_config_addr for a device * @pdn: pci_dn of the input device * - * Retrieve the assocated config address. Actually, there're 2 RTAS - * function calls dedicated for the purpose. We need implement - * it through the new function and then the old one. Besides, - * you should make sure the config address is figured out from - * FDT node before calling the function. + * The EEH RTAS calls use a tuple consisting of: (buid_hi, buid_lo, + * pe_config_addr) as a handle to a given PE. This function finds the + * pe_config_addr based on the device's config addr. * - * It's notable that zero'ed return value means invalid PE config - * address. + * Keep in mind that the pe_config_addr *might* be numerically identical to the + * device's config addr, but the two are conceptually distinct. + * + * Returns the pe_config_addr, or a negative error code. */ static int pseries_eeh_get_pe_config_addr(struct pci_dn *pdn) { int config_addr = rtas_config_addr(pdn->busno, pdn->devfn, 0); struct pci_controller *phb = pdn->phb; - int ret = 0; - int rets[3]; + int ret, rets[3]; if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) { /* @@ -112,16 +111,16 @@ static int pseries_eeh_get_pe_config_addr(struct pci_dn *pdn) config_addr, BUID_HI(phb->buid), BUID_LO(phb->buid), 1); if (ret || (rets[0] == 0)) - return 0; + return -ENOENT; - /* Retrieve the associated PE config address */ + /* Retrieve the associated PE config address with function 0 */ ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, config_addr, BUID_HI(phb->buid), BUID_LO(phb->buid), 0); if (ret) { pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n", __func__, phb->global_number, config_addr); - return 0; + return -ENXIO; } return rets[0]; @@ -134,13 +133,20 @@ static int pseries_eeh_get_pe_config_addr(struct pci_dn *pdn) if (ret) { pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n", __func__, phb->global_number, config_addr); - return 0; + return -ENXIO; } return rets[0]; } - return ret; + /* + * PAPR does describe a process for finding the pe_config_addr that was + * used before the ibm,get-config-addr-info calls were added. 
However, + * I haven't found *any* systems that don't have that RTAS call + * implemented. If you happen to find one that needs the old DT based + * process, patches are welcome! + */ + return -ENOENT; } /** @@ -417,7 +423,7 @@ void pseries_eeh_init_edev(struct pci_dn *pdn) /* first up, find the pe_config_addr for the PE containing the device */ addr = pseries_eeh_get_pe_config_addr(pdn); - if (addr == 0) { + if (addr < 0) { eeh_edev_dbg(edev, "Unable to find pe_config_addr\n"); goto err; } @@ -897,7 +903,7 @@ static int __init eeh_pseries_init(void) config_addr = pseries_eeh_get_pe_config_addr(pdn); /* invalid PE config addr */ - if (config_addr == 0) + if (config_addr < 0) continue; pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_FUNDAMENTAL); -- cgit v1.2.3 From 35d64734b64315f2c5716c5a0a380ed1ba8fbe4a Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 18 Sep 2020 19:30:50 +1000 Subject: powerpc/eeh: Clean up PE addressing When support for EEH on PowerNV was added a lot of pseries specific code was made "generic" and some of the quirks of pseries EEH came along for the ride. One of the stranger quirks is eeh_pe containing two types of PE address: pe->addr and pe->config_addr. There reason for this appears to be historical baggage rather than any real requirements. On pseries EEH PEs are manipulated using RTAS calls. Each EEH RTAS call takes a "PE configuration address" as an input which is used to identify which EEH PE is being manipulated by the call. When initialising the EEH state for a device the first thing we need to do is determine the configuration address for the PE which contains the device so we can enable EEH on that PE. This process is outlined in PAPR which is the modern (i.e post-2003) FW specification for pseries. However, EEH support was first described in the pSeries RISC Platform Architecture (RPA) and although they are mostly compatible EEH is one of the areas where they are not. The major difference is that RPA doesn't actually have the concept of a PE. On RPA systems the EEH RTAS calls are done on a per-device basis using the same config_addr that would be passed to the RTAS functions to access PCI config space (e.g. ibm,read-pci-config). The config_addr is not identical since the function and config register offsets of the config_addr must be set to zero. EEH operations being done on a per-device basis doesn't make a whole lot of sense when you consider how EEH was implemented on legacy PCI systems. For legacy PCI(-X) systems EEH was implemented using special PCI-PCI bridges which contained logic to detect errors and freeze the secondary bus when one occurred. This means that the EEH enabled state is shared among all devices behind that EEH bridge. As a result there's no way to implement the per-device control required for the semantics specified by RPA. It can be made to work if we assume that a separate EEH bridge exists for each EEH capable PCI slot and there are no bridges behind those slots. However, RPA also specifies the ibm,configure-bridge RTAS call for re-initalising bridges behind EEH capable slots after they are reset due to an EEH event so that is probably not a valid assumption. This incoherence was fixed in later PAPR, which succeeded RPA. Unfortunately, since Linux EEH support seems to have been implemented based on the RPA spec some of the legacy assumptions were carried over (probably for POWER4 compatibility). 
The fix made in PAPR was the introduction of the "PE" concept and redefining the EEH RTAS calls (set-eeh-option, reset-slot, etc) to operate on a per-PE basis so all devices behind an EEH bridge would share the same EEH state. The "config_addr" argument to the EEH RTAS calls became the "PE_config_addr" and the OS was required to use the ibm,get-config-addr-info RTAS call to find the correct PE address for the device. When support for the new interfaces was added to Linux it was implemented using something like:

At probe time:

	pdn->eeh_config_addr = rtas_config_addr(pdn);
	pdn->eeh_pe_config_addr = rtas_get_config_addr_info(pdn);

When performing an RTAS call:

	config_addr = pdn->eeh_config_addr;
	if (pdn->eeh_pe_config_addr)
		config_addr = pdn->eeh_pe_config_addr;
	rtas_call(..., config_addr, ...);

In other words, if the ibm,get-config-addr-info RTAS call is implemented and returned a valid result we'd use that as the argument to the EEH RTAS calls. If not, Linux would fall back to using the device's config_addr. Over time these addresses have moved around, going from pci_dn to eeh_dev and finally into eeh_pe. Today the users look like this:

	config_addr = pe->config_addr;
	if (pe->addr)
		config_addr = pe->addr;
	rtas_call(..., config_addr, ...);

However, considering the EEH core always operates on a per-PE basis and even on pseries the only per-device operation is the initial call to ibm,set-eeh-option, I'm not sure if any of this actually works on an RPA system today. It doesn't make much sense to have the fallback address in a generic structure either, since the bulk of the code which references it is in pseries anyway.

The EEH core makes a token effort to support looking up a PE using the config_addr by having two arguments to eeh_pe_get(). However, a survey of all the callers to eeh_pe_get() shows that all bar one have the config_addr argument hard-coded to zero. The only caller that doesn't is in eeh_pe_tree_insert(), which has:

	if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr)
		return -EINVAL;
	pe = eeh_pe_get(hose, edev->pe_config_addr, edev->bdfn);

The third argument (config_addr) is only used if the second (pe->addr) argument is invalid. The preceding check ensures that the call to eeh_pe_get() will never happen if edev->pe_config_addr is invalid, so there is no situation where eeh_pe_get() will search for a PE based on the 3rd argument. The check also means that we'll never insert a PE into the tree where pe_config_addr is zero, since EEH_VALID_PE_ZERO is never set on pseries.

The users of the fallback address on pseries never actually use the fallback, and the only caller that supplies something for the config_addr argument to eeh_pe_get() never uses it either. It's all dead code. This patch removes the fallback address from eeh_pe since nothing uses it. Specifically, we do this by: 1) Removing pe->config_addr 2) Removing the EEH_VALID_PE_ZERO flag 3) Removing the fallback address argument to eeh_pe_get(). 4) Removing all the checks for pe->addr being zero in the pseries EEH code. This leaves us with PEs only being identified by what's in their pe->addr field and the EEH core relying on the platform to ensure that eeh_devs are only inserted into the EEH tree if they're actually inside a PE. No functional changes, I hope.
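Once the fallback is gone, every pseries RTAS user reduces to the single pattern below (a summary of the hunks in the diff that follows, not additional code):

	ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
			pe->addr, BUID_HI(pe->phb->buid),
			BUID_LO(pe->phb->buid), option);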
Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200918093050.37344-9-oohall@gmail.com --- arch/powerpc/include/asm/eeh.h | 4 +-- arch/powerpc/kernel/eeh.c | 2 +- arch/powerpc/kernel/eeh_pe.c | 46 ++++------------------------ arch/powerpc/platforms/powernv/eeh-powernv.c | 16 ++-------- arch/powerpc/platforms/pseries/eeh_pseries.c | 42 ++++--------------------- 5 files changed, 17 insertions(+), 93 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 85030c05e67e..dd6a4ac6c713 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -27,7 +27,6 @@ struct pci_dn; #define EEH_FORCE_DISABLED 0x02 /* EEH disabled */ #define EEH_PROBE_MODE_DEV 0x04 /* From PCI device */ #define EEH_PROBE_MODE_DEVTREE 0x08 /* From device tree */ -#define EEH_VALID_PE_ZERO 0x10 /* PE#0 is valid */ #define EEH_ENABLE_IO_FOR_LOG 0x20 /* Enable IO for log */ #define EEH_EARLY_DUMP_LOG 0x40 /* Dump log immediately */ @@ -280,8 +279,7 @@ int eeh_phb_pe_create(struct pci_controller *phb); int eeh_wait_state(struct eeh_pe *pe, int max_wait); struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb); struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, struct eeh_pe *root); -struct eeh_pe *eeh_pe_get(struct pci_controller *phb, - int pe_no, int config_addr); +struct eeh_pe *eeh_pe_get(struct pci_controller *phb, int pe_no); int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent); int eeh_pe_tree_remove(struct eeh_dev *edev); void eeh_pe_update_time_stamp(struct eeh_pe *pe); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index c9e25cfce8f0..87de8b798b2d 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1657,7 +1657,7 @@ static ssize_t eeh_force_recover_write(struct file *filp, return -ENODEV; /* Retrieve PE */ - pe = eeh_pe_get(hose, pe_no, 0); + pe = eeh_pe_get(hose, pe_no); if (!pe) return -ENODEV; diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index d2aaaa73fdd5..61b7d4019051 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -251,43 +251,21 @@ void eeh_pe_dev_traverse(struct eeh_pe *root, /** * __eeh_pe_get - Check the PE address - * @data: EEH PE - * @flag: EEH device * * For one particular PE, it can be identified by PE address * or tranditional BDF address. BDF address is composed of * Bus/Device/Function number. The extra data referred by flag * indicates which type of address should be used. */ -struct eeh_pe_get_flag { - int pe_no; - int config_addr; -}; - static void *__eeh_pe_get(struct eeh_pe *pe, void *flag) { - struct eeh_pe_get_flag *tmp = (struct eeh_pe_get_flag *) flag; + int *target_pe = flag; - /* Unexpected PHB PE */ + /* PHB PEs are special and should be ignored */ if (pe->type & EEH_PE_PHB) return NULL; - /* - * We prefer PE address. 
For most cases, we should - * have non-zero PE address - */ - if (eeh_has_flag(EEH_VALID_PE_ZERO)) { - if (tmp->pe_no == pe->addr) - return pe; - } else { - if (tmp->pe_no && - (tmp->pe_no == pe->addr)) - return pe; - } - - /* Try BDF address */ - if (tmp->config_addr && - (tmp->config_addr == pe->config_addr)) + if (*target_pe == pe->addr) return pe; return NULL; @@ -297,7 +275,6 @@ static void *__eeh_pe_get(struct eeh_pe *pe, void *flag) * eeh_pe_get - Search PE based on the given address * @phb: PCI controller * @pe_no: PE number - * @config_addr: Config address * * Search the corresponding PE based on the specified address which * is included in the eeh device. The function is used to check if @@ -306,16 +283,11 @@ static void *__eeh_pe_get(struct eeh_pe *pe, void *flag) * which is composed of PCI bus/device/function number, or unified * PE address. */ -struct eeh_pe *eeh_pe_get(struct pci_controller *phb, - int pe_no, int config_addr) +struct eeh_pe *eeh_pe_get(struct pci_controller *phb, int pe_no) { struct eeh_pe *root = eeh_phb_pe_get(phb); - struct eeh_pe_get_flag tmp = { pe_no, config_addr }; - struct eeh_pe *pe; - - pe = eeh_pe_traverse(root, __eeh_pe_get, &tmp); - return pe; + return eeh_pe_traverse(root, __eeh_pe_get, &pe_no); } /** @@ -336,19 +308,13 @@ int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent) struct pci_controller *hose = edev->controller; struct eeh_pe *pe, *parent; - /* Check if the PE number is valid */ - if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) { - eeh_edev_err(edev, "PE#0 is invalid for this PHB!\n"); - return -EINVAL; - } - /* * Search the PE has been existing or not according * to the PE address. If that has been existing, the * PE should be composed of PCI bus and its subordinate * components. */ - pe = eeh_pe_get(hose, edev->pe_config_addr, edev->bdfn); + pe = eeh_pe_get(hose, edev->pe_config_addr); if (pe) { if (pe->type & EEH_PE_INVALID) { list_add_tail(&edev->entry, &pe->edevs); diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index b97ec796dd41..89e22c460ebf 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -87,7 +87,7 @@ static ssize_t pnv_eeh_ei_write(struct file *filp, return -EINVAL; /* Retrieve PE */ - pe = eeh_pe_get(hose, pe_no, 0); + pe = eeh_pe_get(hose, pe_no); if (!pe) return -ENODEV; @@ -306,7 +306,7 @@ static struct eeh_pe *pnv_eeh_get_upstream_pe(struct pci_dev *pdev) if (parent) { struct pnv_ioda_pe *ioda_pe = pnv_ioda_get_pe(parent); - return eeh_pe_get(phb->hose, ioda_pe->pe_number, 0); + return eeh_pe_get(phb->hose, ioda_pe->pe_number); } return NULL; @@ -1358,7 +1358,7 @@ static int pnv_eeh_get_pe(struct pci_controller *hose, } /* Find the PE according to PE# */ - dev_pe = eeh_pe_get(hose, pe_no, 0); + dev_pe = eeh_pe_get(hose, pe_no); if (!dev_pe) return -EEXIST; @@ -1693,16 +1693,6 @@ static int __init eeh_powernv_init(void) if (phb->diag_data_size > max_diag_size) max_diag_size = phb->diag_data_size; - /* - * PE#0 should be regarded as valid by EEH core - * if it's not the reserved one. Currently, we - * have the reserved PE#255 and PE#127 for PHB3 - * and P7IOC separately. So we should regard - * PE#0 as valid for PHB3 and P7IOC. 
- */ - if (phb->ioda.reserved_pe_idx != 0) - eeh_add_flag(EEH_VALID_PE_ZERO); - break; } diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index f8b546135bba..d8fd5f7b2143 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -509,7 +509,6 @@ EXPORT_SYMBOL_GPL(pseries_eeh_init_edev_recursive); static int pseries_eeh_set_option(struct eeh_pe *pe, int option) { int ret = 0; - int config_addr; /* * When we're enabling or disabling EEH functioality on @@ -522,9 +521,6 @@ static int pseries_eeh_set_option(struct eeh_pe *pe, int option) case EEH_OPT_ENABLE: case EEH_OPT_THAW_MMIO: case EEH_OPT_THAW_DMA: - config_addr = pe->config_addr; - if (pe->addr) - config_addr = pe->addr; break; case EEH_OPT_FREEZE_PE: /* Not support */ @@ -535,7 +531,7 @@ static int pseries_eeh_set_option(struct eeh_pe *pe, int option) } ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL, - config_addr, BUID_HI(pe->phb->buid), + pe->addr, BUID_HI(pe->phb->buid), BUID_LO(pe->phb->buid), option); return ret; @@ -556,25 +552,19 @@ static int pseries_eeh_set_option(struct eeh_pe *pe, int option) */ static int pseries_eeh_get_state(struct eeh_pe *pe, int *delay) { - int config_addr; int ret; int rets[4]; int result; - /* Figure out PE config address if possible */ - config_addr = pe->config_addr; - if (pe->addr) - config_addr = pe->addr; - if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) { ret = rtas_call(ibm_read_slot_reset_state2, 3, 4, rets, - config_addr, BUID_HI(pe->phb->buid), + pe->addr, BUID_HI(pe->phb->buid), BUID_LO(pe->phb->buid)); } else if (ibm_read_slot_reset_state != RTAS_UNKNOWN_SERVICE) { /* Fake PE unavailable info */ rets[2] = 0; ret = rtas_call(ibm_read_slot_reset_state, 3, 3, rets, - config_addr, BUID_HI(pe->phb->buid), + pe->addr, BUID_HI(pe->phb->buid), BUID_LO(pe->phb->buid)); } else { return EEH_STATE_NOT_SUPPORT; @@ -628,14 +618,7 @@ static int pseries_eeh_get_state(struct eeh_pe *pe, int *delay) */ static int pseries_eeh_reset(struct eeh_pe *pe, int option) { - int config_addr; - - /* Figure out PE address */ - config_addr = pe->config_addr; - if (pe->addr) - config_addr = pe->addr; - - return pseries_eeh_phb_reset(pe->phb, config_addr, option); + return pseries_eeh_phb_reset(pe->phb, pe->addr, option); } /** @@ -651,19 +634,13 @@ static int pseries_eeh_reset(struct eeh_pe *pe, int option) */ static int pseries_eeh_get_log(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len) { - int config_addr; unsigned long flags; int ret; spin_lock_irqsave(&slot_errbuf_lock, flags); memset(slot_errbuf, 0, eeh_error_buf_size); - /* Figure out the PE address */ - config_addr = pe->config_addr; - if (pe->addr) - config_addr = pe->addr; - - ret = rtas_call(ibm_slot_error_detail, 8, 1, NULL, config_addr, + ret = rtas_call(ibm_slot_error_detail, 8, 1, NULL, pe->addr, BUID_HI(pe->phb->buid), BUID_LO(pe->phb->buid), virt_to_phys(drv_log), len, virt_to_phys(slot_errbuf), eeh_error_buf_size, @@ -682,14 +659,7 @@ static int pseries_eeh_get_log(struct eeh_pe *pe, int severity, char *drv_log, u */ static int pseries_eeh_configure_bridge(struct eeh_pe *pe) { - int config_addr; - - /* Figure out the PE address */ - config_addr = pe->config_addr; - if (pe->addr) - config_addr = pe->addr; - - return pseries_eeh_phb_configure_bridge(pe->phb, config_addr); + return pseries_eeh_phb_configure_bridge(pe->phb, pe->addr); } /** -- cgit v1.2.3 From d0ffdee8ff01fb21085d835ee54dc8c1c4d19226 Mon Sep 17 00:00:00 
2001 From: Gustavo Romero Date: Sat, 19 Sep 2020 12:00:25 -0300 Subject: powerpc/tm: Save and restore AMR on treclaim and trechkpt Althought AMR is stashed in the checkpoint area, currently we don't save it to the per thread checkpoint struct after a treclaim and so we don't restore it either from that struct when we trechkpt. As a consequence when the transaction is later rolled back the kernel space AMR value when the trechkpt was done appears in userspace. That commit saves and restores AMR accordingly on treclaim and trechkpt. Since AMR value is also used in kernel space in other functions, it also takes care of stashing kernel live AMR into the stack before treclaim and before trechkpt, restoring it later, just before returning from tm_reclaim and __tm_recheckpoint. Is also fixes two nonrelated comments about CR and MSR. Signed-off-by: Gustavo Romero Tested-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200919150025.9609-1-gromero@linux.ibm.com --- arch/powerpc/include/asm/processor.h | 1 + arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kernel/tm.S | 35 +++++++++++++++++++++++++++++++---- 3 files changed, 33 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 22ffe85a91b8..365290b9a24b 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -220,6 +220,7 @@ struct thread_struct { unsigned long tm_tar; unsigned long tm_ppr; unsigned long tm_dscr; + unsigned long tm_amr; /* * Checkpointed FP and VSX 0-31 register set. diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 8711c2164b45..c2722ff36e98 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -176,6 +176,7 @@ int main(void) OFFSET(THREAD_TM_TAR, thread_struct, tm_tar); OFFSET(THREAD_TM_PPR, thread_struct, tm_ppr); OFFSET(THREAD_TM_DSCR, thread_struct, tm_dscr); + OFFSET(THREAD_TM_AMR, thread_struct, tm_amr); OFFSET(PT_CKPT_REGS, thread_struct, ckpt_regs); OFFSET(THREAD_CKVRSTATE, thread_struct, ckvr_state.vr); OFFSET(THREAD_CKVRSAVE, thread_struct, ckvrsave); diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 6ba0fdd1e7f8..2b91f233b05d 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -122,6 +122,13 @@ _GLOBAL(tm_reclaim) std r3, STK_PARAM(R3)(r1) SAVE_NVGPRS(r1) + /* + * Save kernel live AMR since it will be clobbered by treclaim + * but can be used elsewhere later in kernel space. + */ + mfspr r3, SPRN_AMR + std r3, TM_FRAME_L1(r1) + /* We need to setup MSR for VSX register save instructions. */ mfmsr r14 mr r15, r14 @@ -245,7 +252,7 @@ _GLOBAL(tm_reclaim) * but is used in signal return to 'wind back' to the abort handler. */ - /* ******************** CR,LR,CCR,MSR ********** */ + /* ***************** CTR, LR, CR, XER ********** */ mfctr r3 mflr r4 mfcr r5 @@ -256,7 +263,6 @@ _GLOBAL(tm_reclaim) std r5, _CCR(r7) std r6, _XER(r7) - /* ******************** TAR, DSCR ********** */ mfspr r3, SPRN_TAR mfspr r4, SPRN_DSCR @@ -264,6 +270,10 @@ _GLOBAL(tm_reclaim) std r3, THREAD_TM_TAR(r12) std r4, THREAD_TM_DSCR(r12) + /* ******************** AMR **************** */ + mfspr r3, SPRN_AMR + std r3, THREAD_TM_AMR(r12) + /* * MSR and flags: We don't change CRs, and we don't need to alter MSR. */ @@ -308,7 +318,9 @@ _GLOBAL(tm_reclaim) std r3, THREAD_TM_TFHAR(r12) std r4, THREAD_TM_TFIAR(r12) - /* AMR is checkpointed too, but is unsupported by Linux. 
*/ + /* Restore kernel live AMR */ + ld r8, TM_FRAME_L1(r1) + mtspr SPRN_AMR, r8 /* Restore original MSR/IRQ state & clear TM mode */ ld r14, TM_FRAME_L0(r1) /* Orig MSR */ @@ -355,6 +367,13 @@ _GLOBAL(__tm_recheckpoint) */ SAVE_NVGPRS(r1) + /* + * Save kernel live AMR since it will be clobbered for trechkpt + * but can be used elsewhere later in kernel space. + */ + mfspr r8, SPRN_AMR + std r8, TM_FRAME_L0(r1) + /* Load complete register state from ts_ckpt* registers */ addi r7, r3, PT_CKPT_REGS /* Thread's ckpt_regs */ @@ -404,7 +423,7 @@ _GLOBAL(__tm_recheckpoint) restore_gprs: - /* ******************** CR,LR,CCR,MSR ********** */ + /* ****************** CTR, LR, XER ************* */ ld r4, _CTR(r7) ld r5, _LINK(r7) ld r8, _XER(r7) @@ -417,6 +436,10 @@ restore_gprs: ld r4, THREAD_TM_TAR(r3) mtspr SPRN_TAR, r4 + /* ******************** AMR ******************** */ + ld r4, THREAD_TM_AMR(r3) + mtspr SPRN_AMR, r4 + /* Load up the PPR and DSCR in GPRs only at this stage */ ld r5, THREAD_TM_DSCR(r3) ld r6, THREAD_TM_PPR(r3) @@ -509,6 +532,10 @@ restore_gprs: li r4, MSR_RI mtmsrd r4, 1 + /* Restore kernel live AMR */ + ld r8, TM_FRAME_L0(r1) + mtspr SPRN_AMR, r8 + REST_NVGPRS(r1) addi r1, r1, TM_FRAME_SIZE -- cgit v1.2.3 From 4bce545903fa0290e011cf118997717f0c4f4d20 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 21 Sep 2020 15:26:43 +0530 Subject: powerpc/topology: Update topology_core_cpumask On Power, cpu_core_mask and cpu_cpu_mask refer to the same set of CPUs. cpu_cpu_mask is needed by scheduler, hence look at deprecating cpu_core_mask. Before deleting the cpu_core_mask, ensure its only user is moved to cpu_cpu_mask. Signed-off-by: Srikar Dronamraju Tested-by: Satheesh Rajendran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200921095653.9701-2-srikar@linux.vnet.ibm.com --- arch/powerpc/include/asm/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 6609174918ab..e0f232533c9d 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -122,7 +122,7 @@ int get_physical_package_id(int cpu); #endif #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) -#define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) +#define topology_core_cpumask(cpu) (cpu_cpu_mask(cpu)) #define topology_core_id(cpu) (cpu_to_core_id(cpu)) #endif -- cgit v1.2.3 From 4ca234a9cbd7c3a656b34dd98c8b156f70ed7849 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 21 Sep 2020 15:26:44 +0530 Subject: powerpc/smp: Stop updating cpu_core_mask Anton Blanchard reported that his 4096 vcpu KVM guest took around 30 minutes to boot. He also analyzed it to the time taken to iterate while setting the cpu_core_mask. Further analysis shows that cpu_core_mask and cpu_cpu_mask for any CPU would be equal on Power. However updating cpu_core_mask took forever to update as its a per cpu cpumask variable. Instead cpu_cpu_mask was a per NODE /per DIE cpumask that was shared by all the respective CPUs. Also cpu_cpu_mask is needed from a scheduler perspective. However cpu_core_map is an exported symbol. Hence stop updating cpu_core_map and make it point to cpu_cpu_mask. 
Signed-off-by: Srikar Dronamraju Tested-by: Satheesh Rajendran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200921095653.9701-3-srikar@linux.vnet.ibm.com --- arch/powerpc/include/asm/smp.h | 5 ----- arch/powerpc/kernel/smp.c | 33 +++++++-------------------------- 2 files changed, 7 insertions(+), 31 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 635bdf947105..b2035b2f57ce 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -121,11 +121,6 @@ static inline struct cpumask *cpu_sibling_mask(int cpu) return per_cpu(cpu_sibling_map, cpu); } -static inline struct cpumask *cpu_core_mask(int cpu) -{ - return per_cpu(cpu_core_map, cpu); -} - static inline struct cpumask *cpu_l2_cache_mask(int cpu) { return per_cpu(cpu_l2_cache_map, cpu); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 58990baa5182..bf6d4192adda 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -953,12 +953,17 @@ void __init smp_prepare_cpus(unsigned int max_cpus) local_memory_node(numa_cpu_lookup_table[cpu])); } #endif + /* + * cpu_core_map is now more updated and exists only since + * its been exported for long. It only will have a snapshot + * of cpu_cpu_mask. + */ + cpumask_copy(per_cpu(cpu_core_map, cpu), cpu_cpu_mask(cpu)); } /* Init the cpumasks so the boot CPU is related to itself */ cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid)); cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid)); - cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid)); if (has_coregroup_support()) cpumask_set_cpu(boot_cpuid, cpu_coregroup_mask(boot_cpuid)); @@ -1260,9 +1265,7 @@ static void remove_cpu_from_masks(int cpu) { int i; - /* NB: cpu_core_mask is a superset of the others */ - for_each_cpu(i, cpu_core_mask(cpu)) { - set_cpus_unrelated(cpu, i, cpu_core_mask); + for_each_cpu(i, cpu_cpu_mask(cpu)) { set_cpus_unrelated(cpu, i, cpu_l2_cache_mask); set_cpus_unrelated(cpu, i, cpu_sibling_mask); if (has_big_cores) @@ -1312,7 +1315,6 @@ EXPORT_SYMBOL_GPL(get_physical_package_id); static void add_cpu_to_masks(int cpu) { int first_thread = cpu_first_thread_sibling(cpu); - int pkg_id = get_physical_package_id(cpu); int i; /* @@ -1320,7 +1322,6 @@ static void add_cpu_to_masks(int cpu) * add it to it's own thread sibling mask. */ cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); - cpumask_set_cpu(cpu, cpu_core_mask(cpu)); for (i = first_thread; i < first_thread + threads_per_core; i++) if (cpu_online(i)) @@ -1342,26 +1343,6 @@ static void add_cpu_to_masks(int cpu) set_cpus_related(cpu, i, cpu_coregroup_mask); } } - - if (pkg_id == -1) { - struct cpumask *(*mask)(int) = cpu_sibling_mask; - - /* - * Copy the sibling mask into core sibling mask and - * mark any CPUs on the same chip as this CPU. - */ - if (shared_caches) - mask = cpu_l2_cache_mask; - - for_each_cpu(i, mask(cpu)) - set_cpus_related(cpu, i, cpu_core_mask); - - return; - } - - for_each_cpu(i, cpu_online_mask) - if (get_physical_package_id(i) == pkg_id) - set_cpus_related(cpu, i, cpu_core_mask); } /* Activate a secondary processor. */ -- cgit v1.2.3 From e29e9ed665eeb6f98cd88672994ecf4aaefdb943 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 21 Sep 2020 15:26:45 +0530 Subject: powerpc/smp: Remove get_physical_package_id Now that cpu_core_mask has been removed and topology_core_cpumask has been updated to use cpu_cpu_mask, we no more need get_physical_package_id. 
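As a rough illustration of the cost being removed (the CPU count is the one from the report, the rest is back-of-the-envelope): onlining CPU n used to mark pairwise relations against every already-online CPU in the same package, so a 4096-vCPU guest performs on the order of

	4096 * 4095 / 2  ~=  8.4 million cpumask updates

during boot, each touching a per-CPU mask sized for NR_CPUS bits. Copying cpu_cpu_mask into cpu_core_map once per CPU makes that work linear in the number of CPUs.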
Signed-off-by: Srikar Dronamraju Tested-by: Satheesh Rajendran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200921095653.9701-4-srikar@linux.vnet.ibm.com --- arch/powerpc/include/asm/topology.h | 5 ----- arch/powerpc/kernel/smp.c | 20 -------------------- 2 files changed, 25 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index e0f232533c9d..e45219f74be0 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -114,12 +114,7 @@ static inline int cpu_to_coregroup_id(int cpu) #ifdef CONFIG_PPC64 #include -#ifdef CONFIG_PPC_SPLPAR -int get_physical_package_id(int cpu); -#define topology_physical_package_id(cpu) (get_physical_package_id(cpu)) -#else #define topology_physical_package_id(cpu) (cpu_to_chip_id(cpu)) -#endif #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) #define topology_core_cpumask(cpu) (cpu_cpu_mask(cpu)) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index bf6d4192adda..aecc01f0e95e 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1292,26 +1292,6 @@ static inline void add_cpu_to_smallcore_masks(int cpu) } } -int get_physical_package_id(int cpu) -{ - int pkg_id = cpu_to_chip_id(cpu); - - /* - * If the platform is PowerNV or Guest on KVM, ibm,chip-id is - * defined. Hence we would return the chip-id as the result of - * get_physical_package_id. - */ - if (pkg_id == -1 && firmware_has_feature(FW_FEATURE_LPAR) && - IS_ENABLED(CONFIG_PPC_SPLPAR)) { - struct device_node *np = of_get_cpu_node(cpu, NULL); - pkg_id = of_node_to_nid(np); - of_node_put(np); - } - - return pkg_id; -} -EXPORT_SYMBOL_GPL(get_physical_package_id); - static void add_cpu_to_masks(int cpu) { int first_thread = cpu_first_thread_sibling(cpu); -- cgit v1.2.3 From 70edd4a7c753ba18e3e4bb9e97b6d85156cea738 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 21 Sep 2020 15:26:46 +0530 Subject: powerpc/smp: Optimize remove_cpu_from_masks While offlining a CPU, system currently iterate through all the CPUs in the DIE to clear sibling, l2_cache and smallcore maps. However if there are more cores in a DIE, system can end up spending more time iterating through CPUs which are completely unrelated. Optimize this by only iterating through smaller but relevant cpumap. If shared_cache is set, cpu_l2_cache_map should be relevant else cpu_sibling_map would be relevant. 
Signed-off-by: Srikar Dronamraju Tested-by: Satheesh Rajendran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200921095653.9701-5-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index aecc01f0e95e..9cdb966f00b3 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1263,14 +1263,21 @@ static bool update_mask_by_l2(int cpu, struct cpumask *(*mask_fn)(int)) #ifdef CONFIG_HOTPLUG_CPU static void remove_cpu_from_masks(int cpu) { + struct cpumask *(*mask_fn)(int) = cpu_sibling_mask; int i; - for_each_cpu(i, cpu_cpu_mask(cpu)) { + if (shared_caches) + mask_fn = cpu_l2_cache_mask; + + for_each_cpu(i, mask_fn(cpu)) { set_cpus_unrelated(cpu, i, cpu_l2_cache_mask); set_cpus_unrelated(cpu, i, cpu_sibling_mask); if (has_big_cores) set_cpus_unrelated(cpu, i, cpu_smallcore_mask); - if (has_coregroup_support()) + } + + if (has_coregroup_support()) { + for_each_cpu(i, cpu_coregroup_mask(cpu)) set_cpus_unrelated(cpu, i, cpu_coregroup_mask); } } -- cgit v1.2.3 From 53516d4abacfab1faaa075c1f79957abc3da358c Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 21 Sep 2020 15:26:47 +0530 Subject: powerpc/smp: Limit CPUs traversed to within a node. All the arch specific topology cpumasks are within a node/DIE. However when setting these per CPU cpumasks, system traverses through all the online CPUs. This is redundant. Reduce the traversal to only CPUs that are online in the node to which the CPU belongs to. Signed-off-by: Srikar Dronamraju Tested-by: Satheesh Rajendran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200921095653.9701-6-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 9cdb966f00b3..9455af47123c 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1241,7 +1241,7 @@ static bool update_mask_by_l2(int cpu, struct cpumask *(*mask_fn)(int)) } cpumask_set_cpu(cpu, mask_fn(cpu)); - for_each_cpu(i, cpu_online_mask) { + for_each_cpu_and(i, cpu_online_mask, cpu_cpu_mask(cpu)) { /* * when updating the marks the current CPU has not been marked * online, but we need to update the cache masks -- cgit v1.2.3 From 1f3a4181042107e32e44047e9dde990aced845b5 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 21 Sep 2020 15:26:48 +0530 Subject: powerpc/smp: Stop passing mask to update_mask_by_l2 update_mask_by_l2 is called only once. But it passes cpu_l2_cache_mask as parameter. Instead of passing cpu_l2_cache_mask, use it directly in update_mask_by_l2. 
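To put numbers on it (example topology, not taken from the commit): on a DIE with 16 SMT8 cores, the per-offline work shrinks from the full DIE to just the CPUs that can actually be related:

	old: for_each_cpu(i, cpu_cpu_mask(cpu))   /* 128 iterations in this example         */
	new: for_each_cpu(i, mask_fn(cpu))        /* 8 siblings, or the L2 group when
	                                             shared_caches is set                    */

plus a separate, equally small walk of cpu_coregroup_mask where coregroups are supported.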
Signed-off-by: Srikar Dronamraju Tested-by: Satheesh Rajendran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200921095653.9701-7-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 9455af47123c..0e96f6687363 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1218,7 +1218,7 @@ static struct device_node *cpu_to_l2cache(int cpu) return cache; } -static bool update_mask_by_l2(int cpu, struct cpumask *(*mask_fn)(int)) +static bool update_mask_by_l2(int cpu) { struct device_node *l2_cache, *np; int i; @@ -1240,7 +1240,7 @@ static bool update_mask_by_l2(int cpu, struct cpumask *(*mask_fn)(int)) return false; } - cpumask_set_cpu(cpu, mask_fn(cpu)); + cpumask_set_cpu(cpu, cpu_l2_cache_mask(cpu)); for_each_cpu_and(i, cpu_online_mask, cpu_cpu_mask(cpu)) { /* * when updating the marks the current CPU has not been marked @@ -1251,7 +1251,7 @@ static bool update_mask_by_l2(int cpu, struct cpumask *(*mask_fn)(int)) continue; if (np == l2_cache) - set_cpus_related(cpu, i, mask_fn); + set_cpus_related(cpu, i, cpu_l2_cache_mask); of_node_put(np); } @@ -1315,7 +1315,7 @@ static void add_cpu_to_masks(int cpu) set_cpus_related(i, cpu, cpu_sibling_mask); add_cpu_to_smallcore_masks(cpu); - update_mask_by_l2(cpu, cpu_l2_cache_mask); + update_mask_by_l2(cpu); if (has_coregroup_support()) { int coregroup_id = cpu_to_coregroup_id(cpu); -- cgit v1.2.3 From 661e3d42f99193b7fdd71467a87e48f6e597c285 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 21 Sep 2020 15:26:49 +0530 Subject: powerpc/smp: Depend on cpu_l1_cache_map when adding CPUs Currently on hotplug/hotunplug, CPU iterates through all the CPUs in its core to find threads in its thread group. However this info is already captured in cpu_l1_cache_map. Hence reduce iterations and cleanup add_cpu_to_smallcore_masks function. Signed-off-by: Srikar Dronamraju Tested-by: Satheesh Rajendran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200921095653.9701-8-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 0e96f6687363..3a3e6bbab29c 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1285,16 +1285,15 @@ static void remove_cpu_from_masks(int cpu) static inline void add_cpu_to_smallcore_masks(int cpu) { - struct cpumask *this_l1_cache_map = per_cpu(cpu_l1_cache_map, cpu); - int i, first_thread = cpu_first_thread_sibling(cpu); + int i; if (!has_big_cores) return; cpumask_set_cpu(cpu, cpu_smallcore_mask(cpu)); - for (i = first_thread; i < first_thread + threads_per_core; i++) { - if (cpu_online(i) && cpumask_test_cpu(i, this_l1_cache_map)) + for_each_cpu(i, per_cpu(cpu_l1_cache_map, cpu)) { + if (cpu_online(i)) set_cpus_related(i, cpu, cpu_smallcore_mask); } } -- cgit v1.2.3 From 375370a10d061d5c75c6bc5b09c5db4cc0b0fcfe Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 21 Sep 2020 15:26:50 +0530 Subject: powerpc/smp: Check for duplicate topologies and consolidate CACHE and COREGROUP domains are now part of default topology. However on systems that don't support CACHE or COREGROUP, these domains will eventually be degenerated. The degeneration happens per CPU. 
Do note the current fixup_topology() logic ensures that mask of a domain that is not supported on the current platform is set to the previous domain. Instead of waiting for the scheduler to degenerated try to consolidate based on their masks and sd_flags. This is done just before setting the scheduler topology. Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200921095653.9701-9-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 3a3e6bbab29c..917c8598cf61 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1401,6 +1401,8 @@ int setup_profiling_timer(unsigned int multiplier) static void fixup_topology(void) { + int i; + #ifdef CONFIG_SCHED_SMT if (has_big_cores) { pr_info("Big cores detected but using small core scheduling\n"); @@ -1410,6 +1412,30 @@ static void fixup_topology(void) if (!has_coregroup_support()) powerpc_topology[mc_idx].mask = powerpc_topology[cache_idx].mask; + + /* + * Try to consolidate topology levels here instead of + * allowing scheduler to degenerate. + * - Dont consolidate if masks are different. + * - Dont consolidate if sd_flags exists and are different. + */ + for (i = 1; i <= die_idx; i++) { + if (powerpc_topology[i].mask != powerpc_topology[i - 1].mask) + continue; + + if (powerpc_topology[i].sd_flags && powerpc_topology[i - 1].sd_flags && + powerpc_topology[i].sd_flags != powerpc_topology[i - 1].sd_flags) + continue; + + if (!powerpc_topology[i - 1].sd_flags) + powerpc_topology[i - 1].sd_flags = powerpc_topology[i].sd_flags; + + powerpc_topology[i].mask = powerpc_topology[i + 1].mask; + powerpc_topology[i].sd_flags = powerpc_topology[i + 1].sd_flags; +#ifdef CONFIG_SCHED_DEBUG + powerpc_topology[i].name = powerpc_topology[i + 1].name; +#endif + } } void __init smp_cpus_done(unsigned int max_cpus) -- cgit v1.2.3 From 3ab33d6dc3e98e83b55732049e1d1d488207bb6d Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 21 Sep 2020 15:26:51 +0530 Subject: powerpc/smp: Optimize update_mask_by_l2 All threads of a SMT4 core can either be part of this CPU's l2-cache mask or not related to this CPU l2-cache mask. Use this relation to reduce the number of iterations needed to find all the CPUs that share the same l2-cache. Use a temporary mask to iterate through the CPUs that may share l2_cache mask. Also instead of setting one CPU at a time into cpu_l2_cache_mask, copy the SMT4/sub mask at one shot. Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200921095653.9701-10-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 51 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 917c8598cf61..925251b0bb0f 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -670,6 +670,28 @@ static void set_cpus_unrelated(int i, int j, } #endif +/* + * Extends set_cpus_related. Instead of setting one CPU at a time in + * dstmask, set srcmask at oneshot. dstmask should be super set of srcmask. 
+ */ +static void or_cpumasks_related(int i, int j, struct cpumask *(*srcmask)(int), + struct cpumask *(*dstmask)(int)) +{ + struct cpumask *mask; + int k; + + mask = srcmask(j); + for_each_cpu(k, srcmask(i)) + cpumask_or(dstmask(k), dstmask(k), mask); + + if (i == j) + return; + + mask = srcmask(i); + for_each_cpu(k, srcmask(j)) + cpumask_or(dstmask(k), dstmask(k), mask); +} + /* * parse_thread_groups: Parses the "ibm,thread-groups" device tree * property for the CPU device node @dn and stores @@ -1220,7 +1242,9 @@ static struct device_node *cpu_to_l2cache(int cpu) static bool update_mask_by_l2(int cpu) { + struct cpumask *(*submask_fn)(int) = cpu_sibling_mask; struct device_node *l2_cache, *np; + cpumask_var_t mask; int i; l2_cache = cpu_to_l2cache(cpu); @@ -1240,22 +1264,37 @@ static bool update_mask_by_l2(int cpu) return false; } - cpumask_set_cpu(cpu, cpu_l2_cache_mask(cpu)); - for_each_cpu_and(i, cpu_online_mask, cpu_cpu_mask(cpu)) { + alloc_cpumask_var_node(&mask, GFP_KERNEL, cpu_to_node(cpu)); + cpumask_and(mask, cpu_online_mask, cpu_cpu_mask(cpu)); + + if (has_big_cores) + submask_fn = cpu_smallcore_mask; + + /* Update l2-cache mask with all the CPUs that are part of submask */ + or_cpumasks_related(cpu, cpu, submask_fn, cpu_l2_cache_mask); + + /* Skip all CPUs already part of current CPU l2-cache mask */ + cpumask_andnot(mask, mask, cpu_l2_cache_mask(cpu)); + + for_each_cpu(i, mask) { /* * when updating the marks the current CPU has not been marked * online, but we need to update the cache masks */ np = cpu_to_l2cache(i); - if (!np) - continue; - if (np == l2_cache) - set_cpus_related(cpu, i, cpu_l2_cache_mask); + /* Skip all CPUs already part of current CPU l2-cache */ + if (np == l2_cache) { + or_cpumasks_related(cpu, i, submask_fn, cpu_l2_cache_mask); + cpumask_andnot(mask, mask, submask_fn(i)); + } else { + cpumask_andnot(mask, mask, cpu_l2_cache_mask(i)); + } of_node_put(np); } of_node_put(l2_cache); + free_cpumask_var(mask); return true; } -- cgit v1.2.3 From b8a97cb4599cda28bd3b3bc13042f5803b42ad65 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 21 Sep 2020 15:26:52 +0530 Subject: powerpc/smp: Move coregroup mask updation to a new function Move the logic for updating the coregroup mask of a CPU to its own function. This will help in reworking the updation of coregroup mask in subsequent patch. 
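The coregroup rework below reuses the shrinking-candidate-mask walk that the optimized update_mask_by_l2() above introduces. A condensed, annotated sketch of that walk follows; it is not a drop-in replacement: it assumes the per-CPU mask helpers from arch/powerpc/include/asm/smp.h, omits error handling, the device-tree lookups and the big-core (SMT8) case, and shares_l2_with() is only a placeholder for the cpu_to_l2cache() node comparison in the real code.

#include <linux/cpumask.h>
#include <linux/topology.h>

/* Placeholder for the cpu_to_l2cache() device-node comparison in the real code. */
static bool shares_l2_with(int a, int b);

/* Condensed sketch of the shrinking-candidate-mask walk used by update_mask_by_l2(). */
static void sketch_update_l2_mask(int cpu)
{
	struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
	cpumask_var_t mask;
	int i;

	if (!alloc_cpumask_var_node(&mask, GFP_KERNEL, cpu_to_node(cpu)))
		return;

	/* Candidate CPUs: online and in the same node as @cpu. */
	cpumask_and(mask, cpu_online_mask, cpu_cpu_mask(cpu));

	/* @cpu's SMT siblings share its L2 by definition: add them in one shot. */
	or_cpumasks_related(cpu, cpu, submask_fn, cpu_l2_cache_mask);
	cpumask_andnot(mask, mask, cpu_l2_cache_mask(cpu));

	for_each_cpu(i, mask) {
		if (shares_l2_with(cpu, i)) {
			/* i's whole sibling group joins @cpu's L2 mask, so skip it from now on. */
			or_cpumasks_related(cpu, i, submask_fn, cpu_l2_cache_mask);
			cpumask_andnot(mask, mask, submask_fn(i));
		} else {
			/* i's L2 group is known not to match; skip it as well. */
			cpumask_andnot(mask, mask, cpu_l2_cache_mask(i));
		}
	}
	free_cpumask_var(mask);
}

The key point is that each loop iteration removes an entire sibling or L2 group from the candidate mask, so the walk visits roughly one CPU per core rather than every online CPU.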
Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200921095653.9701-11-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 925251b0bb0f..45619433c43a 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1337,6 +1337,23 @@ static inline void add_cpu_to_smallcore_masks(int cpu) } } +static void update_coregroup_mask(int cpu) +{ + int first_thread = cpu_first_thread_sibling(cpu); + int coregroup_id = cpu_to_coregroup_id(cpu); + int i; + + cpumask_set_cpu(cpu, cpu_coregroup_mask(cpu)); + for_each_cpu_and(i, cpu_online_mask, cpu_cpu_mask(cpu)) { + int fcpu = cpu_first_thread_sibling(i); + + if (fcpu == first_thread) + set_cpus_related(cpu, i, cpu_coregroup_mask); + else if (coregroup_id == cpu_to_coregroup_id(i)) + set_cpus_related(cpu, i, cpu_coregroup_mask); + } +} + static void add_cpu_to_masks(int cpu) { int first_thread = cpu_first_thread_sibling(cpu); @@ -1355,19 +1372,8 @@ static void add_cpu_to_masks(int cpu) add_cpu_to_smallcore_masks(cpu); update_mask_by_l2(cpu); - if (has_coregroup_support()) { - int coregroup_id = cpu_to_coregroup_id(cpu); - - cpumask_set_cpu(cpu, cpu_coregroup_mask(cpu)); - for_each_cpu_and(i, cpu_online_mask, cpu_cpu_mask(cpu)) { - int fcpu = cpu_first_thread_sibling(i); - - if (fcpu == first_thread) - set_cpus_related(cpu, i, cpu_coregroup_mask); - else if (coregroup_id == cpu_to_coregroup_id(i)) - set_cpus_related(cpu, i, cpu_coregroup_mask); - } - } + if (has_coregroup_support()) + update_coregroup_mask(cpu); } /* Activate a secondary processor. */ -- cgit v1.2.3 From 70a94089d7f7fa91bc1795622426b3ed017ec71a Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 21 Sep 2020 15:26:53 +0530 Subject: powerpc/smp: Optimize update_coregroup_mask All threads of a SMT4/SMT8 core can either be part of CPU's coregroup mask or outside the coregroup. Use this relation to reduce the number of iterations needed to find all the CPUs that share the same coregroup Use a temporary mask to iterate through the CPUs that may share coregroup mask. Also instead of setting one CPU at a time into cpu_coregroup_mask, copy the SMT4/SMT8/submask at one shot. 
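The "one CPU at a time" helper being replaced here, set_cpus_related(), is not shown in this excerpt; it is roughly the following, which makes the contrast with or_cpumasks_related() from the earlier patch easy to see:

/* Rough shape of set_cpus_related(); illustrative, taken from context rather than this series. */
static void set_cpus_related(int i, int j, struct cpumask *(*get_cpumask)(int))
{
	cpumask_set_cpu(i, get_cpumask(j));	/* one bit ...      */
	cpumask_set_cpu(j, get_cpumask(i));	/* ... at a time    */
}

or_cpumasks_related(), by contrast, ORs the whole submask (the SMT4/SMT8 siblings) into each destination mask with a single cpumask_or() per CPU, which is what lets the loops above and below skip entire cores at once.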
Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200921095653.9701-12-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 45619433c43a..0dc1b8591cc8 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1339,19 +1339,33 @@ static inline void add_cpu_to_smallcore_masks(int cpu) static void update_coregroup_mask(int cpu) { - int first_thread = cpu_first_thread_sibling(cpu); + struct cpumask *(*submask_fn)(int) = cpu_sibling_mask; + cpumask_var_t mask; int coregroup_id = cpu_to_coregroup_id(cpu); int i; - cpumask_set_cpu(cpu, cpu_coregroup_mask(cpu)); - for_each_cpu_and(i, cpu_online_mask, cpu_cpu_mask(cpu)) { - int fcpu = cpu_first_thread_sibling(i); + alloc_cpumask_var_node(&mask, GFP_KERNEL, cpu_to_node(cpu)); + cpumask_and(mask, cpu_online_mask, cpu_cpu_mask(cpu)); + + if (shared_caches) + submask_fn = cpu_l2_cache_mask; + + /* Update coregroup mask with all the CPUs that are part of submask */ + or_cpumasks_related(cpu, cpu, submask_fn, cpu_coregroup_mask); + + /* Skip all CPUs already part of coregroup mask */ + cpumask_andnot(mask, mask, cpu_coregroup_mask(cpu)); - if (fcpu == first_thread) - set_cpus_related(cpu, i, cpu_coregroup_mask); - else if (coregroup_id == cpu_to_coregroup_id(i)) - set_cpus_related(cpu, i, cpu_coregroup_mask); + for_each_cpu(i, mask) { + /* Skip all CPUs not part of this coregroup */ + if (coregroup_id == cpu_to_coregroup_id(i)) { + or_cpumasks_related(cpu, i, submask_fn, cpu_coregroup_mask); + cpumask_andnot(mask, mask, submask_fn(i)); + } else { + cpumask_andnot(mask, mask, cpu_coregroup_mask(i)); + } } + free_cpumask_var(mask); } static void add_cpu_to_masks(int cpu) -- cgit v1.2.3 From 3b6c3adbb2fa42749c3d38cfc4d4d0b7e096bb7b Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Mon, 21 Sep 2020 03:10:04 -0400 Subject: powerpc/perf: Exclude pmc5/6 from the irrelevant PMU group constraints PMU counter support functions enforces event constraints for group of events to check if all events in a group can be monitored. Incase of event codes using PMC5 and PMC6 ( 500fa and 600f4 respectively ), not all constraints are applicable, say the threshold or sample bits. But current code includes pmc5 and pmc6 in some group constraints (like IC_DC Qualifier bits) which is actually not applicable and hence results in those events not getting counted when scheduled along with group of other events. Patch fixes this by excluding PMC5/6 from constraints which are not relevant for it. Fixes: 7ffd948 ("powerpc/perf: factor out power8 pmu functions") Signed-off-by: Athira Rajeev Reviewed-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1600672204-1610-1-git-send-email-atrajeev@linux.vnet.ibm.com --- arch/powerpc/perf/isa207-common.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 964437adec18..2848904df638 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -288,6 +288,15 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) mask |= CNST_PMC_MASK(pmc); value |= CNST_PMC_VAL(pmc); + + /* + * PMC5 and PMC6 are used to count cycles and instructions and + * they do not support most of the constraint bits. 
Add a check + * to exclude PMC5/6 from most of the constraints except for + * EBB/BHRB. + */ + if (pmc >= 5) + goto ebb_bhrb; } if (pmc <= 4) { @@ -357,6 +366,7 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) } } +ebb_bhrb: if (!pmc && ebb) /* EBB events must specify the PMC */ return -1; -- cgit v1.2.3 From bd59380c5ba4147dcbaad3e582b55ccfd120b764 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Thu, 20 Aug 2020 14:45:12 +1000 Subject: powerpc/rtas: Restrict RTAS requests from userspace A number of userspace utilities depend on making calls to RTAS to retrieve information and update various things. The existing API through which we expose RTAS to userspace exposes more RTAS functionality than we actually need, through the sys_rtas syscall, which allows root (or anyone with CAP_SYS_ADMIN) to make any RTAS call they want with arbitrary arguments. Many RTAS calls take the address of a buffer as an argument, and it's up to the caller to specify the physical address of the buffer as an argument. We allocate a buffer (the "RMO buffer") in the Real Memory Area that RTAS can access, and then expose the physical address and size of this buffer in /proc/powerpc/rtas/rmo_buffer. Userspace is expected to read this address, poke at the buffer using /dev/mem, and pass an address in the RMO buffer to the RTAS call. However, there's nothing stopping the caller from specifying whatever address they want in the RTAS call, and it's easy to construct a series of RTAS calls that can overwrite arbitrary bytes (even without /dev/mem access). Additionally, there are some RTAS calls that do potentially dangerous things and for which there are no legitimate userspace use cases. In the past, this would not have been a particularly big deal as it was assumed that root could modify all system state freely, but with Secure Boot and lockdown we need to care about this. We can't fundamentally change the ABI at this point, however we can address this by implementing a filter that checks RTAS calls against a list of permitted calls and forces the caller to use addresses within the RMO buffer. The list is based off the list of calls that are used by the librtas userspace library, and has been tested with a number of existing userspace RTAS utilities. For compatibility with any applications we are not aware of that require other calls, the filter can be turned off at build time. Cc: stable@vger.kernel.org Reported-by: Daniel Axtens Signed-off-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200820044512.7543-1-ajd@linux.ibm.com --- arch/powerpc/Kconfig | 13 ++++ arch/powerpc/kernel/rtas.c | 153 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 166 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index fe0e6b317cc2..6c76caa950e1 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -1004,6 +1004,19 @@ config PPC_SECVAR_SYSFS read/write operations on these variables. Say Y if you have secure boot enabled and want to expose variables to userspace. +config PPC_RTAS_FILTER + bool "Enable filtering of RTAS syscalls" + default y + depends on PPC_RTAS + help + The RTAS syscall API has security issues that could be used to + compromise system integrity. This option enforces restrictions on the + RTAS calls and arguments passed by userspace programs to mitigate + these issues. + + Say Y unless you know what you are doing and the filter is causing + problems for you. 
+ endmenu config ISA_DMA_API diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 806d554ce357..954f41676f69 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -992,6 +992,147 @@ struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log, return NULL; } +#ifdef CONFIG_PPC_RTAS_FILTER + +/* + * The sys_rtas syscall, as originally designed, allows root to pass + * arbitrary physical addresses to RTAS calls. A number of RTAS calls + * can be abused to write to arbitrary memory and do other things that + * are potentially harmful to system integrity, and thus should only + * be used inside the kernel and not exposed to userspace. + * + * All known legitimate users of the sys_rtas syscall will only ever + * pass addresses that fall within the RMO buffer, and use a known + * subset of RTAS calls. + * + * Accordingly, we filter RTAS requests to check that the call is + * permitted, and that provided pointers fall within the RMO buffer. + * The rtas_filters list contains an entry for each permitted call, + * with the indexes of the parameters which are expected to contain + * addresses and sizes of buffers allocated inside the RMO buffer. + */ +struct rtas_filter { + const char *name; + int token; + /* Indexes into the args buffer, -1 if not used */ + int buf_idx1; + int size_idx1; + int buf_idx2; + int size_idx2; + + int fixed_size; +}; + +static struct rtas_filter rtas_filters[] __ro_after_init = { + { "ibm,activate-firmware", -1, -1, -1, -1, -1 }, + { "ibm,configure-connector", -1, 0, -1, 1, -1, 4096 }, /* Special cased */ + { "display-character", -1, -1, -1, -1, -1 }, + { "ibm,display-message", -1, 0, -1, -1, -1 }, + { "ibm,errinjct", -1, 2, -1, -1, -1, 1024 }, + { "ibm,close-errinjct", -1, -1, -1, -1, -1 }, + { "ibm,open-errinct", -1, -1, -1, -1, -1 }, + { "ibm,get-config-addr-info2", -1, -1, -1, -1, -1 }, + { "ibm,get-dynamic-sensor-state", -1, 1, -1, -1, -1 }, + { "ibm,get-indices", -1, 2, 3, -1, -1 }, + { "get-power-level", -1, -1, -1, -1, -1 }, + { "get-sensor-state", -1, -1, -1, -1, -1 }, + { "ibm,get-system-parameter", -1, 1, 2, -1, -1 }, + { "get-time-of-day", -1, -1, -1, -1, -1 }, + { "ibm,get-vpd", -1, 0, -1, 1, 2 }, + { "ibm,lpar-perftools", -1, 2, 3, -1, -1 }, + { "ibm,platform-dump", -1, 4, 5, -1, -1 }, + { "ibm,read-slot-reset-state", -1, -1, -1, -1, -1 }, + { "ibm,scan-log-dump", -1, 0, 1, -1, -1 }, + { "ibm,set-dynamic-indicator", -1, 2, -1, -1, -1 }, + { "ibm,set-eeh-option", -1, -1, -1, -1, -1 }, + { "set-indicator", -1, -1, -1, -1, -1 }, + { "set-power-level", -1, -1, -1, -1, -1 }, + { "set-time-for-power-on", -1, -1, -1, -1, -1 }, + { "ibm,set-system-parameter", -1, 1, -1, -1, -1 }, + { "set-time-of-day", -1, -1, -1, -1, -1 }, + { "ibm,suspend-me", -1, -1, -1, -1, -1 }, + { "ibm,update-nodes", -1, 0, -1, -1, -1, 4096 }, + { "ibm,update-properties", -1, 0, -1, -1, -1, 4096 }, + { "ibm,physical-attestation", -1, 0, 1, -1, -1 }, +}; + +static bool in_rmo_buf(u32 base, u32 end) +{ + return base >= rtas_rmo_buf && + base < (rtas_rmo_buf + RTAS_RMOBUF_MAX) && + base <= end && + end >= rtas_rmo_buf && + end < (rtas_rmo_buf + RTAS_RMOBUF_MAX); +} + +static bool block_rtas_call(int token, int nargs, + struct rtas_args *args) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) { + struct rtas_filter *f = &rtas_filters[i]; + u32 base, size, end; + + if (token != f->token) + continue; + + if (f->buf_idx1 != -1) { + base = be32_to_cpu(args->args[f->buf_idx1]); + if (f->size_idx1 != -1) + size = 
be32_to_cpu(args->args[f->size_idx1]); + else if (f->fixed_size) + size = f->fixed_size; + else + size = 1; + + end = base + size - 1; + if (!in_rmo_buf(base, end)) + goto err; + } + + if (f->buf_idx2 != -1) { + base = be32_to_cpu(args->args[f->buf_idx2]); + if (f->size_idx2 != -1) + size = be32_to_cpu(args->args[f->size_idx2]); + else if (f->fixed_size) + size = f->fixed_size; + else + size = 1; + end = base + size - 1; + + /* + * Special case for ibm,configure-connector where the + * address can be 0 + */ + if (!strcmp(f->name, "ibm,configure-connector") && + base == 0) + return false; + + if (!in_rmo_buf(base, end)) + goto err; + } + + return false; + } + +err: + pr_err_ratelimited("sys_rtas: RTAS call blocked - exploit attempt?\n"); + pr_err_ratelimited("sys_rtas: token=0x%x, nargs=%d (called by %s)\n", + token, nargs, current->comm); + return true; +} + +#else + +static bool block_rtas_call(int token, int nargs, + struct rtas_args *args) +{ + return false; +} + +#endif /* CONFIG_PPC_RTAS_FILTER */ + /* We assume to be passed big endian arguments */ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) { @@ -1029,6 +1170,9 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) args.rets = &args.args[nargs]; memset(args.rets, 0, nret * sizeof(rtas_arg_t)); + if (block_rtas_call(token, nargs, &args)) + return -EINVAL; + /* Need to handle ibm,suspend_me call specially */ if (token == ibm_suspend_me_token) { @@ -1090,6 +1234,9 @@ void __init rtas_initialize(void) unsigned long rtas_region = RTAS_INSTANTIATE_MAX; u32 base, size, entry; int no_base, no_size, no_entry; +#ifdef CONFIG_PPC_RTAS_FILTER + int i; +#endif /* Get RTAS dev node and fill up our "rtas" structure with infos * about it. @@ -1129,6 +1276,12 @@ void __init rtas_initialize(void) #ifdef CONFIG_RTAS_ERROR_LOGGING rtas_last_error_token = rtas_token("rtas-last-error"); #endif + +#ifdef CONFIG_PPC_RTAS_FILTER + for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) { + rtas_filters[i].token = rtas_token(rtas_filters[i].name); + } +#endif } int __init early_init_dt_scan_rtas(unsigned long node, -- cgit v1.2.3 From 72cdd117c449896c707fc6cfe5b90978160697d0 Mon Sep 17 00:00:00 2001 From: Scott Cheloha Date: Wed, 16 Sep 2020 09:51:22 -0500 Subject: pseries/hotplug-memory: hot-add: skip redundant LMB lookup During memory hot-add, dlpar_add_lmb() calls memory_add_physaddr_to_nid() to determine which node id (nid) to use when later calling __add_memory(). This is wasteful. On pseries, memory_add_physaddr_to_nid() finds an appropriate nid for a given address by looking up the LMB containing the address and then passing that LMB to of_drconf_to_nid_single() to get the nid. In dlpar_add_lmb() we get this address from the LMB itself. In short, we have a pointer to an LMB and then we are searching for that LMB *again* in order to find its nid. If we call of_drconf_to_nid_single() directly from dlpar_add_lmb() we can skip the redundant lookup. The only error handling we need to duplicate from memory_add_physaddr_to_nid() is the fallback to the default nid when drconf_to_nid_single() returns -1 (NUMA_NO_NODE) or an invalid nid. Skipping the extra lookup makes hot-add operations faster, especially on machines with many LMBs. Consider an LPAR with 126976 LMBs. 
In one test, hot-adding 126000 LMBs on an upatched kernel took ~3.5 hours while a patched kernel completed the same operation in ~2 hours: Unpatched (12450 seconds): Sep 9 04:06:31 ltc-brazos1 drmgr[810169]: drmgr: -c mem -a -q 126000 Sep 9 04:06:31 ltc-brazos1 kernel: pseries-hotplug-mem: Attempting to hot-add 126000 LMB(s) [...] Sep 9 07:34:01 ltc-brazos1 kernel: pseries-hotplug-mem: Memory at 20000000 (drc index 80000002) was hot-added Patched (7065 seconds): Sep 8 21:49:57 ltc-brazos1 drmgr[877703]: drmgr: -c mem -a -q 126000 Sep 8 21:49:57 ltc-brazos1 kernel: pseries-hotplug-mem: Attempting to hot-add 126000 LMB(s) [...] Sep 8 23:27:42 ltc-brazos1 kernel: pseries-hotplug-mem: Memory at 20000000 (drc index 80000002) was hot-added It should be noted that the speedup grows more substantial when hot-adding LMBs at the end of the drconf range. This is because we are skipping a linear LMB search. To see the distinction, consider smaller hot-add test on the same LPAR. A perf-stat run with 10 iterations showed that hot-adding 4096 LMBs completed less than 1 second faster on a patched kernel: Unpatched: Performance counter stats for 'drmgr -c mem -a -q 4096' (10 runs): 104,753.42 msec task-clock # 0.992 CPUs utilized ( +- 0.55% ) 4,708 context-switches # 0.045 K/sec ( +- 0.69% ) 2,444 cpu-migrations # 0.023 K/sec ( +- 1.25% ) 394 page-faults # 0.004 K/sec ( +- 0.22% ) 445,902,503,057 cycles # 4.257 GHz ( +- 0.55% ) (66.67%) 8,558,376,740 stalled-cycles-frontend # 1.92% frontend cycles idle ( +- 0.88% ) (49.99%) 300,346,181,651 stalled-cycles-backend # 67.36% backend cycles idle ( +- 0.76% ) (50.01%) 258,091,488,691 instructions # 0.58 insn per cycle # 1.16 stalled cycles per insn ( +- 0.22% ) (66.67%) 70,568,169,256 branches # 673.660 M/sec ( +- 0.17% ) (50.01%) 3,100,725,426 branch-misses # 4.39% of all branches ( +- 0.20% ) (49.99%) 105.583 +- 0.589 seconds time elapsed ( +- 0.56% ) Patched: Performance counter stats for 'drmgr -c mem -a -q 4096' (10 runs): 104,055.69 msec task-clock # 0.993 CPUs utilized ( +- 0.32% ) 4,606 context-switches # 0.044 K/sec ( +- 0.20% ) 2,463 cpu-migrations # 0.024 K/sec ( +- 0.93% ) 394 page-faults # 0.004 K/sec ( +- 0.25% ) 442,951,129,921 cycles # 4.257 GHz ( +- 0.32% ) (66.66%) 8,710,413,329 stalled-cycles-frontend # 1.97% frontend cycles idle ( +- 0.47% ) (50.06%) 299,656,905,836 stalled-cycles-backend # 67.65% backend cycles idle ( +- 0.39% ) (50.02%) 252,731,168,193 instructions # 0.57 insn per cycle # 1.19 stalled cycles per insn ( +- 0.20% ) (66.66%) 68,902,851,121 branches # 662.173 M/sec ( +- 0.13% ) (49.94%) 3,100,242,882 branch-misses # 4.50% of all branches ( +- 0.15% ) (49.98%) 104.829 +- 0.325 seconds time elapsed ( +- 0.31% ) This is consistent. An add-by-count hot-add operation adds LMBs greedily, so LMBs near the start of the drconf range are considered first. On an otherwise idle LPAR with so many LMBs we would expect to find the LMBs we need near the start of the drconf range, hence the smaller speedup. 
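To make the redundancy concrete, here is a rough sketch of the kind of walk memory_add_physaddr_to_nid() implies on pseries, compared with the single of_drconf_to_nid_single() call the patch makes on the LMB it already holds. The function name and body below are illustrative only; the real implementation differs in detail.

#include <asm/drmem.h>
#include <asm/topology.h>

/* Hypothetical illustration of the redundant path, not the real function body. */
static int addr_to_nid_by_lmb_walk(u64 addr)
{
	struct drmem_lmb *lmb;

	for_each_drmem_lmb(lmb) {	/* linear walk over (up to) 126976 LMBs */
		if (addr >= lmb->base_addr &&
		    addr < lmb->base_addr + drmem_lmb_size())
			return of_drconf_to_nid_single(lmb);
	}
	return NUMA_NO_NODE;
}

dlpar_add_lmb() starts from a struct drmem_lmb pointer, so the whole walk above only rediscovers the argument it was given.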
Signed-off-by: Scott Cheloha Reviewed-by: Laurent Dufour Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200916145122.3408129-1-cheloha@linux.ibm.com --- arch/powerpc/include/asm/topology.h | 3 +++ arch/powerpc/mm/numa.c | 2 +- arch/powerpc/platforms/pseries/hotplug-memory.c | 6 ++++-- 3 files changed, 8 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index e45219f74be0..8728590f514a 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -86,6 +86,9 @@ static inline int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) #endif /* CONFIG_NUMA */ +struct drmem_lmb; +int of_drconf_to_nid_single(struct drmem_lmb *lmb); + #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR) extern int find_and_online_cpu_nid(int cpu); extern int cpu_to_coregroup_id(int cpu); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index b725fb66e913..8335399b7509 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -430,7 +430,7 @@ static int of_get_assoc_arrays(struct assoc_arrays *aa) * This is like of_node_to_nid_single() for memory represented in the * ibm,dynamic-reconfiguration-memory node. */ -static int of_drconf_to_nid_single(struct drmem_lmb *lmb) +int of_drconf_to_nid_single(struct drmem_lmb *lmb) { struct assoc_arrays aa = { .arrays = NULL }; int default_nid = NUMA_NO_NODE; diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 0ea976d1cac4..9a533acf8ad0 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -611,8 +611,10 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) block_sz = memory_block_size_bytes(); - /* Find the node id for this address. */ - nid = memory_add_physaddr_to_nid(lmb->base_addr); + /* Find the node id for this LMB. Fake one if necessary. */ + nid = of_drconf_to_nid_single(lmb); + if (nid < 0 || !node_possible(nid)) + nid = first_online_node; /* Add the memory */ rc = __add_memory(nid, lmb->base_addr, block_sz); -- cgit v1.2.3 From 269e583357df32d77368903214f10f43fa5d7a5f Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Wed, 7 Oct 2020 15:09:02 +1100 Subject: powerpc/eeh: Delete eeh_pe->config_addr The eeh_pe->config_addr field was supposed to be removed in commit 35d64734b643 ("powerpc/eeh: Clean up PE addressing") which made it largely unused. Finish the job. 
Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201007040903.819081-1-oohall@gmail.com --- arch/powerpc/include/asm/eeh.h | 1 - arch/powerpc/kernel/eeh.c | 2 +- arch/powerpc/kernel/eeh_pe.c | 4 ++-- 3 files changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index dd6a4ac6c713..b1a5bba2e0b9 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -73,7 +73,6 @@ struct pci_dn; struct eeh_pe { int type; /* PE type: PHB/Bus/Device */ int state; /* PE EEH dependent mode */ - int config_addr; /* Traditional PCI address */ int addr; /* PE configuration address */ struct pci_controller *phb; /* Associated PHB */ struct pci_bus *bus; /* Top PCI bus for bus PE */ diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 87de8b798b2d..0e160dffcb86 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -466,7 +466,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev) return 0; } - if (!pe->addr && !pe->config_addr) { + if (!pe->addr) { eeh_stats.no_cfg_addr++; return 0; } diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 61b7d4019051..845e024321d4 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -354,8 +354,8 @@ int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent) pr_err("%s: out of memory!\n", __func__); return -ENOMEM; } - pe->addr = edev->pe_config_addr; - pe->config_addr = edev->bdfn; + + pe->addr = edev->pe_config_addr; /* * Put the new EEH PE into hierarchy tree. If the parent -- cgit v1.2.3 From 8175bd580e629dcf9cc507794da774a6b8d3a9bd Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Wed, 7 Oct 2020 15:09:03 +1100 Subject: powerpc/pseries/eeh: Fix use of uninitialised variable If the RTAS call to query the PE address for a device fails we jump the err: label where an error message is printed along with the return code. However, the printed return code is from the "ret" variable which isn't set at that point since we assigned the result to "addr" instead. 
Fix this by consistently using the "ret" variable for the result of the RTAS call helpers an dropping the "addr" local variable" Fixes: 98ba956f6a38 ("powerpc/pseries/eeh: Rework device EEH PE determination") Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201007040903.819081-2-oohall@gmail.com --- arch/powerpc/platforms/pseries/eeh_pseries.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index d8fd5f7b2143..cf024fa37bda 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -363,7 +363,6 @@ void pseries_eeh_init_edev(struct pci_dn *pdn) { struct eeh_pe pe, *parent; struct eeh_dev *edev; - int addr; u32 pcie_flags; int ret; @@ -422,8 +421,8 @@ void pseries_eeh_init_edev(struct pci_dn *pdn) } /* first up, find the pe_config_addr for the PE containing the device */ - addr = pseries_eeh_get_pe_config_addr(pdn); - if (addr < 0) { + ret = pseries_eeh_get_pe_config_addr(pdn); + if (ret < 0) { eeh_edev_dbg(edev, "Unable to find pe_config_addr\n"); goto err; } @@ -431,7 +430,7 @@ void pseries_eeh_init_edev(struct pci_dn *pdn) /* Try enable EEH on the fake PE */ memset(&pe, 0, sizeof(struct eeh_pe)); pe.phb = pdn->phb; - pe.addr = addr; + pe.addr = ret; eeh_edev_dbg(edev, "Enabling EEH on device\n"); ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE); @@ -440,7 +439,7 @@ void pseries_eeh_init_edev(struct pci_dn *pdn) goto err; } - edev->pe_config_addr = addr; + edev->pe_config_addr = pe.addr; eeh_add_flag(EEH_ENABLED); -- cgit v1.2.3 From 0f9866f7e85765bbda86666df56c92f377c3bc10 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Sat, 3 Oct 2020 13:19:39 +0530 Subject: powerpc/perf/hv-gpci: Fix starting index value Commit 9e9f60108423f ("powerpc/perf/{hv-gpci, hv-common}: generate requests with counters annotated") adds a framework for defining gpci counters. In this patch, they adds starting_index value as '0xffffffffffffffff'. which is wrong as starting_index is of size 32 bits. Because of this, incase we try to run hv-gpci event we get error. 
In power9 machine: command#: perf stat -e hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/ -C 0 -I 1000 event syntax error: '..bie_count_and_time_tlbie_instructions_issued/' \___ value too big for format, maximum is 4294967295 This patch fix this issue and changes starting_index value to '0xffffffff' After this patch: command#: perf stat -e hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/ -C 0 -I 1000 1.000085786 1,024 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/ 2.000287818 1,024 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/ 2.439113909 17,408 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/ Fixes: 9e9f60108423 ("powerpc/perf/{hv-gpci, hv-common}: generate requests with counters annotated") Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201003074943.338618-1-kjain@linux.ibm.com --- arch/powerpc/perf/hv-gpci-requests.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/perf/hv-gpci-requests.h b/arch/powerpc/perf/hv-gpci-requests.h index e608f9db12dd..8965b4463d43 100644 --- a/arch/powerpc/perf/hv-gpci-requests.h +++ b/arch/powerpc/perf/hv-gpci-requests.h @@ -95,7 +95,7 @@ REQUEST(__field(0, 8, partition_id) #define REQUEST_NAME system_performance_capabilities #define REQUEST_NUM 0x40 -#define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff" +#define REQUEST_IDX_KIND "starting_index=0xffffffff" #include I(REQUEST_BEGIN) REQUEST(__field(0, 1, perf_collect_privileged) __field(0x1, 1, capability_mask) @@ -223,7 +223,7 @@ REQUEST(__field(0, 2, partition_id) #define REQUEST_NAME system_hypervisor_times #define REQUEST_NUM 0xF0 -#define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff" +#define REQUEST_IDX_KIND "starting_index=0xffffffff" #include I(REQUEST_BEGIN) REQUEST(__count(0, 8, time_spent_to_dispatch_virtual_processors) __count(0x8, 8, time_spent_processing_virtual_processor_timers) @@ -234,7 +234,7 @@ REQUEST(__count(0, 8, time_spent_to_dispatch_virtual_processors) #define REQUEST_NAME system_tlbie_count_and_time #define REQUEST_NUM 0xF4 -#define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff" +#define REQUEST_IDX_KIND "starting_index=0xffffffff" #include I(REQUEST_BEGIN) REQUEST(__count(0, 8, tlbie_instructions_issued) /* -- cgit v1.2.3 From dcb5cdf60a1fbbdb3b4dd2abc562206481f09ef1 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Sat, 3 Oct 2020 13:19:42 +0530 Subject: powerpc/perf/hv-gpci: Add cpu hotplug support Patch here adds cpu hotplug functions to hv_gpci pmu. A new cpuhp_state "CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE" enum is added. The online callback function updates the cpumask only if its empty. As the primary intention of adding hotplug support is to designate a CPU to make HCALL to collect the counter data. The offline function test and clear corresponding cpu in a cpumask and update cpumask to any other active cpu. 
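For contrast, a driver that does not need a fixed, named slot in the cpuhp_state enum can register a dynamic state instead. A minimal sketch follows; the my_pmu_* names are placeholders and not part of this patch:

#include <linux/cpuhotplug.h>

static int my_pmu_online(unsigned int cpu)  { /* claim @cpu if none is designated yet */ return 0; }
static int my_pmu_offline(unsigned int cpu) { /* migrate events off @cpu if it was designated */ return 0; }

static int my_pmu_hotplug_init(void)
{
	/*
	 * CPUHP_AP_ONLINE_DYN allocates a state slot at runtime and returns the
	 * allocated state number on success, so no new enum entry is needed.
	 */
	return cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "perf/powerpc/my_pmu:online",
				 my_pmu_online, my_pmu_offline);
}

hv_gpci adds a fixed CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE entry instead, keeping it next to the other powerpc perf PMU states in cpuhotplug.h.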
Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201003074943.338618-4-kjain@linux.ibm.com --- arch/powerpc/perf/hv-gpci.c | 46 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/cpuhotplug.h | 1 + 2 files changed, 47 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index 1667315b82e9..1bca0c1a6cfe 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -48,6 +48,8 @@ EVENT_DEFINE_RANGE_FORMAT(length, config1, 24, 31); /* u32, byte offset */ EVENT_DEFINE_RANGE_FORMAT(offset, config1, 32, 63); +static cpumask_t hv_gpci_cpumask; + static struct attribute *format_attrs[] = { &format_attr_request.attr, &format_attr_starting_index.attr, @@ -266,6 +268,45 @@ static struct pmu h_gpci_pmu = { .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; +static int ppc_hv_gpci_cpu_online(unsigned int cpu) +{ + if (cpumask_empty(&hv_gpci_cpumask)) + cpumask_set_cpu(cpu, &hv_gpci_cpumask); + + return 0; +} + +static int ppc_hv_gpci_cpu_offline(unsigned int cpu) +{ + int target; + + /* Check if exiting cpu is used for collecting gpci events */ + if (!cpumask_test_and_clear_cpu(cpu, &hv_gpci_cpumask)) + return 0; + + /* Find a new cpu to collect gpci events */ + target = cpumask_last(cpu_active_mask); + + if (target < 0 || target >= nr_cpu_ids) { + pr_err("hv_gpci: CPU hotplug init failed\n"); + return -1; + } + + /* Migrate gpci events to the new target */ + cpumask_set_cpu(target, &hv_gpci_cpumask); + perf_pmu_migrate_context(&h_gpci_pmu, cpu, target); + + return 0; +} + +static int hv_gpci_cpu_hotplug_init(void) +{ + return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE, + "perf/powerpc/hv_gcpi:online", + ppc_hv_gpci_cpu_online, + ppc_hv_gpci_cpu_offline); +} + static int hv_gpci_init(void) { int r; @@ -286,6 +327,11 @@ static int hv_gpci_init(void) return -ENODEV; } + /* init cpuhotplug */ + r = hv_gpci_cpu_hotplug_init(); + if (r) + return r; + /* sampling not supported */ h_gpci_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 3215023d4852..5d08ed922510 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -183,6 +183,7 @@ enum cpuhp_state { CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE, CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE, CPUHP_AP_PERF_POWERPC_HV_24x7_ONLINE, + CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE, CPUHP_AP_WATCHDOG_ONLINE, CPUHP_AP_WORKQUEUE_ONLINE, CPUHP_AP_RCUTREE_ONLINE, -- cgit v1.2.3 From 09b791d95559ef82542063333ecaa2ac9d57118e Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Sat, 3 Oct 2020 13:19:43 +0530 Subject: powerpc/hv-gpci: Add sysfs files inside hv-gpci device to show cpumask Patch here adds a cpumask attr to hv_gpci pmu along with ABI documentation. Primary use to expose the cpumask is for the perf tool which has the capability to parse the driver sysfs folder and understand the cpumask file. Having cpumask file will reduce the number of perf command line parameters (will avoid "-C" option in the perf tool command line). It can also notify the user which is the current cpu used to retrieve the counter data. 
command:# cat /sys/devices/hv_gpci/cpumask 0 Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201003074943.338618-5-kjain@linux.ibm.com --- .../ABI/testing/sysfs-bus-event_source-devices-hv_gpci | 7 +++++++ arch/powerpc/perf/hv-gpci.c | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'arch') diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci index ed989869d116..6a023b42486c 100644 --- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci @@ -72,3 +72,10 @@ Contact: Linux on PowerPC Developer List Description: A number indicating the latest version of the gpci interface that the kernel is aware of. + +What: /sys/devices/hv_gpci/cpumask +Date: October 2020 +Contact: Linux on PowerPC Developer List +Description: read only + This sysfs file exposes the cpumask which is designated to make + HCALLs to retrieve hv-gpci pmu event counter data. diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index 1bca0c1a6cfe..d48413e28c39 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -96,7 +96,15 @@ static ssize_t kernel_version_show(struct device *dev, return sprintf(page, "0x%x\n", COUNTER_INFO_VERSION_CURRENT); } +static ssize_t cpumask_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return cpumap_print_to_pagebuf(true, buf, &hv_gpci_cpumask); +} + static DEVICE_ATTR_RO(kernel_version); +static DEVICE_ATTR_RO(cpumask); + HV_CAPS_ATTR(version, "0x%x\n"); HV_CAPS_ATTR(ga, "%d\n"); HV_CAPS_ATTR(expanded, "%d\n"); @@ -113,6 +121,15 @@ static struct attribute *interface_attrs[] = { NULL, }; +static struct attribute *cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group cpumask_attr_group = { + .attrs = cpumask_attrs, +}; + static struct attribute_group interface_group = { .name = "interface", .attrs = interface_attrs, @@ -122,6 +139,7 @@ static const struct attribute_group *attr_groups[] = { &format_group, &event_group, &interface_group, + &cpumask_attr_group, NULL, }; -- cgit v1.2.3 From 792254a77201453d9a77479e63dc216ad90462d2 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 7 Oct 2020 18:06:05 +1000 Subject: powerpc/security: Fix link stack flush instruction The inline execution path for the hardware assisted branch flush instruction failed to set CTR to the correct value before bcctr, causing a crash when the feature is enabled. 
Fixes: 4d24e21cc694 ("powerpc/security: Allow for processors that flush the link stack using the special bcctr") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201007080605.64423-1-npiggin@gmail.com --- arch/powerpc/include/asm/asm-prototypes.h | 4 +++- arch/powerpc/kernel/entry_64.S | 8 ++++++-- arch/powerpc/kernel/security.c | 34 ++++++++++++++++++++++--------- 3 files changed, 33 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 4957119604c7..d0b832cbbec8 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -145,7 +145,9 @@ void _kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr); void _kvmppc_save_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr); /* Patch sites */ -extern s32 patch__call_flush_branch_caches; +extern s32 patch__call_flush_branch_caches1; +extern s32 patch__call_flush_branch_caches2; +extern s32 patch__call_flush_branch_caches3; extern s32 patch__flush_count_cache_return; extern s32 patch__flush_link_stack_return; extern s32 patch__call_kvm_flush_link_stack; diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 733e40eba4eb..2f3846192ec7 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -430,7 +430,11 @@ _ASM_NOKPROBE_SYMBOL(save_nvgprs); #define FLUSH_COUNT_CACHE \ 1: nop; \ - patch_site 1b, patch__call_flush_branch_caches + patch_site 1b, patch__call_flush_branch_caches1; \ +1: nop; \ + patch_site 1b, patch__call_flush_branch_caches2; \ +1: nop; \ + patch_site 1b, patch__call_flush_branch_caches3 .macro nops number .rept \number @@ -512,7 +516,7 @@ _GLOBAL(_switch) kuap_check_amr r9, r10 - FLUSH_COUNT_CACHE + FLUSH_COUNT_CACHE /* Clobbers r9, ctr */ /* * On SMP kernels, care must be taken because a task may be diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index c9876aab3142..e4e1a94ccf6a 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -430,30 +430,44 @@ device_initcall(stf_barrier_debugfs_init); static void update_branch_cache_flush(void) { + u32 *site; + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + site = &patch__call_kvm_flush_link_stack; // This controls the branch from guest_exit_cont to kvm_flush_link_stack if (link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) { - patch_instruction_site(&patch__call_kvm_flush_link_stack, - ppc_inst(PPC_INST_NOP)); + patch_instruction_site(site, ppc_inst(PPC_INST_NOP)); } else { // Could use HW flush, but that could also flush count cache - patch_branch_site(&patch__call_kvm_flush_link_stack, - (u64)&kvm_flush_link_stack, BRANCH_SET_LINK); + patch_branch_site(site, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK); } #endif + // Patch out the bcctr first, then nop the rest + site = &patch__call_flush_branch_caches3; + patch_instruction_site(site, ppc_inst(PPC_INST_NOP)); + site = &patch__call_flush_branch_caches2; + patch_instruction_site(site, ppc_inst(PPC_INST_NOP)); + site = &patch__call_flush_branch_caches1; + patch_instruction_site(site, ppc_inst(PPC_INST_NOP)); + // This controls the branch from _switch to flush_branch_caches if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE && link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) { - patch_instruction_site(&patch__call_flush_branch_caches, - ppc_inst(PPC_INST_NOP)); + // Nothing to be done + } else if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW 
&& link_stack_flush_type == BRANCH_CACHE_FLUSH_HW) { - patch_instruction_site(&patch__call_flush_branch_caches, - ppc_inst(PPC_INST_BCCTR_FLUSH)); + // Patch in the bcctr last + site = &patch__call_flush_branch_caches1; + patch_instruction_site(site, ppc_inst(0x39207fff)); // li r9,0x7fff + site = &patch__call_flush_branch_caches2; + patch_instruction_site(site, ppc_inst(0x7d2903a6)); // mtctr r9 + site = &patch__call_flush_branch_caches3; + patch_instruction_site(site, ppc_inst(PPC_INST_BCCTR_FLUSH)); + } else { - patch_branch_site(&patch__call_flush_branch_caches, - (u64)&flush_branch_caches, BRANCH_SET_LINK); + patch_branch_site(site, (u64)&flush_branch_caches, BRANCH_SET_LINK); // If we just need to flush the link stack, early return if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE) { -- cgit v1.2.3 From ec72024e35dddb88a81e40071c87ceb18b5ee835 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 7 Oct 2020 17:18:33 +0530 Subject: powerpc/drmem: Make lmb_size 64 bit Similar to commit 89c140bbaeee ("pseries: Fix 64 bit logical memory block panic") make sure different variables tracking lmb_size are updated to be 64 bit. This was found by code audit. Cc: stable@vger.kernel.org Signed-off-by: Aneesh Kumar K.V Acked-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201007114836.282468-2-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/drmem.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index 030a19d92213..bf2402fed3e0 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -20,7 +20,7 @@ struct drmem_lmb { struct drmem_lmb_info { struct drmem_lmb *lmbs; int n_lmbs; - u32 lmb_size; + u64 lmb_size; }; extern struct drmem_lmb_info *drmem_info; @@ -80,7 +80,7 @@ struct of_drconf_cell_v2 { #define DRCONF_MEM_RESERVED 0x00000080 #define DRCONF_MEM_HOTREMOVABLE 0x00000100 -static inline u32 drmem_lmb_size(void) +static inline u64 drmem_lmb_size(void) { return drmem_info->lmb_size; } -- cgit v1.2.3 From 301d2ea6572386245c5d2d2dc85c3b5a737b85ac Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 7 Oct 2020 17:18:34 +0530 Subject: powerpc/memhotplug: Make lmb size 64bit Similar to commit 89c140bbaeee ("pseries: Fix 64 bit logical memory block panic") make sure different variables tracking lmb_size are updated to be 64 bit. This was found by code audit. 
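The hazard is plain integer truncation. A tiny standalone illustration (the 4 GiB value is hypothetical, chosen only because it no longer fits in 32 bits):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t lmb_size  = 4ULL * 1024 * 1024 * 1024;	/* hypothetical 4 GiB LMB size */
	uint32_t truncated = (uint32_t)lmb_size;	/* what a 32-bit field would keep */

	/* Prints "u64: 4294967296, u32: 0": the size silently becomes zero. */
	printf("u64: %llu, u32: %u\n", (unsigned long long)lmb_size, truncated);
	return 0;
}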
Cc: stable@vger.kernel.org Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201007114836.282468-3-aneesh.kumar@linux.ibm.com --- arch/powerpc/platforms/pseries/hotplug-memory.c | 43 +++++++++++++++++-------- 1 file changed, 29 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 9a533acf8ad0..ca5589a1ec7f 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -277,7 +277,7 @@ static int dlpar_offline_lmb(struct drmem_lmb *lmb) return dlpar_change_lmb_state(lmb, false); } -static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size) +static int pseries_remove_memblock(unsigned long base, unsigned long memblock_size) { unsigned long block_sz, start_pfn; int sections_per_block; @@ -308,10 +308,11 @@ out: static int pseries_remove_mem_node(struct device_node *np) { - const __be32 *regs; + const __be32 *prop; unsigned long base; - unsigned int lmb_size; + unsigned long lmb_size; int ret = -EINVAL; + int addr_cells, size_cells; /* * Check to see if we are actually removing memory @@ -322,12 +323,19 @@ static int pseries_remove_mem_node(struct device_node *np) /* * Find the base address and size of the memblock */ - regs = of_get_property(np, "reg", NULL); - if (!regs) + prop = of_get_property(np, "reg", NULL); + if (!prop) return ret; - base = be64_to_cpu(*(unsigned long *)regs); - lmb_size = be32_to_cpu(regs[3]); + addr_cells = of_n_addr_cells(np); + size_cells = of_n_size_cells(np); + + /* + * "reg" property represents (addr,size) tuple. + */ + base = of_read_number(prop, addr_cells); + prop += addr_cells; + lmb_size = of_read_number(prop, size_cells); pseries_remove_memblock(base, lmb_size); return 0; @@ -564,7 +572,7 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) #else static inline int pseries_remove_memblock(unsigned long base, - unsigned int memblock_size) + unsigned long memblock_size) { return -EOPNOTSUPP; } @@ -888,10 +896,11 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog) static int pseries_add_mem_node(struct device_node *np) { - const __be32 *regs; + const __be32 *prop; unsigned long base; - unsigned int lmb_size; + unsigned long lmb_size; int ret = -EINVAL; + int addr_cells, size_cells; /* * Check to see if we are actually adding memory @@ -902,12 +911,18 @@ static int pseries_add_mem_node(struct device_node *np) /* * Find the base and size of the memblock */ - regs = of_get_property(np, "reg", NULL); - if (!regs) + prop = of_get_property(np, "reg", NULL); + if (!prop) return ret; - base = be64_to_cpu(*(unsigned long *)regs); - lmb_size = be32_to_cpu(regs[3]); + addr_cells = of_n_addr_cells(np); + size_cells = of_n_size_cells(np); + /* + * "reg" property represents (addr,size) tuple. + */ + base = of_read_number(prop, addr_cells); + prop += addr_cells; + lmb_size = of_read_number(prop, size_cells); /* * Update memory region to represent the memory add -- cgit v1.2.3 From 950805f4d90eda14325ceab56b0f00d034baa8bc Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 7 Oct 2020 17:18:35 +0530 Subject: powerpc/book3s64/radix: Make radix_mem_block_size 64bit Similar to commit 89c140bbaeee ("pseries: Fix 64 bit logical memory block panic") make sure different variables tracking lmb_size are updated to be 64 bit. 
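The "reg" and "ibm,lmb-size" parsing in the surrounding hunks hinges on of_read_number(), which folds a run of 32-bit cells into one 64-bit value. A simplified model is sketched below; read_cells() is an illustrative stand-in, and the real of_read_number() operates on big-endian cells straight from the device tree.

#include <stdint.h>

/*
 * Simplified model of of_read_number(): fold 'cells' 32-bit cells (assumed
 * already converted from big-endian) into a single 64-bit number.
 */
static uint64_t read_cells(const uint32_t *cell, int cells)
{
	uint64_t r = 0;

	while (cells--)
		r = (r << 32) | *cell++;
	return r;
}

/*
 * Example: a reg = <0x0 0x20000000  0x0 0x10000000> property with
 * #address-cells = <2> and #size-cells = <2> decodes as:
 *   base = read_cells(prop, 2)      -> 0x20000000
 *   size = read_cells(prop + 2, 2)  -> 0x10000000 (256 MB)
 */

Respecting #address-cells/#size-cells this way is what makes the parsing work for both 1-cell and 2-cell sizes, instead of assuming a fixed __be64 layout.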
Fixes: af9d00e93a4f ("powerpc/mm/radix: Create separate mappings for hot-plugged memory") Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201007114836.282468-4-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/book3s/64/mmu.h | 2 +- arch/powerpc/mm/book3s64/radix_pgtable.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index ddc414ab3c4d..e0b52940e43c 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -70,7 +70,7 @@ extern unsigned int mmu_base_pid; /* * memory block size used with radix translation. */ -extern unsigned int __ro_after_init radix_mem_block_size; +extern unsigned long __ro_after_init radix_mem_block_size; #define PRTB_SIZE_SHIFT (mmu_pid_bits + 4) #define PRTB_ENTRIES (1ul << mmu_pid_bits) diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 5c8adeb8c955..78c5afe98359 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -34,7 +34,7 @@ unsigned int mmu_pid_bits; unsigned int mmu_base_pid; -unsigned int radix_mem_block_size __ro_after_init; +unsigned long radix_mem_block_size __ro_after_init; static __ref void *early_alloc_pgtable(unsigned long size, int nid, unsigned long region_start, unsigned long region_end) -- cgit v1.2.3 From fbf2f134c8c312d3166534a8bd6a1aaee6e9c7ef Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 7 Oct 2020 17:18:36 +0530 Subject: powerpc/lmb-size: Use addr #size-cells value when fetching lmb-size Make it consistent with other usages. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201007114836.282468-5-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/book3s64/radix_pgtable.c | 7 ++++--- arch/powerpc/platforms/pseries/hotplug-memory.c | 13 +++++++++---- 2 files changed, 13 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 78c5afe98359..f8e9eb49d46b 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -498,7 +498,7 @@ static int __init probe_memory_block_size(unsigned long node, const char *uname, depth, void *data) { unsigned long *mem_block_size = (unsigned long *)data; - const __be64 *prop; + const __be32 *prop; int len; if (depth != 1) @@ -508,13 +508,14 @@ static int __init probe_memory_block_size(unsigned long node, const char *uname, return 0; prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len); - if (!prop || len < sizeof(__be64)) + + if (!prop || len < dt_root_size_cells * sizeof(__be32)) /* * Nothing in the device tree */ *mem_block_size = MIN_MEMORY_BLOCK_SIZE; else - *mem_block_size = be64_to_cpup(prop); + *mem_block_size = of_read_number(prop, dt_root_size_cells); return 1; } diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index ca5589a1ec7f..4e18653a3804 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -30,12 +30,17 @@ unsigned long pseries_memory_block_size(void) np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); if (np) { - const __be64 *size; + int len; + int size_cells; + const __be32 *prop; - size = of_get_property(np, "ibm,lmb-size", NULL); - if 
(size) - memblock_size = be64_to_cpup(size); + size_cells = of_n_size_cells(np); + + prop = of_get_property(np, "ibm,lmb-size", &len); + if (prop && len >= size_cells * sizeof(__be32)) + memblock_size = of_read_number(prop, size_cells); of_node_put(np); + } else if (machine_is(pseries)) { /* This fallback really only applies to pseries */ unsigned int memzero_size = 0; -- cgit v1.2.3 From 13135b461cf205941308984bd3271ec7d403dc40 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Mon, 14 Sep 2020 02:49:04 +0530 Subject: powerpc/papr_scm: Add PAPR command family to pass-through command-set Add NVDIMM_FAMILY_PAPR to the list of valid 'dimm_family_mask' acceptable by papr_scm. This is needed as since commit 92fe2aa859f5 ("libnvdimm: Validate command family indices") libnvdimm performs a validation of 'nd_cmd_pkg.nd_family' received as part of ND_CMD_CALL processing to ensure only known command families can use the general ND_CMD_CALL pass-through functionality. Without this change the ND_CMD_CALL pass-through targeting NVDIMM_FAMILY_PAPR error out with -EINVAL. Fixes: 92fe2aa859f5 ("libnvdimm: Validate command family indices") Signed-off-by: Vaibhav Jain Reviewed-by: Ira Weiny Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200913211904.24472-1-vaibhav@linux.ibm.com --- arch/powerpc/platforms/pseries/papr_scm.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index a95aa425e7d4..835163f54244 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -898,6 +898,9 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) p->bus_desc.of_node = p->pdev->dev.of_node; p->bus_desc.provider_name = kstrdup(p->pdev->name, GFP_KERNEL); + /* Set the dimm command family mask to accept PDSMs */ + set_bit(NVDIMM_FAMILY_PAPR, &p->bus_desc.dimm_family_mask); + if (!p->bus_desc.provider_name) return -ENOMEM; -- cgit v1.2.3 From ca1d3443b4dd1e8f152bd6c881ddb3eb2996179a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 29 Sep 2020 06:48:31 +0000 Subject: powerpc: Remove SYNC on non 6xx SYNC is usefull for Powerpc 601 only. On everything else, SYNC is empty. Remove it from code that is not made to run on 6xx. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/27951fa6c9a8f80724d1bc81a6117ac32343a55d.1601362098.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_40x.S | 1 - arch/powerpc/kernel/head_booke.h | 1 - arch/powerpc/kernel/misc_64.S | 1 - 3 files changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 5b282d9965a5..44c9018aed1b 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -72,7 +72,6 @@ turn_on_mmu: lis r0,start_here@h ori r0,r0,start_here@l mtspr SPRN_SRR0,r0 - SYNC rfi /* enables MMU */ b . 
/* prevent prefetch past rfi */ diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 18f87bf9e32b..71c359d438b5 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -176,7 +176,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) #endif mtspr SPRN_SRR1,r10 mtspr SPRN_SRR0,r11 - SYNC RFI /* jump to handler, enable MMU */ 99: b ret_from_kernel_syscall .endm diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 7bb46ad98207..070465825c21 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -365,7 +365,6 @@ _GLOBAL(kexec_smp_wait) li r4,KEXEC_STATE_REAL_MODE stb r4,PACAKEXECSTATE(r13) - SYNC b kexec_wait -- cgit v1.2.3 From e42a64002a507bf61e57106ed5323b1854371563 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 29 Sep 2020 06:48:33 +0000 Subject: powerpc: Remove CONFIG_PPC601_SYNC_FIX This config option isn't in any defconfig. The very first versions of Powerpc 601 have a bug which requires additional sync before and/or after some instructions. This was more than 25 years ago and time has come to retire those buggy versions of the 601 from the kernel. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/55b46bff16705b1ae7bf0a60ccd522b1010ebf75.1601362098.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ppc_asm.h | 6 ------ arch/powerpc/platforms/Kconfig | 15 --------------- 2 files changed, 21 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index b4cc6608131c..0b9dc814b81c 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -382,15 +382,9 @@ n: #endif /* various errata or part fixups */ -#ifdef CONFIG_PPC601_SYNC_FIX -#define SYNC sync; isync -#define SYNC_601 sync -#define ISYNC_601 isync -#else #define SYNC #define SYNC_601 #define ISYNC_601 -#endif #if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_FSL_BOOK3E) #define MFTB(dest) \ diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index b439b027a42f..7a5e8f4541e3 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -199,21 +199,6 @@ source "drivers/cpuidle/Kconfig" endmenu -config PPC601_SYNC_FIX - bool "Workarounds for PPC601 bugs" - depends on PPC_BOOK3S_601 && PPC_PMAC - default y - help - Some versions of the PPC601 (the first PowerPC chip) have bugs which - mean that extra synchronization instructions are required near - certain instructions, typically those that make major changes to the - CPU state. These extra instructions reduce performance slightly. - If you say N here, these extra instructions will not be included, - resulting in a kernel which will run faster but may not run at all - on some systems with the PPC601 chip. - - If in doubt, say Y here. - config TAU bool "On-chip CPU temperature sensor support" depends on PPC_BOOK3S_32 -- cgit v1.2.3 From d2a5cd83ee984c0e9fc172d2df9591c264261a52 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 29 Sep 2020 06:48:34 +0000 Subject: powerpc: Drop SYNC_601() ISYNC_601() and SYNC() Those macros are now empty at all time. Drop them. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/7990bb63fc53e460bfa94f8040184881d9e6fbc3.1601362098.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ppc_asm.h | 4 ---- arch/powerpc/kernel/entry_32.S | 17 +---------------- arch/powerpc/kernel/fpu.S | 1 - arch/powerpc/kernel/head_32.S | 9 --------- arch/powerpc/kernel/head_32.h | 1 - arch/powerpc/kernel/l2cr_6xx.S | 3 +-- arch/powerpc/mm/book3s32/hash_low.S | 12 ------------ 7 files changed, 2 insertions(+), 45 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 0b9dc814b81c..67a421b81a50 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -382,10 +382,6 @@ n: #endif /* various errata or part fixups */ -#define SYNC -#define SYNC_601 -#define ISYNC_601 - #if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_FSL_BOOK3E) #define MFTB(dest) \ 90: mfspr dest, SPRN_TBRL; \ diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index f4d0af8e1136..f25ea188ecd3 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -234,7 +234,6 @@ transfer_to_handler_cont: mtspr SPRN_SRR0,r11 mtspr SPRN_SRR1,r10 mtlr r9 - SYNC RFI /* jump to handler, enable MMU */ #if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) @@ -264,7 +263,6 @@ _ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont) LOAD_REG_IMMEDIATE(r0, MSR_KERNEL) mtspr SPRN_SRR0,r12 mtspr SPRN_SRR1,r0 - SYNC RFI reenable_mmu: @@ -323,7 +321,6 @@ stack_ovf: #endif mtspr SPRN_SRR0,r9 mtspr SPRN_SRR1,r10 - SYNC RFI _ASM_NOKPROBE_SYMBOL(stack_ovf) #endif @@ -411,7 +408,6 @@ ret_from_syscall: /* disable interrupts so current_thread_info()->flags can't change */ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) /* doesn't include MSR_EE */ /* Note: We don't bother telling lockdep about it */ - SYNC mtmsr r10 lwz r9,TI_FLAGS(r2) li r8,-MAX_ERRNO @@ -474,7 +470,6 @@ syscall_exit_finish: #endif mtspr SPRN_SRR0,r7 mtspr SPRN_SRR1,r8 - SYNC RFI _ASM_NOKPROBE_SYMBOL(syscall_exit_finish) #ifdef CONFIG_44x @@ -567,7 +562,6 @@ syscall_exit_work: * lockdep as we are supposed to have IRQs on at this point */ ori r10,r10,MSR_EE - SYNC mtmsr r10 /* Save NVGPRS if they're not saved already */ @@ -606,7 +600,6 @@ ret_from_kernel_syscall: #endif mtspr SPRN_SRR0, r9 mtspr SPRN_SRR1, r10 - SYNC RFI _ASM_NOKPROBE_SYMBOL(ret_from_kernel_syscall) @@ -810,7 +803,6 @@ fast_exception_return: REST_GPR(9, r11) REST_GPR(12, r11) lwz r11,GPR11(r11) - SYNC RFI _ASM_NOKPROBE_SYMBOL(fast_exception_return) @@ -872,7 +864,6 @@ ret_from_except: * from the interrupt. */ /* Note: We don't bother telling lockdep about it */ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) - SYNC /* Some chip revs have problems here... */ mtmsr r10 /* disable interrupts */ lwz r3,_MSR(r1) /* Returning to user mode? */ @@ -1035,7 +1026,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) * exc_exit_restart below. -- paulus */ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL & ~MSR_RI) - SYNC mtmsr r10 /* clear the RI bit */ .globl exc_exit_restart exc_exit_restart: @@ -1046,7 +1036,6 @@ exc_exit_restart: lwz r1,GPR1(r1) .globl exc_exit_restart_end exc_exit_restart_end: - SYNC RFI _ASM_NOKPROBE_SYMBOL(exc_exit_restart) _ASM_NOKPROBE_SYMBOL(exc_exit_restart_end) @@ -1274,7 +1263,6 @@ do_resched: /* r10 contains MSR_KERNEL here */ mfmsr r10 #endif ori r10,r10,MSR_EE - SYNC mtmsr r10 /* hard-enable interrupts */ bl schedule recheck: @@ -1283,7 +1271,6 @@ recheck: * TI_FLAGS aren't advertised. 
*/ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) - SYNC mtmsr r10 /* disable interrupts */ lwz r9,TI_FLAGS(r2) andi. r0,r9,_TIF_NEED_RESCHED @@ -1292,7 +1279,6 @@ recheck: beq restore_user do_user_signal: /* r10 contains MSR_KERNEL here */ ori r10,r10,MSR_EE - SYNC mtmsr r10 /* hard-enable interrupts */ /* save r13-r31 in the exception frame, if not already done */ lwz r3,_TRAP(r1) @@ -1382,8 +1368,7 @@ _GLOBAL(enter_rtas) mfmsr r9 stw r9,8(r1) LOAD_REG_IMMEDIATE(r0,MSR_KERNEL) - SYNC /* disable interrupts so SRR0/1 */ - mtmsr r0 /* don't get trashed */ + mtmsr r0 /* disable interrupts so SRR0/1 don't get trashed */ li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR) mtlr r6 stw r7, THREAD + RTAS_SP(r2) diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 825893d4cb59..3ff9a8fafa46 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -87,7 +87,6 @@ BEGIN_FTR_SECTION oris r5,r5,MSR_VSX@h END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif - SYNC MTMSRD(r5) /* enable use of fpu now */ isync /* enable use of FP after return */ diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 2bd0aa3a4cc7..48cde60334a2 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -219,7 +219,6 @@ turn_on_mmu: lis r0,start_here@h ori r0,r0,start_here@l mtspr SPRN_SRR0,r0 - SYNC RFI /* enables MMU */ /* @@ -784,14 +783,12 @@ fast_hash_page_return: mtcr r11 lwz r11, THR11(r10) mfspr r10, SPRN_SPRG_SCRATCH0 - SYNC RFI 1: /* ISI */ mtcr r11 mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r10, SPRN_SPRG_SCRATCH0 - SYNC RFI stack_overflow: @@ -882,7 +879,6 @@ __secondary_start_pmac_0: set to map the 0xf0000000 - 0xffffffff region */ mfmsr r0 rlwinm r0,r0,0,28,26 /* clear DR (0x10) */ - SYNC mtmsr r0 isync @@ -930,7 +926,6 @@ __secondary_start: ori r3,r3,start_secondary@l mtspr SPRN_SRR0,r3 mtspr SPRN_SRR1,r4 - SYNC RFI #endif /* CONFIG_SMP */ @@ -1074,7 +1069,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) .align 4 mtspr SPRN_SRR0,r4 mtspr SPRN_SRR1,r3 - SYNC RFI /* Load up the kernel context */ 2: bl load_up_mmu @@ -1099,7 +1093,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) ori r3,r3,start_kernel@l mtspr SPRN_SRR0,r3 mtspr SPRN_SRR1,r4 - SYNC RFI /* @@ -1217,7 +1210,6 @@ _ENTRY(update_bats) .align 4 mtspr SPRN_SRR0, r4 mtspr SPRN_SRR1, r3 - SYNC RFI 1: bl clear_bats lis r3, BATS@ha @@ -1237,7 +1229,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) mtmsr r3 mtspr SPRN_SRR0, r7 mtspr SPRN_SRR1, r6 - SYNC RFI flush_tlbs: diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index cc36998c5541..7c767765071d 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -222,7 +222,6 @@ #endif mtspr SPRN_SRR1,r10 mtspr SPRN_SRR0,r11 - SYNC RFI /* jump to handler, enable MMU */ 99: b ret_from_kernel_syscall .endm diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S index 5f07aa5e9851..225511d73bef 100644 --- a/arch/powerpc/kernel/l2cr_6xx.S +++ b/arch/powerpc/kernel/l2cr_6xx.S @@ -256,7 +256,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450) sync /* Restore MSR (restores EE and DR bits to original state) */ - SYNC mtmsr r7 isync @@ -377,7 +376,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L3CR) 1: bdnz 1b /* Restore MSR (restores EE and DR bits to original state) */ -4: SYNC +4: mtmsr r7 isync blr diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index 1690d369688b..3143de6ae769 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -199,11 
+199,9 @@ _GLOBAL(add_hash_page) * covered by a BAT). -- paulus */ mfmsr r9 - SYNC rlwinm r0,r9,0,17,15 /* clear bit 16 (MSR_EE) */ rlwinm r0,r0,0,28,26 /* clear MSR_DR */ mtmsr r0 - SYNC_601 isync #ifdef CONFIG_SMP @@ -262,7 +260,6 @@ _GLOBAL(add_hash_page) /* reenable interrupts and DR */ mtmsr r9 - SYNC_601 isync lwz r0,4(r1) @@ -506,11 +503,9 @@ _GLOBAL(flush_hash_pages) * covered by a BAT). -- paulus */ mfmsr r10 - SYNC rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ rlwinm r0,r0,0,28,26 /* clear MSR_DR */ mtmsr r0 - SYNC_601 isync /* First find a PTE in the range that has _PAGE_HASHPTE set */ @@ -629,7 +624,6 @@ _GLOBAL(flush_hash_pages) #endif 19: mtmsr r10 - SYNC_601 isync blr EXPORT_SYMBOL(flush_hash_pages) @@ -643,11 +637,9 @@ _GLOBAL(_tlbie) lwz r8,TASK_CPU(r2) oris r8,r8,11 mfmsr r10 - SYNC rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ rlwinm r0,r0,0,28,26 /* clear DR */ mtmsr r0 - SYNC_601 isync lis r9,mmu_hash_lock@h ori r9,r9,mmu_hash_lock@l @@ -664,7 +656,6 @@ _GLOBAL(_tlbie) li r0,0 stw r0,0(r9) /* clear mmu_hash_lock */ mtmsr r10 - SYNC_601 isync #else /* CONFIG_SMP */ tlbie r3 @@ -681,11 +672,9 @@ _GLOBAL(_tlbia) lwz r8,TASK_CPU(r2) oris r8,r8,10 mfmsr r10 - SYNC rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ rlwinm r0,r0,0,28,26 /* clear DR */ mtmsr r0 - SYNC_601 isync lis r9,mmu_hash_lock@h ori r9,r9,mmu_hash_lock@l @@ -709,7 +698,6 @@ _GLOBAL(_tlbia) li r0,0 stw r0,0(r9) /* clear mmu_hash_lock */ mtmsr r10 - SYNC_601 isync #endif /* CONFIG_SMP */ blr -- cgit v1.2.3 From f0ed73f3fa2cdca65973659689ec9e46d99a5f60 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 29 Sep 2020 06:48:35 +0000 Subject: powerpc: Remove PowerPC 601 Powerpc 601 is 25 years old. It is not selected by any defconfig. It requires a lot of special handling as it deviates from the standard 6xx. Retire it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/00a6948d659e017f8ca63437d1384222c3aede57.1601362098.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/cputable.c | 15 --------------- arch/powerpc/platforms/Kconfig.cputype | 11 ++--------- 2 files changed, 2 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index b5bc2edef440..492c0b36aff6 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -609,21 +609,6 @@ static struct cpu_spec __initdata cpu_specs[] = { #endif /* CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_PPC32 -#ifdef CONFIG_PPC_BOOK3S_601 - { /* 601 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00010000, - .cpu_name = "601", - .cpu_features = CPU_FTRS_PPC601, - .cpu_user_features = COMMON_USER | PPC_FEATURE_601_INSTR | - PPC_FEATURE_UNIFIED_CACHE | PPC_FEATURE_NO_TB, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_generic, - .platform = "ppc601", - }, -#endif /* CONFIG_PPC_BOOK3S_601 */ #ifdef CONFIG_PPC_BOOK3S_6xx { /* 603 */ .pvr_mask = 0xffff0000, diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index e74ec220b5d6..c194c4ae8bc7 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -20,7 +20,7 @@ choice depends on PPC32 help There are five families of 32 bit PowerPC chips supported. 
- The most common ones are the desktop and server CPUs (601, 603, + The most common ones are the desktop and server CPUs (603, 604, 740, 750, 74xx) CPUs from Freescale and IBM, with their embedded 512x/52xx/82xx/83xx/86xx counterparts. The other embedded parts, namely 4xx, 8xx, e200 (55xx) and e500 @@ -30,7 +30,7 @@ choice If unsure, select 52xx/6xx/7xx/74xx/82xx/83xx/86xx. config PPC_BOOK3S_6xx - bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx except 601" + bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx" select PPC_BOOK3S_32 select PPC_FPU select PPC_HAVE_PMU_SUPPORT @@ -38,13 +38,6 @@ config PPC_BOOK3S_6xx select PPC_HAVE_KUAP select HAVE_ARCH_VMAP_STACK if !ADB_PMU -config PPC_BOOK3S_601 - bool "PowerPC 601" - select PPC_BOOK3S_32 - select PPC_FPU - select PPC_HAVE_KUAP - select HAVE_ARCH_VMAP_STACK - config PPC_85xx bool "Freescale 85xx" select E500 -- cgit v1.2.3 From 8b14e1dff067195dca7a42321771437cb33a99e9 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 29 Sep 2020 06:48:36 +0000 Subject: powerpc: Remove support for PowerPC 601 PowerPC 601 has been retired. Remove all associated specific code. CPU_FTRS_PPC601 has CPU_FTR_COHERENT_ICACHE and CPU_FTR_COMMON. CPU_FTR_COMMON is already present via other CPU_FTRS. None of the remaining CPU selects CPU_FTR_COHERENT_ICACHE. So CPU_FTRS_PPC601 can be removed from the possible features, hence can be removed completely. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/60b725d55e21beec3335175c20b77903ff98284f.1601362098.git.christophe.leroy@csgroup.eu --- arch/powerpc/boot/util.S | 15 +-------- arch/powerpc/include/asm/cputable.h | 12 ++----- arch/powerpc/include/asm/ppc_asm.h | 3 +- arch/powerpc/include/asm/ptrace.h | 4 --- arch/powerpc/include/asm/time.h | 2 +- arch/powerpc/include/asm/timex.h | 3 -- arch/powerpc/kernel/btext.c | 8 +---- arch/powerpc/kernel/entry_32.S | 18 ---------- arch/powerpc/kernel/head_32.S | 44 ++---------------------- arch/powerpc/kernel/setup_32.c | 2 +- arch/powerpc/kernel/traps.c | 4 --- arch/powerpc/kernel/vdso32/datapage.S | 2 -- arch/powerpc/kernel/vdso32/vdso32.lds.S | 2 -- arch/powerpc/mm/book3s32/mmu.c | 39 +++------------------- arch/powerpc/mm/ptdump/bats.c | 59 --------------------------------- arch/powerpc/platforms/powermac/setup.c | 2 +- arch/powerpc/platforms/powermac/smp.c | 4 --- 17 files changed, 17 insertions(+), 206 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/boot/util.S b/arch/powerpc/boot/util.S index f11f0589a669..d03cdb7606dc 100644 --- a/arch/powerpc/boot/util.S +++ b/arch/powerpc/boot/util.S @@ -18,7 +18,7 @@ .text -/* udelay (on non-601 processors) needs to know the period of the +/* udelay needs to know the period of the * timebase in nanoseconds. This used to be hardcoded to be 60ns * (period of 66MHz/4). Now a variable is used that is initialized to * 60 for backward compatibility, but it can be overridden as necessary @@ -37,19 +37,6 @@ timebase_period_ns: */ .globl udelay udelay: - mfspr r4,SPRN_PVR - srwi r4,r4,16 - cmpwi 0,r4,1 /* 601 ? */ - bne .Ludelay_not_601 -00: li r0,86 /* Instructions / microsecond? */ - mtctr r0 -10: addi r0,r0,0 /* NOP */ - bdnz 10b - subic. 
r3,r3,1 - bne 00b - blr - -.Ludelay_not_601: mulli r4,r3,1000 /* nanoseconds */ /* Change r4 to be the number of ticks using: * (nanoseconds + (timebase_period_ns - 1 )) / timebase_period_ns diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index a1a300c1a20e..93bc70d4c9a1 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -295,8 +295,6 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTR_MAYBE_CAN_NAP 0 #endif -#define CPU_FTRS_PPC601 (CPU_FTR_COMMON | \ - CPU_FTR_COHERENT_ICACHE) #define CPU_FTRS_603 (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE | CPU_FTR_NOEXECUTE) #define CPU_FTRS_604 (CPU_FTR_COMMON | CPU_FTR_PPC_LE) @@ -512,10 +510,8 @@ static inline void cpu_feature_keys_init(void) { } #else enum { CPU_FTRS_POSSIBLE = -#ifdef CONFIG_PPC_BOOK3S_601 - CPU_FTRS_PPC601 | -#elif defined(CONFIG_PPC_BOOK3S_32) - CPU_FTRS_PPC601 | CPU_FTRS_603 | CPU_FTRS_604 | CPU_FTRS_740_NOTAU | +#ifdef CONFIG_PPC_BOOK3S_32 + CPU_FTRS_603 | CPU_FTRS_604 | CPU_FTRS_740_NOTAU | CPU_FTRS_740 | CPU_FTRS_750 | CPU_FTRS_750FX1 | CPU_FTRS_750FX2 | CPU_FTRS_750FX | CPU_FTRS_750GX | CPU_FTRS_7400_NOTAU | CPU_FTRS_7400 | CPU_FTRS_7450_20 | @@ -590,9 +586,7 @@ enum { #else enum { CPU_FTRS_ALWAYS = -#ifdef CONFIG_PPC_BOOK3S_601 - CPU_FTRS_PPC601 & -#elif defined(CONFIG_PPC_BOOK3S_32) +#ifdef CONFIG_PPC_BOOK3S_32 CPU_FTRS_603 & CPU_FTRS_604 & CPU_FTRS_740_NOTAU & CPU_FTRS_740 & CPU_FTRS_750 & CPU_FTRS_750FX1 & CPU_FTRS_750FX2 & CPU_FTRS_750FX & CPU_FTRS_750GX & diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 67a421b81a50..511786f0e40d 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -401,8 +401,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96) #define MFTBU(dest) mfspr dest, SPRN_TBRU #endif -/* tlbsync is not implemented on 601 */ -#if !defined(CONFIG_SMP) || defined(CONFIG_PPC_BOOK3S_601) +#ifndef CONFIG_SMP #define TLBSYNC #else #define TLBSYNC tlbsync; sync diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 155a197c0aa1..e2c778c176a3 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -243,11 +243,7 @@ static inline void set_trap_norestart(struct pt_regs *regs) } #define arch_has_single_step() (1) -#ifndef CONFIG_PPC_BOOK3S_601 #define arch_has_block_step() (true) -#else -#define arch_has_block_step() (false) -#endif #define ARCH_HAS_USER_SINGLE_STEP_REPORT /* diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index cb326720a8a1..ce065589192a 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -39,7 +39,7 @@ struct div_result { }; /* Accessor functions for the timebase (RTC on 601) registers. 
*/ -#define __USE_RTC() (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) +#define __USE_RTC() (0) #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h index 6047402b0a4d..95988870a57b 100644 --- a/arch/powerpc/include/asm/timex.h +++ b/arch/powerpc/include/asm/timex.h @@ -17,9 +17,6 @@ typedef unsigned long cycles_t; static inline cycles_t get_cycles(void) { - if (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) - return 0; - return mftb(); } diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index 02300edc6989..b609fb39dba8 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -95,18 +95,12 @@ void __init btext_prepare_BAT(void) boot_text_mapped = 0; return; } - if (PVR_VER(mfspr(SPRN_PVR)) != 1) { + { /* 603, 604, G3, G4, ... */ lowbits = addr & ~0xFF000000UL; addr &= 0xFF000000UL; disp_BAT[0] = vaddr | (BL_16M<<2) | 2; disp_BAT[1] = addr | (_PAGE_NO_CACHE | _PAGE_GUARDED | BPP_RW); - } else { - /* 601 */ - lowbits = addr & ~0xFF800000UL; - addr &= 0xFF800000UL; - disp_BAT[0] = vaddr | (_PAGE_NO_CACHE | PP_RWXX) | 4; - disp_BAT[1] = addr | BL_8M | 0x40; } logicalDisplayBase = (void *) (vaddr + lowbits); } diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index f25ea188ecd3..8cdc8bcde703 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -811,19 +811,11 @@ _ASM_NOKPROBE_SYMBOL(fast_exception_return) 1: lis r3,exc_exit_restart_end@ha addi r3,r3,exc_exit_restart_end@l cmplw r12,r3 -#ifdef CONFIG_PPC_BOOK3S_601 - bge 2b -#else bge 3f -#endif lis r4,exc_exit_restart@ha addi r4,r4,exc_exit_restart@l cmplw r12,r4 -#ifdef CONFIG_PPC_BOOK3S_601 - blt 2b -#else blt 3f -#endif lis r3,fee_restarts@ha tophys(r3,r3) lwz r5,fee_restarts@l(r3) @@ -840,7 +832,6 @@ fee_restarts: /* aargh, a nonrecoverable interrupt, panic */ /* aargh, we don't know which trap this is */ -/* but the 601 doesn't implement the RI bit, so assume it's OK */ 3: li r10,-1 stw r10,_TRAP(r11) @@ -1302,19 +1293,11 @@ nonrecoverable: lis r10,exc_exit_restart_end@ha addi r10,r10,exc_exit_restart_end@l cmplw r12,r10 -#ifdef CONFIG_PPC_BOOK3S_601 - bgelr -#else bge 3f -#endif lis r11,exc_exit_restart@ha addi r11,r11,exc_exit_restart@l cmplw r12,r11 -#ifdef CONFIG_PPC_BOOK3S_601 - bltlr -#else blt 3f -#endif lis r10,ee_restarts@ha lwz r12,ee_restarts@l(r10) addi r12,r12,1 @@ -1322,7 +1305,6 @@ nonrecoverable: mr r12,r11 /* restart at exc_exit_restart */ blr 3: /* OK, we can't recover, kill this process */ - /* but the 601 doesn't implement the RI bit, so assume it's OK */ lwz r3,_TRAP(r1) andi. 
r0,r3,1 beq 5f diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 48cde60334a2..b14524d4534c 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -34,16 +34,6 @@ #include "head_32.h" -/* 601 only have IBAT */ -#ifdef CONFIG_PPC_BOOK3S_601 -#define LOAD_BAT(n, reg, RA, RB) \ - li RA,0; \ - mtspr SPRN_IBAT##n##U,RA; \ - lwz RA,(n*16)+0(reg); \ - lwz RB,(n*16)+4(reg); \ - mtspr SPRN_IBAT##n##U,RA; \ - mtspr SPRN_IBAT##n##L,RB -#else #define LOAD_BAT(n, reg, RA, RB) \ /* see the comment for clear_bats() -- Cort */ \ li RA,0; \ @@ -57,7 +47,6 @@ lwz RB,(n*16)+12(reg); \ mtspr SPRN_DBAT##n##U,RA; \ mtspr SPRN_DBAT##n##L,RB -#endif __HEAD .stabs "arch/powerpc/kernel/",N_SO,0,0,0f @@ -432,7 +421,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) SystemCall: SYSCALL_ENTRY 0xc00 -/* Single step - not used on 601 */ EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD) @@ -974,8 +962,7 @@ load_up_mmu: lwz r6,_SDR1@l(r6) mtspr SPRN_SDR1,r6 -/* Load the BAT registers with the values set up by MMU_init. - MMU_init takes care of whether we're on a 601 or not. */ +/* Load the BAT registers with the values set up by MMU_init. */ lis r3,BATS@ha addi r3,r3,BATS@l tophys(r3,r3) @@ -1152,7 +1139,6 @@ EXPORT_SYMBOL(switch_mmu_context) clear_bats: li r10,0 -#ifndef CONFIG_PPC_BOOK3S_601 mtspr SPRN_DBAT0U,r10 mtspr SPRN_DBAT0L,r10 mtspr SPRN_DBAT1U,r10 @@ -1161,7 +1147,6 @@ clear_bats: mtspr SPRN_DBAT2L,r10 mtspr SPRN_DBAT3U,r10 mtspr SPRN_DBAT3L,r10 -#endif mtspr SPRN_IBAT0U,r10 mtspr SPRN_IBAT0L,r10 mtspr SPRN_IBAT1U,r10 @@ -1252,26 +1237,9 @@ mmu_off: sync RFI -/* - * On 601, we use 3 BATs to map up to 24M of RAM at _PAGE_OFFSET - * (we keep one for debugging) and on others, we use one 256M BAT. - */ +/* We use one BAT to map up to 256M of RAM at _PAGE_OFFSET */ initial_bats: lis r11,PAGE_OFFSET@h -#ifdef CONFIG_PPC_BOOK3S_601 - ori r11,r11,4 /* set up BAT registers for 601 */ - li r8,0x7f /* valid, block length = 8MB */ - mtspr SPRN_IBAT0U,r11 /* N.B. 601 has valid bit in */ - mtspr SPRN_IBAT0L,r8 /* lower BAT register */ - addis r11,r11,0x800000@h - addis r8,r8,0x800000@h - mtspr SPRN_IBAT1U,r11 - mtspr SPRN_IBAT1L,r8 - addis r11,r11,0x800000@h - addis r8,r8,0x800000@h - mtspr SPRN_IBAT2U,r11 - mtspr SPRN_IBAT2L,r8 -#else tophys(r8,r11) #ifdef CONFIG_SMP ori r8,r8,0x12 /* R/W access, M=1 */ @@ -1280,11 +1248,10 @@ initial_bats: #endif /* CONFIG_SMP */ ori r11,r11,BL_256M<<2|0x2 /* set up BAT registers for 604 */ - mtspr SPRN_DBAT0L,r8 /* N.B. 6xx (not 601) have valid */ + mtspr SPRN_DBAT0L,r8 /* N.B. 
6xx have valid */ mtspr SPRN_DBAT0U,r11 /* bit in upper BAT register */ mtspr SPRN_IBAT0L,r8 mtspr SPRN_IBAT0U,r11 -#endif isync blr @@ -1302,13 +1269,8 @@ setup_disp_bat: beqlr lwz r11,0(r8) lwz r8,4(r8) -#ifndef CONFIG_PPC_BOOK3S_601 mtspr SPRN_DBAT3L,r8 mtspr SPRN_DBAT3U,r11 -#else - mtspr SPRN_IBAT3L,r8 - mtspr SPRN_IBAT3U,r11 -#endif blr #endif /* CONFIG_BOOTX_TEXT */ diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 1823706ae076..057d6b8e9bb0 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -223,6 +223,6 @@ __init void initialize_cache_info(void) dcache_bsize = cur_cpu_spec->dcache_bsize; icache_bsize = cur_cpu_spec->icache_bsize; ucache_bsize = 0; - if (IS_ENABLED(CONFIG_PPC_BOOK3S_601) || IS_ENABLED(CONFIG_E200)) + if (IS_ENABLED(CONFIG_E200)) ucache_bsize = icache_bsize = dcache_bsize; } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index d1ebe152f210..c5f39f13e96e 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -529,9 +529,6 @@ out: * Check if the NIP corresponds to the address of a sync * instruction for which there is an entry in the exception * table. - * Note that the 601 only takes a machine check on TEA - * (transfer error ack) signal assertion, and does not - * set any of the top 16 bits of SRR1. * -- paulus. */ static inline int check_io_access(struct pt_regs *regs) @@ -796,7 +793,6 @@ int machine_check_generic(struct pt_regs *regs) case 0x80000: pr_cont("Machine check signal\n"); break; - case 0: /* for 601 */ case 0x40000: case 0x140000: /* 7450 MSS error and TEA */ pr_cont("Transfer error ack signal\n"); diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S index 217bb630f8f9..1d23e2771dba 100644 --- a/arch/powerpc/kernel/vdso32/datapage.S +++ b/arch/powerpc/kernel/vdso32/datapage.S @@ -47,7 +47,6 @@ V_FUNCTION_END(__kernel_get_syscall_map) * * returns the timebase frequency in HZ */ -#ifndef CONFIG_PPC_BOOK3S_601 V_FUNCTION_BEGIN(__kernel_get_tbfreq) .cfi_startproc mflr r12 @@ -60,4 +59,3 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq) blr .cfi_endproc V_FUNCTION_END(__kernel_get_tbfreq) -#endif diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S index 5206c2eb2a1d..7eadac74c7f9 100644 --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -144,13 +144,11 @@ VERSION __kernel_datapage_offset; __kernel_get_syscall_map; -#ifndef CONFIG_PPC_BOOK3S_601 __kernel_gettimeofday; __kernel_clock_gettime; __kernel_clock_getres; __kernel_time; __kernel_get_tbfreq; -#endif __kernel_sync_dicache; __kernel_sync_dicache_p5; __kernel_sigtramp32; diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index d426eaf76bb0..771d607f1a3d 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -74,14 +74,7 @@ static int find_free_bat(void) { int b; - if (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) { - for (b = 0; b < 4; b++) { - struct ppc_bat *bat = BATS[b]; - - if (!(bat[0].batl & 0x40)) - return b; - } - } else { + { int n = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4; for (b = 0; b < n; b++) { @@ -97,7 +90,7 @@ static int find_free_bat(void) /* * This function calculates the size of the larger block usable to map the * beginning of an area based on the start address and size of that area: - * - max block size is 8M on 601 and 256 on other 6xx. + * - max block size is 256 on 6xx. 
* - base address must be aligned to the block size. So the maximum block size * is identified by the lowest bit set to 1 in the base address (for instance * if base is 0x16000000, max size is 0x02000000). @@ -106,7 +99,7 @@ static int find_free_bat(void) */ static unsigned int block_size(unsigned long base, unsigned long top) { - unsigned int max_size = IS_ENABLED(CONFIG_PPC_BOOK3S_601) ? SZ_8M : SZ_256M; + unsigned int max_size = SZ_256M; unsigned int base_shift = (ffs(base) - 1) & 31; unsigned int block_shift = (fls(top - base) - 1) & 31; @@ -117,7 +110,6 @@ static unsigned int block_size(unsigned long base, unsigned long top) * Set up one of the IBAT (block address translation) register pairs. * The parameters are not checked; in particular size must be a power * of 2 between 128k and 256M. - * Only for 603+ ... */ static void setibat(int index, unsigned long virt, phys_addr_t phys, unsigned int size, pgprot_t prot) @@ -214,9 +206,6 @@ void mmu_mark_initmem_nx(void) unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; unsigned long size; - if (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) - return; - for (i = 0; i < nb - 1 && base < top && top - base > (128 << 10);) { size = block_size(base, top); setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT); @@ -253,9 +242,6 @@ void mmu_mark_rodata_ro(void) int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4; int i; - if (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) - return; - for (i = 0; i < nb; i++) { struct ppc_bat *bat = BATS[i]; @@ -294,8 +280,7 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys, flags &= ~_PAGE_COHERENT; bl = (size >> 17) - 1; - if (!IS_ENABLED(CONFIG_PPC_BOOK3S_601)) { - /* 603, 604, etc. */ + { /* Do DBAT first */ wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | _PAGE_COHERENT | _PAGE_GUARDED); @@ -312,16 +297,6 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys, bat[0] = bat[1]; else bat[0].batu = bat[0].batl = 0; - } else { - /* 601 cpu */ - if (bl > BL_8M) - bl = BL_8M; - wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE - | _PAGE_COHERENT); - wimgxpp |= (flags & _PAGE_RW)? - ((flags & _PAGE_USER)? PP_RWRW: PP_RWXX): PP_RXRX; - bat->batu = virt | wimgxpp | 4; /* Ks=0, Ku=1 */ - bat->batl = phys | bl | 0x40; /* V=1 */ } bat_addrs[index].start = virt; @@ -474,11 +449,7 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base, */ BUG_ON(first_memblock_base != 0); - /* 601 can only access 16MB at the moment */ - if (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) - memblock_set_current_limit(min_t(u64, first_memblock_size, 0x01000000)); - else /* Anything else has 256M mapped */ - memblock_set_current_limit(min_t(u64, first_memblock_size, 0x10000000)); + memblock_set_current_limit(min_t(u64, first_memblock_size, SZ_256M)); } void __init print_system_hash_info(void) diff --git a/arch/powerpc/mm/ptdump/bats.c b/arch/powerpc/mm/ptdump/bats.c index e29b338d499f..c4c628b03cf8 100644 --- a/arch/powerpc/mm/ptdump/bats.c +++ b/arch/powerpc/mm/ptdump/bats.c @@ -12,62 +12,6 @@ #include "ptdump.h" -static char *pp_601(int k, int pp) -{ - if (pp == 0) - return k ? " " : "rwx"; - if (pp == 1) - return k ? 
"r x" : "rwx"; - if (pp == 2) - return "rwx"; - return "r x"; -} - -static void bat_show_601(struct seq_file *m, int idx, u32 lower, u32 upper) -{ - u32 blpi = upper & 0xfffe0000; - u32 k = (upper >> 2) & 3; - u32 pp = upper & 3; - phys_addr_t pbn = PHYS_BAT_ADDR(lower); - u32 bsm = lower & 0x3ff; - u32 size = (bsm + 1) << 17; - - seq_printf(m, "%d: ", idx); - if (!(lower & 0x40)) { - seq_puts(m, " -\n"); - return; - } - - seq_printf(m, "0x%08x-0x%08x ", blpi, blpi + size - 1); -#ifdef CONFIG_PHYS_64BIT - seq_printf(m, "0x%016llx ", pbn); -#else - seq_printf(m, "0x%08x ", pbn); -#endif - pt_dump_size(m, size); - - seq_printf(m, "Kernel %s User %s", pp_601(k & 2, pp), pp_601(k & 1, pp)); - - seq_puts(m, lower & _PAGE_WRITETHRU ? "w " : " "); - seq_puts(m, lower & _PAGE_NO_CACHE ? "i " : " "); - seq_puts(m, lower & _PAGE_COHERENT ? "m " : " "); - seq_puts(m, "\n"); -} - -#define BAT_SHOW_601(_m, _n, _l, _u) bat_show_601(_m, _n, mfspr(_l), mfspr(_u)) - -static int bats_show_601(struct seq_file *m, void *v) -{ - seq_puts(m, "---[ Block Address Translation ]---\n"); - - BAT_SHOW_601(m, 0, SPRN_IBAT0L, SPRN_IBAT0U); - BAT_SHOW_601(m, 1, SPRN_IBAT1L, SPRN_IBAT1U); - BAT_SHOW_601(m, 2, SPRN_IBAT2L, SPRN_IBAT2U); - BAT_SHOW_601(m, 3, SPRN_IBAT3L, SPRN_IBAT3U); - - return 0; -} - static void bat_show_603(struct seq_file *m, int idx, u32 lower, u32 upper, bool is_d) { u32 bepi = upper & 0xfffe0000; @@ -146,9 +90,6 @@ static int bats_show_603(struct seq_file *m, void *v) static int bats_open(struct inode *inode, struct file *file) { - if (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) - return single_open(file, bats_show_601, NULL); - return single_open(file, bats_show_603, NULL); } diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index f002b0fa69b8..2e2cc0c75d87 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -284,7 +284,7 @@ static void __init pmac_setup_arch(void) /* 604, G3, G4 etc. */ loops_per_jiffy = *fp / HZ; else - /* 601, 603, etc. */ + /* 603, etc. */ loops_per_jiffy = *fp / (2 * HZ); of_node_put(cpu); break; diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index a6fedcfb714f..74ebe664b016 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -270,10 +270,6 @@ static void __init smp_psurge_probe(void) int i, ncpus; struct device_node *dn; - /* We don't do SMP on the PPC601 -- paulus */ - if (PVR_VER(mfspr(SPRN_PVR)) == 1) - return; - /* * The powersurge cpu board can be used in the generation * of powermacs that have a socket for an upgradeable cpu card, -- cgit v1.2.3 From 2e38ea486615bddbc7a42d002aee93a3a9e7a36f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 29 Sep 2020 06:48:37 +0000 Subject: powerpc: Tidy up a bit after removal of PowerPC 601. The removal of the 601 left some standalone blocks from former if/else. Drop the { } and re-indent. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/31c4cd093963f22831bf388449056ee045533d3b.1601362098.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/btext.c | 11 ++++------- arch/powerpc/mm/book3s32/mmu.c | 45 +++++++++++++++++++----------------------- 2 files changed, 24 insertions(+), 32 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index b609fb39dba8..c22a8e0dbc93 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -95,13 +95,10 @@ void __init btext_prepare_BAT(void) boot_text_mapped = 0; return; } - { - /* 603, 604, G3, G4, ... */ - lowbits = addr & ~0xFF000000UL; - addr &= 0xFF000000UL; - disp_BAT[0] = vaddr | (BL_16M<<2) | 2; - disp_BAT[1] = addr | (_PAGE_NO_CACHE | _PAGE_GUARDED | BPP_RW); - } + lowbits = addr & ~0xFF000000UL; + addr &= 0xFF000000UL; + disp_BAT[0] = vaddr | (BL_16M<<2) | 2; + disp_BAT[1] = addr | (_PAGE_NO_CACHE | _PAGE_GUARDED | BPP_RW); logicalDisplayBase = (void *) (vaddr + lowbits); } #endif diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 771d607f1a3d..741e4fc990c7 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -73,16 +73,13 @@ unsigned long p_block_mapped(phys_addr_t pa) static int find_free_bat(void) { int b; + int n = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4; - { - int n = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4; + for (b = 0; b < n; b++) { + struct ppc_bat *bat = BATS[b]; - for (b = 0; b < n; b++) { - struct ppc_bat *bat = BATS[b]; - - if (!(bat[1].batu & 3)) - return b; - } + if (!(bat[1].batu & 3)) + return b; } return -1; } @@ -280,24 +277,22 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys, flags &= ~_PAGE_COHERENT; bl = (size >> 17) - 1; - { - /* Do DBAT first */ - wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE - | _PAGE_COHERENT | _PAGE_GUARDED); - wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX; - bat[1].batu = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */ - bat[1].batl = BAT_PHYS_ADDR(phys) | wimgxpp; - if (flags & _PAGE_USER) - bat[1].batu |= 1; /* Vp = 1 */ - if (flags & _PAGE_GUARDED) { - /* G bit must be zero in IBATs */ - flags &= ~_PAGE_EXEC; - } - if (flags & _PAGE_EXEC) - bat[0] = bat[1]; - else - bat[0].batu = bat[0].batl = 0; + /* Do DBAT first */ + wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE + | _PAGE_COHERENT | _PAGE_GUARDED); + wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX; + bat[1].batu = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */ + bat[1].batl = BAT_PHYS_ADDR(phys) | wimgxpp; + if (flags & _PAGE_USER) + bat[1].batu |= 1; /* Vp = 1 */ + if (flags & _PAGE_GUARDED) { + /* G bit must be zero in IBATs */ + flags &= ~_PAGE_EXEC; } + if (flags & _PAGE_EXEC) + bat[0] = bat[1]; + else + bat[0].batu = bat[0].batl = 0; bat_addrs[index].start = virt; bat_addrs[index].limit = virt + ((bl + 1) << 17) - 1; -- cgit v1.2.3 From a4c5a355422920bcbfe3fd1f01aead2d3a2a820c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 29 Sep 2020 06:48:38 +0000 Subject: powerpc: Remove __USE_RTC() Now that PowerPC 601 is gone, __USE_RTC() is never true. Remove it. 
That also leads to removing get_rtc() and get_rtcl() Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4757e1ed21fe1968c761ae081d1f3d790a9673f8.1601362098.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/time.h | 28 +--------------------- arch/powerpc/kernel/time.c | 52 +++++++---------------------------------- 2 files changed, 9 insertions(+), 71 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index ce065589192a..caf68a4bc19e 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -38,9 +38,6 @@ struct div_result { u64 result_low; }; -/* Accessor functions for the timebase (RTC on 601) registers. */ -#define __USE_RTC() (0) - #ifdef CONFIG_PPC64 /* For compatibility, get_tbl() is defined as get_tb() on ppc64 */ @@ -59,25 +56,6 @@ static inline unsigned int get_tbu(void) } #endif /* !CONFIG_PPC64 */ -static inline unsigned int get_rtcl(void) -{ - unsigned int rtcl; - - asm volatile("mfrtcl %0" : "=r" (rtcl)); - return rtcl; -} - -static inline u64 get_rtc(void) -{ - unsigned int hi, lo, hi2; - - do { - asm volatile("mfrtcu %0; mfrtcl %1; mfrtcu %2" - : "=r" (hi), "=r" (lo), "=r" (hi2)); - } while (hi2 != hi); - return (u64)hi * 1000000000 + lo; -} - static inline u64 get_vtb(void) { #ifdef CONFIG_PPC_BOOK3S_64 @@ -109,7 +87,7 @@ static inline u64 get_tb(void) static inline u64 get_tb_or_rtc(void) { - return __USE_RTC() ? get_rtc() : get_tb(); + return get_tb(); } static inline void set_tb(unsigned int upper, unsigned int lower) @@ -153,10 +131,6 @@ static inline void set_dec(u64 val) static inline unsigned long tb_ticks_since(unsigned long tstamp) { - if (__USE_RTC()) { - int delta = get_rtcl() - (unsigned int) tstamp; - return delta < 0 ? delta + 1000000000 : delta; - } return get_tbl() - tstamp; } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index f85539ebb513..13c820c15d37 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -75,15 +75,6 @@ #include #include -static u64 rtc_read(struct clocksource *); -static struct clocksource clocksource_rtc = { - .name = "rtc", - .rating = 400, - .flags = CLOCK_SOURCE_IS_CONTINUOUS, - .mask = CLOCKSOURCE_MASK(64), - .read = rtc_read, -}; - static u64 timebase_read(struct clocksource *); static struct clocksource clocksource_timebase = { .name = "timebase", @@ -447,19 +438,9 @@ void vtime_flush(struct task_struct *tsk) void __delay(unsigned long loops) { unsigned long start; - int diff; spin_begin(); - if (__USE_RTC()) { - start = get_rtcl(); - do { - /* the RTCL register wraps at 1000000000 */ - diff = get_rtcl() - start; - if (diff < 0) - diff += 1000000000; - spin_cpu_relax(); - } while (diff < loops); - } else if (tb_invalid) { + if (tb_invalid) { /* * TB is in error state and isn't ticking anymore. * HMI handler was unable to recover from TB error. 
@@ -696,8 +677,6 @@ EXPORT_SYMBOL_GPL(tb_to_ns); */ notrace unsigned long long sched_clock(void) { - if (__USE_RTC()) - return get_rtc(); return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift; } @@ -847,11 +826,6 @@ void read_persistent_clock64(struct timespec64 *ts) } /* clocksource code */ -static notrace u64 rtc_read(struct clocksource *cs) -{ - return (u64)get_rtc(); -} - static notrace u64 timebase_read(struct clocksource *cs) { return (u64)get_tb(); @@ -948,12 +922,7 @@ void update_vsyscall_tz(void) static void __init clocksource_init(void) { - struct clocksource *clock; - - if (__USE_RTC()) - clock = &clocksource_rtc; - else - clock = &clocksource_timebase; + struct clocksource *clock = &clocksource_timebase; if (clocksource_register_hz(clock, tb_ticks_per_sec)) { printk(KERN_ERR "clocksource: %s is already registered\n", @@ -1071,17 +1040,12 @@ void __init time_init(void) u64 scale; unsigned shift; - if (__USE_RTC()) { - /* 601 processor: dec counts down by 128 every 128ns */ - ppc_tb_freq = 1000000000; - } else { - /* Normal PowerPC with timebase register */ - ppc_md.calibrate_decr(); - printk(KERN_DEBUG "time_init: decrementer frequency = %lu.%.6lu MHz\n", - ppc_tb_freq / 1000000, ppc_tb_freq % 1000000); - printk(KERN_DEBUG "time_init: processor frequency = %lu.%.6lu MHz\n", - ppc_proc_freq / 1000000, ppc_proc_freq % 1000000); - } + /* Normal PowerPC with timebase register */ + ppc_md.calibrate_decr(); + printk(KERN_DEBUG "time_init: decrementer frequency = %lu.%.6lu MHz\n", + ppc_tb_freq / 1000000, ppc_tb_freq % 1000000); + printk(KERN_DEBUG "time_init: processor frequency = %lu.%.6lu MHz\n", + ppc_proc_freq / 1000000, ppc_proc_freq % 1000000); tb_ticks_per_jiffy = ppc_tb_freq / HZ; tb_ticks_per_sec = ppc_tb_freq; -- cgit v1.2.3 From 6601ec1c2ba929430f5585ce7f9d9960b0e0a01d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 29 Sep 2020 06:48:39 +0000 Subject: powerpc: Remove get_tb_or_rtc() 601 is gone, get_tb_or_rtc() is equivalent to get_tb(). Replace the former by the later. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3e8a13ee83418630c753c30cb722ae682d5b2d39.1601362098.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/time.h | 5 ----- arch/powerpc/kernel/irq.c | 2 +- arch/powerpc/kernel/time.c | 6 +++--- 3 files changed, 4 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index caf68a4bc19e..410ed72eef1c 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -85,11 +85,6 @@ static inline u64 get_tb(void) } #endif /* !CONFIG_PPC64 */ -static inline u64 get_tb_or_rtc(void) -{ - return get_tb(); -} - static inline void set_tb(unsigned int upper, unsigned int lower) { mtspr(SPRN_TBWL, 0); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 631e6d236c97..7d0f7682d01d 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -104,7 +104,7 @@ static inline notrace unsigned long get_irq_happened(void) static inline notrace int decrementer_check_overflow(void) { - u64 now = get_tb_or_rtc(); + u64 now = get_tb(); u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); return now >= *next_tb; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 13c820c15d37..760ea359a7f7 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -595,7 +595,7 @@ void timer_interrupt(struct pt_regs *regs) irq_work_run(); } - now = get_tb_or_rtc(); + now = get_tb(); if (now >= *next_tb) { *next_tb = ~(u64)0; if (evt->event_handler) @@ -937,7 +937,7 @@ static void __init clocksource_init(void) static int decrementer_set_next_event(unsigned long evt, struct clock_event_device *dev) { - __this_cpu_write(decrementers_next_tb, get_tb_or_rtc() + evt); + __this_cpu_write(decrementers_next_tb, get_tb() + evt); set_dec(evt); /* We may have raced with new irq work */ @@ -1071,7 +1071,7 @@ void __init time_init(void) tb_to_ns_scale = scale; tb_to_ns_shift = shift; /* Save the current timebase to pretty up CONFIG_PRINTK_TIME */ - boot_tb = get_tb_or_rtc(); + boot_tb = get_tb(); /* If platform provided a timezone (pmac), we correct the time */ if (timezone_offset) { -- cgit v1.2.3 From 63f9d9df5ed0d4f3a2c0cd08730e1cae1edd11bf Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 1 Oct 2020 10:59:19 +0000 Subject: powerpc/time: Remove ifdef in get_dec() and set_dec() Move SPRN_PIT definition in reg.h. This allows to remove ifdef in get_dec() and set_dec() and makes them more readable. 
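A rough, self-contained sketch of the pattern being applied here (the CONFIG_ values, the IS_ENABLED() shim and the mtspr()/SPRN_ stand-ins below are illustrative, not the kernel's definitions): IS_ENABLED() evaluates to a compile-time constant, so the compiler still parses and type-checks every branch and then discards the dead ones, which is what lets the #ifdef blocks collapse into plain if/else.

/* Standalone illustration of trading #ifdef for IS_ENABLED()-style
 * compile-time constants; all values below are stand-ins. */
#include <stdio.h>
#include <stdint.h>

#define CONFIG_40x   0               /* pretend we build for a classic 6xx */
#define CONFIG_BOOKE 0
#define IS_ENABLED(option) (option)  /* the real kernel macro is fancier */

enum { SPRN_PIT, SPRN_DEC, SPR_COUNT };
static uint64_t spr[SPR_COUNT];

static void mtspr(int reg, uint64_t val) { spr[reg] = val; }

static void set_dec(uint64_t val)
{
	if (IS_ENABLED(CONFIG_40x))
		mtspr(SPRN_PIT, (uint32_t)val);
	else if (IS_ENABLED(CONFIG_BOOKE))
		mtspr(SPRN_DEC, val);
	else
		mtspr(SPRN_DEC, val - 1);  /* classic decrementer takes val - 1 */
}

int main(void)
{
	set_dec(100);
	printf("SPRN_DEC = %llu\n", (unsigned long long)spr[SPRN_DEC]);
	return 0;
}

Compared with #ifdef, a config change can no longer bit-rot an unbuilt branch, since every branch is compiled every time.
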
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3c9a6eb0fc040868ac59be66f338d08fd017668d.1601549945.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/reg.h | 2 ++ arch/powerpc/include/asm/reg_booke.h | 1 - arch/powerpc/include/asm/time.h | 23 ++++++++++------------- 3 files changed, 12 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index d25c357a873c..788058af1d44 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -521,6 +521,8 @@ #define SPRN_TSCR 0x399 /* Thread Switch Control Register */ #define SPRN_DEC 0x016 /* Decrement Register */ +#define SPRN_PIT 0x3DB /* Programmable Interval Timer (40x/BOOKE) */ + #define SPRN_DER 0x095 /* Debug Enable Register */ #define DER_RSTE 0x40000000 /* Reset Interrupt */ #define DER_CHSTPE 0x20000000 /* Check Stop */ diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index ff30f1076162..29a948e0c0f2 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -174,7 +174,6 @@ #define SPRN_L1CSR1 0x3F3 /* L1 Cache Control and Status Register 1 */ #define SPRN_MMUCSR0 0x3F4 /* MMU Control and Status Register 0 */ #define SPRN_MMUCFG 0x3F7 /* MMU Configuration Register */ -#define SPRN_PIT 0x3DB /* Programmable Interval Timer */ #define SPRN_BUCSR 0x3F5 /* Branch Unit Control and Status */ #define SPRN_L2CSR0 0x3F9 /* L2 Data Cache Control and Status Register 0 */ #define SPRN_L2CSR1 0x3FA /* L2 Data Cache Control and Status Register 1 */ diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 410ed72eef1c..a0c8ae4cb27c 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -100,11 +100,10 @@ static inline void set_tb(unsigned int upper, unsigned int lower) */ static inline u64 get_dec(void) { -#if defined(CONFIG_40x) - return (mfspr(SPRN_PIT)); -#else - return (mfspr(SPRN_DEC)); -#endif + if (IS_ENABLED(CONFIG_40x)) + return mfspr(SPRN_PIT); + + return mfspr(SPRN_DEC); } /* @@ -114,14 +113,12 @@ static inline u64 get_dec(void) */ static inline void set_dec(u64 val) { -#if defined(CONFIG_40x) - mtspr(SPRN_PIT, (u32) val); -#else -#ifndef CONFIG_BOOKE - --val; -#endif - mtspr(SPRN_DEC, val); -#endif /* not 40x */ + if (IS_ENABLED(CONFIG_40x)) + mtspr(SPRN_PIT, (u32)val); + else if (IS_ENABLED(CONFIG_BOOKE)) + mtspr(SPRN_DEC, val); + else + mtspr(SPRN_DEC, val - 1); } static inline unsigned long tb_ticks_since(unsigned long tstamp) -- cgit v1.2.3 From 69a1593abdbcf03a76367320d929a8ae7a5e3d71 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 1 Oct 2020 15:35:38 +0000 Subject: powerpc/32s: Setup the early hash table at all time. At the time being, an early hash table is set up when CONFIG_KASAN is selected. There is nothing wrong with setting such an early hash table all the time, even if it is not used. This is a statically allocated 256 kB table which lies in the init data section. This makes the code simpler and may in the future allow to setup early IO mappings with fixmap instead of hard coding BATs. Put create_hpte() and flush_hash_pages() in the .ref.text section in order to avoid warning for the reference to early_hash[]. This reference is removed by MMU_init_hw_patch() before init memory is freed. 
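A rough standalone sketch of the kind of table the patch introduces (the 256 kB size and the self-alignment mirror the patch; the SDR1-style packing is a simplification and the rest is illustrative): aligning the table to its own size keeps the low bits of its base address zero, so the base can simply be OR'ed with the size code when the register is programmed, much as the early hash setup does.

/* Standalone illustration: a statically allocated table aligned to its
 * own size; sizes mirror the patch, the register packing is simplified. */
#include <stdio.h>
#include <stdint.h>

#define SZ_256K (256u * 1024u)

/* In the kernel this array is __initdata; here it is just a big static. */
static uint8_t early_hash[SZ_256K] __attribute__((aligned(SZ_256K)));

int main(void)
{
	uintptr_t base = (uintptr_t)early_hash;
	uintptr_t packed = base | 0x3;   /* base OR'ed with a size code */

	/* Because the base is size-aligned, OR-ing in low mask bits
	 * cannot corrupt the address part. */
	printf("base is 256K aligned: %s\n", (base % SZ_256K) == 0 ? "yes" : "no");
	printf("packed value: 0x%lx\n", (unsigned long)packed);
	return 0;
}

In the kernel the array additionally lives in init data, which is why create_hpte() and flush_hash_pages() move to .ref.text: they reference init memory on purpose, and the reference goes away before that memory is freed.
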
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b8f8101c368b8a6451844a58d7bd7d83c14cf2aa.1601566529.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_32.S | 13 +++++-------- arch/powerpc/mm/book3s32/hash_low.S | 9 +++++++-- arch/powerpc/mm/book3s32/mmu.c | 14 +++++--------- arch/powerpc/mm/kasan/kasan_init_32.c | 19 ------------------- 4 files changed, 17 insertions(+), 38 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index b14524d4534c..6dc77419147e 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -155,9 +155,9 @@ __after_mmu_off: bl initial_bats bl load_segment_registers -#ifdef CONFIG_KASAN +BEGIN_MMU_FTR_SECTION bl early_hash_table -#endif +END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) #if defined(CONFIG_BOOTX_TEXT) bl setup_disp_bat #endif @@ -936,7 +936,6 @@ _ENTRY(__restore_cpu_setup) * Load stuff into the MMU. Intended to be called with * IR=0 and DR=0. */ -#ifdef CONFIG_KASAN early_hash_table: sync /* Force all PTE updates to finish */ isync @@ -947,8 +946,10 @@ early_hash_table: lis r6, early_hash - PAGE_OFFSET@h ori r6, r6, 3 /* 256kB table */ mtspr SPRN_SDR1, r6 + lis r6, early_hash@h + lis r3, Hash@ha + stw r6, Hash@l(r3) blr -#endif load_up_mmu: sync /* Force all PTE updates to finish */ @@ -1037,11 +1038,7 @@ start_here: bl machine_init bl __save_cpu_setup bl MMU_init -#ifdef CONFIG_KASAN -BEGIN_MMU_FTR_SECTION bl MMU_init_hw_patch -END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) -#endif /* * Go back to running unmapped so we can load up new values diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index 3143de6ae769..b2c912e517b9 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -15,6 +15,7 @@ */ #include +#include #include #include #include @@ -284,9 +285,9 @@ _ASM_NOKPROBE_SYMBOL(add_hash_page) * * For speed, 4 of the instructions get patched once the size and * physical address of the hash table are known. These definitions - * of Hash_base and Hash_bits below are just an example. + * of Hash_base and Hash_bits below are for the early hash table. */ -Hash_base = 0xc0180000 +Hash_base = early_hash Hash_bits = 12 /* e.g. 256kB hash table */ Hash_msk = (((1 << Hash_bits) - 1) * 64) @@ -307,6 +308,7 @@ Hash_msk = (((1 << Hash_bits) - 1) * 64) #define HASH_LEFT 31-(LG_PTEG_SIZE+Hash_bits-1) #define HASH_RIGHT 31-LG_PTEG_SIZE +__REF _GLOBAL(create_hpte) /* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */ rlwinm r8,r5,32-9,30,30 /* _PAGE_RW -> PP msb */ @@ -473,6 +475,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) sync /* make sure pte updates get to memory */ blr + .previous _ASM_NOKPROBE_SYMBOL(create_hpte) .section .bss @@ -493,6 +496,7 @@ htab_hash_searches: * * We assume that there is a hash table in use (Hash != 0). 
*/ +__REF _GLOBAL(flush_hash_pages) /* * We disable interrupts here, even on UP, because we want @@ -626,6 +630,7 @@ _GLOBAL(flush_hash_pages) 19: mtmsr r10 isync blr + .previous EXPORT_SYMBOL(flush_hash_pages) _ASM_NOKPROBE_SYMBOL(flush_hash_pages) diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 741e4fc990c7..a59e7ec98180 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -31,6 +31,8 @@ #include +u8 __initdata early_hash[SZ_256K] __aligned(SZ_256K) = {0}; + struct hash_pte *Hash; static unsigned long Hash_size, Hash_mask; unsigned long _SDR1; @@ -395,15 +397,6 @@ void __init MMU_init_hw(void) hash_mb2 = hash_mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg; if (lg_n_hpteg > 16) hash_mb2 = 16 - LG_HPTEG_SIZE; - - /* - * When KASAN is selected, there is already an early temporary hash - * table and the switch to the final hash table is done later. - */ - if (IS_ENABLED(CONFIG_KASAN)) - return; - - MMU_init_hw_patch(); } void __init MMU_init_hw_patch(void) @@ -411,6 +404,9 @@ void __init MMU_init_hw_patch(void) unsigned int hmask = Hash_mask >> (16 - LG_HPTEG_SIZE); unsigned int hash = (unsigned int)Hash - PAGE_OFFSET; + if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) + return; + if (ppc_md.progress) ppc_md.progress("hash:patch", 0x345); if (ppc_md.progress) diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index 929716ea21e9..59f61efc43af 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -174,22 +174,6 @@ void __init kasan_late_init(void) kasan_unmap_early_shadow_vmalloc(); } -#ifdef CONFIG_PPC_BOOK3S_32 -u8 __initdata early_hash[256 << 10] __aligned(256 << 10) = {0}; - -static void __init kasan_early_hash_table(void) -{ - unsigned int hash = __pa(early_hash); - - modify_instruction_site(&patch__hash_page_A0, 0xffff, hash >> 16); - modify_instruction_site(&patch__flush_hash_A0, 0xffff, hash >> 16); - - Hash = (struct hash_pte *)early_hash; -} -#else -static void __init kasan_early_hash_table(void) {} -#endif - void __init kasan_early_init(void) { unsigned long addr = KASAN_SHADOW_START; @@ -205,7 +189,4 @@ void __init kasan_early_init(void) next = pgd_addr_end(addr, end); pmd_populate_kernel(&init_mm, pmd, kasan_early_shadow_pte); } while (pmd++, addr = next, addr != end); - - if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) - kasan_early_hash_table(); } -- cgit v1.2.3 From 533090e5a980ad80bbe0961b4ed45a9bcf55cc0c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 6 Oct 2020 09:05:26 +0000 Subject: powerpc/32s: Rename head_32.S to head_book3s_32.S Unlike PPC64 which had a single head_64.S, PPC32 are multiple ones. There is the head_32.S, selected by default based on the value of BITS and overridden based on some CONFIG_ values. This leads to thinking that it may be selected by different types of PPC32 platform but indeed it ends up being selected by book3s/32 only. Make that explicit by: - Not doing any default selection based on BITS. - Renaming head_32.S to head_book3s_32.S. - Get head_book3s_32.S selected only by CONFIG_PPC_BOOK3S_32. 
Signed-off-by: Christophe Leroy [mpe: Fix head_$(BITS).o reference in arch/powerpc/Makefile] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/319d379f696412681c66a987cc75e6abf8f958d2.1601975100.git.christophe.leroy@csgroup.eu --- arch/powerpc/Makefile | 3 +- arch/powerpc/kernel/Makefile | 3 +- arch/powerpc/kernel/head_32.S | 1366 ---------------------------------- arch/powerpc/kernel/head_book3s_32.S | 1366 ++++++++++++++++++++++++++++++++++ 4 files changed, 1370 insertions(+), 1368 deletions(-) delete mode 100644 arch/powerpc/kernel/head_32.S create mode 100644 arch/powerpc/kernel/head_book3s_32.S (limited to 'arch') diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 3e8da9cf2eb9..c4f9dbd12577 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -264,7 +264,8 @@ KBUILD_CFLAGS += $(cpu-as-y) KBUILD_AFLAGS += $(aflags-y) KBUILD_CFLAGS += $(cflags-y) -head-y := arch/powerpc/kernel/head_$(BITS).o +head-$(CONFIG_PPC64) := arch/powerpc/kernel/head_64.o +head-$(CONFIG_PPC_BOOK3S_32) := arch/powerpc/kernel/head_book3s_32.o head-$(CONFIG_PPC_8xx) := arch/powerpc/kernel/head_8xx.o head-$(CONFIG_40x) := arch/powerpc/kernel/head_40x.o head-$(CONFIG_44x) := arch/powerpc/kernel/head_44x.o diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index a5550c2b24c4..bf0bf1b900d2 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -95,7 +95,8 @@ obj-$(CONFIG_PPC_FSL_BOOK3E) += cpu_setup_fsl_booke.o obj-$(CONFIG_PPC_DOORBELL) += dbell.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o -extra-y := head_$(BITS).o +extra-$(CONFIG_PPC64) := head_64.o +extra-$(CONFIG_PPC_BOOK3S_32) := head_book3s_32.o extra-$(CONFIG_40x) := head_40x.o extra-$(CONFIG_44x) := head_44x.o extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S deleted file mode 100644 index 6dc77419147e..000000000000 --- a/arch/powerpc/kernel/head_32.S +++ /dev/null @@ -1,1366 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * PowerPC version - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP - * Copyright (C) 1996 Cort Dougan - * Adapted for Power Macintosh by Paul Mackerras. - * Low-level exception handlers and MMU support - * rewritten by Paul Mackerras. - * Copyright (C) 1996 Paul Mackerras. - * MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net). - * - * This file contains the low-level support and setup for the - * PowerPC platform, including trap and interrupt dispatch. - * (The PPC 8xx embedded CPUs use head_8xx.S instead.) - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "head_32.h" - -#define LOAD_BAT(n, reg, RA, RB) \ - /* see the comment for clear_bats() -- Cort */ \ - li RA,0; \ - mtspr SPRN_IBAT##n##U,RA; \ - mtspr SPRN_DBAT##n##U,RA; \ - lwz RA,(n*16)+0(reg); \ - lwz RB,(n*16)+4(reg); \ - mtspr SPRN_IBAT##n##U,RA; \ - mtspr SPRN_IBAT##n##L,RB; \ - lwz RA,(n*16)+8(reg); \ - lwz RB,(n*16)+12(reg); \ - mtspr SPRN_DBAT##n##U,RA; \ - mtspr SPRN_DBAT##n##L,RB - - __HEAD - .stabs "arch/powerpc/kernel/",N_SO,0,0,0f - .stabs "head_32.S",N_SO,0,0,0f -0: -_ENTRY(_stext); - -/* - * _start is defined this way because the XCOFF loader in the OpenFirmware - * on the powermac expects the entry point to be a procedure descriptor. 
- */ -_ENTRY(_start); - /* - * These are here for legacy reasons, the kernel used to - * need to look like a coff function entry for the pmac - * but we're always started by some kind of bootloader now. - * -- Cort - */ - nop /* used by __secondary_hold on prep (mtx) and chrp smp */ - nop /* used by __secondary_hold on prep (mtx) and chrp smp */ - nop - -/* PMAC - * Enter here with the kernel text, data and bss loaded starting at - * 0, running with virtual == physical mapping. - * r5 points to the prom entry point (the client interface handler - * address). Address translation is turned on, with the prom - * managing the hash table. Interrupts are disabled. The stack - * pointer (r1) points to just below the end of the half-meg region - * from 0x380000 - 0x400000, which is mapped in already. - * - * If we are booted from MacOS via BootX, we enter with the kernel - * image loaded somewhere, and the following values in registers: - * r3: 'BooX' (0x426f6f58) - * r4: virtual address of boot_infos_t - * r5: 0 - * - * PREP - * This is jumped to on prep systems right after the kernel is relocated - * to its proper place in memory by the boot loader. The expected layout - * of the regs is: - * r3: ptr to residual data - * r4: initrd_start or if no initrd then 0 - * r5: initrd_end - unused if r4 is 0 - * r6: Start of command line string - * r7: End of command line string - * - * This just gets a minimal mmu environment setup so we can call - * start_here() to do the real work. - * -- Cort - */ - - .globl __start -__start: -/* - * We have to do any OF calls before we map ourselves to KERNELBASE, - * because OF may have I/O devices mapped into that area - * (particularly on CHRP). - */ - cmpwi 0,r5,0 - beq 1f - -#ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE - /* find out where we are now */ - bcl 20,31,$+4 -0: mflr r8 /* r8 = runtime addr here */ - addis r8,r8,(_stext - 0b)@ha - addi r8,r8,(_stext - 0b)@l /* current runtime base addr */ - bl prom_init -#endif /* CONFIG_PPC_OF_BOOT_TRAMPOLINE */ - - /* We never return. We also hit that trap if trying to boot - * from OF while CONFIG_PPC_OF_BOOT_TRAMPOLINE isn't selected */ - trap - -/* - * Check for BootX signature when supporting PowerMac and branch to - * appropriate trampoline if it's present - */ -#ifdef CONFIG_PPC_PMAC -1: lis r31,0x426f - ori r31,r31,0x6f58 - cmpw 0,r3,r31 - bne 1f - bl bootx_init - trap -#endif /* CONFIG_PPC_PMAC */ - -1: mr r31,r3 /* save device tree ptr */ - li r24,0 /* cpu # */ - -/* - * early_init() does the early machine identification and does - * the necessary low-level setup and clears the BSS - * -- Cort - */ - bl early_init - -/* Switch MMU off, clear BATs and flush TLB. At this point, r3 contains - * the physical address we are running at, returned by early_init() - */ - bl mmu_off -__after_mmu_off: - bl clear_bats - bl flush_tlbs - - bl initial_bats - bl load_segment_registers -BEGIN_MMU_FTR_SECTION - bl early_hash_table -END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) -#if defined(CONFIG_BOOTX_TEXT) - bl setup_disp_bat -#endif -#ifdef CONFIG_PPC_EARLY_DEBUG_CPM - bl setup_cpm_bat -#endif -#ifdef CONFIG_PPC_EARLY_DEBUG_USBGECKO - bl setup_usbgecko_bat -#endif - -/* - * Call setup_cpu for CPU 0 and initialize 6xx Idle - */ - bl reloc_offset - li r24,0 /* cpu# */ - bl call_setup_cpu /* Call setup_cpu for this CPU */ -#ifdef CONFIG_PPC_BOOK3S_32 - bl reloc_offset - bl init_idle_6xx -#endif /* CONFIG_PPC_BOOK3S_32 */ - - -/* - * We need to run with _start at physical address 0. 
- * On CHRP, we are loaded at 0x10000 since OF on CHRP uses - * the exception vectors at 0 (and therefore this copy - * overwrites OF's exception vectors with our own). - * The MMU is off at this point. - */ - bl reloc_offset - mr r26,r3 - addis r4,r3,KERNELBASE@h /* current address of _start */ - lis r5,PHYSICAL_START@h - cmplw 0,r4,r5 /* already running at PHYSICAL_START? */ - bne relocate_kernel -/* - * we now have the 1st 16M of ram mapped with the bats. - * prep needs the mmu to be turned on here, but pmac already has it on. - * this shouldn't bother the pmac since it just gets turned on again - * as we jump to our code at KERNELBASE. -- Cort - * Actually no, pmac doesn't have it on any more. BootX enters with MMU - * off, and in other cases, we now turn it off before changing BATs above. - */ -turn_on_mmu: - mfmsr r0 - ori r0,r0,MSR_DR|MSR_IR|MSR_RI - mtspr SPRN_SRR1,r0 - lis r0,start_here@h - ori r0,r0,start_here@l - mtspr SPRN_SRR0,r0 - RFI /* enables MMU */ - -/* - * We need __secondary_hold as a place to hold the other cpus on - * an SMP machine, even when we are running a UP kernel. - */ - . = 0xc0 /* for prep bootloader */ - li r3,1 /* MTX only has 1 cpu */ - .globl __secondary_hold -__secondary_hold: - /* tell the master we're here */ - stw r3,__secondary_hold_acknowledge@l(0) -#ifdef CONFIG_SMP -100: lwz r4,0(0) - /* wait until we're told to start */ - cmpw 0,r4,r3 - bne 100b - /* our cpu # was at addr 0 - go */ - mr r24,r3 /* cpu # */ - b __secondary_start -#else - b . -#endif /* CONFIG_SMP */ - - .globl __secondary_hold_spinloop -__secondary_hold_spinloop: - .long 0 - .globl __secondary_hold_acknowledge -__secondary_hold_acknowledge: - .long -1 - -/* System reset */ -/* core99 pmac starts the seconary here by changing the vector, and - putting it back to what it was (unknown_exception) when done. */ - EXCEPTION(0x100, Reset, unknown_exception, EXC_XFER_STD) - -/* Machine check */ -/* - * On CHRP, this is complicated by the fact that we could get a - * machine check inside RTAS, and we have no guarantee that certain - * critical registers will have the values we expect. The set of - * registers that might have bad values includes all the GPRs - * and all the BATs. We indicate that we are in RTAS by putting - * a non-zero value, the address of the exception frame to use, - * in thread.rtas_sp. The machine check handler checks thread.rtas_sp - * and uses its value if it is non-zero. - * (Other exception handlers assume that r1 is a valid kernel stack - * pointer when we take an exception from supervisor mode.) - * -- paulus. - */ - . = 0x200 - DO_KVM 0x200 -MachineCheck: - EXCEPTION_PROLOG_0 -#ifdef CONFIG_PPC_CHRP - mfspr r11, SPRN_SPRG_THREAD - lwz r11, RTAS_SP(r11) - cmpwi cr1, r11, 0 - bne cr1, 7f -#endif /* CONFIG_PPC_CHRP */ - EXCEPTION_PROLOG_1 for_rtas=1 -7: EXCEPTION_PROLOG_2 - addi r3,r1,STACK_FRAME_OVERHEAD -#ifdef CONFIG_PPC_CHRP -#ifdef CONFIG_VMAP_STACK - mfspr r4, SPRN_SPRG_THREAD - tovirt(r4, r4) - lwz r4, RTAS_SP(r4) - cmpwi cr1, r4, 0 -#endif - beq cr1, machine_check_tramp - twi 31, 0, 0 -#else - b machine_check_tramp -#endif - -/* Data access exception. */ - . = 0x300 - DO_KVM 0x300 -DataAccess: -#ifdef CONFIG_VMAP_STACK - mtspr SPRN_SPRG_SCRATCH0,r10 - mfspr r10, SPRN_SPRG_THREAD -BEGIN_MMU_FTR_SECTION - stw r11, THR11(r10) - mfspr r10, SPRN_DSISR - mfcr r11 -#ifdef CONFIG_PPC_KUAP - andis. r10, r10, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h -#else - andis. 
r10, r10, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h -#endif - mfspr r10, SPRN_SPRG_THREAD - beq hash_page_dsi -.Lhash_page_dsi_cont: - mtcr r11 - lwz r11, THR11(r10) -END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) - mtspr SPRN_SPRG_SCRATCH1,r11 - mfspr r11, SPRN_DAR - stw r11, DAR(r10) - mfspr r11, SPRN_DSISR - stw r11, DSISR(r10) - mfspr r11, SPRN_SRR0 - stw r11, SRR0(r10) - mfspr r11, SPRN_SRR1 /* check whether user or kernel */ - stw r11, SRR1(r10) - mfcr r10 - andi. r11, r11, MSR_PR - - EXCEPTION_PROLOG_1 - b handle_page_fault_tramp_1 -#else /* CONFIG_VMAP_STACK */ - EXCEPTION_PROLOG handle_dar_dsisr=1 - get_and_save_dar_dsisr_on_stack r4, r5, r11 -BEGIN_MMU_FTR_SECTION -#ifdef CONFIG_PPC_KUAP - andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h -#else - andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h -#endif - bne handle_page_fault_tramp_2 /* if not, try to put a PTE */ - rlwinm r3, r5, 32 - 15, 21, 21 /* DSISR_STORE -> _PAGE_RW */ - bl hash_page - b handle_page_fault_tramp_1 -FTR_SECTION_ELSE - b handle_page_fault_tramp_2 -ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) -#endif /* CONFIG_VMAP_STACK */ - -/* Instruction access exception. */ - . = 0x400 - DO_KVM 0x400 -InstructionAccess: -#ifdef CONFIG_VMAP_STACK - mtspr SPRN_SPRG_SCRATCH0,r10 - mtspr SPRN_SPRG_SCRATCH1,r11 - mfspr r10, SPRN_SPRG_THREAD - mfspr r11, SPRN_SRR0 - stw r11, SRR0(r10) - mfspr r11, SPRN_SRR1 /* check whether user or kernel */ - stw r11, SRR1(r10) - mfcr r10 -BEGIN_MMU_FTR_SECTION - andis. r11, r11, SRR1_ISI_NOPT@h /* no pte found? */ - bne hash_page_isi -.Lhash_page_isi_cont: - mfspr r11, SPRN_SRR1 /* check whether user or kernel */ -END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) - andi. r11, r11, MSR_PR - - EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 -#else /* CONFIG_VMAP_STACK */ - EXCEPTION_PROLOG - andis. r0,r9,SRR1_ISI_NOPT@h /* no pte found? */ - beq 1f /* if so, try to put a PTE */ - li r3,0 /* into the hash table */ - mr r4,r12 /* SRR0 is fault address */ -BEGIN_MMU_FTR_SECTION - bl hash_page -END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) -#endif /* CONFIG_VMAP_STACK */ -1: mr r4,r12 - andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ - stw r4, _DAR(r11) - EXC_XFER_LITE(0x400, handle_page_fault) - -/* External interrupt */ - EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) - -/* Alignment exception */ - . = 0x600 - DO_KVM 0x600 -Alignment: - EXCEPTION_PROLOG handle_dar_dsisr=1 - save_dar_dsisr_on_stack r4, r5, r11 - addi r3,r1,STACK_FRAME_OVERHEAD - b alignment_exception_tramp - -/* Program check exception */ - EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) - -/* Floating-point unavailable */ - . = 0x800 - DO_KVM 0x800 -FPUnavailable: -BEGIN_FTR_SECTION -/* - * Certain Freescale cores don't have a FPU and treat fp instructions - * as a FP Unavailable exception. Redirect to illegal/emulation handling. - */ - b ProgramCheck -END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) - EXCEPTION_PROLOG - beq 1f - bl load_up_fpu /* if from user, just load it up */ - b fast_exception_return -1: addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_LITE(0x800, kernel_fp_unavailable_exception) - -/* Decrementer */ - EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) - - EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_STD) - EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_STD) - -/* System call */ - . 
= 0xc00 - DO_KVM 0xc00 -SystemCall: - SYSCALL_ENTRY 0xc00 - - EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) - EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD) - -/* - * The Altivec unavailable trap is at 0x0f20. Foo. - * We effectively remap it to 0x3000. - * We include an altivec unavailable exception vector even if - * not configured for Altivec, so that you can't panic a - * non-altivec kernel running on a machine with altivec just - * by executing an altivec instruction. - */ - . = 0xf00 - DO_KVM 0xf00 - b PerformanceMonitor - - . = 0xf20 - DO_KVM 0xf20 - b AltiVecUnavailable - -/* - * Handle TLB miss for instruction on 603/603e. - * Note: we get an alternate set of r0 - r3 to use automatically. - */ - . = 0x1000 -InstructionTLBMiss: -/* - * r0: scratch - * r1: linux style pte ( later becomes ppc hardware pte ) - * r2: ptr to linux-style pte - * r3: scratch - */ - /* Get PTE (linux-style) and check access */ - mfspr r3,SPRN_IMISS -#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) - lis r1, TASK_SIZE@h /* check if kernel address */ - cmplw 0,r1,r3 -#endif - mfspr r2, SPRN_SPRG_PGDIR -#ifdef CONFIG_SWAP - li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC -#else - li r1,_PAGE_PRESENT | _PAGE_EXEC -#endif -#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) - bgt- 112f - lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ - addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ -#endif -112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ - lwz r2,0(r2) /* get pmd entry */ - rlwinm. r2,r2,0,0,19 /* extract address of pte page */ - beq- InstructionAddressInvalid /* return if no mapping */ - rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ - lwz r0,0(r2) /* get linux-style pte */ - andc. r1,r1,r0 /* check access & ~permission */ - bne- InstructionAddressInvalid /* return if access not permitted */ - /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */ - ori r1, r1, 0xe06 /* clear out reserved bits */ - andc r1, r0, r1 /* PP = user? 1 : 0 */ -BEGIN_FTR_SECTION - rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ -END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) - mtspr SPRN_RPA,r1 - tlbli r3 - mfspr r3,SPRN_SRR1 /* Need to restore CR0 */ - mtcrf 0x80,r3 - rfi -InstructionAddressInvalid: - mfspr r3,SPRN_SRR1 - rlwinm r1,r3,9,6,6 /* Get load/store bit */ - - addis r1,r1,0x2000 - mtspr SPRN_DSISR,r1 /* (shouldn't be needed) */ - andi. r2,r3,0xFFFF /* Clear upper bits of SRR1 */ - or r2,r2,r1 - mtspr SPRN_SRR1,r2 - mfspr r1,SPRN_IMISS /* Get failing address */ - rlwinm. r2,r2,0,31,31 /* Check for little endian access */ - rlwimi r2,r2,1,30,30 /* change 1 -> 3 */ - xor r1,r1,r2 - mtspr SPRN_DAR,r1 /* Set fault address */ - mfmsr r0 /* Restore "normal" registers */ - xoris r0,r0,MSR_TGPR>>16 - mtcrf 0x80,r3 /* Restore CR0 */ - mtmsr r0 - b InstructionAccess - -/* - * Handle TLB miss for DATA Load operation on 603/603e - */ - . 
= 0x1100 -DataLoadTLBMiss: -/* - * r0: scratch - * r1: linux style pte ( later becomes ppc hardware pte ) - * r2: ptr to linux-style pte - * r3: scratch - */ - /* Get PTE (linux-style) and check access */ - mfspr r3,SPRN_DMISS - lis r1, TASK_SIZE@h /* check if kernel address */ - cmplw 0,r1,r3 - mfspr r2, SPRN_SPRG_PGDIR -#ifdef CONFIG_SWAP - li r1, _PAGE_PRESENT | _PAGE_ACCESSED -#else - li r1, _PAGE_PRESENT -#endif - bgt- 112f - lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ - addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ -112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ - lwz r2,0(r2) /* get pmd entry */ - rlwinm. r2,r2,0,0,19 /* extract address of pte page */ - beq- DataAddressInvalid /* return if no mapping */ - rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ - lwz r0,0(r2) /* get linux-style pte */ - andc. r1,r1,r0 /* check access & ~permission */ - bne- DataAddressInvalid /* return if access not permitted */ - /* - * NOTE! We are assuming this is not an SMP system, otherwise - * we would need to update the pte atomically with lwarx/stwcx. - */ - /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwinm r1,r0,32-9,30,30 /* _PAGE_RW -> PP msb */ - rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ - rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */ - ori r1,r1,0xe04 /* clear out reserved bits */ - andc r1,r0,r1 /* PP = user? rw? 1: 3: 0 */ -BEGIN_FTR_SECTION - rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ -END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) - mtspr SPRN_RPA,r1 - mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ - mtcrf 0x80,r2 -BEGIN_MMU_FTR_SECTION - li r0,1 - mfspr r1,SPRN_SPRG_603_LRU - rlwinm r2,r3,20,27,31 /* Get Address bits 15:19 */ - slw r0,r0,r2 - xor r1,r0,r1 - srw r0,r1,r2 - mtspr SPRN_SPRG_603_LRU,r1 - mfspr r2,SPRN_SRR1 - rlwimi r2,r0,31-14,14,14 - mtspr SPRN_SRR1,r2 -END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) - tlbld r3 - rfi -DataAddressInvalid: - mfspr r3,SPRN_SRR1 - rlwinm r1,r3,9,6,6 /* Get load/store bit */ - addis r1,r1,0x2000 - mtspr SPRN_DSISR,r1 - andi. r2,r3,0xFFFF /* Clear upper bits of SRR1 */ - mtspr SPRN_SRR1,r2 - mfspr r1,SPRN_DMISS /* Get failing address */ - rlwinm. r2,r2,0,31,31 /* Check for little endian access */ - beq 20f /* Jump if big endian */ - xori r1,r1,3 -20: mtspr SPRN_DAR,r1 /* Set fault address */ - mfmsr r0 /* Restore "normal" registers */ - xoris r0,r0,MSR_TGPR>>16 - mtcrf 0x80,r3 /* Restore CR0 */ - mtmsr r0 - b DataAccess - -/* - * Handle TLB miss for DATA Store on 603/603e - */ - . = 0x1200 -DataStoreTLBMiss: -/* - * r0: scratch - * r1: linux style pte ( later becomes ppc hardware pte ) - * r2: ptr to linux-style pte - * r3: scratch - */ - /* Get PTE (linux-style) and check access */ - mfspr r3,SPRN_DMISS - lis r1, TASK_SIZE@h /* check if kernel address */ - cmplw 0,r1,r3 - mfspr r2, SPRN_SPRG_PGDIR -#ifdef CONFIG_SWAP - li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED -#else - li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT -#endif - bgt- 112f - lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ - addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ -112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ - lwz r2,0(r2) /* get pmd entry */ - rlwinm. r2,r2,0,0,19 /* extract address of pte page */ - beq- DataAddressInvalid /* return if no mapping */ - rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ - lwz r0,0(r2) /* get linux-style pte */ - andc. 
r1,r1,r0 /* check access & ~permission */ - bne- DataAddressInvalid /* return if access not permitted */ - /* - * NOTE! We are assuming this is not an SMP system, otherwise - * we would need to update the pte atomically with lwarx/stwcx. - */ - /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */ - li r1,0xe06 /* clear out reserved bits & PP msb */ - andc r1,r0,r1 /* PP = user? 1: 0 */ -BEGIN_FTR_SECTION - rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ -END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) - mtspr SPRN_RPA,r1 - mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ - mtcrf 0x80,r2 -BEGIN_MMU_FTR_SECTION - li r0,1 - mfspr r1,SPRN_SPRG_603_LRU - rlwinm r2,r3,20,27,31 /* Get Address bits 15:19 */ - slw r0,r0,r2 - xor r1,r0,r1 - srw r0,r1,r2 - mtspr SPRN_SPRG_603_LRU,r1 - mfspr r2,SPRN_SRR1 - rlwimi r2,r0,31-14,14,14 - mtspr SPRN_SRR1,r2 -END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) - tlbld r3 - rfi - -#ifndef CONFIG_ALTIVEC -#define altivec_assist_exception unknown_exception -#endif - -#ifndef CONFIG_TAU_INT -#define TAUException unknown_exception -#endif - - EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_STD) - EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_STD) - EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_STD) - EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD) - EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_STD) - EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_STD) - - . 
= 0x3000 - -machine_check_tramp: - EXC_XFER_STD(0x200, machine_check_exception) - -alignment_exception_tramp: - EXC_XFER_STD(0x600, alignment_exception) - -handle_page_fault_tramp_1: -#ifdef CONFIG_VMAP_STACK - EXCEPTION_PROLOG_2 handle_dar_dsisr=1 -#endif - lwz r4, _DAR(r11) - lwz r5, _DSISR(r11) - /* fall through */ -handle_page_fault_tramp_2: - EXC_XFER_LITE(0x300, handle_page_fault) - -#ifdef CONFIG_VMAP_STACK -.macro save_regs_thread thread - stw r0, THR0(\thread) - stw r3, THR3(\thread) - stw r4, THR4(\thread) - stw r5, THR5(\thread) - stw r6, THR6(\thread) - stw r8, THR8(\thread) - stw r9, THR9(\thread) - mflr r0 - stw r0, THLR(\thread) - mfctr r0 - stw r0, THCTR(\thread) -.endm - -.macro restore_regs_thread thread - lwz r0, THLR(\thread) - mtlr r0 - lwz r0, THCTR(\thread) - mtctr r0 - lwz r0, THR0(\thread) - lwz r3, THR3(\thread) - lwz r4, THR4(\thread) - lwz r5, THR5(\thread) - lwz r6, THR6(\thread) - lwz r8, THR8(\thread) - lwz r9, THR9(\thread) -.endm - -hash_page_dsi: - save_regs_thread r10 - mfdsisr r3 - mfdar r4 - mfsrr0 r5 - mfsrr1 r9 - rlwinm r3, r3, 32 - 15, _PAGE_RW /* DSISR_STORE -> _PAGE_RW */ - bl hash_page - mfspr r10, SPRN_SPRG_THREAD - restore_regs_thread r10 - b .Lhash_page_dsi_cont - -hash_page_isi: - mr r11, r10 - mfspr r10, SPRN_SPRG_THREAD - save_regs_thread r10 - li r3, 0 - lwz r4, SRR0(r10) - lwz r9, SRR1(r10) - bl hash_page - mfspr r10, SPRN_SPRG_THREAD - restore_regs_thread r10 - mr r10, r11 - b .Lhash_page_isi_cont - - .globl fast_hash_page_return -fast_hash_page_return: - andis. r10, r9, SRR1_ISI_NOPT@h /* Set on ISI, cleared on DSI */ - mfspr r10, SPRN_SPRG_THREAD - restore_regs_thread r10 - bne 1f - - /* DSI */ - mtcr r11 - lwz r11, THR11(r10) - mfspr r10, SPRN_SPRG_SCRATCH0 - RFI - -1: /* ISI */ - mtcr r11 - mfspr r11, SPRN_SPRG_SCRATCH1 - mfspr r10, SPRN_SPRG_SCRATCH0 - RFI - -stack_overflow: - vmap_stack_overflow_exception -#endif - -AltiVecUnavailable: - EXCEPTION_PROLOG -#ifdef CONFIG_ALTIVEC - beq 1f - bl load_up_altivec /* if from user, just load it up */ - b fast_exception_return -#endif /* CONFIG_ALTIVEC */ -1: addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_LITE(0xf20, altivec_unavailable_exception) - -PerformanceMonitor: - EXCEPTION_PROLOG - addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_STD(0xf00, performance_monitor_exception) - - -/* - * This code is jumped to from the startup code to copy - * the kernel image to physical address PHYSICAL_START. - */ -relocate_kernel: - addis r9,r26,klimit@ha /* fetch klimit */ - lwz r25,klimit@l(r9) - addis r25,r25,-KERNELBASE@h - lis r3,PHYSICAL_START@h /* Destination base address */ - li r6,0 /* Destination offset */ - li r5,0x4000 /* # bytes of memory to copy */ - bl copy_and_flush /* copy the first 0x4000 bytes */ - addi r0,r3,4f@l /* jump to the address of 4f */ - mtctr r0 /* in copy and do the rest. */ - bctr /* jump to the copy */ -4: mr r5,r25 - bl copy_and_flush /* copy the rest */ - b turn_on_mmu - -/* - * Copy routine used to copy the kernel to start at physical address 0 - * and flush and invalidate the caches as needed. - * r3 = dest addr, r4 = source addr, r5 = copy limit, r6 = start offset - * on exit, r3, r4, r5 are unchanged, r6 is updated to be >= r5. 
- */ -_ENTRY(copy_and_flush) - addi r5,r5,-4 - addi r6,r6,-4 -4: li r0,L1_CACHE_BYTES/4 - mtctr r0 -3: addi r6,r6,4 /* copy a cache line */ - lwzx r0,r6,r4 - stwx r0,r6,r3 - bdnz 3b - dcbst r6,r3 /* write it to memory */ - sync - icbi r6,r3 /* flush the icache line */ - cmplw 0,r6,r5 - blt 4b - sync /* additional sync needed on g4 */ - isync - addi r5,r5,4 - addi r6,r6,4 - blr - -#ifdef CONFIG_SMP - .globl __secondary_start_mpc86xx -__secondary_start_mpc86xx: - mfspr r3, SPRN_PIR - stw r3, __secondary_hold_acknowledge@l(0) - mr r24, r3 /* cpu # */ - b __secondary_start - - .globl __secondary_start_pmac_0 -__secondary_start_pmac_0: - /* NB the entries for cpus 0, 1, 2 must each occupy 8 bytes. */ - li r24,0 - b 1f - li r24,1 - b 1f - li r24,2 - b 1f - li r24,3 -1: - /* on powersurge, we come in here with IR=0 and DR=1, and DBAT 0 - set to map the 0xf0000000 - 0xffffffff region */ - mfmsr r0 - rlwinm r0,r0,0,28,26 /* clear DR (0x10) */ - mtmsr r0 - isync - - .globl __secondary_start -__secondary_start: - /* Copy some CPU settings from CPU 0 */ - bl __restore_cpu_setup - - lis r3,-KERNELBASE@h - mr r4,r24 - bl call_setup_cpu /* Call setup_cpu for this CPU */ -#ifdef CONFIG_PPC_BOOK3S_32 - lis r3,-KERNELBASE@h - bl init_idle_6xx -#endif /* CONFIG_PPC_BOOK3S_32 */ - - /* get current's stack and current */ - lis r2,secondary_current@ha - tophys(r2,r2) - lwz r2,secondary_current@l(r2) - tophys(r1,r2) - lwz r1,TASK_STACK(r1) - - /* stack */ - addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD - li r0,0 - tophys(r3,r1) - stw r0,0(r3) - - /* load up the MMU */ - bl load_segment_registers - bl load_up_mmu - - /* ptr to phys current thread */ - tophys(r4,r2) - addi r4,r4,THREAD /* phys address of our thread_struct */ - mtspr SPRN_SPRG_THREAD,r4 - lis r4, (swapper_pg_dir - PAGE_OFFSET)@h - ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l - mtspr SPRN_SPRG_PGDIR, r4 - - /* enable MMU and jump to start_secondary */ - li r4,MSR_KERNEL - lis r3,start_secondary@h - ori r3,r3,start_secondary@l - mtspr SPRN_SRR0,r3 - mtspr SPRN_SRR1,r4 - RFI -#endif /* CONFIG_SMP */ - -#ifdef CONFIG_KVM_BOOK3S_HANDLER -#include "../kvm/book3s_rmhandlers.S" -#endif - -/* - * Those generic dummy functions are kept for CPUs not - * included in CONFIG_PPC_BOOK3S_32 - */ -#if !defined(CONFIG_PPC_BOOK3S_32) -_ENTRY(__save_cpu_setup) - blr -_ENTRY(__restore_cpu_setup) - blr -#endif /* !defined(CONFIG_PPC_BOOK3S_32) */ - -/* - * Load stuff into the MMU. Intended to be called with - * IR=0 and DR=0. - */ -early_hash_table: - sync /* Force all PTE updates to finish */ - isync - tlbia /* Clear all TLB entries */ - sync /* wait for tlbia/tlbie to finish */ - TLBSYNC /* ... on all CPUs */ - /* Load the SDR1 register (hash table base & size) */ - lis r6, early_hash - PAGE_OFFSET@h - ori r6, r6, 3 /* 256kB table */ - mtspr SPRN_SDR1, r6 - lis r6, early_hash@h - lis r3, Hash@ha - stw r6, Hash@l(r3) - blr - -load_up_mmu: - sync /* Force all PTE updates to finish */ - isync - tlbia /* Clear all TLB entries */ - sync /* wait for tlbia/tlbie to finish */ - TLBSYNC /* ... on all CPUs */ - /* Load the SDR1 register (hash table base & size) */ - lis r6,_SDR1@ha - tophys(r6,r6) - lwz r6,_SDR1@l(r6) - mtspr SPRN_SDR1,r6 - -/* Load the BAT registers with the values set up by MMU_init. 
*/ - lis r3,BATS@ha - addi r3,r3,BATS@l - tophys(r3,r3) - LOAD_BAT(0,r3,r4,r5) - LOAD_BAT(1,r3,r4,r5) - LOAD_BAT(2,r3,r4,r5) - LOAD_BAT(3,r3,r4,r5) -BEGIN_MMU_FTR_SECTION - LOAD_BAT(4,r3,r4,r5) - LOAD_BAT(5,r3,r4,r5) - LOAD_BAT(6,r3,r4,r5) - LOAD_BAT(7,r3,r4,r5) -END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) - blr - -_GLOBAL(load_segment_registers) - li r0, NUM_USER_SEGMENTS /* load up user segment register values */ - mtctr r0 /* for context 0 */ - li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */ -#ifdef CONFIG_PPC_KUEP - oris r3, r3, SR_NX@h /* Set Nx */ -#endif -#ifdef CONFIG_PPC_KUAP - oris r3, r3, SR_KS@h /* Set Ks */ -#endif - li r4, 0 -3: mtsrin r3, r4 - addi r3, r3, 0x111 /* increment VSID */ - addis r4, r4, 0x1000 /* address of next segment */ - bdnz 3b - li r0, 16 - NUM_USER_SEGMENTS /* load up kernel segment registers */ - mtctr r0 /* for context 0 */ - rlwinm r3, r3, 0, ~SR_NX /* Nx = 0 */ - rlwinm r3, r3, 0, ~SR_KS /* Ks = 0 */ - oris r3, r3, SR_KP@h /* Kp = 1 */ -3: mtsrin r3, r4 - addi r3, r3, 0x111 /* increment VSID */ - addis r4, r4, 0x1000 /* address of next segment */ - bdnz 3b - blr - -/* - * This is where the main kernel code starts. - */ -start_here: - /* ptr to current */ - lis r2,init_task@h - ori r2,r2,init_task@l - /* Set up for using our exception vectors */ - /* ptr to phys current thread */ - tophys(r4,r2) - addi r4,r4,THREAD /* init task's THREAD */ - mtspr SPRN_SPRG_THREAD,r4 - lis r4, (swapper_pg_dir - PAGE_OFFSET)@h - ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l - mtspr SPRN_SPRG_PGDIR, r4 - - /* stack */ - lis r1,init_thread_union@ha - addi r1,r1,init_thread_union@l - li r0,0 - stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) -/* - * Do early platform-specific initialization, - * and set up the MMU. - */ -#ifdef CONFIG_KASAN - bl kasan_early_init -#endif - li r3,0 - mr r4,r31 - bl machine_init - bl __save_cpu_setup - bl MMU_init - bl MMU_init_hw_patch - -/* - * Go back to running unmapped so we can load up new values - * for SDR1 (hash table pointer) and the segment registers - * and change to using our exception vectors. - */ - lis r4,2f@h - ori r4,r4,2f@l - tophys(r4,r4) - li r3,MSR_KERNEL & ~(MSR_IR|MSR_DR) - - .align 4 - mtspr SPRN_SRR0,r4 - mtspr SPRN_SRR1,r3 - RFI -/* Load up the kernel context */ -2: bl load_up_mmu - -#ifdef CONFIG_BDI_SWITCH - /* Add helper information for the Abatron bdiGDB debugger. - * We do this here because we know the mmu is disabled, and - * will be enabled for real in just a few instructions. - */ - lis r5, abatron_pteptrs@h - ori r5, r5, abatron_pteptrs@l - stw r5, 0xf0(0) /* This much match your Abatron config */ - lis r6, swapper_pg_dir@h - ori r6, r6, swapper_pg_dir@l - tophys(r5, r5) - stw r6, 0(r5) -#endif /* CONFIG_BDI_SWITCH */ - -/* Now turn on the MMU for real! */ - li r4,MSR_KERNEL - lis r3,start_kernel@h - ori r3,r3,start_kernel@l - mtspr SPRN_SRR0,r3 - mtspr SPRN_SRR1,r4 - RFI - -/* - * void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next); - * - * Set up the segment registers for a new context. - */ -_ENTRY(switch_mmu_context) - lwz r3,MMCONTEXTID(r4) - cmpwi cr0,r3,0 - blt- 4f - mulli r3,r3,897 /* multiply context by skew factor */ - rlwinm r3,r3,4,8,27 /* VSID = (context & 0xfffff) << 4 */ -#ifdef CONFIG_PPC_KUEP - oris r3, r3, SR_NX@h /* Set Nx */ -#endif -#ifdef CONFIG_PPC_KUAP - oris r3, r3, SR_KS@h /* Set Ks */ -#endif - li r0,NUM_USER_SEGMENTS - mtctr r0 - - lwz r4, MM_PGD(r4) -#ifdef CONFIG_BDI_SWITCH - /* Context switch the PTE pointer for the Abatron BDI2000. 
- * The PGDIR is passed as second argument. - */ - lis r5, abatron_pteptrs@ha - stw r4, abatron_pteptrs@l + 0x4(r5) -#endif - tophys(r4, r4) - mtspr SPRN_SPRG_PGDIR, r4 - li r4,0 - isync -3: - mtsrin r3,r4 - addi r3,r3,0x111 /* next VSID */ - rlwinm r3,r3,0,8,3 /* clear out any overflow from VSID field */ - addis r4,r4,0x1000 /* address of next segment */ - bdnz 3b - sync - isync - blr -4: trap - EMIT_BUG_ENTRY 4b,__FILE__,__LINE__,0 - blr -EXPORT_SYMBOL(switch_mmu_context) - -/* - * An undocumented "feature" of 604e requires that the v bit - * be cleared before changing BAT values. - * - * Also, newer IBM firmware does not clear bat3 and 4 so - * this makes sure it's done. - * -- Cort - */ -clear_bats: - li r10,0 - - mtspr SPRN_DBAT0U,r10 - mtspr SPRN_DBAT0L,r10 - mtspr SPRN_DBAT1U,r10 - mtspr SPRN_DBAT1L,r10 - mtspr SPRN_DBAT2U,r10 - mtspr SPRN_DBAT2L,r10 - mtspr SPRN_DBAT3U,r10 - mtspr SPRN_DBAT3L,r10 - mtspr SPRN_IBAT0U,r10 - mtspr SPRN_IBAT0L,r10 - mtspr SPRN_IBAT1U,r10 - mtspr SPRN_IBAT1L,r10 - mtspr SPRN_IBAT2U,r10 - mtspr SPRN_IBAT2L,r10 - mtspr SPRN_IBAT3U,r10 - mtspr SPRN_IBAT3L,r10 -BEGIN_MMU_FTR_SECTION - /* Here's a tweak: at this point, CPU setup have - * not been called yet, so HIGH_BAT_EN may not be - * set in HID0 for the 745x processors. However, it - * seems that doesn't affect our ability to actually - * write to these SPRs. - */ - mtspr SPRN_DBAT4U,r10 - mtspr SPRN_DBAT4L,r10 - mtspr SPRN_DBAT5U,r10 - mtspr SPRN_DBAT5L,r10 - mtspr SPRN_DBAT6U,r10 - mtspr SPRN_DBAT6L,r10 - mtspr SPRN_DBAT7U,r10 - mtspr SPRN_DBAT7L,r10 - mtspr SPRN_IBAT4U,r10 - mtspr SPRN_IBAT4L,r10 - mtspr SPRN_IBAT5U,r10 - mtspr SPRN_IBAT5L,r10 - mtspr SPRN_IBAT6U,r10 - mtspr SPRN_IBAT6L,r10 - mtspr SPRN_IBAT7U,r10 - mtspr SPRN_IBAT7L,r10 -END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) - blr - -_ENTRY(update_bats) - lis r4, 1f@h - ori r4, r4, 1f@l - tophys(r4, r4) - mfmsr r6 - mflr r7 - li r3, MSR_KERNEL & ~(MSR_IR | MSR_DR) - rlwinm r0, r6, 0, ~MSR_RI - rlwinm r0, r0, 0, ~MSR_EE - mtmsr r0 - - .align 4 - mtspr SPRN_SRR0, r4 - mtspr SPRN_SRR1, r3 - RFI -1: bl clear_bats - lis r3, BATS@ha - addi r3, r3, BATS@l - tophys(r3, r3) - LOAD_BAT(0, r3, r4, r5) - LOAD_BAT(1, r3, r4, r5) - LOAD_BAT(2, r3, r4, r5) - LOAD_BAT(3, r3, r4, r5) -BEGIN_MMU_FTR_SECTION - LOAD_BAT(4, r3, r4, r5) - LOAD_BAT(5, r3, r4, r5) - LOAD_BAT(6, r3, r4, r5) - LOAD_BAT(7, r3, r4, r5) -END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) - li r3, MSR_KERNEL & ~(MSR_IR | MSR_DR | MSR_RI) - mtmsr r3 - mtspr SPRN_SRR0, r7 - mtspr SPRN_SRR1, r6 - RFI - -flush_tlbs: - lis r10, 0x40 -1: addic. r10, r10, -0x1000 - tlbie r10 - bgt 1b - sync - blr - -mmu_off: - addi r4, r3, __after_mmu_off - _start - mfmsr r3 - andi. r0,r3,MSR_DR|MSR_IR /* MMU enabled? */ - beqlr - andc r3,r3,r0 - - .align 4 - mtspr SPRN_SRR0,r4 - mtspr SPRN_SRR1,r3 - sync - RFI - -/* We use one BAT to map up to 256M of RAM at _PAGE_OFFSET */ -initial_bats: - lis r11,PAGE_OFFSET@h - tophys(r8,r11) -#ifdef CONFIG_SMP - ori r8,r8,0x12 /* R/W access, M=1 */ -#else - ori r8,r8,2 /* R/W access */ -#endif /* CONFIG_SMP */ - ori r11,r11,BL_256M<<2|0x2 /* set up BAT registers for 604 */ - - mtspr SPRN_DBAT0L,r8 /* N.B. 
6xx have valid */
- mtspr SPRN_DBAT0U,r11 /* bit in upper BAT register */
- mtspr SPRN_IBAT0L,r8
- mtspr SPRN_IBAT0U,r11
- isync
- blr
-
-#ifdef CONFIG_BOOTX_TEXT
-setup_disp_bat:
- /*
- * setup the display bat prepared for us in prom.c
- */
- mflr r8
- bl reloc_offset
- mtlr r8
- addis r8,r3,disp_BAT@ha
- addi r8,r8,disp_BAT@l
- cmpwi cr0,r8,0
- beqlr
- lwz r11,0(r8)
- lwz r8,4(r8)
- mtspr SPRN_DBAT3L,r8
- mtspr SPRN_DBAT3U,r11
- blr
-#endif /* CONFIG_BOOTX_TEXT */
-
-#ifdef CONFIG_PPC_EARLY_DEBUG_CPM
-setup_cpm_bat:
- lis r8, 0xf000
- ori r8, r8, 0x002a
- mtspr SPRN_DBAT1L, r8
-
- lis r11, 0xf000
- ori r11, r11, (BL_1M << 2) | 2
- mtspr SPRN_DBAT1U, r11
-
- blr
-#endif
-
-#ifdef CONFIG_PPC_EARLY_DEBUG_USBGECKO
-setup_usbgecko_bat:
- /* prepare a BAT for early io */
-#if defined(CONFIG_GAMECUBE)
- lis r8, 0x0c00
-#elif defined(CONFIG_WII)
- lis r8, 0x0d00
-#else
-#error Invalid platform for USB Gecko based early debugging.
-#endif
- /*
- * The virtual address used must match the virtual address
- * associated to the fixmap entry FIX_EARLY_DEBUG_BASE.
- */
- lis r11, 0xfffe /* top 128K */
- ori r8, r8, 0x002a /* uncached, guarded ,rw */
- ori r11, r11, 0x2 /* 128K, Vs=1, Vp=0 */
- mtspr SPRN_DBAT1L, r8
- mtspr SPRN_DBAT1U, r11
- blr
-#endif
-
-#ifdef CONFIG_8260
-/* Jump into the system reset for the rom.
- * We first disable the MMU, and then jump to the ROM reset address.
- *
- * r3 is the board info structure, r4 is the location for starting.
- * I use this for building a small kernel that can load other kernels,
- * rather than trying to write or rely on a rom monitor that can tftp load.
- */
- .globl m8260_gorom
-m8260_gorom:
- mfmsr r0
- rlwinm r0,r0,0,17,15 /* clear MSR_EE in r0 */
- sync
- mtmsr r0
- sync
- mfspr r11, SPRN_HID0
- lis r10, 0
- ori r10,r10,HID0_ICE|HID0_DCE
- andc r11, r11, r10
- mtspr SPRN_HID0, r11
- isync
- li r5, MSR_ME|MSR_RI
- lis r6,2f@h
- addis r6,r6,-KERNELBASE@h
- ori r6,r6,2f@l
- mtspr SPRN_SRR0,r6
- mtspr SPRN_SRR1,r5
- isync
- sync
- rfi
-2:
- mtlr r4
- blr
-#endif
-
-
-/*
- * We put a few things here that have to be page-aligned.
- * This stuff goes at the beginning of the data segment,
- * which is page-aligned.
- */
- .data
- .globl sdata
-sdata:
- .globl empty_zero_page
-empty_zero_page:
- .space 4096
-EXPORT_SYMBOL(empty_zero_page)
-
- .globl swapper_pg_dir
-swapper_pg_dir:
- .space PGD_TABLE_SIZE
-
-/* Room for two PTE pointers, usually the kernel and current user pointers
- * to their respective root page table.
- */
-abatron_pteptrs:
- .space 8
diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S
new file mode 100644
index 000000000000..b7b554533e0d
--- /dev/null
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -0,0 +1,1366 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * PowerPC version
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
+ * Copyright (C) 1996 Cort Dougan
+ * Adapted for Power Macintosh by Paul Mackerras.
+ * Low-level exception handlers and MMU support
+ * rewritten by Paul Mackerras.
+ * Copyright (C) 1996 Paul Mackerras.
+ * MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
+ *
+ * This file contains the low-level support and setup for the
+ * PowerPC platform, including trap and interrupt dispatch.
+ * (The PPC 8xx embedded CPUs use head_8xx.S instead.)
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "head_32.h" + +#define LOAD_BAT(n, reg, RA, RB) \ + /* see the comment for clear_bats() -- Cort */ \ + li RA,0; \ + mtspr SPRN_IBAT##n##U,RA; \ + mtspr SPRN_DBAT##n##U,RA; \ + lwz RA,(n*16)+0(reg); \ + lwz RB,(n*16)+4(reg); \ + mtspr SPRN_IBAT##n##U,RA; \ + mtspr SPRN_IBAT##n##L,RB; \ + lwz RA,(n*16)+8(reg); \ + lwz RB,(n*16)+12(reg); \ + mtspr SPRN_DBAT##n##U,RA; \ + mtspr SPRN_DBAT##n##L,RB + + __HEAD + .stabs "arch/powerpc/kernel/",N_SO,0,0,0f + .stabs "head_book3s_32.S",N_SO,0,0,0f +0: +_ENTRY(_stext); + +/* + * _start is defined this way because the XCOFF loader in the OpenFirmware + * on the powermac expects the entry point to be a procedure descriptor. + */ +_ENTRY(_start); + /* + * These are here for legacy reasons, the kernel used to + * need to look like a coff function entry for the pmac + * but we're always started by some kind of bootloader now. + * -- Cort + */ + nop /* used by __secondary_hold on prep (mtx) and chrp smp */ + nop /* used by __secondary_hold on prep (mtx) and chrp smp */ + nop + +/* PMAC + * Enter here with the kernel text, data and bss loaded starting at + * 0, running with virtual == physical mapping. + * r5 points to the prom entry point (the client interface handler + * address). Address translation is turned on, with the prom + * managing the hash table. Interrupts are disabled. The stack + * pointer (r1) points to just below the end of the half-meg region + * from 0x380000 - 0x400000, which is mapped in already. + * + * If we are booted from MacOS via BootX, we enter with the kernel + * image loaded somewhere, and the following values in registers: + * r3: 'BooX' (0x426f6f58) + * r4: virtual address of boot_infos_t + * r5: 0 + * + * PREP + * This is jumped to on prep systems right after the kernel is relocated + * to its proper place in memory by the boot loader. The expected layout + * of the regs is: + * r3: ptr to residual data + * r4: initrd_start or if no initrd then 0 + * r5: initrd_end - unused if r4 is 0 + * r6: Start of command line string + * r7: End of command line string + * + * This just gets a minimal mmu environment setup so we can call + * start_here() to do the real work. + * -- Cort + */ + + .globl __start +__start: +/* + * We have to do any OF calls before we map ourselves to KERNELBASE, + * because OF may have I/O devices mapped into that area + * (particularly on CHRP). + */ + cmpwi 0,r5,0 + beq 1f + +#ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE + /* find out where we are now */ + bcl 20,31,$+4 +0: mflr r8 /* r8 = runtime addr here */ + addis r8,r8,(_stext - 0b)@ha + addi r8,r8,(_stext - 0b)@l /* current runtime base addr */ + bl prom_init +#endif /* CONFIG_PPC_OF_BOOT_TRAMPOLINE */ + + /* We never return. We also hit that trap if trying to boot + * from OF while CONFIG_PPC_OF_BOOT_TRAMPOLINE isn't selected */ + trap + +/* + * Check for BootX signature when supporting PowerMac and branch to + * appropriate trampoline if it's present + */ +#ifdef CONFIG_PPC_PMAC +1: lis r31,0x426f + ori r31,r31,0x6f58 + cmpw 0,r3,r31 + bne 1f + bl bootx_init + trap +#endif /* CONFIG_PPC_PMAC */ + +1: mr r31,r3 /* save device tree ptr */ + li r24,0 /* cpu # */ + +/* + * early_init() does the early machine identification and does + * the necessary low-level setup and clears the BSS + * -- Cort + */ + bl early_init + +/* Switch MMU off, clear BATs and flush TLB. 
At this point, r3 contains + * the physical address we are running at, returned by early_init() + */ + bl mmu_off +__after_mmu_off: + bl clear_bats + bl flush_tlbs + + bl initial_bats + bl load_segment_registers +BEGIN_MMU_FTR_SECTION + bl early_hash_table +END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) +#if defined(CONFIG_BOOTX_TEXT) + bl setup_disp_bat +#endif +#ifdef CONFIG_PPC_EARLY_DEBUG_CPM + bl setup_cpm_bat +#endif +#ifdef CONFIG_PPC_EARLY_DEBUG_USBGECKO + bl setup_usbgecko_bat +#endif + +/* + * Call setup_cpu for CPU 0 and initialize 6xx Idle + */ + bl reloc_offset + li r24,0 /* cpu# */ + bl call_setup_cpu /* Call setup_cpu for this CPU */ +#ifdef CONFIG_PPC_BOOK3S_32 + bl reloc_offset + bl init_idle_6xx +#endif /* CONFIG_PPC_BOOK3S_32 */ + + +/* + * We need to run with _start at physical address 0. + * On CHRP, we are loaded at 0x10000 since OF on CHRP uses + * the exception vectors at 0 (and therefore this copy + * overwrites OF's exception vectors with our own). + * The MMU is off at this point. + */ + bl reloc_offset + mr r26,r3 + addis r4,r3,KERNELBASE@h /* current address of _start */ + lis r5,PHYSICAL_START@h + cmplw 0,r4,r5 /* already running at PHYSICAL_START? */ + bne relocate_kernel +/* + * we now have the 1st 16M of ram mapped with the bats. + * prep needs the mmu to be turned on here, but pmac already has it on. + * this shouldn't bother the pmac since it just gets turned on again + * as we jump to our code at KERNELBASE. -- Cort + * Actually no, pmac doesn't have it on any more. BootX enters with MMU + * off, and in other cases, we now turn it off before changing BATs above. + */ +turn_on_mmu: + mfmsr r0 + ori r0,r0,MSR_DR|MSR_IR|MSR_RI + mtspr SPRN_SRR1,r0 + lis r0,start_here@h + ori r0,r0,start_here@l + mtspr SPRN_SRR0,r0 + RFI /* enables MMU */ + +/* + * We need __secondary_hold as a place to hold the other cpus on + * an SMP machine, even when we are running a UP kernel. + */ + . = 0xc0 /* for prep bootloader */ + li r3,1 /* MTX only has 1 cpu */ + .globl __secondary_hold +__secondary_hold: + /* tell the master we're here */ + stw r3,__secondary_hold_acknowledge@l(0) +#ifdef CONFIG_SMP +100: lwz r4,0(0) + /* wait until we're told to start */ + cmpw 0,r4,r3 + bne 100b + /* our cpu # was at addr 0 - go */ + mr r24,r3 /* cpu # */ + b __secondary_start +#else + b . +#endif /* CONFIG_SMP */ + + .globl __secondary_hold_spinloop +__secondary_hold_spinloop: + .long 0 + .globl __secondary_hold_acknowledge +__secondary_hold_acknowledge: + .long -1 + +/* System reset */ +/* core99 pmac starts the seconary here by changing the vector, and + putting it back to what it was (unknown_exception) when done. */ + EXCEPTION(0x100, Reset, unknown_exception, EXC_XFER_STD) + +/* Machine check */ +/* + * On CHRP, this is complicated by the fact that we could get a + * machine check inside RTAS, and we have no guarantee that certain + * critical registers will have the values we expect. The set of + * registers that might have bad values includes all the GPRs + * and all the BATs. We indicate that we are in RTAS by putting + * a non-zero value, the address of the exception frame to use, + * in thread.rtas_sp. The machine check handler checks thread.rtas_sp + * and uses its value if it is non-zero. + * (Other exception handlers assume that r1 is a valid kernel stack + * pointer when we take an exception from supervisor mode.) + * -- paulus. + */ + . 
= 0x200 + DO_KVM 0x200 +MachineCheck: + EXCEPTION_PROLOG_0 +#ifdef CONFIG_PPC_CHRP + mfspr r11, SPRN_SPRG_THREAD + lwz r11, RTAS_SP(r11) + cmpwi cr1, r11, 0 + bne cr1, 7f +#endif /* CONFIG_PPC_CHRP */ + EXCEPTION_PROLOG_1 for_rtas=1 +7: EXCEPTION_PROLOG_2 + addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef CONFIG_PPC_CHRP +#ifdef CONFIG_VMAP_STACK + mfspr r4, SPRN_SPRG_THREAD + tovirt(r4, r4) + lwz r4, RTAS_SP(r4) + cmpwi cr1, r4, 0 +#endif + beq cr1, machine_check_tramp + twi 31, 0, 0 +#else + b machine_check_tramp +#endif + +/* Data access exception. */ + . = 0x300 + DO_KVM 0x300 +DataAccess: +#ifdef CONFIG_VMAP_STACK + mtspr SPRN_SPRG_SCRATCH0,r10 + mfspr r10, SPRN_SPRG_THREAD +BEGIN_MMU_FTR_SECTION + stw r11, THR11(r10) + mfspr r10, SPRN_DSISR + mfcr r11 +#ifdef CONFIG_PPC_KUAP + andis. r10, r10, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h +#else + andis. r10, r10, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h +#endif + mfspr r10, SPRN_SPRG_THREAD + beq hash_page_dsi +.Lhash_page_dsi_cont: + mtcr r11 + lwz r11, THR11(r10) +END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) + mtspr SPRN_SPRG_SCRATCH1,r11 + mfspr r11, SPRN_DAR + stw r11, DAR(r10) + mfspr r11, SPRN_DSISR + stw r11, DSISR(r10) + mfspr r11, SPRN_SRR0 + stw r11, SRR0(r10) + mfspr r11, SPRN_SRR1 /* check whether user or kernel */ + stw r11, SRR1(r10) + mfcr r10 + andi. r11, r11, MSR_PR + + EXCEPTION_PROLOG_1 + b handle_page_fault_tramp_1 +#else /* CONFIG_VMAP_STACK */ + EXCEPTION_PROLOG handle_dar_dsisr=1 + get_and_save_dar_dsisr_on_stack r4, r5, r11 +BEGIN_MMU_FTR_SECTION +#ifdef CONFIG_PPC_KUAP + andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h +#else + andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h +#endif + bne handle_page_fault_tramp_2 /* if not, try to put a PTE */ + rlwinm r3, r5, 32 - 15, 21, 21 /* DSISR_STORE -> _PAGE_RW */ + bl hash_page + b handle_page_fault_tramp_1 +FTR_SECTION_ELSE + b handle_page_fault_tramp_2 +ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) +#endif /* CONFIG_VMAP_STACK */ + +/* Instruction access exception. */ + . = 0x400 + DO_KVM 0x400 +InstructionAccess: +#ifdef CONFIG_VMAP_STACK + mtspr SPRN_SPRG_SCRATCH0,r10 + mtspr SPRN_SPRG_SCRATCH1,r11 + mfspr r10, SPRN_SPRG_THREAD + mfspr r11, SPRN_SRR0 + stw r11, SRR0(r10) + mfspr r11, SPRN_SRR1 /* check whether user or kernel */ + stw r11, SRR1(r10) + mfcr r10 +BEGIN_MMU_FTR_SECTION + andis. r11, r11, SRR1_ISI_NOPT@h /* no pte found? */ + bne hash_page_isi +.Lhash_page_isi_cont: + mfspr r11, SPRN_SRR1 /* check whether user or kernel */ +END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) + andi. r11, r11, MSR_PR + + EXCEPTION_PROLOG_1 + EXCEPTION_PROLOG_2 +#else /* CONFIG_VMAP_STACK */ + EXCEPTION_PROLOG + andis. r0,r9,SRR1_ISI_NOPT@h /* no pte found? */ + beq 1f /* if so, try to put a PTE */ + li r3,0 /* into the hash table */ + mr r4,r12 /* SRR0 is fault address */ +BEGIN_MMU_FTR_SECTION + bl hash_page +END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) +#endif /* CONFIG_VMAP_STACK */ +1: mr r4,r12 + andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ + stw r4, _DAR(r11) + EXC_XFER_LITE(0x400, handle_page_fault) + +/* External interrupt */ + EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) + +/* Alignment exception */ + . 
= 0x600 + DO_KVM 0x600 +Alignment: + EXCEPTION_PROLOG handle_dar_dsisr=1 + save_dar_dsisr_on_stack r4, r5, r11 + addi r3,r1,STACK_FRAME_OVERHEAD + b alignment_exception_tramp + +/* Program check exception */ + EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) + +/* Floating-point unavailable */ + . = 0x800 + DO_KVM 0x800 +FPUnavailable: +BEGIN_FTR_SECTION +/* + * Certain Freescale cores don't have a FPU and treat fp instructions + * as a FP Unavailable exception. Redirect to illegal/emulation handling. + */ + b ProgramCheck +END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) + EXCEPTION_PROLOG + beq 1f + bl load_up_fpu /* if from user, just load it up */ + b fast_exception_return +1: addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_LITE(0x800, kernel_fp_unavailable_exception) + +/* Decrementer */ + EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) + + EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_STD) + EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_STD) + +/* System call */ + . = 0xc00 + DO_KVM 0xc00 +SystemCall: + SYSCALL_ENTRY 0xc00 + + EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) + EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD) + +/* + * The Altivec unavailable trap is at 0x0f20. Foo. + * We effectively remap it to 0x3000. + * We include an altivec unavailable exception vector even if + * not configured for Altivec, so that you can't panic a + * non-altivec kernel running on a machine with altivec just + * by executing an altivec instruction. + */ + . = 0xf00 + DO_KVM 0xf00 + b PerformanceMonitor + + . = 0xf20 + DO_KVM 0xf20 + b AltiVecUnavailable + +/* + * Handle TLB miss for instruction on 603/603e. + * Note: we get an alternate set of r0 - r3 to use automatically. + */ + . = 0x1000 +InstructionTLBMiss: +/* + * r0: scratch + * r1: linux style pte ( later becomes ppc hardware pte ) + * r2: ptr to linux-style pte + * r3: scratch + */ + /* Get PTE (linux-style) and check access */ + mfspr r3,SPRN_IMISS +#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) + lis r1, TASK_SIZE@h /* check if kernel address */ + cmplw 0,r1,r3 +#endif + mfspr r2, SPRN_SPRG_PGDIR +#ifdef CONFIG_SWAP + li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC +#else + li r1,_PAGE_PRESENT | _PAGE_EXEC +#endif +#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) + bgt- 112f + lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ + addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ +#endif +112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r2) /* get pmd entry */ + rlwinm. r2,r2,0,0,19 /* extract address of pte page */ + beq- InstructionAddressInvalid /* return if no mapping */ + rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ + lwz r0,0(r2) /* get linux-style pte */ + andc. r1,r1,r0 /* check access & ~permission */ + bne- InstructionAddressInvalid /* return if access not permitted */ + /* Convert linux-style PTE to low word of PPC-style PTE */ + rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */ + ori r1, r1, 0xe06 /* clear out reserved bits */ + andc r1, r0, r1 /* PP = user? 
1 : 0 */ +BEGIN_FTR_SECTION + rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ +END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) + mtspr SPRN_RPA,r1 + tlbli r3 + mfspr r3,SPRN_SRR1 /* Need to restore CR0 */ + mtcrf 0x80,r3 + rfi +InstructionAddressInvalid: + mfspr r3,SPRN_SRR1 + rlwinm r1,r3,9,6,6 /* Get load/store bit */ + + addis r1,r1,0x2000 + mtspr SPRN_DSISR,r1 /* (shouldn't be needed) */ + andi. r2,r3,0xFFFF /* Clear upper bits of SRR1 */ + or r2,r2,r1 + mtspr SPRN_SRR1,r2 + mfspr r1,SPRN_IMISS /* Get failing address */ + rlwinm. r2,r2,0,31,31 /* Check for little endian access */ + rlwimi r2,r2,1,30,30 /* change 1 -> 3 */ + xor r1,r1,r2 + mtspr SPRN_DAR,r1 /* Set fault address */ + mfmsr r0 /* Restore "normal" registers */ + xoris r0,r0,MSR_TGPR>>16 + mtcrf 0x80,r3 /* Restore CR0 */ + mtmsr r0 + b InstructionAccess + +/* + * Handle TLB miss for DATA Load operation on 603/603e + */ + . = 0x1100 +DataLoadTLBMiss: +/* + * r0: scratch + * r1: linux style pte ( later becomes ppc hardware pte ) + * r2: ptr to linux-style pte + * r3: scratch + */ + /* Get PTE (linux-style) and check access */ + mfspr r3,SPRN_DMISS + lis r1, TASK_SIZE@h /* check if kernel address */ + cmplw 0,r1,r3 + mfspr r2, SPRN_SPRG_PGDIR +#ifdef CONFIG_SWAP + li r1, _PAGE_PRESENT | _PAGE_ACCESSED +#else + li r1, _PAGE_PRESENT +#endif + bgt- 112f + lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ + addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ +112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r2) /* get pmd entry */ + rlwinm. r2,r2,0,0,19 /* extract address of pte page */ + beq- DataAddressInvalid /* return if no mapping */ + rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ + lwz r0,0(r2) /* get linux-style pte */ + andc. r1,r1,r0 /* check access & ~permission */ + bne- DataAddressInvalid /* return if access not permitted */ + /* + * NOTE! We are assuming this is not an SMP system, otherwise + * we would need to update the pte atomically with lwarx/stwcx. + */ + /* Convert linux-style PTE to low word of PPC-style PTE */ + rlwinm r1,r0,32-9,30,30 /* _PAGE_RW -> PP msb */ + rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ + rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */ + ori r1,r1,0xe04 /* clear out reserved bits */ + andc r1,r0,r1 /* PP = user? rw? 1: 3: 0 */ +BEGIN_FTR_SECTION + rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ +END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) + mtspr SPRN_RPA,r1 + mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ + mtcrf 0x80,r2 +BEGIN_MMU_FTR_SECTION + li r0,1 + mfspr r1,SPRN_SPRG_603_LRU + rlwinm r2,r3,20,27,31 /* Get Address bits 15:19 */ + slw r0,r0,r2 + xor r1,r0,r1 + srw r0,r1,r2 + mtspr SPRN_SPRG_603_LRU,r1 + mfspr r2,SPRN_SRR1 + rlwimi r2,r0,31-14,14,14 + mtspr SPRN_SRR1,r2 +END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) + tlbld r3 + rfi +DataAddressInvalid: + mfspr r3,SPRN_SRR1 + rlwinm r1,r3,9,6,6 /* Get load/store bit */ + addis r1,r1,0x2000 + mtspr SPRN_DSISR,r1 + andi. r2,r3,0xFFFF /* Clear upper bits of SRR1 */ + mtspr SPRN_SRR1,r2 + mfspr r1,SPRN_DMISS /* Get failing address */ + rlwinm. r2,r2,0,31,31 /* Check for little endian access */ + beq 20f /* Jump if big endian */ + xori r1,r1,3 +20: mtspr SPRN_DAR,r1 /* Set fault address */ + mfmsr r0 /* Restore "normal" registers */ + xoris r0,r0,MSR_TGPR>>16 + mtcrf 0x80,r3 /* Restore CR0 */ + mtmsr r0 + b DataAccess + +/* + * Handle TLB miss for DATA Store on 603/603e + */ + . 
= 0x1200 +DataStoreTLBMiss: +/* + * r0: scratch + * r1: linux style pte ( later becomes ppc hardware pte ) + * r2: ptr to linux-style pte + * r3: scratch + */ + /* Get PTE (linux-style) and check access */ + mfspr r3,SPRN_DMISS + lis r1, TASK_SIZE@h /* check if kernel address */ + cmplw 0,r1,r3 + mfspr r2, SPRN_SPRG_PGDIR +#ifdef CONFIG_SWAP + li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED +#else + li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT +#endif + bgt- 112f + lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ + addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ +112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r2) /* get pmd entry */ + rlwinm. r2,r2,0,0,19 /* extract address of pte page */ + beq- DataAddressInvalid /* return if no mapping */ + rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ + lwz r0,0(r2) /* get linux-style pte */ + andc. r1,r1,r0 /* check access & ~permission */ + bne- DataAddressInvalid /* return if access not permitted */ + /* + * NOTE! We are assuming this is not an SMP system, otherwise + * we would need to update the pte atomically with lwarx/stwcx. + */ + /* Convert linux-style PTE to low word of PPC-style PTE */ + rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */ + li r1,0xe06 /* clear out reserved bits & PP msb */ + andc r1,r0,r1 /* PP = user? 1: 0 */ +BEGIN_FTR_SECTION + rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ +END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) + mtspr SPRN_RPA,r1 + mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ + mtcrf 0x80,r2 +BEGIN_MMU_FTR_SECTION + li r0,1 + mfspr r1,SPRN_SPRG_603_LRU + rlwinm r2,r3,20,27,31 /* Get Address bits 15:19 */ + slw r0,r0,r2 + xor r1,r0,r1 + srw r0,r1,r2 + mtspr SPRN_SPRG_603_LRU,r1 + mfspr r2,SPRN_SRR1 + rlwimi r2,r0,31-14,14,14 + mtspr SPRN_SRR1,r2 +END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) + tlbld r3 + rfi + +#ifndef CONFIG_ALTIVEC +#define altivec_assist_exception unknown_exception +#endif + +#ifndef CONFIG_TAU_INT +#define TAUException unknown_exception +#endif + + EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_STD) + EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_STD) + EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_STD) + EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD) + EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_STD) + EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_STD) + 
EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_STD) + + . = 0x3000 + +machine_check_tramp: + EXC_XFER_STD(0x200, machine_check_exception) + +alignment_exception_tramp: + EXC_XFER_STD(0x600, alignment_exception) + +handle_page_fault_tramp_1: +#ifdef CONFIG_VMAP_STACK + EXCEPTION_PROLOG_2 handle_dar_dsisr=1 +#endif + lwz r4, _DAR(r11) + lwz r5, _DSISR(r11) + /* fall through */ +handle_page_fault_tramp_2: + EXC_XFER_LITE(0x300, handle_page_fault) + +#ifdef CONFIG_VMAP_STACK +.macro save_regs_thread thread + stw r0, THR0(\thread) + stw r3, THR3(\thread) + stw r4, THR4(\thread) + stw r5, THR5(\thread) + stw r6, THR6(\thread) + stw r8, THR8(\thread) + stw r9, THR9(\thread) + mflr r0 + stw r0, THLR(\thread) + mfctr r0 + stw r0, THCTR(\thread) +.endm + +.macro restore_regs_thread thread + lwz r0, THLR(\thread) + mtlr r0 + lwz r0, THCTR(\thread) + mtctr r0 + lwz r0, THR0(\thread) + lwz r3, THR3(\thread) + lwz r4, THR4(\thread) + lwz r5, THR5(\thread) + lwz r6, THR6(\thread) + lwz r8, THR8(\thread) + lwz r9, THR9(\thread) +.endm + +hash_page_dsi: + save_regs_thread r10 + mfdsisr r3 + mfdar r4 + mfsrr0 r5 + mfsrr1 r9 + rlwinm r3, r3, 32 - 15, _PAGE_RW /* DSISR_STORE -> _PAGE_RW */ + bl hash_page + mfspr r10, SPRN_SPRG_THREAD + restore_regs_thread r10 + b .Lhash_page_dsi_cont + +hash_page_isi: + mr r11, r10 + mfspr r10, SPRN_SPRG_THREAD + save_regs_thread r10 + li r3, 0 + lwz r4, SRR0(r10) + lwz r9, SRR1(r10) + bl hash_page + mfspr r10, SPRN_SPRG_THREAD + restore_regs_thread r10 + mr r10, r11 + b .Lhash_page_isi_cont + + .globl fast_hash_page_return +fast_hash_page_return: + andis. r10, r9, SRR1_ISI_NOPT@h /* Set on ISI, cleared on DSI */ + mfspr r10, SPRN_SPRG_THREAD + restore_regs_thread r10 + bne 1f + + /* DSI */ + mtcr r11 + lwz r11, THR11(r10) + mfspr r10, SPRN_SPRG_SCRATCH0 + RFI + +1: /* ISI */ + mtcr r11 + mfspr r11, SPRN_SPRG_SCRATCH1 + mfspr r10, SPRN_SPRG_SCRATCH0 + RFI + +stack_overflow: + vmap_stack_overflow_exception +#endif + +AltiVecUnavailable: + EXCEPTION_PROLOG +#ifdef CONFIG_ALTIVEC + beq 1f + bl load_up_altivec /* if from user, just load it up */ + b fast_exception_return +#endif /* CONFIG_ALTIVEC */ +1: addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_LITE(0xf20, altivec_unavailable_exception) + +PerformanceMonitor: + EXCEPTION_PROLOG + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_STD(0xf00, performance_monitor_exception) + + +/* + * This code is jumped to from the startup code to copy + * the kernel image to physical address PHYSICAL_START. + */ +relocate_kernel: + addis r9,r26,klimit@ha /* fetch klimit */ + lwz r25,klimit@l(r9) + addis r25,r25,-KERNELBASE@h + lis r3,PHYSICAL_START@h /* Destination base address */ + li r6,0 /* Destination offset */ + li r5,0x4000 /* # bytes of memory to copy */ + bl copy_and_flush /* copy the first 0x4000 bytes */ + addi r0,r3,4f@l /* jump to the address of 4f */ + mtctr r0 /* in copy and do the rest. */ + bctr /* jump to the copy */ +4: mr r5,r25 + bl copy_and_flush /* copy the rest */ + b turn_on_mmu + +/* + * Copy routine used to copy the kernel to start at physical address 0 + * and flush and invalidate the caches as needed. 
+ * r3 = dest addr, r4 = source addr, r5 = copy limit, r6 = start offset + * on exit, r3, r4, r5 are unchanged, r6 is updated to be >= r5. + */ +_ENTRY(copy_and_flush) + addi r5,r5,-4 + addi r6,r6,-4 +4: li r0,L1_CACHE_BYTES/4 + mtctr r0 +3: addi r6,r6,4 /* copy a cache line */ + lwzx r0,r6,r4 + stwx r0,r6,r3 + bdnz 3b + dcbst r6,r3 /* write it to memory */ + sync + icbi r6,r3 /* flush the icache line */ + cmplw 0,r6,r5 + blt 4b + sync /* additional sync needed on g4 */ + isync + addi r5,r5,4 + addi r6,r6,4 + blr + +#ifdef CONFIG_SMP + .globl __secondary_start_mpc86xx +__secondary_start_mpc86xx: + mfspr r3, SPRN_PIR + stw r3, __secondary_hold_acknowledge@l(0) + mr r24, r3 /* cpu # */ + b __secondary_start + + .globl __secondary_start_pmac_0 +__secondary_start_pmac_0: + /* NB the entries for cpus 0, 1, 2 must each occupy 8 bytes. */ + li r24,0 + b 1f + li r24,1 + b 1f + li r24,2 + b 1f + li r24,3 +1: + /* on powersurge, we come in here with IR=0 and DR=1, and DBAT 0 + set to map the 0xf0000000 - 0xffffffff region */ + mfmsr r0 + rlwinm r0,r0,0,28,26 /* clear DR (0x10) */ + mtmsr r0 + isync + + .globl __secondary_start +__secondary_start: + /* Copy some CPU settings from CPU 0 */ + bl __restore_cpu_setup + + lis r3,-KERNELBASE@h + mr r4,r24 + bl call_setup_cpu /* Call setup_cpu for this CPU */ +#ifdef CONFIG_PPC_BOOK3S_32 + lis r3,-KERNELBASE@h + bl init_idle_6xx +#endif /* CONFIG_PPC_BOOK3S_32 */ + + /* get current's stack and current */ + lis r2,secondary_current@ha + tophys(r2,r2) + lwz r2,secondary_current@l(r2) + tophys(r1,r2) + lwz r1,TASK_STACK(r1) + + /* stack */ + addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + li r0,0 + tophys(r3,r1) + stw r0,0(r3) + + /* load up the MMU */ + bl load_segment_registers + bl load_up_mmu + + /* ptr to phys current thread */ + tophys(r4,r2) + addi r4,r4,THREAD /* phys address of our thread_struct */ + mtspr SPRN_SPRG_THREAD,r4 + lis r4, (swapper_pg_dir - PAGE_OFFSET)@h + ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l + mtspr SPRN_SPRG_PGDIR, r4 + + /* enable MMU and jump to start_secondary */ + li r4,MSR_KERNEL + lis r3,start_secondary@h + ori r3,r3,start_secondary@l + mtspr SPRN_SRR0,r3 + mtspr SPRN_SRR1,r4 + RFI +#endif /* CONFIG_SMP */ + +#ifdef CONFIG_KVM_BOOK3S_HANDLER +#include "../kvm/book3s_rmhandlers.S" +#endif + +/* + * Those generic dummy functions are kept for CPUs not + * included in CONFIG_PPC_BOOK3S_32 + */ +#if !defined(CONFIG_PPC_BOOK3S_32) +_ENTRY(__save_cpu_setup) + blr +_ENTRY(__restore_cpu_setup) + blr +#endif /* !defined(CONFIG_PPC_BOOK3S_32) */ + +/* + * Load stuff into the MMU. Intended to be called with + * IR=0 and DR=0. + */ +early_hash_table: + sync /* Force all PTE updates to finish */ + isync + tlbia /* Clear all TLB entries */ + sync /* wait for tlbia/tlbie to finish */ + TLBSYNC /* ... on all CPUs */ + /* Load the SDR1 register (hash table base & size) */ + lis r6, early_hash - PAGE_OFFSET@h + ori r6, r6, 3 /* 256kB table */ + mtspr SPRN_SDR1, r6 + lis r6, early_hash@h + lis r3, Hash@ha + stw r6, Hash@l(r3) + blr + +load_up_mmu: + sync /* Force all PTE updates to finish */ + isync + tlbia /* Clear all TLB entries */ + sync /* wait for tlbia/tlbie to finish */ + TLBSYNC /* ... on all CPUs */ + /* Load the SDR1 register (hash table base & size) */ + lis r6,_SDR1@ha + tophys(r6,r6) + lwz r6,_SDR1@l(r6) + mtspr SPRN_SDR1,r6 + +/* Load the BAT registers with the values set up by MMU_init. 
*/ + lis r3,BATS@ha + addi r3,r3,BATS@l + tophys(r3,r3) + LOAD_BAT(0,r3,r4,r5) + LOAD_BAT(1,r3,r4,r5) + LOAD_BAT(2,r3,r4,r5) + LOAD_BAT(3,r3,r4,r5) +BEGIN_MMU_FTR_SECTION + LOAD_BAT(4,r3,r4,r5) + LOAD_BAT(5,r3,r4,r5) + LOAD_BAT(6,r3,r4,r5) + LOAD_BAT(7,r3,r4,r5) +END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) + blr + +_GLOBAL(load_segment_registers) + li r0, NUM_USER_SEGMENTS /* load up user segment register values */ + mtctr r0 /* for context 0 */ + li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */ +#ifdef CONFIG_PPC_KUEP + oris r3, r3, SR_NX@h /* Set Nx */ +#endif +#ifdef CONFIG_PPC_KUAP + oris r3, r3, SR_KS@h /* Set Ks */ +#endif + li r4, 0 +3: mtsrin r3, r4 + addi r3, r3, 0x111 /* increment VSID */ + addis r4, r4, 0x1000 /* address of next segment */ + bdnz 3b + li r0, 16 - NUM_USER_SEGMENTS /* load up kernel segment registers */ + mtctr r0 /* for context 0 */ + rlwinm r3, r3, 0, ~SR_NX /* Nx = 0 */ + rlwinm r3, r3, 0, ~SR_KS /* Ks = 0 */ + oris r3, r3, SR_KP@h /* Kp = 1 */ +3: mtsrin r3, r4 + addi r3, r3, 0x111 /* increment VSID */ + addis r4, r4, 0x1000 /* address of next segment */ + bdnz 3b + blr + +/* + * This is where the main kernel code starts. + */ +start_here: + /* ptr to current */ + lis r2,init_task@h + ori r2,r2,init_task@l + /* Set up for using our exception vectors */ + /* ptr to phys current thread */ + tophys(r4,r2) + addi r4,r4,THREAD /* init task's THREAD */ + mtspr SPRN_SPRG_THREAD,r4 + lis r4, (swapper_pg_dir - PAGE_OFFSET)@h + ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l + mtspr SPRN_SPRG_PGDIR, r4 + + /* stack */ + lis r1,init_thread_union@ha + addi r1,r1,init_thread_union@l + li r0,0 + stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) +/* + * Do early platform-specific initialization, + * and set up the MMU. + */ +#ifdef CONFIG_KASAN + bl kasan_early_init +#endif + li r3,0 + mr r4,r31 + bl machine_init + bl __save_cpu_setup + bl MMU_init + bl MMU_init_hw_patch + +/* + * Go back to running unmapped so we can load up new values + * for SDR1 (hash table pointer) and the segment registers + * and change to using our exception vectors. + */ + lis r4,2f@h + ori r4,r4,2f@l + tophys(r4,r4) + li r3,MSR_KERNEL & ~(MSR_IR|MSR_DR) + + .align 4 + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r3 + RFI +/* Load up the kernel context */ +2: bl load_up_mmu + +#ifdef CONFIG_BDI_SWITCH + /* Add helper information for the Abatron bdiGDB debugger. + * We do this here because we know the mmu is disabled, and + * will be enabled for real in just a few instructions. + */ + lis r5, abatron_pteptrs@h + ori r5, r5, abatron_pteptrs@l + stw r5, 0xf0(0) /* This much match your Abatron config */ + lis r6, swapper_pg_dir@h + ori r6, r6, swapper_pg_dir@l + tophys(r5, r5) + stw r6, 0(r5) +#endif /* CONFIG_BDI_SWITCH */ + +/* Now turn on the MMU for real! */ + li r4,MSR_KERNEL + lis r3,start_kernel@h + ori r3,r3,start_kernel@l + mtspr SPRN_SRR0,r3 + mtspr SPRN_SRR1,r4 + RFI + +/* + * void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next); + * + * Set up the segment registers for a new context. + */ +_ENTRY(switch_mmu_context) + lwz r3,MMCONTEXTID(r4) + cmpwi cr0,r3,0 + blt- 4f + mulli r3,r3,897 /* multiply context by skew factor */ + rlwinm r3,r3,4,8,27 /* VSID = (context & 0xfffff) << 4 */ +#ifdef CONFIG_PPC_KUEP + oris r3, r3, SR_NX@h /* Set Nx */ +#endif +#ifdef CONFIG_PPC_KUAP + oris r3, r3, SR_KS@h /* Set Ks */ +#endif + li r0,NUM_USER_SEGMENTS + mtctr r0 + + lwz r4, MM_PGD(r4) +#ifdef CONFIG_BDI_SWITCH + /* Context switch the PTE pointer for the Abatron BDI2000. 
+ * The PGDIR is passed as second argument. + */ + lis r5, abatron_pteptrs@ha + stw r4, abatron_pteptrs@l + 0x4(r5) +#endif + tophys(r4, r4) + mtspr SPRN_SPRG_PGDIR, r4 + li r4,0 + isync +3: + mtsrin r3,r4 + addi r3,r3,0x111 /* next VSID */ + rlwinm r3,r3,0,8,3 /* clear out any overflow from VSID field */ + addis r4,r4,0x1000 /* address of next segment */ + bdnz 3b + sync + isync + blr +4: trap + EMIT_BUG_ENTRY 4b,__FILE__,__LINE__,0 + blr +EXPORT_SYMBOL(switch_mmu_context) + +/* + * An undocumented "feature" of 604e requires that the v bit + * be cleared before changing BAT values. + * + * Also, newer IBM firmware does not clear bat3 and 4 so + * this makes sure it's done. + * -- Cort + */ +clear_bats: + li r10,0 + + mtspr SPRN_DBAT0U,r10 + mtspr SPRN_DBAT0L,r10 + mtspr SPRN_DBAT1U,r10 + mtspr SPRN_DBAT1L,r10 + mtspr SPRN_DBAT2U,r10 + mtspr SPRN_DBAT2L,r10 + mtspr SPRN_DBAT3U,r10 + mtspr SPRN_DBAT3L,r10 + mtspr SPRN_IBAT0U,r10 + mtspr SPRN_IBAT0L,r10 + mtspr SPRN_IBAT1U,r10 + mtspr SPRN_IBAT1L,r10 + mtspr SPRN_IBAT2U,r10 + mtspr SPRN_IBAT2L,r10 + mtspr SPRN_IBAT3U,r10 + mtspr SPRN_IBAT3L,r10 +BEGIN_MMU_FTR_SECTION + /* Here's a tweak: at this point, CPU setup have + * not been called yet, so HIGH_BAT_EN may not be + * set in HID0 for the 745x processors. However, it + * seems that doesn't affect our ability to actually + * write to these SPRs. + */ + mtspr SPRN_DBAT4U,r10 + mtspr SPRN_DBAT4L,r10 + mtspr SPRN_DBAT5U,r10 + mtspr SPRN_DBAT5L,r10 + mtspr SPRN_DBAT6U,r10 + mtspr SPRN_DBAT6L,r10 + mtspr SPRN_DBAT7U,r10 + mtspr SPRN_DBAT7L,r10 + mtspr SPRN_IBAT4U,r10 + mtspr SPRN_IBAT4L,r10 + mtspr SPRN_IBAT5U,r10 + mtspr SPRN_IBAT5L,r10 + mtspr SPRN_IBAT6U,r10 + mtspr SPRN_IBAT6L,r10 + mtspr SPRN_IBAT7U,r10 + mtspr SPRN_IBAT7L,r10 +END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) + blr + +_ENTRY(update_bats) + lis r4, 1f@h + ori r4, r4, 1f@l + tophys(r4, r4) + mfmsr r6 + mflr r7 + li r3, MSR_KERNEL & ~(MSR_IR | MSR_DR) + rlwinm r0, r6, 0, ~MSR_RI + rlwinm r0, r0, 0, ~MSR_EE + mtmsr r0 + + .align 4 + mtspr SPRN_SRR0, r4 + mtspr SPRN_SRR1, r3 + RFI +1: bl clear_bats + lis r3, BATS@ha + addi r3, r3, BATS@l + tophys(r3, r3) + LOAD_BAT(0, r3, r4, r5) + LOAD_BAT(1, r3, r4, r5) + LOAD_BAT(2, r3, r4, r5) + LOAD_BAT(3, r3, r4, r5) +BEGIN_MMU_FTR_SECTION + LOAD_BAT(4, r3, r4, r5) + LOAD_BAT(5, r3, r4, r5) + LOAD_BAT(6, r3, r4, r5) + LOAD_BAT(7, r3, r4, r5) +END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) + li r3, MSR_KERNEL & ~(MSR_IR | MSR_DR | MSR_RI) + mtmsr r3 + mtspr SPRN_SRR0, r7 + mtspr SPRN_SRR1, r6 + RFI + +flush_tlbs: + lis r10, 0x40 +1: addic. r10, r10, -0x1000 + tlbie r10 + bgt 1b + sync + blr + +mmu_off: + addi r4, r3, __after_mmu_off - _start + mfmsr r3 + andi. r0,r3,MSR_DR|MSR_IR /* MMU enabled? */ + beqlr + andc r3,r3,r0 + + .align 4 + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r3 + sync + RFI + +/* We use one BAT to map up to 256M of RAM at _PAGE_OFFSET */ +initial_bats: + lis r11,PAGE_OFFSET@h + tophys(r8,r11) +#ifdef CONFIG_SMP + ori r8,r8,0x12 /* R/W access, M=1 */ +#else + ori r8,r8,2 /* R/W access */ +#endif /* CONFIG_SMP */ + ori r11,r11,BL_256M<<2|0x2 /* set up BAT registers for 604 */ + + mtspr SPRN_DBAT0L,r8 /* N.B. 
6xx have valid */ + mtspr SPRN_DBAT0U,r11 /* bit in upper BAT register */ + mtspr SPRN_IBAT0L,r8 + mtspr SPRN_IBAT0U,r11 + isync + blr + +#ifdef CONFIG_BOOTX_TEXT +setup_disp_bat: + /* + * setup the display bat prepared for us in prom.c + */ + mflr r8 + bl reloc_offset + mtlr r8 + addis r8,r3,disp_BAT@ha + addi r8,r8,disp_BAT@l + cmpwi cr0,r8,0 + beqlr + lwz r11,0(r8) + lwz r8,4(r8) + mtspr SPRN_DBAT3L,r8 + mtspr SPRN_DBAT3U,r11 + blr +#endif /* CONFIG_BOOTX_TEXT */ + +#ifdef CONFIG_PPC_EARLY_DEBUG_CPM +setup_cpm_bat: + lis r8, 0xf000 + ori r8, r8, 0x002a + mtspr SPRN_DBAT1L, r8 + + lis r11, 0xf000 + ori r11, r11, (BL_1M << 2) | 2 + mtspr SPRN_DBAT1U, r11 + + blr +#endif + +#ifdef CONFIG_PPC_EARLY_DEBUG_USBGECKO +setup_usbgecko_bat: + /* prepare a BAT for early io */ +#if defined(CONFIG_GAMECUBE) + lis r8, 0x0c00 +#elif defined(CONFIG_WII) + lis r8, 0x0d00 +#else +#error Invalid platform for USB Gecko based early debugging. +#endif + /* + * The virtual address used must match the virtual address + * associated to the fixmap entry FIX_EARLY_DEBUG_BASE. + */ + lis r11, 0xfffe /* top 128K */ + ori r8, r8, 0x002a /* uncached, guarded ,rw */ + ori r11, r11, 0x2 /* 128K, Vs=1, Vp=0 */ + mtspr SPRN_DBAT1L, r8 + mtspr SPRN_DBAT1U, r11 + blr +#endif + +#ifdef CONFIG_8260 +/* Jump into the system reset for the rom. + * We first disable the MMU, and then jump to the ROM reset address. + * + * r3 is the board info structure, r4 is the location for starting. + * I use this for building a small kernel that can load other kernels, + * rather than trying to write or rely on a rom monitor that can tftp load. + */ + .globl m8260_gorom +m8260_gorom: + mfmsr r0 + rlwinm r0,r0,0,17,15 /* clear MSR_EE in r0 */ + sync + mtmsr r0 + sync + mfspr r11, SPRN_HID0 + lis r10, 0 + ori r10,r10,HID0_ICE|HID0_DCE + andc r11, r11, r10 + mtspr SPRN_HID0, r11 + isync + li r5, MSR_ME|MSR_RI + lis r6,2f@h + addis r6,r6,-KERNELBASE@h + ori r6,r6,2f@l + mtspr SPRN_SRR0,r6 + mtspr SPRN_SRR1,r5 + isync + sync + rfi +2: + mtlr r4 + blr +#endif + + +/* + * We put a few things here that have to be page-aligned. + * This stuff goes at the beginning of the data segment, + * which is page-aligned. + */ + .data + .globl sdata +sdata: + .globl empty_zero_page +empty_zero_page: + .space 4096 +EXPORT_SYMBOL(empty_zero_page) + + .globl swapper_pg_dir +swapper_pg_dir: + .space PGD_TABLE_SIZE + +/* Room for two PTE pointers, usually the kernel and current user pointers + * to their respective root page table. + */ +abatron_pteptrs: + .space 8 -- cgit v1.2.3 From 865418795a1dea1c2b58a5fd7b6bdcb93e0c36b8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 6 Oct 2020 09:05:37 +0000 Subject: powerpc/32s: Remove #ifdef CONFIG_PPC_BOOK3S_32 in head_book3s_32.S head_book3s_32.S is only built when CONFIG_PPC_BOOK3S_32 is selected. Remove all conditions based on CONFIG_PPC_BOOK3S_32 in the file. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1b68632425d8866d147aea9005004e4594672211.1601975100.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_book3s_32.S | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index b7b554533e0d..5eb9eedac920 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -174,10 +174,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) bl reloc_offset li r24,0 /* cpu# */ bl call_setup_cpu /* Call setup_cpu for this CPU */ -#ifdef CONFIG_PPC_BOOK3S_32 bl reloc_offset bl init_idle_6xx -#endif /* CONFIG_PPC_BOOK3S_32 */ /* @@ -878,10 +876,8 @@ __secondary_start: lis r3,-KERNELBASE@h mr r4,r24 bl call_setup_cpu /* Call setup_cpu for this CPU */ -#ifdef CONFIG_PPC_BOOK3S_32 lis r3,-KERNELBASE@h bl init_idle_6xx -#endif /* CONFIG_PPC_BOOK3S_32 */ /* get current's stack and current */ lis r2,secondary_current@ha @@ -921,17 +917,6 @@ __secondary_start: #include "../kvm/book3s_rmhandlers.S" #endif -/* - * Those generic dummy functions are kept for CPUs not - * included in CONFIG_PPC_BOOK3S_32 - */ -#if !defined(CONFIG_PPC_BOOK3S_32) -_ENTRY(__save_cpu_setup) - blr -_ENTRY(__restore_cpu_setup) - blr -#endif /* !defined(CONFIG_PPC_BOOK3S_32) */ - /* * Load stuff into the MMU. Intended to be called with * IR=0 and DR=0. -- cgit v1.2.3 From 15c102153e722cc6e0729764a7068c209a7469cd Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 1 Oct 2020 12:42:39 +0000 Subject: powerpc/time: Rename mftbl() to mftb() On PPC64, we have mftb(). On PPC32, we have mftbl() and an #define mftb() mftbl(). mftb() and mftbl() are equivalent, their purpose is to read the content of SPRN_TRBL, as returned by 'mftb' simplified instruction. binutils seems to define 'mftbl' instruction as an equivalent of 'mftb'. However in both 32 bits and 64 bits documentation, only 'mftb' is defined, and when performing a disassembly with objdump, the displayed instruction is 'mftb' No need to have two ways to do the same thing with different names, rename mftbl() to have only mftb(). 
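For illustration only (this sketch is not taken from the kernel headers), the two spellings are interchangeable reads of the lower timebase word, which is why a single mftb() macro is enough. A minimal fragment, assuming a 32-bit build and plain GCC inline asm:

	unsigned long a, b;

	asm volatile("mftb %0"  : "=r" (a));	/* the documented mnemonic */
	asm volatile("mftbl %0" : "=r" (b));	/* binutils alias, same timebase read */

Both assemble to the same read of the lower timebase register, so nothing is lost by dropping the mftbl() spelling.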
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/94dc68d3d9ef9eb549796d4b938b6ba0305a049b.1601556145.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/reg.h | 5 ++--- arch/powerpc/include/asm/time.h | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 788058af1d44..c66dcdb47c44 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1439,19 +1439,18 @@ static inline void msr_check_and_clear(unsigned long bits) #else /* __powerpc64__ */ #if defined(CONFIG_PPC_8xx) -#define mftbl() ({unsigned long rval; \ +#define mftb() ({unsigned long rval; \ asm volatile("mftbl %0" : "=r" (rval)); rval;}) #define mftbu() ({unsigned long rval; \ asm volatile("mftbu %0" : "=r" (rval)); rval;}) #else -#define mftbl() ({unsigned long rval; \ +#define mftb() ({unsigned long rval; \ asm volatile("mfspr %0, %1" : "=r" (rval) : \ "i" (SPRN_TBRL)); rval;}) #define mftbu() ({unsigned long rval; \ asm volatile("mfspr %0, %1" : "=r" (rval) : \ "i" (SPRN_TBRU)); rval;}) #endif -#define mftb() mftbl() #endif /* !__powerpc64__ */ #define mttbl(v) asm volatile("mttbl %0":: "r"(v)) diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index a0c8ae4cb27c..6e681160981b 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -47,7 +47,7 @@ struct div_result { static inline unsigned long get_tbl(void) { - return mftbl(); + return mftb(); } static inline unsigned int get_tbu(void) -- cgit v1.2.3 From ff125fbcd45d1706861579dbe66e31f5b3f1e779 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 1 Oct 2020 12:42:40 +0000 Subject: powerpc/time: Make mftb() common to PPC32 and PPC64 No need to have two versions that are identical. CONFIG_PPC_CELL is only selected by PPC64 targets. CONFIG_E500 is the only PPC64 target selecting CONFIG_FSL_BOOK3E. 
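For context, the Cell/E500 variant being kept here works around an erratum where a timebase read can apparently return zero spuriously; conceptually it just retries until a non-zero value comes back. A rough, hand-written sketch of that idea (has_cell_tb_bug is an assumed stand-in for the CPU_FTR_CELL_TB_BUG feature test, not the macro from reg.h):

	#include <stdbool.h>

	static bool has_cell_tb_bug;	/* assumed flag mirroring the CPU feature test */

	static unsigned long read_tb(void)
	{
		unsigned long tb;

		do {
			asm volatile("mftb %0" : "=r" (tb));	/* timebase read */
		} while (has_cell_tb_bug && tb == 0);		/* retry on the erratum */

		return tb;
	}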
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/6bf23ec744aab4ba63506a011f6a145ea35d620d.1601556145.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/reg.h | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index c66dcdb47c44..f877a576b338 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1419,8 +1419,7 @@ static inline void msr_check_and_clear(unsigned long bits) __msr_check_and_clear(bits); } -#ifdef __powerpc64__ -#if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_FSL_BOOK3E) +#if defined(CONFIG_PPC_CELL) || defined(CONFIG_E500) #define mftb() ({unsigned long rval; \ asm volatile( \ "90: mfspr %0, %2;\n" \ @@ -1430,28 +1429,23 @@ static inline void msr_check_and_clear(unsigned long bits) : "=r" (rval) \ : "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL) : "cr0"); \ rval;}) +#elif defined(CONFIG_PPC_8xx) +#define mftb() ({unsigned long rval; \ + asm volatile("mftbl %0" : "=r" (rval)); rval;}) #else #define mftb() ({unsigned long rval; \ asm volatile("mfspr %0, %1" : \ "=r" (rval) : "i" (SPRN_TBRL)); rval;}) #endif /* !CONFIG_PPC_CELL */ -#else /* __powerpc64__ */ - #if defined(CONFIG_PPC_8xx) -#define mftb() ({unsigned long rval; \ - asm volatile("mftbl %0" : "=r" (rval)); rval;}) #define mftbu() ({unsigned long rval; \ asm volatile("mftbu %0" : "=r" (rval)); rval;}) #else -#define mftb() ({unsigned long rval; \ - asm volatile("mfspr %0, %1" : "=r" (rval) : \ - "i" (SPRN_TBRL)); rval;}) #define mftbu() ({unsigned long rval; \ asm volatile("mfspr %0, %1" : "=r" (rval) : \ "i" (SPRN_TBRU)); rval;}) #endif -#endif /* !__powerpc64__ */ #define mttbl(v) asm volatile("mttbl %0":: "r"(v)) #define mttbu(v) asm volatile("mttbu %0":: "r"(v)) -- cgit v1.2.3 From 942e89115b588b4b5df86930b5302a5c07b820ba Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 1 Oct 2020 12:42:41 +0000 Subject: powerpc/time: Avoid using get_tbl() and get_tbu() internally get_tbl() is confusing as it returns the content of TBL register on PPC32 but the concatenation of TBL and TBU on PPC64. Use mftb() instead. Do the same with get_tbu() for consistency allthough it's name is less confusing. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/41573406a4eab98838decaa91649086fef1e6119.1601556145.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/delay.h | 2 +- arch/powerpc/include/asm/time.h | 8 ++++---- arch/powerpc/kernel/time.c | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/delay.h b/arch/powerpc/include/asm/delay.h index 66963f7d3e64..51bb8c1476c7 100644 --- a/arch/powerpc/include/asm/delay.h +++ b/arch/powerpc/include/asm/delay.h @@ -54,7 +54,7 @@ extern void udelay(unsigned long usecs); ({ \ typeof(condition) __ret; \ unsigned long __loops = tb_ticks_per_usec * timeout; \ - unsigned long __start = get_tbl(); \ + unsigned long __start = mftb(); \ \ if (delay) { \ while (!(__ret = (condition)) && \ diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 6e681160981b..c83116ec79c3 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -76,9 +76,9 @@ static inline u64 get_tb(void) unsigned int tbhi, tblo, tbhi2; do { - tbhi = get_tbu(); - tblo = get_tbl(); - tbhi2 = get_tbu(); + tbhi = mftbu(); + tblo = mftb(); + tbhi2 = mftbu(); } while (tbhi != tbhi2); return ((u64)tbhi << 32) | tblo; @@ -123,7 +123,7 @@ static inline void set_dec(u64 val) static inline unsigned long tb_ticks_since(unsigned long tstamp) { - return get_tbl() - tstamp; + return mftb() - tstamp; } #define mulhwu(x,y) \ diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 760ea359a7f7..74efe46f5532 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -448,8 +448,8 @@ void __delay(unsigned long loops) */ spin_cpu_relax(); } else { - start = get_tbl(); - while (get_tbl() - start < loops) + start = mftb(); + while (mftb() - start < loops) spin_cpu_relax(); } spin_end(); -- cgit v1.2.3 From e8d5bf30eafc37e31ce68bc7ccf1db970fe3cd04 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 1 Oct 2020 12:42:42 +0000 Subject: powerpc/time: Remove get_tbu() get_tbu() is redundant with mftbu() and is not used anymore. Remove it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1746e2d11ea90c3f45877e1fcc6c79ce96cf6b98.1601556145.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/time.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index c83116ec79c3..3dd2deb623da 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -49,11 +49,6 @@ static inline unsigned long get_tbl(void) { return mftb(); } - -static inline unsigned int get_tbu(void) -{ - return mftbu(); -} #endif /* !CONFIG_PPC64 */ static inline u64 get_vtb(void) -- cgit v1.2.3 From 1156a6285cd38e5a6987ddee3758e7954c56cb3d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 1 Oct 2020 12:42:43 +0000 Subject: powerpc/time: Make get_tbl() common to PPC32 and PPC64 On PPC64, get_tbl() is defined as an alias of get_tb() which return the result of mftb(). That exactly the same as what the PPC32 version does. We don't need two versions. Remove the PPC64 definition of get_tbl() and use the PPC32 version for both. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a8eaabb87d69534e533ebac805163e08146e05bd.1601556145.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/time.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 3dd2deb623da..6ec0409d10f4 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -38,18 +38,11 @@ struct div_result { u64 result_low; }; -#ifdef CONFIG_PPC64 - /* For compatibility, get_tbl() is defined as get_tb() on ppc64 */ -#define get_tbl get_tb - -#else - static inline unsigned long get_tbl(void) { return mftb(); } -#endif /* !CONFIG_PPC64 */ static inline u64 get_vtb(void) { -- cgit v1.2.3 From 9686e431c683ee7b8aca0f3985c244aee3d9f30d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 1 Oct 2020 12:42:44 +0000 Subject: powerpc/time: Make get_tb() common to PPC32 and PPC64 mftbu() is always defined now, so the #ifdef can be removed and replaced by an IS_ENABLED(CONFIG_PPC64) inside the PPC32 version of get_tb(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/47e49717d2643169ffcbe5d507f184cf49f0fe95.1601556145.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/time.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 6ec0409d10f4..2f566c1a754c 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -53,16 +53,13 @@ static inline u64 get_vtb(void) return 0; } -#ifdef CONFIG_PPC64 -static inline u64 get_tb(void) -{ - return mftb(); -} -#else /* CONFIG_PPC64 */ static inline u64 get_tb(void) { unsigned int tbhi, tblo, tbhi2; + if (IS_ENABLED(CONFIG_PPC64)) + return mftb(); + do { tbhi = mftbu(); tblo = mftb(); @@ -71,7 +68,6 @@ static inline u64 get_tb(void) return ((u64)tbhi << 32) | tblo; } -#endif /* !CONFIG_PPC64 */ static inline void set_tb(unsigned int upper, unsigned int lower) { -- cgit v1.2.3 From ffd0b25ca049a477cb757e5bcf2d5e1664d12e5d Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Wed, 14 Oct 2020 14:28:11 -0400 Subject: Revert "powerpc/pci: unmap legacy INTx interrupts when a PHB is removed" This reverts commit 3a3181e16fbde752007759f8759d25e0ff1fc425 which causes memory corruptions on POWER9 powernv. 
eg: pci_bus 0035:08: busn_res: [bus 08-0c] is released ============================================================================= BUG kmalloc-16 (Tainted: G W O ): Object already free ----------------------------------------------------------------------------- Disabling lock debugging due to kernel taint INFO: Allocated in pcibios_scan_phb+0x104/0x3e0 age=1960714 cpu=4 pid=1 __slab_alloc+0xa4/0xf0 __kmalloc+0x294/0x330 pcibios_scan_phb+0x104/0x3e0 pcibios_init+0x84/0x124 do_one_initcall+0xac/0x528 kernel_init_freeable+0x35c/0x3fc kernel_init+0x24/0x148 ret_from_kernel_thread+0x5c/0x80 INFO: Freed in pcibios_remove_bus+0x70/0x90 age=0 cpu=16 pid=1717146 kfree+0x49c/0x510 pcibios_remove_bus+0x70/0x90 pci_remove_bus+0xe4/0x110 pci_remove_bus_device+0x74/0x170 pci_remove_bus_device+0x4c/0x170 pci_stop_and_remove_bus_device_locked+0x34/0x50 remove_store+0xc0/0xe0 dev_attr_store+0x30/0x50 sysfs_kf_write+0x68/0xb0 kernfs_fop_write+0x114/0x260 vfs_write+0xe4/0x260 ksys_write+0x74/0x130 system_call_exception+0xf8/0x1d0 system_call_common+0xe8/0x218 INFO: Slab 0x0000000099caaf22 objects=178 used=174 fp=0x00000000006a64b0 flags=0x7fff8000000201 INFO: Object 0x00000000f360132d @offset=30192 fp=0x0000000000000000 Signed-off-by: Qian Cai Acked-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201014182811.12027-1-cai@lca.pw --- arch/powerpc/include/asm/pci-bridge.h | 6 -- arch/powerpc/kernel/pci-common.c | 114 ---------------------------------- 2 files changed, 120 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index d21e070352dc..d2a2a14e56f9 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -48,9 +48,6 @@ struct pci_controller_ops { /* * Structure of a PCI controller (host bridge) - * - * @irq_count: number of interrupt mappings - * @irq_map: interrupt mappings */ struct pci_controller { struct pci_bus *bus; @@ -130,9 +127,6 @@ struct pci_controller { void *private_data; struct npu *npu; - - unsigned int irq_count; - unsigned int *irq_map; }; /* These are used for config access before all the PCI probing diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index deb831f0ae13..be108616a721 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -353,115 +353,6 @@ struct pci_controller *pci_find_controller_for_domain(int domain_nr) return NULL; } -/* - * Assumption is made on the interrupt parent. All interrupt-map - * entries are considered to have the same parent. 
- */ -static int pcibios_irq_map_count(struct pci_controller *phb) -{ - const __be32 *imap; - int imaplen; - struct device_node *parent; - u32 intsize, addrsize, parintsize, paraddrsize; - - if (of_property_read_u32(phb->dn, "#interrupt-cells", &intsize)) - return 0; - if (of_property_read_u32(phb->dn, "#address-cells", &addrsize)) - return 0; - - imap = of_get_property(phb->dn, "interrupt-map", &imaplen); - if (!imap) { - pr_debug("%pOF : no interrupt-map\n", phb->dn); - return 0; - } - imaplen /= sizeof(u32); - pr_debug("%pOF : imaplen=%d\n", phb->dn, imaplen); - - if (imaplen < (addrsize + intsize + 1)) - return 0; - - imap += intsize + addrsize; - parent = of_find_node_by_phandle(be32_to_cpup(imap)); - if (!parent) { - pr_debug("%pOF : no imap parent found !\n", phb->dn); - return 0; - } - - if (of_property_read_u32(parent, "#interrupt-cells", &parintsize)) { - pr_debug("%pOF : parent lacks #interrupt-cells!\n", phb->dn); - return 0; - } - - if (of_property_read_u32(parent, "#address-cells", ¶ddrsize)) - paraddrsize = 0; - - return imaplen / (addrsize + intsize + 1 + paraddrsize + parintsize); -} - -static void pcibios_irq_map_init(struct pci_controller *phb) -{ - phb->irq_count = pcibios_irq_map_count(phb); - if (phb->irq_count < PCI_NUM_INTX) - phb->irq_count = PCI_NUM_INTX; - - pr_debug("%pOF : interrupt map #%d\n", phb->dn, phb->irq_count); - - phb->irq_map = kcalloc(phb->irq_count, sizeof(unsigned int), - GFP_KERNEL); -} - -static void pci_irq_map_register(struct pci_dev *pdev, unsigned int virq) -{ - struct pci_controller *phb = pci_bus_to_host(pdev->bus); - int i; - - if (!phb->irq_map) - return; - - for (i = 0; i < phb->irq_count; i++) { - /* - * Look for an empty or an equivalent slot, as INTx - * interrupts can be shared between adapters. - */ - if (phb->irq_map[i] == virq || !phb->irq_map[i]) { - phb->irq_map[i] = virq; - break; - } - } - - if (i == phb->irq_count) - pr_err("PCI:%s all platform interrupts mapped\n", - pci_name(pdev)); -} - -/* - * Clearing the mapped interrupts will also clear the underlying - * mappings of the ESB pages of the interrupts when under XIVE. It is - * a requirement of PowerVM to clear all memory mappings before - * removing a PHB. - */ -static void pci_irq_map_dispose(struct pci_bus *bus) -{ - struct pci_controller *phb = pci_bus_to_host(bus); - int i; - - if (!phb->irq_map) - return; - - pr_debug("PCI: Clearing interrupt mappings for PHB %04x:%02x...\n", - pci_domain_nr(bus), bus->number); - for (i = 0; i < phb->irq_count; i++) - irq_dispose_mapping(phb->irq_map[i]); - - kfree(phb->irq_map); -} - -void pcibios_remove_bus(struct pci_bus *bus) -{ - pci_irq_map_dispose(bus); -} -EXPORT_SYMBOL_GPL(pcibios_remove_bus); - /* * Reads the interrupt pin to determine if interrupt is use by card. * If the interrupt is used, then gets the interrupt line from the @@ -510,8 +401,6 @@ static int pci_read_irq_line(struct pci_dev *pci_dev) pci_dev->irq = virq; - /* Record all interrut mappings for later removal of a PHB */ - pci_irq_map_register(pci_dev, virq); return 0; } @@ -1665,9 +1554,6 @@ void pcibios_scan_phb(struct pci_controller *hose) pr_debug("PCI: Scanning PHB %pOF\n", node); - /* Allocate interrupt mappings array */ - pcibios_irq_map_init(hose); - /* Get some IO space for the new PHB */ pcibios_setup_phb_io_space(hose); -- cgit v1.2.3