From 67c3e59443f5fc77be39e2ce0db75fbfa78c7965 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Mon, 2 Jul 2018 11:08:16 +0200 Subject: powerpc/pseries: Fix missing of_node_put() in rng_init() The call to of_find_compatible_node() returns a node pointer with refcount incremented thus it must be explicitly decremented here before returning. Fixes: a489043f4626 ("powerpc/pseries: Implement arch_get_random_long() based on H_RANDOM") Signed-off-by: Nicholas Mc Guire Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1530522496-14816-1-git-send-email-hofrat@osadl.org --- arch/powerpc/platforms/pseries/rng.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/rng.c b/arch/powerpc/platforms/pseries/rng.c index bbb97169bf63..6268545947b8 100644 --- a/arch/powerpc/platforms/pseries/rng.c +++ b/arch/powerpc/platforms/pseries/rng.c @@ -36,6 +36,7 @@ static __init int rng_init(void) ppc_md.get_random_seed = pseries_get_random_long; + of_node_put(dn); return 0; } machine_subsys_initcall(pseries, rng_init); -- cgit v1.2.3 From d3e669f31ec35856f5e85df9224ede5bdbf1bc7b Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Wed, 4 Jul 2018 10:03:27 +0200 Subject: powerpc/icp-hv: Fix missing of_node_put() in success path Both of_find_compatible_node() and of_find_node_by_type() will return a refcounted node on success - thus for the success path the node must be explicitly released with a of_node_put(). Fixes: 0b05ac6e2480 ("powerpc/xics: Rewrite XICS driver") Signed-off-by: Nicholas Mc Guire Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1530691407-3991-1-git-send-email-hofrat@osadl.org --- arch/powerpc/sysdev/xics/icp-hv.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c index ad8117148ea3..21b9d1bf39ff 100644 --- a/arch/powerpc/sysdev/xics/icp-hv.c +++ b/arch/powerpc/sysdev/xics/icp-hv.c @@ -174,6 +174,7 @@ int icp_hv_init(void) icp_ops = &icp_hv_ops; + of_node_put(np); return 0; } -- cgit v1.2.3 From 374f6178f3483dcad151fc14b2fad92ed6652f07 Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Fri, 3 Apr 2020 17:38:38 +0200 Subject: ocxl: Remove custom service to allocate interrupts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We now allocate interrupts through xive directly. 
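For illustration, a minimal sketch of the kind of direct XIVE allocation callers can now use instead of the removed helper. The real ocxl call sites live outside arch/powerpc and are not part of this diff; the function names below are hypothetical, but the two XIVE calls are the same ones the removed pnv_ocxl_alloc_xive_irq()/pnv_ocxl_free_xive_irq() wrappers used:

    #include <asm/xive.h>

    static int demo_alloc_xive_irq(u32 *hwirq)
    {
            u32 irq = xive_native_alloc_irq();   /* same API the removed helper wrapped */

            if (!irq)
                    return -ENOENT;
            *hwirq = irq;
            return 0;
    }

    static void demo_free_xive_irq(u32 hwirq)
    {
            xive_native_free_irq(hwirq);
    }
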
Signed-off-by: Frederic Barrat Reviewed-by: Cédric Le Goater Reviewed-by: Greg Kurz Acked-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200403153838.29224-5-fbarrat@linux.ibm.com --- arch/powerpc/include/asm/pnv-ocxl.h | 3 --- arch/powerpc/platforms/powernv/ocxl.c | 30 ------------------------------ 2 files changed, 33 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h index ee79d2cd9fb6..d37ededca3ee 100644 --- a/arch/powerpc/include/asm/pnv-ocxl.h +++ b/arch/powerpc/include/asm/pnv-ocxl.h @@ -28,7 +28,4 @@ int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask, void **p void pnv_ocxl_spa_release(void *platform_data); int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle); -int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr); -void pnv_ocxl_free_xive_irq(u32 irq); - #endif /* _ASM_PNV_OCXL_H */ diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c index 8c65aacda9c8..ecdad219d704 100644 --- a/arch/powerpc/platforms/powernv/ocxl.c +++ b/arch/powerpc/platforms/powernv/ocxl.c @@ -2,7 +2,6 @@ // Copyright 2017 IBM Corp. #include #include -#include #include #include "pci.h" @@ -484,32 +483,3 @@ int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle) return rc; } EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache); - -int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr) -{ - __be64 flags, trigger_page; - s64 rc; - u32 hwirq; - - hwirq = xive_native_alloc_irq(); - if (!hwirq) - return -ENOENT; - - rc = opal_xive_get_irq_info(hwirq, &flags, NULL, &trigger_page, NULL, - NULL); - if (rc || !trigger_page) { - xive_native_free_irq(hwirq); - return -ENOENT; - } - *irq = hwirq; - *trigger_addr = be64_to_cpu(trigger_page); - return 0; - -} -EXPORT_SYMBOL_GPL(pnv_ocxl_alloc_xive_irq); - -void pnv_ocxl_free_xive_irq(u32 irq) -{ - xive_native_free_irq(irq); -} -EXPORT_SYMBOL_GPL(pnv_ocxl_free_xive_irq); -- cgit v1.2.3 From 8c7614d648037b0776e0b76cb62911be3b059ea4 Mon Sep 17 00:00:00 2001 From: Biwen Li Date: Wed, 27 May 2020 11:42:27 +0800 Subject: powerpc/dts/t4240rdb: remove interrupts property Since the interrupt pin for RTC DS1374 is not connected to the CPU on T4240RDB, remove the interrupt property from the device tree. This also fix the following warning for hwclock.util-linux: $ hwclock.util-linux hwclock.util-linux: select() to /dev/rtc0 to wait for clock tick timed out Signed-off-by: Biwen Li Acked-by: Li Yang Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200527034228.23793-1-biwen.li@oss.nxp.com --- arch/powerpc/boot/dts/fsl/t4240rdb.dts | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/boot/dts/fsl/t4240rdb.dts b/arch/powerpc/boot/dts/fsl/t4240rdb.dts index a56a705d41f7..145896f2eef6 100644 --- a/arch/powerpc/boot/dts/fsl/t4240rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t4240rdb.dts @@ -144,7 +144,6 @@ rtc@68 { compatible = "dallas,ds1374"; reg = <0x68>; - interrupts = <0x1 0x1 0 0>; }; }; -- cgit v1.2.3 From 843dc8ee23d1b353fa9cc24da3e52be0111d5931 Mon Sep 17 00:00:00 2001 From: Biwen Li Date: Wed, 27 May 2020 11:42:28 +0800 Subject: powerc/dtc/t1024rdb: remove interrupts property Since the interrupt pin for RTC DS1339 is not connected to the CPU on T1024RDB, remove the interrupt property from the device tree. 
This also fix the following warning for hwclock.util-linux: $ hwclock.util-linux hwclock.util-linux: select() to /dev/rtc0 to wait for clock tick timed out Signed-off-by: Biwen Li Acked-by: Li Yang Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200527034228.23793-2-biwen.li@oss.nxp.com --- arch/powerpc/boot/dts/fsl/t1024rdb.dts | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/boot/dts/fsl/t1024rdb.dts b/arch/powerpc/boot/dts/fsl/t1024rdb.dts index 73a645324bc1..dbcd31cc35dc 100644 --- a/arch/powerpc/boot/dts/fsl/t1024rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1024rdb.dts @@ -161,7 +161,6 @@ rtc@68 { compatible = "dallas,ds1339"; reg = <0x68>; - interrupts = <0x1 0x1 0 0>; }; }; -- cgit v1.2.3 From 738e6cad0ace88edec8f4ffa082749ad5df26409 Mon Sep 17 00:00:00 2001 From: zhengbin Date: Tue, 19 Nov 2019 14:14:30 +0800 Subject: powerpc/fadump: Remove set but not used variable 'elf' Fix gcc '-Wunused-but-set-variable' warning: arch/powerpc/kernel/fadump.c: In function fadump_update_elfcore_header: arch/powerpc/kernel/fadump.c:790:17: warning: variable elf set but not used [-Wunused-but-set-variable] It is introduced by commit ebaeb5ae2437 ("fadump: Convert firmware-assisted cpu state dump data into elf notes."), but never used, so remove it. Reported-by: Hulk Robot Signed-off-by: zhengbin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1574144074-142032-2-git-send-email-zhengbin13@huawei.com --- arch/powerpc/kernel/fadump.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 10ebb4bf71ad..d192067a112c 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -754,10 +754,8 @@ u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs) void fadump_update_elfcore_header(char *bufp) { - struct elfhdr *elf; struct elf_phdr *phdr; - elf = (struct elfhdr *)bufp; bufp += sizeof(struct elfhdr); /* First note is a place holder for cpu notes info. */ -- cgit v1.2.3 From ef23cf9a89a7aec19a29d548d1e219d436b23b6e Mon Sep 17 00:00:00 2001 From: zhengbin Date: Tue, 19 Nov 2019 14:14:31 +0800 Subject: powerpc/perf: Remove set but not used variable 'target' Fix gcc '-Wunused-but-set-variable' warning: arch/powerpc/perf/imc-pmu.c: In function trace_imc_event_init: arch/powerpc/perf/imc-pmu.c:1292:22: warning: variable target set but not used [-Wunused-but-set-variable] It is introduced by commit 012ae244845f ("powerpc/perf: Trace imc PMU functions"), but never used, so remove it. 
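For reference, a minimal stand-alone reproducer (hypothetical, not kernel code) of this warning class; the kernel's W=1 build enables -Wunused-but-set-variable:

    int demo(int x)
    {
            int target;             /* assigned below but never read */

            target = x + 1;         /* gcc: variable 'target' set but not used */
            return x;
    }
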
Reported-by: Hulk Robot Signed-off-by: zhengbin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1574144074-142032-3-git-send-email-zhengbin13@huawei.com --- arch/powerpc/perf/imc-pmu.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index a45d694a5d5d..00bb186caed9 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -1426,8 +1426,6 @@ static void trace_imc_event_del(struct perf_event *event, int flags) static int trace_imc_event_init(struct perf_event *event) { - struct task_struct *target; - if (event->attr.type != event->pmu->type) return -ENOENT; @@ -1458,7 +1456,6 @@ static int trace_imc_event_init(struct perf_event *event) mutex_unlock(&imc_global_refc.lock); event->hw.idx = -1; - target = event->hw.target; event->pmu->task_ctx_nr = perf_hw_context; event->destroy = reset_global_refc; -- cgit v1.2.3 From 18102e4bcc47f5b5ac70e2e4461d022c1ee6df24 Mon Sep 17 00:00:00 2001 From: zhengbin Date: Tue, 19 Nov 2019 14:14:34 +0800 Subject: powerpc/powernv: Remove set but not used variable 'parent' Fix gcc '-Wunused-but-set-variable' warning: arch/powerpc/platforms/powernv/pci-ioda.c: In function pnv_ioda_configure_pe: arch/powerpc/platforms/powernv/pci-ioda.c:867:18: warning: variable parent set but not used [-Wunused-but-set-variable] It is not used since commit b131a8425c34 ("powerpc/powernv: Set PELTV for compound PEs") Reported-by: Hulk Robot Signed-off-by: zhengbin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1574144074-142032-6-git-send-email-zhengbin13@huawei.com --- arch/powerpc/platforms/powernv/pci-ioda.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 023a4f987bb2..2b4ceb5e6ce4 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -894,7 +894,6 @@ int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) { - struct pci_dev *parent; uint8_t bcomp, dcomp, fcomp; long rc, rid_end, rid; @@ -904,7 +903,6 @@ int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER; fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER; - parent = pe->pbus->self; if (pe->flags & PNV_IODA_PE_BUS_ALL) count = resource_size(&pe->pbus->busn_res); else @@ -925,12 +923,6 @@ int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) } rid_end = pe->rid + (count << 8); } else { -#ifdef CONFIG_PCI_IOV - if (pe->flags & PNV_IODA_PE_VF) - parent = pe->parent_dev; - else -#endif /* CONFIG_PCI_IOV */ - parent = pe->pdev->bus->self; bcomp = OpalPciBusAll; dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER; fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER; -- cgit v1.2.3 From ccaea15296f9773abd43aaa17ee4b88848e4a505 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 31 Jul 2020 17:04:59 +0530 Subject: powerpc/vmemmap: Fix memory leak with vmemmap list allocation failures. If we fail to allocate vmemmap list, we don't keep track of allocated vmemmap block buf. Hence on section deactivate we skip vmemmap block buf free. This results in memory leak. 
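A condensed sketch of the fixed control flow, paraphrasing the patch below and ignoring the altmap case for brevity (the helper name is hypothetical):

    static int demo_populate_one(unsigned long start, unsigned long page_size, int node)
    {
            void *p = vmemmap_alloc_block_buf(page_size, node, NULL);

            if (!p)
                    return -ENOMEM;

            if (vmemmap_list_populate(__pa(p), start, node)) {
                    /*
                     * The block is only reachable through vmemmap_list, so a
                     * failed insertion must free it here; previously it was
                     * silently leaked.
                     */
                    free_pages((unsigned long)p, get_order(page_size));
                    return -ENOMEM;
            }
            return 0;
    }
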
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200731113500.248306-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/init_64.c | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 02e127fa5777..41b7da84030e 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -162,16 +162,16 @@ static __meminit struct vmemmap_backing * vmemmap_list_alloc(int node) return next++; } -static __meminit void vmemmap_list_populate(unsigned long phys, - unsigned long start, - int node) +static __meminit int vmemmap_list_populate(unsigned long phys, + unsigned long start, + int node) { struct vmemmap_backing *vmem_back; vmem_back = vmemmap_list_alloc(node); if (unlikely(!vmem_back)) { - WARN_ON(1); - return; + pr_debug("vmemap list allocation failed\n"); + return -ENOMEM; } vmem_back->phys = phys; @@ -179,6 +179,7 @@ static __meminit void vmemmap_list_populate(unsigned long phys, vmem_back->list = vmemmap_list; vmemmap_list = vmem_back; + return 0; } static bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start, @@ -199,6 +200,7 @@ static bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long star int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap) { + bool altmap_alloc; unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; /* Align to the page size of the linear mapping. */ @@ -228,13 +230,32 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, p = vmemmap_alloc_block_buf(page_size, node, altmap); if (!p) pr_debug("altmap block allocation failed, falling back to system memory"); + else + altmap_alloc = true; } - if (!p) + if (!p) { p = vmemmap_alloc_block_buf(page_size, node, NULL); + altmap_alloc = false; + } if (!p) return -ENOMEM; - vmemmap_list_populate(__pa(p), start, node); + if (vmemmap_list_populate(__pa(p), start, node)) { + /* + * If we don't populate vmemap list, we don't have + * the ability to free the allocated vmemmap + * pages in section_deactivate. Hence free them + * here. + */ + int nr_pfns = page_size >> PAGE_SHIFT; + unsigned long page_order = get_order(page_size); + + if (altmap_alloc) + vmem_altmap_free(altmap, nr_pfns); + else + free_pages((unsigned long)p, page_order); + return -ENOMEM; + } pr_debug(" * %016lx..%016lx allocated at %p\n", start, start + page_size, p); -- cgit v1.2.3 From 1c0a7ac0ec63ee626f669c9a4e278f6ae1dbfcf2 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 31 Jul 2020 17:05:00 +0530 Subject: powerpc/vmemmap: Don't warn if we don't find a mapping vmemmap list entry Now that we are handling vmemmap list allocation failure correctly, don't WARN in section deactivate when we don't find a mapping vmemmap list entry. 
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200731113500.248306-2-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/init_64.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 41b7da84030e..a8618f7d00a3 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -285,10 +285,8 @@ static unsigned long vmemmap_list_free(unsigned long start) vmem_back_prev = vmem_back; } - if (unlikely(!vmem_back)) { - WARN_ON(1); + if (unlikely(!vmem_back)) return 0; - } /* remove it from vmemmap_list */ if (vmem_back == vmemmap_list) /* remove head */ -- cgit v1.2.3 From 346427e668163e85cbbe14e4d9a2ddd49df1536c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 4 Aug 2020 18:43:16 +0100 Subject: powerpc/oprofile: fix spelling mistake "contex" -> "context" There is a spelling mistake in a pr_debug message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200804174316.402425-1-colin.king@canonical.com --- arch/powerpc/oprofile/cell/spu_task_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c index df59d0bb121f..489f993100d5 100644 --- a/arch/powerpc/oprofile/cell/spu_task_sync.c +++ b/arch/powerpc/oprofile/cell/spu_task_sync.c @@ -572,7 +572,7 @@ void spu_sync_buffer(int spu_num, unsigned int *samples, * samples are recorded. * No big deal -- so we just drop a few samples. */ - pr_debug("SPU_PROF: No cached SPU contex " + pr_debug("SPU_PROF: No cached SPU context " "for SPU #%d. Dropping samples.\n", spu_num); goto out; } -- cgit v1.2.3 From f6bac19cf65c5be21d14a0c9684c8f560f2096dd Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 4 Aug 2020 10:54:05 +1000 Subject: powerpc/powernv/smp: Fix spurious DBG() warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building with W=1 we get the following warning: arch/powerpc/platforms/powernv/smp.c: In function ‘pnv_smp_cpu_kill_self’: arch/powerpc/platforms/powernv/smp.c:276:16: error: suggest braces around empty body in an ‘if’ statement [-Werror=empty-body] 276 | cpu, srr1); | ^ cc1: all warnings being treated as errors The full context is this block: if (srr1 && !generic_check_cpu_restart(cpu)) DBG("CPU%d Unexpected exit while offline srr1=%lx!\n", cpu, srr1); When building with DEBUG undefined DBG() expands to nothing and GCC emits the warning due to the lack of braces around an empty statement. Signed-off-by: Oliver O'Halloran Reviewed-by: Joel Stanley Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200804005410.146094-2-oohall@gmail.com --- arch/powerpc/platforms/powernv/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index b2ba3e95bda7..bbf361f23ae8 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -43,7 +43,7 @@ #include #define DBG(fmt...) udbg_printf(fmt) #else -#define DBG(fmt...) +#define DBG(fmt...) 
do { } while (0) #endif static void pnv_smp_setup_cpu(int cpu) -- cgit v1.2.3 From 8471c1dd93de9a2278d41c527b76291e4ace8f1c Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 4 Aug 2020 10:54:06 +1000 Subject: powerpc/powernv: Include asm/powernv.h from the local powernv.h The asm/powernv.h header provides prototypes for functions which need to be called by non-powernv platform code. Also include it in the powernv.h that's local to the platform directory to squash some warnings about non-static functions missing prototypes. Also include powernv.h since from opal-memcons.c since it has the prototypes for the memcons wrangling functions which are used for the opal and ultravisor msglog. Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200804005410.146094-3-oohall@gmail.com --- arch/powerpc/platforms/powernv/opal-msglog.c | 2 ++ arch/powerpc/platforms/powernv/powernv.h | 7 +++++++ 2 files changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c index d26da19a611f..d3b6e135c18b 100644 --- a/arch/powerpc/platforms/powernv/opal-msglog.c +++ b/arch/powerpc/platforms/powernv/opal-msglog.c @@ -12,6 +12,8 @@ #include #include +#include "powernv.h" + /* OPAL in-memory console. Defined in OPAL source at core/console.c */ struct memcons { __be64 magic; diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index 1aa51c4fa904..11df4e16a1cc 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -2,6 +2,13 @@ #ifndef _POWERNV_H #define _POWERNV_H +/* + * There's various hacks scattered throughout the generic powerpc arch code + * that needs to call into powernv platform stuff. The prototypes for those + * functions are in asm/powernv.h + */ +#include + #ifdef CONFIG_SMP extern void pnv_smp_init(void); #else -- cgit v1.2.3 From 3b70464aa78917e88c1d4bfc2100c344c0eda8e0 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 4 Aug 2020 10:54:07 +1000 Subject: powerpc/powernv: Staticify functions without prototypes There's a few scattered in the powernv platform. 
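For reference, a minimal stand-alone illustration (hypothetical, not kernel code) of the missing-prototype warning class that W=1 enables and that marking file-local functions static silences:

    static int helper(int x)        /* file-local: no separate prototype needed */
    {
            return x * 2;
    }

    int public_entry(int x);        /* declared before the definition, e.g. in a header */
    int public_entry(int x)
    {
            /* without 'static' above, gcc -Wmissing-prototypes warns:
             * "no previous prototype for 'helper'" */
            return helper(x);
    }
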
Signed-off-by: Oliver O'Halloran Reviewed-by: Joel Stanley Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200804005410.146094-4-oohall@gmail.com --- arch/powerpc/platforms/powernv/eeh-powernv.c | 4 ++-- arch/powerpc/platforms/powernv/rng.c | 2 +- arch/powerpc/platforms/powernv/vas-window.c | 9 ++++----- 3 files changed, 7 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 9af8c3b98853..663bd69ac51b 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -38,7 +38,7 @@ static int eeh_event_irq = -EINVAL; -void pnv_pcibios_bus_add_device(struct pci_dev *pdev) +static void pnv_pcibios_bus_add_device(struct pci_dev *pdev) { dev_dbg(&pdev->dev, "EEH: Setting up device\n"); eeh_probe_device(pdev); @@ -190,7 +190,7 @@ PNV_EEH_DBGFS_ENTRY(inbB, 0xE10); #endif /* CONFIG_DEBUG_FS */ -void pnv_eeh_enable_phbs(void) +static void pnv_eeh_enable_phbs(void) { struct pci_controller *hose; struct pnv_phb *phb; diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index 8035caf6e297..72c25295c1c2 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -65,7 +65,7 @@ int powernv_get_random_real_mode(unsigned long *v) return 1; } -int powernv_get_random_darn(unsigned long *v) +static int powernv_get_random_darn(unsigned long *v) { unsigned long val; diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c index 6434f9cb5aed..5f5fe63a3d1c 100644 --- a/arch/powerpc/platforms/powernv/vas-window.c +++ b/arch/powerpc/platforms/powernv/vas-window.c @@ -186,7 +186,7 @@ static void unmap_winctx_mmio_bars(struct vas_window *window) * OS/User Window Context (UWC) MMIO Base Address Region for the given window. * Map these bus addresses and save the mapped kernel addresses in @window. */ -int map_winctx_mmio_bars(struct vas_window *window) +static int map_winctx_mmio_bars(struct vas_window *window) { int len; u64 start; @@ -214,7 +214,7 @@ int map_winctx_mmio_bars(struct vas_window *window) * registers are not sequential. And, we can only write to offsets * with valid registers. */ -void reset_window_regs(struct vas_window *window) +static void reset_window_regs(struct vas_window *window) { write_hvwc_reg(window, VREG(LPID), 0ULL); write_hvwc_reg(window, VREG(PID), 0ULL); @@ -357,7 +357,8 @@ static void init_rsvd_tx_buf_count(struct vas_window *txwin, * as a one-time task? That could work for NX but what about other * receivers? Let the receivers tell us the rx-fifo buffers for now. 
*/ -int init_winctx_regs(struct vas_window *window, struct vas_winctx *winctx) +static void init_winctx_regs(struct vas_window *window, + struct vas_winctx *winctx) { u64 val; int fifo_size; @@ -499,8 +500,6 @@ int init_winctx_regs(struct vas_window *window, struct vas_winctx *winctx) val = SET_FIELD(VAS_WINCTL_NX_WIN, val, winctx->nx_win); val = SET_FIELD(VAS_WINCTL_OPEN, val, 1); write_hvwc_reg(window, VREG(WINCTL), val); - - return 0; } static void vas_release_window_id(struct ida *ida, int winid) -- cgit v1.2.3 From fb248c3121af713f31736af359608491544cfc23 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 4 Aug 2020 10:54:08 +1000 Subject: powerpc/powernv: Fix spurious kerneldoc warnings in opal-prd.c Comments opening with /** are parsed by kerneldoc and this causes the following warning to be printed: arch/powerpc/platforms/powernv/opal-prd.c:31: warning: cannot understand function prototype: 'struct opal_prd_msg_queue_item ' opal_prd_mesg_queue_item is an internal data structure so there's no real need for it to be documented at all. Fix up the comment to squash the warning. Signed-off-by: Oliver O'Halloran Reviewed-by: Joel Stanley Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200804005410.146094-5-oohall@gmail.com --- arch/powerpc/platforms/powernv/opal-prd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c index 45f4223a790f..deddaebf8c14 100644 --- a/arch/powerpc/platforms/powernv/opal-prd.c +++ b/arch/powerpc/platforms/powernv/opal-prd.c @@ -24,7 +24,7 @@ #include -/** +/* * The msg member must be at the end of the struct, as it's followed by the * message data. */ -- cgit v1.2.3 From 3ced132a055c4e5046d21732393ae6848ff309e0 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 4 Aug 2020 10:54:10 +1000 Subject: powerpc/nx: Don't pack struct coprocessor_request_block MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building with W=1 results in the following warning: In file included from arch/powerpc/platforms/powernv/vas-fault.c:16: ./arch/powerpc/include/asm/icswx.h:159:1: error: alignment 1 of ‘struct coprocessor_request_block’ is less than 16 [-Werror=packed-not-aligned] 159 | } __packed; | ^ ./arch/powerpc/include/asm/icswx.h:159:1: error: alignment 1 of ‘struct coprocessor_request_block’ is less than 16 [-Werror=packed-not-aligned] ./arch/powerpc/include/asm/icswx.h:159:1: error: alignment 1 of ‘struct coprocessor_request_block’ is less than 16 [-Werror=packed-not-aligned] ./arch/powerpc/include/asm/icswx.h:159:1: error: alignment 1 of ‘struct coprocessor_request_block’ is less than 16 [-Werror=packed-not-aligned] cc1: all warnings being treated as errors This happens because coprocessor_request_block includes several sub-structures with an alignment specified using the __aligned(XX) attribute. The problem comes from coprocessor_request_block having the __packed attribute. Packing the structure causes the preferred alignment of the nested structures to be ignored and we get the warnings as a result. This isn't a problem in practice since the struct is defined with explicit padding in the form of reserved fields, but we'd like to get rid of the spurious warnings. The simplest solution is to remove the packed attribute and use a BUILD_BUG_ON() to ensure the struct is the correct (expected by HW) size compile time. 
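To make the interaction concrete, a stand-alone sketch (hypothetical types, not the kernel structs) of how packing an outer struct defeats a member's requested alignment, and how a compile-time size check can stand in for the packed attribute:

    struct inner {
            unsigned char data[16];
    } __attribute__((aligned(16)));

    struct outer_packed {
            unsigned char pad[8];
            struct inner member;            /* alignment drops to 1: -Wpacked-not-aligned */
    } __attribute__((packed));

    struct outer {
            unsigned char pad[8];
            unsigned char reserved[8];      /* explicit padding keeps the intended layout */
            struct inner member;            /* keeps its 16-byte alignment */
    };

    /* stand-in for the kernel's BUILD_BUG_ON() size check */
    _Static_assert(sizeof(struct outer) == 32, "unexpected struct size");
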
Also add a __aligned(128) to the request block structure since Book4 for P8 suggests the HW requires it to be aligned to a 128 byte boundary. There's a similar requirement for P9 since the COPY and PASTE instructions used to invoke VAS/NX accelerators operates on a cache line boundary. Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200804005410.146094-7-oohall@gmail.com --- arch/powerpc/include/asm/icswx.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/icswx.h b/arch/powerpc/include/asm/icswx.h index b0c70a35fd0e..f6599ccb3012 100644 --- a/arch/powerpc/include/asm/icswx.h +++ b/arch/powerpc/include/asm/icswx.h @@ -156,8 +156,7 @@ struct coprocessor_request_block { u8 reserved[32]; struct coprocessor_status_block csb; -} __packed; - +} __aligned(128); /* RFC02167 Initiate Coprocessor Instructions document * Chapter 8.2.1.1.1 RS @@ -188,6 +187,9 @@ static inline int icswx(__be32 ccw, struct coprocessor_request_block *crb) __be64 ccw_reg = ccw; u32 cr; + /* NB: the same structures are used by VAS-NX */ + BUILD_BUG_ON(sizeof(*crb) != 128); + __asm__ __volatile__( PPC_ICSWX(%1,0,%2) "\n" "mfcr %0\n" -- cgit v1.2.3 From b51ba4fe2e134b631f9c8f45423707aab71449b5 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 6 Aug 2020 06:01:42 +0000 Subject: powerpc/32s: Fix assembler warning about r0 The assembler says: arch/powerpc/kernel/head_32.S:1095: Warning: invalid register expression It's objecting to the use of r0 as the RA argument. That's because when RA = 0 the literal value 0 is used, rather than the content of r0, making the use of r0 in the source potentially confusing. Fix it to use a literal 0, the generated code is identical. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/2b69ac8e1cddff6f808fc7415907179eab4aae9e.1596693679.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index f3ab94d73936..5624db0e09a1 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -1092,7 +1092,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) */ lis r5, abatron_pteptrs@h ori r5, r5, abatron_pteptrs@l - stw r5, 0xf0(r0) /* This much match your Abatron config */ + stw r5, 0xf0(0) /* This much match your Abatron config */ lis r6, swapper_pg_dir@h ori r6, r6, swapper_pg_dir@l tophys(r5, r5) -- cgit v1.2.3 From 169b9afee572853522901b7cbf34842c0494a887 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 6 Aug 2020 12:19:06 +0000 Subject: powerpc/hwirq: Remove stale forward irq_chip declaration Since commit identified below, the forward declaration of struct irq_chip is useless (was struct hw_interrupt_type at that time) Remove it, together with the associated comment. 
Fixes: c0ad90a32fb6 ("[PATCH] genirq: add ->retrigger() irq op to consolidate hw_irq_resend()") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/fbe58d27cf128d5fe581e4510ded8701858f268e.1596716328.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/hw_irq.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 3a0db7b0b46e..538698facb80 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -372,12 +372,6 @@ static inline void may_hard_irq_enable(void) { } #define ARCH_IRQ_INIT_FLAGS IRQ_NOREQUEST -/* - * interrupt-retrigger: should we handle this via lost interrupts and IPIs - * or should we not care like we do now ? --BenH. - */ -struct irq_chip; - #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_HW_IRQ_H */ -- cgit v1.2.3 From b134cfc3e3276ccd5d29e39de5c848a45b08e410 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 6 Aug 2020 12:19:46 +0000 Subject: powerpc/irq: Drop forward declaration of struct irqaction Since the commit identified below, the forward declaration of struct irqaction is useless. Drop it. Fixes: b709c0832824 ("ppc64: move stack switching up in interrupt processing") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e0bcdabac45fcd26c02d7df273bd4a5827c6033d.1596716375.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/irq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index 814dfab7e392..4f983ca4030a 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -35,7 +35,6 @@ static __inline__ int irq_canonicalize(int irq) extern int distribute_irqs; -struct irqaction; struct pt_regs; #define __ARCH_HAS_DO_SOFTIRQ -- cgit v1.2.3 From 63442de4301188129e1fcff144fbfb966ad5eb19 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 6 Aug 2020 12:20:34 +0000 Subject: powerpc/fpu: Drop cvt_fd() and cvt_df() Those two functions have been unused since commit identified below. Drop them. 
Fixes: 31bfdb036f12 ("powerpc: Use instruction emulation infrastructure to handle alignment faults") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d5641ada199b8dd2af16ad00a66084cf974f2704.1596716418.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/processor.h | 2 -- arch/powerpc/kernel/fpu.S | 15 --------------- 2 files changed, 17 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index ed0d633ab5aa..2d68dbe56601 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -439,8 +439,6 @@ extern void flush_instruction_cache(void); extern void hard_reset_now(void); extern void poweroff_now(void); extern int fix_alignment(struct pt_regs *); -extern void cvt_fd(float *from, double *to); -extern void cvt_df(double *from, float *to); extern void _nmask_and_or_msr(unsigned long nmask, unsigned long or_val); #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 4ae39db70044..825893d4cb59 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -134,18 +134,3 @@ _GLOBAL(save_fpu) mffs fr0 stfd fr0,FPSTATE_FPSCR(r6) blr - -/* - * These are used in the alignment trap handler when emulating - * single-precision loads and stores. - */ - -_GLOBAL(cvt_fd) - lfs 0,0(r3) - stfd 0,0(r4) - blr - -_GLOBAL(cvt_df) - lfd 0,0(r3) - stfs 0,0(r4) - blr -- cgit v1.2.3 From 82eb1792426f8a171cdaa6cfccb63c39f55bc9bd Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 6 Aug 2020 12:20:35 +0000 Subject: powerpc: drop hard_reset_now() and poweroff_now() declaration Those function have never existed. Drop their declaration. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/edcdd72a36495d25213c0256c8022367458e0d19.1596716418.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/processor.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 2d68dbe56601..8184eb357f10 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -436,8 +436,6 @@ extern void power9_idle_type(unsigned long stop_psscr_val, unsigned long stop_psscr_mask); extern void flush_instruction_cache(void); -extern void hard_reset_now(void); -extern void poweroff_now(void); extern int fix_alignment(struct pt_regs *); extern void _nmask_and_or_msr(unsigned long nmask, unsigned long or_val); -- cgit v1.2.3 From 59562b5c33d6ff3685509ed58b2ed3c5b5712704 Mon Sep 17 00:00:00 2001 From: Scott Cheloha Date: Mon, 27 Jul 2020 13:46:04 -0500 Subject: powerpc/perf: consolidate GPCI hcall structs into asm/hvcall.h The H_GetPerformanceCounterInfo (GPCI) hypercall input/output structs are useful to modules outside of perf/, so move them into asm/hvcall.h to live alongside the other powerpc hypercall structs. Leave the perf-specific GPCI stuff in perf/hv-gpci.h. 
Signed-off-by: Scott Cheloha Acked-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200727184605.2945095-1-cheloha@linux.ibm.com --- arch/powerpc/include/asm/hvcall.h | 36 ++++++++++++++++++++++++++++++++++++ arch/powerpc/perf/hv-gpci.c | 9 --------- arch/powerpc/perf/hv-gpci.h | 27 --------------------------- 3 files changed, 36 insertions(+), 36 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index fbb377055471..e8f116a425f9 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -560,6 +560,42 @@ struct hv_guest_state { /* Latest version of hv_guest_state structure */ #define HV_GUEST_STATE_VERSION 1 +/* + * From the document "H_GetPerformanceCounterInfo Interface" v1.07 + * + * H_GET_PERF_COUNTER_INFO argument + */ +struct hv_get_perf_counter_info_params { + __be32 counter_request; /* I */ + __be32 starting_index; /* IO */ + __be16 secondary_index; /* IO */ + __be16 returned_values; /* O */ + __be32 detail_rc; /* O, only needed when called via *_norets() */ + + /* + * O, size each of counter_value element in bytes, only set for version + * >= 0x3 + */ + __be16 cv_element_size; + + /* I, 0 (zero) for versions < 0x3 */ + __u8 counter_info_version_in; + + /* O, 0 (zero) if version < 0x3. Must be set to 0 when making hcall */ + __u8 counter_info_version_out; + __u8 reserved[0xC]; + __u8 counter_value[]; +} __packed; + +#define HGPCI_REQ_BUFFER_SIZE 4096 +#define HGPCI_MAX_DATA_BYTES \ + (HGPCI_REQ_BUFFER_SIZE - sizeof(struct hv_get_perf_counter_info_params)) + +struct hv_gpci_request_buffer { + struct hv_get_perf_counter_info_params params; + uint8_t bytes[HGPCI_MAX_DATA_BYTES]; +} __packed; + #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_HVCALL_H */ diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index 6884d16ec19b..1667315b82e9 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -123,17 +123,8 @@ static const struct attribute_group *attr_groups[] = { NULL, }; -#define HGPCI_REQ_BUFFER_SIZE 4096 -#define HGPCI_MAX_DATA_BYTES \ - (HGPCI_REQ_BUFFER_SIZE - sizeof(struct hv_get_perf_counter_info_params)) - static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t)); -struct hv_gpci_request_buffer { - struct hv_get_perf_counter_info_params params; - uint8_t bytes[HGPCI_MAX_DATA_BYTES]; -} __packed; - static unsigned long single_gpci_request(u32 req, u32 starting_index, u16 secondary_index, u8 version_in, u32 offset, u8 length, u64 *value) diff --git a/arch/powerpc/perf/hv-gpci.h b/arch/powerpc/perf/hv-gpci.h index a3053eda5dcc..4d108262bed7 100644 --- a/arch/powerpc/perf/hv-gpci.h +++ b/arch/powerpc/perf/hv-gpci.h @@ -2,33 +2,6 @@ #ifndef LINUX_POWERPC_PERF_HV_GPCI_H_ #define LINUX_POWERPC_PERF_HV_GPCI_H_ -#include - -/* From the document "H_GetPerformanceCounterInfo Interface" v1.07 */ - -/* H_GET_PERF_COUNTER_INFO argument */ -struct hv_get_perf_counter_info_params { - __be32 counter_request; /* I */ - __be32 starting_index; /* IO */ - __be16 secondary_index; /* IO */ - __be16 returned_values; /* O */ - __be32 detail_rc; /* O, only needed when called via *_norets() */ - - /* - * O, size each of counter_value element in bytes, only set for version - * >= 0x3 - */ - __be16 cv_element_size; - - /* I, 0 (zero) for versions < 0x3 */ - __u8 counter_info_version_in; - - /* O, 0 (zero) if version < 0x3. 
Must be set to 0 when making hcall */ - __u8 counter_info_version_out; - __u8 reserved[0xC]; - __u8 counter_value[]; -} __packed; - /* * counter info version => fw version/reference (spec version) * -- cgit v1.2.3 From 5d1bc776428f34941a6237afb9454061b5b5e1e1 Mon Sep 17 00:00:00 2001 From: Scott Cheloha Date: Mon, 27 Jul 2020 13:46:05 -0500 Subject: powerpc/pseries: new lparcfg key/value pair: partition_affinity_score The H_GetPerformanceCounterInfo (GPCI) PHYP hypercall has a subcall, Affinity_Domain_Info_By_Partition, which returns, among other things, a "partition affinity score" for a given LPAR. This score, a value on [0-100], represents the processor-memory affinity for the LPAR in question. A score of 0 indicates the worst possible affinity while a score of 100 indicates perfect affinity. The score can be used to reason about performance. This patch adds the score for the local LPAR to the lparcfg procfile under a new 'partition_affinity_score' key. Signed-off-by: Scott Cheloha Reviewed-by: Tyrel Datwyler Acked-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200727184605.2945095-2-cheloha@linux.ibm.com --- arch/powerpc/platforms/pseries/lparcfg.c | 35 ++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index b8d28ab88178..e278390ab28d 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -136,6 +136,39 @@ static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data) return rc; } +static void show_gpci_data(struct seq_file *m) +{ + struct hv_gpci_request_buffer *buf; + unsigned int affinity_score; + long ret; + + buf = kmalloc(sizeof(*buf), GFP_KERNEL); + if (buf == NULL) + return; + + /* + * Show the local LPAR's affinity score. + * + * 0xB1 selects the Affinity_Domain_Info_By_Partition subcall. + * The score is at byte 0xB in the output buffer. + */ + memset(&buf->params, 0, sizeof(buf->params)); + buf->params.counter_request = cpu_to_be32(0xB1); + buf->params.starting_index = cpu_to_be32(-1); /* local LPAR */ + buf->params.counter_info_version_in = 0x5; /* v5+ for score */ + ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, virt_to_phys(buf), + sizeof(*buf)); + if (ret != H_SUCCESS) { + pr_debug("hcall failed: H_GET_PERF_COUNTER_INFO: %ld, %x\n", + ret, be32_to_cpu(buf->params.detail_rc)); + goto out; + } + affinity_score = buf->bytes[0xB]; + seq_printf(m, "partition_affinity_score=%u\n", affinity_score); +out: + kfree(buf); +} + static unsigned h_pic(unsigned long *pool_idle_time, unsigned long *num_procs) { @@ -487,6 +520,8 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) partition_active_processors * 100); } + show_gpci_data(m); + seq_printf(m, "partition_active_processors=%d\n", partition_active_processors); -- cgit v1.2.3 From 6c9100ea39d209e1625ba0fe06134192d9c4752a Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Fri, 7 Aug 2020 17:27:13 +0200 Subject: powerpc: Use simple i2c probe function The i2c probe functions here don't use the id information provided in their second argument, so the single-parameter i2c probe function ("probe_new") can be used instead. This avoids scanning the identifier tables during probes. 
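As a hedged sketch of the resulting driver shape (hypothetical names, match tables omitted), the single-argument callback is wired up through .probe_new while everything else stays the same:

    #include <linux/i2c.h>

    static int demo_probe(struct i2c_client *client)
    {
            /* the old second argument (const struct i2c_device_id *) is gone;
             * the drivers converted here never used it */
            return 0;
    }

    static struct i2c_driver demo_driver = {
            .driver         = { .name = "demo" },
            .probe_new      = demo_probe,
    };
    module_i2c_driver(demo_driver);
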
Signed-off-by: Stephen Kitt Acked-by: Wolfram Sang Reviewed-by: Luca Ceresoli Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200807152713.381588-1-steve@sk2.org --- arch/powerpc/platforms/44x/ppc476.c | 5 ++--- arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/44x/ppc476.c b/arch/powerpc/platforms/44x/ppc476.c index cba83eee685c..07f7e3ce67b5 100644 --- a/arch/powerpc/platforms/44x/ppc476.c +++ b/arch/powerpc/platforms/44x/ppc476.c @@ -86,8 +86,7 @@ static void __noreturn avr_reset_system(char *cmd) avr_halt_system(AVR_PWRCTL_RESET); } -static int avr_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int avr_probe(struct i2c_client *client) { avr_i2c_client = client; ppc_md.restart = avr_reset_system; @@ -104,7 +103,7 @@ static struct i2c_driver avr_driver = { .driver = { .name = "akebono-avr", }, - .probe = avr_probe, + .probe_new = avr_probe, .id_table = avr_id, }; diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c index 0967bdfb1691..409481016928 100644 --- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c +++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c @@ -142,7 +142,7 @@ static int mcu_gpiochip_remove(struct mcu *mcu) return 0; } -static int mcu_probe(struct i2c_client *client, const struct i2c_device_id *id) +static int mcu_probe(struct i2c_client *client) { struct mcu *mcu; int ret; @@ -221,7 +221,7 @@ static struct i2c_driver mcu_driver = { .name = "mcu-mpc8349emitx", .of_match_table = mcu_of_match_table, }, - .probe = mcu_probe, + .probe_new = mcu_probe, .remove = mcu_remove, .id_table = mcu_ids, }; -- cgit v1.2.3 From e53281bc21f061f96c9004f534bc3e807d70cb73 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 14 Aug 2020 06:54:49 +0000 Subject: powerpc: Drop _nmask_and_or_msr() _nmask_and_or_msr() is only used at two places to set MSR_IP. The SYNC is unnecessary as the users are not PowerPC 601. Can be easily writen in C. Do it, and drop _nmask_and_or_msr() Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c2d2b8dfb8dd677026b26dffc8d31070c38a6b89.1597388079.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/processor.h | 1 - arch/powerpc/kernel/misc_32.S | 13 ------------- arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c | 3 ++- arch/powerpc/platforms/embedded6xx/storcenter.c | 3 ++- 4 files changed, 4 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 8184eb357f10..8c7ad14d68de 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -437,7 +437,6 @@ extern void power9_idle_type(unsigned long stop_psscr_val, extern void flush_instruction_cache(void); extern int fix_alignment(struct pt_regs *); -extern void _nmask_and_or_msr(unsigned long nmask, unsigned long or_val); #ifdef CONFIG_PPC64 /* diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index b24f866fef81..8d9cb5df580e 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -215,19 +215,6 @@ _GLOBAL(low_choose_7447a_dfs) #endif /* CONFIG_CPU_FREQ_PMAC && CONFIG_PPC_BOOK3S_32 */ -/* - * complement mask on the msr then "or" some values on. 
- * _nmask_and_or_msr(nmask, value_to_or) - */ -_GLOBAL(_nmask_and_or_msr) - mfmsr r0 /* Get current msr */ - andc r0,r0,r3 /* And off the bits set in r3 (first parm) */ - or r0,r0,r4 /* Or on the bits in r4 (second parm) */ - SYNC /* Some chip revs have problems here... */ - mtmsr r0 /* Update machine state */ - isync - blr /* Done */ - #ifdef CONFIG_40x /* diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c index 15437abe1f6d..b95c3380d2b5 100644 --- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c +++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c @@ -147,7 +147,8 @@ static void __noreturn mpc7448_hpc2_restart(char *cmd) local_irq_disable(); /* Set exception prefix high - to the firmware */ - _nmask_and_or_msr(0, MSR_IP); + mtmsr(mfmsr() | MSR_IP); + isync(); for (;;) ; /* Spin until reset happens */ } diff --git a/arch/powerpc/platforms/embedded6xx/storcenter.c b/arch/powerpc/platforms/embedded6xx/storcenter.c index ed1914dd34bb..e346ddcef45e 100644 --- a/arch/powerpc/platforms/embedded6xx/storcenter.c +++ b/arch/powerpc/platforms/embedded6xx/storcenter.c @@ -101,7 +101,8 @@ static void __noreturn storcenter_restart(char *cmd) local_irq_disable(); /* Set exception prefix high - to the firmware */ - _nmask_and_or_msr(0, MSR_IP); + mtmsr(mfmsr() | MSR_IP); + isync(); /* Wait for reset to happen */ for (;;) ; -- cgit v1.2.3 From 9d6792ffe140240ae54c881cc4183f9acc24b4df Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Thu, 13 Aug 2020 10:11:31 -0500 Subject: powerpc/pseries: explicitly reschedule during drmem_lmb list traversal The drmem lmb list can have hundreds of thousands of entries, and unfortunately lookups take the form of linear searches. As long as this is the case, traversals have the potential to monopolize the CPU and provoke lockup reports, workqueue stalls, and the like unless they explicitly yield. Rather than placing cond_resched() calls within various for_each_drmem_lmb() loop blocks in the code, put it in the iteration expression of the loop macro itself so users can't omit it. Introduce a drmem_lmb_next() iteration helper function which calls cond_resched() at a regular interval during array traversal. Each iteration of the loop in DLPAR code paths can involve around ten RTAS calls which can each take up to 250us, so this ensures the check is performed at worst every few milliseconds. Fixes: 6c6ea53725b3 ("powerpc/mm: Separate ibm, dynamic-memory data from DT format") Signed-off-by: Nathan Lynch Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200813151131.2070161-1-nathanl@linux.ibm.com --- arch/powerpc/include/asm/drmem.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index 17ccc6474ab6..6fb928605ed1 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -8,6 +8,8 @@ #ifndef _ASM_POWERPC_LMB_H #define _ASM_POWERPC_LMB_H +#include + struct drmem_lmb { u64 base_addr; u32 drc_index; @@ -26,8 +28,22 @@ struct drmem_lmb_info { extern struct drmem_lmb_info *drmem_info; +static inline struct drmem_lmb *drmem_lmb_next(struct drmem_lmb *lmb, + const struct drmem_lmb *start) +{ + /* + * DLPAR code paths can take several milliseconds per element + * when interacting with firmware. Ensure that we don't + * unfairly monopolize the CPU. 
+ */ + if (((++lmb - start) % 16) == 0) + cond_resched(); + + return lmb; +} + #define for_each_drmem_lmb_in_range(lmb, start, end) \ - for ((lmb) = (start); (lmb) < (end); (lmb)++) + for ((lmb) = (start); (lmb) < (end); lmb = drmem_lmb_next(lmb, start)) #define for_each_drmem_lmb(lmb) \ for_each_drmem_lmb_in_range((lmb), \ -- cgit v1.2.3 From e426ab39f41045a4c163031272b2f48d944b69c0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 14 Aug 2020 05:56:24 +0000 Subject: powerpc: Remove flush_instruction_cache for book3s/32 The only callers of flush_instruction_cache() are: arch/powerpc/kernel/swsusp_booke.S: bl flush_instruction_cache arch/powerpc/mm/nohash/40x.c: flush_instruction_cache(); arch/powerpc/mm/nohash/44x.c: flush_instruction_cache(); arch/powerpc/mm/nohash/fsl_booke.c: flush_instruction_cache(); arch/powerpc/platforms/44x/machine_check.c: flush_instruction_cache(); arch/powerpc/platforms/44x/machine_check.c: flush_instruction_cache(); This function is not used by book3s/32, drop it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/50098f49877cea0f46730a9df82dcabf84160e4b.1597384512.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/misc_32.S | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 8d9cb5df580e..aa860b8d1dcc 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -258,9 +258,8 @@ _ASM_NOKPROBE_SYMBOL(real_writeb) /* * Flush instruction cache. - * This is a no-op on the 601. */ -#ifndef CONFIG_PPC_8xx +#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC_BOOK3S_32) _GLOBAL(flush_instruction_cache) #if defined(CONFIG_4xx) lis r3, KERNELBASE@h @@ -277,18 +276,11 @@ _GLOBAL(flush_instruction_cache) mfspr r3,SPRN_L1CSR1 ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR mtspr SPRN_L1CSR1,r3 -#elif defined(CONFIG_PPC_BOOK3S_601) - blr /* for 601, do nothing */ -#else - /* 603/604 processor - use invalidate-all bit in HID0 */ - mfspr r3,SPRN_HID0 - ori r3,r3,HID0_ICFI - mtspr SPRN_HID0,r3 #endif /* CONFIG_4xx */ isync blr EXPORT_SYMBOL(flush_instruction_cache) -#endif /* CONFIG_PPC_8xx */ +#endif /* * Copy a whole page. We use the dcbz instruction on the destination -- cgit v1.2.3 From f663f3312051402d32952c44d156a20c0b854753 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 14 Aug 2020 05:56:25 +0000 Subject: powerpc: Move flush_instruction_cache() prototype in asm/cacheflush.h flush_instruction_cache() belongs to the cache flushing function family. 
Move its prototype in asm/cacheflush.h Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/993445b5227e8ca2f0e38bcc9ea3dfea6e865920.1597384512.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/cacheflush.h | 2 ++ arch/powerpc/include/asm/processor.h | 1 - arch/powerpc/platforms/44x/machine_check.c | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index 54764c6e922d..481877879fec 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -98,6 +98,8 @@ static inline void invalidate_dcache_range(unsigned long start, mb(); /* sync */ } +void flush_instruction_cache(void); + #include #endif /* _ASM_POWERPC_CACHEFLUSH_H */ diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 8c7ad14d68de..36a71cd41f37 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -435,7 +435,6 @@ extern void power7_idle_type(unsigned long type); extern void power9_idle_type(unsigned long stop_psscr_val, unsigned long stop_psscr_mask); -extern void flush_instruction_cache(void); extern int fix_alignment(struct pt_regs *); #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/platforms/44x/machine_check.c b/arch/powerpc/platforms/44x/machine_check.c index 90ad6ac529d2..a5c898bb9bab 100644 --- a/arch/powerpc/platforms/44x/machine_check.c +++ b/arch/powerpc/platforms/44x/machine_check.c @@ -7,6 +7,7 @@ #include #include +#include int machine_check_440A(struct pt_regs *regs) { -- cgit v1.2.3 From de39b19452e784de5f90ae899851ab29a29bb42c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 14 Aug 2020 05:56:26 +0000 Subject: powerpc: Rewrite 4xx flush_cache_instruction() in C Nothing prevents flush_cache_instruction() from being writen in C. Do it to improve readability and maintainability. This function is very small and isn't called from assembly, make it static inline in asm/cacheflush.h Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/93d93fc69b4b3ad3ceba2fc0756333c0c0245bb7.1597384512.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/cacheflush.h | 8 ++++++++ arch/powerpc/kernel/misc_32.S | 7 +------ 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index 481877879fec..138e46d8c04e 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -98,7 +98,15 @@ static inline void invalidate_dcache_range(unsigned long start, mb(); /* sync */ } +#ifdef CONFIG_4xx +static inline void flush_instruction_cache(void) +{ + iccci((void *)KERNELBASE); + isync(); +} +#else void flush_instruction_cache(void); +#endif #include diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index aa860b8d1dcc..9e8f730fe04d 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -259,12 +259,8 @@ _ASM_NOKPROBE_SYMBOL(real_writeb) /* * Flush instruction cache. 
*/ -#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC_BOOK3S_32) +#ifdef CONFIG_FSL_BOOKE _GLOBAL(flush_instruction_cache) -#if defined(CONFIG_4xx) - lis r3, KERNELBASE@h - iccci 0,r3 -#elif defined(CONFIG_FSL_BOOKE) #ifdef CONFIG_E200 mfspr r3,SPRN_L1CSR0 ori r3,r3,L1CSR0_CFI|L1CSR0_CLFC @@ -276,7 +272,6 @@ _GLOBAL(flush_instruction_cache) mfspr r3,SPRN_L1CSR1 ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR mtspr SPRN_L1CSR1,r3 -#endif /* CONFIG_4xx */ isync blr EXPORT_SYMBOL(flush_instruction_cache) -- cgit v1.2.3 From 704dfe931df951895dea98bd1d9cacbb601b6451 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 14 Aug 2020 05:56:27 +0000 Subject: powerpc: Rewrite FSL_BOOKE flush_cache_instruction() in C Nothing prevents flush_cache_instruction() from being writen in C. Do it to improve readability and maintainability. This function is only use by low level callers, it is not intended to be used by module. Don't export it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f989eff8296800c427622c0985384148404e4f0b.1597384512.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/misc_32.S | 22 ---------------------- arch/powerpc/mm/nohash/fsl_booke.c | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 22 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 9e8f730fe04d..717e658b90fd 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -255,28 +255,6 @@ _ASM_NOKPROBE_SYMBOL(real_writeb) #endif /* CONFIG_40x */ - -/* - * Flush instruction cache. - */ -#ifdef CONFIG_FSL_BOOKE -_GLOBAL(flush_instruction_cache) -#ifdef CONFIG_E200 - mfspr r3,SPRN_L1CSR0 - ori r3,r3,L1CSR0_CFI|L1CSR0_CLFC - /* msync; isync recommended here */ - mtspr SPRN_L1CSR0,r3 - isync - blr -#endif - mfspr r3,SPRN_L1CSR1 - ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR - mtspr SPRN_L1CSR1,r3 - isync - blr -EXPORT_SYMBOL(flush_instruction_cache) -#endif - /* * Copy a whole page. We use the dcbz instruction on the destination * to reduce memory traffic (it eliminates the unnecessary reads of diff --git a/arch/powerpc/mm/nohash/fsl_booke.c b/arch/powerpc/mm/nohash/fsl_booke.c index 0c294827d6e5..36bda962d3b3 100644 --- a/arch/powerpc/mm/nohash/fsl_booke.c +++ b/arch/powerpc/mm/nohash/fsl_booke.c @@ -219,6 +219,22 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) return tlbcam_addrs[tlbcam_index - 1].limit - PAGE_OFFSET + 1; } +void flush_instruction_cache(void) +{ + unsigned long tmp; + + if (IS_ENABLED(CONFIG_E200)) { + tmp = mfspr(SPRN_L1CSR0); + tmp |= L1CSR0_CFI | L1CSR0_CLFC; + mtspr(SPRN_L1CSR0, tmp); + } else { + tmp = mfspr(SPRN_L1CSR1); + tmp |= L1CSR1_ICFI | L1CSR1_ICLFR; + mtspr(SPRN_L1CSR1, tmp); + } + isync(); +} + /* * MMU_init_hw does the chip-specific initialization of the MMU hardware. */ -- cgit v1.2.3 From e5e179aa3a39c818db8fbc2dce8d2cd24adaf657 Mon Sep 17 00:00:00 2001 From: Scott Cheloha Date: Mon, 10 Aug 2020 20:51:15 -0500 Subject: pseries/drmem: don't cache node id in drmem_lmb struct At memory hot-remove time we can retrieve an LMB's nid from its corresponding memory_block. There is no need to store the nid in multiple locations. Note that lmb_to_memblock() uses find_memory_block() to get the corresponding memory_block. As find_memory_block() runs in sub-linear time this approach is negligibly slower than what we do at present. 
In exchange for this lookup at hot-remove time we no longer need to call memory_add_physaddr_to_nid() during drmem_init() for each LMB. On powerpc, memory_add_physaddr_to_nid() is a linear search, so this spares us an O(n^2) initialization during boot. On systems with many LMBs that initialization overhead is palpable and disruptive. For example, on a box with 249854 LMBs we're seeing drmem_init() take upwards of 30 seconds to complete: [ 53.721639] drmem: initializing drmem v2 [ 80.604346] watchdog: BUG: soft lockup - CPU#65 stuck for 23s! [swapper/0:1] [ 80.604377] Modules linked in: [ 80.604389] CPU: 65 PID: 1 Comm: swapper/0 Not tainted 5.6.0-rc2+ #4 [ 80.604397] NIP: c0000000000a4980 LR: c0000000000a4940 CTR: 0000000000000000 [ 80.604407] REGS: c0002dbff8493830 TRAP: 0901 Not tainted (5.6.0-rc2+) [ 80.604412] MSR: 8000000002009033 CR: 44000248 XER: 0000000d [ 80.604431] CFAR: c0000000000a4a38 IRQMASK: 0 [ 80.604431] GPR00: c0000000000a4940 c0002dbff8493ac0 c000000001904400 c0003cfffffede30 [ 80.604431] GPR04: 0000000000000000 c000000000f4095a 000000000000002f 0000000010000000 [ 80.604431] GPR08: c0000bf7ecdb7fb8 c0000bf7ecc2d3c8 0000000000000008 c00c0002fdfb2001 [ 80.604431] GPR12: 0000000000000000 c00000001e8ec200 [ 80.604477] NIP [c0000000000a4980] hot_add_scn_to_nid+0xa0/0x3e0 [ 80.604486] LR [c0000000000a4940] hot_add_scn_to_nid+0x60/0x3e0 [ 80.604492] Call Trace: [ 80.604498] [c0002dbff8493ac0] [c0000000000a4940] hot_add_scn_to_nid+0x60/0x3e0 (unreliable) [ 80.604509] [c0002dbff8493b20] [c000000000087c10] memory_add_physaddr_to_nid+0x20/0x60 [ 80.604521] [c0002dbff8493b40] [c0000000010d4880] drmem_init+0x25c/0x2f0 [ 80.604530] [c0002dbff8493c10] [c000000000010154] do_one_initcall+0x64/0x2c0 [ 80.604540] [c0002dbff8493ce0] [c0000000010c4aa0] kernel_init_freeable+0x2d8/0x3a0 [ 80.604550] [c0002dbff8493db0] [c000000000010824] kernel_init+0x2c/0x148 [ 80.604560] [c0002dbff8493e20] [c00000000000b648] ret_from_kernel_thread+0x5c/0x74 [ 80.604567] Instruction dump: [ 80.604574] 392918e8 e9490000 e90a000a e92a0000 80ea000c 1d080018 3908ffe8 7d094214 [ 80.604586] 7fa94040 419d00dc e9490010 714a0088 <2faa0008> 409e00ac e9490000 7fbe5040 [ 89.047390] drmem: 249854 LMB(s) With a patched kernel on the same machine we're no longer seeing the soft lockup. drmem_init() now completes in negligible time, even when the LMB count is large. 
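For illustration, the hot-remove side of this change can be condensed into the following sketch (a simplified reading of the dlpar_remove_lmb() hunk in the patch below; the offline step and most error handling are trimmed, and the function name is invented for the sketch):

static int dlpar_remove_lmb_sketch(struct drmem_lmb *lmb)
{
        struct memory_block *mem_block;
        int nid;

        /* derive the node id from the memory_block instead of caching it in lmb->nid */
        mem_block = lmb_to_memblock(lmb);
        if (!mem_block)
                return -EINVAL;

        nid = mem_block->nid;
        put_device(&mem_block->dev);

        __remove_memory(nid, lmb->base_addr, pseries_memory_block_size());
        return 0;
}
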
Fixes: b2d3b5ee66f2 ("powerpc/pseries: Track LMB nid instead of using device tree") Signed-off-by: Scott Cheloha Reviewed-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200811015115.63677-1-cheloha@linux.ibm.com --- arch/powerpc/include/asm/drmem.h | 21 --------------------- arch/powerpc/mm/drmem.c | 6 +----- arch/powerpc/platforms/pseries/hotplug-memory.c | 24 ++++++++++++++++-------- 3 files changed, 17 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index 6fb928605ed1..030a19d92213 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -15,9 +15,6 @@ struct drmem_lmb { u32 drc_index; u32 aa_index; u32 flags; -#ifdef CONFIG_MEMORY_HOTPLUG - int nid; -#endif }; struct drmem_lmb_info { @@ -121,22 +118,4 @@ static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb) lmb->aa_index = 0xffffffff; } -#ifdef CONFIG_MEMORY_HOTPLUG -static inline void lmb_set_nid(struct drmem_lmb *lmb) -{ - lmb->nid = memory_add_physaddr_to_nid(lmb->base_addr); -} -static inline void lmb_clear_nid(struct drmem_lmb *lmb) -{ - lmb->nid = -1; -} -#else -static inline void lmb_set_nid(struct drmem_lmb *lmb) -{ -} -static inline void lmb_clear_nid(struct drmem_lmb *lmb) -{ -} -#endif - #endif /* _ASM_POWERPC_LMB_H */ diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c index b2eeea39684c..9af3832c9d8d 100644 --- a/arch/powerpc/mm/drmem.c +++ b/arch/powerpc/mm/drmem.c @@ -389,10 +389,8 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop) if (!drmem_info->lmbs) return; - for_each_drmem_lmb(lmb) { + for_each_drmem_lmb(lmb) read_drconf_v1_cell(lmb, &prop); - lmb_set_nid(lmb); - } } static void __init init_drmem_v2_lmbs(const __be32 *prop) @@ -437,8 +435,6 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop) lmb->aa_index = dr_cell.aa_index; lmb->flags = dr_cell.flags; - - lmb_set_nid(lmb); } } } diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 5d545b78111f..0ea976d1cac4 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -354,25 +354,32 @@ static int dlpar_add_lmb(struct drmem_lmb *); static int dlpar_remove_lmb(struct drmem_lmb *lmb) { + struct memory_block *mem_block; unsigned long block_sz; int rc; if (!lmb_is_removable(lmb)) return -EINVAL; + mem_block = lmb_to_memblock(lmb); + if (mem_block == NULL) + return -EINVAL; + rc = dlpar_offline_lmb(lmb); - if (rc) + if (rc) { + put_device(&mem_block->dev); return rc; + } block_sz = pseries_memory_block_size(); - __remove_memory(lmb->nid, lmb->base_addr, block_sz); + __remove_memory(mem_block->nid, lmb->base_addr, block_sz); + put_device(&mem_block->dev); /* Update memory regions for memory remove */ memblock_remove(lmb->base_addr, block_sz); invalidate_lmb_associativity_index(lmb); - lmb_clear_nid(lmb); lmb->flags &= ~DRCONF_MEM_ASSIGNED; return 0; @@ -591,7 +598,7 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) static int dlpar_add_lmb(struct drmem_lmb *lmb) { unsigned long block_sz; - int rc; + int nid, rc; if (lmb->flags & DRCONF_MEM_ASSIGNED) return -EINVAL; @@ -602,11 +609,13 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) return rc; } - lmb_set_nid(lmb); block_sz = memory_block_size_bytes(); + /* Find the node id for this address. 
*/ + nid = memory_add_physaddr_to_nid(lmb->base_addr); + /* Add the memory */ - rc = __add_memory(lmb->nid, lmb->base_addr, block_sz); + rc = __add_memory(nid, lmb->base_addr, block_sz); if (rc) { invalidate_lmb_associativity_index(lmb); return rc; @@ -614,9 +623,8 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) rc = dlpar_online_lmb(lmb); if (rc) { - __remove_memory(lmb->nid, lmb->base_addr, block_sz); + __remove_memory(nid, lmb->base_addr, block_sz); invalidate_lmb_associativity_index(lmb); - lmb_clear_nid(lmb); } else { lmb->flags |= DRCONF_MEM_ASSIGNED; } -- cgit v1.2.3 From d9de6b0da85c9f51734f7648f6e860b89f94c801 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 12 Aug 2020 00:04:28 +1000 Subject: powerpc: unrel_branch_check.sh: fix shellcheck complaints No functional change Signed-off-by: Stephen Rothwell Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200811140435.20957-2-sfr@canb.auug.org.au --- arch/powerpc/tools/unrel_branch_check.sh | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh index 6e6a30aea3ed..4c1e04ba5081 100755 --- a/arch/powerpc/tools/unrel_branch_check.sh +++ b/arch/powerpc/tools/unrel_branch_check.sh @@ -1,3 +1,4 @@ +#!/bin/bash # Copyright © 2016 IBM Corporation # # This program is free software; you can redistribute it and/or @@ -26,7 +27,7 @@ awk '{print $1}' BRANCHES=$( $objdump -R "$vmlinux" -D --start-address=0xc000000000000000 \ - --stop-address=${end_intr} | + --stop-address="$end_intr" | grep -e "^c[0-9a-f]*:[[:space:]]*\([0-9a-f][0-9a-f][[:space:]]\)\{4\}[[:space:]]*b" | grep -v '\<__start_initialization_multiplatform>' | grep -v -e 'b.\?.\?ctr' | @@ -40,12 +41,12 @@ awk '{ print $1 ":" $6 ":0x" $7 ":" $8 " "}' for tuple in $BRANCHES do - from=`echo $tuple | cut -d':' -f1` - branch=`echo $tuple | cut -d':' -f2` - to=`echo $tuple | cut -d':' -f3 | sed 's/cr[0-7],//'` - sym=`echo $tuple | cut -d':' -f4` + from=$(echo "$tuple" | cut -d':' -f1) + branch=$(echo "$tuple" | cut -d':' -f2) + to=$(echo "$tuple" | cut -d':' -f3 | sed 's/cr[0-7],//') + sym=$(echo "$tuple" | cut -d':' -f4) - if (( $to > $end_intr )) + if (( to > end_intr )) then if [ -z "$bad_branches" ]; then echo "WARNING: Unrelocated relative branches" -- cgit v1.2.3 From 20ff8ec182160df86571a8af5773ff1e52837d73 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 12 Aug 2020 00:04:29 +1000 Subject: powerpc: unrel_branch_check.sh: simplify and combine some executions Also some minor style changes. There should still be no change in behaviour. 
Signed-off-by: Stephen Rothwell Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200811140435.20957-3-sfr@canb.auug.org.au --- arch/powerpc/tools/unrel_branch_check.sh | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh index 4c1e04ba5081..d735e3875b5e 100755 --- a/arch/powerpc/tools/unrel_branch_check.sh +++ b/arch/powerpc/tools/unrel_branch_check.sh @@ -17,37 +17,34 @@ objdump="$1" vmlinux="$2" #__end_interrupts should be located within the first 64K +kstart=0xc000000000000000 +printf -v kend '0x%x' $(( kstart + 0x10000 )) end_intr=0x$( -$objdump -R "$vmlinux" -d --start-address=0xc000000000000000 \ - --stop-address=0xc000000000010000 | -grep '\<__end_interrupts>:' | -awk '{print $1}' +$objdump -R -d --start-address="$kstart" --stop-address="$kend" "$vmlinux" | +awk '$2 == "<__end_interrupts>:" { print $1 }' ) BRANCHES=$( -$objdump -R "$vmlinux" -D --start-address=0xc000000000000000 \ - --stop-address="$end_intr" | +$objdump -R -D --start-address="$kstart" --stop-address="$end_intr" "$vmlinux" | grep -e "^c[0-9a-f]*:[[:space:]]*\([0-9a-f][0-9a-f][[:space:]]\)\{4\}[[:space:]]*b" | -grep -v '\<__start_initialization_multiplatform>' | -grep -v -e 'b.\?.\?ctr' | -grep -v -e 'b.\?.\?lr' | -sed -e 's/\bbt.\?[[:space:]]*[[:digit:]][[:digit:]]*,/beq/' \ +sed -e '/\<__start_initialization_multiplatform>/d' \ + -e '/b.\?.\?ctr/d' \ + -e '/b.\?.\?lr/d' \ + -e 's/\bbt.\?[[:space:]]*[[:digit:]][[:digit:]]*,/beq/' \ -e 's/\bbf.\?[[:space:]]*[[:digit:]][[:digit:]]*,/bne/' \ -e 's/[[:space:]]0x/ /' \ -e 's/://' | awk '{ print $1 ":" $6 ":0x" $7 ":" $8 " "}' ) -for tuple in $BRANCHES -do +for tuple in $BRANCHES; do from=$(echo "$tuple" | cut -d':' -f1) branch=$(echo "$tuple" | cut -d':' -f2) to=$(echo "$tuple" | cut -d':' -f3 | sed 's/cr[0-7],//') sym=$(echo "$tuple" | cut -d':' -f4) - if (( to > end_intr )) - then + if (( to > end_intr )); then if [ -z "$bad_branches" ]; then echo "WARNING: Unrelocated relative branches" bad_branches="yes" -- cgit v1.2.3 From 4e71106c343c625c0bf72a65b244e35e7d2cd037 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 12 Aug 2020 00:04:30 +1000 Subject: powerpc: unrel_branch_check.sh: simplify objdump's asm output We don't use the raw hex instruction dump, so elide it and adjust the following expressions. Also use \s instead of [[:space:]] everywhere. 
Signed-off-by: Stephen Rothwell Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200811140435.20957-4-sfr@canb.auug.org.au --- arch/powerpc/tools/unrel_branch_check.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh index d735e3875b5e..7e936e2cf70d 100755 --- a/arch/powerpc/tools/unrel_branch_check.sh +++ b/arch/powerpc/tools/unrel_branch_check.sh @@ -26,16 +26,16 @@ awk '$2 == "<__end_interrupts>:" { print $1 }' ) BRANCHES=$( -$objdump -R -D --start-address="$kstart" --stop-address="$end_intr" "$vmlinux" | -grep -e "^c[0-9a-f]*:[[:space:]]*\([0-9a-f][0-9a-f][[:space:]]\)\{4\}[[:space:]]*b" | +$objdump -R -D --no-show-raw-insn --start-address="$kstart" --stop-address="$end_intr" "$vmlinux" | +grep -e "^c[0-9a-f]*:\s*b" | sed -e '/\<__start_initialization_multiplatform>/d' \ -e '/b.\?.\?ctr/d' \ -e '/b.\?.\?lr/d' \ - -e 's/\bbt.\?[[:space:]]*[[:digit:]][[:digit:]]*,/beq/' \ - -e 's/\bbf.\?[[:space:]]*[[:digit:]][[:digit:]]*,/bne/' \ - -e 's/[[:space:]]0x/ /' \ + -e 's/\bbt.\?\s*[[:digit:]][[:digit:]]*,/beq/' \ + -e 's/\bbf.\?\s*[[:digit:]][[:digit:]]*,/bne/' \ + -e 's/\s0x/ /' \ -e 's/://' | -awk '{ print $1 ":" $6 ":0x" $7 ":" $8 " "}' +awk '{ print $1 ":" $2 ":0x" $3 ":" $4 " "}' ) for tuple in $BRANCHES; do -- cgit v1.2.3 From 3d97abbc9f6fe90973551f3e3eef47ffef863114 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 12 Aug 2020 00:04:31 +1000 Subject: powerpc: unrel_branch_check.sh: convert grep | sed | awk to just sed Also start using sed -E and make all the separate expressions into a single one with comments. Pull the stripping of condition registers back into the sed command. Signed-off-by: Stephen Rothwell Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200811140435.20957-5-sfr@canb.auug.org.au --- arch/powerpc/tools/unrel_branch_check.sh | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh index 7e936e2cf70d..dc82289b2252 100755 --- a/arch/powerpc/tools/unrel_branch_check.sh +++ b/arch/powerpc/tools/unrel_branch_check.sh @@ -27,21 +27,31 @@ awk '$2 == "<__end_interrupts>:" { print $1 }' BRANCHES=$( $objdump -R -D --no-show-raw-insn --start-address="$kstart" --stop-address="$end_intr" "$vmlinux" | -grep -e "^c[0-9a-f]*:\s*b" | -sed -e '/\<__start_initialization_multiplatform>/d' \ - -e '/b.\?.\?ctr/d' \ - -e '/b.\?.\?lr/d' \ - -e 's/\bbt.\?\s*[[:digit:]][[:digit:]]*,/beq/' \ - -e 's/\bbf.\?\s*[[:digit:]][[:digit:]]*,/bne/' \ - -e 's/\s0x/ /' \ - -e 's/://' | -awk '{ print $1 ":" $2 ":0x" $3 ":" $4 " "}' +sed -E -n ' +# match lines that start with a kernel address +/^c[0-9a-f]*:\s*b/ { + # drop a target that we do not care about + /\<__start_initialization_multiplatform>/d + # drop branches via ctr or lr + /\ end_intr )); then -- cgit v1.2.3 From b84eaab6ede6477484edc043456cf7d7cfc7f8b3 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 12 Aug 2020 00:04:32 +1000 Subject: powerpc: unrel_branch_check.sh: simplify and tidy up the final loop Signed-off-by: Stephen Rothwell Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200811140435.20957-6-sfr@canb.auug.org.au --- arch/powerpc/tools/unrel_branch_check.sh | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) (limited to 'arch') diff --git 
a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh index dc82289b2252..54ebd05615d4 100755 --- a/arch/powerpc/tools/unrel_branch_check.sh +++ b/arch/powerpc/tools/unrel_branch_check.sh @@ -25,7 +25,6 @@ $objdump -R -d --start-address="$kstart" --stop-address="$kend" "$vmlinux" | awk '$2 == "<__end_interrupts>:" { print $1 }' ) -BRANCHES=$( $objdump -R -D --no-show-raw-insn --start-address="$kstart" --stop-address="$end_intr" "$vmlinux" | sed -E -n ' # match lines that start with a kernel address @@ -45,24 +44,19 @@ sed -E -n ' # strip out condition registers s/:0xcr[0-7],/:0x/ p -}' -) - -for tuple in $BRANCHES; do - from=$(echo "$tuple" | cut -d':' -f1) - branch=$(echo "$tuple" | cut -d':' -f2) - to=$(echo "$tuple" | cut -d':' -f3) - sym=$(echo "$tuple" | cut -d':' -f4) +}' | { +all_good=true +while IFS=: read -r from branch to sym; do if (( to > end_intr )); then - if [ -z "$bad_branches" ]; then - echo "WARNING: Unrelocated relative branches" - bad_branches="yes" + if $all_good; then + printf '%s\n' 'WARNING: Unrelocated relative branches' + all_good=false fi - echo "$from $branch-> $to $sym" + printf '%s %s-> %s %s\n' "$from" "$branch" "$to" "$sym" fi done -if [ -z "$bad_branches" ]; then - exit 0 -fi +$all_good + +} -- cgit v1.2.3 From 3745ae63b405b09c86718f95d96c4b2d2827b087 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 12 Aug 2020 00:04:33 +1000 Subject: powerpc: unrel_branch_check.sh: fix up the file header Signed-off-by: Stephen Rothwell Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200811140435.20957-7-sfr@canb.auug.org.au --- arch/powerpc/tools/unrel_branch_check.sh | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh index 54ebd05615d4..4489f16a443c 100755 --- a/arch/powerpc/tools/unrel_branch_check.sh +++ b/arch/powerpc/tools/unrel_branch_check.sh @@ -1,16 +1,9 @@ #!/bin/bash -# Copyright © 2016 IBM Corporation +# SPDX-License-Identifier: GPL-2.0+ +# Copyright © 2016,2020 IBM Corporation # -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version -# 2 of the License, or (at your option) any later version. -# -# This script checks the relocations of a vmlinux for "suspicious" -# branches from unrelocated code (head_64.S code). - -# Turn this on if you want more debug output: -# set -x +# This script checks the unrelocated code of a vmlinux for "suspicious" +# branches to relocated code (head_64.S code). # Have Kbuild supply the path to objdump so we handle cross compilation. objdump="$1" -- cgit v1.2.3 From af13a2244d59c4d63a25abd8257cbaef9d9ffebc Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 12 Aug 2020 00:04:34 +1000 Subject: powerpc: unrel_branch_check.sh: exit silently for early errors If we can't find the address of __end_interrupts, then we still exit successfully as that is the current behaviour. 
Signed-off-by: Stephen Rothwell Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200811140435.20957-8-sfr@canb.auug.org.au --- arch/powerpc/tools/unrel_branch_check.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh index 4489f16a443c..70da90270c78 100755 --- a/arch/powerpc/tools/unrel_branch_check.sh +++ b/arch/powerpc/tools/unrel_branch_check.sh @@ -14,9 +14,12 @@ kstart=0xc000000000000000 printf -v kend '0x%x' $(( kstart + 0x10000 )) end_intr=0x$( -$objdump -R -d --start-address="$kstart" --stop-address="$kend" "$vmlinux" | +$objdump -R -d --start-address="$kstart" --stop-address="$kend" "$vmlinux" 2>/dev/null | awk '$2 == "<__end_interrupts>:" { print $1 }' ) +if [ "$end_intr" = "0x" ]; then + exit 0 +fi $objdump -R -D --no-show-raw-insn --start-address="$kstart" --stop-address="$end_intr" "$vmlinux" | sed -E -n ' -- cgit v1.2.3 From b71dca9891b330d5c2d3ff5d41704aa6f64f8e32 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 12 Aug 2020 18:10:35 +1000 Subject: powerpc: unrel_branch_check.sh: use nm to find symbol value This is considerably faster then parsing the objdump asm output. It will also make the enabling of llvm-objdump a little easier. Signed-off-by: Stephen Rothwell Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200812081036.7969-2-sfr@canb.auug.org.au --- arch/powerpc/Makefile.postlink | 2 +- arch/powerpc/tools/unrel_branch_check.sh | 13 +++++-------- 2 files changed, 6 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/Makefile.postlink b/arch/powerpc/Makefile.postlink index 2268396ff4bb..a6c77f4d32b2 100644 --- a/arch/powerpc/Makefile.postlink +++ b/arch/powerpc/Makefile.postlink @@ -18,7 +18,7 @@ quiet_cmd_relocs_check = CHKREL $@ ifdef CONFIG_PPC_BOOK3S_64 cmd_relocs_check = \ $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@" ; \ - $(BASH) $(srctree)/arch/powerpc/tools/unrel_branch_check.sh "$(OBJDUMP)" "$@" + $(BASH) $(srctree)/arch/powerpc/tools/unrel_branch_check.sh "$(OBJDUMP)" "$(NM)" "$@" else cmd_relocs_check = \ $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@" diff --git a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh index 70da90270c78..0369eb2e7e4b 100755 --- a/arch/powerpc/tools/unrel_branch_check.sh +++ b/arch/powerpc/tools/unrel_branch_check.sh @@ -5,18 +5,15 @@ # This script checks the unrelocated code of a vmlinux for "suspicious" # branches to relocated code (head_64.S code). -# Have Kbuild supply the path to objdump so we handle cross compilation. +# Have Kbuild supply the path to objdump and nm so we handle cross compilation. 
objdump="$1" -vmlinux="$2" +nm="$2" +vmlinux="$3" -#__end_interrupts should be located within the first 64K kstart=0xc000000000000000 -printf -v kend '0x%x' $(( kstart + 0x10000 )) -end_intr=0x$( -$objdump -R -d --start-address="$kstart" --stop-address="$kend" "$vmlinux" 2>/dev/null | -awk '$2 == "<__end_interrupts>:" { print $1 }' -) +end_intr=0x$($nm -p "$vmlinux" | + sed -E -n '/\s+[[:alpha:]]\s+__end_interrupts\s*$/{s///p;q}') if [ "$end_intr" = "0x" ]; then exit 0 fi -- cgit v1.2.3 From 6b1992bcdee8b86a74362192d4d8906731918bcc Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 12 Aug 2020 18:10:36 +1000 Subject: powerpc: unrel_branch_check.sh: enable the use of llvm-objdump v9, 10 or 11 Currently, using llvm-objtool, this script just silently succeeds without actually do the intended checking. So this updates it to work properly. Firstly, llvm-objdump does not add target symbol names to the end of branches in its asm output, so we have to drop the branch to __start_initialization_multiplatform using its address. Secondly, v9 and 10 specify branch targets as .+, so we convert those to actual addresses. Thirdly, v10 and 11 error out on a vmlinux if given the -R option complaining that it is "not a dynamic object". The -R does not make any difference to the asm output, so remove it. Lastly, v11 produces asm that is very similar to Gnu objtool (at least as far as branches are concerned), so no further changes are necessary to make it work. Signed-off-by: Stephen Rothwell Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200812081036.7969-3-sfr@canb.auug.org.au --- arch/powerpc/tools/unrel_branch_check.sh | 34 +++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh index 0369eb2e7e4b..8301efee1e6c 100755 --- a/arch/powerpc/tools/unrel_branch_check.sh +++ b/arch/powerpc/tools/unrel_branch_check.sh @@ -18,12 +18,16 @@ if [ "$end_intr" = "0x" ]; then exit 0 fi -$objdump -R -D --no-show-raw-insn --start-address="$kstart" --stop-address="$end_intr" "$vmlinux" | +# we know that there is a correct branch to +# __start_initialization_multiplatform, so find its address +# so we can exclude it. +sim=0x$($nm -p "$vmlinux" | + sed -E -n '/\s+[[:alpha:]]\s+__start_initialization_multiplatform\s*$/{s///p;q}') + +$objdump -D --no-show-raw-insn --start-address="$kstart" --stop-address="$end_intr" "$vmlinux" | sed -E -n ' # match lines that start with a kernel address /^c[0-9a-f]*:\s*b/ { - # drop a target that we do not care about - /\<__start_initialization_multiplatform>/d # drop branches via ctr or lr /\= 0x2000000 )); then + to=$(( to - 0x4000000 )) + fi + elif (( to >= 0x8000 )); then + to=$(( to - 0x10000 )) + fi + printf -v to '0x%x' $(( "0x$from" + to )) + ;; + *) printf 'Unkown branch format\n' + ;; + esac + if [ "$to" = "$sim" ]; then + continue + fi if (( to > end_intr )); then if $all_good; then printf '%s\n' 'WARNING: Unrelocated relative branches' -- cgit v1.2.3 From 76d46a1e2fe2c35f24c07b7cc8a41afbf98b349e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 14 Aug 2020 05:49:29 +0000 Subject: powerpc: Remove flush_instruction_cache() on 8xx flush_instruction_cache() is never used on 8xx, remove it. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/245cabd8f291facac8c8c5fd370e361a69e02860.1597384145.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/nohash/8xx.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index d2b37146ae6c..231ca95f9ffb 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -244,13 +244,6 @@ void set_context(unsigned long id, pgd_t *pgd) mb(); } -void flush_instruction_cache(void) -{ - isync(); - mtspr(SPRN_IC_CST, IDC_INVALL); - isync(); -} - #ifdef CONFIG_PPC_KUEP void __init setup_kuep(bool disabled) { -- cgit v1.2.3 From c20beffeec3cb6f6f52d9aef27f91a3f453a91f4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 12 Aug 2020 12:25:16 +0000 Subject: powerpc/uaccess: Use flexible addressing with __put_user()/__get_user() At the time being, __put_user()/__get_user() and friends only use D-form addressing, with 0 offset. Ex: lwz reg1, 0(reg2) Give the compiler the opportunity to use other adressing modes whenever possible, to get more optimised code. Hereunder is a small exemple: struct test { u32 item1; u16 item2; u8 item3; u64 item4; }; int set_test_user(struct test __user *from, struct test __user *to) { int err; u32 item1; u16 item2; u8 item3; u64 item4; err = __get_user(item1, &from->item1); err |= __get_user(item2, &from->item2); err |= __get_user(item3, &from->item3); err |= __get_user(item4, &from->item4); err |= __put_user(item1, &to->item1); err |= __put_user(item2, &to->item2); err |= __put_user(item3, &to->item3); err |= __put_user(item4, &to->item4); return err; } Before the patch: 00000df0 : df0: 94 21 ff f0 stwu r1,-16(r1) df4: 39 40 00 00 li r10,0 df8: 93 c1 00 08 stw r30,8(r1) dfc: 93 e1 00 0c stw r31,12(r1) e00: 7d 49 53 78 mr r9,r10 e04: 80 a3 00 00 lwz r5,0(r3) e08: 38 e3 00 04 addi r7,r3,4 e0c: 7d 46 53 78 mr r6,r10 e10: a0 e7 00 00 lhz r7,0(r7) e14: 7d 29 33 78 or r9,r9,r6 e18: 39 03 00 06 addi r8,r3,6 e1c: 7d 46 53 78 mr r6,r10 e20: 89 08 00 00 lbz r8,0(r8) e24: 7d 29 33 78 or r9,r9,r6 e28: 38 63 00 08 addi r3,r3,8 e2c: 7d 46 53 78 mr r6,r10 e30: 83 c3 00 00 lwz r30,0(r3) e34: 83 e3 00 04 lwz r31,4(r3) e38: 7d 29 33 78 or r9,r9,r6 e3c: 7d 43 53 78 mr r3,r10 e40: 90 a4 00 00 stw r5,0(r4) e44: 7d 29 1b 78 or r9,r9,r3 e48: 38 c4 00 04 addi r6,r4,4 e4c: 7d 43 53 78 mr r3,r10 e50: b0 e6 00 00 sth r7,0(r6) e54: 7d 29 1b 78 or r9,r9,r3 e58: 38 e4 00 06 addi r7,r4,6 e5c: 7d 43 53 78 mr r3,r10 e60: 99 07 00 00 stb r8,0(r7) e64: 7d 23 1b 78 or r3,r9,r3 e68: 38 84 00 08 addi r4,r4,8 e6c: 93 c4 00 00 stw r30,0(r4) e70: 93 e4 00 04 stw r31,4(r4) e74: 7c 63 53 78 or r3,r3,r10 e78: 83 c1 00 08 lwz r30,8(r1) e7c: 83 e1 00 0c lwz r31,12(r1) e80: 38 21 00 10 addi r1,r1,16 e84: 4e 80 00 20 blr After the patch: 00000dbc : dbc: 39 40 00 00 li r10,0 dc0: 7d 49 53 78 mr r9,r10 dc4: 80 03 00 00 lwz r0,0(r3) dc8: 7d 48 53 78 mr r8,r10 dcc: a1 63 00 04 lhz r11,4(r3) dd0: 7d 29 43 78 or r9,r9,r8 dd4: 7d 48 53 78 mr r8,r10 dd8: 88 a3 00 06 lbz r5,6(r3) ddc: 7d 29 43 78 or r9,r9,r8 de0: 7d 48 53 78 mr r8,r10 de4: 80 c3 00 08 lwz r6,8(r3) de8: 80 e3 00 0c lwz r7,12(r3) dec: 7d 29 43 78 or r9,r9,r8 df0: 7d 43 53 78 mr r3,r10 df4: 90 04 00 00 stw r0,0(r4) df8: 7d 29 1b 78 or r9,r9,r3 dfc: 7d 43 53 78 mr r3,r10 e00: b1 64 00 04 sth r11,4(r4) e04: 7d 29 1b 78 or r9,r9,r3 e08: 7d 43 53 78 mr r3,r10 e0c: 98 a4 00 06 stb r5,6(r4) e10: 7d 23 1b 78 or r3,r9,r3 e14: 90 c4 00 08 stw r6,8(r4) e18: 90 e4 00 0c stw 
r7,12(r4) e1c: 7c 63 53 78 or r3,r3,r10 e20: 4e 80 00 20 blr Signed-off-by: Christophe Leroy Reviewed-by: Segher Boessenkool Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c27bc4e598daf3bbb225de7a1f5c52121cf1e279.1597235091.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/uaccess.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 00699903f1ef..b1d5e8b66b31 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -158,7 +158,7 @@ extern long __put_user_bad(void); */ #define __put_user_asm(x, addr, err, op) \ __asm__ __volatile__( \ - "1: " op " %1,0(%2) # put_user\n" \ + "1: " op "%X2 %1,%2 # put_user\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %0,%3\n" \ @@ -166,7 +166,7 @@ extern long __put_user_bad(void); ".previous\n" \ EX_TABLE(1b, 3b) \ : "=r" (err) \ - : "r" (x), "b" (addr), "i" (-EFAULT), "0" (err)) + : "r" (x), "m" (*addr), "i" (-EFAULT), "0" (err)) #ifdef __powerpc64__ #define __put_user_asm2(x, ptr, retval) \ @@ -174,8 +174,8 @@ extern long __put_user_bad(void); #else /* __powerpc64__ */ #define __put_user_asm2(x, addr, err) \ __asm__ __volatile__( \ - "1: stw %1,0(%2)\n" \ - "2: stw %1+1,4(%2)\n" \ + "1: stw%X2 %1,%2\n" \ + "2: stw%X2 %L1,%L2\n" \ "3:\n" \ ".section .fixup,\"ax\"\n" \ "4: li %0,%3\n" \ @@ -184,7 +184,7 @@ extern long __put_user_bad(void); EX_TABLE(1b, 4b) \ EX_TABLE(2b, 4b) \ : "=r" (err) \ - : "r" (x), "b" (addr), "i" (-EFAULT), "0" (err)) + : "r" (x), "m" (*addr), "i" (-EFAULT), "0" (err)) #endif /* __powerpc64__ */ #define __put_user_size_allowed(x, ptr, size, retval) \ @@ -316,7 +316,7 @@ extern long __get_user_bad(void); #define __get_user_asm(x, addr, err, op) \ __asm__ __volatile__( \ - "1: "op" %1,0(%2) # get_user\n" \ + "1: "op"%X2 %1, %2 # get_user\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %0,%3\n" \ @@ -325,7 +325,7 @@ extern long __get_user_bad(void); ".previous\n" \ EX_TABLE(1b, 3b) \ : "=r" (err), "=r" (x) \ - : "b" (addr), "i" (-EFAULT), "0" (err)) + : "m" (*addr), "i" (-EFAULT), "0" (err)) #ifdef __powerpc64__ #define __get_user_asm2(x, addr, err) \ @@ -333,8 +333,8 @@ extern long __get_user_bad(void); #else /* __powerpc64__ */ #define __get_user_asm2(x, addr, err) \ __asm__ __volatile__( \ - "1: lwz %1,0(%2)\n" \ - "2: lwz %1+1,4(%2)\n" \ + "1: lwz%X2 %1, %2\n" \ + "2: lwz%X2 %L1, %L2\n" \ "3:\n" \ ".section .fixup,\"ax\"\n" \ "4: li %0,%3\n" \ @@ -345,7 +345,7 @@ extern long __get_user_bad(void); EX_TABLE(1b, 4b) \ EX_TABLE(2b, 4b) \ : "=r" (err), "=&r" (x) \ - : "b" (addr), "i" (-EFAULT), "0" (err)) + : "m" (*addr), "i" (-EFAULT), "0" (err)) #endif /* __powerpc64__ */ #define __get_user_size_allowed(x, ptr, size, retval) \ @@ -355,10 +355,10 @@ do { \ if (size > sizeof(x)) \ (x) = __get_user_bad(); \ switch (size) { \ - case 1: __get_user_asm(x, ptr, retval, "lbz"); break; \ - case 2: __get_user_asm(x, ptr, retval, "lhz"); break; \ - case 4: __get_user_asm(x, ptr, retval, "lwz"); break; \ - case 8: __get_user_asm2(x, ptr, retval); break; \ + case 1: __get_user_asm(x, (u8 __user *)ptr, retval, "lbz"); break; \ + case 2: __get_user_asm(x, (u16 __user *)ptr, retval, "lhz"); break; \ + case 4: __get_user_asm(x, (u32 __user *)ptr, retval, "lwz"); break; \ + case 8: __get_user_asm2(x, (u64 __user *)ptr, retval); break; \ default: (x) = __get_user_bad(); \ } \ } while (0) -- cgit v1.2.3 From 
2f279eeb68b8eda43a95255db701b4faaeedbe0f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 12 Aug 2020 12:25:17 +0000 Subject: powerpc/uaccess: Add pre-update addressing to __get_user_asm() and __put_user_asm() Enable pre-update addressing mode in __get_user_asm() and __put_user_asm() Signed-off-by: Christophe Leroy Reviewed-by: Segher Boessenkool Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/13041c7df39e89ddf574ea0cdc6dedfdd9734140.1597235091.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/uaccess.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index b1d5e8b66b31..7c2427f237e1 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -158,7 +158,7 @@ extern long __put_user_bad(void); */ #define __put_user_asm(x, addr, err, op) \ __asm__ __volatile__( \ - "1: " op "%X2 %1,%2 # put_user\n" \ + "1: " op "%U2%X2 %1,%2 # put_user\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %0,%3\n" \ @@ -166,7 +166,7 @@ extern long __put_user_bad(void); ".previous\n" \ EX_TABLE(1b, 3b) \ : "=r" (err) \ - : "r" (x), "m" (*addr), "i" (-EFAULT), "0" (err)) + : "r" (x), "m<>" (*addr), "i" (-EFAULT), "0" (err)) #ifdef __powerpc64__ #define __put_user_asm2(x, ptr, retval) \ @@ -316,7 +316,7 @@ extern long __get_user_bad(void); #define __get_user_asm(x, addr, err, op) \ __asm__ __volatile__( \ - "1: "op"%X2 %1, %2 # get_user\n" \ + "1: "op"%U2%X2 %1, %2 # get_user\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %0,%3\n" \ @@ -325,7 +325,7 @@ extern long __get_user_bad(void); ".previous\n" \ EX_TABLE(1b, 3b) \ : "=r" (err), "=r" (x) \ - : "m" (*addr), "i" (-EFAULT), "0" (err)) + : "m<>" (*addr), "i" (-EFAULT), "0" (err)) #ifdef __powerpc64__ #define __get_user_asm2(x, addr, err) \ -- cgit v1.2.3 From 353bce211e00d183344f464ba1ee0e1ffb0e2a6c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 17 Aug 2020 05:46:39 +0000 Subject: powerpc/process: Remove unnecessary #ifdef CONFIG_FUNCTION_GRAPH_TRACER ftrace_graph_ret_addr() is always defined and returns 'ip' when CONFIG_FUNCTION GRAPH_TRACER is not set. So the #ifdef is not needed, remove it. Signed-off-by: Christophe Leroy Acked-by: Naveen N. 
Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/9d11143d4e27ba8274369a926968756917584868.1597643153.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/process.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 016bd831908e..febe9f7cda2f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -2096,10 +2096,8 @@ void show_stack(struct task_struct *tsk, unsigned long *stack, unsigned long sp, ip, lr, newsp; int count = 0; int firstframe = 1; -#ifdef CONFIG_FUNCTION_GRAPH_TRACER unsigned long ret_addr; int ftrace_idx = 0; -#endif if (tsk == NULL) tsk = current; @@ -2127,12 +2125,10 @@ void show_stack(struct task_struct *tsk, unsigned long *stack, if (!firstframe || ip != lr) { printk("%s["REG"] ["REG"] %pS", loglvl, sp, ip, (void *)ip); -#ifdef CONFIG_FUNCTION_GRAPH_TRACER ret_addr = ftrace_graph_ret_addr(current, &ftrace_idx, ip, stack); if (ret_addr != ip) pr_cont(" (%pS)", (void *)ret_addr); -#endif if (firstframe) pr_cont(" (unreliable)"); pr_cont("\n"); -- cgit v1.2.3 From 10bf59d923c2766ec8d6f0243481c865c7db9979 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 18 Aug 2020 14:45:57 +1000 Subject: powerpc/pseries/eeh: Fix dumb linebreaks These annoy me every time I see them. Why are they here? They're not even needed for 80cols compliance. Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200818044557.135497-1-oohall@gmail.com --- arch/powerpc/platforms/pseries/eeh_pseries.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index cb2d9a970b7b..1db74cec72bc 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -161,8 +161,7 @@ static int pseries_eeh_phb_reset(struct pci_controller *phb, int config_addr, in BUID_LO(phb->buid), option); /* If fundamental-reset not supported, try hot-reset */ - if (option == EEH_RESET_FUNDAMENTAL && - ret == -8) { + if (option == EEH_RESET_FUNDAMENTAL && ret == -8) { option = EEH_RESET_HOT; ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, config_addr, BUID_HI(phb->buid), @@ -170,8 +169,7 @@ static int pseries_eeh_phb_reset(struct pci_controller *phb, int config_addr, in } /* We need reset hold or settlement delay */ - if (option == EEH_RESET_FUNDAMENTAL || - option == EEH_RESET_HOT) + if (option == EEH_RESET_FUNDAMENTAL || option == EEH_RESET_HOT) msleep(EEH_PE_RST_HOLD_TIME); else msleep(EEH_PE_RST_SETTLE_TIME); @@ -621,8 +619,7 @@ static int pseries_eeh_set_option(struct eeh_pe *pe, int option) /* Not support */ return 0; default: - pr_err("%s: Invalid option %d\n", - __func__, option); + pr_err("%s: Invalid option %d\n", __func__, option); return -EINVAL; } @@ -954,8 +951,7 @@ static int pseries_notify_resume(struct eeh_dev *edev) if (!edev) return -EEXIST; - if (rtas_token("ibm,open-sriov-allow-unfreeze") - == RTAS_UNKNOWN_SERVICE) + if (rtas_token("ibm,open-sriov-allow-unfreeze") == RTAS_UNKNOWN_SERVICE) return -EINVAL; if (edev->pdev->is_physfn || edev->pdev->is_virtfn) -- cgit v1.2.3 From 529d2bd56ada4b8a4904909042792879868208cd Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 19 Aug 2020 11:57:04 +1000 Subject: powerpc/64: Remove unused generic_secondary_thread_init() The last caller was removed in 2014 in commit fb5a515704d7 ("powerpc: Remove 
platforms/wsp and associated pieces"). As Jordan noticed even though there are no callers, the code above in fsl_secondary_thread_init() falls through into generic_secondary_thread_init(). So we can remove the _GLOBAL but not the body of the function. However because fsl_secondary_thread_init() is inside #ifdef CONFIG_PPC_BOOK3E, we can never reach the body of generic_secondary_thread_init() unless CONFIG_PPC_BOOK3E is enabled, so we can wrap the whole thing in a single #ifdef. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200819015704.1976364-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/smp.h | 1 - arch/powerpc/kernel/head_64.S | 7 ++----- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 49a25e2400f2..81a49566ccd8 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -243,7 +243,6 @@ extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); * 64-bit but defining them all here doesn't harm */ extern void generic_secondary_smp_init(void); -extern void generic_secondary_thread_init(void); extern unsigned long __secondary_hold_spinloop; extern unsigned long __secondary_hold_acknowledge; extern char __secondary_hold; diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 0e05a9a47a4b..1510b2a56669 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -300,9 +300,6 @@ _GLOBAL(fsl_secondary_thread_init) rlwimi r3, r3, 30, 2, 30 mtspr SPRN_PIR, r3 1: -#endif - -_GLOBAL(generic_secondary_thread_init) mr r24,r3 /* turn on 64-bit mode */ @@ -312,13 +309,13 @@ _GLOBAL(generic_secondary_thread_init) bl relative_toc tovirt(r2,r2) -#ifdef CONFIG_PPC_BOOK3E /* Book3E initialization */ mr r3,r24 bl book3e_secondary_thread_init -#endif b generic_secondary_common_init +#endif /* CONFIG_PPC_BOOK3E */ + /* * On pSeries and most other platforms, secondary processors spin * in the following code. -- cgit v1.2.3 From 364b236a0b6e86439b9025d961da8602db23d5bf Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Tue, 25 Aug 2020 13:51:47 +1000 Subject: powerpc/boot: Update Makefile comment for 64bit wrapper As of commit 147c05168fc8 ("powerpc/boot: Add support for 64bit little endian wrapper") the comment in the Makefile is misleading. The wrapper packaging 64bit kernel may built as a 32 or 64 bit elf. Update the comment to reflect this. Signed-off-by: Jordan Niethe Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200825035147.3239-1-jniethe5@gmail.com --- arch/powerpc/boot/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index b88fd27a45f0..f8ce6d2dde7b 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -7,7 +7,7 @@ # Based on coffboot by Paul Mackerras # Simplified for ppc64 by Todd Inglett # -# NOTE: this code is built for 32 bit in ELF32 format even though +# NOTE: this code may be built for 32 bit in ELF32 format even though # it packages a 64 bit kernel. We do this to simplify the # bootloader and increase compatibility with OpenFirmware. 
# -- cgit v1.2.3 From 0fb4871bcc8997acbb8edf14b301fc150101d6c0 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Fri, 28 Aug 2020 12:05:42 +1000 Subject: powerpc/tools: Remove 90 line limit in checkpatch script As of commit bdc48fa11e46, scripts/checkpatch.pl now has a default line length warning of 100 characters. The powerpc wrapper script was using a length of 90 instead of 80 in order to make checkpatch less restrictive, but now it's making it more restrictive instead. I think it makes sense to just use the default value now. Signed-off-by: Russell Currey Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200828020542.393022-1-ruscur@russell.cc --- arch/powerpc/tools/checkpatch.sh | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/tools/checkpatch.sh b/arch/powerpc/tools/checkpatch.sh index 3ce5c093b19d..91c04802ec31 100755 --- a/arch/powerpc/tools/checkpatch.sh +++ b/arch/powerpc/tools/checkpatch.sh @@ -9,7 +9,6 @@ script_base=$(realpath $(dirname $0)) exec $script_base/../../../scripts/checkpatch.pl \ --subjective \ --no-summary \ - --max-line-length=90 \ --show-types \ --ignore ARCH_INCLUDE_LINUX \ --ignore BIT_MACRO \ -- cgit v1.2.3 From cac3e629086f1b2e31c87a6c9b0130d29843ae86 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Wed, 5 Aug 2020 00:04:52 -0300 Subject: powerpc/pseries/iommu: Create defines for operations in ibm, ddw-applicable Create defines to help handling ibm,ddw-applicable values, avoiding confusion about the index of given operations. Signed-off-by: Leonardo Bras Tested-by: David Dai Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200805030455.123024-2-leobras.c@gmail.com --- arch/powerpc/platforms/pseries/iommu.c | 43 ++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 6d47b4a3ce39..ac0d6376bdad 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -39,6 +39,14 @@ #include "pseries.h" +enum { + DDW_QUERY_PE_DMA_WIN = 0, + DDW_CREATE_PE_DMA_WIN = 1, + DDW_REMOVE_PE_DMA_WIN = 2, + + DDW_APPLICABLE_SIZE +}; + static struct iommu_table_group *iommu_pseries_alloc_group(int node) { struct iommu_table_group *table_group; @@ -771,12 +779,12 @@ static void remove_ddw(struct device_node *np, bool remove_prop) { struct dynamic_dma_window_prop *dwp; struct property *win64; - u32 ddw_avail[3]; + u32 ddw_avail[DDW_APPLICABLE_SIZE]; u64 liobn; int ret = 0; ret = of_property_read_u32_array(np, "ibm,ddw-applicable", - &ddw_avail[0], 3); + &ddw_avail[0], DDW_APPLICABLE_SIZE); win64 = of_find_property(np, DIRECT64_PROPNAME, NULL); if (!win64) @@ -798,15 +806,15 @@ static void remove_ddw(struct device_node *np, bool remove_prop) pr_debug("%pOF successfully cleared tces in window.\n", np); - ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn); + ret = rtas_call(ddw_avail[DDW_REMOVE_PE_DMA_WIN], 1, 1, NULL, liobn); if (ret) pr_warn("%pOF: failed to remove direct window: rtas returned " "%d to ibm,remove-pe-dma-window(%x) %llx\n", - np, ret, ddw_avail[2], liobn); + np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn); else pr_debug("%pOF: successfully removed direct window: rtas returned " "%d to ibm,remove-pe-dma-window(%x) %llx\n", - np, ret, ddw_avail[2], liobn); + np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn); delprop: if (remove_prop) @@ -889,11 +897,11 @@ static int query_ddw(struct 
pci_dev *dev, const u32 *ddw_avail, buid = pdn->phb->buid; cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8)); - ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query, - cfg_addr, BUID_HI(buid), BUID_LO(buid)); + ret = rtas_call(ddw_avail[DDW_QUERY_PE_DMA_WIN], 3, 5, (u32 *)query, + cfg_addr, BUID_HI(buid), BUID_LO(buid)); dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x" - " returned %d\n", ddw_avail[0], cfg_addr, BUID_HI(buid), - BUID_LO(buid), ret); + " returned %d\n", ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, + BUID_HI(buid), BUID_LO(buid), ret); return ret; } @@ -920,15 +928,16 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail, do { /* extra outputs are LIOBN and dma-addr (hi, lo) */ - ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create, - cfg_addr, BUID_HI(buid), BUID_LO(buid), - page_shift, window_shift); + ret = rtas_call(ddw_avail[DDW_CREATE_PE_DMA_WIN], 5, 4, + (u32 *)create, cfg_addr, BUID_HI(buid), + BUID_LO(buid), page_shift, window_shift); } while (rtas_busy_delay(ret)); dev_info(&dev->dev, "ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d " - "(liobn = 0x%x starting addr = %x %x)\n", ddw_avail[1], - cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift, - window_shift, ret, create->liobn, create->addr_hi, create->addr_lo); + "(liobn = 0x%x starting addr = %x %x)\n", + ddw_avail[DDW_CREATE_PE_DMA_WIN], cfg_addr, BUID_HI(buid), + BUID_LO(buid), page_shift, window_shift, ret, create->liobn, + create->addr_hi, create->addr_lo); return ret; } @@ -996,7 +1005,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) int page_shift; u64 dma_addr, max_addr; struct device_node *dn; - u32 ddw_avail[3]; + u32 ddw_avail[DDW_APPLICABLE_SIZE]; struct direct_window *window; struct property *win64; struct dynamic_dma_window_prop *ddwprop; @@ -1029,7 +1038,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) * the property is actually in the parent, not the PE */ ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable", - &ddw_avail[0], 3); + &ddw_avail[0], DDW_APPLICABLE_SIZE); if (ret) goto out_failed; -- cgit v1.2.3 From 80f0251231131d164eddab78d2b6c1b8e37d0093 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Wed, 5 Aug 2020 00:04:53 -0300 Subject: powerpc/pseries/iommu: Update call to ibm, query-pe-dma-windows >From LoPAR level 2.8, "ibm,ddw-extensions" index 3 can make the number of outputs from "ibm,query-pe-dma-windows" go from 5 to 6. This change of output size is meant to expand the address size of largest_available_block PE TCE from 32-bit to 64-bit, which ends up shifting page_size and migration_capable. This ends up requiring the update of ddw_query_response->largest_available_block from u32 to u64, and manually assigning the values from the buffer into this struct, according to output size. Also, a routine was created for helping reading the ddw extensions as suggested by LoPAR: First reading the size of the extension array from index 0, checking if the property exists, and then returning it's value. 
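Put differently, the extended call splits the 64-bit field across two 32-bit output cells and the caller reassembles it. A small sketch of the reassembly performed in query_ddw() below (the helper name is invented for illustration; out_sz is the negotiated number of outputs):

static u64 ddw_largest_block(const u32 *query_out, int out_sz)
{
        /* 5 outputs: largest_available_block is a single 32-bit cell */
        if (out_sz == 5)
                return query_out[1];

        /* 6 outputs: high word in cell 1, low word in cell 2 */
        return ((u64)query_out[1] << 32) | query_out[2];
}
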
Signed-off-by: Leonardo Bras Tested-by: David Dai Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200805030455.123024-3-leobras.c@gmail.com --- arch/powerpc/platforms/pseries/iommu.c | 91 ++++++++++++++++++++++++++++++---- 1 file changed, 81 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index ac0d6376bdad..1a933c4e8bba 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -47,6 +47,12 @@ enum { DDW_APPLICABLE_SIZE }; +enum { + DDW_EXT_SIZE = 0, + DDW_EXT_RESET_DMA_WIN = 1, + DDW_EXT_QUERY_OUT_SIZE = 2 +}; + static struct iommu_table_group *iommu_pseries_alloc_group(int node) { struct iommu_table_group *table_group; @@ -342,7 +348,7 @@ struct direct_window { /* Dynamic DMA Window support */ struct ddw_query_response { u32 windows_available; - u32 largest_available_block; + u64 largest_available_block; u32 page_size; u32 migration_capable; }; @@ -877,14 +883,62 @@ static int find_existing_ddw_windows(void) } machine_arch_initcall(pseries, find_existing_ddw_windows); +/** + * ddw_read_ext - Get the value of an DDW extension + * @np: device node from which the extension value is to be read. + * @extnum: index number of the extension. + * @value: pointer to return value, modified when extension is available. + * + * Checks if "ibm,ddw-extensions" exists for this node, and get the value + * on index 'extnum'. + * It can be used only to check if a property exists, passing value == NULL. + * + * Returns: + * 0 if extension successfully read + * -EINVAL if the "ibm,ddw-extensions" does not exist, + * -ENODATA if "ibm,ddw-extensions" does not have a value, and + * -EOVERFLOW if "ibm,ddw-extensions" does not contain this extension. + */ +static inline int ddw_read_ext(const struct device_node *np, int extnum, + u32 *value) +{ + static const char propname[] = "ibm,ddw-extensions"; + u32 count; + int ret; + + ret = of_property_read_u32_index(np, propname, DDW_EXT_SIZE, &count); + if (ret) + return ret; + + if (count < extnum) + return -EOVERFLOW; + + if (!value) + value = &count; + + return of_property_read_u32_index(np, propname, extnum, value); +} + static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail, - struct ddw_query_response *query) + struct ddw_query_response *query, + struct device_node *parent) { struct device_node *dn; struct pci_dn *pdn; - u32 cfg_addr; + u32 cfg_addr, ext_query, query_out[5]; u64 buid; - int ret; + int ret, out_sz; + + /* + * From LoPAR level 2.8, "ibm,ddw-extensions" index 3 can rule how many + * output parameters ibm,query-pe-dma-windows will have, ranging from + * 5 to 6. + */ + ret = ddw_read_ext(parent, DDW_EXT_QUERY_OUT_SIZE, &ext_query); + if (!ret && ext_query == 1) + out_sz = 6; + else + out_sz = 5; /* * Get the config address and phb buid of the PE window. 
@@ -897,11 +951,28 @@ static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail, buid = pdn->phb->buid; cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8)); - ret = rtas_call(ddw_avail[DDW_QUERY_PE_DMA_WIN], 3, 5, (u32 *)query, + ret = rtas_call(ddw_avail[DDW_QUERY_PE_DMA_WIN], 3, out_sz, query_out, cfg_addr, BUID_HI(buid), BUID_LO(buid)); - dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x" - " returned %d\n", ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, - BUID_HI(buid), BUID_LO(buid), ret); + dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x returned %d\n", + ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, BUID_HI(buid), + BUID_LO(buid), ret); + + switch (out_sz) { + case 5: + query->windows_available = query_out[0]; + query->largest_available_block = query_out[1]; + query->page_size = query_out[2]; + query->migration_capable = query_out[3]; + break; + case 6: + query->windows_available = query_out[0]; + query->largest_available_block = ((u64)query_out[1] << 32) | + query_out[2]; + query->page_size = query_out[3]; + query->migration_capable = query_out[4]; + break; + } + return ret; } @@ -1049,7 +1120,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) * of page sizes: supported and supported for migrate-dma. */ dn = pci_device_to_OF_node(dev); - ret = query_ddw(dev, ddw_avail, &query); + ret = query_ddw(dev, ddw_avail, &query, pdn); if (ret != 0) goto out_failed; @@ -1077,7 +1148,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) /* check largest block * page size > max memory hotplug addr */ max_addr = ddw_memory_hotplug_max(); if (query.largest_available_block < (max_addr >> page_shift)) { - dev_dbg(&dev->dev, "can't map partition max 0x%llx with %u " + dev_dbg(&dev->dev, "can't map partition max 0x%llx with %llu " "%llu-sized pages\n", max_addr, query.largest_available_block, 1ULL << page_shift); goto out_failed; -- cgit v1.2.3 From 74d0b3994e147a2b503170b5e02f1d07dc086586 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Wed, 5 Aug 2020 00:04:54 -0300 Subject: powerpc/pseries/iommu: Move window-removing part of remove_ddw into remove_dma_window Move the window-removing part of remove_ddw into a new function (remove_dma_window), so it can be used to remove other DMA windows. It's useful for removing DMA windows that don't create DIRECT64_PROPNAME property, like the default DMA window from the device, which uses "ibm,dma-window". 
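As a sketch, callers are then expected to pair of_find_property() with the new helper for whichever window property the node carries (property names as used in this series; error handling omitted):

        struct property *win;

        /* the 64-bit direct window created by enable_ddw() ... */
        win = of_find_property(np, DIRECT64_PROPNAME, NULL);
        if (win && win->length >= sizeof(struct dynamic_dma_window_prop))
                remove_dma_window(np, ddw_avail, win);

        /* ... or the default window described by the device tree */
        win = of_find_property(np, "ibm,dma-window", NULL);
        if (win)
                remove_dma_window(np, ddw_avail, win);
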
Signed-off-by: Leonardo Bras Tested-by: David Dai Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200805030455.123024-4-leobras.c@gmail.com --- arch/powerpc/platforms/pseries/iommu.c | 45 ++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 1a933c4e8bba..4e33147825cc 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -781,25 +781,14 @@ static int __init disable_ddw_setup(char *str) early_param("disable_ddw", disable_ddw_setup); -static void remove_ddw(struct device_node *np, bool remove_prop) +static void remove_dma_window(struct device_node *np, u32 *ddw_avail, + struct property *win) { struct dynamic_dma_window_prop *dwp; - struct property *win64; - u32 ddw_avail[DDW_APPLICABLE_SIZE]; u64 liobn; - int ret = 0; - - ret = of_property_read_u32_array(np, "ibm,ddw-applicable", - &ddw_avail[0], DDW_APPLICABLE_SIZE); - - win64 = of_find_property(np, DIRECT64_PROPNAME, NULL); - if (!win64) - return; - - if (ret || win64->length < sizeof(*dwp)) - goto delprop; + int ret; - dwp = win64->value; + dwp = win->value; liobn = (u64)be32_to_cpu(dwp->liobn); /* clear the whole window, note the arg is in kernel pages */ @@ -821,10 +810,30 @@ static void remove_ddw(struct device_node *np, bool remove_prop) pr_debug("%pOF: successfully removed direct window: rtas returned " "%d to ibm,remove-pe-dma-window(%x) %llx\n", np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn); +} + +static void remove_ddw(struct device_node *np, bool remove_prop) +{ + struct property *win; + u32 ddw_avail[DDW_APPLICABLE_SIZE]; + int ret = 0; + + ret = of_property_read_u32_array(np, "ibm,ddw-applicable", + &ddw_avail[0], DDW_APPLICABLE_SIZE); + if (ret) + return; + + win = of_find_property(np, DIRECT64_PROPNAME, NULL); + if (!win) + return; + + if (win->length >= sizeof(struct dynamic_dma_window_prop)) + remove_dma_window(np, ddw_avail, win); + + if (!remove_prop) + return; -delprop: - if (remove_prop) - ret = of_remove_property(np, win64); + ret = of_remove_property(np, win); if (ret) pr_warn("%pOF: failed to remove direct window property: %d\n", np, ret); -- cgit v1.2.3 From 8c0d51592f6f0123953633d1ecf21e843fce0bfd Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Wed, 5 Aug 2020 00:04:55 -0300 Subject: powerpc/pseries/iommu: Allow bigger 64bit window by removing default DMA window On LoPAR "DMA Window Manipulation Calls", it's recommended to remove the default DMA window for the device, before attempting to configure a DDW, in order to make the maximum resources available for the next DDW to be created. This is a requirement for using DDW on devices in which hypervisor allows only one DMA window. If setting up a new DDW fails anywhere after the removal of this default DMA window, it's needed to restore the default DMA window. For this, an implementation of ibm,reset-pe-dma-windows rtas call is needed: Platforms supporting the DDW option starting with LoPAR level 2.7 implement ibm,ddw-extensions. The first extension available (index 2) carries the token for ibm,reset-pe-dma-windows rtas call, which is used to restore the default DMA window for a device, if it has been deleted. It does so by resetting the TCE table allocation for the PE to it's boot time value, available in "ibm,dma-window" device tree node. 
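The resulting flow in enable_ddw() can be condensed to the following sketch (drawn from the hunks below; locking and the remaining failure paths are omitted):

        if (query.windows_available == 0) {
                default_win = of_find_property(pdn, "ibm,dma-window", NULL);

                /* only remove the default window if it can be restored afterwards */
                if (!default_win || ddw_read_ext(pdn, DDW_EXT_RESET_DMA_WIN, NULL))
                        goto out_failed;

                remove_dma_window(pdn, ddw_avail, default_win);
                default_win_removed = true;

                /* the freed resources may now allow a bigger window */
                ret = query_ddw(dev, ddw_avail, &query, pdn);
                if (ret || query.windows_available == 0)
                        goto out_failed;
        }
        /* ... window creation as before ... */
out_failed:
        if (default_win_removed)
                reset_dma_window(dev, pdn);     /* ibm,reset-pe-dma-windows */
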
Signed-off-by: Leonardo Bras Tested-by: David Dai Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200805030455.123024-5-leobras.c@gmail.com --- arch/powerpc/platforms/pseries/iommu.c | 73 ++++++++++++++++++++++++++++++---- 1 file changed, 66 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 4e33147825cc..e4198700ed1a 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -1066,6 +1066,38 @@ static phys_addr_t ddw_memory_hotplug_max(void) return max_addr; } +/* + * Platforms supporting the DDW option starting with LoPAR level 2.7 implement + * ibm,ddw-extensions, which carries the rtas token for + * ibm,reset-pe-dma-windows. + * That rtas-call can be used to restore the default DMA window for the device. + */ +static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn) +{ + int ret; + u32 cfg_addr, reset_dma_win; + u64 buid; + struct device_node *dn; + struct pci_dn *pdn; + + ret = ddw_read_ext(par_dn, DDW_EXT_RESET_DMA_WIN, &reset_dma_win); + if (ret) + return; + + dn = pci_device_to_OF_node(dev); + pdn = PCI_DN(dn); + buid = pdn->phb->buid; + cfg_addr = (pdn->busno << 16) | (pdn->devfn << 8); + + ret = rtas_call(reset_dma_win, 3, 1, NULL, cfg_addr, BUID_HI(buid), + BUID_LO(buid)); + if (ret) + dev_info(&dev->dev, + "ibm,reset-pe-dma-windows(%x) %x %x %x returned %d ", + reset_dma_win, cfg_addr, BUID_HI(buid), BUID_LO(buid), + ret); +} + /* * If the PE supports dynamic dma windows, and there is space for a table * that can map all pages in a linear offset, then setup such a table, @@ -1090,6 +1122,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) struct property *win64; struct dynamic_dma_window_prop *ddwprop; struct failed_ddw_pdn *fpdn; + bool default_win_removed = false; mutex_lock(&direct_window_init_mutex); @@ -1133,14 +1166,38 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) if (ret != 0) goto out_failed; + /* + * If there is no window available, remove the default DMA window, + * if it's present. This will make all the resources available to the + * new DDW window. + * If anything fails after this, we need to restore it, so also check + * for extensions presence. + */ if (query.windows_available == 0) { - /* - * no additional windows are available for this device. - * We might be able to reallocate the existing window, - * trading in for a larger page size. - */ - dev_dbg(&dev->dev, "no free dynamic windows"); - goto out_failed; + struct property *default_win; + int reset_win_ext; + + default_win = of_find_property(pdn, "ibm,dma-window", NULL); + if (!default_win) + goto out_failed; + + reset_win_ext = ddw_read_ext(pdn, DDW_EXT_RESET_DMA_WIN, NULL); + if (reset_win_ext) + goto out_failed; + + remove_dma_window(pdn, ddw_avail, default_win); + default_win_removed = true; + + /* Query again, to check if the window is available */ + ret = query_ddw(dev, ddw_avail, &query, pdn); + if (ret != 0) + goto out_failed; + + if (query.windows_available == 0) { + /* no windows are available for this device. 
*/ + dev_dbg(&dev->dev, "no free dynamic windows"); + goto out_failed; + } } if (query.page_size & 4) { page_shift = 24; /* 16MB */ @@ -1231,6 +1288,8 @@ out_free_prop: kfree(win64); out_failed: + if (default_win_removed) + reset_dma_window(dev, pdn); fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL); if (!fpdn) -- cgit v1.2.3 From 8f55984f530d7275531e17f36ea29229c2c410dd Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 1 Aug 2019 14:46:30 +0930 Subject: powerpc/powernv: Print helpful message when cores guarded Often the firmware will guard out cores after a crash. This often undesirable, and is not immediately noticeable. This adds an informative message when a CPU device tree nodes are marked bad in the device tree. Signed-off-by: Joel Stanley [mpe: Use an eye-catcher that's less likely to get us in trouble] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190801051630.5804-1-joel@jms.id.au --- arch/powerpc/platforms/powernv/setup.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 7fcb88623081..9acaa0f131b9 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -130,6 +130,28 @@ static void pnv_setup_rfi_flush(void) setup_count_cache_flush(); } +static void __init pnv_check_guarded_cores(void) +{ + struct device_node *dn; + int bad_count = 0; + + for_each_node_by_type(dn, "cpu") { + if (of_property_match_string(dn, "status", "bad") >= 0) + bad_count++; + }; + + if (bad_count) { + printk(" _ _______________\n"); + pr_cont(" | | / \\\n"); + pr_cont(" | | | WARNING! |\n"); + pr_cont(" | | | |\n"); + pr_cont(" | | | It looks like |\n"); + pr_cont(" |_| | you have %*d |\n", 3, bad_count); + pr_cont(" _ | guarded cores |\n"); + pr_cont(" (_) \\_______________/\n"); + } +} + static void __init pnv_setup_arch(void) { set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); @@ -150,6 +172,8 @@ static void __init pnv_setup_arch(void) /* Enable NAP mode */ powersave_nap = 1; + pnv_check_guarded_cores(); + /* XXX PMCS */ } -- cgit v1.2.3 From a02f6d42357acf6e5de6ffc728e6e77faf3ad217 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Wed, 2 Sep 2020 09:30:11 +0930 Subject: powerpc: Warn about use of smt_snooze_delay It's not done anything for a long time. Save the percpu variable, and emit a warning to remind users to not expect it to do anything. This uses pr_warn_once instead of pr_warn_ratelimit as testing 'ppc64_cpu --smt=off' on a 24 core / 4 SMT system showed the warning to be noisy, as the online/offline loop is slow. Fixes: 3fa8cad82b94 ("powerpc/pseries/cpuidle: smt-snooze-delay cleanup.") Cc: stable@vger.kernel.org # v3.14 Signed-off-by: Joel Stanley Acked-by: Gautham R. 
Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200902000012.3440389-1-joel@jms.id.au --- arch/powerpc/kernel/sysfs.c | 42 +++++++++++++++++------------------------- 1 file changed, 17 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 46b4ebc33db7..5dea98fa2f93 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -32,29 +32,27 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); -/* - * SMT snooze delay stuff, 64-bit only for now - */ - #ifdef CONFIG_PPC64 -/* Time in microseconds we delay before sleeping in the idle loop */ -static DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 }; +/* + * Snooze delay has not been hooked up since 3fa8cad82b94 ("powerpc/pseries/cpuidle: + * smt-snooze-delay cleanup.") and has been broken even longer. As was foretold in + * 2014: + * + * "ppc64_util currently utilises it. Once we fix ppc64_util, propose to clean + * up the kernel code." + * + * powerpc-utils stopped using it as of 1.3.8. At some point in the future this + * code should be removed. + */ static ssize_t store_smt_snooze_delay(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct cpu *cpu = container_of(dev, struct cpu, dev); - ssize_t ret; - long snooze; - - ret = sscanf(buf, "%ld", &snooze); - if (ret != 1) - return -EINVAL; - - per_cpu(smt_snooze_delay, cpu->dev.id) = snooze; + pr_warn_once("%s (%d) stored to unsupported smt_snooze_delay, which has no effect.\n", + current->comm, current->pid); return count; } @@ -62,9 +60,9 @@ static ssize_t show_smt_snooze_delay(struct device *dev, struct device_attribute *attr, char *buf) { - struct cpu *cpu = container_of(dev, struct cpu, dev); - - return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->dev.id)); + pr_warn_once("%s (%d) read from unsupported smt_snooze_delay\n", + current->comm, current->pid); + return sprintf(buf, "100\n"); } static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, @@ -72,16 +70,10 @@ static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, static int __init setup_smt_snooze_delay(char *str) { - unsigned int cpu; - long snooze; - if (!cpu_has_feature(CPU_FTR_SMT)) return 1; - snooze = simple_strtol(str, NULL, 10); - for_each_possible_cpu(cpu) - per_cpu(smt_snooze_delay, cpu) = snooze; - + pr_warn("smt-snooze-delay command line option has no effect\n"); return 1; } __setup("smt-snooze-delay=", setup_smt_snooze_delay); -- cgit v1.2.3 From dc462267d2d7aacffc3c1d99b02d7a7c59db7c66 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 25 Aug 2020 17:55:35 +1000 Subject: powerpc/64s: handle ISA v3.1 local copy-paste context switches The ISA v3.1 the copy-paste facility has a new memory move functionality which allows the copy buffer to be pasted to domestic memory (RAM) as opposed to foreign memory (accelerator). This means the POWER9 trick of avoiding the cp_abort on context switch if the process had not mapped foreign memory does not work on POWER10. Do the cp_abort unconditionally there. KVM must also cp_abort on guest exit to prevent copy buffer state leaking between contexts. 
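Reduced to its essence, the policy this patch applies (the real hunks are in __switch_to(), the nested-guest entry path and the rmhandlers exit path below) can be sketched as:

#include <linux/mm_types.h>
#include <linux/atomic.h>
#include <asm/cpu_has_feature.h>

/* Sketch only; the field and feature names mirror those used by the patch. */
static bool needs_cp_abort(struct mm_struct *next_mm)
{
	/* ISA v3.1: the copy buffer can be pasted to normal memory,
	 * so it must always be scrubbed on a context change. */
	if (cpu_has_feature(CPU_FTR_ARCH_31))
		return true;

	/* POWER9: only tasks owning foreign (VAS) windows can see it. */
	return next_mm && atomic_read(&next_mm->context.vas_windows);
}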
Signed-off-by: Nicholas Piggin Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200825075535.224536-1-npiggin@gmail.com --- arch/powerpc/kernel/process.c | 16 +++++++++------- arch/powerpc/kvm/book3s_hv.c | 7 +++++++ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 8 ++++++++ 3 files changed, 24 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index febe9f7cda2f..2a6fadde58b4 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1250,15 +1250,17 @@ struct task_struct *__switch_to(struct task_struct *prev, restore_math(current->thread.regs); /* - * The copy-paste buffer can only store into foreign real - * addresses, so unprivileged processes can not see the - * data or use it in any way unless they have foreign real - * mappings. If the new process has the foreign real address - * mappings, we must issue a cp_abort to clear any state and - * prevent snooping, corruption or a covert channel. + * On POWER9 the copy-paste buffer can only paste into + * foreign real addresses, so unprivileged processes can not + * see the data or use it in any way unless they have + * foreign real mappings. If the new process has the foreign + * real address mappings, we must issue a cp_abort to clear + * any state and prevent snooping, corruption or a covert + * channel. ISA v3.1 supports paste into local memory. */ if (current->mm && - atomic_read(¤t->mm->context.vas_windows)) + (cpu_has_feature(CPU_FTR_ARCH_31) || + atomic_read(¤t->mm->context.vas_windows))) asm volatile(PPC_CP_ABORT); } #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 4ba06a2a306c..3bd3118c7633 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3530,6 +3530,13 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, */ asm volatile("eieio; tlbsync; ptesync"); + /* + * cp_abort is required if the processor supports local copy-paste + * to clear the copy buffer that was under control of the guest. + */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + asm volatile(PPC_CP_ABORT); + mtspr(SPRN_LPID, vcpu->kvm->arch.host_lpid); /* restore host LPID */ isync(); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 799d6d0f4ead..cd9995ee8441 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1830,6 +1830,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_RADIX_PREFETCH_BUG) 2: #endif /* CONFIG_PPC_RADIX_MMU */ + /* + * cp_abort is required if the processor supports local copy-paste + * to clear the copy buffer that was under control of the guest. + */ +BEGIN_FTR_SECTION + PPC_CP_ABORT +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31) + /* * POWER7/POWER8 guest -> host partition switch code. * We don't have to lock against tlbies but we do -- cgit v1.2.3 From 231b232df8f67e7d37af01259c21f2a131c3911e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 27 Aug 2020 23:17:13 +1000 Subject: powerpc/64: Make VDSO32 track COMPAT on 64-bit When we added the VDSO32 kconfig symbol, which controls building of the 32-bit VDSO, we made it depend on CPU_BIG_ENDIAN (for 64-bit). That was because back then COMPAT was always enabled for 64-bit, so depending on it would have left the 32-bit VDSO always enabled, which we didn't want. 
But since then we have made COMPAT selectable, and off by default for ppc64le, so VDSO32 should really depend on that. For most people this makes no difference, none of the defconfigs change, it's only if someone is building ppc64le with COMPAT=y, they will now also get VDSO32. If they've enabled COMPAT in order to run 32-bit binaries they presumably also want the 32-bit VDSO. Signed-off-by: Michael Ellerman Reviewed-by: Christophe Leroy Link: https://lore.kernel.org/r/20200908125850.407939-1-mpe@ellerman.id.au --- arch/powerpc/platforms/Kconfig.cputype | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 1dc9d3c81872..e74ec220b5d6 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -490,13 +490,12 @@ endmenu config VDSO32 def_bool y - depends on PPC32 || CPU_BIG_ENDIAN + depends on PPC32 || COMPAT help This symbol controls whether we build the 32-bit VDSO. We obviously want to do that if we're building a 32-bit kernel. If we're building - a 64-bit kernel then we only want a 32-bit VDSO if we're building for - big endian. That is because the only little endian configuration we - support is ppc64le which is 64-bit only. + a 64-bit kernel then we only want a 32-bit VDSO if we're also enabling + COMPAT. choice prompt "Endianness selection" -- cgit v1.2.3 From eae9eec476d13fad9af6da1f44a054ee02b7b161 Mon Sep 17 00:00:00 2001 From: Thiago Jung Bauermann Date: Tue, 18 Aug 2020 19:11:26 -0300 Subject: powerpc/pseries/svm: Allocate SWIOTLB buffer anywhere in memory POWER secure guests (i.e., guests which use the Protected Execution Facility) need to use SWIOTLB to be able to do I/O with the hypervisor, but they don't need the SWIOTLB memory to be in low addresses since the hypervisor doesn't have any addressing limitation. This solves a SWIOTLB initialization problem we are seeing in secure guests with 128 GB of RAM: they are configured with 4 GB of crashkernel reserved memory, which leaves no space for SWIOTLB in low addresses. To do this, we use mostly the same code as swiotlb_init(), but allocate the buffer using memblock_alloc() instead of memblock_alloc_low(). Fixes: 2efbc58f157a ("powerpc/pseries/svm: Force SWIOTLB for secure guests") Signed-off-by: Thiago Jung Bauermann Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200818221126.391073-1-bauerman@linux.ibm.com --- arch/powerpc/include/asm/svm.h | 4 ++++ arch/powerpc/mm/mem.c | 6 +++++- arch/powerpc/platforms/pseries/svm.c | 26 ++++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/svm.h b/arch/powerpc/include/asm/svm.h index 85580b30aba4..7546402d796a 100644 --- a/arch/powerpc/include/asm/svm.h +++ b/arch/powerpc/include/asm/svm.h @@ -15,6 +15,8 @@ static inline bool is_secure_guest(void) return mfmsr() & MSR_S; } +void __init svm_swiotlb_init(void); + void dtl_cache_ctor(void *addr); #define get_dtl_cache_ctor() (is_secure_guest() ? 
dtl_cache_ctor : NULL) @@ -25,6 +27,8 @@ static inline bool is_secure_guest(void) return false; } +static inline void svm_swiotlb_init(void) {} + #define get_dtl_cache_ctor() NULL #endif /* CONFIG_PPC_SVM */ diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 42e25874f5a8..ddc32cc1b6cf 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -49,6 +49,7 @@ #include #include #include +#include #include @@ -282,7 +283,10 @@ void __init mem_init(void) * back to to-down. */ memblock_set_bottom_up(true); - swiotlb_init(0); + if (is_secure_guest()) + svm_swiotlb_init(); + else + swiotlb_init(0); #endif high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c index e6d7a344d9f2..7b739cc7a8a9 100644 --- a/arch/powerpc/platforms/pseries/svm.c +++ b/arch/powerpc/platforms/pseries/svm.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include @@ -35,6 +36,31 @@ static int __init init_svm(void) } machine_early_initcall(pseries, init_svm); +/* + * Initialize SWIOTLB. Essentially the same as swiotlb_init(), except that it + * can allocate the buffer anywhere in memory. Since the hypervisor doesn't have + * any addressing limitation, we don't need to allocate it in low addresses. + */ +void __init svm_swiotlb_init(void) +{ + unsigned char *vstart; + unsigned long bytes, io_tlb_nslabs; + + io_tlb_nslabs = (swiotlb_size_or_default() >> IO_TLB_SHIFT); + io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); + + bytes = io_tlb_nslabs << IO_TLB_SHIFT; + + vstart = memblock_alloc(PAGE_ALIGN(bytes), PAGE_SIZE); + if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, false)) + return; + + if (io_tlb_start) + memblock_free_early(io_tlb_start, + PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); + panic("SVM: Cannot allocate SWIOTLB buffer"); +} + int set_memory_encrypted(unsigned long addr, int numpages) { if (!PAGE_ALIGNED(addr)) -- cgit v1.2.3 From 4759c11ed20454b7b36db4ec15f7d5aa1519af4a Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 2 Sep 2020 09:59:38 +0530 Subject: powerpc/watchpoint: Fix quadword instruction handling on p10 predecessors On p10 predecessors, watchpoint with quadword access is compared at quadword length. If the watch range is doubleword or less than that in a first half of quadword aligned 16 bytes, and if there is any unaligned quadword access which will access only the 2nd half, the handler should consider it as extraneous and emulate/single-step it before continuing. 
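To make the corner case concrete, a self-contained worked example (all values invented) applying the same overlap checks the handler performs:

#include <stdbool.h>

/*
 * Worked example: an 8-byte ptrace watchpoint at 0x1000 and an unaligned
 * quadword (lq/stq) access with EA 0x1008 on a pre-POWER10 CPU.  The DAWR
 * compares at quadword granularity, so the hardware range covers the whole
 * 16-byte block even though the user asked for only the first half.
 */
static bool quadword_hit_is_extraneous(void)
{
	unsigned long wp_addr = 0x1000, wp_len = 8;	/* user's watch range  */
	unsigned long ea = 0x1008, size = 16;		/* the quadword access */

	unsigned long hw_start = wp_addr & ~0xfUL;			/* 0x1000 */
	unsigned long hw_end   = (wp_addr + wp_len + 0xf) & ~0xfUL;	/* 0x1010 */

	bool hw_overlap   = ea < hw_end && ea + size > hw_start;		/* true  */
	bool user_overlap = ea < wp_addr + wp_len && ea + size > wp_addr;	/* false */

	/* The hardware range matched but the user range did not: the handler
	 * must treat the event as extraneous and emulate/single-step the
	 * instruction before continuing. */
	return hw_overlap && !user_overlap;
}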
Fixes: 74c6881019b7 ("powerpc/watchpoint: Prepare handler to handle more than one watchpoint") Reported-by: Pedro Miraglia Franco de Carvalho Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200902042945.129369-2-ravi.bangoria@linux.ibm.com --- arch/powerpc/include/asm/hw_breakpoint.h | 1 + arch/powerpc/kernel/hw_breakpoint.c | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index db206a7f38e2..9b68eafebf43 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -42,6 +42,7 @@ struct arch_hw_breakpoint { #else #define HW_BREAKPOINT_SIZE 0x8 #endif +#define HW_BREAKPOINT_SIZE_QUADWORD 0x10 #define DABR_MAX_LEN 8 #define DAWR_MAX_LEN 512 diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 1f4a1efa0074..9f7df1c37233 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -520,9 +520,17 @@ static bool ea_hw_range_overlaps(unsigned long ea, int size, struct arch_hw_breakpoint *info) { unsigned long hw_start_addr, hw_end_addr; + unsigned long align_size = HW_BREAKPOINT_SIZE; - hw_start_addr = ALIGN_DOWN(info->address, HW_BREAKPOINT_SIZE); - hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE); + /* + * On p10 predecessors, quadword is handle differently then + * other instructions. + */ + if (!cpu_has_feature(CPU_FTR_ARCH_31) && size == 16) + align_size = HW_BREAKPOINT_SIZE_QUADWORD; + + hw_start_addr = ALIGN_DOWN(info->address, align_size); + hw_end_addr = ALIGN(info->address + info->len, align_size); return ((ea < hw_end_addr) && (ea + size > hw_start_addr)); } -- cgit v1.2.3 From 4441eb02333a9b46a0d919aa7a6d3b137b5f2562 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 2 Sep 2020 09:59:39 +0530 Subject: powerpc/watchpoint: Fix handling of vector instructions Vector load/store instructions are special because they are always aligned. Thus unaligned EA needs to be aligned down before comparing it with watch ranges. Otherwise we might consider valid event as invalid. Fixes: 74c6881019b7 ("powerpc/watchpoint: Prepare handler to handle more than one watchpoint") Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200902042945.129369-3-ravi.bangoria@linux.ibm.com --- arch/powerpc/kernel/hw_breakpoint.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 9f7df1c37233..f6b24838ca3c 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -644,6 +644,8 @@ static void get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, if (*type == CACHEOP) { *size = cache_op_size(); *ea &= ~(*size - 1); + } else if (*type == LOAD_VMX || *type == STORE_VMX) { + *ea &= ~(*size - 1); } } -- cgit v1.2.3 From 9b6b7c680cc20971444d9f836e49fc98848bcd0a Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 2 Sep 2020 09:59:40 +0530 Subject: powerpc/watchpoint/ptrace: Fix SETHWDEBUG when CONFIG_HAVE_HW_BREAKPOINT=N When kernel is compiled with CONFIG_HAVE_HW_BREAKPOINT=N, user can still create watchpoint using PPC_PTRACE_SETHWDEBUG, with limited functionalities. But, such watchpoints are never firing because of the missing privilege settings. Fix that. 
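For context, a hypothetical tracer-side sketch of the request affected; the helper name is invented, while the structure, request and flags are the existing ptrace ABI:

#include <sys/ptrace.h>
#include <sys/types.h>
#include <string.h>
#include <asm/ptrace.h>

/*
 * Hypothetical sketch: a write watchpoint set through PPC_PTRACE_SETHWDEBUG.
 * On a CONFIG_HAVE_HW_BREAKPOINT=N kernel this request used to succeed but
 * the breakpoint never fired, because no privilege bits were programmed
 * into the DABR/DAWR.
 */
static long set_write_watchpoint(pid_t child, unsigned long addr)
{
	struct ppc_hw_breakpoint bp;

	memset(&bp, 0, sizeof(bp));
	bp.version = 1;
	bp.trigger_type = PPC_BREAKPOINT_TRIGGER_WRITE;
	bp.addr_mode = PPC_BREAKPOINT_MODE_EXACT;
	bp.condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
	bp.addr = addr;

	return ptrace(PPC_PTRACE_SETHWDEBUG, child, 0, &bp);
}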
It's safe to set HW_BRK_TYPE_PRIV_ALL because we don't really leak any kernel address in signal info. Setting HW_BRK_TYPE_PRIV_ALL will also help to find scenarios when kernel accesses user memory. Reported-by: Pedro Miraglia Franco de Carvalho Suggested-by: Pedro Miraglia Franco de Carvalho Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200902042945.129369-4-ravi.bangoria@linux.ibm.com --- arch/powerpc/kernel/ptrace/ptrace-noadv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/ptrace/ptrace-noadv.c b/arch/powerpc/kernel/ptrace/ptrace-noadv.c index 697c7e4b5877..57a0ab822334 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-noadv.c +++ b/arch/powerpc/kernel/ptrace/ptrace-noadv.c @@ -217,7 +217,7 @@ long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_inf return -EIO; brk.address = ALIGN_DOWN(bp_info->addr, HW_BREAKPOINT_SIZE); - brk.type = HW_BRK_TYPE_TRANSLATE; + brk.type = HW_BRK_TYPE_TRANSLATE | HW_BRK_TYPE_PRIV_ALL; brk.len = DABR_MAX_LEN; if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) brk.type |= HW_BRK_TYPE_READ; -- cgit v1.2.3 From edc8dd99b29e4d705c45e2a3a6c01b096ee056db Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 2 Sep 2020 09:59:41 +0530 Subject: powerpc/watchpoint: Move DAWR detection logic outside of hw_breakpoint.c Power10 hw has multiple DAWRs but hw doesn't tell which DAWR caused the exception. So we have a sw logic to detect that in hw_breakpoint.c. But hw_breakpoint.c gets compiled only with CONFIG_HAVE_HW_BREAKPOINT=Y. Move DAWR detection logic outside of hw_breakpoint.c so that it can be reused when CONFIG_HAVE_HW_BREAKPOINT is not set. Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200902042945.129369-5-ravi.bangoria@linux.ibm.com --- arch/powerpc/include/asm/hw_breakpoint.h | 8 ++ arch/powerpc/kernel/Makefile | 3 +- arch/powerpc/kernel/hw_breakpoint.c | 159 +---------------------- arch/powerpc/kernel/hw_breakpoint_constraints.c | 162 ++++++++++++++++++++++++ 4 files changed, 174 insertions(+), 158 deletions(-) create mode 100644 arch/powerpc/kernel/hw_breakpoint_constraints.c (limited to 'arch') diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index 9b68eafebf43..81872c420476 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -10,6 +10,7 @@ #define _PPC_BOOK3S_64_HW_BREAKPOINT_H #include +#include #ifdef __KERNEL__ struct arch_hw_breakpoint { @@ -52,6 +53,13 @@ static inline int nr_wp_slots(void) return cpu_has_feature(CPU_FTR_DAWR1) ? 
2 : 1; } +bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr, + unsigned long ea, int type, int size, + struct arch_hw_breakpoint *info); + +void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, + int *type, int *size, unsigned long *ea); + #ifdef CONFIG_HAVE_HW_BREAKPOINT #include #include diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index cbf41fb4ee89..a5550c2b24c4 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -45,7 +45,8 @@ obj-y := cputable.o syscalls.o \ signal.o sysfs.o cacheinfo.o time.o \ prom.o traps.o setup-common.o \ udbg.o misc.o io.o misc_$(BITS).o \ - of_platform.o prom_parse.o firmware.o + of_platform.o prom_parse.o firmware.o \ + hw_breakpoint_constraints.o obj-y += ptrace/ obj-$(CONFIG_PPC64) += setup_64.o \ paca.o nvram_64.o note.o syscall_64.o diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index f6b24838ca3c..f4e8f21046f5 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -494,161 +494,6 @@ reset: } } -static bool dar_in_user_range(unsigned long dar, struct arch_hw_breakpoint *info) -{ - return ((info->address <= dar) && (dar - info->address < info->len)); -} - -static bool ea_user_range_overlaps(unsigned long ea, int size, - struct arch_hw_breakpoint *info) -{ - return ((ea < info->address + info->len) && - (ea + size > info->address)); -} - -static bool dar_in_hw_range(unsigned long dar, struct arch_hw_breakpoint *info) -{ - unsigned long hw_start_addr, hw_end_addr; - - hw_start_addr = ALIGN_DOWN(info->address, HW_BREAKPOINT_SIZE); - hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE); - - return ((hw_start_addr <= dar) && (hw_end_addr > dar)); -} - -static bool ea_hw_range_overlaps(unsigned long ea, int size, - struct arch_hw_breakpoint *info) -{ - unsigned long hw_start_addr, hw_end_addr; - unsigned long align_size = HW_BREAKPOINT_SIZE; - - /* - * On p10 predecessors, quadword is handle differently then - * other instructions. - */ - if (!cpu_has_feature(CPU_FTR_ARCH_31) && size == 16) - align_size = HW_BREAKPOINT_SIZE_QUADWORD; - - hw_start_addr = ALIGN_DOWN(info->address, align_size); - hw_end_addr = ALIGN(info->address + info->len, align_size); - - return ((ea < hw_end_addr) && (ea + size > hw_start_addr)); -} - -/* - * If hw has multiple DAWR registers, we also need to check all - * dawrx constraint bits to confirm this is _really_ a valid event. - * If type is UNKNOWN, but privilege level matches, consider it as - * a positive match. - */ -static bool check_dawrx_constraints(struct pt_regs *regs, int type, - struct arch_hw_breakpoint *info) -{ - if (OP_IS_LOAD(type) && !(info->type & HW_BRK_TYPE_READ)) - return false; - - /* - * The Cache Management instructions other than dcbz never - * cause a match. i.e. if type is CACHEOP, the instruction - * is dcbz, and dcbz is treated as Store. - */ - if ((OP_IS_STORE(type) || type == CACHEOP) && !(info->type & HW_BRK_TYPE_WRITE)) - return false; - - if (is_kernel_addr(regs->nip) && !(info->type & HW_BRK_TYPE_KERNEL)) - return false; - - if (user_mode(regs) && !(info->type & HW_BRK_TYPE_USER)) - return false; - - return true; -} - -/* - * Return true if the event is valid wrt dawr configuration, - * including extraneous exception. Otherwise return false. 
- */ -static bool check_constraints(struct pt_regs *regs, struct ppc_inst instr, - unsigned long ea, int type, int size, - struct arch_hw_breakpoint *info) -{ - bool in_user_range = dar_in_user_range(regs->dar, info); - bool dawrx_constraints; - - /* - * 8xx supports only one breakpoint and thus we can - * unconditionally return true. - */ - if (IS_ENABLED(CONFIG_PPC_8xx)) { - if (!in_user_range) - info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; - return true; - } - - if (unlikely(ppc_inst_equal(instr, ppc_inst(0)))) { - if (cpu_has_feature(CPU_FTR_ARCH_31) && - !dar_in_hw_range(regs->dar, info)) - return false; - - return true; - } - - dawrx_constraints = check_dawrx_constraints(regs, type, info); - - if (type == UNKNOWN) { - if (cpu_has_feature(CPU_FTR_ARCH_31) && - !dar_in_hw_range(regs->dar, info)) - return false; - - return dawrx_constraints; - } - - if (ea_user_range_overlaps(ea, size, info)) - return dawrx_constraints; - - if (ea_hw_range_overlaps(ea, size, info)) { - if (dawrx_constraints) { - info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; - return true; - } - } - return false; -} - -static int cache_op_size(void) -{ -#ifdef __powerpc64__ - return ppc64_caches.l1d.block_size; -#else - return L1_CACHE_BYTES; -#endif -} - -static void get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, - int *type, int *size, unsigned long *ea) -{ - struct instruction_op op; - - if (__get_user_instr_inatomic(*instr, (void __user *)regs->nip)) - return; - - analyse_instr(&op, regs, *instr); - *type = GETTYPE(op.type); - *ea = op.ea; -#ifdef __powerpc64__ - if (!(regs->msr & MSR_64BIT)) - *ea &= 0xffffffffUL; -#endif - - *size = GETSIZE(op.type); - if (*type == CACHEOP) { - *size = cache_op_size(); - *ea &= ~(*size - 1); - } else if (*type == LOAD_VMX || *type == STORE_VMX) { - *ea &= ~(*size - 1); - } -} - static bool is_larx_stcx_instr(int type) { return type == LARX || type == STCX; @@ -732,7 +577,7 @@ int hw_breakpoint_handler(struct die_args *args) rcu_read_lock(); if (!IS_ENABLED(CONFIG_PPC_8xx)) - get_instr_detail(regs, &instr, &type, &size, &ea); + wp_get_instr_detail(regs, &instr, &type, &size, &ea); for (i = 0; i < nr_wp_slots(); i++) { bp[i] = __this_cpu_read(bp_per_reg[i]); @@ -742,7 +587,7 @@ int hw_breakpoint_handler(struct die_args *args) info[i] = counter_arch_bp(bp[i]); info[i]->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ; - if (check_constraints(regs, instr, ea, type, size, info[i])) { + if (wp_check_constraints(regs, instr, ea, type, size, info[i])) { if (!IS_ENABLED(CONFIG_PPC_8xx) && ppc_inst_equal(instr, ppc_inst(0))) { handler_error(bp[i], info[i]); diff --git a/arch/powerpc/kernel/hw_breakpoint_constraints.c b/arch/powerpc/kernel/hw_breakpoint_constraints.c new file mode 100644 index 000000000000..867ee4aa026a --- /dev/null +++ b/arch/powerpc/kernel/hw_breakpoint_constraints.c @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: GPL-2.0+ +#include +#include +#include +#include +#include +#include + +static bool dar_in_user_range(unsigned long dar, struct arch_hw_breakpoint *info) +{ + return ((info->address <= dar) && (dar - info->address < info->len)); +} + +static bool ea_user_range_overlaps(unsigned long ea, int size, + struct arch_hw_breakpoint *info) +{ + return ((ea < info->address + info->len) && + (ea + size > info->address)); +} + +static bool dar_in_hw_range(unsigned long dar, struct arch_hw_breakpoint *info) +{ + unsigned long hw_start_addr, hw_end_addr; + + hw_start_addr = ALIGN_DOWN(info->address, HW_BREAKPOINT_SIZE); + hw_end_addr = ALIGN(info->address + info->len, 
HW_BREAKPOINT_SIZE); + + return ((hw_start_addr <= dar) && (hw_end_addr > dar)); +} + +static bool ea_hw_range_overlaps(unsigned long ea, int size, + struct arch_hw_breakpoint *info) +{ + unsigned long hw_start_addr, hw_end_addr; + unsigned long align_size = HW_BREAKPOINT_SIZE; + + /* + * On p10 predecessors, quadword is handle differently then + * other instructions. + */ + if (!cpu_has_feature(CPU_FTR_ARCH_31) && size == 16) + align_size = HW_BREAKPOINT_SIZE_QUADWORD; + + hw_start_addr = ALIGN_DOWN(info->address, align_size); + hw_end_addr = ALIGN(info->address + info->len, align_size); + + return ((ea < hw_end_addr) && (ea + size > hw_start_addr)); +} + +/* + * If hw has multiple DAWR registers, we also need to check all + * dawrx constraint bits to confirm this is _really_ a valid event. + * If type is UNKNOWN, but privilege level matches, consider it as + * a positive match. + */ +static bool check_dawrx_constraints(struct pt_regs *regs, int type, + struct arch_hw_breakpoint *info) +{ + if (OP_IS_LOAD(type) && !(info->type & HW_BRK_TYPE_READ)) + return false; + + /* + * The Cache Management instructions other than dcbz never + * cause a match. i.e. if type is CACHEOP, the instruction + * is dcbz, and dcbz is treated as Store. + */ + if ((OP_IS_STORE(type) || type == CACHEOP) && !(info->type & HW_BRK_TYPE_WRITE)) + return false; + + if (is_kernel_addr(regs->nip) && !(info->type & HW_BRK_TYPE_KERNEL)) + return false; + + if (user_mode(regs) && !(info->type & HW_BRK_TYPE_USER)) + return false; + + return true; +} + +/* + * Return true if the event is valid wrt dawr configuration, + * including extraneous exception. Otherwise return false. + */ +bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr, + unsigned long ea, int type, int size, + struct arch_hw_breakpoint *info) +{ + bool in_user_range = dar_in_user_range(regs->dar, info); + bool dawrx_constraints; + + /* + * 8xx supports only one breakpoint and thus we can + * unconditionally return true. 
+ */ + if (IS_ENABLED(CONFIG_PPC_8xx)) { + if (!in_user_range) + info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; + return true; + } + + if (unlikely(ppc_inst_equal(instr, ppc_inst(0)))) { + if (cpu_has_feature(CPU_FTR_ARCH_31) && + !dar_in_hw_range(regs->dar, info)) + return false; + + return true; + } + + dawrx_constraints = check_dawrx_constraints(regs, type, info); + + if (type == UNKNOWN) { + if (cpu_has_feature(CPU_FTR_ARCH_31) && + !dar_in_hw_range(regs->dar, info)) + return false; + + return dawrx_constraints; + } + + if (ea_user_range_overlaps(ea, size, info)) + return dawrx_constraints; + + if (ea_hw_range_overlaps(ea, size, info)) { + if (dawrx_constraints) { + info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; + return true; + } + } + return false; +} + +static int cache_op_size(void) +{ +#ifdef __powerpc64__ + return ppc64_caches.l1d.block_size; +#else + return L1_CACHE_BYTES; +#endif +} + +void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, + int *type, int *size, unsigned long *ea) +{ + struct instruction_op op; + + if (__get_user_instr_inatomic(*instr, (void __user *)regs->nip)) + return; + + analyse_instr(&op, regs, *instr); + *type = GETTYPE(op.type); + *ea = op.ea; +#ifdef __powerpc64__ + if (!(regs->msr & MSR_64BIT)) + *ea &= 0xffffffffUL; +#endif + + *size = GETSIZE(op.type); + if (*type == CACHEOP) { + *size = cache_op_size(); + *ea &= ~(*size - 1); + } else if (*type == LOAD_VMX || *type == STORE_VMX) { + *ea &= ~(*size - 1); + } +} -- cgit v1.2.3 From 5b905d77987de065bdd3a2906816b5f143df087b Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 2 Sep 2020 09:59:42 +0530 Subject: powerpc/watchpoint: Fix exception handling for CONFIG_HAVE_HW_BREAKPOINT=N On powerpc, ptrace watchpoint works in one-shot mode. i.e. kernel disables event every time it fires and user has to re-enable it. Also, in case of ptrace watchpoint, kernel notifies ptrace user before executing instruction. With CONFIG_HAVE_HW_BREAKPOINT=N, kernel is missing to disable ptrace event and thus it's causing infinite loop of exceptions. This is especially harmful when user watches on a data which is also read/written by kernel, eg syscall parameters. In such case, infinite exceptions happens in kernel mode which causes soft-lockup. 
Fixes: 9422de3e953d ("powerpc: Hardware breakpoints rewrite to handle non DABR breakpoint registers") Reported-by: Pedro Miraglia Franco de Carvalho Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200902042945.129369-6-ravi.bangoria@linux.ibm.com --- arch/powerpc/include/asm/hw_breakpoint.h | 3 ++ arch/powerpc/kernel/process.c | 48 +++++++++++++++++++++++++++++++ arch/powerpc/kernel/ptrace/ptrace-noadv.c | 4 ++- 3 files changed, 54 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index 81872c420476..abebfbee5b1c 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -18,6 +18,7 @@ struct arch_hw_breakpoint { u16 type; u16 len; /* length of the target data symbol */ u16 hw_len; /* length programmed in hw */ + u8 flags; }; /* Note: Don't change the first 6 bits below as they are in the same order @@ -37,6 +38,8 @@ struct arch_hw_breakpoint { #define HW_BRK_TYPE_PRIV_ALL (HW_BRK_TYPE_USER | HW_BRK_TYPE_KERNEL | \ HW_BRK_TYPE_HYP) +#define HW_BRK_FLAG_DISABLED 0x1 + /* Minimum granularity */ #ifdef CONFIG_PPC_8xx #define HW_BREAKPOINT_SIZE 0x4 diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 142680e885ad..483e36a42617 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -642,6 +642,44 @@ void do_send_trap(struct pt_regs *regs, unsigned long address, (void __user *)address); } #else /* !CONFIG_PPC_ADV_DEBUG_REGS */ + +static void do_break_handler(struct pt_regs *regs) +{ + struct arch_hw_breakpoint null_brk = {0}; + struct arch_hw_breakpoint *info; + struct ppc_inst instr = ppc_inst(0); + int type = 0; + int size = 0; + unsigned long ea; + int i; + + /* + * If underneath hw supports only one watchpoint, we know it + * caused exception. 8xx also falls into this category. + */ + if (nr_wp_slots() == 1) { + __set_breakpoint(0, &null_brk); + current->thread.hw_brk[0] = null_brk; + current->thread.hw_brk[0].flags |= HW_BRK_FLAG_DISABLED; + return; + } + + /* Otherwise findout which DAWR caused exception and disable it. */ + wp_get_instr_detail(regs, &instr, &type, &size, &ea); + + for (i = 0; i < nr_wp_slots(); i++) { + info = ¤t->thread.hw_brk[i]; + if (!info->address) + continue; + + if (wp_check_constraints(regs, instr, ea, type, size, info)) { + __set_breakpoint(i, &null_brk); + current->thread.hw_brk[i] = null_brk; + current->thread.hw_brk[i].flags |= HW_BRK_FLAG_DISABLED; + } + } +} + void do_break (struct pt_regs *regs, unsigned long address, unsigned long error_code) { @@ -653,6 +691,16 @@ void do_break (struct pt_regs *regs, unsigned long address, if (debugger_break_match(regs)) return; + /* + * We reach here only when watchpoint exception is generated by ptrace + * event (or hw is buggy!). Now if CONFIG_HAVE_HW_BREAKPOINT is set, + * watchpoint is already handled by hw_breakpoint_handler() so we don't + * have to do anything. But when CONFIG_HAVE_HW_BREAKPOINT is not set, + * we need to manually handle the watchpoint here. 
+ */ + if (!IS_ENABLED(CONFIG_HAVE_HW_BREAKPOINT)) + do_break_handler(regs); + /* Deliver the signal to userspace */ force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __user *)address); } diff --git a/arch/powerpc/kernel/ptrace/ptrace-noadv.c b/arch/powerpc/kernel/ptrace/ptrace-noadv.c index 57a0ab822334..c9122ed91340 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-noadv.c +++ b/arch/powerpc/kernel/ptrace/ptrace-noadv.c @@ -286,11 +286,13 @@ long ppc_del_hwdebug(struct task_struct *child, long data) } return ret; #else /* CONFIG_HAVE_HW_BREAKPOINT */ - if (child->thread.hw_brk[data - 1].address == 0) + if (!(child->thread.hw_brk[data - 1].flags & HW_BRK_FLAG_DISABLED) && + child->thread.hw_brk[data - 1].address == 0) return -ENOENT; child->thread.hw_brk[data - 1].address = 0; child->thread.hw_brk[data - 1].type = 0; + child->thread.hw_brk[data - 1].flags = 0; #endif /* CONFIG_HAVE_HW_BREAKPOINT */ return 0; -- cgit v1.2.3 From 58da5984d2ea6d95f3f9d9e8dd9f7e1b0dddfb3c Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 2 Sep 2020 09:59:43 +0530 Subject: powerpc/watchpoint: Add hw_len wherever missing There are couple of places where we set len but not hw_len. For ptrace/perf watchpoints, when CONFIG_HAVE_HW_BREAKPOINT=Y, hw_len will be calculated and set internally while parsing watchpoint. But when CONFIG_HAVE_HW_BREAKPOINT=N, we need to manually set 'hw_len'. Similarly for xmon as well, hw_len needs to be set directly. Fixes: b57aeab811db ("powerpc/watchpoint: Fix length calculation for unaligned target") Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200902042945.129369-7-ravi.bangoria@linux.ibm.com --- arch/powerpc/kernel/ptrace/ptrace-noadv.c | 1 + arch/powerpc/xmon/xmon.c | 1 + 2 files changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kernel/ptrace/ptrace-noadv.c b/arch/powerpc/kernel/ptrace/ptrace-noadv.c index c9122ed91340..48c52426af80 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-noadv.c +++ b/arch/powerpc/kernel/ptrace/ptrace-noadv.c @@ -219,6 +219,7 @@ long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_inf brk.address = ALIGN_DOWN(bp_info->addr, HW_BREAKPOINT_SIZE); brk.type = HW_BRK_TYPE_TRANSLATE | HW_BRK_TYPE_PRIV_ALL; brk.len = DABR_MAX_LEN; + brk.hw_len = DABR_MAX_LEN; if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) brk.type |= HW_BRK_TYPE_READ; if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index df7bca00f5ec..55c43a6c9111 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -969,6 +969,7 @@ static void insert_cpu_bpts(void) brk.address = dabr[i].address; brk.type = (dabr[i].enabled & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL; brk.len = 8; + brk.hw_len = 8; __set_breakpoint(i, &brk); } } -- cgit v1.2.3 From fa725cc53d353110f39a9e5b9f60d6acb2c7ff49 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 2 Sep 2020 09:59:44 +0530 Subject: powerpc/watchpoint/ptrace: Introduce PPC_DEBUG_FEATURE_DATA_BP_ARCH_31 PPC_DEBUG_FEATURE_DATA_BP_ARCH_31 can be used to determine whether we are running on an ISA 3.1 compliant machine. Which is needed to determine DAR behaviour, 512 byte boundary limit etc. This was requested by Pedro Miraglia Franco de Carvalho for extending watchpoint features in gdb. Note that availability of 2nd DAWR is independent of this flag and should be checked using ppc_debug_info->num_data_bps. 
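A hypothetical debugger-side usage sketch (the helper name is invented; the request, structure and flag come from the ABI described above):

#include <sys/ptrace.h>
#include <sys/types.h>
#include <stdbool.h>
#include <asm/ptrace.h>

/* Sketch: ask the kernel whether the tracee's CPU follows ISA v3.1
 * watchpoint semantics (DAR behaviour, 512 byte boundary limit etc.). */
static bool tracee_is_isa_v31(pid_t child)
{
	struct ppc_debug_info info;

	if (ptrace(PPC_PTRACE_GETHWDBGINFO, child, 0, &info) == -1)
		return false;

	/* Availability of the 2nd DAWR is still reported via num_data_bps. */
	return info.features & PPC_DEBUG_FEATURE_DATA_BP_ARCH_31;
}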
Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200902042945.129369-8-ravi.bangoria@linux.ibm.com --- Documentation/powerpc/ptrace.rst | 1 + arch/powerpc/include/uapi/asm/ptrace.h | 1 + arch/powerpc/kernel/ptrace/ptrace-noadv.c | 2 ++ 3 files changed, 4 insertions(+) (limited to 'arch') diff --git a/Documentation/powerpc/ptrace.rst b/Documentation/powerpc/ptrace.rst index 864d4b6dddd1..77725d69eb4a 100644 --- a/Documentation/powerpc/ptrace.rst +++ b/Documentation/powerpc/ptrace.rst @@ -46,6 +46,7 @@ features will have bits indicating whether there is support for:: #define PPC_DEBUG_FEATURE_DATA_BP_RANGE 0x4 #define PPC_DEBUG_FEATURE_DATA_BP_MASK 0x8 #define PPC_DEBUG_FEATURE_DATA_BP_DAWR 0x10 + #define PPC_DEBUG_FEATURE_DATA_BP_ARCH_31 0x20 2. PTRACE_SETHWDEBUG diff --git a/arch/powerpc/include/uapi/asm/ptrace.h b/arch/powerpc/include/uapi/asm/ptrace.h index f5f1ccc740fc..7004cfea3f5f 100644 --- a/arch/powerpc/include/uapi/asm/ptrace.h +++ b/arch/powerpc/include/uapi/asm/ptrace.h @@ -222,6 +222,7 @@ struct ppc_debug_info { #define PPC_DEBUG_FEATURE_DATA_BP_RANGE 0x0000000000000004 #define PPC_DEBUG_FEATURE_DATA_BP_MASK 0x0000000000000008 #define PPC_DEBUG_FEATURE_DATA_BP_DAWR 0x0000000000000010 +#define PPC_DEBUG_FEATURE_DATA_BP_ARCH_31 0x0000000000000020 #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/kernel/ptrace/ptrace-noadv.c b/arch/powerpc/kernel/ptrace/ptrace-noadv.c index 48c52426af80..aa36fcad36cd 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-noadv.c +++ b/arch/powerpc/kernel/ptrace/ptrace-noadv.c @@ -57,6 +57,8 @@ void ppc_gethwdinfo(struct ppc_debug_info *dbginfo) } else { dbginfo->features = 0; } + if (cpu_has_feature(CPU_FTR_ARCH_31)) + dbginfo->features |= PPC_DEBUG_FEATURE_DATA_BP_ARCH_31; } int ptrace_get_debugreg(struct task_struct *child, unsigned long addr, -- cgit v1.2.3 From 2a32abac8860aa1c3a1fc99973ce67179575b36c Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 8 Jun 2020 12:39:01 +0530 Subject: powerpc/percpu: Update percpu bootmem allocator This update the ppc64 version to be closer to x86/sparc. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200608070904.387440-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/kernel/setup_64.c | 45 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 6be430107c6f..55bbbf89ea82 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -756,17 +756,46 @@ void __init emergency_stack_init(void) } #ifdef CONFIG_SMP -#define PCPU_DYN_SIZE () - -static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) +/** + * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu + * @cpu: cpu to allocate for + * @size: size allocation in bytes + * @align: alignment + * + * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper + * does the right thing for NUMA regardless of the current + * configuration. + * + * RETURNS: + * Pointer to the allocated area on success, NULL on failure. 
+ */ +static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, + size_t align) { - return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS), - MEMBLOCK_ALLOC_ACCESSIBLE, - early_cpu_to_node(cpu)); + const unsigned long goal = __pa(MAX_DMA_ADDRESS); +#ifdef CONFIG_NEED_MULTIPLE_NODES + int node = early_cpu_to_node(cpu); + void *ptr; + if (!node_online(node) || !NODE_DATA(node)) { + ptr = memblock_alloc_from(size, align, goal); + pr_info("cpu %d has no node %d or node-local memory\n", + cpu, node); + pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n", + cpu, size, __pa(ptr)); + } else { + ptr = memblock_alloc_try_nid(size, align, goal, + MEMBLOCK_ALLOC_ACCESSIBLE, node); + pr_debug("per cpu data for cpu%d %lu bytes on node%d at " + "%016lx\n", cpu, size, node, __pa(ptr)); + } + return ptr; +#else + return memblock_alloc_from(size, align, goal); +#endif } -static void __init pcpu_fc_free(void *ptr, size_t size) +static void __init pcpu_free_bootmem(void *ptr, size_t size) { memblock_free(__pa(ptr), size); } @@ -801,7 +830,7 @@ void __init setup_per_cpu_areas(void) atom_size = 1 << 20; rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance, - pcpu_fc_alloc, pcpu_fc_free); + pcpu_alloc_bootmem, pcpu_free_bootmem); if (rc < 0) panic("cannot initialize percpu area (err=%d)", rc); -- cgit v1.2.3 From eb553f16973ade990d05946af9ae191394712c8a Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 8 Jun 2020 12:39:02 +0530 Subject: powerpc/64/mm: implement page mapping percpu first chunk allocator Implement page mapping percpu first chunk allocator as a fallback to the embedding allocator. With 4K hash translation we limit our page table range to 64TB and commit: 0034d395f89d ("powerpc/mm/hash64: Map all the kernel regions in the same 0xc range") moved all kernel mapping to that 64TB range. In-order to support sparse memory layout we need to increase our linear mapping space and reduce other mappings. With such a layout percpu embedded first chunk allocator will fail because of small vmalloc range. Add a fallback to page mapping percpu first chunk allocator for such failures. The below dmesg output can be observed in such case. 
percpu: max_distance=0x1ffffef00000 too large for vmalloc space 0x10000000000 PERCPU: auto allocator failed (-22), falling back to page size percpu: 40 4K pages/cpu s148816 r0 d15024 Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200608070904.387440-2-aneesh.kumar@linux.ibm.com --- arch/powerpc/Kconfig | 5 +++- arch/powerpc/kernel/setup_64.c | 62 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 63 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 787e829b6f25..4b33477dafb8 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -59,7 +59,10 @@ config HAVE_SETUP_PER_CPU_AREA def_bool PPC64 config NEED_PER_CPU_EMBED_FIRST_CHUNK - def_bool PPC64 + def_bool y if PPC64 + +config NEED_PER_CPU_PAGE_FIRST_CHUNK + def_bool y if PPC64 config NR_IRQS int "Number of virtual interrupt numbers" diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 55bbbf89ea82..bb9cab3641d7 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -66,6 +66,7 @@ #include #include #include +#include #include "setup.h" @@ -811,13 +812,58 @@ static int pcpu_cpu_distance(unsigned int from, unsigned int to) unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; EXPORT_SYMBOL(__per_cpu_offset); +static void __init pcpu_populate_pte(unsigned long addr) +{ + pgd_t *pgd = pgd_offset_k(addr); + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) { + pud_t *new; + + new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE); + if (!new) + goto err_alloc; + p4d_populate(&init_mm, p4d, new); + } + + pud = pud_offset(p4d, addr); + if (pud_none(*pud)) { + pmd_t *new; + + new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE); + if (!new) + goto err_alloc; + pud_populate(&init_mm, pud, new); + } + + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd)) { + pte_t *new; + + new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE); + if (!new) + goto err_alloc; + pmd_populate_kernel(&init_mm, pmd, new); + } + + return; + +err_alloc: + panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n", + __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); +} + + void __init setup_per_cpu_areas(void) { const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; size_t atom_size; unsigned long delta; unsigned int cpu; - int rc; + int rc = -EINVAL; /* * Linear mapping is one of 4K, 1M and 16M. 
For 4K, no need @@ -829,8 +875,18 @@ void __init setup_per_cpu_areas(void) else atom_size = 1 << 20; - rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance, - pcpu_alloc_bootmem, pcpu_free_bootmem); + if (pcpu_chosen_fc != PCPU_FC_PAGE) { + rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance, + pcpu_alloc_bootmem, pcpu_free_bootmem); + if (rc) + pr_warn("PERCPU: %s allocator failed (%d), " + "falling back to page size\n", + pcpu_fc_names[pcpu_chosen_fc], rc); + } + + if (rc < 0) + rc = pcpu_page_first_chunk(0, pcpu_alloc_bootmem, pcpu_free_bootmem, + pcpu_populate_pte); if (rc < 0) panic("cannot initialize percpu area (err=%d)", rc); -- cgit v1.2.3 From 7746406baa3bc9e23fdd7b7da2f04d86e25ab837 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 8 Jun 2020 12:39:03 +0530 Subject: powerpc/book3s64/hash/4k: Support large linear mapping range with 4K With commit: 0034d395f89d ("powerpc/mm/hash64: Map all the kernel regions in the same 0xc range"), we now split the 64TB address range into 4 contexts each of 16TB. That implies we can do only 16TB linear mapping. On some systems, eg. Power9, memory attached to nodes > 0 will appear above 16TB in the linear mapping. This resulted in kernel crash when we boot such systems in hash translation mode with 4K PAGE_SIZE. This patch updates the kernel mapping such that we now start supporting upto 61TB of memory with 4K. The kernel mapping now looks like below 4K PAGE_SIZE and hash translation. vmalloc start = 0xc0003d0000000000 IO start = 0xc0003e0000000000 vmemmap start = 0xc0003f0000000000 Our MAX_PHYSMEM_BITS for 4K is still 64TB even though we can only map 61TB. We prevent bolt mapping anything outside 61TB range by checking against H_VMALLOC_START. Fixes: 0034d395f89d ("powerpc/mm/hash64: Map all the kernel regions in the same 0xc range") Reported-by: Cameron Berkenpas Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200608070904.387440-3-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/book3s/64/hash-4k.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 082b98808701..b3ca542f871e 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -13,20 +13,19 @@ */ #define MAX_EA_BITS_PER_CONTEXT 46 -#define REGION_SHIFT (MAX_EA_BITS_PER_CONTEXT - 2) /* - * Our page table limit us to 64TB. Hence for the kernel mapping, - * each MAP area is limited to 16 TB. - * The four map areas are: linear mapping, vmap, IO and vmemmap + * Our page table limit us to 64TB. For 64TB physical memory, we only need 64GB + * of vmemmap space. To better support sparse memory layout, we use 61TB + * linear map range, 1TB of vmalloc, 1TB of I/O and 1TB of vmememmap. 
*/ +#define REGION_SHIFT (40) #define H_KERN_MAP_SIZE (ASM_CONST(1) << REGION_SHIFT) /* - * Define the address range of the kernel non-linear virtual area - * 16TB + * Define the address range of the kernel non-linear virtual area (61TB) */ -#define H_KERN_VIRT_START ASM_CONST(0xc000100000000000) +#define H_KERN_VIRT_START ASM_CONST(0xc0003d0000000000) #ifndef __ASSEMBLY__ #define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE) -- cgit v1.2.3 From b32d5d7e920a364287f6206af2d20179978a617d Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 8 Jun 2020 12:39:04 +0530 Subject: powerpc/mm/book3s: Split radix and hash MAX_PHYSMEM limit MAX_PHYSMEM #define is used along with sparsemem to determine the SECTION_SHIFT value. Powerpc also uses the same value to limit the max memory enabled on the system. With 4K PAGE_SIZE and hash translation mode, we want to limit the max memory enabled to 64TB due to page table size restrictions. However, with radix translation, we don't have these restrictions. Hence split the radix and hash MA_PHYSMEM limit and use different limit for each of them. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200608070904.387440-4-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/book3s/64/hash-4k.h | 5 +++++ arch/powerpc/include/asm/book3s/64/hash-64k.h | 13 +++++++++++++ arch/powerpc/include/asm/book3s/64/mmu-hash.h | 4 ++-- arch/powerpc/include/asm/book3s/64/mmu.h | 15 --------------- arch/powerpc/include/asm/book3s/64/pgtable.h | 7 +++++++ arch/powerpc/include/asm/book3s/64/radix.h | 16 ++++++++++++++++ arch/powerpc/kernel/prom.c | 5 +++++ arch/powerpc/mm/book3s64/slb.c | 4 ++-- 8 files changed, 50 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index b3ca542f871e..b6ac4f86c87b 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -22,6 +22,11 @@ #define REGION_SHIFT (40) #define H_KERN_MAP_SIZE (ASM_CONST(1) << REGION_SHIFT) +/* + * Limits the linear mapping range + */ +#define H_MAX_PHYSMEM_BITS 46 + /* * Define the address range of the kernel non-linear virtual area (61TB) */ diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index f20de1149ebe..338e62fbea0b 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -7,6 +7,19 @@ #define H_PUD_INDEX_SIZE 10 // size: 8B << 10 = 8KB, maps 2^10 x 16GB = 16TB #define H_PGD_INDEX_SIZE 8 // size: 8B << 8 = 2KB, maps 2^8 x 16TB = 4PB +/* + * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS + * if we increase SECTIONS_WIDTH we will not store node details in page->flags and + * page_to_nid does a page->section->node lookup + * Hence only increase for VMEMMAP. Further depending on SPARSEMEM_EXTREME reduce + * memory requirements with large number of sections. + * 51 bits is the max physical real address on POWER9 + */ +#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) +#define H_MAX_PHYSMEM_BITS 51 +#else +#define H_MAX_PHYSMEM_BITS 46 +#endif /* * Each context is 512TB size. 
SLB miss for first context/default context diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 93d18da5e7ec..683a9c7d1b03 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -577,8 +577,8 @@ extern void slb_set_size(u16 size); * For vmalloc and memmap, we use just one context with 512TB. With 64 byte * struct page size, we need ony 32 TB in memmap for 2PB (51 bits (MAX_PHYSMEM_BITS)). */ -#if (MAX_PHYSMEM_BITS > MAX_EA_BITS_PER_CONTEXT) -#define MAX_KERNEL_CTX_CNT (1UL << (MAX_PHYSMEM_BITS - MAX_EA_BITS_PER_CONTEXT)) +#if (H_MAX_PHYSMEM_BITS > MAX_EA_BITS_PER_CONTEXT) +#define MAX_KERNEL_CTX_CNT (1UL << (H_MAX_PHYSMEM_BITS - MAX_EA_BITS_PER_CONTEXT)) #else #define MAX_KERNEL_CTX_CNT 1 #endif diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index b392384a3b15..ddc414ab3c4d 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -27,21 +27,6 @@ struct mmu_psize_def { extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; #endif /* __ASSEMBLY__ */ -/* - * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS - * if we increase SECTIONS_WIDTH we will not store node details in page->flags and - * page_to_nid does a page->section->node lookup - * Hence only increase for VMEMMAP. Further depending on SPARSEMEM_EXTREME reduce - * memory requirements with large number of sections. - * 51 bits is the max physical real address on POWER9 - */ -#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \ - defined(CONFIG_PPC_64K_PAGES) -#define MAX_PHYSMEM_BITS 51 -#else -#define MAX_PHYSMEM_BITS 46 -#endif - /* 64-bit classic hash table MMU */ #include diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 495fc0ccb453..76880c7066fa 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -294,6 +294,13 @@ extern unsigned long pci_io_base; #include #include +#if H_MAX_PHYSMEM_BITS > R_MAX_PHYSMEM_BITS +#define MAX_PHYSMEM_BITS H_MAX_PHYSMEM_BITS +#else +#define MAX_PHYSMEM_BITS R_MAX_PHYSMEM_BITS +#endif + + #ifdef CONFIG_PPC_64K_PAGES #include #else diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 0cba794c4fb8..c7813dc628fc 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -91,6 +91,22 @@ * +------------------------------+ Kernel linear (0xc.....) */ + +/* + * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS + * if we increase SECTIONS_WIDTH we will not store node details in page->flags and + * page_to_nid does a page->section->node lookup + * Hence only increase for VMEMMAP. Further depending on SPARSEMEM_EXTREME reduce + * memory requirements with large number of sections. + * 51 bits is the max physical real address on POWER9 + */ + +#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) +#define R_MAX_PHYSMEM_BITS 51 +#else +#define R_MAX_PHYSMEM_BITS 46 +#endif + #define RADIX_KERN_VIRT_START ASM_CONST(0xc008000000000000) /* * 49 = MAX_EA_BITS_PER_CONTEXT (hash specific). 
To make sure we pick diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index d8a2fb87ba0c..c1545f22c077 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -776,6 +776,11 @@ void __init early_init_devtree(void *params) limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE); memblock_enforce_memory_limit(limit); +#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_4K_PAGES) + if (!early_radix_enabled()) + memblock_cap_memory_range(0, 1UL << (H_MAX_PHYSMEM_BITS)); +#endif + memblock_allow_resize(); memblock_dump_all(); diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index 156c38f89511..c30fcbfa0e32 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -765,8 +765,8 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id) if (id == LINEAR_MAP_REGION_ID) { - /* We only support upto MAX_PHYSMEM_BITS */ - if ((ea & EA_MASK) > (1UL << MAX_PHYSMEM_BITS)) + /* We only support upto H_MAX_PHYSMEM_BITS */ + if ((ea & EA_MASK) > (1UL << H_MAX_PHYSMEM_BITS)) return -EFAULT; flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp; -- cgit v1.2.3 From 66943005cc41f48e4d05614e8f76c0ca1812f0fd Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 5 Sep 2020 09:02:20 +1000 Subject: powerpc/tau: Use appropriate temperature sample interval According to the MPC750 Users Manual, the SITV value in Thermal Management Register 3 is 13 bits long. The present code calculates the SITV value as 60 * 500 cycles. This would overflow to give 10 us on a 500 MHz CPU rather than the intended 60 us. (But according to the Microprocessor Datasheet, there is also a factor of 266 that has to be applied to this value on certain parts i.e. speed sort above 266 MHz.) Always use the maximum cycle count, as recommended by the Datasheet. Fixes: 1da177e4c3f41 ("Linux-2.6.12-rc2") Signed-off-by: Finn Thain Tested-by: Stan Johnson Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/896f542e5f0f1d6cf8218524c2b67d79f3d69b3c.1599260540.git.fthain@telegraphics.com.au --- arch/powerpc/include/asm/reg.h | 2 +- arch/powerpc/kernel/tau_6xx.c | 12 ++++-------- 2 files changed, 5 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 88fb88491fe9..5647006ed373 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -817,7 +817,7 @@ #define THRM1_TIN (1 << 31) #define THRM1_TIV (1 << 30) #define THRM1_THRES(x) ((x&0x7f)<<23) -#define THRM3_SITV(x) ((x&0x3fff)<<1) +#define THRM3_SITV(x) ((x & 0x1fff) << 1) #define THRM1_TID (1<<2) #define THRM1_TIE (1<<1) #define THRM1_V (1<<0) diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index e2ab8a111b69..976d5bc1b517 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -178,15 +178,11 @@ static void tau_timeout(void * info) * complex sleep code needs to be added. One mtspr every time * tau_timeout is called is probably not a big deal. * - * Enable thermal sensor and set up sample interval timer - * need 20 us to do the compare.. until a nice 'cpu_speed' function - * call is implemented, just assume a 500 mhz clock. It doesn't really - * matter if we take too long for a compare since it's all interrupt - * driven anyway. - * - * use a extra long time.. 
(60 us @ 500 mhz) + * The "PowerPC 740 and PowerPC 750 Microprocessor Datasheet" + * recommends that "the maximum value be set in THRM3 under all + * conditions." */ - mtspr(SPRN_THRM3, THRM3_SITV(500*60) | THRM3_E); + mtspr(SPRN_THRM3, THRM3_SITV(0x1fff) | THRM3_E); local_irq_restore(flags); } -- cgit v1.2.3 From b1c6a0a10bfaf36ec82fde6f621da72407fa60a1 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 5 Sep 2020 09:02:20 +1000 Subject: powerpc/tau: Convert from timer to workqueue Since commit 19dbdcb8039cf ("smp: Warn on function calls from softirq context") the Thermal Assist Unit driver causes a warning like the following when CONFIG_SMP is enabled. ------------[ cut here ]------------ WARNING: CPU: 0 PID: 0 at kernel/smp.c:428 smp_call_function_many_cond+0xf4/0x38c Modules linked in: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.7.0-pmac #3 NIP: c00b37a8 LR: c00b3abc CTR: c001218c REGS: c0799c60 TRAP: 0700 Not tainted (5.7.0-pmac) MSR: 00029032 CR: 42000224 XER: 00000000 GPR00: c00b3abc c0799d18 c076e300 c079ef5c c0011fec 00000000 00000000 00000000 GPR08: 00000100 00000100 00008000 ffffffff 42000224 00000000 c079d040 c079d044 GPR16: 00000001 00000000 00000004 c0799da0 c079f054 c07a0000 c07a0000 00000000 GPR24: c0011fec 00000000 c079ef5c c079ef5c 00000000 00000000 00000000 00000000 NIP [c00b37a8] smp_call_function_many_cond+0xf4/0x38c LR [c00b3abc] on_each_cpu+0x38/0x68 Call Trace: [c0799d18] [ffffffff] 0xffffffff (unreliable) [c0799d68] [c00b3abc] on_each_cpu+0x38/0x68 [c0799d88] [c0096704] call_timer_fn.isra.26+0x20/0x7c [c0799d98] [c0096b40] run_timer_softirq+0x1d4/0x3fc [c0799df8] [c05b4368] __do_softirq+0x118/0x240 [c0799e58] [c0039c44] irq_exit+0xc4/0xcc [c0799e68] [c000ade8] timer_interrupt+0x1b0/0x230 [c0799ea8] [c0013520] ret_from_except+0x0/0x14 --- interrupt: 901 at arch_cpu_idle+0x24/0x6c LR = arch_cpu_idle+0x24/0x6c [c0799f70] [00000001] 0x1 (unreliable) [c0799f80] [c0060990] do_idle+0xd8/0x17c [c0799fa0] [c0060ba8] cpu_startup_entry+0x24/0x28 [c0799fb0] [c072d220] start_kernel+0x434/0x44c [c0799ff0] [00003860] 0x3860 Instruction dump: 8129f204 2f890000 40beff98 3d20c07a 8929eec4 2f890000 40beff88 0fe00000 81220000 552805de 550802ef 4182ff84 <0fe00000> 3860ffff 7f65db78 7f44d378 ---[ end trace 34a886e47819c2eb ]--- Don't call on_each_cpu() from a timer callback, call it from a worker thread instead. 
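In outline, the conversion replaces the self-rearming timer with a self-requeuing work item, so that the cross-CPU call runs from process context where on_each_cpu() is allowed. A minimal sketch of the pattern follows; the names, the 2000 ms interval and the empty per-CPU callback are placeholders for illustration, not the driver's own code:

#include <linux/workqueue.h>
#include <linux/delay.h>
#include <linux/smp.h>
#include <linux/errno.h>
#include <linux/init.h>

static struct workqueue_struct *poll_wq;

static void poll_one_cpu(void *info)
{
	/* per-CPU sampling goes here (tau_timeout() in the patch below) */
}

static void poll_work_func(struct work_struct *work)
{
	msleep(2000);				/* was: the timer period in jiffies */
	on_each_cpu(poll_one_cpu, NULL, 0);	/* legal here: process context */
	queue_work(poll_wq, work);		/* re-queue ourselves, as mod_timer() did */
}
static DECLARE_WORK(poll_work, poll_work_func);

static int __init poll_init(void)
{
	poll_wq = alloc_workqueue("poll", WQ_UNBOUND, 1);
	if (!poll_wq)
		return -ENOMEM;
	queue_work(poll_wq, &poll_work);
	return 0;
}

The patch below follows exactly this shape, keeping tau_timeout() as the per-CPU callback and turning the old timer period into an msleep() interval.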
Fixes: 1da177e4c3f41 ("Linux-2.6.12-rc2") Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/bb61650bea4f4c91fb8e24b9a6f130a1438651a7.1599260540.git.fthain@telegraphics.com.au --- arch/powerpc/kernel/tau_6xx.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index 976d5bc1b517..268205cc347d 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -13,13 +13,14 @@ */ #include -#include #include #include #include #include #include #include +#include +#include #include #include @@ -39,8 +40,6 @@ static struct tau_temp unsigned char grew; } tau[NR_CPUS]; -struct timer_list tau_timer; - #undef DEBUG /* TODO: put these in a /proc interface, with some sanity checks, and maybe @@ -50,7 +49,7 @@ struct timer_list tau_timer; #define step_size 2 /* step size when temp goes out of range */ #define window_expand 1 /* expand the window by this much */ /* configurable values for shrinking the window */ -#define shrink_timer 2*HZ /* period between shrinking the window */ +#define shrink_timer 2000 /* period between shrinking the window */ #define min_window 2 /* minimum window size, degrees C */ static void set_thresholds(unsigned long cpu) @@ -187,14 +186,18 @@ static void tau_timeout(void * info) local_irq_restore(flags); } -static void tau_timeout_smp(struct timer_list *unused) -{ +static struct workqueue_struct *tau_workq; - /* schedule ourselves to be run again */ - mod_timer(&tau_timer, jiffies + shrink_timer) ; +static void tau_work_func(struct work_struct *work) +{ + msleep(shrink_timer); on_each_cpu(tau_timeout, NULL, 0); + /* schedule ourselves to be run again */ + queue_work(tau_workq, work); } +DECLARE_WORK(tau_work, tau_work_func); + /* * setup the TAU * @@ -227,21 +230,16 @@ static int __init TAU_init(void) return 1; } - - /* first, set up the window shrinking timer */ - timer_setup(&tau_timer, tau_timeout_smp, 0); - tau_timer.expires = jiffies + shrink_timer; - add_timer(&tau_timer); + tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1, 0); + if (!tau_workq) + return -ENOMEM; on_each_cpu(TAU_init_smp, NULL, 0); - printk("Thermal assist unit "); -#ifdef CONFIG_TAU_INT - printk("using interrupts, "); -#else - printk("using timers, "); -#endif - printk("shrink_timer: %d jiffies\n", shrink_timer); + queue_work(tau_workq, &tau_work); + + pr_info("Thermal assist unit using %s, shrink_timer: %d ms\n", + IS_ENABLED(CONFIG_TAU_INT) ? "interrupts" : "workqueue", shrink_timer); tau_initialized = 1; return 0; -- cgit v1.2.3 From 420ab2bc7544d978a5d0762ee736412fe9c796ab Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 5 Sep 2020 09:02:20 +1000 Subject: powerpc/tau: Remove duplicated set_thresholds() call The commentary at the call site seems to disagree with the code. The conditional prevents calling set_thresholds() via the exception handler, which appears to crash. Perhaps that's because it immediately triggers another TAU exception. Anyway, calling set_thresholds() from TAUupdate() is redundant because tau_timeout() does so. 
Fixes: 1da177e4c3f41 ("Linux-2.6.12-rc2") Signed-off-by: Finn Thain Tested-by: Stan Johnson Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d7c7ee33232cf72a6a6bbb6ef05838b2e2b113c0.1599260540.git.fthain@telegraphics.com.au --- arch/powerpc/kernel/tau_6xx.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index 268205cc347d..b8d7e7d498e0 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -110,11 +110,6 @@ static void TAUupdate(int cpu) #ifdef DEBUG printk("grew = %d\n", tau[cpu].grew); #endif - -#ifndef CONFIG_TAU_INT /* tau_timeout will do this if not using interrupts */ - set_thresholds(cpu); -#endif - } #ifdef CONFIG_TAU_INT -- cgit v1.2.3 From 5e3119e15fed5b9a9a7e528665ff098a4a8dbdbc Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 5 Sep 2020 09:02:20 +1000 Subject: powerpc/tau: Check processor type before enabling TAU interrupt According to Freescale's documentation, MPC74XX processors have an erratum that prevents the TAU interrupt from working, so don't try to use it when running on those processors. Fixes: 1da177e4c3f41 ("Linux-2.6.12-rc2") Signed-off-by: Finn Thain Tested-by: Stan Johnson Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c281611544768e758bd58fe812cf702a5bd2d042.1599260540.git.fthain@telegraphics.com.au --- arch/powerpc/kernel/tau_6xx.c | 33 ++++++++++++++------------------- arch/powerpc/platforms/Kconfig | 5 ++--- 2 files changed, 16 insertions(+), 22 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index b8d7e7d498e0..614b5b272d9c 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -40,6 +40,8 @@ static struct tau_temp unsigned char grew; } tau[NR_CPUS]; +static bool tau_int_enable; + #undef DEBUG /* TODO: put these in a /proc interface, with some sanity checks, and maybe @@ -54,22 +56,13 @@ static struct tau_temp static void set_thresholds(unsigned long cpu) { -#ifdef CONFIG_TAU_INT - /* - * setup THRM1, - * threshold, valid bit, enable interrupts, interrupt when below threshold - */ - mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TIE | THRM1_TID); + u32 maybe_tie = tau_int_enable ? THRM1_TIE : 0; - /* setup THRM2, - * threshold, valid bit, enable interrupts, interrupt when above threshold - */ - mtspr (SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | THRM1_TIE); -#else - /* same thing but don't enable interrupts */ - mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TID); - mtspr(SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V); -#endif + /* setup THRM1, threshold, valid bit, interrupt when below threshold */ + mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | maybe_tie | THRM1_TID); + + /* setup THRM2, threshold, valid bit, interrupt when above threshold */ + mtspr(SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | maybe_tie); } static void TAUupdate(int cpu) @@ -142,9 +135,8 @@ static void tau_timeout(void * info) local_irq_save(flags); cpu = smp_processor_id(); -#ifndef CONFIG_TAU_INT - TAUupdate(cpu); -#endif + if (!tau_int_enable) + TAUupdate(cpu); size = tau[cpu].high - tau[cpu].low; if (size > min_window && ! 
tau[cpu].grew) { @@ -225,6 +217,9 @@ static int __init TAU_init(void) return 1; } + tau_int_enable = IS_ENABLED(CONFIG_TAU_INT) && + !strcmp(cur_cpu_spec->platform, "ppc750"); + tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1, 0); if (!tau_workq) return -ENOMEM; @@ -234,7 +229,7 @@ static int __init TAU_init(void) queue_work(tau_workq, &tau_work); pr_info("Thermal assist unit using %s, shrink_timer: %d ms\n", - IS_ENABLED(CONFIG_TAU_INT) ? "interrupts" : "workqueue", shrink_timer); + tau_int_enable ? "interrupts" : "workqueue", shrink_timer); tau_initialized = 1; return 0; diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index fb7515b4fa9c..9fe36f0b54c1 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -223,9 +223,8 @@ config TAU temperature within 2-4 degrees Celsius. This option shows the current on-die temperature in /proc/cpuinfo if the cpu supports it. - Unfortunately, on some chip revisions, this sensor is very inaccurate - and in many cases, does not work at all, so don't assume the cpu - temp is actually what /proc/cpuinfo says it is. + Unfortunately, this sensor is very inaccurate when uncalibrated, so + don't assume the cpu temp is actually what /proc/cpuinfo says it is. config TAU_INT bool "Interrupt driven TAU driver (DANGEROUS)" -- cgit v1.2.3 From e63d6fb5637e92725cf143559672a34b706bca4f Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 5 Sep 2020 09:02:20 +1000 Subject: powerpc/tau: Disable TAU between measurements Enabling CONFIG_TAU_INT causes random crashes: Unrecoverable exception 1700 at c0009414 (msr=1000) Oops: Unrecoverable exception, sig: 6 [#1] BE PAGE_SIZE=4K MMU=Hash SMP NR_CPUS=2 PowerMac Modules linked in: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.7.0-pmac-00043-gd5f545e1a8593 #5 NIP: c0009414 LR: c0009414 CTR: c00116fc REGS: c0799eb8 TRAP: 1700 Not tainted (5.7.0-pmac-00043-gd5f545e1a8593) MSR: 00001000 CR: 22000228 XER: 00000100 GPR00: 00000000 c0799f70 c076e300 00800000 0291c0ac 00e00000 c076e300 00049032 GPR08: 00000001 c00116fc 00000000 dfbd3200 ffffffff 007f80a8 00000000 00000000 GPR16: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 c075ce04 GPR24: c075ce04 dfff8880 c07b0000 c075ce04 00080000 00000001 c079ef98 c079ef5c NIP [c0009414] arch_cpu_idle+0x24/0x6c LR [c0009414] arch_cpu_idle+0x24/0x6c Call Trace: [c0799f70] [00000001] 0x1 (unreliable) [c0799f80] [c0060990] do_idle+0xd8/0x17c [c0799fa0] [c0060ba4] cpu_startup_entry+0x20/0x28 [c0799fb0] [c072d220] start_kernel+0x434/0x44c [c0799ff0] [00003860] 0x3860 Instruction dump: XXXXXXXX XXXXXXXX XXXXXXXX 3d20c07b XXXXXXXX XXXXXXXX XXXXXXXX 7c0802a6 XXXXXXXX XXXXXXXX XXXXXXXX 4e800421 XXXXXXXX XXXXXXXX XXXXXXXX 7d2000a6 ---[ end trace 3a0c9b5cb216db6b ]--- Resolve this problem by disabling each THRMn comparator when handling the associated THRMn interrupt and by disabling the TAU entirely when updating THRMn thresholds. 
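Reduced to its essentials, the fix is a quiesce / reprogram / re-enable sequence around the thermal comparators. The sketch below is illustrative only: tau_ack_comparator() and tau_reprogram() are made-up names, while the SPR and bit macros are the ones from reg.h shown earlier; the real changes are in the diff that follows.

static void tau_ack_comparator(void)
{
	u32 bits = THRM1_TIV | THRM1_TIN | THRM1_V;

	/* A comparator that has tripped is acknowledged by turning it off,
	 * otherwise it keeps re-raising the thermal exception. */
	if ((mfspr(SPRN_THRM1) & bits) == bits)
		mtspr(SPRN_THRM1, 0);
	if ((mfspr(SPRN_THRM2) & bits) == bits)
		mtspr(SPRN_THRM2, 0);
}

static void tau_reprogram(unsigned long cpu)
{
	mtspr(SPRN_THRM3, 0);		/* TAU fully off while thresholds change */
	set_thresholds(cpu);		/* rewrite THRM1/THRM2 for the new window */
	mtspr(SPRN_THRM3, THRM3_SITV(0x1fff) | THRM3_E);	/* restart sampling */
}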
Fixes: 1da177e4c3f41 ("Linux-2.6.12-rc2") Signed-off-by: Finn Thain Tested-by: Stan Johnson Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/5a0ba3dc5612c7aac596727331284a3676c08472.1599260540.git.fthain@telegraphics.com.au --- arch/powerpc/kernel/tau_6xx.c | 65 ++++++++++++++++-------------------------- arch/powerpc/platforms/Kconfig | 9 ++---- 2 files changed, 26 insertions(+), 48 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index 614b5b272d9c..0b4694b8d248 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -42,8 +42,6 @@ static struct tau_temp static bool tau_int_enable; -#undef DEBUG - /* TODO: put these in a /proc interface, with some sanity checks, and maybe * dynamic adjustment to minimize # of interrupts */ /* configurable values for step size and how much to expand the window when @@ -67,42 +65,33 @@ static void set_thresholds(unsigned long cpu) static void TAUupdate(int cpu) { - unsigned thrm; - -#ifdef DEBUG - printk("TAUupdate "); -#endif + u32 thrm; + u32 bits = THRM1_TIV | THRM1_TIN | THRM1_V; /* if both thresholds are crossed, the step_sizes cancel out * and the window winds up getting expanded twice. */ - if((thrm = mfspr(SPRN_THRM1)) & THRM1_TIV){ /* is valid? */ - if(thrm & THRM1_TIN){ /* crossed low threshold */ - if (tau[cpu].low >= step_size){ - tau[cpu].low -= step_size; - tau[cpu].high -= (step_size - window_expand); - } - tau[cpu].grew = 1; -#ifdef DEBUG - printk("low threshold crossed "); -#endif + thrm = mfspr(SPRN_THRM1); + if ((thrm & bits) == bits) { + mtspr(SPRN_THRM1, 0); + + if (tau[cpu].low >= step_size) { + tau[cpu].low -= step_size; + tau[cpu].high -= (step_size - window_expand); } + tau[cpu].grew = 1; + pr_debug("%s: low threshold crossed\n", __func__); } - if((thrm = mfspr(SPRN_THRM2)) & THRM1_TIV){ /* is valid? */ - if(thrm & THRM1_TIN){ /* crossed high threshold */ - if (tau[cpu].high <= 127-step_size){ - tau[cpu].low += (step_size - window_expand); - tau[cpu].high += step_size; - } - tau[cpu].grew = 1; -#ifdef DEBUG - printk("high threshold crossed "); -#endif + thrm = mfspr(SPRN_THRM2); + if ((thrm & bits) == bits) { + mtspr(SPRN_THRM2, 0); + + if (tau[cpu].high <= 127 - step_size) { + tau[cpu].low += (step_size - window_expand); + tau[cpu].high += step_size; } + tau[cpu].grew = 1; + pr_debug("%s: high threshold crossed\n", __func__); } - -#ifdef DEBUG - printk("grew = %d\n", tau[cpu].grew); -#endif } #ifdef CONFIG_TAU_INT @@ -127,17 +116,17 @@ void TAUException(struct pt_regs * regs) static void tau_timeout(void * info) { int cpu; - unsigned long flags; int size; int shrink; - /* disabling interrupts *should* be okay */ - local_irq_save(flags); cpu = smp_processor_id(); if (!tau_int_enable) TAUupdate(cpu); + /* Stop thermal sensor comparisons and interrupts */ + mtspr(SPRN_THRM3, 0); + size = tau[cpu].high - tau[cpu].low; if (size > min_window && ! tau[cpu].grew) { /* do an exponential shrink of half the amount currently over size */ @@ -159,18 +148,12 @@ static void tau_timeout(void * info) set_thresholds(cpu); - /* - * Do the enable every time, since otherwise a bunch of (relatively) - * complex sleep code needs to be added. One mtspr every time - * tau_timeout is called is probably not a big deal. - * + /* Restart thermal sensor comparisons and interrupts. * The "PowerPC 740 and PowerPC 750 Microprocessor Datasheet" * recommends that "the maximum value be set in THRM3 under all * conditions." 
*/ mtspr(SPRN_THRM3, THRM3_SITV(0x1fff) | THRM3_E); - - local_irq_restore(flags); } static struct workqueue_struct *tau_workq; diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 9fe36f0b54c1..b439b027a42f 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -227,7 +227,7 @@ config TAU don't assume the cpu temp is actually what /proc/cpuinfo says it is. config TAU_INT - bool "Interrupt driven TAU driver (DANGEROUS)" + bool "Interrupt driven TAU driver (EXPERIMENTAL)" depends on TAU help The TAU supports an interrupt driven mode which causes an interrupt @@ -235,12 +235,7 @@ config TAU_INT to get notified the temp has exceeded a range. With this option off, a timer is used to re-check the temperature periodically. - However, on some cpus it appears that the TAU interrupt hardware - is buggy and can cause a situation which would lead unexplained hard - lockups. - - Unless you are extending the TAU driver, or enjoy kernel/hardware - debugging, leave this option off. + If in doubt, say N here. config TAU_AVERAGE bool "Average high and low temp" -- cgit v1.2.3 From 542db12a9c42d1ce70c45091765e02f74c129f43 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 31 Aug 2020 07:58:19 +0000 Subject: powerpc: Fix random segfault when freeing hugetlb range The following random segfault is observed from time to time with map_hugetlb selftest: root@localhost:~# ./map_hugetlb 1 19 524288 kB hugepages Mapping 1 Mbytes Segmentation fault [ 31.219972] map_hugetlb[365]: segfault (11) at 117 nip 77974f8c lr 779a6834 code 1 in ld-2.23.so[77966000+21000] [ 31.220192] map_hugetlb[365]: code: 9421ffc0 480318d1 93410028 90010044 9361002c 93810030 93a10034 93c10038 [ 31.220307] map_hugetlb[365]: code: 93e1003c 93210024 8123007c 81430038 <80e90004> 814a0004 7f443a14 813a0004 [ 31.221911] BUG: Bad rss-counter state mm:(ptrval) type:MM_FILEPAGES val:33 [ 31.229362] BUG: Bad rss-counter state mm:(ptrval) type:MM_ANONPAGES val:5 This fault is due to hugetlb_free_pgd_range() freeing page tables that are also used by regular pages. As explain in the comment at the beginning of hugetlb_free_pgd_range(), the verification done in free_pgd_range() on floor and ceiling is not done here, which means hugetlb_free_pte_range() can free outside the expected range. As the verification cannot be done in hugetlb_free_pgd_range(), it must be done in hugetlb_free_pte_range(). 
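The guard that has to move into hugetlb_free_pte_range() is the same floor/ceiling check that free_pgd_range() applies at every level. Annotated in outline below (the variables are those of the function being patched; the exact change is in the diff that follows):

	start = addr & PMD_MASK;
	if (start < floor)
		return;			/* this PTE page also maps memory below 'floor' */
	if (ceiling) {
		ceiling &= PMD_MASK;
		if (!ceiling)
			return;		/* rounding left nothing above us */
	}
	if (end - 1 > ceiling - 1)
		return;			/* ... or above 'ceiling'; the "- 1" keeps
					 * ceiling == 0 meaning "no upper limit" */

	/* Only when the whole PMD-sized range lies inside [floor, ceiling)
	 * is it safe to clear the PMD and free the PTE page. */
	pmd_clear(pmd);
	pte_free_tlb(tlb, token, addr);
	mm_dec_nr_ptes(tlb->mm);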
Fixes: b250c8c08c79 ("powerpc/8xx: Manage 512k huge pages as standard pages.") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f0cb2a5477cd87d1eaadb128042e20aeb2bc2859.1598860677.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/hugetlbpage.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 26292544630f..e7ae2a2c4545 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -330,10 +330,24 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif get_hugepd_cache_index(pdshift - shift)); } -static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, unsigned long addr) +static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, + unsigned long addr, unsigned long end, + unsigned long floor, unsigned long ceiling) { + unsigned long start = addr; pgtable_t token = pmd_pgtable(*pmd); + start &= PMD_MASK; + if (start < floor) + return; + if (ceiling) { + ceiling &= PMD_MASK; + if (!ceiling) + return; + } + if (end - 1 > ceiling - 1) + return; + pmd_clear(pmd); pte_free_tlb(tlb, token, addr); mm_dec_nr_ptes(tlb->mm); @@ -363,7 +377,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, */ WARN_ON(!IS_ENABLED(CONFIG_PPC_8xx)); - hugetlb_free_pte_range(tlb, pmd, addr); + hugetlb_free_pte_range(tlb, pmd, addr, end, floor, ceiling); continue; } -- cgit v1.2.3 From 175a99991511fed16108dcb823f0af8e72325a1f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 31 Aug 2020 08:30:43 +0000 Subject: powerpc/8xx: Refactor calculation of number of entries per PTE in page tables On 8xx, the number of entries occupied by a PTE in the page tables depends on the size of the page. At the time being, this calculation is done in two places: in pte_update() and in set_huge_pte_at() Refactor this calculation into a helper called number_of_cells_per_pte(). For the time being, the val param is unused. It will be used by following patch. Instead of opencoding is_hugepd(), use hugepd_ok() with a forward declaration. 
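For orientation, the counts such a helper returns on the 8xx work out as follows (a worked summary derived from the constants used in the diff below, not new code):

/*  logical page size                  PTE cells written per update
 *  4k standard page (4k kernel)       PAGE_SIZE / SZ_4K = 1
 *  16k standard page (16k kernel)     PAGE_SIZE / SZ_4K = 4
 *  16k huge page on a 4k kernel       SZ_16K / SZ_4K    = 4   (added by the next patch)
 *  512k huge page                     SZ_512K / SZ_4K   = 128
 *  8M huge page                       1  (the level is mapped by a hugepd)
 */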
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f6ea2483c2c389567b007945948f704d18cfaeea.1598862623.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pgtable.h | 18 ++++++++++++------ arch/powerpc/mm/pgtable.c | 6 ++++-- 2 files changed, 16 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index b9e134d0f03a..80bbc21b87f0 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -227,6 +227,17 @@ static inline void pmd_clear(pmd_t *pmdp) */ #ifdef CONFIG_PPC_8xx static pmd_t *pmd_off(struct mm_struct *mm, unsigned long addr); +static int hugepd_ok(hugepd_t hpd); + +static int number_of_cells_per_pte(pmd_t *pmd, pte_basic_t val, int huge) +{ + if (!huge) + return PAGE_SIZE / SZ_4K; + else if (hugepd_ok(*((hugepd_t *)pmd))) + return 1; + else + return SZ_512K / SZ_4K; +} static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, unsigned long clr, unsigned long set, int huge) @@ -237,12 +248,7 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p int num, i; pmd_t *pmd = pmd_off(mm, addr); - if (!huge) - num = PAGE_SIZE / SZ_4K; - else if ((pmd_val(*pmd) & _PMD_PAGE_MASK) != _PMD_PAGE_8M) - num = SZ_512K / SZ_4K; - else - num = 1; + num = number_of_cells_per_pte(pmd, new, huge); for (i = 0; i < num; i++, entry++, new += SZ_4K) *entry = new; diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 9c0547d77af3..2dcad640b869 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -266,8 +266,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_ pmd_t *pmd = pmd_off(mm, addr); pte_basic_t val; pte_basic_t *entry = &ptep->pte; - int num = is_hugepd(*((hugepd_t *)pmd)) ? 1 : SZ_512K / SZ_4K; - int i; + int num, i; /* * Make sure hardware valid bit is not set. We don't do @@ -280,6 +279,9 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_ pte = set_pte_filter(pte); val = pte_val(pte); + + num = number_of_cells_per_pte(pmd, val, 1); + for (i = 0; i < num; i++, entry++, val += SZ_4K) *entry = val; } -- cgit v1.2.3 From e47168f3d1b14af5281cf50c59561d59d28201f9 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 31 Aug 2020 08:30:44 +0000 Subject: powerpc/8xx: Support 16k hugepages with 4k pages The 8xx has 4 page sizes: 4k, 16k, 512k and 8M 4k and 16k can be selected at build time as standard page sizes, and 512k and 8M are hugepages. When 4k standard pages are selected, 16k pages are not available. Allow 16k pages as hugepages when 4k pages are used. To allow that, implement arch_make_huge_pte() which receives the necessary arguments to allow setting the PTE in accordance with the page size: - 512 k pages must have _PAGE_HUGE and _PAGE_SPS. They are set by pte_mkhuge(). arch_make_huge_pte() does nothing. - 16 k pages must have only _PAGE_SPS. arch_make_huge_pte() clears _PAGE_HUGE. 
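Because the series also wires a 16KB encoding into the uapi headers (see the hugetlb_encode.h and mman.h hunks below), user space can then request such a mapping roughly as in this sketch; it assumes headers that already carry the new MAP_HUGE_16KB definition and a 16k hugepage pool reserved beforehand:

#include <sys/mman.h>
#include <stddef.h>

int main(void)
{
	/* one anonymous 16k huge page, read/write */
	void *p = mmap(NULL, 16 * 1024, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_16KB,
		       -1, 0);

	return p == MAP_FAILED;
}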
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a518abc29266a708dfbccc8fce9ae6694fe4c2c6.1598862623.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h | 14 ++++++++++++++ arch/powerpc/include/asm/nohash/32/pgtable.h | 2 ++ arch/powerpc/mm/hugetlbpage.c | 2 +- arch/powerpc/mm/nohash/tlb.c | 4 ---- arch/powerpc/mm/ptdump/8xx.c | 5 +++++ include/uapi/asm-generic/hugetlb_encode.h | 1 + include/uapi/linux/mman.h | 1 + 7 files changed, 24 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h index e752a5807a59..39be9aea86db 100644 --- a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h @@ -65,4 +65,18 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, pte_update(mm, addr, ptep, clr, set, 1); } +#ifdef CONFIG_PPC_4K_PAGES +static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, + struct page *page, int writable) +{ + size_t size = huge_page_size(hstate_vma(vma)); + + if (size == SZ_16K) + return __pte(pte_val(entry) & ~_PAGE_HUGE); + else + return entry; +} +#define arch_make_huge_pte arch_make_huge_pte +#endif + #endif /* _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H */ diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 80bbc21b87f0..ee2243ba96cf 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -235,6 +235,8 @@ static int number_of_cells_per_pte(pmd_t *pmd, pte_basic_t val, int huge) return PAGE_SIZE / SZ_4K; else if (hugepd_ok(*((hugepd_t *)pmd))) return 1; + else if (IS_ENABLED(CONFIG_PPC_4K_PAGES) && !(val & _PAGE_HUGE)) + return SZ_16K / SZ_4K; else return SZ_512K / SZ_4K; } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index e7ae2a2c4545..36c3800769fb 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -180,7 +180,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz if (!hpdp) return NULL; - if (IS_ENABLED(CONFIG_PPC_8xx) && sz == SZ_512K) + if (IS_ENABLED(CONFIG_PPC_8xx) && pshift < PMD_SHIFT) return pte_alloc_map(mm, (pmd_t *)hpdp, addr); BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp)); diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c index 14514585db98..5872f69141d5 100644 --- a/arch/powerpc/mm/nohash/tlb.c +++ b/arch/powerpc/mm/nohash/tlb.c @@ -83,16 +83,12 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { }; #elif defined(CONFIG_PPC_8xx) struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { - /* we only manage 4k and 16k pages as normal pages */ -#ifdef CONFIG_PPC_4K_PAGES [MMU_PAGE_4K] = { .shift = 12, }, -#else [MMU_PAGE_16K] = { .shift = 14, }, -#endif [MMU_PAGE_512K] = { .shift = 19, }, diff --git a/arch/powerpc/mm/ptdump/8xx.c b/arch/powerpc/mm/ptdump/8xx.c index 8a797dcbf475..86da2a669680 100644 --- a/arch/powerpc/mm/ptdump/8xx.c +++ b/arch/powerpc/mm/ptdump/8xx.c @@ -11,8 +11,13 @@ static const struct flag_info flag_array[] = { { +#ifdef CONFIG_PPC_16K_PAGES .mask = _PAGE_HUGE, .val = _PAGE_HUGE, +#else + .mask = _PAGE_SPS, + .val = _PAGE_SPS, +#endif .set = "huge", .clear = " ", }, { diff --git a/include/uapi/asm-generic/hugetlb_encode.h b/include/uapi/asm-generic/hugetlb_encode.h index b0f8e87235bd..4f3d5aaa11f5 100644 --- 
a/include/uapi/asm-generic/hugetlb_encode.h +++ b/include/uapi/asm-generic/hugetlb_encode.h @@ -20,6 +20,7 @@ #define HUGETLB_FLAG_ENCODE_SHIFT 26 #define HUGETLB_FLAG_ENCODE_MASK 0x3f +#define HUGETLB_FLAG_ENCODE_16KB (14 << HUGETLB_FLAG_ENCODE_SHIFT) #define HUGETLB_FLAG_ENCODE_64KB (16 << HUGETLB_FLAG_ENCODE_SHIFT) #define HUGETLB_FLAG_ENCODE_512KB (19 << HUGETLB_FLAG_ENCODE_SHIFT) #define HUGETLB_FLAG_ENCODE_1MB (20 << HUGETLB_FLAG_ENCODE_SHIFT) diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h index 923cc162609c..f55bc680b5b0 100644 --- a/include/uapi/linux/mman.h +++ b/include/uapi/linux/mman.h @@ -27,6 +27,7 @@ #define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT #define MAP_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK +#define MAP_HUGE_16KB HUGETLB_FLAG_ENCODE_16KB #define MAP_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB #define MAP_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB #define MAP_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB -- cgit v1.2.3 From fcf1f26895a4f14618b0dc04e0801b123c55e4a3 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 4 Sep 2020 10:46:47 +0000 Subject: powerpc/uaccess: Add pre-update addressing to __put_user_asm_goto() Enable pre-update addressing mode in __put_user_asm_goto() Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/346f65d677adb11865f7762c25a1ca3c64404ba5.1599216023.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 7c2427f237e1..a5cfe867fbdc 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -254,7 +254,7 @@ do { \ "1: " op "%U1%X1 %0,%1 # put_user\n" \ EX_TABLE(1b, %l2) \ : \ - : "r" (x), "m" (*addr) \ + : "r" (x), "m<>" (*addr) \ : \ : label) -- cgit v1.2.3 From ee0a49a6870ea75e25b4d4984c1bb6b3b7c65f2b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 4 Sep 2020 11:01:30 +0000 Subject: powerpc/uaccess: Switch __put_user_size_allowed() to __put_user_asm_goto() __put_user_asm_goto() provides more flexibility to GCC and avoids using a local variable to tell if the write succeeded or not. GCC can then avoid implementing a cmp in the fast path. 
See the difference for a small function like the PPC64 version of save_general_regs() in arch/powerpc/kernel/signal_32.c: Before the patch (unreachable nop removed): 0000000000000c10 <.save_general_regs>: c10: 39 20 00 2c li r9,44 c14: 39 40 00 00 li r10,0 c18: 7d 29 03 a6 mtctr r9 c1c: 38 c0 00 00 li r6,0 c20: 48 00 00 14 b c34 <.save_general_regs+0x24> c30: 42 40 00 40 bdz c70 <.save_general_regs+0x60> c34: 28 2a 00 27 cmpldi r10,39 c38: 7c c8 33 78 mr r8,r6 c3c: 79 47 1f 24 rldicr r7,r10,3,60 c40: 39 20 00 01 li r9,1 c44: 41 82 00 0c beq c50 <.save_general_regs+0x40> c48: 7d 23 38 2a ldx r9,r3,r7 c4c: 79 29 00 20 clrldi r9,r9,32 c50: 91 24 00 00 stw r9,0(r4) c54: 2c 28 00 00 cmpdi r8,0 c58: 39 4a 00 01 addi r10,r10,1 c5c: 38 84 00 04 addi r4,r4,4 c60: 41 82 ff d0 beq c30 <.save_general_regs+0x20> c64: 38 60 ff f2 li r3,-14 c68: 4e 80 00 20 blr c70: 38 60 00 00 li r3,0 c74: 4e 80 00 20 blr 0000000000000000 <.fixup>: cc: 39 00 ff f2 li r8,-14 d0: 48 00 00 00 b d0 <.fixup+0xd0> d0: R_PPC64_REL24 .text+0xc54 After the patch: 0000000000001490 <.save_general_regs>: 1490: 39 20 00 2c li r9,44 1494: 39 40 00 00 li r10,0 1498: 7d 29 03 a6 mtctr r9 149c: 60 00 00 00 nop 14a0: 28 2a 00 27 cmpldi r10,39 14a4: 79 48 1f 24 rldicr r8,r10,3,60 14a8: 39 20 00 01 li r9,1 14ac: 41 82 00 0c beq 14b8 <.save_general_regs+0x28> 14b0: 7d 23 40 2a ldx r9,r3,r8 14b4: 79 29 00 20 clrldi r9,r9,32 14b8: 91 24 00 00 stw r9,0(r4) 14bc: 39 4a 00 01 addi r10,r10,1 14c0: 38 84 00 04 addi r4,r4,4 14c4: 42 00 ff dc bdnz 14a0 <.save_general_regs+0x10> 14c8: 38 60 00 00 li r3,0 14cc: 4e 80 00 20 blr 14d0: 38 60 ff f2 li r3,-14 14d4: 4e 80 00 20 blr Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/94ba5a5138f99522e1562dbcdb38d31aa790dc89.1599216721.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/uaccess.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index a5cfe867fbdc..96d1c144f92b 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -189,14 +189,14 @@ extern long __put_user_bad(void); #define __put_user_size_allowed(x, ptr, size, retval) \ do { \ + __label__ __pu_failed; \ + \ retval = 0; \ - switch (size) { \ - case 1: __put_user_asm(x, ptr, retval, "stb"); break; \ - case 2: __put_user_asm(x, ptr, retval, "sth"); break; \ - case 4: __put_user_asm(x, ptr, retval, "stw"); break; \ - case 8: __put_user_asm2(x, ptr, retval); break; \ - default: __put_user_bad(); \ - } \ + __put_user_size_goto(x, ptr, size, __pu_failed); \ + break; \ + \ +__pu_failed: \ + retval = -EFAULT; \ } while (0) #define __put_user_size(x, ptr, size, retval) \ -- cgit v1.2.3 From e64ac41ab0c510b3f85199a585eb886cad92fb19 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 4 Sep 2020 11:01:31 +0000 Subject: powerpc/uaccess: Switch __patch_instruction() to __put_user_asm_goto() __patch_instruction() is the only user of __put_user_asm() outside of asm/uaccess.h Switch to the new __put_user_asm_goto() to enable retirement of __put_user_asm() in a later patch. 
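In reduced form the switch looks like the sketch below; put_word_old() and put_word_new() are made-up wrappers around the two macros as they exist in uaccess.h at this point, and the real change is in the diff that follows:

static int put_word_old(u32 val, u32 __user *addr)
{
	int err = 0;

	/* The asm writes 'err' on a fault, so even the success path has to
	 * test it afterwards: one extra compare and branch per store. */
	__put_user_asm(val, addr, err, "stw");
	return err;
}

static int put_word_new(u32 val, u32 __user *addr)
{
	/* A faulting store branches straight to the C label; the success
	 * path falls through with no error flag and no compare at all. */
	__put_user_asm_goto(val, addr, failed, "stw");
	return 0;

failed:
	return -EFAULT;
}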
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b9745b122f4a9ae72cef445c61320022ab8b77b7.1599216721.git.christophe.leroy@csgroup.eu --- arch/powerpc/lib/code-patching.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 8c3934ea6220..2333625b5e31 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -21,21 +21,18 @@ static int __patch_instruction(struct ppc_inst *exec_addr, struct ppc_inst instr, struct ppc_inst *patch_addr) { - int err = 0; - - if (!ppc_inst_prefixed(instr)) { - __put_user_asm(ppc_inst_val(instr), patch_addr, err, "stw"); - } else { - __put_user_asm(ppc_inst_as_u64(instr), patch_addr, err, "std"); - } - - if (err) - return err; + if (!ppc_inst_prefixed(instr)) + __put_user_asm_goto(ppc_inst_val(instr), patch_addr, failed, "stw"); + else + __put_user_asm_goto(ppc_inst_as_u64(instr), patch_addr, failed, "std"); asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr), "r" (exec_addr)); return 0; + +failed: + return -EFAULT; } int raw_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr) -- cgit v1.2.3 From 7fdf966bed155b214f4f1f9b67825a40b2e9b998 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 4 Sep 2020 11:01:32 +0000 Subject: powerpc/uaccess: Remove __put_user_asm() and __put_user_asm2() __put_user_asm() and __put_user_asm2() are not used anymore. Remove them. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d66c4a372738d2fbd81f433ca86e4295871ace6a.1599216721.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/uaccess.h | 41 +++++--------------------------------- 1 file changed, 5 insertions(+), 36 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 96d1c144f92b..26781b044932 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -151,42 +151,6 @@ static inline int __access_ok(unsigned long addr, unsigned long size, extern long __put_user_bad(void); -/* - * We don't tell gcc that we are accessing memory, but this is OK - * because we do not write to any memory gcc knows about, so there - * are no aliasing issues. - */ -#define __put_user_asm(x, addr, err, op) \ - __asm__ __volatile__( \ - "1: " op "%U2%X2 %1,%2 # put_user\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: li %0,%3\n" \ - " b 2b\n" \ - ".previous\n" \ - EX_TABLE(1b, 3b) \ - : "=r" (err) \ - : "r" (x), "m<>" (*addr), "i" (-EFAULT), "0" (err)) - -#ifdef __powerpc64__ -#define __put_user_asm2(x, ptr, retval) \ - __put_user_asm(x, ptr, retval, "std") -#else /* __powerpc64__ */ -#define __put_user_asm2(x, addr, err) \ - __asm__ __volatile__( \ - "1: stw%X2 %1,%2\n" \ - "2: stw%X2 %L1,%L2\n" \ - "3:\n" \ - ".section .fixup,\"ax\"\n" \ - "4: li %0,%3\n" \ - " b 3b\n" \ - ".previous\n" \ - EX_TABLE(1b, 4b) \ - EX_TABLE(2b, 4b) \ - : "=r" (err) \ - : "r" (x), "m" (*addr), "i" (-EFAULT), "0" (err)) -#endif /* __powerpc64__ */ - #define __put_user_size_allowed(x, ptr, size, retval) \ do { \ __label__ __pu_failed; \ @@ -249,6 +213,11 @@ do { \ }) +/* + * We don't tell gcc that we are accessing memory, but this is OK + * because we do not write to any memory gcc knows about, so there + * are no aliasing issues. 
+ */ #define __put_user_asm_goto(x, addr, label, op) \ asm volatile goto( \ "1: " op "%U1%X1 %0,%1 # put_user\n" \ -- cgit v1.2.3 From c118c7303ad528be8ff2aea8cd1ee15452c763f0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 7 Sep 2020 13:42:09 +0000 Subject: powerpc/32: Fix vmap stack - Do not activate MMU before reading task struct We need r1 to be properly set before activating MMU, so reading task_struct->stack must be done with MMU off. This means we need an additional register to play with MSR bits while r11 now points to the stack. For that, move r10 back to CR (As is already done for hash MMU) and use r10. We still don't have r1 correct yet when we activate MMU. It is done in following patch. Fixes: 028474876f47 ("powerpc/32: prepare for CONFIG_VMAP_STACK") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a027d447022a006c9c4958ac734128e577a3c5c1.1599486108.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_32.S | 6 ------ arch/powerpc/kernel/head_32.h | 31 ++++++------------------------- 2 files changed, 6 insertions(+), 31 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 5624db0e09a1..b0e184f795c0 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -274,14 +274,8 @@ __secondary_hold_acknowledge: DO_KVM 0x200 MachineCheck: EXCEPTION_PROLOG_0 -#ifdef CONFIG_VMAP_STACK - li r11, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ - mtmsr r11 - isync -#endif #ifdef CONFIG_PPC_CHRP mfspr r11, SPRN_SPRG_THREAD - tovirt_vmstack r11, r11 lwz r11, RTAS_SP(r11) cmpwi cr1, r11, 0 bne cr1, 7f diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 9abec6cd099c..21effebb9277 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -39,24 +39,13 @@ .endm .macro EXCEPTION_PROLOG_1 for_rtas=0 -#ifdef CONFIG_VMAP_STACK - .ifeq \for_rtas - li r11, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ - mtmsr r11 - isync - .endif subi r11, r1, INT_FRAME_SIZE /* use r1 if kernel */ -#else - tophys(r11,r1) /* use tophys(r1) if kernel */ - subi r11, r11, INT_FRAME_SIZE /* alloc exc. frame */ -#endif beq 1f mfspr r11,SPRN_SPRG_THREAD - tovirt_vmstack r11, r11 lwz r11,TASK_STACK-THREAD(r11) addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE - tophys_novmstack r11, r11 1: + tophys_novmstack r11, r11 #ifdef CONFIG_VMAP_STACK mtcrf 0x7f, r11 bt 32 - THREAD_ALIGN_SHIFT, stack_overflow @@ -64,12 +53,11 @@ .endm .macro EXCEPTION_PROLOG_2 handle_dar_dsisr=0 -#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S) -BEGIN_MMU_FTR_SECTION +#ifdef CONFIG_VMAP_STACK mtcr r10 -FTR_SECTION_ELSE - stw r10, _CCR(r11) -ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) + li r10, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ + mtmsr r10 + isync #else stw r10,_CCR(r11) /* save registers */ #endif @@ -77,11 +65,9 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) stw r12,GPR12(r11) stw r9,GPR9(r11) stw r10,GPR10(r11) -#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S) -BEGIN_MMU_FTR_SECTION +#ifdef CONFIG_VMAP_STACK mfcr r10 stw r10, _CCR(r11) -END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) #endif mfspr r12,SPRN_SPRG_SCRATCH1 stw r12,GPR11(r11) @@ -97,11 +83,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) stw r10, _DSISR(r11) .endif lwz r9, SRR1(r12) -#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S) -BEGIN_MMU_FTR_SECTION andi. 
r10, r9, MSR_PR -END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) -#endif lwz r12, SRR0(r12) #else mfspr r12,SPRN_SRR0 @@ -328,7 +310,6 @@ label: #ifdef CONFIG_VMAP_STACK #ifdef CONFIG_SMP mfspr r11, SPRN_SPRG_THREAD - tovirt(r11, r11) lwz r11, TASK_CPU - THREAD(r11) slwi r11, r11, 3 addis r11, r11, emergency_ctx@ha -- cgit v1.2.3 From da7bb43ab9da39bcfed0d146ce94e1f0cbae4ca0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 7 Sep 2020 13:42:10 +0000 Subject: powerpc/32: Fix vmap stack - Properly set r1 before activating MMU We need r1 to be properly set before activating MMU, otherwise any new exception taken while saving registers into the stack in exception prologs will use the user stack, which is wrong and will even lockup or crash when KUAP is selected. Do that by switching the meaning of r11 and r1 until we have saved r1 to the stack: copy r1 into r11 and setup the new stack pointer in r1. To avoid complicating and impacting all generic and specific prolog code (and more), copy back r1 into r11 once r11 is save onto the stack. We could get rid of copying r1 back and forth at the cost of rewriting everything to use r1 instead of r11 all the way when CONFIG_VMAP_STACK is set, but the effort is probably not worth it. Fixes: 028474876f47 ("powerpc/32: prepare for CONFIG_VMAP_STACK") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/8f85e8752ac5af602db7237ef53d634f4f3d3892.1599486108.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_32.h | 43 +++++++++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 21effebb9277..cc36998c5541 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -39,15 +39,24 @@ .endm .macro EXCEPTION_PROLOG_1 for_rtas=0 +#ifdef CONFIG_VMAP_STACK + mr r11, r1 + subi r1, r1, INT_FRAME_SIZE /* use r1 if kernel */ + beq 1f + mfspr r1,SPRN_SPRG_THREAD + lwz r1,TASK_STACK-THREAD(r1) + addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE +#else subi r11, r1, INT_FRAME_SIZE /* use r1 if kernel */ beq 1f mfspr r11,SPRN_SPRG_THREAD lwz r11,TASK_STACK-THREAD(r11) addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE +#endif 1: tophys_novmstack r11, r11 #ifdef CONFIG_VMAP_STACK - mtcrf 0x7f, r11 + mtcrf 0x7f, r1 bt 32 - THREAD_ALIGN_SHIFT, stack_overflow #endif .endm @@ -62,6 +71,15 @@ stw r10,_CCR(r11) /* save registers */ #endif mfspr r10, SPRN_SPRG_SCRATCH0 +#ifdef CONFIG_VMAP_STACK + stw r11,GPR1(r1) + stw r11,0(r1) + mr r11, r1 +#else + stw r1,GPR1(r11) + stw r1,0(r11) + tovirt(r1, r11) /* set new kernel sp */ +#endif stw r12,GPR12(r11) stw r9,GPR9(r11) stw r10,GPR10(r11) @@ -89,9 +107,6 @@ mfspr r12,SPRN_SRR0 mfspr r9,SPRN_SRR1 #endif - stw r1,GPR1(r11) - stw r1,0(r11) - tovirt_novmstack r1, r11 /* set new kernel sp */ #ifdef CONFIG_40x rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) 
*/ #else @@ -309,19 +324,19 @@ label: .macro vmap_stack_overflow_exception #ifdef CONFIG_VMAP_STACK #ifdef CONFIG_SMP - mfspr r11, SPRN_SPRG_THREAD - lwz r11, TASK_CPU - THREAD(r11) - slwi r11, r11, 3 - addis r11, r11, emergency_ctx@ha + mfspr r1, SPRN_SPRG_THREAD + lwz r1, TASK_CPU - THREAD(r1) + slwi r1, r1, 3 + addis r1, r1, emergency_ctx@ha #else - lis r11, emergency_ctx@ha + lis r1, emergency_ctx@ha #endif - lwz r11, emergency_ctx@l(r11) - cmpwi cr1, r11, 0 + lwz r1, emergency_ctx@l(r1) + cmpwi cr1, r1, 0 bne cr1, 1f - lis r11, init_thread_union@ha - addi r11, r11, init_thread_union@l -1: addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE + lis r1, init_thread_union@ha + addi r1, r1, init_thread_union@l +1: addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE EXCEPTION_PROLOG_2 SAVE_NVGPRS(r11) addi r3, r1, STACK_FRAME_OVERHEAD -- cgit v1.2.3 From 04d476bfbb06426fef2985c8e53578bb04596a6f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 17 Aug 2020 05:46:41 +0000 Subject: powerpc/process: Replace an #ifdef CONFIG_PPC_47x by IS_ENABLED() isync() is always defined, no need for an #ifdef. Replace it by IS_ENABLED(CONFIG_PPC_47x). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ac8da0e3baa91dda805e1e492fd65aecd90c1fb5.1597643156.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/process.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 483e36a42617..59dde9a6d3a0 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -831,9 +831,8 @@ static void switch_hw_breakpoint(struct task_struct *new) static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) { mtspr(SPRN_DAC1, dabr); -#ifdef CONFIG_PPC_47x - isync(); -#endif + if (IS_ENABLED(CONFIG_PPC_47x)) + isync(); return 0; } #elif defined(CONFIG_PPC_BOOK3S) -- cgit v1.2.3 From bfac2799301c19d81122af04a8a3ad5ecae3737e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 17 Aug 2020 05:46:42 +0000 Subject: powerpc/process: Replace an #ifdef CONFIG_PPC_BOOK3S_64 by IS_ENABLED() This #ifdef CONFIG_PPC_BOOK3S_64 calls preload_new_slb_context() when radix is not enabled. radix_enabled() is always defined, and the prototype for preload_new_slb_context() is always present, so the #ifdef is unneeded. Replace it by IS_ENABLED(). 
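The transformation is mechanical; in outline it is the hunk shown below, with the reasons spelled out as comments:

/* Before: the call is invisible to the compiler on every other config. */
#ifdef CONFIG_PPC_BOOK3S_64
	if (!radix_enabled())
		preload_new_slb_context(start, sp);
#endif

/* After: always parsed and type-checked, then constant-folded away when
 * IS_ENABLED(CONFIG_PPC_BOOK3S_64) evaluates to 0, so the generated code
 * is the same while build coverage improves. */
	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled())
		preload_new_slb_context(start, sp);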
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d31506ca9bac9def68cf7424eded63fdc4fb6660.1597643167.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/process.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 59dde9a6d3a0..47c401246c40 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1780,10 +1780,8 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) #ifdef CONFIG_PPC64 unsigned long load_addr = regs->gpr[2]; /* saved by ELF_PLAT_INIT */ -#ifdef CONFIG_PPC_BOOK3S_64 - if (!radix_enabled()) + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled()) preload_new_slb_context(start, sp); -#endif #endif /* -- cgit v1.2.3 From 2ec42996f5b12826466300a755413577b6913206 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 17 Aug 2020 05:46:43 +0000 Subject: powerpc/process: Replace an #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) by IS_ENABLED() The #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) encloses some printk which can be compiled in all cases. Replace by IS_ENABLED(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a1b6ef3d657c8f249193442f56868fc358ea5b6c.1597643160.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/process.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 47c401246c40..4c20136dbdf6 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1502,12 +1502,13 @@ void show_regs(struct pt_regs * regs) trap = TRAP(regs); if (!trap_is_syscall(regs) && cpu_has_feature(CPU_FTR_CFAR)) pr_cont("CFAR: "REG" ", regs->orig_gpr3); - if (trap == 0x200 || trap == 0x300 || trap == 0x600) -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) - pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr); -#else - pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr); -#endif + if (trap == 0x200 || trap == 0x300 || trap == 0x600) { + if (IS_ENABLED(CONFIG_4xx) || IS_ENABLED(CONFIG_BOOKE)) + pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr); + else + pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr); + } + #ifdef CONFIG_PPC64 pr_cont("IRQMASK: %lx ", regs->softe); #endif -- cgit v1.2.3 From 8f020c7ca300fd60374f0347814c92ea513c24da Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 17 Aug 2020 05:46:44 +0000 Subject: powerpc/process: Replace #ifdef CONFIG_KALLSYMS by IS_ENABLED() The #ifdef CONFIG_KALLSYMS encloses some printk which can compile in all cases. Replace by IS_ENABLED(). 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/2d89732a9062b2cf2651728804e4b8f6c9b9358e.1597643164.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/process.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 4c20136dbdf6..743afa368849 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1525,14 +1525,14 @@ void show_regs(struct pt_regs * regs) break; } pr_cont("\n"); -#ifdef CONFIG_KALLSYMS /* * Lookup NIP late so we have the best change of getting the * above info out without failing */ - printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip); - printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link); -#endif + if (IS_ENABLED(CONFIG_KALLSYMS)) { + printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip); + printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link); + } show_stack(current, (unsigned long *) regs->gpr[1], KERN_DEFAULT); if (!user_mode(regs)) show_instructions(regs); -- cgit v1.2.3 From 60d62bfd24efce1a595d259100b8a4e7a489e834 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 17 Aug 2020 05:46:45 +0000 Subject: powerpc/process: Tag an #endif to help locate the matching #ifdef. That #endif is more than 100 lines after the matching #ifdef, and there are several #ifdef/#else/#endif inbetween. Tag it as /* CONFIG_PPC_BOOK3S_64 */ to help locate the matching #ifdef. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3612a8f8aaca16de3fc414a7e66293319d6e213c.1597643147.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 743afa368849..3bc671df5b51 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -581,7 +581,7 @@ void notrace restore_math(struct pt_regs *regs) regs->msr |= new_msr | fpexc_mode; } } -#endif +#endif /* CONFIG_PPC_BOOK3S_64 */ static void save_all(struct task_struct *tsk) { -- cgit v1.2.3 From 80739c2bd29133715d6828e333649a55095f4747 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 17 Aug 2020 05:47:55 +0000 Subject: powerpc/process: Remove useless #ifdef CONFIG_VSX cpu_has_feature(CPU_FTR_VSX) returns false when CONFIG_VSX is not set. There is no need to enclose the test in an #ifdef CONFIG_VSX. Remove it. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0eb61cf0dc66d781d47deb2228498cd61d03a754.1597643221.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/process.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 3bc671df5b51..fb079f11a6df 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -124,10 +124,8 @@ unsigned long notrace msr_check_and_set(unsigned long bits) newmsr = oldmsr | bits; -#ifdef CONFIG_VSX if (cpu_has_feature(CPU_FTR_VSX) && (bits & MSR_FP)) newmsr |= MSR_VSX; -#endif if (oldmsr != newmsr) mtmsr_isync(newmsr); @@ -144,10 +142,8 @@ void notrace __msr_check_and_clear(unsigned long bits) newmsr = oldmsr & ~bits; -#ifdef CONFIG_VSX if (cpu_has_feature(CPU_FTR_VSX) && (bits & MSR_FP)) newmsr &= ~MSR_VSX; -#endif if (oldmsr != newmsr) mtmsr_isync(newmsr); @@ -162,10 +158,8 @@ static void __giveup_fpu(struct task_struct *tsk) save_fpu(tsk); msr = tsk->thread.regs->msr; msr &= ~(MSR_FP|MSR_FE0|MSR_FE1); -#ifdef CONFIG_VSX if (cpu_has_feature(CPU_FTR_VSX)) msr &= ~MSR_VSX; -#endif tsk->thread.regs->msr = msr; } @@ -245,10 +239,8 @@ static void __giveup_altivec(struct task_struct *tsk) save_altivec(tsk); msr = tsk->thread.regs->msr; msr &= ~MSR_VEC; -#ifdef CONFIG_VSX if (cpu_has_feature(CPU_FTR_VSX)) msr &= ~MSR_VSX; -#endif tsk->thread.regs->msr = msr; } @@ -421,10 +413,8 @@ static int __init init_msr_all_available(void) if (cpu_has_feature(CPU_FTR_ALTIVEC)) msr_all_available |= MSR_VEC; #endif -#ifdef CONFIG_VSX if (cpu_has_feature(CPU_FTR_VSX)) msr_all_available |= MSR_VSX; -#endif #ifdef CONFIG_SPE if (cpu_has_feature(CPU_FTR_SPE)) msr_all_available |= MSR_SPE; @@ -509,19 +499,18 @@ static bool should_restore_altivec(void) { return false; } static void do_restore_altivec(void) { } #endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_VSX static bool should_restore_vsx(void) { if (cpu_has_feature(CPU_FTR_VSX)) return true; return false; } +#ifdef CONFIG_VSX static void do_restore_vsx(void) { current->thread.used_vsr = 1; } #else -static bool should_restore_vsx(void) { return false; } static void do_restore_vsx(void) { } #endif /* CONFIG_VSX */ -- cgit v1.2.3 From e3667ee427e224f9951eb3940a97477285548134 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 17 Aug 2020 05:47:56 +0000 Subject: powerpc/process: Remove useless #ifdef CONFIG_ALTIVEC cpu_has_feature(CPU_FTR_ALTIVEC) returns false when CONFIG_ALTIVEC is not set. There is no need to enclose the test in an #ifdef CONFIG_ALTIVEC. Remove it. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/03ba6b52344ca7c336df2bc6e3d31d736c804ae2.1597643221.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/process.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index fb079f11a6df..52019b6962ba 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -409,10 +409,8 @@ static int __init init_msr_all_available(void) #ifdef CONFIG_PPC_FPU msr_all_available |= MSR_FP; #endif -#ifdef CONFIG_ALTIVEC if (cpu_has_feature(CPU_FTR_ALTIVEC)) msr_all_available |= MSR_VEC; -#endif if (cpu_has_feature(CPU_FTR_VSX)) msr_all_available |= MSR_VSX; #ifdef CONFIG_SPE @@ -446,10 +444,8 @@ void giveup_all(struct task_struct *tsk) if (usermsr & MSR_FP) __giveup_fpu(tsk); #endif -#ifdef CONFIG_ALTIVEC if (usermsr & MSR_VEC) __giveup_altivec(tsk); -#endif #ifdef CONFIG_SPE if (usermsr & MSR_SPE) __giveup_spe(tsk); -- cgit v1.2.3 From 532ed1900d37a47c821718a0d8d28eb05b2c4d28 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 17 Aug 2020 05:47:57 +0000 Subject: powerpc/process: Remove useless #ifdef CONFIG_SPE cpu_has_feature(CPU_FTR_SPE) returns false when CONFIG_SPE is not set. There is no need to enclose the test in an #ifdef CONFIG_SPE. Remove it. CPU_FTR_SPE only exists on 32 bits. Define it as 0 on 64 bits. We have a couple of places like: #ifdef CONFIG_SPE if (cpu_has_feature(CPU_FTR_SPE)) { do_something_that_requires_CONFIG_SPE } else { return -EINVAL; } #else return -EINVAL; #endif Replace them by a cleaner version: if (cpu_has_feature(CPU_FTR_SPE)) { #ifdef CONFIG_SPE do_something_that_requires_CONFIG_SPE #endif } else { return -EINVAL; } When CONFIG_SPE is not set, this resolves to an unconditional return of -EINVAL Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/698df8387555765b70ea42e4a7fa48141c309c1f.1597643221.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/cputable.h | 1 + arch/powerpc/kernel/process.c | 21 +++++++-------------- 2 files changed, 8 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 32a15dc49e8c..8ca5885bd5b9 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -170,6 +170,7 @@ static inline void cpu_feature_keys_init(void) { } #else /* CONFIG_PPC32 */ /* Define these to 0 for the sake of tests in common code */ #define CPU_FTR_PPC_LE (0) +#define CPU_FTR_SPE (0) #endif /* diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 52019b6962ba..348d4355bc00 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -413,10 +413,8 @@ static int __init init_msr_all_available(void) msr_all_available |= MSR_VEC; if (cpu_has_feature(CPU_FTR_VSX)) msr_all_available |= MSR_VSX; -#ifdef CONFIG_SPE if (cpu_has_feature(CPU_FTR_SPE)) msr_all_available |= MSR_SPE; -#endif return 0; } @@ -446,10 +444,8 @@ void giveup_all(struct task_struct *tsk) #endif if (usermsr & MSR_VEC) __giveup_altivec(tsk); -#ifdef CONFIG_SPE if (usermsr & MSR_SPE) __giveup_spe(tsk); -#endif msr_check_and_clear(msr_all_available); } @@ -1899,7 +1895,6 @@ int set_fpexc_mode(struct task_struct *tsk, unsigned int val) * fpexc_mode. fpexc_mode is also used for setting FP exception * mode (asyn, precise, disabled) for 'Classic' FP. 
*/ if (val & PR_FP_EXC_SW_ENABLE) { -#ifdef CONFIG_SPE if (cpu_has_feature(CPU_FTR_SPE)) { /* * When the sticky exception bits are set @@ -1913,16 +1908,15 @@ int set_fpexc_mode(struct task_struct *tsk, unsigned int val) * anyway to restore the prctl settings from * the saved environment. */ +#ifdef CONFIG_SPE tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR); tsk->thread.fpexc_mode = val & (PR_FP_EXC_SW_ENABLE | PR_FP_ALL_EXCEPT); +#endif return 0; } else { return -EINVAL; } -#else - return -EINVAL; -#endif } /* on a CONFIG_SPE this does not hurt us. The bits that @@ -1943,8 +1937,7 @@ int get_fpexc_mode(struct task_struct *tsk, unsigned long adr) { unsigned int val; - if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) -#ifdef CONFIG_SPE + if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) { if (cpu_has_feature(CPU_FTR_SPE)) { /* * When the sticky exception bits are set @@ -1958,15 +1951,15 @@ int get_fpexc_mode(struct task_struct *tsk, unsigned long adr) * anyway to restore the prctl settings from * the saved environment. */ +#ifdef CONFIG_SPE tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR); val = tsk->thread.fpexc_mode; +#endif } else return -EINVAL; -#else - return -EINVAL; -#endif - else + } else { val = __unpack_fe01(tsk->thread.fpexc_mode); + } return put_user(val, (unsigned int __user *) adr); } -- cgit v1.2.3 From c83c192a6fbb1d4db4144c40296ed059f5eca384 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 17 Aug 2020 05:47:58 +0000 Subject: powerpc/process: Remove useless #ifdef CONFIG_PPC_FPU Add a stub for __giveup_fpu() when CONFIG_PPC_FPU is not selected, as done for CONFIG_SPE and CONFIG_ALTIVEC. This allows to remove some #ifdef CONFIG_PPC_FPU. Also change one to IS_ENABLED(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/69c8b7954ceeccc6b849e52e1fa41b3a0f10f6c1.1597643221.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/process.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 348d4355bc00..14d5189b17d8 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -229,6 +229,8 @@ void enable_kernel_fp(void) } } EXPORT_SYMBOL(enable_kernel_fp); +#else +static inline void __giveup_fpu(struct task_struct *tsk) { } #endif /* CONFIG_PPC_FPU */ #ifdef CONFIG_ALTIVEC @@ -406,9 +408,8 @@ static unsigned long msr_all_available; static int __init init_msr_all_available(void) { -#ifdef CONFIG_PPC_FPU - msr_all_available |= MSR_FP; -#endif + if (IS_ENABLED(CONFIG_PPC_FPU)) + msr_all_available |= MSR_FP; if (cpu_has_feature(CPU_FTR_ALTIVEC)) msr_all_available |= MSR_VEC; if (cpu_has_feature(CPU_FTR_VSX)) @@ -438,10 +439,8 @@ void giveup_all(struct task_struct *tsk) WARN_ON((usermsr & MSR_VSX) && !((usermsr & MSR_FP) && (usermsr & MSR_VEC))); -#ifdef CONFIG_PPC_FPU if (usermsr & MSR_FP) __giveup_fpu(tsk); -#endif if (usermsr & MSR_VEC) __giveup_altivec(tsk); if (usermsr & MSR_SPE) -- cgit v1.2.3 From 2c637d2df4ee4830e9d3eb2bd5412250522ce96e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 11 Sep 2020 10:29:15 +0000 Subject: powerpc/powermac: Fix low_sleep_handler with KUAP and KUEP low_sleep_handler() has an hardcoded restore of segment registers that doesn't take KUAP and KUEP into account. Use head_32's load_segment_registers() routine instead. 
Fixes: a68c31fc01ef ("powerpc/32s: Implement Kernel Userspace Access Protection") Fixes: 31ed2b13c48d ("powerpc/32s: Implement Kernel Userspace Execution Prevention.") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/21b05f7298c1b18f73e6e5b4cd5005aafa24b6da.1599820109.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_32.S | 2 +- arch/powerpc/platforms/powermac/sleep.S | 9 +-------- 2 files changed, 2 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index b0e184f795c0..2bd0aa3a4cc7 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -996,7 +996,7 @@ BEGIN_MMU_FTR_SECTION END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) blr -load_segment_registers: +_GLOBAL(load_segment_registers) li r0, NUM_USER_SEGMENTS /* load up user segment register values */ mtctr r0 /* for context 0 */ li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */ diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S index f9a680fdd9c4..51bfdfe85058 100644 --- a/arch/powerpc/platforms/powermac/sleep.S +++ b/arch/powerpc/platforms/powermac/sleep.S @@ -294,14 +294,7 @@ grackle_wake_up: * we do any r1 memory access as we are not sure they * are in a sane state above the first 256Mb region */ - li r0,16 /* load up segment register values */ - mtctr r0 /* for context 0 */ - lis r3,0x2000 /* Ku = 1, VSID = 0 */ - li r4,0 -3: mtsrin r3,r4 - addi r3,r3,0x111 /* increment VSID */ - addis r4,r4,0x1000 /* address of next segment */ - bdnz 3b + bl load_segment_registers sync isync -- cgit v1.2.3 From 4c42dc5c69a8f24c467a6c997909d2f1d4efdc7f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 11 Sep 2020 05:05:38 +0000 Subject: powerpc/kasan: Fix CONFIG_KASAN_VMALLOC for 8xx Before the commit identified below, pages tables allocation was performed after the allocation of final shadow area for linear memory. But that commit switched the order, leading to page tables being already allocated at the time 8xx kasan_init_shadow_8M() is called. Due to this, kasan_init_shadow_8M() doesn't map the needed shadow entries because there are already page tables. kasan_init_shadow_8M() installs huge PMD entries instead of page tables. We could at that time free the page tables, but there is no point in creating page tables that get freed before being used. Only book3s/32 hash needs early allocation of page tables. For other variants, we can keep the initial order and create remaining page tables after the allocation of final shadow memory for linear mem. Move back the allocation of shadow page tables for CONFIG_KASAN_VMALLOC into kasan_init() after the loop which creates final shadow memory for linear mem. 
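The resulting 8xx ordering is: kasan_mmu_init() only pre-allocates shadow page tables for the hash-MMU case, kasan_init() then maps the final shadow for each memory block through kasan_init_region() (installing huge mappings where it can), and only afterwards, when CONFIG_KASAN_VMALLOC is set, builds the remaining shadow page tables, so the early mapping code never finds page tables already sitting in its way.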
Fixes: 41ea93cf7ba4 ("powerpc/kasan: Fix shadow pages allocation failure") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/8ae4554357da4882612644a74387ae05525b2aaa.1599800716.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/kasan/kasan_init_32.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index fb294046e00e..929716ea21e9 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -127,8 +127,7 @@ void __init kasan_mmu_init(void) { int ret; - if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE) || - IS_ENABLED(CONFIG_KASAN_VMALLOC)) { + if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) { ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END); if (ret) @@ -139,11 +138,11 @@ void __init kasan_mmu_init(void) void __init kasan_init(void) { struct memblock_region *reg; + int ret; for_each_memblock(memory, reg) { phys_addr_t base = reg->base; phys_addr_t top = min(base + reg->size, total_lowmem); - int ret; if (base >= top) continue; @@ -153,6 +152,13 @@ void __init kasan_init(void) panic("kasan: kasan_init_region() failed"); } + if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { + ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END); + + if (ret) + panic("kasan: kasan_init_shadow_page_tables() failed"); + } + kasan_remap_early_shadow_ro(); clear_page(kasan_early_shadow_page); -- cgit v1.2.3 From bbc4f40b5322b3e0b8678619f1c613dadc811669 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 11 Sep 2020 10:01:21 +0800 Subject: powerpc/ps3: make two symbols static This addresses the following sparse warning: arch/powerpc/platforms/ps3/spu.c:451:33: warning: symbol 'spu_management_ps3_ops' was not declared. Should it be static? arch/powerpc/platforms/ps3/spu.c:592:28: warning: symbol 'spu_priv1_ps3_ops' was not declared. Should it be static? Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200911020121.1464585-1-yanaijie@huawei.com --- arch/powerpc/platforms/ps3/spu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c index 1193c294b8d0..0c252478e556 100644 --- a/arch/powerpc/platforms/ps3/spu.c +++ b/arch/powerpc/platforms/ps3/spu.c @@ -448,7 +448,7 @@ static void ps3_disable_spu(struct spu_context *ctx) ctx->ops->runcntl_stop(ctx); } -const struct spu_management_ops spu_management_ps3_ops = { +static const struct spu_management_ops spu_management_ps3_ops = { .enumerate_spus = ps3_enumerate_spus, .create_spu = ps3_create_spu, .destroy_spu = ps3_destroy_spu, @@ -589,7 +589,7 @@ static u64 resource_allocation_enable_get(struct spu *spu) return 0; /* No support. */ } -const struct spu_priv1_ops spu_priv1_ps3_ops = { +static const struct spu_priv1_ops spu_priv1_ps3_ops = { .int_mask_and = int_mask_and, .int_mask_or = int_mask_or, .int_mask_set = int_mask_set, -- cgit v1.2.3 From 79b123cdf9cf0d4a1620baa8c611962626323a08 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 7 Sep 2020 12:55:39 +0530 Subject: powerepc/book3s64/hash: Align start/end address correctly with bolt mapping This ensures we don't do a partial mapping of memory. With nvdimm, when creating namespaces with size not aligned to 16MB, the kernel ends up partially mapping the pages. 
This can result in kernel adding multiple hash page table entries for the same range. A new namespace will result in create_section_mapping() with start and end overlapping an already existing bolted hash page table entry. commit: 6acd7d5ef264 ("libnvdimm/namespace: Enforce memremap_compat_align()") made sure that we always create namespaces aligned to 16MB. But we can do better by avoiding mapping pages that are not aligned. This helps to catch access to these partially mapped pages early. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200907072539.67310-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/book3s64/hash_utils.c | 12 +++++++++--- arch/powerpc/mm/book3s64/radix_pgtable.c | 1 + 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index c663e7ba801f..7185bc43b24f 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -260,8 +260,12 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, DBG("htab_bolt_mapping(%lx..%lx -> %lx (%lx,%d,%d)\n", vstart, vend, pstart, prot, psize, ssize); - for (vaddr = vstart, paddr = pstart; vaddr < vend; - vaddr += step, paddr += step) { + /* Carefully map only the possible range */ + vaddr = ALIGN(vstart, step); + paddr = ALIGN(pstart, step); + vend = ALIGN_DOWN(vend, step); + + for (; vaddr < vend; vaddr += step, paddr += step) { unsigned long hash, hpteg; unsigned long vsid = get_kernel_vsid(vaddr, ssize); unsigned long vpn = hpt_vpn(vaddr, vsid, ssize); @@ -343,7 +347,9 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend, if (!mmu_hash_ops.hpte_removebolted) return -ENODEV; - for (vaddr = vstart; vaddr < vend; vaddr += step) { + /* Unmap the full range specificied */ + vaddr = ALIGN_DOWN(vstart, step); + for (;vaddr < vend; vaddr += step) { rc = mmu_hash_ops.hpte_removebolted(vaddr, psize, ssize); if (rc == -ENOENT) { ret = -ENOENT; diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index d5f0c10d752a..5c8adeb8c955 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -276,6 +276,7 @@ static int __meminit create_physical_mapping(unsigned long start, int psize; start = ALIGN(start, PAGE_SIZE); + end = ALIGN_DOWN(end, PAGE_SIZE); for (addr = start; addr < end; addr += mapping_size) { unsigned long gap, previous_size; int rc; -- cgit v1.2.3 From ffd2961bb41f797eb00b58e019b707555197275e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 19 Aug 2020 19:47:00 +1000 Subject: powerpc/powernv/idle: add a basic stop 0-3 driver for POWER10 This driver does not restore stop > 3 state, so it limits itself to states which do not lose full state or TB. The POWER10 SPRs are sufficiently different from P9 that it seems easier to split out the P10 code. The POWER10 deep sleep code (e.g., the BHRB restore) has been taken out, but it can be re-added when stop > 3 support is added. Signed-off-by: Nicholas Piggin Tested-by: Pratik Rajesh Sampat Tested-by: Vaidyanathan Srinivasan Reviewed-by: Pratik Rajesh Sampat Reviewed-by: Gautham R. 
Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200819094700.493399-1-npiggin@gmail.com --- arch/powerpc/include/asm/machdep.h | 2 - arch/powerpc/include/asm/processor.h | 2 +- arch/powerpc/include/asm/reg.h | 1 + arch/powerpc/platforms/powernv/idle.c | 302 +++++++++++++++++++++++----------- drivers/cpuidle/cpuidle-powernv.c | 2 +- 5 files changed, 212 insertions(+), 97 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index a90b892f0bfe..5082cd496190 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -222,8 +222,6 @@ struct machdep_calls { extern void e500_idle(void); extern void power4_idle(void); -extern void power7_idle(void); -extern void power9_idle(void); extern void ppc6xx_idle(void); extern void book3e_idle(void); diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 36a71cd41f37..22ffe85a91b8 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -432,7 +432,7 @@ enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF}; extern int powersave_nap; /* set if nap mode can be used in idle loop */ extern void power7_idle_type(unsigned long type); -extern void power9_idle_type(unsigned long stop_psscr_val, +extern void arch300_idle_type(unsigned long stop_psscr_val, unsigned long stop_psscr_mask); extern int fix_alignment(struct pt_regs *); diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 5647006ed373..d25c357a873c 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1353,6 +1353,7 @@ #define PVR_POWER8NVL 0x004C #define PVR_POWER8 0x004D #define PVR_POWER9 0x004E +#define PVR_POWER10 0x0080 #define PVR_BE 0x0070 #define PVR_PA6T 0x0090 diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 345ab062b21a..1ed7c5286487 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -565,7 +565,7 @@ void power7_idle_type(unsigned long type) irq_set_pending_from_srr1(srr1); } -void power7_idle(void) +static void power7_idle(void) { if (!powersave_nap) return; @@ -659,20 +659,6 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) mmcr0 = mfspr(SPRN_MMCR0); } - if (cpu_has_feature(CPU_FTR_ARCH_31)) { - /* - * POWER10 uses MMCRA (BHRBRD) as BHRB disable bit. - * If the user hasn't asked for the BHRB to be - * written, the value of MMCRA[BHRBRD] is 1. - * On wakeup from stop, MMCRA[BHRBD] will be 0, - * since it is previleged resource and will be lost. - * Thus, if we do not save and restore the MMCRA[BHRBD], - * hardware will be needlessly writing to the BHRB - * in problem mode. - */ - mmcra = mfspr(SPRN_MMCRA); - } - if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) { sprs.lpcr = mfspr(SPRN_LPCR); sprs.hfscr = mfspr(SPRN_HFSCR); @@ -735,10 +721,6 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) mtspr(SPRN_MMCR0, mmcr0); } - /* Reload MMCRA to restore BHRB disable bit for POWER10 */ - if (cpu_has_feature(CPU_FTR_ARCH_31)) - mtspr(SPRN_MMCRA, mmcra); - /* * DD2.2 and earlier need to set then clear bit 60 in MMCRA * to ensure the PMU starts running. 
@@ -823,73 +805,6 @@ out: return srr1; } -#ifdef CONFIG_HOTPLUG_CPU -static unsigned long power9_offline_stop(unsigned long psscr) -{ - unsigned long srr1; - -#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE - __ppc64_runlatch_off(); - srr1 = power9_idle_stop(psscr, true); - __ppc64_runlatch_on(); -#else - /* - * Tell KVM we're entering idle. - * This does not have to be done in real mode because the P9 MMU - * is independent per-thread. Some steppings share radix/hash mode - * between threads, but in that case KVM has a barrier sync in real - * mode before and after switching between radix and hash. - * - * kvm_start_guest must still be called in real mode though, hence - * the false argument. - */ - local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE; - - __ppc64_runlatch_off(); - srr1 = power9_idle_stop(psscr, false); - __ppc64_runlatch_on(); - - local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL; - /* Order setting hwthread_state vs. testing hwthread_req */ - smp_mb(); - if (local_paca->kvm_hstate.hwthread_req) - srr1 = idle_kvm_start_guest(srr1); - mtmsr(MSR_KERNEL); -#endif - - return srr1; -} -#endif - -void power9_idle_type(unsigned long stop_psscr_val, - unsigned long stop_psscr_mask) -{ - unsigned long psscr; - unsigned long srr1; - - if (!prep_irq_for_idle_irqsoff()) - return; - - psscr = mfspr(SPRN_PSSCR); - psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val; - - __ppc64_runlatch_off(); - srr1 = power9_idle_stop(psscr, true); - __ppc64_runlatch_on(); - - fini_irq_for_idle_irqsoff(); - - irq_set_pending_from_srr1(srr1); -} - -/* - * Used for ppc_md.power_save which needs a function with no parameters - */ -void power9_idle(void) -{ - power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask); -} - #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE /* * This is used in working around bugs in thread reconfiguration @@ -962,6 +877,198 @@ void pnv_power9_force_smt4_release(void) EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release); #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ +struct p10_sprs { + /* + * SPRs that get lost in shallow states: + * + * P10 loses CR, LR, CTR, FPSCR, VSCR, XER, TAR, SPRG2, and HSPRG1 + * isa300 idle routines restore CR, LR. + * CTR is volatile + * idle thread doesn't use FP or VEC + * kernel doesn't use TAR + * HSPRG1 is only live in HV interrupt entry + * SPRG2 is only live in KVM guests, KVM handles it. + */ +}; + +static unsigned long power10_idle_stop(unsigned long psscr, bool mmu_on) +{ + int cpu = raw_smp_processor_id(); + int first = cpu_first_thread_sibling(cpu); + unsigned long *state = &paca_ptrs[first]->idle_state; + unsigned long core_thread_mask = (1UL << threads_per_core) - 1; + unsigned long srr1; + unsigned long pls; +// struct p10_sprs sprs = {}; /* avoid false used-uninitialised */ + bool sprs_saved = false; + + if (!(psscr & (PSSCR_EC|PSSCR_ESL))) { + /* EC=ESL=0 case */ + + BUG_ON(!mmu_on); + + /* + * Wake synchronously. SRESET via xscom may still cause + * a 0x100 powersave wakeup with SRR1 reason! + */ + srr1 = isa300_idle_stop_noloss(psscr); /* go idle */ + if (likely(!srr1)) + return 0; + + /* + * Registers not saved, can't recover! + * This would be a hardware bug + */ + BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS); + + goto out; + } + + /* EC=ESL=1 case */ + if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) { + /* XXX: save SPRs for deep state loss here. 
*/ + + sprs_saved = true; + + atomic_start_thread_idle(); + } + + srr1 = isa300_idle_stop_mayloss(psscr); /* go idle */ + + psscr = mfspr(SPRN_PSSCR); + + WARN_ON_ONCE(!srr1); + WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR)); + + if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI)) + hmi_exception_realmode(NULL); + + /* + * On POWER10, SRR1 bits do not match exactly as expected. + * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so + * just always test PSSCR for SPR/TB state loss. + */ + pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT; + if (likely(pls < deep_spr_loss_state)) { + if (sprs_saved) + atomic_stop_thread_idle(); + goto out; + } + + /* HV state loss */ + BUG_ON(!sprs_saved); + + atomic_lock_thread_idle(); + + if ((*state & core_thread_mask) != 0) + goto core_woken; + + /* XXX: restore per-core SPRs here */ + + if (pls >= pnv_first_tb_loss_level) { + /* TB loss */ + if (opal_resync_timebase() != OPAL_SUCCESS) + BUG(); + } + + /* + * isync after restoring shared SPRs and before unlocking. Unlock + * only contains hwsync which does not necessarily do the right + * thing for SPRs. + */ + isync(); + +core_woken: + atomic_unlock_and_stop_thread_idle(); + + /* XXX: restore per-thread SPRs here */ + + if (!radix_enabled()) + __slb_restore_bolted_realmode(); + +out: + if (mmu_on) + mtmsr(MSR_KERNEL); + + return srr1; +} + +#ifdef CONFIG_HOTPLUG_CPU +static unsigned long arch300_offline_stop(unsigned long psscr) +{ + unsigned long srr1; + +#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE + __ppc64_runlatch_off(); + if (cpu_has_feature(CPU_FTR_ARCH_31)) + srr1 = power10_idle_stop(psscr, true); + else + srr1 = power9_idle_stop(psscr, true); + __ppc64_runlatch_on(); +#else + /* + * Tell KVM we're entering idle. + * This does not have to be done in real mode because the P9 MMU + * is independent per-thread. Some steppings share radix/hash mode + * between threads, but in that case KVM has a barrier sync in real + * mode before and after switching between radix and hash. + * + * kvm_start_guest must still be called in real mode though, hence + * the false argument. + */ + local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE; + + __ppc64_runlatch_off(); + if (cpu_has_feature(CPU_FTR_ARCH_31)) + srr1 = power10_idle_stop(psscr, false); + else + srr1 = power9_idle_stop(psscr, false); + __ppc64_runlatch_on(); + + local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL; + /* Order setting hwthread_state vs. 
testing hwthread_req */ + smp_mb(); + if (local_paca->kvm_hstate.hwthread_req) + srr1 = idle_kvm_start_guest(srr1); + mtmsr(MSR_KERNEL); +#endif + + return srr1; +} +#endif + +void arch300_idle_type(unsigned long stop_psscr_val, + unsigned long stop_psscr_mask) +{ + unsigned long psscr; + unsigned long srr1; + + if (!prep_irq_for_idle_irqsoff()) + return; + + psscr = mfspr(SPRN_PSSCR); + psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val; + + __ppc64_runlatch_off(); + if (cpu_has_feature(CPU_FTR_ARCH_31)) + srr1 = power10_idle_stop(psscr, true); + else + srr1 = power9_idle_stop(psscr, true); + __ppc64_runlatch_on(); + + fini_irq_for_idle_irqsoff(); + + irq_set_pending_from_srr1(srr1); +} + +/* + * Used for ppc_md.power_save which needs a function with no parameters + */ +static void arch300_idle(void) +{ + arch300_idle_type(pnv_default_stop_val, pnv_default_stop_mask); +} + #ifdef CONFIG_HOTPLUG_CPU void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val) @@ -995,7 +1102,7 @@ unsigned long pnv_cpu_offline(unsigned int cpu) psscr = mfspr(SPRN_PSSCR); psscr = (psscr & ~pnv_deepest_stop_psscr_mask) | pnv_deepest_stop_psscr_val; - srr1 = power9_offline_stop(psscr); + srr1 = arch300_offline_stop(psscr); } else if (cpu_has_feature(CPU_FTR_ARCH_206) && power7_offline_type) { srr1 = power7_offline(); } else { @@ -1093,11 +1200,15 @@ int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags) * @dt_idle_states: Number of idle state entries * Returns 0 on success */ -static void __init pnv_power9_idle_init(void) +static void __init pnv_arch300_idle_init(void) { u64 max_residency_ns = 0; int i; + /* stop is not really architected, we only have p9,p10 drivers */ + if (!pvr_version_is(PVR_POWER10) && !pvr_version_is(PVR_POWER9)) + return; + /* * pnv_deepest_stop_{val,mask} should be set to values corresponding to * the deepest stop state. @@ -1112,6 +1223,11 @@ static void __init pnv_power9_idle_init(void) struct pnv_idle_states_t *state = &pnv_idle_states[i]; u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK; + /* No deep loss driver implemented for POWER10 yet */ + if (pvr_version_is(PVR_POWER10) && + state->flags & (OPAL_PM_TIMEBASE_STOP|OPAL_PM_LOSE_FULL_CONTEXT)) + continue; + if ((state->flags & OPAL_PM_TIMEBASE_STOP) && (pnv_first_tb_loss_level > psscr_rl)) pnv_first_tb_loss_level = psscr_rl; @@ -1162,7 +1278,7 @@ static void __init pnv_power9_idle_init(void) if (unlikely(!default_stop_found)) { pr_warn("cpuidle-powernv: No suitable default stop state found. 
Disabling platform idle.\n"); } else { - ppc_md.power_save = power9_idle; + ppc_md.power_save = arch300_idle; pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n", pnv_default_stop_val, pnv_default_stop_mask); } @@ -1224,7 +1340,7 @@ static void __init pnv_probe_idle_states(void) } if (cpu_has_feature(CPU_FTR_ARCH_300)) - pnv_power9_idle_init(); + pnv_arch300_idle_init(); for (i = 0; i < nr_pnv_idle_states; i++) supported_cpuidle_states |= pnv_idle_states[i].flags; @@ -1295,7 +1411,7 @@ static int pnv_parse_cpuidle_dt(void) for (i = 0; i < nr_idle_states; i++) pnv_idle_states[i].residency_ns = temp_u32[i]; - /* For power9 */ + /* For power9 and later */ if (cpu_has_feature(CPU_FTR_ARCH_300)) { /* Read pm_crtl_val */ if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr", @@ -1358,8 +1474,8 @@ static int __init pnv_init_idle_states(void) if (!cpu_has_feature(CPU_FTR_ARCH_300)) { /* P7/P8 nap */ p->thread_idle_state = PNV_THREAD_RUNNING; - } else { - /* P9 stop */ + } else if (pvr_version_is(PVR_POWER9)) { + /* P9 stop workarounds */ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE p->requested_psscr = 0; atomic_set(&p->dont_stop, 0); diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index addaa6e6718b..c32c600b3cf8 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -141,7 +141,7 @@ static int stop_loop(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { - power9_idle_type(stop_psscr_table[index].val, + arch300_idle_type(stop_psscr_table[index].val, stop_psscr_table[index].mask); return index; } -- cgit v1.2.3 From 3a3181e16fbde752007759f8759d25e0ff1fc425 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Fri, 7 Aug 2020 12:18:54 +0200 Subject: powerpc/pci: unmap legacy INTx interrupts when a PHB is removed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a passthrough IO adapter is removed from a pseries machine using hash MMU and the XIVE interrupt mode, the POWER hypervisor expects the guest OS to clear all page table entries related to the adapter. If some are still present, the RTAS call which isolates the PCI slot returns error 9001 "valid outstanding translations" and the removal of the IO adapter fails. This is because when the PHBs are scanned, Linux maps automatically the INTx interrupts in the Linux interrupt number space but these are never removed. To solve this problem, we introduce a PPC platform specific pcibios_remove_bus() routine which clears all interrupt mappings when the bus is removed. This also clears the associated page table entries of the ESB pages when using XIVE. For this purpose, we record the logical interrupt numbers of the mapped interrupt under the PHB structure and let pcibios_remove_bus() do the clean up. Since some PCI adapters, like GPUs, use the "interrupt-map" property to describe interrupt mappings other than the legacy INTx interrupts, we can not restrict the size of the mapping array to PCI_NUM_INTX. The number of interrupt mappings is computed from the "interrupt-map" property and the mapping array is allocated accordingly. 
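As a purely illustrative sizing example: a PHB node with #address-cells = 3 and #interrupt-cells = 1, whose interrupt parent reports #interrupt-cells = 2 and no #address-cells, needs 3 + 1 + 1 + 0 + 2 = 7 cells per "interrupt-map" entry (child unit address, child interrupt specifier, parent phandle, parent unit address, parent interrupt specifier). A map covering only the four legacy INTx lines is then 28 cells and yields 4 mappings, while an adapter with extra entries simply produces a proportionally larger array, which is why the array is sized from the property instead of being fixed at PCI_NUM_INTX.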
Signed-off-by: Cédric Le Goater Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200807101854.844619-1-clg@kaod.org --- arch/powerpc/include/asm/pci-bridge.h | 6 ++ arch/powerpc/kernel/pci-common.c | 114 ++++++++++++++++++++++++++++++++++ 2 files changed, 120 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index d2a2a14e56f9..d21e070352dc 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -48,6 +48,9 @@ struct pci_controller_ops { /* * Structure of a PCI controller (host bridge) + * + * @irq_count: number of interrupt mappings + * @irq_map: interrupt mappings */ struct pci_controller { struct pci_bus *bus; @@ -127,6 +130,9 @@ struct pci_controller { void *private_data; struct npu *npu; + + unsigned int irq_count; + unsigned int *irq_map; }; /* These are used for config access before all the PCI probing diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index be108616a721..deb831f0ae13 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -353,6 +353,115 @@ struct pci_controller *pci_find_controller_for_domain(int domain_nr) return NULL; } +/* + * Assumption is made on the interrupt parent. All interrupt-map + * entries are considered to have the same parent. + */ +static int pcibios_irq_map_count(struct pci_controller *phb) +{ + const __be32 *imap; + int imaplen; + struct device_node *parent; + u32 intsize, addrsize, parintsize, paraddrsize; + + if (of_property_read_u32(phb->dn, "#interrupt-cells", &intsize)) + return 0; + if (of_property_read_u32(phb->dn, "#address-cells", &addrsize)) + return 0; + + imap = of_get_property(phb->dn, "interrupt-map", &imaplen); + if (!imap) { + pr_debug("%pOF : no interrupt-map\n", phb->dn); + return 0; + } + imaplen /= sizeof(u32); + pr_debug("%pOF : imaplen=%d\n", phb->dn, imaplen); + + if (imaplen < (addrsize + intsize + 1)) + return 0; + + imap += intsize + addrsize; + parent = of_find_node_by_phandle(be32_to_cpup(imap)); + if (!parent) { + pr_debug("%pOF : no imap parent found !\n", phb->dn); + return 0; + } + + if (of_property_read_u32(parent, "#interrupt-cells", &parintsize)) { + pr_debug("%pOF : parent lacks #interrupt-cells!\n", phb->dn); + return 0; + } + + if (of_property_read_u32(parent, "#address-cells", &paraddrsize)) + paraddrsize = 0; + + return imaplen / (addrsize + intsize + 1 + paraddrsize + parintsize); +} + +static void pcibios_irq_map_init(struct pci_controller *phb) +{ + phb->irq_count = pcibios_irq_map_count(phb); + if (phb->irq_count < PCI_NUM_INTX) + phb->irq_count = PCI_NUM_INTX; + + pr_debug("%pOF : interrupt map #%d\n", phb->dn, phb->irq_count); + + phb->irq_map = kcalloc(phb->irq_count, sizeof(unsigned int), + GFP_KERNEL); +} + +static void pci_irq_map_register(struct pci_dev *pdev, unsigned int virq) +{ + struct pci_controller *phb = pci_bus_to_host(pdev->bus); + int i; + + if (!phb->irq_map) + return; + + for (i = 0; i < phb->irq_count; i++) { + /* + * Look for an empty or an equivalent slot, as INTx + * interrupts can be shared between adapters. + */ + if (phb->irq_map[i] == virq || !phb->irq_map[i]) { + phb->irq_map[i] = virq; + break; + } + } + + if (i == phb->irq_count) + pr_err("PCI:%s all platform interrupts mapped\n", + pci_name(pdev)); +} + +/* + * Clearing the mapped interrupts will also clear the underlying + * mappings of the ESB pages of the interrupts when under XIVE. 
It is + * a requirement of PowerVM to clear all memory mappings before + * removing a PHB. + */ +static void pci_irq_map_dispose(struct pci_bus *bus) +{ + struct pci_controller *phb = pci_bus_to_host(bus); + int i; + + if (!phb->irq_map) + return; + + pr_debug("PCI: Clearing interrupt mappings for PHB %04x:%02x...\n", + pci_domain_nr(bus), bus->number); + for (i = 0; i < phb->irq_count; i++) + irq_dispose_mapping(phb->irq_map[i]); + + kfree(phb->irq_map); +} + +void pcibios_remove_bus(struct pci_bus *bus) +{ + pci_irq_map_dispose(bus); +} +EXPORT_SYMBOL_GPL(pcibios_remove_bus); + /* * Reads the interrupt pin to determine if interrupt is use by card. * If the interrupt is used, then gets the interrupt line from the @@ -401,6 +510,8 @@ static int pci_read_irq_line(struct pci_dev *pci_dev) pci_dev->irq = virq; + /* Record all interrut mappings for later removal of a PHB */ + pci_irq_map_register(pci_dev, virq); return 0; } @@ -1554,6 +1665,9 @@ void pcibios_scan_phb(struct pci_controller *hose) pr_debug("PCI: Scanning PHB %pOF\n", node); + /* Allocate interrupt mappings array */ + pcibios_irq_map_init(hose); + /* Get some IO space for the new PHB */ pcibios_setup_phb_io_space(hose); -- cgit v1.2.3 From d53c3dfb23c45f7d4f910c3a3ca84bf0a99c6143 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 14 Sep 2020 14:52:16 +1000 Subject: mm: fix exec activate_mm vs TLB shootdown and lazy tlb switching race Reading and modifying current->mm and current->active_mm and switching mm should be done with irqs off, to prevent races seeing an intermediate state. This is similar to commit 38cf307c1f20 ("mm: fix kthread_use_mm() vs TLB invalidate"). At exec-time when the new mm is activated, the old one should usually be single-threaded and no longer used, unless something else is holding an mm_users reference (which may be possible). Absent other mm_users, there is also a race with preemption and lazy tlb switching. Consider the kernel_execve case where the current thread is using a lazy tlb active mm: call_usermodehelper() kernel_execve() old_mm = current->mm; active_mm = current->active_mm; *** preempt *** --------------------> schedule() prev->active_mm = NULL; mmdrop(prev active_mm); ... <-------------------- schedule() current->mm = mm; current->active_mm = mm; if (!old_mm) mmdrop(active_mm); If we switch back to the kernel thread from a different mm, there is a double free of the old active_mm, and a missing free of the new one. Closing this race only requires interrupts to be disabled while ->mm and ->active_mm are being switched, but the TLB problem requires also holding interrupts off over activate_mm. Unfortunately not all archs can do that yet, e.g., arm defers the switch if irqs are disabled and expects finish_arch_post_lock_switch() to be called to complete the flush; um takes a blocking lock in activate_mm(). So as a first step, disable interrupts across the mm/active_mm updates to close the lazy tlb preempt race, and provide an arch option to extend that to activate_mm which allows architectures doing IPI based TLB shootdowns to close the second race. This is a bit ugly, but in the interest of fixing the bug and backporting before all architectures are converted this is a compromise. 
Signed-off-by: Nicholas Piggin Acked-by: Peter Zijlstra (Intel) Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200914045219.3736466-2-npiggin@gmail.com --- arch/Kconfig | 7 +++++++ fs/exec.c | 17 +++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index af14a567b493..94821e3f94d1 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -414,6 +414,13 @@ config MMU_GATHER_NO_GATHER bool depends on MMU_GATHER_TABLE_FREE +config ARCH_WANT_IRQS_OFF_ACTIVATE_MM + bool + help + Temporary select until all architectures can be converted to have + irqs disabled over activate_mm. Architectures that do IPI based TLB + shootdowns should enable this. + config ARCH_HAVE_NMI_SAFE_CMPXCHG bool diff --git a/fs/exec.c b/fs/exec.c index a91003e28eaa..d4fb18baf1fb 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1130,11 +1130,24 @@ static int exec_mmap(struct mm_struct *mm) } task_lock(tsk); - active_mm = tsk->active_mm; membarrier_exec_mmap(mm); - tsk->mm = mm; + + local_irq_disable(); + active_mm = tsk->active_mm; tsk->active_mm = mm; + tsk->mm = mm; + /* + * This prevents preemption while active_mm is being loaded and + * it and mm are being updated, which could cause problems for + * lazy tlb mm refcounting when these are updated by context + * switches. Not all architectures can handle irqs off over + * activate_mm yet. + */ + if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) + local_irq_enable(); activate_mm(active_mm, mm); + if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) + local_irq_enable(); tsk->mm->vmacache_seqnum = 0; vmacache_flush(tsk); task_unlock(tsk); -- cgit v1.2.3 From 66acd46080bd9e5ad2be4b0eb1d498d5145d058e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 14 Sep 2020 14:52:17 +1000 Subject: powerpc: select ARCH_WANT_IRQS_OFF_ACTIVATE_MM powerpc uses IPIs in some situations to switch a kernel thread away from a lazy tlb mm, which is subject to the TLB flushing race described in the changelog introducing ARCH_WANT_IRQS_OFF_ACTIVATE_MM. 
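Selecting the option is a promise that the architecture's activate_mm() is safe to call with interrupts hard-disabled; exec_mmap() then keeps irqs off across the ->active_mm/->mm updates and the activate_mm() call itself. For an IPI-flush architecture the conversion is typically just the Kconfig select plus an activate_mm() that goes through the irqs-off switch path, roughly (sketch only, mirroring what the hunk below does for powerpc):

	static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
	{
		/* exec_mmap() now guarantees interrupts are disabled here */
		switch_mm_irqs_off(prev, next, current);
	}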
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200914045219.3736466-3-npiggin@gmail.com --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/mmu_context.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 1f48bbfb3ce9..65cb32211574 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -149,6 +149,7 @@ config PPC select ARCH_USE_QUEUED_RWLOCKS if PPC_QUEUED_SPINLOCKS select ARCH_USE_QUEUED_SPINLOCKS if PPC_QUEUED_SPINLOCKS select ARCH_WANT_IPC_PARSE_VERSION + select ARCH_WANT_IRQS_OFF_ACTIVATE_MM select ARCH_WEAK_RELEASE_ACQUIRE select BINFMT_ELF select BUILDTIME_TABLE_SORT diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 7f3658a97384..e02aa793420b 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -244,7 +244,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, */ static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) { - switch_mm(prev, next, current); + switch_mm_irqs_off(prev, next, current); } /* We don't currently use enter_lazy_tlb() for anything */ -- cgit v1.2.3 From bafb056ce27940c9994ea905336aa8f27b4f7275 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 14 Sep 2020 14:52:18 +1000 Subject: sparc64: remove mm_cpumask clearing to fix kthread_use_mm race The de facto (and apparently uncommented) standard for using an mm had, thanks to this code in sparc if nothing else, been that you must have a reference on mm_users *and that reference must have been obtained with mmget()*, i.e., from a thread with a reference to mm_users that had used the mm. The introduction of mmget_not_zero() in commit d2005e3f41d4 ("userfaultfd: don't pin the user memory in userfaultfd_file_create()") allowed mm_count holders to aoperate on user mappings asynchronously from the actual threads using the mm, but they were not to load those mappings into their TLB (i.e., walking vmas and page tables is okay, kthread_use_mm() is not). io_uring 2b188cc1bb857 ("Add io_uring IO interface") added code which does a kthread_use_mm() from a mmget_not_zero() refcount. The problem with this is code which previously assumed mm == current->mm and mm->mm_users == 1 implies the mm will remain single-threaded at least until this thread creates another mm_users reference, has now broken. arch/sparc/kernel/smp_64.c: if (atomic_read(&mm->mm_users) == 1) { cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); goto local_flush_and_out; } vs fs/io_uring.c if (unlikely(!(ctx->flags & IORING_SETUP_SQPOLL) || !mmget_not_zero(ctx->sqo_mm))) return -EFAULT; kthread_use_mm(ctx->sqo_mm); mmget_not_zero() could come in right after the mm_users == 1 test, then kthread_use_mm() which sets its CPU in the mm_cpumask. That update could be lost if cpumask_copy() occurs afterward. I propose we fix this by allowing mmget_not_zero() to be a first-class reference, and not have this obscure undocumented and unchecked restriction. The basic fix for sparc64 is to remove its mm_cpumask clearing code. The optimisation could be effectively restored by sending IPIs to mm_cpumask members and having them remove themselves from mm_cpumask. This is more tricky so I leave it as an exercise for someone with a sparc64 SMP. powerpc has a (currently similarly broken) example. Signed-off-by: Nicholas Piggin Acked-by: David S. 
Miller Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200914045219.3736466-4-npiggin@gmail.com --- arch/sparc/kernel/smp_64.c | 65 ++++++++++------------------------------------ 1 file changed, 14 insertions(+), 51 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index e286e2badc8a..e38d8bf454e8 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1039,38 +1039,9 @@ void smp_fetch_global_pmu(void) * are flush_tlb_*() routines, and these run after flush_cache_*() * which performs the flushw. * - * The SMP TLB coherency scheme we use works as follows: - * - * 1) mm->cpu_vm_mask is a bit mask of which cpus an address - * space has (potentially) executed on, this is the heuristic - * we use to avoid doing cross calls. - * - * Also, for flushing from kswapd and also for clones, we - * use cpu_vm_mask as the list of cpus to make run the TLB. - * - * 2) TLB context numbers are shared globally across all processors - * in the system, this allows us to play several games to avoid - * cross calls. - * - * One invariant is that when a cpu switches to a process, and - * that processes tsk->active_mm->cpu_vm_mask does not have the - * current cpu's bit set, that tlb context is flushed locally. - * - * If the address space is non-shared (ie. mm->count == 1) we avoid - * cross calls when we want to flush the currently running process's - * tlb state. This is done by clearing all cpu bits except the current - * processor's in current->mm->cpu_vm_mask and performing the - * flush locally only. This will force any subsequent cpus which run - * this task to flush the context from the local tlb if the process - * migrates to another cpu (again). - * - * 3) For shared address spaces (threads) and swapping we bite the - * bullet for most cases and perform the cross call (but only to - * the cpus listed in cpu_vm_mask). - * - * The performance gain from "optimizing" away the cross call for threads is - * questionable (in theory the big win for threads is the massive sharing of - * address space state across processors). + * mm->cpu_vm_mask is a bit mask of which cpus an address + * space has (potentially) executed on, this is the heuristic + * we use to limit cross calls. 
*/ /* This currently is only used by the hugetlb arch pre-fault @@ -1080,18 +1051,13 @@ void smp_fetch_global_pmu(void) void smp_flush_tlb_mm(struct mm_struct *mm) { u32 ctx = CTX_HWBITS(mm->context); - int cpu = get_cpu(); - if (atomic_read(&mm->mm_users) == 1) { - cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); - goto local_flush_and_out; - } + get_cpu(); smp_cross_call_masked(&xcall_flush_tlb_mm, ctx, 0, 0, mm_cpumask(mm)); -local_flush_and_out: __flush_tlb_mm(ctx, SECONDARY_CONTEXT); put_cpu(); @@ -1114,17 +1080,15 @@ void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long { u32 ctx = CTX_HWBITS(mm->context); struct tlb_pending_info info; - int cpu = get_cpu(); + + get_cpu(); info.ctx = ctx; info.nr = nr; info.vaddrs = vaddrs; - if (mm == current->mm && atomic_read(&mm->mm_users) == 1) - cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); - else - smp_call_function_many(mm_cpumask(mm), tlb_pending_func, - &info, 1); + smp_call_function_many(mm_cpumask(mm), tlb_pending_func, + &info, 1); __flush_tlb_pending(ctx, nr, vaddrs); @@ -1134,14 +1098,13 @@ void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long void smp_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr) { unsigned long context = CTX_HWBITS(mm->context); - int cpu = get_cpu(); - if (mm == current->mm && atomic_read(&mm->mm_users) == 1) - cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); - else - smp_cross_call_masked(&xcall_flush_tlb_page, - context, vaddr, 0, - mm_cpumask(mm)); + get_cpu(); + + smp_cross_call_masked(&xcall_flush_tlb_page, + context, vaddr, 0, + mm_cpumask(mm)); + __flush_tlb_page(context, vaddr); put_cpu(); -- cgit v1.2.3 From a665eec0a22e11cdde708c1c256a465ebe768047 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 14 Sep 2020 14:52:19 +1000 Subject: powerpc/64s/radix: Fix mm_cpumask trimming race vs kthread_use_mm Commit 0cef77c7798a7 ("powerpc/64s/radix: flush remote CPUs out of single-threaded mm_cpumask") added a mechanism to trim the mm_cpumask of a process under certain conditions. One of the assumptions is that mm_users would not be incremented via a reference outside the process context with mmget_not_zero() then go on to kthread_use_mm() via that reference. That invariant was broken by io_uring code (see previous sparc64 fix), but I'll point Fixes: to the original powerpc commit because we are changing that assumption going forward, so this will make backports match up. Fix this by no longer relying on that assumption, but by having each CPU check the mm is not being used, and clearing their own bit from the mask only if it hasn't been switched-to by the time the IPI is processed. This relies on commit 38cf307c1f20 ("mm: fix kthread_use_mm() vs TLB invalidate") and ARCH_WANT_IRQS_OFF_ACTIVATE_MM to disable irqs over mm switch sequences. 
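The window being closed looks roughly like this (CPU numbers are illustrative):

  CPU0: radix__flush_tlb_mm()           CPU1: io_uring worker
    mm_is_singlethreaded(mm) is true
                                          mmget_not_zero(mm)
                                          kthread_use_mm(mm)
                                            sets CPU1 in mm_cpumask(mm)
    exit_flush_lazy_tlbs(mm)
      IPIs the cpumask
      mm_reset_thread_local(mm)
        rewrites mm_cpumask(mm) to CPU0
        only, losing CPU1's bit

After this patch the initiating CPU never rewrites the mask. Each IPI target always flushes; a target that merely had mm as its lazy active_mm switches itself to init_mm and then clears its own bit and active_cpus count, while a target that is genuinely using the mm (current->mm == mm, as a racing kthread_use_mm() would be) keeps its bit and keeps receiving invalidations.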
Fixes: 0cef77c7798a7 ("powerpc/64s/radix: flush remote CPUs out of single-threaded mm_cpumask") Signed-off-by: Nicholas Piggin Reviewed-by: Michael Ellerman Depends-on: 38cf307c1f20 ("mm: fix kthread_use_mm() vs TLB invalidate") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200914045219.3736466-5-npiggin@gmail.com --- arch/powerpc/include/asm/tlb.h | 13 ------------- arch/powerpc/mm/book3s64/radix_tlb.c | 23 ++++++++++++++++------- 2 files changed, 16 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index fbc6f3002f23..d97f061fecac 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -66,19 +66,6 @@ static inline int mm_is_thread_local(struct mm_struct *mm) return false; return cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm)); } -static inline void mm_reset_thread_local(struct mm_struct *mm) -{ - WARN_ON(atomic_read(&mm->context.copros) > 0); - /* - * It's possible for mm_access to take a reference on mm_users to - * access the remote mm from another thread, but it's not allowed - * to set mm_cpumask, so mm_users may be > 1 here. - */ - WARN_ON(current->mm != mm); - atomic_set(&mm->context.active_cpus, 1); - cpumask_clear(mm_cpumask(mm)); - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); -} #else /* CONFIG_PPC_BOOK3S_64 */ static inline int mm_is_thread_local(struct mm_struct *mm) { diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 0d233763441f..143b4fd396f0 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -645,19 +645,29 @@ static void do_exit_flush_lazy_tlb(void *arg) struct mm_struct *mm = arg; unsigned long pid = mm->context.id; + /* + * A kthread could have done a mmget_not_zero() after the flushing CPU + * checked mm_is_singlethreaded, and be in the process of + * kthread_use_mm when interrupted here. In that case, current->mm will + * be set to mm, because kthread_use_mm() setting ->mm and switching to + * the mm is done with interrupts off. + */ if (current->mm == mm) - return; /* Local CPU */ + goto out_flush; if (current->active_mm == mm) { - /* - * Must be a kernel thread because sender is single-threaded. - */ - BUG_ON(current->mm); + WARN_ON_ONCE(current->mm != NULL); + /* Is a kernel thread and is using mm as the lazy tlb */ mmgrab(&init_mm); - switch_mm(mm, &init_mm, current); current->active_mm = &init_mm; + switch_mm_irqs_off(mm, &init_mm, current); mmdrop(mm); } + + atomic_dec(&mm->context.active_cpus); + cpumask_clear_cpu(smp_processor_id(), mm_cpumask(mm)); + +out_flush: _tlbiel_pid(pid, RIC_FLUSH_ALL); } @@ -672,7 +682,6 @@ static void exit_flush_lazy_tlbs(struct mm_struct *mm) */ smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb, (void *)mm, 1); - mm_reset_thread_local(mm); } void radix__flush_tlb_mm(struct mm_struct *mm) -- cgit v1.2.3 From ca78ef2f08ccfa29b711d644964cdf9d7ace15e5 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Sat, 12 Sep 2020 13:44:51 +0530 Subject: powerpc/papr_scm: Fix warning triggered by perf_stats_show() A warning is reported by the kernel in case perf_stats_show() returns an error code. 
The warning is of the form below: papr_scm ibm,persistent-memory:ibm,pmemory@44100001: Failed to query performance stats, Err:-10 dev_attr_show: perf_stats_show+0x0/0x1c0 [papr_scm] returned bad count fill_read_buffer: dev_attr_show+0x0/0xb0 returned bad count On investigation it looks like that the compiler is silently truncating the return value of drc_pmem_query_stats() from 'long' to 'int', since the variable used to store the return code 'rc' is an 'int'. This truncated value is then returned back as a 'ssize_t' back from perf_stats_show() to 'dev_attr_show()' which thinks of it as a large unsigned number and triggers this warning.. To fix this we update the type of variable 'rc' from 'int' to 'ssize_t' that prevents the compiler from truncating the return value of drc_pmem_query_stats() and returning correct signed value back from perf_stats_show(). Fixes: 2d02bf835e57 ("powerpc/papr_scm: Fetch nvdimm performance stats from PHYP") Signed-off-by: Vaibhav Jain Reviewed-by: Ira Weiny Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200912081451.66225-1-vaibhav@linux.ibm.com --- arch/powerpc/platforms/pseries/papr_scm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index a88a707a608a..5493bc847bd0 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -785,7 +785,8 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, static ssize_t perf_stats_show(struct device *dev, struct device_attribute *attr, char *buf) { - int index, rc; + int index; + ssize_t rc; struct seq_buf s; struct papr_scm_perf_stat *stat; struct papr_scm_perf_stats *stats; @@ -820,7 +821,7 @@ static ssize_t perf_stats_show(struct device *dev, free_stats: kfree(stats); - return rc ? rc : seq_buf_used(&s); + return rc ? rc : (ssize_t)seq_buf_used(&s); } DEVICE_ATTR_ADMIN_RO(perf_stats); -- cgit v1.2.3 From f3232321db58480804f80d59aeb651a5c859a200 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Fri, 7 Aug 2020 13:15:17 +0530 Subject: powerpc/topology: Override cpu_smt_mask On Power9, a pair of SMT4 cores can be presented by the firmware as a SMT8 core for backward compatibility reasons, with the fusion of two SMT4 cores. Powerpc allows LPARs to be live migrated from Power8 to Power9. Existing software developed/configured for Power8, expects to see a SMT8 core. In order to maintain userspace backward compatibility (with Power8 chips in case of Power9) in enterprise Linux systems, the topology_sibling_cpumask has to be set to SMT8 core. cpu_smt_mask() should generally point to the cpu mask of the SMT4 core. Hence override the default cpu_smt_mask() to be powerpc specific allowing for better scheduling behaviour on Power. 
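To make the distinction concrete (thread numbering shown only as an example of the usual big-core layout): on one fused core made of CPUs 0-7, topology_sibling_cpumask(0) keeps reporting the full SMT8 set 0-7, preserving what Power8-era userspace expects, while cpu_smt_mask(0) now returns just the SMT4 small core, e.g. 0,2,4,6, and it is this smaller mask that the scheduler uses to build its SMT domain.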
schbench (latency measured in usecs, so lesser is better) Without patch With patch Latency percentiles (usec) Latency percentiles (usec) 50.0000th: 34 50.0000th: 38 75.0000th: 47 75.0000th: 52 90.0000th: 54 90.0000th: 60 95.0000th: 57 95.0000th: 64 *99.0000th: 62 *99.0000th: 72 99.5000th: 65 99.5000th: 75 99.9000th: 76 99.9000th: 3452 min=0, max=9205 min=0, max=9344 schbench (With Cede disabled) Without patch With patch Latency percentiles (usec) Latency percentiles (usec) 50.0000th: 20 50.0000th: 21 75.0000th: 28 75.0000th: 29 90.0000th: 33 90.0000th: 34 95.0000th: 35 95.0000th: 37 *99.0000th: 40 *99.0000th: 40 99.5000th: 48 99.5000th: 42 99.9000th: 94 99.9000th: 79 min=0, max=791 min=0, max=791 perf bench sched pipe usec/ops : lesser is better Without patch N Min Max Median Avg Stddev 101 5.095113 5.595269 5.204842 5.2298776 0.10762713 5.10 - 5.15 : ################################################## 23% (24) 5.15 - 5.20 : ############################################# 21% (22) 5.20 - 5.25 : ################################################## 23% (24) 5.25 - 5.30 : ######################### 11% (12) 5.30 - 5.35 : ########## 4% (5) 5.35 - 5.40 : ######## 3% (4) 5.40 - 5.45 : ######## 3% (4) 5.45 - 5.50 : #### 1% (2) 5.50 - 5.55 : ## 0% (1) 5.55 - 5.60 : #### 1% (2) With patch N Min Max Median Avg Stddev 101 5.134675 8.524719 5.207658 5.2780985 0.34911969 5.1 - 5.5 : ################################################## 94% (95) 5.5 - 5.8 : ## 3% (4) 5.8 - 6.2 : 0% (1) 6.2 - 6.5 : 6.5 - 6.8 : 6.8 - 7.2 : 7.2 - 7.5 : 7.5 - 7.8 : 7.8 - 8.2 : 8.2 - 8.5 : perf bench sched pipe (cede disabled) usec/ops : lesser is better Without patch N Min Max Median Avg Stddev 101 7.884227 12.576538 7.956474 8.0170722 0.46159054 7.9 - 8.4 : ################################################## 99% (100) 8.4 - 8.8 : 8.8 - 9.3 : 9.3 - 9.8 : 9.8 - 10.2 : 10.2 - 10.7 : 10.7 - 11.2 : 11.2 - 11.6 : 11.6 - 12.1 : 12.1 - 12.6 : With patch N Min Max Median Avg Stddev 101 7.956021 8.217284 8.015615 8.0283866 0.049844967 7.96 - 7.98 : ###################### 12% (13) 7.98 - 8.01 : ################################################## 28% (29) 8.01 - 8.03 : #################################### 20% (21) 8.03 - 8.06 : ######################### 14% (15) 8.06 - 8.09 : ###################### 12% (13) 8.09 - 8.11 : ###### 3% (4) 8.11 - 8.14 : ### 1% (2) 8.14 - 8.17 : ### 1% (2) 8.17 - 8.19 : 8.19 - 8.22 : # 0% (1) Observations: With the patch, the initial run/iteration takes a slight longer time. This can be attributed to the fact that now we pick a CPU from a idle core which could be sleep mode. Once we remove the cede, state the numbers improve in favour of the patch. 
ebizzy: transactions per second (higher is better) without patch N Min Max Median Avg Stddev 100 1018433 1304470 1193208 1182315.7 60018.733 1018433 - 1047037 : ###### 3% (3) 1047037 - 1075640 : ######## 4% (4) 1075640 - 1104244 : ######## 4% (4) 1104244 - 1132848 : ############### 7% (7) 1132848 - 1161452 : #################################### 17% (17) 1161452 - 1190055 : ########################## 12% (12) 1190055 - 1218659 : ############################################# 21% (21) 1218659 - 1247263 : ################################################## 23% (23) 1247263 - 1275866 : ######## 4% (4) 1275866 - 1304470 : ######## 4% (4) with patch N Min Max Median Avg Stddev 100 967014 1292938 1208819 1185281.8 69815.851 967014 - 999606 : ## 1% (1) 999606 - 1032199 : ## 1% (1) 1032199 - 1064791 : ############ 6% (6) 1064791 - 1097384 : ########## 5% (5) 1097384 - 1129976 : ################## 9% (9) 1129976 - 1162568 : #################### 10% (10) 1162568 - 1195161 : ########################## 13% (13) 1195161 - 1227753 : ############################################ 22% (22) 1227753 - 1260346 : ################################################## 25% (25) 1260346 - 1292938 : ############## 7% (7) Observations: Not much changes, ebizzy is not much impacted. Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200807074517.27957-2-srikar@linux.vnet.ibm.com --- arch/powerpc/include/asm/cputhreads.h | 1 - arch/powerpc/include/asm/smp.h | 13 +++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h index deb99fd6e060..98c8bd155bf9 100644 --- a/arch/powerpc/include/asm/cputhreads.h +++ b/arch/powerpc/include/asm/cputhreads.h @@ -23,7 +23,6 @@ extern int threads_per_core; extern int threads_per_subcore; extern int threads_shift; -extern bool has_big_cores; extern cpumask_t threads_core_mask; #else #define threads_per_core 1 diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 81a49566ccd8..b727f5f7b8f9 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -135,6 +135,19 @@ static inline struct cpumask *cpu_smallcore_mask(int cpu) extern int cpu_to_core_id(int cpu); +extern bool has_big_cores; + +#define cpu_smt_mask cpu_smt_mask +#ifdef CONFIG_SCHED_SMT +static inline const struct cpumask *cpu_smt_mask(int cpu) +{ + if (has_big_cores) + return per_cpu(cpu_smallcore_map, cpu); + + return per_cpu(cpu_sibling_map, cpu); +} +#endif /* CONFIG_SCHED_SMT */ + /* Since OpenPIC has only 4 IPIs, we use slightly different message numbers. * * Make sure this matches openpic_request_IPIs in open_pic.c, or what shows up -- cgit v1.2.3 From 67df77845c181166d4bc324cbb0382f7e81c7631 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 17 Aug 2020 11:22:57 +0530 Subject: powerpc/numa: Restrict possible nodes based on platform As per draft LoPAPR (Revision 2.9_pre7), section B.5.3 "Run Time Abstraction Services (RTAS) Node" available at: https://openpowerfoundation.org/wp-content/uploads/2020/07/LoPAR-20200611.pdf ... there are 2 device tree properties: "ibm,max-associativity-domains" which defines the maximum number of domains that the firmware i.e PowerVM can support. and: "ibm,current-associativity-domains" which defines the maximum number of domains that the current platform can support. 
The value of "ibm,max-associativity-domains" is always greater than or equal to "ibm,current-associativity-domains" property. If the latter property is not available, use "ibm,max-associativity-domain" as a fallback. In this yet to be released LoPAPR, "ibm,current-associativity-domains" is mentioned in page 833 / B.5.3 which is covered under under "Appendix B. System Binding" section Currently powerpc uses the "ibm,max-associativity-domains" property while setting the possible number of nodes. This is currently set at 32. However the possible number of nodes for a platform may be significantly less. Hence set the possible number of nodes based on "ibm,current-associativity-domains" property. Nathan Lynch had raised a valid concern that post LPM (Live Partition Migration), a user could DLPAR add processors and memory after LPM with "new" associativity properties: https://lore.kernel.org/linuxppc-dev/871rljfet9.fsf@linux.ibm.com/t/#u He also pointed out that "ibm,max-associativity-domains" has the same contents on all currently available PowerVM systems, unlike "ibm,current-associativity-domains" and hence may be better able to handle the new NUMA associativity properties. However with the recent commit dbce45628085 ("powerpc/numa: Limit possible nodes to within num_possible_nodes"), all new NUMA associativity properties are capped to initially set nr_node_ids. Hence this commit should be safe with any new DLPAR add post LPM. $ lsprop /proc/device-tree/rtas/ibm,*associ*-domains /proc/device-tree/rtas/ibm,current-associativity-domains 00000005 00000001 00000002 00000002 00000002 00000010 /proc/device-tree/rtas/ibm,max-associativity-domains 00000005 00000001 00000008 00000020 00000020 00000100 $ cat /sys/devices/system/node/possible ##Before patch 0-31 $ cat /sys/devices/system/node/possible ##After patch 0-1 Note the maximum nodes this platform can support is only 2 but the possible nodes is set to 32. This is important because lot of kernel and user space code allocate structures for all possible nodes leading to a lot of memory that is allocated but not used. I ran a simple experiment to create and destroy 100 memory cgroups on boot on a 8 node machine (Power8 Alpine). Before patch: free -k at boot total used free shared buff/cache available Mem: 523498176 4106816 518820608 22272 570752 516606720 Swap: 4194240 0 4194240 free -k after creating 100 memory cgroups total used free shared buff/cache available Mem: 523498176 4628416 518246464 22336 623296 516058688 Swap: 4194240 0 4194240 free -k after destroying 100 memory cgroups total used free shared buff/cache available Mem: 523498176 4697408 518173760 22400 627008 515987904 Swap: 4194240 0 4194240 After patch: free -k at boot total used free shared buff/cache available Mem: 523498176 3969472 518933888 22272 594816 516731776 Swap: 4194240 0 4194240 free -k after creating 100 memory cgroups total used free shared buff/cache available Mem: 523498176 4181888 518676096 22208 640192 516496448 Swap: 4194240 0 4194240 free -k after destroying 100 memory cgroups total used free shared buff/cache available Mem: 523498176 4232320 518619904 22272 645952 516443264 Swap: 4194240 0 4194240 Observations: Fixed kernel takes 137344 kb (4106816-3969472) less to boot. Fixed kernel takes 309184 kb (4628416-4181888-137344) less to create 100 memcgs. 
Signed-off-by: Srikar Dronamraju [mpe: Reformat change log a bit for readability] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200817055257.110873-1-srikar@linux.vnet.ibm.com --- arch/powerpc/mm/numa.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 1f61fa2148b5..5ddc83ba20f4 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -900,10 +900,19 @@ static void __init find_possible_nodes(void) if (!rtas) return; - if (of_property_read_u32_index(rtas, - "ibm,max-associativity-domains", + if (of_property_read_u32_index(rtas, "ibm,current-associativity-domains", + min_common_depth, &numnodes)) { + /* + * ibm,current-associativity-domains is a fairly recent + * property. If it doesn't exist, then fallback on + * ibm,max-associativity-domains. Current denotes what the + * platform can support compared to max which denotes what the + * Hypervisor can support. + */ + if (of_property_read_u32_index(rtas, "ibm,max-associativity-domains", min_common_depth, &numnodes)) - goto out; + goto out; + } for (i = 0; i < numnodes; i++) { if (!node_possible(i)) -- cgit v1.2.3 From a874f1005ef5dfe53dfd8cda59a6600e89986ecd Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Tue, 18 Aug 2020 13:41:02 +0530 Subject: powerpc/numa: Set numa_node for all possible cpus A Powerpc system with multiple possible nodes and with CONFIG_NUMA enabled always used to have a node 0, even if node 0 does not any cpus or memory attached to it. As per PAPR, node affinity of a cpu is only available once its present / online. For all cpus that are possible but not present, cpu_to_node() would point to node 0. To ensure a cpuless, memoryless dummy node is not online, powerpc need to make sure all possible but not present cpu_to_node are set to a proper node. Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200818081104.57888-2-srikar@linux.vnet.ibm.com --- arch/powerpc/mm/numa.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 5ddc83ba20f4..f63e1a41402f 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -507,6 +507,11 @@ static int numa_setup_cpu(unsigned long lcpu) int fcpu = cpu_first_thread_sibling(lcpu); int nid = NUMA_NO_NODE; + if (!cpu_present(lcpu)) { + set_cpu_numa_node(lcpu, first_online_node); + return first_online_node; + } + /* * If a valid cpu-to-node mapping is already available, use it * directly instead of querying the firmware, since it represents @@ -944,8 +949,17 @@ void __init mem_topology_setup(void) reset_numa_cpu_lookup_table(); - for_each_present_cpu(cpu) + for_each_possible_cpu(cpu) { + /* + * Powerpc with CONFIG_NUMA always used to have a node 0, + * even if it was memoryless or cpuless. For all cpus that + * are possible but not present, cpu_to_node() would point + * to node 0. To remove a cpuless, memoryless dummy node, + * powerpc need to make sure all possible but not present + * cpu_to_node are set to a proper node. + */ numa_setup_cpu(cpu); + } } void __init initmem_init(void) -- cgit v1.2.3 From 6398eaa268168b528dd1d3d0e70e61e9c13bea23 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Tue, 18 Aug 2020 13:41:03 +0530 Subject: powerpc/numa: Prefer node id queried from vphn Node id queried from the static device tree may not be correct. 
For example: it may always show 0 on a shared processor. Hence prefer the node id queried from vphn and fallback on the device tree based node id if vphn query fails. Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200818081104.57888-3-srikar@linux.vnet.ibm.com --- arch/powerpc/mm/numa.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index f63e1a41402f..9f0127cfb978 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -728,21 +728,22 @@ static int __init parse_numa_properties(void) */ for_each_present_cpu(i) { struct device_node *cpu; - int nid; - - cpu = of_get_cpu_node(i, NULL); - BUG_ON(!cpu); - nid = of_node_to_nid_single(cpu); - of_node_put(cpu); + int nid = vphn_get_nid(i); /* * Don't fall back to default_nid yet -- we will plug * cpus into nodes once the memory scan has discovered * the topology. */ - if (nid < 0) - continue; - node_set_online(nid); + if (nid == NUMA_NO_NODE) { + cpu = of_get_cpu_node(i, NULL); + BUG_ON(!cpu); + nid = of_node_to_nid_single(cpu); + of_node_put(cpu); + } + + if (likely(nid > 0)) + node_set_online(nid); } get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); -- cgit v1.2.3 From e75130f20b1f48e04ccc806aea01f0a361f9cb6b Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Tue, 18 Aug 2020 13:41:04 +0530 Subject: powerpc/numa: Offline memoryless cpuless node 0 Currently Linux kernel with CONFIG_NUMA on a system with multiple possible nodes, marks node 0 as online at boot. However in practice, there are systems which have node 0 as memoryless and cpuless. This can cause numa_balancing to be enabled on systems with only one node with memory and CPUs. The existence of this dummy node which is cpuless and memoryless node can confuse users/scripts looking at output of lscpu / numactl. By marking, node 0 as offline, lets stop assuming that node 0 is always online. If node 0 has CPU or memory that are online, node 0 will again be set as online. v5.8 available: 2 nodes (0,2) node 0 cpus: node 0 size: 0 MB node 0 free: 0 MB node 2 cpus: 0 1 2 3 4 5 6 7 node 2 size: 32625 MB node 2 free: 31490 MB node distances: node 0 2 0: 10 20 2: 20 10 proc and sys files ------------------ /sys/devices/system/node/online: 0,2 /proc/sys/kernel/numa_balancing: 1 /sys/devices/system/node/has_cpu: 2 /sys/devices/system/node/has_memory: 2 /sys/devices/system/node/has_normal_memory: 2 /sys/devices/system/node/possible: 0-31 v5.8 + patch ------------------ available: 1 nodes (2) node 2 cpus: 0 1 2 3 4 5 6 7 node 2 size: 32625 MB node 2 free: 31487 MB node distances: node 2 2: 10 proc and sys files ------------------ /sys/devices/system/node/online: 2 /proc/sys/kernel/numa_balancing: 0 /sys/devices/system/node/has_cpu: 2 /sys/devices/system/node/has_memory: 2 /sys/devices/system/node/has_normal_memory: 2 /sys/devices/system/node/possible: 0-31 Example of a node with online CPUs/memory on node 0. 
(Same o/p with and without patch) numactl -H available: 4 nodes (0-3) node 0 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 node 0 size: 32482 MB node 0 free: 22994 MB node 1 cpus: 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 node 1 size: 0 MB node 1 free: 0 MB node 2 cpus: 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 node 2 size: 0 MB node 2 free: 0 MB node 3 cpus: 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 node 3 size: 0 MB node 3 free: 0 MB node distances: node 0 1 2 3 0: 10 20 40 40 1: 20 10 40 40 2: 40 40 10 20 3: 40 40 20 10 Note: On Powerpc, cpu_to_node of possible but not present cpus would previously return 0. Hence this commit depends on commit ("powerpc/numa: Set numa_node for all possible cpus") and commit ("powerpc/numa: Prefer node id queried from vphn"). Without the 2 commits, Powerpc system might crash. 1. User space applications like Numactl, lscpu, that parse the sysfs tend to believe there is an extra online node. This tends to confuse users and applications. Other user space applications start believing that system was not able to use all the resources (i.e missing resources) or the system was not setup correctly. 2. Also existence of dummy node also leads to inconsistent information. The number of online nodes is inconsistent with the information in the device-tree and resource-dump 3. When the dummy node is present, single node non-Numa systems end up showing up as NUMA systems and numa_balancing gets enabled. This will mean we take the hit from the unnecessary numa hinting faults. Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200818081104.57888-4-srikar@linux.vnet.ibm.com --- arch/powerpc/mm/numa.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 9f0127cfb978..481951ac3e55 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -933,6 +933,16 @@ void __init mem_topology_setup(void) { int cpu; + /* + * Linux/mm assumes node 0 to be online at boot. However this is not + * true on PowerPC, where node 0 is similar to any other node, it + * could be cpuless, memoryless node. So force node 0 to be offline + * for now. This will prevent cpuless, memoryless node 0 showing up + * unnecessarily as online. If a node has cpus or memory that need + * to be online, then node will anyway be marked online. + */ + node_set_offline(0); + if (parse_numa_properties()) setup_nonnuma(); -- cgit v1.2.3 From d0fd24bbd27619d7b8d4da26a19a2027931ae9fc Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 10 Aug 2020 12:48:25 +0530 Subject: powerpc/smp: Fix a warning under !NEED_MULTIPLE_NODES Fix a build warning in a non CONFIG_NEED_MULTIPLE_NODES "error: _numa_cpu_lookup_table_ undeclared" Signed-off-by: Srikar Dronamraju Reviewed-by: Gautham R. 
Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200810071834.92514-2-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 8261999c7d52..d511bf73ade9 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -861,6 +861,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) GFP_KERNEL, cpu_to_node(cpu)); zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu), GFP_KERNEL, cpu_to_node(cpu)); +#ifdef CONFIG_NEED_MULTIPLE_NODES /* * numa_node_id() works after this. */ @@ -869,6 +870,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) set_cpu_numa_mem(cpu, local_memory_node(numa_cpu_lookup_table[cpu])); } +#endif } /* Init the cpumasks so the boot CPU is related to itself */ -- cgit v1.2.3 From 2ef0ca54d97f40f7621d595ac5479bd7fa076bfa Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 10 Aug 2020 12:48:26 +0530 Subject: powerpc/smp: Merge Power9 topology with Power topology A new sched_domain_topology_level was added just for Power9. However the same can be achieved by merging powerpc_topology with power9_topology and makes the code more simpler especially when adding a new sched domain. Signed-off-by: Srikar Dronamraju Reviewed-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200810071834.92514-3-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 25 +++---------------------- 1 file changed, 3 insertions(+), 22 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index d511bf73ade9..1fb98b255b4c 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1314,7 +1314,7 @@ int setup_profiling_timer(unsigned int multiplier) } #ifdef CONFIG_SCHED_SMT -/* cpumask of CPUs with asymetric SMT dependancy */ +/* cpumask of CPUs with asymmetric SMT dependency */ static int powerpc_smt_flags(void) { int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; @@ -1327,14 +1327,6 @@ static int powerpc_smt_flags(void) } #endif -static struct sched_domain_topology_level powerpc_topology[] = { -#ifdef CONFIG_SCHED_SMT - { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) }, -#endif - { cpu_cpu_mask, SD_INIT_NAME(DIE) }, - { NULL, }, -}; - /* * P9 has a slightly odd architecture where pairs of cores share an L2 cache. * This topology makes it *much* cheaper to migrate tasks between adjacent cores @@ -1362,7 +1354,7 @@ static const struct cpumask *smallcore_smt_mask(int cpu) } #endif -static struct sched_domain_topology_level power9_topology[] = { +static struct sched_domain_topology_level powerpc_topology[] = { #ifdef CONFIG_SCHED_SMT { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) }, #endif @@ -1387,21 +1379,10 @@ void __init smp_cpus_done(unsigned int max_cpus) #ifdef CONFIG_SCHED_SMT if (has_big_cores) { pr_info("Big cores detected but using small core scheduling\n"); - power9_topology[0].mask = smallcore_smt_mask; powerpc_topology[0].mask = smallcore_smt_mask; } #endif - /* - * If any CPU detects that it's sharing a cache with another CPU then - * use the deeper topology that is aware of this sharing. 
- */ - if (shared_caches) { - pr_info("Using shared cache scheduler topology\n"); - set_sched_topology(power9_topology); - } else { - pr_info("Using standard scheduler topology\n"); - set_sched_topology(powerpc_topology); - } + set_sched_topology(powerpc_topology); } #ifdef CONFIG_HOTPLUG_CPU -- cgit v1.2.3 From 5e93f16ae48b728775496429c6db53d0bf8cdd9b Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 10 Aug 2020 12:48:27 +0530 Subject: powerpc/smp: Move powerpc_topology above Just moving the powerpc_topology description above. This will help in using functions in this file and avoid declarations. No other functional changes Signed-off-by: Srikar Dronamraju Reviewed-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200810071834.92514-4-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 104 +++++++++++++++++++++++----------------------- 1 file changed, 52 insertions(+), 52 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 1fb98b255b4c..b12d143c7104 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -819,6 +819,58 @@ out: return err; } +static bool shared_caches; + +#ifdef CONFIG_SCHED_SMT +/* cpumask of CPUs with asymmetric SMT dependency */ +static int powerpc_smt_flags(void) +{ + int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; + + if (cpu_has_feature(CPU_FTR_ASYM_SMT)) { + printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n"); + flags |= SD_ASYM_PACKING; + } + return flags; +} +#endif + +/* + * P9 has a slightly odd architecture where pairs of cores share an L2 cache. + * This topology makes it *much* cheaper to migrate tasks between adjacent cores + * since the migrated task remains cache hot. We want to take advantage of this + * at the scheduler level so an extra topology level is required. + */ +static int powerpc_shared_cache_flags(void) +{ + return SD_SHARE_PKG_RESOURCES; +} + +/* + * We can't just pass cpu_l2_cache_mask() directly because + * returns a non-const pointer and the compiler barfs on that. + */ +static const struct cpumask *shared_cache_mask(int cpu) +{ + return cpu_l2_cache_mask(cpu); +} + +#ifdef CONFIG_SCHED_SMT +static const struct cpumask *smallcore_smt_mask(int cpu) +{ + return cpu_smallcore_mask(cpu); +} +#endif + +static struct sched_domain_topology_level powerpc_topology[] = { +#ifdef CONFIG_SCHED_SMT + { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) }, +#endif + { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) }, + { cpu_cpu_mask, SD_INIT_NAME(DIE) }, + { NULL, }, +}; + static int init_big_cores(void) { int cpu; @@ -1248,8 +1300,6 @@ static void add_cpu_to_masks(int cpu) set_cpus_related(cpu, i, cpu_core_mask); } -static bool shared_caches; - /* Activate a secondary processor. */ void start_secondary(void *unused) { @@ -1313,56 +1363,6 @@ int setup_profiling_timer(unsigned int multiplier) return 0; } -#ifdef CONFIG_SCHED_SMT -/* cpumask of CPUs with asymmetric SMT dependency */ -static int powerpc_smt_flags(void) -{ - int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; - - if (cpu_has_feature(CPU_FTR_ASYM_SMT)) { - printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n"); - flags |= SD_ASYM_PACKING; - } - return flags; -} -#endif - -/* - * P9 has a slightly odd architecture where pairs of cores share an L2 cache. - * This topology makes it *much* cheaper to migrate tasks between adjacent cores - * since the migrated task remains cache hot. 
We want to take advantage of this - * at the scheduler level so an extra topology level is required. - */ -static int powerpc_shared_cache_flags(void) -{ - return SD_SHARE_PKG_RESOURCES; -} - -/* - * We can't just pass cpu_l2_cache_mask() directly because - * returns a non-const pointer and the compiler barfs on that. - */ -static const struct cpumask *shared_cache_mask(int cpu) -{ - return cpu_l2_cache_mask(cpu); -} - -#ifdef CONFIG_SCHED_SMT -static const struct cpumask *smallcore_smt_mask(int cpu) -{ - return cpu_smallcore_mask(cpu); -} -#endif - -static struct sched_domain_topology_level powerpc_topology[] = { -#ifdef CONFIG_SCHED_SMT - { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) }, -#endif - { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) }, - { cpu_cpu_mask, SD_INIT_NAME(DIE) }, - { NULL, }, -}; - void __init smp_cpus_done(unsigned int max_cpus) { /* -- cgit v1.2.3 From 3c6032a8fe99547d31b2b57715e303a67d1b0c66 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 10 Aug 2020 12:48:28 +0530 Subject: powerpc/smp: Move topology fixups into a new function Move topology fixup based on the platform attributes into its own function which is called just before set_sched_topology. Signed-off-by: Srikar Dronamraju Reviewed-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200810071834.92514-5-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index b12d143c7104..9f4333d0748b 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1363,6 +1363,16 @@ int setup_profiling_timer(unsigned int multiplier) return 0; } +static void fixup_topology(void) +{ +#ifdef CONFIG_SCHED_SMT + if (has_big_cores) { + pr_info("Big cores detected but using small core scheduling\n"); + powerpc_topology[0].mask = smallcore_smt_mask; + } +#endif +} + void __init smp_cpus_done(unsigned int max_cpus) { /* @@ -1376,12 +1386,7 @@ void __init smp_cpus_done(unsigned int max_cpus) dump_numa_cpu_topology(); -#ifdef CONFIG_SCHED_SMT - if (has_big_cores) { - pr_info("Big cores detected but using small core scheduling\n"); - powerpc_topology[0].mask = smallcore_smt_mask; - } -#endif + fixup_topology(); set_sched_topology(powerpc_topology); } -- cgit v1.2.3 From f6606cfdfbcda00ce8a6e63c8fc13c93e73ac059 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Sun, 13 Sep 2020 22:40:38 +0530 Subject: powerpc/smp: Dont assume l2-cache to be superset of sibling Current code assumes that cpumask of cpus sharing a l2-cache mask will always be a superset of cpu_sibling_mask. Lets stop that assumption. cpu_l2_cache_mask is a superset of cpu_sibling_mask if and only if shared_caches is set. Reviewed-by: Gautham R. 
Shenoy Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200913171038.GB11808@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 43 +++++++++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 9f4333d0748b..168532e37305 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1186,9 +1186,23 @@ static bool update_mask_by_l2(int cpu, struct cpumask *(*mask_fn)(int)) int i; l2_cache = cpu_to_l2cache(cpu); - if (!l2_cache) + if (!l2_cache) { + struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask; + + /* + * If no l2cache for this CPU, assume all siblings to share + * cache with this CPU. + */ + if (has_big_cores) + sibling_mask = cpu_smallcore_mask; + + for_each_cpu(i, sibling_mask(cpu)) + set_cpus_related(cpu, i, cpu_l2_cache_mask); + return false; + } + cpumask_set_cpu(cpu, mask_fn(cpu)); for_each_cpu(i, cpu_online_mask) { /* * when updating the marks the current CPU has not been marked @@ -1271,29 +1285,30 @@ static void add_cpu_to_masks(int cpu) * add it to it's own thread sibling mask. */ cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); + cpumask_set_cpu(cpu, cpu_core_mask(cpu)); for (i = first_thread; i < first_thread + threads_per_core; i++) if (cpu_online(i)) set_cpus_related(i, cpu, cpu_sibling_mask); add_cpu_to_smallcore_masks(cpu); - /* - * Copy the thread sibling mask into the cache sibling mask - * and mark any CPUs that share an L2 with this CPU. - */ - for_each_cpu(i, cpu_sibling_mask(cpu)) - set_cpus_related(cpu, i, cpu_l2_cache_mask); update_mask_by_l2(cpu, cpu_l2_cache_mask); - /* - * Copy the cache sibling mask into core sibling mask and mark - * any CPUs on the same chip as this CPU. - */ - for_each_cpu(i, cpu_l2_cache_mask(cpu)) - set_cpus_related(cpu, i, cpu_core_mask); + if (pkg_id == -1) { + struct cpumask *(*mask)(int) = cpu_sibling_mask; + + /* + * Copy the sibling mask into core sibling mask and + * mark any CPUs on the same chip as this CPU. + */ + if (shared_caches) + mask = cpu_l2_cache_mask; + + for_each_cpu(i, mask(cpu)) + set_cpus_related(cpu, i, cpu_core_mask); - if (pkg_id == -1) return; + } for_each_cpu(i, cpu_online_mask) if (get_physical_package_id(i) == pkg_id) -- cgit v1.2.3 From caa8e29da59926bef099b46ab6280333d583e944 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 10 Aug 2020 12:48:30 +0530 Subject: powerpc/smp: Optimize start_secondary In start_secondary, even if shared_cache was already set, system does a redundant match for cpumask. This redundant check can be removed by checking if shared_cache is already set. While here, localize the sibling_mask variable to within the if condition. Signed-off-by: Srikar Dronamraju Reviewed-by: Gautham R. 
Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200810071834.92514-7-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 168532e37305..016a822eb8c4 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -852,7 +852,7 @@ static int powerpc_shared_cache_flags(void) */ static const struct cpumask *shared_cache_mask(int cpu) { - return cpu_l2_cache_mask(cpu); + return per_cpu(cpu_l2_cache_map, cpu); } #ifdef CONFIG_SCHED_SMT @@ -1319,7 +1319,6 @@ static void add_cpu_to_masks(int cpu) void start_secondary(void *unused) { unsigned int cpu = smp_processor_id(); - struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask; mmgrab(&init_mm); current->active_mm = &init_mm; @@ -1345,14 +1344,20 @@ void start_secondary(void *unused) /* Update topology CPU masks */ add_cpu_to_masks(cpu); - if (has_big_cores) - sibling_mask = cpu_smallcore_mask; /* * Check for any shared caches. Note that this must be done on a * per-core basis because one core in the pair might be disabled. */ - if (!cpumask_equal(cpu_l2_cache_mask(cpu), sibling_mask(cpu))) - shared_caches = true; + if (!shared_caches) { + struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask; + struct cpumask *mask = cpu_l2_cache_mask(cpu); + + if (has_big_cores) + sibling_mask = cpu_smallcore_mask; + + if (cpumask_weight(mask) > cpumask_weight(sibling_mask(cpu))) + shared_caches = true; + } set_numa_node(numa_cpu_lookup_table[cpu]); set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu])); -- cgit v1.2.3 From f9f130ff2ec93c5949576bbfb168cc9530c23649 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 10 Aug 2020 12:48:31 +0530 Subject: powerpc/numa: Detect support for coregroup Add support for grouping cores based on the device-tree classification. - The last domain in the associativity domains always refers to the core. - If primary reference domain happens to be the penultimate domain in the associativity domains device-tree property, then there are no coregroups. However if its not a penultimate domain, then there are coregroups. There can be more than one coregroup. For now we would be interested in the last or the smallest coregroups, i.e one sub-group per DIE. Currently there are no firmwares that are exposing this grouping. Hence allow the basis for grouping to be abstract. Once the firmware starts using this grouping, code would be added to detect the type of grouping and adjust the sd domain flags accordingly. Signed-off-by: Srikar Dronamraju Reviewed-by: Gautham R. 
Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200810071834.92514-8-srikar@linux.vnet.ibm.com --- arch/powerpc/include/asm/smp.h | 1 + arch/powerpc/kernel/smp.c | 1 + arch/powerpc/mm/numa.c | 34 +++++++++++++++++++++------------- 3 files changed, 23 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index b727f5f7b8f9..041f0b97c45b 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -28,6 +28,7 @@ extern int boot_cpuid; extern int spinning_secondaries; extern u32 *cpu_to_phys_id; +extern bool coregroup_enabled; extern void cpu_die(void); extern int cpu_to_chip_id(int cpu); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 016a822eb8c4..a44b9350d2ef 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -75,6 +75,7 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 }; struct task_struct *secondary_current; bool has_big_cores; +bool coregroup_enabled; DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 481951ac3e55..b2c44c5a81fb 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -897,7 +897,9 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) static void __init find_possible_nodes(void) { struct device_node *rtas; - u32 numnodes, i; + const __be32 *domains; + int prop_length, max_nodes; + u32 i; if (!numa_enabled) return; @@ -906,25 +908,31 @@ static void __init find_possible_nodes(void) if (!rtas) return; - if (of_property_read_u32_index(rtas, "ibm,current-associativity-domains", - min_common_depth, &numnodes)) { - /* - * ibm,current-associativity-domains is a fairly recent - * property. If it doesn't exist, then fallback on - * ibm,max-associativity-domains. Current denotes what the - * platform can support compared to max which denotes what the - * Hypervisor can support. - */ - if (of_property_read_u32_index(rtas, "ibm,max-associativity-domains", - min_common_depth, &numnodes)) + /* + * ibm,current-associativity-domains is a fairly recent property. If + * it doesn't exist, then fallback on ibm,max-associativity-domains. + * Current denotes what the platform can support compared to max + * which denotes what the Hypervisor can support. + */ + domains = of_get_property(rtas, "ibm,current-associativity-domains", + &prop_length); + if (!domains) { + domains = of_get_property(rtas, "ibm,max-associativity-domains", + &prop_length); + if (!domains) goto out; } - for (i = 0; i < numnodes; i++) { + max_nodes = of_read_number(&domains[min_common_depth], 1); + for (i = 0; i < max_nodes; i++) { if (!node_possible(i)) node_set(i, node_possible_map); } + prop_length /= sizeof(int); + if (prop_length > min_common_depth + 2) + coregroup_enabled = 1; + out: of_node_put(rtas); } -- cgit v1.2.3 From 6e086302816b2ced602bc99641eb0189c05f018a Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 10 Aug 2020 12:48:32 +0530 Subject: powerpc/smp: Allocate cpumask only after searching thread group If allocated earlier and the search fails, then cpu_l1_cache_map cpumask is unnecessarily cleared. However cpu_l1_cache_map can be allocated / cleared after we search thread group. Please note CONFIG_CPUMASK_OFFSTACK is not set on Powerpc. Hence cpumask allocated by zalloc_cpumask_var_node is never freed. Signed-off-by: Srikar Dronamraju Reviewed-by: Gautham R. 
Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200810071834.92514-9-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index a44b9350d2ef..41f76c8cd024 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -790,10 +790,6 @@ static int init_cpu_l1_cache_map(int cpu) if (err) goto out; - zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu), - GFP_KERNEL, - cpu_to_node(cpu)); - cpu_group_start = get_cpu_thread_group_start(cpu, &tg); if (unlikely(cpu_group_start == -1)) { @@ -802,6 +798,9 @@ static int init_cpu_l1_cache_map(int cpu) goto out; } + zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu), + GFP_KERNEL, cpu_to_node(cpu)); + for (i = first_thread; i < first_thread + threads_per_core; i++) { int i_group_start = get_cpu_thread_group_start(i, &tg); -- cgit v1.2.3 From 72730bfc2a2b91a525f38dfc830f598bdb95f216 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 10 Aug 2020 12:48:33 +0530 Subject: powerpc/smp: Create coregroup domain Add percpu coregroup maps and masks to create coregroup domain. If a coregroup doesn't exist, the coregroup domain will be degenerated in favour of SMT/CACHE domain. Do note this patch is only creating stubs for cpu_to_coregroup_id. The actual cpu_to_coregroup_id implementation would be in a subsequent patch. Signed-off-by: Srikar Dronamraju Reviewed-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200810071834.92514-10-srikar@linux.vnet.ibm.com --- arch/powerpc/include/asm/topology.h | 10 +++++++ arch/powerpc/kernel/smp.c | 54 ++++++++++++++++++++++++++++++++++++- arch/powerpc/mm/numa.c | 5 ++++ 3 files changed, 68 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index f0b6300e7dd3..6609174918ab 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -88,12 +88,22 @@ static inline int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR) extern int find_and_online_cpu_nid(int cpu); +extern int cpu_to_coregroup_id(int cpu); #else static inline int find_and_online_cpu_nid(int cpu) { return 0; } +static inline int cpu_to_coregroup_id(int cpu) +{ +#ifdef CONFIG_SMP + return cpu_to_core_id(cpu); +#else + return 0; +#endif +} + #endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */ #include diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 41f76c8cd024..3d96752d6570 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -81,12 +81,22 @@ DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map); DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map); DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); +DEFINE_PER_CPU(cpumask_var_t, cpu_coregroup_map); EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map); EXPORT_PER_CPU_SYMBOL(cpu_core_map); EXPORT_SYMBOL_GPL(has_big_cores); +enum { +#ifdef CONFIG_SCHED_SMT + smt_idx, +#endif + cache_idx, + mc_idx, + die_idx, +}; + #define MAX_THREAD_LIST_SIZE 8 #define THREAD_GROUP_SHARE_L1 1 struct thread_groups { @@ -862,11 +872,27 @@ static const struct cpumask *smallcore_smt_mask(int cpu) } #endif +static struct cpumask *cpu_coregroup_mask(int cpu) +{ + return per_cpu(cpu_coregroup_map, cpu); +} + +static bool 
has_coregroup_support(void) +{ + return coregroup_enabled; +} + +static const struct cpumask *cpu_mc_mask(int cpu) +{ + return cpu_coregroup_mask(cpu); +} + static struct sched_domain_topology_level powerpc_topology[] = { #ifdef CONFIG_SCHED_SMT { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) }, #endif { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) }, + { cpu_mc_mask, SD_INIT_NAME(MC) }, { cpu_cpu_mask, SD_INIT_NAME(DIE) }, { NULL, }, }; @@ -913,6 +939,10 @@ void __init smp_prepare_cpus(unsigned int max_cpus) GFP_KERNEL, cpu_to_node(cpu)); zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu), GFP_KERNEL, cpu_to_node(cpu)); + if (has_coregroup_support()) + zalloc_cpumask_var_node(&per_cpu(cpu_coregroup_map, cpu), + GFP_KERNEL, cpu_to_node(cpu)); + #ifdef CONFIG_NEED_MULTIPLE_NODES /* * numa_node_id() works after this. @@ -930,6 +960,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus) cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid)); cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid)); + if (has_coregroup_support()) + cpumask_set_cpu(boot_cpuid, cpu_coregroup_mask(boot_cpuid)); + init_big_cores(); if (has_big_cores) { cpumask_set_cpu(boot_cpuid, @@ -1234,6 +1267,8 @@ static void remove_cpu_from_masks(int cpu) set_cpus_unrelated(cpu, i, cpu_sibling_mask); if (has_big_cores) set_cpus_unrelated(cpu, i, cpu_smallcore_mask); + if (has_coregroup_support()) + set_cpus_unrelated(cpu, i, cpu_coregroup_mask); } } #endif @@ -1294,6 +1329,20 @@ static void add_cpu_to_masks(int cpu) add_cpu_to_smallcore_masks(cpu); update_mask_by_l2(cpu, cpu_l2_cache_mask); + if (has_coregroup_support()) { + int coregroup_id = cpu_to_coregroup_id(cpu); + + cpumask_set_cpu(cpu, cpu_coregroup_mask(cpu)); + for_each_cpu_and(i, cpu_online_mask, cpu_cpu_mask(cpu)) { + int fcpu = cpu_first_thread_sibling(i); + + if (fcpu == first_thread) + set_cpus_related(cpu, i, cpu_coregroup_mask); + else if (coregroup_id == cpu_to_coregroup_id(i)) + set_cpus_related(cpu, i, cpu_coregroup_mask); + } + } + if (pkg_id == -1) { struct cpumask *(*mask)(int) = cpu_sibling_mask; @@ -1388,9 +1437,12 @@ static void fixup_topology(void) #ifdef CONFIG_SCHED_SMT if (has_big_cores) { pr_info("Big cores detected but using small core scheduling\n"); - powerpc_topology[0].mask = smallcore_smt_mask; + powerpc_topology[smt_idx].mask = smallcore_smt_mask; } #endif + + if (!has_coregroup_support()) + powerpc_topology[mc_idx].mask = powerpc_topology[cache_idx].mask; } void __init smp_cpus_done(unsigned int max_cpus) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index b2c44c5a81fb..dfebca905acb 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1245,6 +1245,11 @@ int find_and_online_cpu_nid(int cpu) return new_nid; } +int cpu_to_coregroup_id(int cpu) +{ + return cpu_to_core_id(cpu); +} + static int topology_update_init(void) { topology_inited = 1; -- cgit v1.2.3 From fa35e868f9ddcbb7984fd5ab7f91aef924fa8543 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 10 Aug 2020 12:48:34 +0530 Subject: powerpc/smp: Implement cpu_to_coregroup_id Lookup the coregroup id from the associativity array. If unable to detect the coregroup id, fallback on the core id. This way, ensure sched_domain degenerates and an extra sched domain is not created. Ideally this function should have been implemented in arch/powerpc/kernel/smp.c. However if its implemented in mm/numa.c, we don't need to find the primary domain again. 
If the device-tree mentions more than one coregroup, then kernel implements only the last or the smallest coregroup, which currently corresponds to the penultimate domain in the device-tree. Signed-off-by: Srikar Dronamraju Reviewed-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200810071834.92514-11-srikar@linux.vnet.ibm.com --- arch/powerpc/mm/numa.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index dfebca905acb..b725fb66e913 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1247,6 +1247,26 @@ int find_and_online_cpu_nid(int cpu) int cpu_to_coregroup_id(int cpu) { + __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0}; + int index; + + if (cpu < 0 || cpu > nr_cpu_ids) + return -1; + + if (!coregroup_enabled) + goto out; + + if (!firmware_has_feature(FW_FEATURE_VPHN)) + goto out; + + if (vphn_get_associativity(cpu, associativity)) + goto out; + + index = of_read_number(associativity, 1); + if (index > min_common_depth + 1) + return of_read_number(&associativity[index - 1], 1); + +out: return cpu_to_core_id(cpu); } -- cgit v1.2.3 From d208e13c6a2277d9fb71fad6a1394c70bdd7b634 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 17 Sep 2020 12:20:16 +1000 Subject: powerpc/process: Fix uninitialised variable error Clang, and GCC with -Wmaybe-uninitialized, can't see that val is unused in get_fpexec_mode(): arch/powerpc/kernel/process.c:1940:7: error: variable 'val' is used uninitialized whenever 'if' condition is true if (cpu_has_feature(CPU_FTR_SPE)) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ We know that CPU_FTR_SPE will only be true iff CONFIG_SPE is also true, but the compiler doesn't. Avoid it by initialising val to zero. 
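To make the reasoning concrete, the pattern can be reduced to a standalone sketch (illustrative only, not the kernel code; the function name, variable and value below are made up):

	/*
	 * Sketch only: 'cond' stands in for cpu_has_feature(CPU_FTR_SPE) and the
	 * CONFIG_SPE relationship the compiler cannot see. The value is written
	 * only when 'cond' is true and read only when 'cond' is true, but a
	 * compiler that does not prove the two tests equivalent reports a
	 * maybe-uninitialized use of 'val'.
	 */
	unsigned int example(int cond)
	{
		unsigned int val;	/* initialising this to 0 avoids the warning */

		if (cond)
			val = 3;	/* only written when cond is true */

		return cond ? val : 0;	/* only read when cond is true */
	}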
Reported-by: kernel test robot Fixes: 532ed1900d37 ("powerpc/process: Remove useless #ifdef CONFIG_SPE") Signed-off-by: Michael Ellerman Reviewed-by: Nick Desaulniers Tested-by: Nick Desaulniers Link: https://lore.kernel.org/r/20200917024509.3253837-1-mpe@ellerman.id.au --- arch/powerpc/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 14d5189b17d8..d421a2c7f822 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1934,7 +1934,7 @@ int set_fpexc_mode(struct task_struct *tsk, unsigned int val) int get_fpexc_mode(struct task_struct *tsk, unsigned long adr) { - unsigned int val; + unsigned int val = 0; if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) { if (cpu_has_feature(CPU_FTR_SPE)) { -- cgit v1.2.3 From bda7673d64b6c2e92423363a756caa657464e096 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 17 Sep 2020 10:06:43 +0800 Subject: powerpc/book3s64: fix link error with CONFIG_PPC_RADIX_MMU=n Fix link error when CONFIG_PPC_RADIX_MMU is disabled: powerpc64-linux-gnu-ld: arch/powerpc/platforms/pseries/lpar.o:(.toc+0x0): undefined reference to `mmu_pid_bits' Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200917020643.90375-1-yangyingliang@huawei.com --- arch/powerpc/platforms/pseries/lpar.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index baf24eacd268..764170fdb0f7 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -1724,6 +1724,7 @@ void __init hpte_init_pseries(void) pseries_lpar_register_process_table(0, 0, 0); } +#ifdef CONFIG_PPC_RADIX_MMU void radix_init_pseries(void) { pr_info("Using radix MMU under hypervisor\n"); @@ -1731,6 +1732,7 @@ void radix_init_pseries(void) pseries_lpar_register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12); } +#endif #ifdef CONFIG_PPC_SMLPAR #define CMO_FREE_HINT_DEFAULT 1 -- cgit v1.2.3 From 96543e7352bded5d6d1a0e0022376ebdd6c1b8ab Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Wed, 16 Sep 2020 10:50:26 +0800 Subject: powerpc/pseries: convert to use DEFINE_SEQ_ATTRIBUTE macro Use DEFINE_SEQ_ATTRIBUTE macro to simplify the code. 
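For a seq_file user named "hcall_inst", DEFINE_SEQ_ATTRIBUTE(hcall_inst) provides roughly the following (a simplified sketch of the helper in <linux/seq_file.h>, not a verbatim expansion; the generated names follow the macro's <name>_open / <name>_fops convention), which is why the hand-rolled open helper and hcall_inst_seq_fops can be dropped and the file_operations becomes hcall_inst_fops:

	/* Simplified sketch of what the macro generates for "hcall_inst" */
	static int hcall_inst_open(struct inode *inode, struct file *file)
	{
		int ret = seq_open(file, &hcall_inst_sops);

		if (!ret && inode->i_private) {
			struct seq_file *seq = file->private_data;

			seq->private = inode->i_private;	/* per-cpu stats pointer */
		}
		return ret;
	}

	static const struct file_operations hcall_inst_fops = {
		.owner		= THIS_MODULE,
		.open		= hcall_inst_open,
		.read		= seq_read,
		.llseek		= seq_lseek,
		.release	= seq_release,
	};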
Signed-off-by: Liu Shixin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200916025026.3992835-1-liushixin2@huawei.com --- arch/powerpc/platforms/pseries/hvCall_inst.c | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c index c40c62ec432e..2c59b4986ea5 100644 --- a/arch/powerpc/platforms/pseries/hvCall_inst.c +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c @@ -70,31 +70,14 @@ static int hc_show(struct seq_file *m, void *p) return 0; } -static const struct seq_operations hcall_inst_seq_ops = { +static const struct seq_operations hcall_inst_sops = { .start = hc_start, .next = hc_next, .stop = hc_stop, .show = hc_show }; -static int hcall_inst_seq_open(struct inode *inode, struct file *file) -{ - int rc; - struct seq_file *seq; - - rc = seq_open(file, &hcall_inst_seq_ops); - seq = file->private_data; - seq->private = file_inode(file)->i_private; - - return rc; -} - -static const struct file_operations hcall_inst_seq_fops = { - .open = hcall_inst_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; +DEFINE_SEQ_ATTRIBUTE(hcall_inst); #define HCALL_ROOT_DIR "hcall_inst" #define CPU_NAME_BUF_SIZE 32 @@ -149,7 +132,7 @@ static int __init hcall_inst_init(void) snprintf(cpu_name_buf, CPU_NAME_BUF_SIZE, "cpu%d", cpu); debugfs_create_file(cpu_name_buf, 0444, hcall_root, per_cpu(hcall_stats, cpu), - &hcall_inst_seq_fops); + &hcall_inst_fops); } return 0; -- cgit v1.2.3 From ef1edbba52883907caf02ab85e0d00a2e4648f05 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 16 Sep 2020 21:56:36 +1000 Subject: powerpc/mm/64s: Fix slb_setup_new_exec() sparse warning Sparse says: symbol slb_setup_new_exec was not declared. Should it be static? No, it should have a declaration in a header, add one. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200916115637.3100484-1-mpe@ellerman.id.au --- arch/powerpc/mm/book3s64/internal.h | 2 ++ arch/powerpc/mm/book3s64/mmu_context.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/book3s64/internal.h b/arch/powerpc/mm/book3s64/internal.h index 7eda0d30d765..c12d78ee42f5 100644 --- a/arch/powerpc/mm/book3s64/internal.h +++ b/arch/powerpc/mm/book3s64/internal.h @@ -13,4 +13,6 @@ static inline bool stress_slb(void) return static_branch_unlikely(&stress_slb_key); } +void slb_setup_new_exec(void); + #endif /* ARCH_POWERPC_MM_BOOK3S64_INTERNAL_H */ diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c index 0ba30b8b935b..1c54821de7bf 100644 --- a/arch/powerpc/mm/book3s64/mmu_context.c +++ b/arch/powerpc/mm/book3s64/mmu_context.c @@ -21,6 +21,8 @@ #include #include +#include "internal.h" + static DEFINE_IDA(mmu_context_ida); static int alloc_context_id(int min_id, int max_id) @@ -48,8 +50,6 @@ int hash__alloc_context_id(void) } EXPORT_SYMBOL_GPL(hash__alloc_context_id); -void slb_setup_new_exec(void); - static int realloc_context_ids(mm_context_t *ctx) { int i, id; -- cgit v1.2.3 From d10ebe79dfae7dc59b6cf77ffa615f0b8dae21bf Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 16 Sep 2020 21:56:37 +1000 Subject: powerpc/perf: Add declarations to fix sparse warnings Sparse warns about all the init functions: symbol init_ppc970_pmu was not declared. Should it be static? symbol init_power5p_pmu was not declared. Should it be static? 
symbol init_power5_pmu was not declared. Should it be static? symbol init_power6_pmu was not declared. Should it be static? symbol init_power7_pmu was not declared. Should it be static? symbol init_power9_pmu was not declared. Should it be static? symbol init_power8_pmu was not declared. Should it be static? symbol init_generic_compat_pmu was not declared. Should it be static? They're already declared in internal.h, so just make sure all the C files include that directly or indirectly. Signed-off-by: Michael Ellerman Reviewed-by: Madhavan Srinivasan Link: https://lore.kernel.org/r/20200916115637.3100484-2-mpe@ellerman.id.au --- arch/powerpc/perf/isa207-common.h | 2 ++ arch/powerpc/perf/power10-pmu.c | 1 - arch/powerpc/perf/power5+-pmu.c | 2 ++ arch/powerpc/perf/power5-pmu.c | 2 ++ arch/powerpc/perf/power6-pmu.c | 2 ++ arch/powerpc/perf/power7-pmu.c | 2 ++ arch/powerpc/perf/ppc970-pmu.c | 2 ++ 7 files changed, 12 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index 044de65e96b9..7025de5e60e7 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -13,6 +13,8 @@ #include #include +#include "internal.h" + #define EVENT_EBB_MASK 1ull #define EVENT_EBB_SHIFT PERF_EVENT_CONFIG_EBB_SHIFT #define EVENT_BHRB_MASK 1ull diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c index 83148656b524..9dbe8f9b89b4 100644 --- a/arch/powerpc/perf/power10-pmu.c +++ b/arch/powerpc/perf/power10-pmu.c @@ -9,7 +9,6 @@ #define pr_fmt(fmt) "power10-pmu: " fmt #include "isa207-common.h" -#include "internal.h" /* * Raw event encoding for Power10: diff --git a/arch/powerpc/perf/power5+-pmu.c b/arch/powerpc/perf/power5+-pmu.c index a62b2cd7914f..3e64b4a1511f 100644 --- a/arch/powerpc/perf/power5+-pmu.c +++ b/arch/powerpc/perf/power5+-pmu.c @@ -10,6 +10,8 @@ #include #include +#include "internal.h" + /* * Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3) */ diff --git a/arch/powerpc/perf/power5-pmu.c b/arch/powerpc/perf/power5-pmu.c index 8732b587cf71..017bb19b73fb 100644 --- a/arch/powerpc/perf/power5-pmu.c +++ b/arch/powerpc/perf/power5-pmu.c @@ -10,6 +10,8 @@ #include #include +#include "internal.h" + /* * Bits in event code for POWER5 (not POWER5++) */ diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c index 0e318cf87129..189974478e9f 100644 --- a/arch/powerpc/perf/power6-pmu.c +++ b/arch/powerpc/perf/power6-pmu.c @@ -10,6 +10,8 @@ #include #include +#include "internal.h" + /* * Bits in event code for POWER6 */ diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index 5e0bf09cf077..bacfab104a1a 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -10,6 +10,8 @@ #include #include +#include "internal.h" + /* * Bits in event code for POWER7 */ diff --git a/arch/powerpc/perf/ppc970-pmu.c b/arch/powerpc/perf/ppc970-pmu.c index d35223fb112c..7d78df97f272 100644 --- a/arch/powerpc/perf/ppc970-pmu.c +++ b/arch/powerpc/perf/ppc970-pmu.c @@ -9,6 +9,8 @@ #include #include +#include "internal.h" + /* * Bits in event code for PPC970 */ -- cgit v1.2.3 From 1ea21ba231f248034e8c794aa675869ca2b97d42 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 19 Aug 2020 11:56:32 +1000 Subject: powerpc: Move arch_cpu_idle_dead() into smp.c arch_cpu_idle_dead() is in idle.c, which makes sense, but it's inside a CONFIG_HOTPLUG_CPU block. 
It would be more at home in smp.c, inside the existing CONFIG_HOTPLUG_CPU block. Note that CONFIG_HOTPLUG_CPU depends on CONFIG_SMP so even though smp.c is not built for SMP=n builds, that's fine. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200819015634.1974478-1-mpe@ellerman.id.au --- arch/powerpc/kernel/idle.c | 8 -------- arch/powerpc/kernel/smp.c | 6 ++++++ 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 422e31d2f5a2..ae0e2632393d 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -41,14 +41,6 @@ static int __init powersave_off(char *arg) } __setup("powersave=off", powersave_off); -#ifdef CONFIG_HOTPLUG_CPU -void arch_cpu_idle_dead(void) -{ - sched_preempt_enable_no_resched(); - cpu_die(); -} -#endif - void arch_cpu_idle(void) { ppc64_runlatch_off(); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 3d96752d6570..24b0476c1d4f 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1489,6 +1489,12 @@ void __cpu_die(unsigned int cpu) smp_ops->cpu_die(cpu); } +void arch_cpu_idle_dead(void) +{ + sched_preempt_enable_no_resched(); + cpu_die(); +} + void cpu_die(void) { /* -- cgit v1.2.3 From bf3c1464db883a953ad7bbed64924480b8b2b244 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 19 Aug 2020 11:56:33 +1000 Subject: powerpc/smp: Fold cpu_die() into its only caller Avoid the eternal confusion between cpu_die() and __cpu_die() by removing the former, folding it into its only caller. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200819015634.1974478-2-mpe@ellerman.id.au --- arch/powerpc/include/asm/smp.h | 1 - arch/powerpc/kernel/smp.c | 4 ---- 2 files changed, 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 041f0b97c45b..0bf80c5b440a 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -30,7 +30,6 @@ extern int spinning_secondaries; extern u32 *cpu_to_phys_id; extern bool coregroup_enabled; -extern void cpu_die(void); extern int cpu_to_chip_id(int cpu); #ifdef CONFIG_SMP diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 24b0476c1d4f..4ae767c455b8 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1492,11 +1492,7 @@ void __cpu_die(unsigned int cpu) void arch_cpu_idle_dead(void) { sched_preempt_enable_no_resched(); - cpu_die(); -} -void cpu_die(void) -{ /* * Disable on the down path. This will be re-enabled by * start_secondary() via start_secondary_resume() below -- cgit v1.2.3 From 39f87561454dc33efb2a3d8354d066207acac8a6 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 19 Aug 2020 11:56:34 +1000 Subject: powerpc/smp: Move ppc_md.cpu_die() to smp_ops.cpu_offline_self() We have smp_ops->cpu_die() and ppc_md.cpu_die(). One of them offlines the current CPU and one offlines another CPU, can you guess which is which? Also one is in smp_ops and one is in ppc_md? So rename ppc_md.cpu_die(), to cpu_offline_self(), because that's what it does. And move it into smp_ops where it belongs. 
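After the rename the two hooks are easier to tell apart. A condensed sketch of the two paths (simplified from the diffs below, with unrelated details elided):

	void arch_cpu_idle_dead(void)		/* runs on the CPU going offline */
	{
		/* preemption and ftrace details elided */
		if (smp_ops->cpu_offline_self)
			smp_ops->cpu_offline_self();	/* e.g. pseries_cpu_offline_self() */

		/* if we return, we re-enter start_secondary */
		start_secondary_resume();
	}

	void __cpu_die(unsigned int cpu)	/* runs on the CPU driving the unplug */
	{
		if (smp_ops->cpu_die)
			smp_ops->cpu_die(cpu);	/* e.g. pseries_cpu_die(): reaps the dying CPU */
	}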
Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200819015634.1974478-3-mpe@ellerman.id.au --- arch/powerpc/include/asm/machdep.h | 1 - arch/powerpc/include/asm/smp.h | 3 +++ arch/powerpc/kernel/smp.c | 4 ++-- arch/powerpc/kernel/sysfs.c | 4 +++- arch/powerpc/platforms/85xx/smp.c | 4 ++-- arch/powerpc/platforms/powermac/pmac.h | 2 +- arch/powerpc/platforms/powermac/sleep.S | 6 +++--- arch/powerpc/platforms/powermac/smp.c | 8 ++++---- arch/powerpc/platforms/powernv/smp.c | 4 ++-- arch/powerpc/platforms/pseries/hotplug-cpu.c | 6 +++--- 10 files changed, 23 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 5082cd496190..95081078aa8a 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -65,7 +65,6 @@ struct machdep_calls { void __noreturn (*restart)(char *cmd); void __noreturn (*halt)(void); void (*panic)(char *str); - void (*cpu_die)(void); long (*time_init)(void); /* Optional, may be NULL */ diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 0bf80c5b440a..635bdf947105 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -50,6 +50,9 @@ struct smp_ops_t { int (*cpu_disable)(void); void (*cpu_die)(unsigned int nr); int (*cpu_bootable)(unsigned int nr); +#ifdef CONFIG_HOTPLUG_CPU + void (*cpu_offline_self)(void); +#endif }; extern int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 4ae767c455b8..58990baa5182 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1499,8 +1499,8 @@ void arch_cpu_idle_dead(void) */ this_cpu_disable_ftrace(); - if (ppc_md.cpu_die) - ppc_md.cpu_die(); + if (smp_ops->cpu_offline_self) + smp_ops->cpu_offline_self(); /* If we return, we re-enter start_secondary */ start_secondary_resume(); diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 5dea98fa2f93..928b29e0ee83 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -1160,6 +1160,7 @@ static int __init topology_init(void) for_each_possible_cpu(cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); +#ifdef CONFIG_HOTPLUG_CPU /* * For now, we just see if the system supports making * the RTAS calls for CPU hotplug. But, there may be a @@ -1167,8 +1168,9 @@ static int __init topology_init(void) * CPU. For instance, the boot cpu might never be valid * for hotplugging. 
*/ - if (ppc_md.cpu_die) + if (smp_ops->cpu_offline_self) c->hotpluggable = 1; +#endif if (cpu_online(cpu) || c->hotpluggable) { register_cpu(c, cpu); diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index fda108bae95f..c6df294054fe 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -112,7 +112,7 @@ static void mpc85xx_take_timebase(void) local_irq_restore(flags); } -static void smp_85xx_mach_cpu_die(void) +static void smp_85xx_cpu_offline_self(void) { unsigned int cpu = smp_processor_id(); @@ -506,7 +506,7 @@ void __init mpc85xx_smp_init(void) if (qoriq_pm_ops) { smp_85xx_ops.give_timebase = mpc85xx_give_timebase; smp_85xx_ops.take_timebase = mpc85xx_take_timebase; - ppc_md.cpu_die = smp_85xx_mach_cpu_die; + smp_85xx_ops.cpu_offline_self = smp_85xx_cpu_offline_self; smp_85xx_ops.cpu_die = qoriq_cpu_kill; } #endif diff --git a/arch/powerpc/platforms/powermac/pmac.h b/arch/powerpc/platforms/powermac/pmac.h index 16a52afdb76e..0d715db434dc 100644 --- a/arch/powerpc/platforms/powermac/pmac.h +++ b/arch/powerpc/platforms/powermac/pmac.h @@ -34,7 +34,7 @@ extern void pmac_check_ht_link(void); extern void pmac_setup_smp(void); extern int psurge_secondary_virq; -extern void low_cpu_die(void) __attribute__((noreturn)); +extern void low_cpu_offline_self(void) __attribute__((noreturn)); extern int pmac_nvram_init(void); extern void pmac_pic_init(void); diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S index 51bfdfe85058..7e0f8ba6e54a 100644 --- a/arch/powerpc/platforms/powermac/sleep.S +++ b/arch/powerpc/platforms/powermac/sleep.S @@ -201,8 +201,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) addi r3,r3,sleep_storage@l stw r5,0(r3) - .globl low_cpu_die -low_cpu_die: + .globl low_cpu_offline_self +low_cpu_offline_self: /* Flush & disable all caches */ bl flush_disable_caches @@ -244,7 +244,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450) mtmsr r2 isync b 1b -_ASM_NOKPROBE_SYMBOL(low_cpu_die) +_ASM_NOKPROBE_SYMBOL(low_cpu_offline_self) /* * Here is the resume code. 
*/ diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index eb23264910e1..a6fedcfb714f 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -920,7 +920,7 @@ static int smp_core99_cpu_disable(void) #ifdef CONFIG_PPC32 -static void pmac_cpu_die(void) +static void pmac_cpu_offline_self(void) { int cpu = smp_processor_id(); @@ -930,12 +930,12 @@ static void pmac_cpu_die(void) generic_set_cpu_dead(cpu); smp_wmb(); mb(); - low_cpu_die(); + low_cpu_offline_self(); } #else /* CONFIG_PPC32 */ -static void pmac_cpu_die(void) +static void pmac_cpu_offline_self(void) { int cpu = smp_processor_id(); @@ -1020,7 +1020,7 @@ void __init pmac_setup_smp(void) #endif /* CONFIG_PPC_PMAC32_PSURGE */ #ifdef CONFIG_HOTPLUG_CPU - ppc_md.cpu_die = pmac_cpu_die; + smp_ops->cpu_offline_self = pmac_cpu_offline_self; #endif } diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index bbf361f23ae8..54c4ba45c7ce 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -158,7 +158,7 @@ static void pnv_flush_interrupts(void) } } -static void pnv_smp_cpu_kill_self(void) +static void pnv_cpu_offline_self(void) { unsigned long srr1, unexpected_mask, wmask; unsigned int cpu; @@ -417,6 +417,7 @@ static struct smp_ops_t pnv_smp_ops = { #ifdef CONFIG_HOTPLUG_CPU .cpu_disable = pnv_smp_cpu_disable, .cpu_die = generic_cpu_die, + .cpu_offline_self = pnv_cpu_offline_self, #endif /* CONFIG_HOTPLUG_CPU */ }; @@ -430,7 +431,6 @@ void __init pnv_smp_init(void) smp_ops = &pnv_smp_ops; #ifdef CONFIG_HOTPLUG_CPU - ppc_md.cpu_die = pnv_smp_cpu_kill_self; #ifdef CONFIG_KEXEC_CORE crash_wake_offline = 1; #endif diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 7a974ed6b240..f2837e33bf5d 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -55,7 +55,7 @@ static void rtas_stop_self(void) panic("Alas, I survived.\n"); } -static void pseries_mach_cpu_die(void) +static void pseries_cpu_offline_self(void) { unsigned int hwcpu = hard_smp_processor_id(); @@ -102,7 +102,7 @@ static int pseries_cpu_disable(void) * to self-destroy so that the cpu-offline thread can send the CPU_DEAD * notifications. * - * OTOH, pseries_mach_cpu_die() is called by the @cpu when it wants to + * OTOH, pseries_cpu_offline_self() is called by the @cpu when it wants to * self-destruct. */ static void pseries_cpu_die(unsigned int cpu) @@ -901,7 +901,7 @@ static int __init pseries_cpu_hotplug_init(void) return 0; } - ppc_md.cpu_die = pseries_mach_cpu_die; + smp_ops->cpu_offline_self = pseries_cpu_offline_self; smp_ops->cpu_disable = pseries_cpu_disable; smp_ops->cpu_die = pseries_cpu_die; -- cgit v1.2.3 From 6c71cfcc01685ef495ca7886471a76e73446424e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 21 Aug 2020 20:34:07 +1000 Subject: powerpc/prom_init: Check display props exist before enabling btext It's possible to enable CONFIG_PPC_EARLY_DEBUG_BOOTX for a pseries kernel (maybe it shouldn't be), which is then booted with qemu/slof. But if you do that the kernel crashes in draw_byte(), with a DAR pointing somewhere near INT_MAX. Adding some debug to prom_init we see that we're not able to read the "address" property from OF, so we're just using whatever junk value was on the stack. 
So check the properties can be read properly from OF, if not we bail out before initialising btext, which avoids the crash. Signed-off-by: Michael Ellerman Reviewed-by: Alexey Kardashevskiy Link: https://lore.kernel.org/r/20200821103407.3362149-1-mpe@ellerman.id.au --- arch/powerpc/kernel/prom_init.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index ae7ec9903191..5090a5ab54e5 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2422,10 +2422,19 @@ static void __init prom_check_displays(void) u32 width, height, pitch, addr; prom_printf("Setting btext !\n"); - prom_getprop(node, "width", &width, 4); - prom_getprop(node, "height", &height, 4); - prom_getprop(node, "linebytes", &pitch, 4); - prom_getprop(node, "address", &addr, 4); + + if (prom_getprop(node, "width", &width, 4) == PROM_ERROR) + return; + + if (prom_getprop(node, "height", &height, 4) == PROM_ERROR) + return; + + if (prom_getprop(node, "linebytes", &pitch, 4) == PROM_ERROR) + return; + + if (prom_getprop(node, "address", &addr, 4) == PROM_ERROR) + return; + prom_printf("W=%d H=%d LB=%d addr=0x%x\n", width, height, pitch, addr); btext_setup_display(width, height, 8, pitch, addr); -- cgit v1.2.3 From 8ec5cb12cd957e59b0470b75d26c901aaf645bc3 Mon Sep 17 00:00:00 2001 From: Qinglang Miao Date: Wed, 16 Sep 2020 14:21:29 +0800 Subject: powerpc/powernv: fix wrong warning message in opalcore_config_init() The logic of the warn output is incorrect. The two args should be exchanged. Signed-off-by: Qinglang Miao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200916062129.190864-1-miaoqinglang@huawei.com --- arch/powerpc/platforms/powernv/opal-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c index 6dba3b62269f..23571f0b555a 100644 --- a/arch/powerpc/platforms/powernv/opal-core.c +++ b/arch/powerpc/platforms/powernv/opal-core.c @@ -510,7 +510,7 @@ static void __init opalcore_config_init(void) idx = be32_to_cpu(opalc_metadata->region_cnt); if (idx > MAX_PT_LOAD_CNT) { pr_warn("WARNING: OPAL regions count (%d) adjusted to limit (%d)", - MAX_PT_LOAD_CNT, idx); + idx, MAX_PT_LOAD_CNT); idx = MAX_PT_LOAD_CNT; } for (i = 0; i < idx; i++) { -- cgit v1.2.3 From 7b2aab5f22f0f7cc9e2f8672c9e65e2e88d30102 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Mon, 14 Sep 2020 23:10:01 +0200 Subject: powerpc/sysfs: Remove unused 'err' variable in sysfs_create_dscr_default() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes a compile error with W=1. 
arch/powerpc/kernel/sysfs.c: In function ‘sysfs_create_dscr_default’: arch/powerpc/kernel/sysfs.c:228:7: error: variable ‘err’ set but not used [-Werror=unused-but-set-variable] int err = 0; ^~~ cc1: all warnings being treated as errors Signed-off-by: Cédric Le Goater Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200914211007.2285999-2-clg@kaod.org --- arch/powerpc/kernel/sysfs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 928b29e0ee83..2e08640bb3b4 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -217,14 +217,13 @@ static DEVICE_ATTR(dscr_default, 0600, static void sysfs_create_dscr_default(void) { if (cpu_has_feature(CPU_FTR_DSCR)) { - int err = 0; int cpu; dscr_default = spr_default_dscr; for_each_possible_cpu(cpu) paca_ptrs[cpu]->dscr_default = dscr_default; - err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default); + device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default); } } #endif /* CONFIG_PPC64 */ -- cgit v1.2.3 From 5ab187e01a5310e1f9cd2f6b192b2343b8bd14cb Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Mon, 14 Sep 2020 23:10:03 +0200 Subject: powerpc/sstep: Remove empty if statement checking for invalid form MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The check should be performed by the caller. This fixes a compile error with W=1. ../arch/powerpc/lib/sstep.c: In function ‘mlsd_8lsd_ea’: ../arch/powerpc/lib/sstep.c:225:3: error: suggest braces around empty body in an ‘if’ statement [-Werror=empty-body] ; /* Invalid form. Should already be checked for by caller! */ ^ Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200914211007.2285999-4-clg@kaod.org --- arch/powerpc/lib/sstep.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index caee8cc77e19..e9dcaba9a4f8 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -219,10 +219,13 @@ static nokprobe_inline unsigned long mlsd_8lsd_ea(unsigned int instr, ea += regs->gpr[ra]; else if (!prefix_r && !ra) ; /* Leave ea as is */ - else if (prefix_r && !ra) + else if (prefix_r) ea += regs->nip; - else if (prefix_r && ra) - ; /* Invalid form. Should already be checked for by caller! */ + + /* + * (prefix_r && ra) is an invalid form. Should already be + * checked for by caller! + */ return ea; } -- cgit v1.2.3 From 2228f19cf90ef796c8d84f54f3a5db2dcc85c83f Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Mon, 14 Sep 2020 23:10:04 +0200 Subject: powerpc/xive: Make debug routines static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes a compile error with W=1. 
CC arch/powerpc/sysdev/xive/common.o ../arch/powerpc/sysdev/xive/common.c:1568:6: error: no previous prototype for ‘xive_debug_show_cpu’ [-Werror=missing-prototypes] void xive_debug_show_cpu(struct seq_file *m, int cpu) ^~~~~~~~~~~~~~~~~~~ ../arch/powerpc/sysdev/xive/common.c:1602:6: error: no previous prototype for ‘xive_debug_show_irq’ [-Werror=missing-prototypes] void xive_debug_show_irq(struct seq_file *m, u32 hw_irq, struct irq_data *d) ^~~~~~~~~~~~~~~~~~~ Fixes: 930914b7d528 ("powerpc/xive: Add a debugfs file to dump internal XIVE state") Signed-off-by: Cédric Le Goater Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200914211007.2285999-5-clg@kaod.org --- arch/powerpc/sysdev/xive/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index f591be9f01f4..a80440af491a 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1565,7 +1565,7 @@ static int __init xive_off(char *arg) } __setup("xive=off", xive_off); -void xive_debug_show_cpu(struct seq_file *m, int cpu) +static void xive_debug_show_cpu(struct seq_file *m, int cpu) { struct xive_cpu *xc = per_cpu(xive_cpu, cpu); @@ -1599,7 +1599,7 @@ void xive_debug_show_cpu(struct seq_file *m, int cpu) seq_puts(m, "\n"); } -void xive_debug_show_irq(struct seq_file *m, u32 hw_irq, struct irq_data *d) +static void xive_debug_show_irq(struct seq_file *m, u32 hw_irq, struct irq_data *d) { struct irq_chip *chip = irq_data_get_irq_chip(d); int rc; -- cgit v1.2.3 From ebbfeef0d8093a06ff39c60105b6650be3344cbe Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Mon, 14 Sep 2020 23:10:07 +0200 Subject: powerpc/32: Declare stack_overflow_exception() prototype MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes a compile error with W=1. CC arch/powerpc/kernel/traps.o ../arch/powerpc/kernel/traps.c:1663:6: error: no previous prototype for ‘stack_overflow_exception’ [-Werror=missing-prototypes] void stack_overflow_exception(struct pt_regs *regs) ^~~~~~~~~~~~~~~~~~~~~~~~ Fixes: 3978eb78517c ("powerpc/32: Add early stack overflow detection with VMAP stack.") Signed-off-by: Cédric Le Goater Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200914211007.2285999-8-clg@kaod.org --- arch/powerpc/include/asm/asm-prototypes.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index de14b1a34d56..4957119604c7 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -67,6 +67,7 @@ void single_step_exception(struct pt_regs *regs); void program_check_exception(struct pt_regs *regs); void alignment_exception(struct pt_regs *regs); void StackOverflow(struct pt_regs *regs); +void stack_overflow_exception(struct pt_regs *regs); void kernel_fp_unavailable_exception(struct pt_regs *regs); void altivec_unavailable_exception(struct pt_regs *regs); void vsx_unavailable_exception(struct pt_regs *regs); -- cgit v1.2.3 From aea948bb80b478ddc2448f7359d574387521a52d Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Tue, 6 Oct 2020 13:02:18 +0530 Subject: powerpc/powernv/elog: Fix race while processing OPAL error log event. 
Every error log reported by OPAL is exported to userspace through a sysfs interface and notified using kobject_uevent(). The userspace daemon (opal_errd) then reads the error log and acknowledges the error log is saved safely to disk. Once acknowledged the kernel removes the respective sysfs file entry causing respective resources to be released including kobject. However it's possible the userspace daemon may already be scanning elog entries when a new sysfs elog entry is created by the kernel. User daemon may read this new entry and ack it even before kernel can notify userspace about it through kobject_uevent() call. If that happens then we have a potential race between elog_ack_store->kobject_put() and kobject_uevent which can lead to use-after-free of a kernfs object resulting in a kernel crash. eg: BUG: Unable to handle kernel data access on read at 0x6b6b6b6b6b6b6bfb Faulting instruction address: 0xc0000000008ff2a0 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV CPU: 27 PID: 805 Comm: irq/29-opal-elo Not tainted 5.9.0-rc2-gcc-8.2.0-00214-g6f56a67bcbb5-dirty #363 ... NIP kobject_uevent_env+0xa0/0x910 LR elog_event+0x1f4/0x2d0 Call Trace: 0x5deadbeef0000122 (unreliable) elog_event+0x1f4/0x2d0 irq_thread_fn+0x4c/0xc0 irq_thread+0x1c0/0x2b0 kthread+0x1c4/0x1d0 ret_from_kernel_thread+0x5c/0x6c This patch fixes this race by protecting the sysfs file creation/notification by holding a reference count on kobject until we safely send kobject_uevent(). The function create_elog_obj() returns the elog object which if used by caller function will end up in use-after-free problem again. However, the return value of create_elog_obj() function isn't being used today and there is no need as well. Hence change it to return void to make this fix complete. Fixes: 774fea1a38c6 ("powerpc/powernv: Read OPAL error log and export it through sysfs") Cc: stable@vger.kernel.org # v3.15+ Reported-by: Oliver O'Halloran Signed-off-by: Mahesh Salgaonkar Signed-off-by: Aneesh Kumar K.V Reviewed-by: Oliver O'Halloran Reviewed-by: Vasant Hegde [mpe: Rework the logic to use a single return, reword comments, add oops] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201006122051.190176-1-mpe@ellerman.id.au --- arch/powerpc/platforms/powernv/opal-elog.c | 33 +++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 62ef7ad995da..5e33b1fc67c2 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -179,14 +179,14 @@ static ssize_t raw_attr_read(struct file *filep, struct kobject *kobj, return count; } -static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type) +static void create_elog_obj(uint64_t id, size_t size, uint64_t type) { struct elog_obj *elog; int rc; elog = kzalloc(sizeof(*elog), GFP_KERNEL); if (!elog) - return NULL; + return; elog->kobj.kset = elog_kset; @@ -219,18 +219,37 @@ static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type) rc = kobject_add(&elog->kobj, NULL, "0x%llx", id); if (rc) { kobject_put(&elog->kobj); - return NULL; + return; } + /* + * As soon as the sysfs file for this elog is created/activated there is + * a chance the opal_errd daemon (or any userspace) might read and + * acknowledge the elog before kobject_uevent() is called. 
If that + * happens then there is a potential race between + * elog_ack_store->kobject_put() and kobject_uevent() which leads to a + * use-after-free of a kernfs object resulting in a kernel crash. + * + * To avoid that, we need to take a reference on behalf of the bin file, + * so that our reference remains valid while we call kobject_uevent(). + * We then drop our reference before exiting the function, leaving the + * bin file to drop the last reference (if it hasn't already). + */ + + /* Take a reference for the bin file */ + kobject_get(&elog->kobj); rc = sysfs_create_bin_file(&elog->kobj, &elog->raw_attr); - if (rc) { + if (rc == 0) { + kobject_uevent(&elog->kobj, KOBJ_ADD); + } else { + /* Drop the reference taken for the bin file */ kobject_put(&elog->kobj); - return NULL; } - kobject_uevent(&elog->kobj, KOBJ_ADD); + /* Drop our reference */ + kobject_put(&elog->kobj); - return elog; + return; } static irqreturn_t elog_event(int irq, void *data) -- cgit v1.2.3 From 9983efa83b0a98da33807ccf5c047e204fdcac4c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 16 Sep 2020 13:02:33 +1000 Subject: powerpc: untangle cputable mce include Having cputable.h include mce.h means it pulls in a bunch of low level headers (e.g., synch.h) which then can't use CPU_FTR_ definitions. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200916030234.4110379-1-npiggin@gmail.com --- arch/powerpc/include/asm/cputable.h | 5 ----- arch/powerpc/kernel/cputable.c | 1 + arch/powerpc/kernel/dt_cpu_ftrs.c | 1 + 3 files changed, 2 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 8ca5885bd5b9..a1a300c1a20e 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -9,11 +9,6 @@ #ifndef __ASSEMBLY__ -/* - * Added to include __machine_check_early_realmode_* functions - */ -#include - /* This structure can grow, it's real size is used by head.S code * via the mkdefs mechanism. */ diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 2aa89c6b2896..b5bc2edef440 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -16,6 +16,7 @@ #include #include #include /* for PTRRELOC on ARCH=ppc */ +#include #include #include diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index f204ad79b6b5..1098863e17ee 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include -- cgit v1.2.3 From 05504b42562066ae27ce3e7dcec37f81dea476cb Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 16 Sep 2020 13:02:34 +1000 Subject: powerpc/64s: Add cp_abort after tlbiel to invalidate copy-buffer address The copy buffer is implemented as a real address in the nest which is translated from EA by copy, and used for memory access by paste. This requires that it be invalidated by TLB invalidation. TLBIE does invalidate the copy buffer, but TLBIEL does not. Add cp_abort to the tlbiel sequence. 
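As a rough illustration of the hazard (a pseudo-sequence, not code from this patch; operands and surrounding locking are elided):

    /*
     * copy   src_ea         ; the nest translates src_ea and latches its RA
     * ...the mapping for src_ea is changed or torn down...
     * ptesync ; tlbiel      ; the local TLB entry is gone, but the latched
     *                       ; copy-buffer RA is not
     * paste  dst_ea         ; data is still taken from the RA latched at
     *                       ; copy time -- i.e. the old physical page
     *
     * Issuing cp_abort after the tlbiel sequence discards the latched copy,
     * so a later paste cannot consume the stale real address.
     */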
Signed-off-by: Nicholas Piggin [mpe: Fixup whitespace and comment formatting] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200916030234.4110379-2-npiggin@gmail.com --- arch/powerpc/include/asm/synch.h | 19 ++++++++++++++++++- arch/powerpc/mm/book3s64/hash_native.c | 8 ++++---- arch/powerpc/mm/book3s64/radix_tlb.c | 12 ++++++------ 3 files changed, 28 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/synch.h b/arch/powerpc/include/asm/synch.h index aca70fb43147..1d67bc8d7bc6 100644 --- a/arch/powerpc/include/asm/synch.h +++ b/arch/powerpc/include/asm/synch.h @@ -3,8 +3,9 @@ #define _ASM_POWERPC_SYNCH_H #ifdef __KERNEL__ +#include #include -#include +#include #ifndef __ASSEMBLY__ extern unsigned int __start___lwsync_fixup, __stop___lwsync_fixup; @@ -20,6 +21,22 @@ static inline void isync(void) { __asm__ __volatile__ ("isync" : : : "memory"); } + +static inline void ppc_after_tlbiel_barrier(void) +{ + asm volatile("ptesync": : :"memory"); + /* + * POWER9, POWER10 need a cp_abort after tlbiel to ensure the copy is + * invalidated correctly. If this is not done, the paste can take data + * from the physical address that was translated at copy time. + * + * POWER9 in practice does not need this, because address spaces with + * accelerators mapped will use tlbie (which does invalidate the copy) + * to invalidate translations. It's not possible to limit POWER10 this + * way due to local copy-paste. + */ + asm volatile(ASM_FTR_IFSET(PPC_CP_ABORT, "", %0) : : "i" (CPU_FTR_ARCH_31) : "memory"); +} #endif /* __ASSEMBLY__ */ #if defined(__powerpc64__) diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c index cf20e5229ce1..0203cdf48c54 100644 --- a/arch/powerpc/mm/book3s64/hash_native.c +++ b/arch/powerpc/mm/book3s64/hash_native.c @@ -82,7 +82,7 @@ static void tlbiel_all_isa206(unsigned int num_sets, unsigned int is) for (set = 0; set < num_sets; set++) tlbiel_hash_set_isa206(set, is); - asm volatile("ptesync": : :"memory"); + ppc_after_tlbiel_barrier(); } static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) @@ -110,7 +110,7 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) */ tlbiel_hash_set_isa300(0, is, 0, 2, 1); - asm volatile("ptesync": : :"memory"); + ppc_after_tlbiel_barrier(); asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory"); } @@ -303,7 +303,7 @@ static inline void tlbie(unsigned long vpn, int psize, int apsize, asm volatile("ptesync": : :"memory"); if (use_local) { __tlbiel(vpn, psize, apsize, ssize); - asm volatile("ptesync": : :"memory"); + ppc_after_tlbiel_barrier(); } else { __tlbie(vpn, psize, apsize, ssize); fixup_tlbie_vpn(vpn, psize, apsize, ssize); @@ -879,7 +879,7 @@ static void native_flush_hash_range(unsigned long number, int local) __tlbiel(vpn, psize, psize, ssize); } pte_iterate_hashed_end(); } - asm volatile("ptesync":::"memory"); + ppc_after_tlbiel_barrier(); } else { int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 143b4fd396f0..b487b489d4b6 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -65,7 +65,7 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) for (set = 1; set < num_sets; set++) tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1); - asm volatile("ptesync": : :"memory"); + ppc_after_tlbiel_barrier(); } void radix__tlbiel_all(unsigned int 
action) @@ -296,7 +296,7 @@ static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric) /* For PWC, only one flush is needed */ if (ric == RIC_FLUSH_PWC) { - asm volatile("ptesync": : :"memory"); + ppc_after_tlbiel_barrier(); return; } @@ -304,7 +304,7 @@ static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric) for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++) __tlbiel_pid(pid, set, RIC_FLUSH_TLB); - asm volatile("ptesync": : :"memory"); + ppc_after_tlbiel_barrier(); asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory"); } @@ -431,7 +431,7 @@ static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid, asm volatile("ptesync": : :"memory"); __tlbiel_va(va, pid, ap, ric); - asm volatile("ptesync": : :"memory"); + ppc_after_tlbiel_barrier(); } static inline void _tlbiel_va_range(unsigned long start, unsigned long end, @@ -442,7 +442,7 @@ static inline void _tlbiel_va_range(unsigned long start, unsigned long end, if (also_pwc) __tlbiel_pid(pid, 0, RIC_FLUSH_PWC); __tlbiel_va_range(start, end, pid, page_size, psize); - asm volatile("ptesync": : :"memory"); + ppc_after_tlbiel_barrier(); } static inline void __tlbie_va_range(unsigned long start, unsigned long end, @@ -949,7 +949,7 @@ is_local: if (hflush) __tlbiel_va_range(hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M); - asm volatile("ptesync": : :"memory"); + ppc_after_tlbiel_barrier(); } else if (cputlb_use_tlbie()) { asm volatile("ptesync": : :"memory"); __tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize); -- cgit v1.2.3 From cdb1ea0276bd6a225aa1203b4829b8c3c0d4d069 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 25 Aug 2020 17:56:12 +1000 Subject: powerpc/pseries: add new branch prediction security bits for link stack The hypervisor interface has defined branch prediction security bits for handling the link stack. Wire them up. 
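A rough sketch of how the resulting SEC_FTR_* flags are typically consumed (modelled on the existing count-cache flush handling; the wrapper function and the two enable_* helpers below are illustrative only and not part of this patch):

    static void update_link_stack_flush(void)
    {
            if (!security_ftr_enabled(SEC_FTR_FLUSH_LINK_STACK))
                    return;         /* firmware did not request a link stack flush */

            if (security_ftr_enabled(SEC_FTR_BCCTR_LINK_FLUSH_ASSIST))
                    enable_link_stack_flush_hw();   /* hypothetical: hardware-assisted flush */
            else
                    enable_link_stack_flush_sw();   /* hypothetical: software flush sequence */
    }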
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200825075612.224656-1-npiggin@gmail.com --- arch/powerpc/include/asm/hvcall.h | 2 ++ arch/powerpc/platforms/pseries/setup.c | 6 ++++++ 2 files changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index e8f116a425f9..c1fbccb04390 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -375,11 +375,13 @@ #define H_CPU_CHAR_THREAD_RECONFIG_CTRL (1ull << 57) // IBM bit 6 #define H_CPU_CHAR_COUNT_CACHE_DISABLED (1ull << 56) // IBM bit 7 #define H_CPU_CHAR_BCCTR_FLUSH_ASSIST (1ull << 54) // IBM bit 9 +#define H_CPU_CHAR_BCCTR_LINK_FLUSH_ASSIST (1ull << 52) // IBM bit 11 #define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0 #define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1 #define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2 #define H_CPU_BEHAV_FLUSH_COUNT_CACHE (1ull << 58) // IBM bit 5 +#define H_CPU_BEHAV_FLUSH_LINK_STACK (1ull << 57) // IBM bit 6 /* Flag values used in H_REGISTER_PROC_TBL hcall */ #define PROC_TABLE_OP_MASK 0x18 diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 2f4ee0a90284..633c45ec406d 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -519,9 +519,15 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result) if (result->character & H_CPU_CHAR_BCCTR_FLUSH_ASSIST) security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST); + if (result->character & H_CPU_CHAR_BCCTR_LINK_FLUSH_ASSIST) + security_ftr_set(SEC_FTR_BCCTR_LINK_FLUSH_ASSIST); + if (result->behaviour & H_CPU_BEHAV_FLUSH_COUNT_CACHE) security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE); + if (result->behaviour & H_CPU_BEHAV_FLUSH_LINK_STACK) + security_ftr_set(SEC_FTR_FLUSH_LINK_STACK); + /* * The features below are enabled by default, so we instead look to see * if firmware has *disabled* them, and clear them if so. -- cgit v1.2.3 From 903fd31d3212ab72d564c68f6cfb5d04db68773e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 15 Sep 2020 21:46:45 +1000 Subject: powerpc/64: fix irq replay missing preempt Prior to commit 3282a3da25bd ("powerpc/64: Implement soft interrupt replay in C"), replayed interrupts returned by the regular interrupt exit code, which performs preemption in case an interrupt had set need_resched. This logic was missed by the conversion. Adding preempt_disable/enable around the interrupt replay and final irq enable will reschedule if needed. Fixes: 3282a3da25bd ("powerpc/64: Implement soft interrupt replay in C") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200915114650.3980244-1-npiggin@gmail.com --- arch/powerpc/kernel/irq.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index bf21ebd36190..77019699606a 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -368,6 +368,12 @@ notrace void arch_local_irq_restore(unsigned long mask) } } + /* + * Disable preempt here, so that the below preempt_enable will + * perform resched if required (a replayed interrupt may set + * need_resched). 
+ */ + preempt_disable(); irq_soft_mask_set(IRQS_ALL_DISABLED); trace_hardirqs_off(); @@ -377,6 +383,7 @@ notrace void arch_local_irq_restore(unsigned long mask) trace_hardirqs_on(); irq_soft_mask_set(IRQS_ENABLED); __hard_irq_enable(); + preempt_enable(); } EXPORT_SYMBOL(arch_local_irq_restore); -- cgit v1.2.3 From 2b48e96be2f9f7151197fd25dc41487054bc6f5b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 15 Sep 2020 21:46:46 +1000 Subject: powerpc/64: fix irq replay pt_regs->softe value Replayed interrupts get an "artificial" struct pt_regs constructed to pass to interrupt handler functions. This did not get the softe field set correctly, it's as though the interrupt has hit while irqs are disabled. It should be IRQS_ENABLED. This is possibly harmless, asynchronous handlers should not be testing if irqs were disabled, but it might be possible for example some code is shared with synchronous or NMI handlers, and it makes more sense if debug output looks at this. Fixes: 3282a3da25bd ("powerpc/64: Implement soft interrupt replay in C") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200915114650.3980244-2-npiggin@gmail.com --- arch/powerpc/kernel/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 77019699606a..3fdad9336885 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -214,7 +214,7 @@ void replay_soft_interrupts(void) struct pt_regs regs; ppc_save_regs(®s); - regs.softe = IRQS_ALL_DISABLED; + regs.softe = IRQS_ENABLED; again: if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) -- cgit v1.2.3 From 012a9a97a8fd6c96d5ec64eb0583220490d95e73 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 15 Sep 2020 21:46:47 +1000 Subject: powerpc/64e: remove PACA_IRQ_EE_EDGE This is not used anywhere. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200915114650.3980244-3-npiggin@gmail.com --- arch/powerpc/include/asm/hw_irq.h | 5 ++--- arch/powerpc/kernel/exceptions-64e.S | 1 - arch/powerpc/kernel/irq.c | 23 ----------------------- 3 files changed, 2 insertions(+), 27 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 538698facb80..2034f9590a8c 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -25,9 +25,8 @@ #define PACA_IRQ_DBELL 0x02 #define PACA_IRQ_EE 0x04 #define PACA_IRQ_DEC 0x08 /* Or FIT */ -#define PACA_IRQ_EE_EDGE 0x10 /* BookE only */ -#define PACA_IRQ_HMI 0x20 -#define PACA_IRQ_PMI 0x40 +#define PACA_IRQ_HMI 0x10 +#define PACA_IRQ_PMI 0x20 /* * Some soft-masked interrupts must be hard masked until they are replayed diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index d9ed79415100..ca444ca82b8d 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -988,7 +988,6 @@ kernel_dbg_exc: .endm masked_interrupt_book3e_0x500: - // XXX When adding support for EPR, use PACA_IRQ_EE_EDGE masked_interrupt_book3e PACA_IRQ_EE 1 masked_interrupt_book3e_0x900: diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 3fdad9336885..736a6b56e7d6 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -181,16 +181,6 @@ notrace unsigned int __check_irq_replay(void) return 0x500; } - /* - * Check if an EPR external interrupt happened this bit is typically - * set if we need to handle another "edge" interrupt from within the - * MPIC "EPR" handler. - */ - if (happened & PACA_IRQ_EE_EDGE) { - local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE; - return 0x500; - } - if (happened & PACA_IRQ_DBELL) { local_paca->irq_happened &= ~PACA_IRQ_DBELL; return 0x280; @@ -270,19 +260,6 @@ again: hard_irq_disable(); } - /* - * Check if an EPR external interrupt happened this bit is typically - * set if we need to handle another "edge" interrupt from within the - * MPIC "EPR" handler. - */ - if (IS_ENABLED(CONFIG_PPC_BOOK3E) && (happened & PACA_IRQ_EE_EDGE)) { - local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE; - regs.trap = 0x500; - do_IRQ(®s); - if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) - hard_irq_disable(); - } - if (IS_ENABLED(CONFIG_PPC_DOORBELL) && (happened & PACA_IRQ_DBELL)) { local_paca->irq_happened &= ~PACA_IRQ_DBELL; if (IS_ENABLED(CONFIG_PPC_BOOK3E)) -- cgit v1.2.3 From 903dd1ff453e458fc7608ee4df42a6df16d3d1a0 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 15 Sep 2020 21:46:48 +1000 Subject: powerpc/64e: remove 64s specific interrupt soft-mask code Since the assembly soft-masking code was moved to 64e specific, there are some 64s specific interrupt types still there. Remove them. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200915114650.3980244-4-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64e.S | 10 ---------- arch/powerpc/kernel/irq.c | 2 +- 2 files changed, 1 insertion(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index ca444ca82b8d..f579ce46eef2 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -1302,16 +1302,6 @@ fast_exception_return: addi r3,r1,STACK_FRAME_OVERHEAD; bl do_IRQ b ret_from_except -1: cmpwi cr0,r3,0xf00 - bne 1f - addi r3,r1,STACK_FRAME_OVERHEAD; - bl performance_monitor_exception - b ret_from_except -1: cmpwi cr0,r3,0xe60 - bne 1f - addi r3,r1,STACK_FRAME_OVERHEAD; - bl handle_hmi_exception - b ret_from_except 1: cmpwi cr0,r3,0x900 bne 1f addi r3,r1,STACK_FRAME_OVERHEAD; diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 736a6b56e7d6..b725509f9073 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -113,7 +113,7 @@ static inline notrace int decrementer_check_overflow(void) #ifdef CONFIG_PPC_BOOK3E /* This is called whenever we are re-enabling interrupts - * and returns either 0 (nothing to do) or 500/900/280/a00/e80 if + * and returns either 0 (nothing to do) or 500/900/280 if * there's an EE, DEC or DBELL to generate. * * This is called in two contexts: From arch_local_irq_restore() -- cgit v1.2.3 From 455575533c7aa294d3c0284d59a77ae9a60c0537 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 15 Sep 2020 21:46:49 +1000 Subject: powerpc/64: make restore_interrupts 64e only This is not used by 64s. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200915114650.3980244-5-npiggin@gmail.com --- arch/powerpc/kernel/irq.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index b725509f9073..631e6d236c97 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -191,6 +191,25 @@ notrace unsigned int __check_irq_replay(void) return 0; } + +/* + * This is specifically called by assembly code to re-enable interrupts + * if they are currently disabled. This is typically called before + * schedule() or do_signal() when returning to userspace. We do it + * in C to avoid the burden of dealing with lockdep etc... + * + * NOTE: This is called with interrupts hard disabled but not marked + * as such in paca->irq_happened, so we need to resync this. + */ +void notrace restore_interrupts(void) +{ + if (irqs_disabled()) { + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + local_irq_enable(); + } else + __hard_irq_enable(); +} + #endif /* CONFIG_PPC_BOOK3E */ void replay_soft_interrupts(void) @@ -364,24 +383,6 @@ notrace void arch_local_irq_restore(unsigned long mask) } EXPORT_SYMBOL(arch_local_irq_restore); -/* - * This is specifically called by assembly code to re-enable interrupts - * if they are currently disabled. This is typically called before - * schedule() or do_signal() when returning to userspace. We do it - * in C to avoid the burden of dealing with lockdep etc... - * - * NOTE: This is called with interrupts hard disabled but not marked - * as such in paca->irq_happened, so we need to resync this. 
- */ -void notrace restore_interrupts(void) -{ - if (irqs_disabled()) { - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; - local_irq_enable(); - } else - __hard_irq_enable(); -} - /* * This is a helper to use when about to go into idle low-power * when the latter has the side effect of re-enabling interrupts -- cgit v1.2.3 From 4366337490cbe5a35b50e83755be629a502ec7e2 Mon Sep 17 00:00:00 2001 From: Wang Wensheng Date: Fri, 18 Sep 2020 08:59:51 +0000 Subject: powerpc/papr_scm: Fix warnings about undeclared variable Build the kernel with 'make C=2': arch/powerpc/platforms/pseries/papr_scm.c:825:1: warning: symbol 'dev_attr_perf_stats' was not declared. Should it be static? Signed-off-by: Wang Wensheng Reviewed-by: Vaibhav Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200918085951.44983-1-wangwensheng4@huawei.com --- arch/powerpc/platforms/pseries/papr_scm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 5493bc847bd0..a95aa425e7d4 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -823,7 +823,7 @@ free_stats: kfree(stats); return rc ? rc : (ssize_t)seq_buf_used(&s); } -DEVICE_ATTR_ADMIN_RO(perf_stats); +static DEVICE_ATTR_ADMIN_RO(perf_stats); static ssize_t flags_show(struct device *dev, struct device_attribute *attr, char *buf) -- cgit v1.2.3 From 5c5e46dad939b2bf4df04293ab9ac68abd7c1f55 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Thu, 24 Sep 2020 11:49:22 +1000 Subject: powerpc: PPC_SECURE_BOOT should not require PowerNV In commit 61f879d97ce4 ("powerpc/pseries: Detect secure and trusted boot state of the system.") we taught the kernel how to understand the secure-boot parameters used by a pseries guest. However, CONFIG_PPC_SECURE_BOOT still requires PowerNV. I didn't catch this because pseries_le_defconfig includes support for PowerNV and so everything still worked. Indeed, most configs will. Nonetheless, technically PPC_SECURE_BOOT doesn't require PowerNV any more. The secure variables support (PPC_SECVAR_SYSFS) doesn't do anything on pSeries yet, but I don't think it's worth adding a new condition - at some stage we'll want to add a backend for pSeries anyway. Fixes: 61f879d97ce4 ("powerpc/pseries: Detect secure and trusted boot state of the system.") Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200924014922.172914-1-dja@axtens.net --- arch/powerpc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index d02b51174fe4..fe0e6b317cc2 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -984,7 +984,7 @@ config PPC_MEM_KEYS config PPC_SECURE_BOOT prompt "Enable secure boot support" bool - depends on PPC_POWERNV + depends on PPC_POWERNV || PPC_PSERIES depends on IMA_ARCH_POLICY imply IMA_SECURE_AND_OR_TRUSTED_BOOT help -- cgit v1.2.3 From 874dc62f548f28649ac3d7e31025b1e8cec3868a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Sep 2020 06:13:10 +0200 Subject: powerpc: switch 85xx defconfigs from legacy ide to libata Switch the 85xx defconfigs from the soon to be removed legacy ide driver to libata. 
Signed-off-by: Christoph Hellwig Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200924041310.520970-1-hch@lst.de --- arch/powerpc/configs/85xx/mpc85xx_cds_defconfig | 6 +++--- arch/powerpc/configs/85xx/tqm8540_defconfig | 6 +++--- arch/powerpc/configs/85xx/tqm8541_defconfig | 6 +++--- arch/powerpc/configs/85xx/tqm8555_defconfig | 6 +++--- arch/powerpc/configs/85xx/tqm8560_defconfig | 6 +++--- 5 files changed, 15 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig index 0683d8c292a8..cea72e85ed26 100644 --- a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig +++ b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig @@ -29,9 +29,9 @@ CONFIG_SYN_COOKIES=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 -CONFIG_IDE=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_VIA82CXXX=y +CONFIG_ATA=y +CONFIG_ATA_GENERIC=y +CONFIG_PATA_VIA=y CONFIG_NETDEVICES=y CONFIG_GIANFAR=y CONFIG_E1000=y diff --git a/arch/powerpc/configs/85xx/tqm8540_defconfig b/arch/powerpc/configs/85xx/tqm8540_defconfig index 98982a0e82d8..bbf040aa1f9a 100644 --- a/arch/powerpc/configs/85xx/tqm8540_defconfig +++ b/arch/powerpc/configs/85xx/tqm8540_defconfig @@ -30,9 +30,9 @@ CONFIG_MTD_CFI_AMDSTD=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 -CONFIG_IDE=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_VIA82CXXX=y +CONFIG_ATA=y +CONFIG_ATA_GENERIC=y +CONFIG_PATA_VIA=y CONFIG_NETDEVICES=y CONFIG_GIANFAR=y CONFIG_E100=y diff --git a/arch/powerpc/configs/85xx/tqm8541_defconfig b/arch/powerpc/configs/85xx/tqm8541_defconfig index a6e21db1dafe..523ad8dcfd9d 100644 --- a/arch/powerpc/configs/85xx/tqm8541_defconfig +++ b/arch/powerpc/configs/85xx/tqm8541_defconfig @@ -30,9 +30,9 @@ CONFIG_MTD_CFI_AMDSTD=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 -CONFIG_IDE=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_VIA82CXXX=y +CONFIG_ATA=y +CONFIG_ATA_GENERIC=y +CONFIG_PATA_VIA=y CONFIG_NETDEVICES=y CONFIG_GIANFAR=y CONFIG_E100=y diff --git a/arch/powerpc/configs/85xx/tqm8555_defconfig b/arch/powerpc/configs/85xx/tqm8555_defconfig index ca1de3979474..0032ce1e8c9c 100644 --- a/arch/powerpc/configs/85xx/tqm8555_defconfig +++ b/arch/powerpc/configs/85xx/tqm8555_defconfig @@ -30,9 +30,9 @@ CONFIG_MTD_CFI_AMDSTD=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 -CONFIG_IDE=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_VIA82CXXX=y +CONFIG_ATA=y +CONFIG_ATA_GENERIC=y +CONFIG_PATA_VIA=y CONFIG_NETDEVICES=y CONFIG_GIANFAR=y CONFIG_E100=y diff --git a/arch/powerpc/configs/85xx/tqm8560_defconfig b/arch/powerpc/configs/85xx/tqm8560_defconfig index ca3b8c8ef30f..a80b971f7d6e 100644 --- a/arch/powerpc/configs/85xx/tqm8560_defconfig +++ b/arch/powerpc/configs/85xx/tqm8560_defconfig @@ -30,9 +30,9 @@ CONFIG_MTD_CFI_AMDSTD=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 -CONFIG_IDE=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_VIA82CXXX=y +CONFIG_ATA=y +CONFIG_ATA_GENERIC=y +CONFIG_PATA_VIA=y CONFIG_NETDEVICES=y CONFIG_GIANFAR=y CONFIG_E100=y -- cgit v1.2.3 From d125aedb404204de0579b16028096b2cc09e4deb Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 18 Sep 2020 19:30:42 +1000 Subject: powerpc/eeh: Rework EEH initialisation Drop the EEH register / unregister ops thing and have the platform pass the ops structure into eeh_init() directly. 
This takes one initcall out of the EEH setup path and it means we're only doing EEH setup on the platforms which actually support it. It's also less code and generally easier to follow. No functional changes. Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200918093050.37344-1-oohall@gmail.com --- arch/powerpc/include/asm/eeh.h | 3 +- arch/powerpc/kernel/eeh.c | 87 +++++----------------------- arch/powerpc/platforms/powernv/eeh-powernv.c | 4 +- arch/powerpc/platforms/pseries/eeh_pseries.c | 5 +- 4 files changed, 21 insertions(+), 78 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index d5f369bcd130..765bcf63edea 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -295,8 +295,7 @@ const char *eeh_pe_loc_get(struct eeh_pe *pe); struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe); void eeh_show_enabled(void); -int __init eeh_ops_register(struct eeh_ops *ops); -int __exit eeh_ops_unregister(const char *name); +int __init eeh_init(struct eeh_ops *ops); int eeh_check_failure(const volatile void __iomem *token); int eeh_dev_check_failure(struct eeh_dev *edev); void eeh_addr_cache_init(void); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 94682382fc8c..28a0ea5d9faa 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -929,56 +929,6 @@ void eeh_save_bars(struct eeh_dev *edev) edev->config_space[1] |= PCI_COMMAND_MASTER; } -/** - * eeh_ops_register - Register platform dependent EEH operations - * @ops: platform dependent EEH operations - * - * Register the platform dependent EEH operation callback - * functions. The platform should call this function before - * any other EEH operations. - */ -int __init eeh_ops_register(struct eeh_ops *ops) -{ - if (!ops->name) { - pr_warn("%s: Invalid EEH ops name for %p\n", - __func__, ops); - return -EINVAL; - } - - if (eeh_ops && eeh_ops != ops) { - pr_warn("%s: EEH ops of platform %s already existing (%s)\n", - __func__, eeh_ops->name, ops->name); - return -EEXIST; - } - - eeh_ops = ops; - - return 0; -} - -/** - * eeh_ops_unregister - Unreigster platform dependent EEH operations - * @name: name of EEH platform operations - * - * Unregister the platform dependent EEH operation callback - * functions. - */ -int __exit eeh_ops_unregister(const char *name) -{ - if (!name || !strlen(name)) { - pr_warn("%s: Invalid EEH ops name\n", - __func__); - return -EINVAL; - } - - if (eeh_ops && !strcmp(eeh_ops->name, name)) { - eeh_ops = NULL; - return 0; - } - - return -EEXIST; -} - static int eeh_reboot_notifier(struct notifier_block *nb, unsigned long action, void *unused) { @@ -991,25 +941,22 @@ static struct notifier_block eeh_reboot_nb = { }; /** - * eeh_init - EEH initialization - * - * Initialize EEH by trying to enable it for all of the adapters in the system. - * As a side effect we can determine here if eeh is supported at all. - * Note that we leave EEH on so failed config cycles won't cause a machine - * check. If a user turns off EEH for a particular adapter they are really - * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't - * grant access to a slot if EEH isn't enabled, and so we always enable - * EEH for all slots/all devices. + * eeh_init - System wide EEH initialization * - * The eeh-force-off option disables EEH checking globally, for all slots. - * Even if force-off is set, the EEH hardware is still enabled, so that - * newer systems can boot. 
+ * It's the platform's job to call this from an arch_initcall(). */ -static int eeh_init(void) +int eeh_init(struct eeh_ops *ops) { struct pci_controller *hose, *tmp; int ret = 0; + /* the platform should only initialise EEH once */ + if (WARN_ON(eeh_ops)) + return -EEXIST; + if (WARN_ON(!ops)) + return -ENOENT; + eeh_ops = ops; + /* Register reboot notifier */ ret = register_reboot_notifier(&eeh_reboot_nb); if (ret) { @@ -1018,13 +965,13 @@ static int eeh_init(void) return ret; } - /* call platform initialization function */ - if (!eeh_ops) { - pr_warn("%s: Platform EEH operation not found\n", - __func__); - return -EEXIST; - } else if ((ret = eeh_ops->init())) + if (eeh_ops->init) + ret = eeh_ops->init(); + if (ret) { + pr_warn("%s: platform EEH init failed (%d)\n", + __func__, ret); return ret; + } /* Initialize PHB PEs */ list_for_each_entry_safe(hose, tmp, &hose_list, list_node) @@ -1036,8 +983,6 @@ static int eeh_init(void) return eeh_event_init(); } -core_initcall_sync(eeh_init); - static int eeh_device_notifier(struct notifier_block *nb, unsigned long action, void *data) { diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 663bd69ac51b..33938f8a3fba 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -1717,7 +1717,7 @@ static int __init eeh_powernv_init(void) { int ret = -EINVAL; - ret = eeh_ops_register(&pnv_eeh_ops); + ret = eeh_init(&pnv_eeh_ops); if (!ret) pr_info("EEH: PowerNV platform initialized\n"); else @@ -1725,4 +1725,4 @@ static int __init eeh_powernv_init(void) return ret; } -machine_early_initcall(powernv, eeh_powernv_init); +machine_core_initcall_sync(powernv, eeh_powernv_init); diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 1db74cec72bc..df32b8ccbd59 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -990,13 +990,12 @@ static int __init eeh_pseries_init(void) { int ret; - ret = eeh_ops_register(&pseries_eeh_ops); + ret = eeh_init(&pseries_eeh_ops); if (!ret) pr_info("EEH: pSeries platform initialized\n"); else pr_info("EEH: pSeries platform initialization failure (%d)\n", ret); - return ret; } -machine_early_initcall(pseries, eeh_pseries_init); +machine_core_initcall_sync(pseries, eeh_pseries_init); -- cgit v1.2.3 From 82a1ea21f1bac42eb8e3f77d5d249201855f2c85 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 18 Sep 2020 19:30:43 +1000 Subject: powerpc/powernv: Stop using eeh_ops->init() Fold pnv_eeh_init() into eeh_powernv_init() rather than having eeh_init() call it via eeh_ops->init(). It's simpler and it'll let us delete eeh_ops.init. 
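Taken together with the following patches in this series, the per-platform setup ends up looking roughly like this (a condensed sketch of the powernv case; the feature-flag and diag-buffer sizing done before eeh_init() is elided):

    static int __init eeh_powernv_init(void)
    {
            /* platform flags, diag buffer sizing, etc. elided */
            return eeh_init(&pnv_eeh_ops);
    }
    machine_arch_initcall(powernv, eeh_powernv_init);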
Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200918093050.37344-2-oohall@gmail.com --- arch/powerpc/platforms/powernv/eeh-powernv.c | 94 +++++++++++++--------------- 1 file changed, 45 insertions(+), 49 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 33938f8a3fba..5db92f39887a 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -44,54 +44,6 @@ static void pnv_pcibios_bus_add_device(struct pci_dev *pdev) eeh_probe_device(pdev); } -static int pnv_eeh_init(void) -{ - struct pci_controller *hose; - struct pnv_phb *phb; - int max_diag_size = PNV_PCI_DIAG_BUF_SIZE; - - if (!firmware_has_feature(FW_FEATURE_OPAL)) { - pr_warn("%s: OPAL is required !\n", - __func__); - return -EINVAL; - } - - /* Set probe mode */ - eeh_add_flag(EEH_PROBE_MODE_DEV); - - /* - * P7IOC blocks PCI config access to frozen PE, but PHB3 - * doesn't do that. So we have to selectively enable I/O - * prior to collecting error log. - */ - list_for_each_entry(hose, &hose_list, list_node) { - phb = hose->private_data; - - if (phb->model == PNV_PHB_MODEL_P7IOC) - eeh_add_flag(EEH_ENABLE_IO_FOR_LOG); - - if (phb->diag_data_size > max_diag_size) - max_diag_size = phb->diag_data_size; - - /* - * PE#0 should be regarded as valid by EEH core - * if it's not the reserved one. Currently, we - * have the reserved PE#255 and PE#127 for PHB3 - * and P7IOC separately. So we should regard - * PE#0 as valid for PHB3 and P7IOC. - */ - if (phb->ioda.reserved_pe_idx != 0) - eeh_add_flag(EEH_VALID_PE_ZERO); - - break; - } - - eeh_set_pe_aux_size(max_diag_size); - ppc_md.pcibios_bus_add_device = pnv_pcibios_bus_add_device; - - return 0; -} - static irqreturn_t pnv_eeh_event(int irq, void *data) { /* @@ -1674,7 +1626,6 @@ static int pnv_eeh_restore_config(struct eeh_dev *edev) static struct eeh_ops pnv_eeh_ops = { .name = "powernv", - .init = pnv_eeh_init, .probe = pnv_eeh_probe, .set_option = pnv_eeh_set_option, .get_state = pnv_eeh_get_state, @@ -1715,8 +1666,53 @@ DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_pci_fixup_vf_mps); */ static int __init eeh_powernv_init(void) { + int max_diag_size = PNV_PCI_DIAG_BUF_SIZE; + struct pci_controller *hose; + struct pnv_phb *phb; int ret = -EINVAL; + if (!firmware_has_feature(FW_FEATURE_OPAL)) { + pr_warn("%s: OPAL is required !\n", __func__); + return -EINVAL; + } + + /* Set probe mode */ + eeh_add_flag(EEH_PROBE_MODE_DEV); + + /* + * P7IOC blocks PCI config access to frozen PE, but PHB3 + * doesn't do that. So we have to selectively enable I/O + * prior to collecting error log. + */ + list_for_each_entry(hose, &hose_list, list_node) { + phb = hose->private_data; + + if (phb->model == PNV_PHB_MODEL_P7IOC) + eeh_add_flag(EEH_ENABLE_IO_FOR_LOG); + + if (phb->diag_data_size > max_diag_size) + max_diag_size = phb->diag_data_size; + + /* + * PE#0 should be regarded as valid by EEH core + * if it's not the reserved one. Currently, we + * have the reserved PE#255 and PE#127 for PHB3 + * and P7IOC separately. So we should regard + * PE#0 as valid for PHB3 and P7IOC. + */ + if (phb->ioda.reserved_pe_idx != 0) + eeh_add_flag(EEH_VALID_PE_ZERO); + + break; + } + + /* + * eeh_init() allocates the eeh_pe and its aux data buf so the + * size needs to be set before calling eeh_init(). 
+ */ + eeh_set_pe_aux_size(max_diag_size); + ppc_md.pcibios_bus_add_device = pnv_pcibios_bus_add_device; + ret = eeh_init(&pnv_eeh_ops); if (!ret) pr_info("EEH: PowerNV platform initialized\n"); -- cgit v1.2.3 From 1f8fa0cd6a848ff072bffe0ee776554387128f60 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 18 Sep 2020 19:30:44 +1000 Subject: powerpc/pseries: Stop using eeh_ops->init() Fold pseries_eeh_init() into eeh_pseries_init() rather than having eeh_init() call it via eeh_ops->init(). It's simpler and it'll let us delete eeh_ops.init. Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200918093050.37344-3-oohall@gmail.com --- arch/powerpc/platforms/pseries/eeh_pseries.c | 155 ++++++++++++--------------- 1 file changed, 71 insertions(+), 84 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index df32b8ccbd59..a3ed6a0507da 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -237,88 +237,6 @@ static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX]; static DEFINE_SPINLOCK(slot_errbuf_lock); static int eeh_error_buf_size; -/** - * pseries_eeh_init - EEH platform dependent initialization - * - * EEH platform dependent initialization on pseries. - */ -static int pseries_eeh_init(void) -{ - struct pci_controller *phb; - struct pci_dn *pdn; - int addr, config_addr; - - /* figure out EEH RTAS function call tokens */ - ibm_set_eeh_option = rtas_token("ibm,set-eeh-option"); - ibm_set_slot_reset = rtas_token("ibm,set-slot-reset"); - ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2"); - ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state"); - ibm_slot_error_detail = rtas_token("ibm,slot-error-detail"); - ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2"); - ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info"); - ibm_configure_pe = rtas_token("ibm,configure-pe"); - - /* - * ibm,configure-pe and ibm,configure-bridge have the same semantics, - * however ibm,configure-pe can be faster. If we can't find - * ibm,configure-pe then fall back to using ibm,configure-bridge. - */ - if (ibm_configure_pe == RTAS_UNKNOWN_SERVICE) - ibm_configure_pe = rtas_token("ibm,configure-bridge"); - - /* - * Necessary sanity check. We needn't check "get-config-addr-info" - * and its variant since the old firmware probably support address - * of domain/bus/slot/function for EEH RTAS operations. 
- */ - if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE || - ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE || - (ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE && - ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE) || - ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE || - ibm_configure_pe == RTAS_UNKNOWN_SERVICE) { - pr_info("EEH functionality not supported\n"); - return -EINVAL; - } - - /* Initialize error log lock and size */ - spin_lock_init(&slot_errbuf_lock); - eeh_error_buf_size = rtas_token("rtas-error-log-max"); - if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) { - pr_info("%s: unknown EEH error log size\n", - __func__); - eeh_error_buf_size = 1024; - } else if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) { - pr_info("%s: EEH error log size %d exceeds the maximal %d\n", - __func__, eeh_error_buf_size, RTAS_ERROR_LOG_MAX); - eeh_error_buf_size = RTAS_ERROR_LOG_MAX; - } - - /* Set EEH probe mode */ - eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG); - - /* Set EEH machine dependent code */ - ppc_md.pcibios_bus_add_device = pseries_pcibios_bus_add_device; - - if (is_kdump_kernel() || reset_devices) { - pr_info("Issue PHB reset ...\n"); - list_for_each_entry(phb, &hose_list, list_node) { - pdn = list_first_entry(&PCI_DN(phb->dn)->child_list, struct pci_dn, list); - addr = (pdn->busno << 16) | (pdn->devfn << 8); - config_addr = pseries_eeh_get_config_addr(phb, addr); - /* invalid PE config addr */ - if (config_addr == 0) - continue; - - pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_FUNDAMENTAL); - pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_DEACTIVATE); - pseries_eeh_phb_configure_bridge(phb, config_addr); - } - } - - return 0; -} - static int pseries_eeh_cap_start(struct pci_dn *pdn) { u32 status; @@ -963,7 +881,6 @@ static int pseries_notify_resume(struct eeh_dev *edev) static struct eeh_ops pseries_eeh_ops = { .name = "pseries", - .init = pseries_eeh_init, .probe = pseries_eeh_probe, .set_option = pseries_eeh_set_option, .get_state = pseries_eeh_get_state, @@ -988,7 +905,77 @@ static struct eeh_ops pseries_eeh_ops = { */ static int __init eeh_pseries_init(void) { - int ret; + struct pci_controller *phb; + struct pci_dn *pdn; + int ret, addr, config_addr; + + /* figure out EEH RTAS function call tokens */ + ibm_set_eeh_option = rtas_token("ibm,set-eeh-option"); + ibm_set_slot_reset = rtas_token("ibm,set-slot-reset"); + ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2"); + ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state"); + ibm_slot_error_detail = rtas_token("ibm,slot-error-detail"); + ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2"); + ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info"); + ibm_configure_pe = rtas_token("ibm,configure-pe"); + + /* + * ibm,configure-pe and ibm,configure-bridge have the same semantics, + * however ibm,configure-pe can be faster. If we can't find + * ibm,configure-pe then fall back to using ibm,configure-bridge. + */ + if (ibm_configure_pe == RTAS_UNKNOWN_SERVICE) + ibm_configure_pe = rtas_token("ibm,configure-bridge"); + + /* + * Necessary sanity check. We needn't check "get-config-addr-info" + * and its variant since the old firmware probably support address + * of domain/bus/slot/function for EEH RTAS operations. 
+ */ + if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE || + ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE || + (ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE && + ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE) || + ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE || + ibm_configure_pe == RTAS_UNKNOWN_SERVICE) { + pr_info("EEH functionality not supported\n"); + return -EINVAL; + } + + /* Initialize error log lock and size */ + spin_lock_init(&slot_errbuf_lock); + eeh_error_buf_size = rtas_token("rtas-error-log-max"); + if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) { + pr_info("%s: unknown EEH error log size\n", + __func__); + eeh_error_buf_size = 1024; + } else if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) { + pr_info("%s: EEH error log size %d exceeds the maximal %d\n", + __func__, eeh_error_buf_size, RTAS_ERROR_LOG_MAX); + eeh_error_buf_size = RTAS_ERROR_LOG_MAX; + } + + /* Set EEH probe mode */ + eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG); + + /* Set EEH machine dependent code */ + ppc_md.pcibios_bus_add_device = pseries_pcibios_bus_add_device; + + if (is_kdump_kernel() || reset_devices) { + pr_info("Issue PHB reset ...\n"); + list_for_each_entry(phb, &hose_list, list_node) { + pdn = list_first_entry(&PCI_DN(phb->dn)->child_list, struct pci_dn, list); + addr = (pdn->busno << 16) | (pdn->devfn << 8); + config_addr = pseries_eeh_get_config_addr(phb, addr); + /* invalid PE config addr */ + if (config_addr == 0) + continue; + + pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_FUNDAMENTAL); + pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_DEACTIVATE); + pseries_eeh_phb_configure_bridge(phb, config_addr); + } + } ret = eeh_init(&pseries_eeh_ops); if (!ret) -- cgit v1.2.3 From 5d69e46a2104050c0a458c6bf6abba5f58f56e4c Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 18 Sep 2020 19:30:45 +1000 Subject: powerpc/eeh: Delete eeh_ops->init No longer used since the platforms perform their EEH initialisation before calling eeh_init(). Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200918093050.37344-4-oohall@gmail.com --- arch/powerpc/include/asm/eeh.h | 1 - arch/powerpc/kernel/eeh.c | 8 -------- 2 files changed, 9 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 765bcf63edea..85030c05e67e 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -216,7 +216,6 @@ enum { struct eeh_ops { char *name; - int (*init)(void); struct eeh_dev *(*probe)(struct pci_dev *pdev); int (*set_option)(struct eeh_pe *pe, int option); int (*get_state)(struct eeh_pe *pe, int *delay); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 28a0ea5d9faa..98faf139e676 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -965,14 +965,6 @@ int eeh_init(struct eeh_ops *ops) return ret; } - if (eeh_ops->init) - ret = eeh_ops->init(); - if (ret) { - pr_warn("%s: platform EEH init failed (%d)\n", - __func__, ret); - return ret; - } - /* Initialize PHB PEs */ list_for_each_entry_safe(hose, tmp, &hose_list, list_node) eeh_phb_pe_create(hose); -- cgit v1.2.3 From 395ee2a2a15ba1c4c7c414db24dc3082ba8feab8 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 18 Sep 2020 19:30:46 +1000 Subject: powerpc/eeh: Move EEH initialisation to an arch initcall The initialisation of EEH mostly happens in a core_initcall_sync initcall, followed by registering a bus notifier later on in an arch_initcall. 
Anything involving initcall dependecies is mostly incomprehensible unless you've spent a while staring at code so here's the full sequence: ppc_md.setup_arch <-- pci_controllers are created here ...time passes... core_initcall <-- pci_dns are created from DT nodes core_initcall_sync <-- platforms call eeh_init() postcore_initcall <-- PCI bus type is registered postcore_initcall_sync arch_initcall <-- EEH pci_bus notifier registered subsys_initcall <-- PHBs are scanned here There's no real requirement to do the EEH setup at the core_initcall_sync level. It just needs to be done after pci_dn's are created and before we start scanning PHBs. Simplify the flow a bit by moving the platform EEH inititalisation to an arch_initcall so we can fold the bus notifier registration into eeh_init(). Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200918093050.37344-5-oohall@gmail.com --- arch/powerpc/kernel/eeh.c | 64 ++++++++++++++-------------- arch/powerpc/platforms/powernv/eeh-powernv.c | 2 +- arch/powerpc/platforms/pseries/eeh_pseries.c | 2 +- 3 files changed, 34 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 98faf139e676..c9e25cfce8f0 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -940,6 +940,30 @@ static struct notifier_block eeh_reboot_nb = { .notifier_call = eeh_reboot_notifier, }; +static int eeh_device_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + + switch (action) { + /* + * Note: It's not possible to perform EEH device addition (i.e. + * {pseries,pnv}_pcibios_bus_add_device()) here because it depends on + * the device's resources, which have not yet been set up. + */ + case BUS_NOTIFY_DEL_DEVICE: + eeh_remove_device(to_pci_dev(dev)); + break; + default: + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block eeh_device_nb = { + .notifier_call = eeh_device_notifier, +}; + /** * eeh_init - System wide EEH initialization * @@ -960,7 +984,14 @@ int eeh_init(struct eeh_ops *ops) /* Register reboot notifier */ ret = register_reboot_notifier(&eeh_reboot_nb); if (ret) { - pr_warn("%s: Failed to register notifier (%d)\n", + pr_warn("%s: Failed to register reboot notifier (%d)\n", + __func__, ret); + return ret; + } + + ret = bus_register_notifier(&pci_bus_type, &eeh_device_nb); + if (ret) { + pr_warn("%s: Failed to register bus notifier (%d)\n", __func__, ret); return ret; } @@ -975,37 +1006,6 @@ int eeh_init(struct eeh_ops *ops) return eeh_event_init(); } -static int eeh_device_notifier(struct notifier_block *nb, - unsigned long action, void *data) -{ - struct device *dev = data; - - switch (action) { - /* - * Note: It's not possible to perform EEH device addition (i.e. - * {pseries,pnv}_pcibios_bus_add_device()) here because it depends on - * the device's resources, which have not yet been set up. 
- */ - case BUS_NOTIFY_DEL_DEVICE: - eeh_remove_device(to_pci_dev(dev)); - break; - default: - break; - } - return NOTIFY_DONE; -} - -static struct notifier_block eeh_device_nb = { - .notifier_call = eeh_device_notifier, -}; - -static __init int eeh_set_bus_notifier(void) -{ - bus_register_notifier(&pci_bus_type, &eeh_device_nb); - return 0; -} -arch_initcall(eeh_set_bus_notifier); - /** * eeh_probe_device() - Perform EEH initialization for the indicated pci device * @dev: pci device for which to set up EEH diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 5db92f39887a..b97ec796dd41 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -1721,4 +1721,4 @@ static int __init eeh_powernv_init(void) return ret; } -machine_core_initcall_sync(powernv, eeh_powernv_init); +machine_arch_initcall(powernv, eeh_powernv_init); diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index a3ed6a0507da..691b9a38b683 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -985,4 +985,4 @@ static int __init eeh_pseries_init(void) ret); return ret; } -machine_core_initcall_sync(pseries, eeh_pseries_init); +machine_arch_initcall(pseries, eeh_pseries_init); -- cgit v1.2.3 From f61c859feb5d19787c93d6b2b3d4beeca7260034 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 18 Sep 2020 19:30:47 +1000 Subject: powerpc/pseries/eeh: Clean up pe_config_addr lookups De-duplicate, and fix up the comments, and make the prototype just take a pci_dn since the job of the function is to return the pe_config_addr of the PE which contains a given device. Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200918093050.37344-6-oohall@gmail.com --- arch/powerpc/platforms/pseries/eeh_pseries.c | 80 ++++------------------------ 1 file changed, 11 insertions(+), 69 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 691b9a38b683..c96ee45a4689 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -33,8 +33,6 @@ #include #include -static int pseries_eeh_get_pe_addr(struct pci_dn *pdn); - /* RTAS tokens */ static int ibm_set_eeh_option; static int ibm_set_slot_reset; @@ -86,7 +84,8 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev) /** - * pseries_eeh_get_config_addr - Retrieve config address + * pseries_eeh_get_pe_config_addr - Find the pe_config_addr for a device + * @pdn: pci_dn of the input device * * Retrieve the assocated config address. Actually, there're 2 RTAS * function calls dedicated for the purpose. We need implement @@ -97,16 +96,17 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev) * It's notable that zero'ed return value means invalid PE config * address. */ -static int pseries_eeh_get_config_addr(struct pci_controller *phb, int config_addr) +static int pseries_eeh_get_pe_config_addr(struct pci_dn *pdn) { + int config_addr = rtas_config_addr(pdn->busno, pdn->devfn, 0); + struct pci_controller *phb = pdn->phb; int ret = 0; int rets[3]; if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) { /* - * First of all, we need to make sure there has one PE - * associated with the device. Otherwise, PE address is - * meaningless. 
+ * First of all, use function 1 to determine if this device is + * part of a PE or not. ret[0] being zero indicates it's not. */ ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, config_addr, BUID_HI(phb->buid), @@ -429,7 +429,7 @@ void pseries_eeh_init_edev(struct pci_dn *pdn) struct eeh_pe *parent; /* Retrieve PE address */ - edev->pe_config_addr = pseries_eeh_get_pe_addr(pdn); + edev->pe_config_addr = pseries_eeh_get_pe_config_addr(pdn); pe.addr = edev->pe_config_addr; /* Some older systems (Power4) allow the ibm,set-eeh-option @@ -548,64 +548,6 @@ static int pseries_eeh_set_option(struct eeh_pe *pe, int option) return ret; } -/** - * pseries_eeh_get_pe_addr - Retrieve PE address - * @pe: EEH PE - * - * Retrieve the assocated PE address. Actually, there're 2 RTAS - * function calls dedicated for the purpose. We need implement - * it through the new function and then the old one. Besides, - * you should make sure the config address is figured out from - * FDT node before calling the function. - * - * It's notable that zero'ed return value means invalid PE config - * address. - */ -static int pseries_eeh_get_pe_addr(struct pci_dn *pdn) -{ - int config_addr = rtas_config_addr(pdn->busno, pdn->devfn, 0); - unsigned long buid = pdn->phb->buid; - int ret = 0; - int rets[3]; - - if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) { - /* - * First of all, we need to make sure there has one PE - * associated with the device. Otherwise, PE address is - * meaningless. - */ - ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, - config_addr, BUID_HI(buid), BUID_LO(buid), 1); - if (ret || (rets[0] == 0)) - return 0; - - /* Retrieve the associated PE config address */ - ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, - config_addr, BUID_HI(buid), BUID_LO(buid), 0); - if (ret) { - pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n", - __func__, pdn->phb->global_number, config_addr); - return 0; - } - - return rets[0]; - } - - if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) { - ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets, - config_addr, BUID_HI(buid), BUID_LO(buid), 0); - if (ret) { - pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n", - __func__, pdn->phb->global_number, config_addr); - return 0; - } - - return rets[0]; - } - - return ret; -} - /** * pseries_eeh_get_state - Retrieve PE state * @pe: EEH PE @@ -907,7 +849,7 @@ static int __init eeh_pseries_init(void) { struct pci_controller *phb; struct pci_dn *pdn; - int ret, addr, config_addr; + int ret, config_addr; /* figure out EEH RTAS function call tokens */ ibm_set_eeh_option = rtas_token("ibm,set-eeh-option"); @@ -965,8 +907,8 @@ static int __init eeh_pseries_init(void) pr_info("Issue PHB reset ...\n"); list_for_each_entry(phb, &hose_list, list_node) { pdn = list_first_entry(&PCI_DN(phb->dn)->child_list, struct pci_dn, list); - addr = (pdn->busno << 16) | (pdn->devfn << 8); - config_addr = pseries_eeh_get_config_addr(phb, addr); + config_addr = pseries_eeh_get_pe_config_addr(pdn); + /* invalid PE config addr */ if (config_addr == 0) continue; -- cgit v1.2.3 From 98ba956f6a3891b233466b8da064f17d16dc2090 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 18 Sep 2020 19:30:48 +1000 Subject: powerpc/pseries/eeh: Rework device EEH PE determination The process Linux uses for determining if a device supports EEH or not appears to be at odds with what PAPR says the OS should be doing. The current flow is something like: 1. Assume pe_config_addr is equal the the device's config_addr. 2. 
Attempt to enable EEH on that PE 3. Verify EEH was enabled (POWER4 bug workaround) 4. Try to find the pe_config_addr using the ibm,get-config-addr-info2 RTAS call. 5. If that fails, walk the pci_dn tree upwards trying to find a parent device with EEH support. If we find one then add the device to that PE.

The first major problem with this process is that we need the PE config address in step 2) since it needs to be passed to the ibm,set-eeh-option RTAS call when enabling EEH for the PE. We hack around this requirement by making the assumption in 1) and delaying finding the actual PE address until 4). This is fine if: a) The PCI device is the 0th function, and b) The device is on the PE's root bus.

Granted, the current sequence does appear to work on most systems even when these conditions are false. At a guess PowerVM's RTAS has workarounds to accommodate Linux's quirks, or the RTAS call to enable EEH is treated as a no-op on most platforms since EEH is usually enabled by default. However, what is currently implemented is a bit sketchy and downright confusing since it doesn't match up with what PAPR suggests we should be doing.

This patch re-works how we handle EEH init so that we find the PE config address using the ibm,get-config-addr-info2 RTAS call first, then use the found address to finish the EEH init process. It also drops the POWER4 workaround since, as of commit 471d7ff8b51b ("powerpc/64s: Remove POWER4 support"), the kernel does not support running on a POWER4 CPU, so there's no need to support the POWER4 platform's quirks either. With the patch applied the sequence is now: 1. Find the pe_config_addr from the device using the RTAS call. 2. Enable the PE. 3. Insert the edev into the tree and create an eeh_pe if needed.

The other change made here is ignoring unsupported devices entirely. Currently the device's BARs are saved to the eeh_dev even if the device is not part of an EEH PE. Not being part of a PE means that an EEH recovery pass will never see that device, so saving the BARs is pointless.
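To make the ordering problem concrete, the call that enables EEH on a PE looks roughly like the sketch below (assembled from the RTAS tokens and helpers used elsewhere in this series, not quoted from the patch; the variable names are illustrative) and it cannot be issued without a pe_config_addr in hand:

	/* Sketch only: enable EEH for the PE identified by pe_config_addr */
	ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
			pe_config_addr,
			BUID_HI(phb->buid), BUID_LO(phb->buid),
			EEH_OPT_ENABLE);

This is why finding the PE address has to move ahead of the enable step.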
Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200918093050.37344-7-oohall@gmail.com --- arch/powerpc/platforms/pseries/eeh_pseries.c | 57 +++++++++++----------------- 1 file changed, 22 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index c96ee45a4689..4b88b482ef16 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -355,10 +355,10 @@ static struct eeh_pe *pseries_eeh_pe_get_parent(struct eeh_dev *edev) */ void pseries_eeh_init_edev(struct pci_dn *pdn) { + struct eeh_pe pe, *parent; struct eeh_dev *edev; - struct eeh_pe pe; + int addr; u32 pcie_flags; - int enable = 0; int ret; if (WARN_ON_ONCE(!eeh_has_flag(EEH_PROBE_MODE_DEVTREE))) @@ -415,51 +415,38 @@ void pseries_eeh_init_edev(struct pci_dn *pdn) } } - /* Initialize the fake PE */ + /* first up, find the pe_config_addr for the PE containing the device */ + addr = pseries_eeh_get_pe_config_addr(pdn); + if (addr == 0) { + eeh_edev_dbg(edev, "Unable to find pe_config_addr\n"); + goto err; + } + + /* Try enable EEH on the fake PE */ memset(&pe, 0, sizeof(struct eeh_pe)); pe.phb = pdn->phb; - pe.config_addr = (pdn->busno << 16) | (pdn->devfn << 8); + pe.addr = addr; - /* Enable EEH on the device */ eeh_edev_dbg(edev, "Enabling EEH on device\n"); ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE); if (ret) { eeh_edev_dbg(edev, "EEH failed to enable on device (code %d)\n", ret); - } else { - struct eeh_pe *parent; + goto err; + } - /* Retrieve PE address */ - edev->pe_config_addr = pseries_eeh_get_pe_config_addr(pdn); - pe.addr = edev->pe_config_addr; + edev->pe_config_addr = addr; - /* Some older systems (Power4) allow the ibm,set-eeh-option - * call to succeed even on nodes where EEH is not supported. - * Verify support explicitly. - */ - ret = eeh_ops->get_state(&pe, NULL); - if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT) - enable = 1; + eeh_add_flag(EEH_ENABLED); - /* - * This device doesn't support EEH, but it may have an - * EEH parent. In this case any error on the device will - * freeze the PE of it's upstream bridge, so added it to - * the upstream PE. - */ - parent = pseries_eeh_pe_get_parent(edev); - if (parent && !enable) - edev->pe_config_addr = parent->addr; + parent = pseries_eeh_pe_get_parent(edev); + eeh_pe_tree_insert(edev, parent); + eeh_save_bars(edev); + eeh_edev_dbg(edev, "EEH enabled for device"); - if (enable || parent) { - eeh_add_flag(EEH_ENABLED); - eeh_pe_tree_insert(edev, parent); - } - eeh_edev_dbg(edev, "EEH is %s on device (code %d)\n", - (enable ? "enabled" : "unsupported"), ret); - } + return; - /* Save memory bars */ - eeh_save_bars(edev); +err: + eeh_edev_dbg(edev, "EEH is unsupported on device (code = %d)\n", ret); } static struct eeh_dev *pseries_eeh_probe(struct pci_dev *pdev) -- cgit v1.2.3 From 42de19d5ef71b91765266557705394e52954adb3 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 18 Sep 2020 19:30:49 +1000 Subject: powerpc/pseries/eeh: Allow zero to be a valid PE configuration address There's no real reason why zero can't be a valid PE configuration address. Under qemu each sPAPR PHB (i.e. EEH supporting) has the passed-though devices on bus zero, so the PE address of bus :00 should be zero. However, all previous versions of Linux will reject that, so Qemu at least goes out of it's way to avoid it. 
The Qemu implementation of ibm,get-config-addr-info2 RTAS has the following comment: > /* > * We always have PE address of form "00BB0001". "BB" > * represents the bus number of PE's primary bus. > */ So qemu puts a one into the register portion of the PE's config_addr to avoid it being zero. The whole is pretty silly considering that RTAS will return a negative error code if it can't map the device's config_addr to a PE. This patch fixes Linux to treat zero as a valid PE address. This shouldn't have any real effects due to the Qemu hack mentioned above. And the fact that Linux EEH has worked historically on PowerVM means they never pass through devices on bus zero so we would never see the problem there either. Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200918093050.37344-8-oohall@gmail.com --- arch/powerpc/platforms/pseries/eeh_pseries.c | 38 ++++++++++++++++------------ 1 file changed, 22 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 4b88b482ef16..f8b546135bba 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -87,21 +87,20 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev) * pseries_eeh_get_pe_config_addr - Find the pe_config_addr for a device * @pdn: pci_dn of the input device * - * Retrieve the assocated config address. Actually, there're 2 RTAS - * function calls dedicated for the purpose. We need implement - * it through the new function and then the old one. Besides, - * you should make sure the config address is figured out from - * FDT node before calling the function. + * The EEH RTAS calls use a tuple consisting of: (buid_hi, buid_lo, + * pe_config_addr) as a handle to a given PE. This function finds the + * pe_config_addr based on the device's config addr. * - * It's notable that zero'ed return value means invalid PE config - * address. + * Keep in mind that the pe_config_addr *might* be numerically identical to the + * device's config addr, but the two are conceptually distinct. + * + * Returns the pe_config_addr, or a negative error code. */ static int pseries_eeh_get_pe_config_addr(struct pci_dn *pdn) { int config_addr = rtas_config_addr(pdn->busno, pdn->devfn, 0); struct pci_controller *phb = pdn->phb; - int ret = 0; - int rets[3]; + int ret, rets[3]; if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) { /* @@ -112,16 +111,16 @@ static int pseries_eeh_get_pe_config_addr(struct pci_dn *pdn) config_addr, BUID_HI(phb->buid), BUID_LO(phb->buid), 1); if (ret || (rets[0] == 0)) - return 0; + return -ENOENT; - /* Retrieve the associated PE config address */ + /* Retrieve the associated PE config address with function 0 */ ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, config_addr, BUID_HI(phb->buid), BUID_LO(phb->buid), 0); if (ret) { pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n", __func__, phb->global_number, config_addr); - return 0; + return -ENXIO; } return rets[0]; @@ -134,13 +133,20 @@ static int pseries_eeh_get_pe_config_addr(struct pci_dn *pdn) if (ret) { pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n", __func__, phb->global_number, config_addr); - return 0; + return -ENXIO; } return rets[0]; } - return ret; + /* + * PAPR does describe a process for finding the pe_config_addr that was + * used before the ibm,get-config-addr-info calls were added. 
However, + * I haven't found *any* systems that don't have that RTAS call + * implemented. If you happen to find one that needs the old DT based + * process, patches are welcome! + */ + return -ENOENT; } /** @@ -417,7 +423,7 @@ void pseries_eeh_init_edev(struct pci_dn *pdn) /* first up, find the pe_config_addr for the PE containing the device */ addr = pseries_eeh_get_pe_config_addr(pdn); - if (addr == 0) { + if (addr < 0) { eeh_edev_dbg(edev, "Unable to find pe_config_addr\n"); goto err; } @@ -897,7 +903,7 @@ static int __init eeh_pseries_init(void) config_addr = pseries_eeh_get_pe_config_addr(pdn); /* invalid PE config addr */ - if (config_addr == 0) + if (config_addr < 0) continue; pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_FUNDAMENTAL); -- cgit v1.2.3 From 35d64734b64315f2c5716c5a0a380ed1ba8fbe4a Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 18 Sep 2020 19:30:50 +1000 Subject: powerpc/eeh: Clean up PE addressing When support for EEH on PowerNV was added a lot of pseries specific code was made "generic" and some of the quirks of pseries EEH came along for the ride. One of the stranger quirks is eeh_pe containing two types of PE address: pe->addr and pe->config_addr. There reason for this appears to be historical baggage rather than any real requirements. On pseries EEH PEs are manipulated using RTAS calls. Each EEH RTAS call takes a "PE configuration address" as an input which is used to identify which EEH PE is being manipulated by the call. When initialising the EEH state for a device the first thing we need to do is determine the configuration address for the PE which contains the device so we can enable EEH on that PE. This process is outlined in PAPR which is the modern (i.e post-2003) FW specification for pseries. However, EEH support was first described in the pSeries RISC Platform Architecture (RPA) and although they are mostly compatible EEH is one of the areas where they are not. The major difference is that RPA doesn't actually have the concept of a PE. On RPA systems the EEH RTAS calls are done on a per-device basis using the same config_addr that would be passed to the RTAS functions to access PCI config space (e.g. ibm,read-pci-config). The config_addr is not identical since the function and config register offsets of the config_addr must be set to zero. EEH operations being done on a per-device basis doesn't make a whole lot of sense when you consider how EEH was implemented on legacy PCI systems. For legacy PCI(-X) systems EEH was implemented using special PCI-PCI bridges which contained logic to detect errors and freeze the secondary bus when one occurred. This means that the EEH enabled state is shared among all devices behind that EEH bridge. As a result there's no way to implement the per-device control required for the semantics specified by RPA. It can be made to work if we assume that a separate EEH bridge exists for each EEH capable PCI slot and there are no bridges behind those slots. However, RPA also specifies the ibm,configure-bridge RTAS call for re-initalising bridges behind EEH capable slots after they are reset due to an EEH event so that is probably not a valid assumption. This incoherence was fixed in later PAPR, which succeeded RPA. Unfortunately, since Linux EEH support seems to have been implemented based on the RPA spec some of the legacy assumptions were carried over (probably for POWER4 compatibility). 
The fix made in PAPR was the introduction of the "PE" concept and redefining the EEH RTAS calls (set-eeh-option, reset-slot, etc) to operate on a per-PE basis so all devices behind an EEH bridge would share the same EEH state. The "config_addr" argument to the EEH RTAS calls became the "PE_config_addr" and the OS was required to use the ibm,get-config-addr-info RTAS call to find the correct PE address for the device. When support for the new interfaces was added to Linux it was implemented using something like:

At probe time:

	pdn->eeh_config_addr = rtas_config_addr(pdn);
	pdn->eeh_pe_config_addr = rtas_get_config_addr_info(pdn);

When performing an RTAS call:

	config_addr = pdn->eeh_config_addr;
	if (pdn->eeh_pe_config_addr)
		config_addr = pdn->eeh_pe_config_addr;
	rtas_call(..., config_addr, ...);

In other words, if the ibm,get-config-addr-info RTAS call is implemented and returned a valid result we'd use that as the argument to the EEH RTAS calls. If not, Linux would fall back to using the device's config_addr. Over time these addresses have moved around, going from pci_dn to eeh_dev and finally into eeh_pe. Today the users look like this:

	config_addr = pe->config_addr;
	if (pe->addr)
		config_addr = pe->addr;
	rtas_call(..., config_addr, ...);

However, considering the EEH core always operates on a per-PE basis and even on pseries the only per-device operation is the initial call to ibm,set-eeh-option, I'm not sure if any of this actually works on an RPA system today. It doesn't make much sense to have the fallback address in a generic structure either, since the bulk of the code which references it is in pseries anyway.

The EEH core makes a token effort to support looking up a PE using the config_addr by having two arguments to eeh_pe_get(). However, a survey of all the callers to eeh_pe_get() shows that all bar one have the config_addr argument hard-coded to zero. The only caller that doesn't is in eeh_pe_tree_insert(), which has:

	if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr)
		return -EINVAL;
	pe = eeh_pe_get(hose, edev->pe_config_addr, edev->bdfn);

The third argument (config_addr) is only used if the second (pe->addr) argument is invalid. The preceding check ensures that the call to eeh_pe_get() will never happen if edev->pe_config_addr is invalid, so there is no situation where eeh_pe_get() will search for a PE based on the 3rd argument. The check also means that we'll never insert a PE into the tree where pe_config_addr is zero, since EEH_VALID_PE_ZERO is never set on pseries.

The users of the fallback address on pseries never actually use the fallback, and the only caller that supplies something for the config_addr argument to eeh_pe_get() never uses it either. It's all dead code. This patch removes the fallback address from eeh_pe since nothing uses it. Specifically, we do this by: 1) Removing pe->config_addr 2) Removing the EEH_VALID_PE_ZERO flag 3) Removing the fallback address argument to eeh_pe_get(). 4) Removing all the checks for pe->addr being zero in the pseries EEH code. This leaves us with PEs only being identified by what's in their pe->addr field and the EEH core relying on the platform to ensure that eeh_devs are only inserted into the EEH tree if they're actually inside a PE. No functional changes, I hope.
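Once the fallback is gone, every pseries RTAS user reduces to the single pattern below (a summary of the hunks in the diff that follows, not additional code):

	ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
			pe->addr, BUID_HI(pe->phb->buid),
			BUID_LO(pe->phb->buid), option);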
Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200918093050.37344-9-oohall@gmail.com --- arch/powerpc/include/asm/eeh.h | 4 +-- arch/powerpc/kernel/eeh.c | 2 +- arch/powerpc/kernel/eeh_pe.c | 46 ++++------------------------ arch/powerpc/platforms/powernv/eeh-powernv.c | 16 ++-------- arch/powerpc/platforms/pseries/eeh_pseries.c | 42 ++++--------------------- 5 files changed, 17 insertions(+), 93 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 85030c05e67e..dd6a4ac6c713 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -27,7 +27,6 @@ struct pci_dn; #define EEH_FORCE_DISABLED 0x02 /* EEH disabled */ #define EEH_PROBE_MODE_DEV 0x04 /* From PCI device */ #define EEH_PROBE_MODE_DEVTREE 0x08 /* From device tree */ -#define EEH_VALID_PE_ZERO 0x10 /* PE#0 is valid */ #define EEH_ENABLE_IO_FOR_LOG 0x20 /* Enable IO for log */ #define EEH_EARLY_DUMP_LOG 0x40 /* Dump log immediately */ @@ -280,8 +279,7 @@ int eeh_phb_pe_create(struct pci_controller *phb); int eeh_wait_state(struct eeh_pe *pe, int max_wait); struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb); struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, struct eeh_pe *root); -struct eeh_pe *eeh_pe_get(struct pci_controller *phb, - int pe_no, int config_addr); +struct eeh_pe *eeh_pe_get(struct pci_controller *phb, int pe_no); int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent); int eeh_pe_tree_remove(struct eeh_dev *edev); void eeh_pe_update_time_stamp(struct eeh_pe *pe); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index c9e25cfce8f0..87de8b798b2d 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1657,7 +1657,7 @@ static ssize_t eeh_force_recover_write(struct file *filp, return -ENODEV; /* Retrieve PE */ - pe = eeh_pe_get(hose, pe_no, 0); + pe = eeh_pe_get(hose, pe_no); if (!pe) return -ENODEV; diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index d2aaaa73fdd5..61b7d4019051 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -251,43 +251,21 @@ void eeh_pe_dev_traverse(struct eeh_pe *root, /** * __eeh_pe_get - Check the PE address - * @data: EEH PE - * @flag: EEH device * * For one particular PE, it can be identified by PE address * or tranditional BDF address. BDF address is composed of * Bus/Device/Function number. The extra data referred by flag * indicates which type of address should be used. */ -struct eeh_pe_get_flag { - int pe_no; - int config_addr; -}; - static void *__eeh_pe_get(struct eeh_pe *pe, void *flag) { - struct eeh_pe_get_flag *tmp = (struct eeh_pe_get_flag *) flag; + int *target_pe = flag; - /* Unexpected PHB PE */ + /* PHB PEs are special and should be ignored */ if (pe->type & EEH_PE_PHB) return NULL; - /* - * We prefer PE address. 
For most cases, we should - * have non-zero PE address - */ - if (eeh_has_flag(EEH_VALID_PE_ZERO)) { - if (tmp->pe_no == pe->addr) - return pe; - } else { - if (tmp->pe_no && - (tmp->pe_no == pe->addr)) - return pe; - } - - /* Try BDF address */ - if (tmp->config_addr && - (tmp->config_addr == pe->config_addr)) + if (*target_pe == pe->addr) return pe; return NULL; @@ -297,7 +275,6 @@ static void *__eeh_pe_get(struct eeh_pe *pe, void *flag) * eeh_pe_get - Search PE based on the given address * @phb: PCI controller * @pe_no: PE number - * @config_addr: Config address * * Search the corresponding PE based on the specified address which * is included in the eeh device. The function is used to check if @@ -306,16 +283,11 @@ static void *__eeh_pe_get(struct eeh_pe *pe, void *flag) * which is composed of PCI bus/device/function number, or unified * PE address. */ -struct eeh_pe *eeh_pe_get(struct pci_controller *phb, - int pe_no, int config_addr) +struct eeh_pe *eeh_pe_get(struct pci_controller *phb, int pe_no) { struct eeh_pe *root = eeh_phb_pe_get(phb); - struct eeh_pe_get_flag tmp = { pe_no, config_addr }; - struct eeh_pe *pe; - - pe = eeh_pe_traverse(root, __eeh_pe_get, &tmp); - return pe; + return eeh_pe_traverse(root, __eeh_pe_get, &pe_no); } /** @@ -336,19 +308,13 @@ int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent) struct pci_controller *hose = edev->controller; struct eeh_pe *pe, *parent; - /* Check if the PE number is valid */ - if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) { - eeh_edev_err(edev, "PE#0 is invalid for this PHB!\n"); - return -EINVAL; - } - /* * Search the PE has been existing or not according * to the PE address. If that has been existing, the * PE should be composed of PCI bus and its subordinate * components. */ - pe = eeh_pe_get(hose, edev->pe_config_addr, edev->bdfn); + pe = eeh_pe_get(hose, edev->pe_config_addr); if (pe) { if (pe->type & EEH_PE_INVALID) { list_add_tail(&edev->entry, &pe->edevs); diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index b97ec796dd41..89e22c460ebf 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -87,7 +87,7 @@ static ssize_t pnv_eeh_ei_write(struct file *filp, return -EINVAL; /* Retrieve PE */ - pe = eeh_pe_get(hose, pe_no, 0); + pe = eeh_pe_get(hose, pe_no); if (!pe) return -ENODEV; @@ -306,7 +306,7 @@ static struct eeh_pe *pnv_eeh_get_upstream_pe(struct pci_dev *pdev) if (parent) { struct pnv_ioda_pe *ioda_pe = pnv_ioda_get_pe(parent); - return eeh_pe_get(phb->hose, ioda_pe->pe_number, 0); + return eeh_pe_get(phb->hose, ioda_pe->pe_number); } return NULL; @@ -1358,7 +1358,7 @@ static int pnv_eeh_get_pe(struct pci_controller *hose, } /* Find the PE according to PE# */ - dev_pe = eeh_pe_get(hose, pe_no, 0); + dev_pe = eeh_pe_get(hose, pe_no); if (!dev_pe) return -EEXIST; @@ -1693,16 +1693,6 @@ static int __init eeh_powernv_init(void) if (phb->diag_data_size > max_diag_size) max_diag_size = phb->diag_data_size; - /* - * PE#0 should be regarded as valid by EEH core - * if it's not the reserved one. Currently, we - * have the reserved PE#255 and PE#127 for PHB3 - * and P7IOC separately. So we should regard - * PE#0 as valid for PHB3 and P7IOC. 
- */ - if (phb->ioda.reserved_pe_idx != 0) - eeh_add_flag(EEH_VALID_PE_ZERO); - break; } diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index f8b546135bba..d8fd5f7b2143 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -509,7 +509,6 @@ EXPORT_SYMBOL_GPL(pseries_eeh_init_edev_recursive); static int pseries_eeh_set_option(struct eeh_pe *pe, int option) { int ret = 0; - int config_addr; /* * When we're enabling or disabling EEH functioality on @@ -522,9 +521,6 @@ static int pseries_eeh_set_option(struct eeh_pe *pe, int option) case EEH_OPT_ENABLE: case EEH_OPT_THAW_MMIO: case EEH_OPT_THAW_DMA: - config_addr = pe->config_addr; - if (pe->addr) - config_addr = pe->addr; break; case EEH_OPT_FREEZE_PE: /* Not support */ @@ -535,7 +531,7 @@ static int pseries_eeh_set_option(struct eeh_pe *pe, int option) } ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL, - config_addr, BUID_HI(pe->phb->buid), + pe->addr, BUID_HI(pe->phb->buid), BUID_LO(pe->phb->buid), option); return ret; @@ -556,25 +552,19 @@ static int pseries_eeh_set_option(struct eeh_pe *pe, int option) */ static int pseries_eeh_get_state(struct eeh_pe *pe, int *delay) { - int config_addr; int ret; int rets[4]; int result; - /* Figure out PE config address if possible */ - config_addr = pe->config_addr; - if (pe->addr) - config_addr = pe->addr; - if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) { ret = rtas_call(ibm_read_slot_reset_state2, 3, 4, rets, - config_addr, BUID_HI(pe->phb->buid), + pe->addr, BUID_HI(pe->phb->buid), BUID_LO(pe->phb->buid)); } else if (ibm_read_slot_reset_state != RTAS_UNKNOWN_SERVICE) { /* Fake PE unavailable info */ rets[2] = 0; ret = rtas_call(ibm_read_slot_reset_state, 3, 3, rets, - config_addr, BUID_HI(pe->phb->buid), + pe->addr, BUID_HI(pe->phb->buid), BUID_LO(pe->phb->buid)); } else { return EEH_STATE_NOT_SUPPORT; @@ -628,14 +618,7 @@ static int pseries_eeh_get_state(struct eeh_pe *pe, int *delay) */ static int pseries_eeh_reset(struct eeh_pe *pe, int option) { - int config_addr; - - /* Figure out PE address */ - config_addr = pe->config_addr; - if (pe->addr) - config_addr = pe->addr; - - return pseries_eeh_phb_reset(pe->phb, config_addr, option); + return pseries_eeh_phb_reset(pe->phb, pe->addr, option); } /** @@ -651,19 +634,13 @@ static int pseries_eeh_reset(struct eeh_pe *pe, int option) */ static int pseries_eeh_get_log(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len) { - int config_addr; unsigned long flags; int ret; spin_lock_irqsave(&slot_errbuf_lock, flags); memset(slot_errbuf, 0, eeh_error_buf_size); - /* Figure out the PE address */ - config_addr = pe->config_addr; - if (pe->addr) - config_addr = pe->addr; - - ret = rtas_call(ibm_slot_error_detail, 8, 1, NULL, config_addr, + ret = rtas_call(ibm_slot_error_detail, 8, 1, NULL, pe->addr, BUID_HI(pe->phb->buid), BUID_LO(pe->phb->buid), virt_to_phys(drv_log), len, virt_to_phys(slot_errbuf), eeh_error_buf_size, @@ -682,14 +659,7 @@ static int pseries_eeh_get_log(struct eeh_pe *pe, int severity, char *drv_log, u */ static int pseries_eeh_configure_bridge(struct eeh_pe *pe) { - int config_addr; - - /* Figure out the PE address */ - config_addr = pe->config_addr; - if (pe->addr) - config_addr = pe->addr; - - return pseries_eeh_phb_configure_bridge(pe->phb, config_addr); + return pseries_eeh_phb_configure_bridge(pe->phb, pe->addr); } /** -- cgit v1.2.3 From d0ffdee8ff01fb21085d835ee54dc8c1c4d19226 Mon Sep 17 00:00:00 
2001 From: Gustavo Romero Date: Sat, 19 Sep 2020 12:00:25 -0300 Subject: powerpc/tm: Save and restore AMR on treclaim and trechkpt Althought AMR is stashed in the checkpoint area, currently we don't save it to the per thread checkpoint struct after a treclaim and so we don't restore it either from that struct when we trechkpt. As a consequence when the transaction is later rolled back the kernel space AMR value when the trechkpt was done appears in userspace. That commit saves and restores AMR accordingly on treclaim and trechkpt. Since AMR value is also used in kernel space in other functions, it also takes care of stashing kernel live AMR into the stack before treclaim and before trechkpt, restoring it later, just before returning from tm_reclaim and __tm_recheckpoint. Is also fixes two nonrelated comments about CR and MSR. Signed-off-by: Gustavo Romero Tested-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200919150025.9609-1-gromero@linux.ibm.com --- arch/powerpc/include/asm/processor.h | 1 + arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kernel/tm.S | 35 +++++++++++++++++++++++++++++++---- 3 files changed, 33 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 22ffe85a91b8..365290b9a24b 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -220,6 +220,7 @@ struct thread_struct { unsigned long tm_tar; unsigned long tm_ppr; unsigned long tm_dscr; + unsigned long tm_amr; /* * Checkpointed FP and VSX 0-31 register set. diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 8711c2164b45..c2722ff36e98 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -176,6 +176,7 @@ int main(void) OFFSET(THREAD_TM_TAR, thread_struct, tm_tar); OFFSET(THREAD_TM_PPR, thread_struct, tm_ppr); OFFSET(THREAD_TM_DSCR, thread_struct, tm_dscr); + OFFSET(THREAD_TM_AMR, thread_struct, tm_amr); OFFSET(PT_CKPT_REGS, thread_struct, ckpt_regs); OFFSET(THREAD_CKVRSTATE, thread_struct, ckvr_state.vr); OFFSET(THREAD_CKVRSAVE, thread_struct, ckvrsave); diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 6ba0fdd1e7f8..2b91f233b05d 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -122,6 +122,13 @@ _GLOBAL(tm_reclaim) std r3, STK_PARAM(R3)(r1) SAVE_NVGPRS(r1) + /* + * Save kernel live AMR since it will be clobbered by treclaim + * but can be used elsewhere later in kernel space. + */ + mfspr r3, SPRN_AMR + std r3, TM_FRAME_L1(r1) + /* We need to setup MSR for VSX register save instructions. */ mfmsr r14 mr r15, r14 @@ -245,7 +252,7 @@ _GLOBAL(tm_reclaim) * but is used in signal return to 'wind back' to the abort handler. */ - /* ******************** CR,LR,CCR,MSR ********** */ + /* ***************** CTR, LR, CR, XER ********** */ mfctr r3 mflr r4 mfcr r5 @@ -256,7 +263,6 @@ _GLOBAL(tm_reclaim) std r5, _CCR(r7) std r6, _XER(r7) - /* ******************** TAR, DSCR ********** */ mfspr r3, SPRN_TAR mfspr r4, SPRN_DSCR @@ -264,6 +270,10 @@ _GLOBAL(tm_reclaim) std r3, THREAD_TM_TAR(r12) std r4, THREAD_TM_DSCR(r12) + /* ******************** AMR **************** */ + mfspr r3, SPRN_AMR + std r3, THREAD_TM_AMR(r12) + /* * MSR and flags: We don't change CRs, and we don't need to alter MSR. */ @@ -308,7 +318,9 @@ _GLOBAL(tm_reclaim) std r3, THREAD_TM_TFHAR(r12) std r4, THREAD_TM_TFIAR(r12) - /* AMR is checkpointed too, but is unsupported by Linux. 
*/ + /* Restore kernel live AMR */ + ld r8, TM_FRAME_L1(r1) + mtspr SPRN_AMR, r8 /* Restore original MSR/IRQ state & clear TM mode */ ld r14, TM_FRAME_L0(r1) /* Orig MSR */ @@ -355,6 +367,13 @@ _GLOBAL(__tm_recheckpoint) */ SAVE_NVGPRS(r1) + /* + * Save kernel live AMR since it will be clobbered for trechkpt + * but can be used elsewhere later in kernel space. + */ + mfspr r8, SPRN_AMR + std r8, TM_FRAME_L0(r1) + /* Load complete register state from ts_ckpt* registers */ addi r7, r3, PT_CKPT_REGS /* Thread's ckpt_regs */ @@ -404,7 +423,7 @@ _GLOBAL(__tm_recheckpoint) restore_gprs: - /* ******************** CR,LR,CCR,MSR ********** */ + /* ****************** CTR, LR, XER ************* */ ld r4, _CTR(r7) ld r5, _LINK(r7) ld r8, _XER(r7) @@ -417,6 +436,10 @@ restore_gprs: ld r4, THREAD_TM_TAR(r3) mtspr SPRN_TAR, r4 + /* ******************** AMR ******************** */ + ld r4, THREAD_TM_AMR(r3) + mtspr SPRN_AMR, r4 + /* Load up the PPR and DSCR in GPRs only at this stage */ ld r5, THREAD_TM_DSCR(r3) ld r6, THREAD_TM_PPR(r3) @@ -509,6 +532,10 @@ restore_gprs: li r4, MSR_RI mtmsrd r4, 1 + /* Restore kernel live AMR */ + ld r8, TM_FRAME_L0(r1) + mtspr SPRN_AMR, r8 + REST_NVGPRS(r1) addi r1, r1, TM_FRAME_SIZE -- cgit v1.2.3 From 4bce545903fa0290e011cf118997717f0c4f4d20 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 21 Sep 2020 15:26:43 +0530 Subject: powerpc/topology: Update topology_core_cpumask On Power, cpu_core_mask and cpu_cpu_mask refer to the same set of CPUs. cpu_cpu_mask is needed by scheduler, hence look at deprecating cpu_core_mask. Before deleting the cpu_core_mask, ensure its only user is moved to cpu_cpu_mask. Signed-off-by: Srikar Dronamraju Tested-by: Satheesh Rajendran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200921095653.9701-2-srikar@linux.vnet.ibm.com --- arch/powerpc/include/asm/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 6609174918ab..e0f232533c9d 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -122,7 +122,7 @@ int get_physical_package_id(int cpu); #endif #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) -#define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) +#define topology_core_cpumask(cpu) (cpu_cpu_mask(cpu)) #define topology_core_id(cpu) (cpu_to_core_id(cpu)) #endif -- cgit v1.2.3 From 4ca234a9cbd7c3a656b34dd98c8b156f70ed7849 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 21 Sep 2020 15:26:44 +0530 Subject: powerpc/smp: Stop updating cpu_core_mask Anton Blanchard reported that his 4096 vcpu KVM guest took around 30 minutes to boot. He also analyzed it to the time taken to iterate while setting the cpu_core_mask. Further analysis shows that cpu_core_mask and cpu_cpu_mask for any CPU would be equal on Power. However updating cpu_core_mask took forever to update as its a per cpu cpumask variable. Instead cpu_cpu_mask was a per NODE /per DIE cpumask that was shared by all the respective CPUs. Also cpu_cpu_mask is needed from a scheduler perspective. However cpu_core_map is an exported symbol. Hence stop updating cpu_core_map and make it point to cpu_cpu_mask. 
Signed-off-by: Srikar Dronamraju Tested-by: Satheesh Rajendran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200921095653.9701-3-srikar@linux.vnet.ibm.com --- arch/powerpc/include/asm/smp.h | 5 ----- arch/powerpc/kernel/smp.c | 33 +++++++-------------------------- 2 files changed, 7 insertions(+), 31 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 635bdf947105..b2035b2f57ce 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -121,11 +121,6 @@ static inline struct cpumask *cpu_sibling_mask(int cpu) return per_cpu(cpu_sibling_map, cpu); } -static inline struct cpumask *cpu_core_mask(int cpu) -{ - return per_cpu(cpu_core_map, cpu); -} - static inline struct cpumask *cpu_l2_cache_mask(int cpu) { return per_cpu(cpu_l2_cache_map, cpu); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 58990baa5182..bf6d4192adda 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -953,12 +953,17 @@ void __init smp_prepare_cpus(unsigned int max_cpus) local_memory_node(numa_cpu_lookup_table[cpu])); } #endif + /* + * cpu_core_map is now more updated and exists only since + * its been exported for long. It only will have a snapshot + * of cpu_cpu_mask. + */ + cpumask_copy(per_cpu(cpu_core_map, cpu), cpu_cpu_mask(cpu)); } /* Init the cpumasks so the boot CPU is related to itself */ cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid)); cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid)); - cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid)); if (has_coregroup_support()) cpumask_set_cpu(boot_cpuid, cpu_coregroup_mask(boot_cpuid)); @@ -1260,9 +1265,7 @@ static void remove_cpu_from_masks(int cpu) { int i; - /* NB: cpu_core_mask is a superset of the others */ - for_each_cpu(i, cpu_core_mask(cpu)) { - set_cpus_unrelated(cpu, i, cpu_core_mask); + for_each_cpu(i, cpu_cpu_mask(cpu)) { set_cpus_unrelated(cpu, i, cpu_l2_cache_mask); set_cpus_unrelated(cpu, i, cpu_sibling_mask); if (has_big_cores) @@ -1312,7 +1315,6 @@ EXPORT_SYMBOL_GPL(get_physical_package_id); static void add_cpu_to_masks(int cpu) { int first_thread = cpu_first_thread_sibling(cpu); - int pkg_id = get_physical_package_id(cpu); int i; /* @@ -1320,7 +1322,6 @@ static void add_cpu_to_masks(int cpu) * add it to it's own thread sibling mask. */ cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); - cpumask_set_cpu(cpu, cpu_core_mask(cpu)); for (i = first_thread; i < first_thread + threads_per_core; i++) if (cpu_online(i)) @@ -1342,26 +1343,6 @@ static void add_cpu_to_masks(int cpu) set_cpus_related(cpu, i, cpu_coregroup_mask); } } - - if (pkg_id == -1) { - struct cpumask *(*mask)(int) = cpu_sibling_mask; - - /* - * Copy the sibling mask into core sibling mask and - * mark any CPUs on the same chip as this CPU. - */ - if (shared_caches) - mask = cpu_l2_cache_mask; - - for_each_cpu(i, mask(cpu)) - set_cpus_related(cpu, i, cpu_core_mask); - - return; - } - - for_each_cpu(i, cpu_online_mask) - if (get_physical_package_id(i) == pkg_id) - set_cpus_related(cpu, i, cpu_core_mask); } /* Activate a secondary processor. */ -- cgit v1.2.3 From e29e9ed665eeb6f98cd88672994ecf4aaefdb943 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 21 Sep 2020 15:26:45 +0530 Subject: powerpc/smp: Remove get_physical_package_id Now that cpu_core_mask has been removed and topology_core_cpumask has been updated to use cpu_cpu_mask, we no more need get_physical_package_id. 
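As a rough illustration of the cost being removed (the CPU count is the one from the report, the rest is back-of-the-envelope): onlining CPU n used to mark pairwise relations against every already-online CPU in the same package, so a 4096-vCPU guest performs on the order of

	4096 * 4095 / 2  ~=  8.4 million cpumask updates

during boot, each touching a per-CPU mask sized for NR_CPUS bits. Copying cpu_cpu_mask into cpu_core_map once per CPU makes that work linear in the number of CPUs.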
Signed-off-by: Srikar Dronamraju Tested-by: Satheesh Rajendran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200921095653.9701-4-srikar@linux.vnet.ibm.com --- arch/powerpc/include/asm/topology.h | 5 ----- arch/powerpc/kernel/smp.c | 20 -------------------- 2 files changed, 25 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index e0f232533c9d..e45219f74be0 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -114,12 +114,7 @@ static inline int cpu_to_coregroup_id(int cpu) #ifdef CONFIG_PPC64 #include -#ifdef CONFIG_PPC_SPLPAR -int get_physical_package_id(int cpu); -#define topology_physical_package_id(cpu) (get_physical_package_id(cpu)) -#else #define topology_physical_package_id(cpu) (cpu_to_chip_id(cpu)) -#endif #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) #define topology_core_cpumask(cpu) (cpu_cpu_mask(cpu)) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index bf6d4192adda..aecc01f0e95e 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1292,26 +1292,6 @@ static inline void add_cpu_to_smallcore_masks(int cpu) } } -int get_physical_package_id(int cpu) -{ - int pkg_id = cpu_to_chip_id(cpu); - - /* - * If the platform is PowerNV or Guest on KVM, ibm,chip-id is - * defined. Hence we would return the chip-id as the result of - * get_physical_package_id. - */ - if (pkg_id == -1 && firmware_has_feature(FW_FEATURE_LPAR) && - IS_ENABLED(CONFIG_PPC_SPLPAR)) { - struct device_node *np = of_get_cpu_node(cpu, NULL); - pkg_id = of_node_to_nid(np); - of_node_put(np); - } - - return pkg_id; -} -EXPORT_SYMBOL_GPL(get_physical_package_id); - static void add_cpu_to_masks(int cpu) { int first_thread = cpu_first_thread_sibling(cpu); -- cgit v1.2.3 From 70edd4a7c753ba18e3e4bb9e97b6d85156cea738 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 21 Sep 2020 15:26:46 +0530 Subject: powerpc/smp: Optimize remove_cpu_from_masks While offlining a CPU, system currently iterate through all the CPUs in the DIE to clear sibling, l2_cache and smallcore maps. However if there are more cores in a DIE, system can end up spending more time iterating through CPUs which are completely unrelated. Optimize this by only iterating through smaller but relevant cpumap. If shared_cache is set, cpu_l2_cache_map should be relevant else cpu_sibling_map would be relevant. 
Signed-off-by: Srikar Dronamraju Tested-by: Satheesh Rajendran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200921095653.9701-5-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index aecc01f0e95e..9cdb966f00b3 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1263,14 +1263,21 @@ static bool update_mask_by_l2(int cpu, struct cpumask *(*mask_fn)(int)) #ifdef CONFIG_HOTPLUG_CPU static void remove_cpu_from_masks(int cpu) { + struct cpumask *(*mask_fn)(int) = cpu_sibling_mask; int i; - for_each_cpu(i, cpu_cpu_mask(cpu)) { + if (shared_caches) + mask_fn = cpu_l2_cache_mask; + + for_each_cpu(i, mask_fn(cpu)) { set_cpus_unrelated(cpu, i, cpu_l2_cache_mask); set_cpus_unrelated(cpu, i, cpu_sibling_mask); if (has_big_cores) set_cpus_unrelated(cpu, i, cpu_smallcore_mask); - if (has_coregroup_support()) + } + + if (has_coregroup_support()) { + for_each_cpu(i, cpu_coregroup_mask(cpu)) set_cpus_unrelated(cpu, i, cpu_coregroup_mask); } } -- cgit v1.2.3 From 53516d4abacfab1faaa075c1f79957abc3da358c Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 21 Sep 2020 15:26:47 +0530 Subject: powerpc/smp: Limit CPUs traversed to within a node. All the arch specific topology cpumasks are within a node/DIE. However when setting these per CPU cpumasks, system traverses through all the online CPUs. This is redundant. Reduce the traversal to only CPUs that are online in the node to which the CPU belongs to. Signed-off-by: Srikar Dronamraju Tested-by: Satheesh Rajendran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200921095653.9701-6-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 9cdb966f00b3..9455af47123c 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1241,7 +1241,7 @@ static bool update_mask_by_l2(int cpu, struct cpumask *(*mask_fn)(int)) } cpumask_set_cpu(cpu, mask_fn(cpu)); - for_each_cpu(i, cpu_online_mask) { + for_each_cpu_and(i, cpu_online_mask, cpu_cpu_mask(cpu)) { /* * when updating the marks the current CPU has not been marked * online, but we need to update the cache masks -- cgit v1.2.3 From 1f3a4181042107e32e44047e9dde990aced845b5 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 21 Sep 2020 15:26:48 +0530 Subject: powerpc/smp: Stop passing mask to update_mask_by_l2 update_mask_by_l2 is called only once. But it passes cpu_l2_cache_mask as parameter. Instead of passing cpu_l2_cache_mask, use it directly in update_mask_by_l2. 
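To put numbers on it (example topology, not taken from the commit): on a DIE with 16 SMT8 cores, the per-offline work shrinks from the full DIE to just the CPUs that can actually be related:

	old: for_each_cpu(i, cpu_cpu_mask(cpu))   /* 128 iterations in this example         */
	new: for_each_cpu(i, mask_fn(cpu))        /* 8 siblings, or the L2 group when
	                                             shared_caches is set                    */

plus a separate, equally small walk of cpu_coregroup_mask where coregroups are supported.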
Signed-off-by: Srikar Dronamraju Tested-by: Satheesh Rajendran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200921095653.9701-7-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 9455af47123c..0e96f6687363 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1218,7 +1218,7 @@ static struct device_node *cpu_to_l2cache(int cpu) return cache; } -static bool update_mask_by_l2(int cpu, struct cpumask *(*mask_fn)(int)) +static bool update_mask_by_l2(int cpu) { struct device_node *l2_cache, *np; int i; @@ -1240,7 +1240,7 @@ static bool update_mask_by_l2(int cpu, struct cpumask *(*mask_fn)(int)) return false; } - cpumask_set_cpu(cpu, mask_fn(cpu)); + cpumask_set_cpu(cpu, cpu_l2_cache_mask(cpu)); for_each_cpu_and(i, cpu_online_mask, cpu_cpu_mask(cpu)) { /* * when updating the marks the current CPU has not been marked @@ -1251,7 +1251,7 @@ static bool update_mask_by_l2(int cpu, struct cpumask *(*mask_fn)(int)) continue; if (np == l2_cache) - set_cpus_related(cpu, i, mask_fn); + set_cpus_related(cpu, i, cpu_l2_cache_mask); of_node_put(np); } @@ -1315,7 +1315,7 @@ static void add_cpu_to_masks(int cpu) set_cpus_related(i, cpu, cpu_sibling_mask); add_cpu_to_smallcore_masks(cpu); - update_mask_by_l2(cpu, cpu_l2_cache_mask); + update_mask_by_l2(cpu); if (has_coregroup_support()) { int coregroup_id = cpu_to_coregroup_id(cpu); -- cgit v1.2.3 From 661e3d42f99193b7fdd71467a87e48f6e597c285 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 21 Sep 2020 15:26:49 +0530 Subject: powerpc/smp: Depend on cpu_l1_cache_map when adding CPUs Currently on hotplug/hotunplug, CPU iterates through all the CPUs in its core to find threads in its thread group. However this info is already captured in cpu_l1_cache_map. Hence reduce iterations and cleanup add_cpu_to_smallcore_masks function. Signed-off-by: Srikar Dronamraju Tested-by: Satheesh Rajendran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200921095653.9701-8-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 0e96f6687363..3a3e6bbab29c 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1285,16 +1285,15 @@ static void remove_cpu_from_masks(int cpu) static inline void add_cpu_to_smallcore_masks(int cpu) { - struct cpumask *this_l1_cache_map = per_cpu(cpu_l1_cache_map, cpu); - int i, first_thread = cpu_first_thread_sibling(cpu); + int i; if (!has_big_cores) return; cpumask_set_cpu(cpu, cpu_smallcore_mask(cpu)); - for (i = first_thread; i < first_thread + threads_per_core; i++) { - if (cpu_online(i) && cpumask_test_cpu(i, this_l1_cache_map)) + for_each_cpu(i, per_cpu(cpu_l1_cache_map, cpu)) { + if (cpu_online(i)) set_cpus_related(i, cpu, cpu_smallcore_mask); } } -- cgit v1.2.3 From 375370a10d061d5c75c6bc5b09c5db4cc0b0fcfe Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 21 Sep 2020 15:26:50 +0530 Subject: powerpc/smp: Check for duplicate topologies and consolidate CACHE and COREGROUP domains are now part of default topology. However on systems that don't support CACHE or COREGROUP, these domains will eventually be degenerated. The degeneration happens per CPU. 
Do note the current fixup_topology() logic ensures that mask of a domain that is not supported on the current platform is set to the previous domain. Instead of waiting for the scheduler to degenerated try to consolidate based on their masks and sd_flags. This is done just before setting the scheduler topology. Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200921095653.9701-9-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 3a3e6bbab29c..917c8598cf61 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1401,6 +1401,8 @@ int setup_profiling_timer(unsigned int multiplier) static void fixup_topology(void) { + int i; + #ifdef CONFIG_SCHED_SMT if (has_big_cores) { pr_info("Big cores detected but using small core scheduling\n"); @@ -1410,6 +1412,30 @@ static void fixup_topology(void) if (!has_coregroup_support()) powerpc_topology[mc_idx].mask = powerpc_topology[cache_idx].mask; + + /* + * Try to consolidate topology levels here instead of + * allowing scheduler to degenerate. + * - Dont consolidate if masks are different. + * - Dont consolidate if sd_flags exists and are different. + */ + for (i = 1; i <= die_idx; i++) { + if (powerpc_topology[i].mask != powerpc_topology[i - 1].mask) + continue; + + if (powerpc_topology[i].sd_flags && powerpc_topology[i - 1].sd_flags && + powerpc_topology[i].sd_flags != powerpc_topology[i - 1].sd_flags) + continue; + + if (!powerpc_topology[i - 1].sd_flags) + powerpc_topology[i - 1].sd_flags = powerpc_topology[i].sd_flags; + + powerpc_topology[i].mask = powerpc_topology[i + 1].mask; + powerpc_topology[i].sd_flags = powerpc_topology[i + 1].sd_flags; +#ifdef CONFIG_SCHED_DEBUG + powerpc_topology[i].name = powerpc_topology[i + 1].name; +#endif + } } void __init smp_cpus_done(unsigned int max_cpus) -- cgit v1.2.3 From 3ab33d6dc3e98e83b55732049e1d1d488207bb6d Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 21 Sep 2020 15:26:51 +0530 Subject: powerpc/smp: Optimize update_mask_by_l2 All threads of a SMT4 core can either be part of this CPU's l2-cache mask or not related to this CPU l2-cache mask. Use this relation to reduce the number of iterations needed to find all the CPUs that share the same l2-cache. Use a temporary mask to iterate through the CPUs that may share l2_cache mask. Also instead of setting one CPU at a time into cpu_l2_cache_mask, copy the SMT4/sub mask at one shot. Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200921095653.9701-10-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 51 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 917c8598cf61..925251b0bb0f 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -670,6 +670,28 @@ static void set_cpus_unrelated(int i, int j, } #endif +/* + * Extends set_cpus_related. Instead of setting one CPU at a time in + * dstmask, set srcmask at oneshot. dstmask should be super set of srcmask. 
+ */ +static void or_cpumasks_related(int i, int j, struct cpumask *(*srcmask)(int), + struct cpumask *(*dstmask)(int)) +{ + struct cpumask *mask; + int k; + + mask = srcmask(j); + for_each_cpu(k, srcmask(i)) + cpumask_or(dstmask(k), dstmask(k), mask); + + if (i == j) + return; + + mask = srcmask(i); + for_each_cpu(k, srcmask(j)) + cpumask_or(dstmask(k), dstmask(k), mask); +} + /* * parse_thread_groups: Parses the "ibm,thread-groups" device tree * property for the CPU device node @dn and stores @@ -1220,7 +1242,9 @@ static struct device_node *cpu_to_l2cache(int cpu) static bool update_mask_by_l2(int cpu) { + struct cpumask *(*submask_fn)(int) = cpu_sibling_mask; struct device_node *l2_cache, *np; + cpumask_var_t mask; int i; l2_cache = cpu_to_l2cache(cpu); @@ -1240,22 +1264,37 @@ static bool update_mask_by_l2(int cpu) return false; } - cpumask_set_cpu(cpu, cpu_l2_cache_mask(cpu)); - for_each_cpu_and(i, cpu_online_mask, cpu_cpu_mask(cpu)) { + alloc_cpumask_var_node(&mask, GFP_KERNEL, cpu_to_node(cpu)); + cpumask_and(mask, cpu_online_mask, cpu_cpu_mask(cpu)); + + if (has_big_cores) + submask_fn = cpu_smallcore_mask; + + /* Update l2-cache mask with all the CPUs that are part of submask */ + or_cpumasks_related(cpu, cpu, submask_fn, cpu_l2_cache_mask); + + /* Skip all CPUs already part of current CPU l2-cache mask */ + cpumask_andnot(mask, mask, cpu_l2_cache_mask(cpu)); + + for_each_cpu(i, mask) { /* * when updating the marks the current CPU has not been marked * online, but we need to update the cache masks */ np = cpu_to_l2cache(i); - if (!np) - continue; - if (np == l2_cache) - set_cpus_related(cpu, i, cpu_l2_cache_mask); + /* Skip all CPUs already part of current CPU l2-cache */ + if (np == l2_cache) { + or_cpumasks_related(cpu, i, submask_fn, cpu_l2_cache_mask); + cpumask_andnot(mask, mask, submask_fn(i)); + } else { + cpumask_andnot(mask, mask, cpu_l2_cache_mask(i)); + } of_node_put(np); } of_node_put(l2_cache); + free_cpumask_var(mask); return true; } -- cgit v1.2.3 From b8a97cb4599cda28bd3b3bc13042f5803b42ad65 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 21 Sep 2020 15:26:52 +0530 Subject: powerpc/smp: Move coregroup mask updation to a new function Move the logic for updating the coregroup mask of a CPU to its own function. This will help in reworking the updation of coregroup mask in subsequent patch. 
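The coregroup rework below reuses the shrinking-candidate-mask walk that the optimized update_mask_by_l2() above introduces. A condensed, annotated sketch of that walk follows; it is not a drop-in replacement: it assumes the per-CPU mask helpers from arch/powerpc/include/asm/smp.h, omits error handling, the device-tree lookups and the big-core (SMT8) case, and shares_l2_with() is only a placeholder for the cpu_to_l2cache() node comparison in the real code.

#include <linux/cpumask.h>
#include <linux/topology.h>

/* Placeholder for the cpu_to_l2cache() device-node comparison in the real code. */
static bool shares_l2_with(int a, int b);

/* Condensed sketch of the shrinking-candidate-mask walk used by update_mask_by_l2(). */
static void sketch_update_l2_mask(int cpu)
{
	struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
	cpumask_var_t mask;
	int i;

	if (!alloc_cpumask_var_node(&mask, GFP_KERNEL, cpu_to_node(cpu)))
		return;

	/* Candidate CPUs: online and in the same node as @cpu. */
	cpumask_and(mask, cpu_online_mask, cpu_cpu_mask(cpu));

	/* @cpu's SMT siblings share its L2 by definition: add them in one shot. */
	or_cpumasks_related(cpu, cpu, submask_fn, cpu_l2_cache_mask);
	cpumask_andnot(mask, mask, cpu_l2_cache_mask(cpu));

	for_each_cpu(i, mask) {
		if (shares_l2_with(cpu, i)) {
			/* i's whole sibling group joins @cpu's L2 mask, so skip it from now on. */
			or_cpumasks_related(cpu, i, submask_fn, cpu_l2_cache_mask);
			cpumask_andnot(mask, mask, submask_fn(i));
		} else {
			/* i's L2 group is known not to match; skip it as well. */
			cpumask_andnot(mask, mask, cpu_l2_cache_mask(i));
		}
	}
	free_cpumask_var(mask);
}

The key point is that each loop iteration removes an entire sibling or L2 group from the candidate mask, so the walk visits roughly one CPU per core rather than every online CPU.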
Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200921095653.9701-11-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 925251b0bb0f..45619433c43a 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1337,6 +1337,23 @@ static inline void add_cpu_to_smallcore_masks(int cpu) } } +static void update_coregroup_mask(int cpu) +{ + int first_thread = cpu_first_thread_sibling(cpu); + int coregroup_id = cpu_to_coregroup_id(cpu); + int i; + + cpumask_set_cpu(cpu, cpu_coregroup_mask(cpu)); + for_each_cpu_and(i, cpu_online_mask, cpu_cpu_mask(cpu)) { + int fcpu = cpu_first_thread_sibling(i); + + if (fcpu == first_thread) + set_cpus_related(cpu, i, cpu_coregroup_mask); + else if (coregroup_id == cpu_to_coregroup_id(i)) + set_cpus_related(cpu, i, cpu_coregroup_mask); + } +} + static void add_cpu_to_masks(int cpu) { int first_thread = cpu_first_thread_sibling(cpu); @@ -1355,19 +1372,8 @@ static void add_cpu_to_masks(int cpu) add_cpu_to_smallcore_masks(cpu); update_mask_by_l2(cpu); - if (has_coregroup_support()) { - int coregroup_id = cpu_to_coregroup_id(cpu); - - cpumask_set_cpu(cpu, cpu_coregroup_mask(cpu)); - for_each_cpu_and(i, cpu_online_mask, cpu_cpu_mask(cpu)) { - int fcpu = cpu_first_thread_sibling(i); - - if (fcpu == first_thread) - set_cpus_related(cpu, i, cpu_coregroup_mask); - else if (coregroup_id == cpu_to_coregroup_id(i)) - set_cpus_related(cpu, i, cpu_coregroup_mask); - } - } + if (has_coregroup_support()) + update_coregroup_mask(cpu); } /* Activate a secondary processor. */ -- cgit v1.2.3 From 70a94089d7f7fa91bc1795622426b3ed017ec71a Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 21 Sep 2020 15:26:53 +0530 Subject: powerpc/smp: Optimize update_coregroup_mask All threads of a SMT4/SMT8 core can either be part of CPU's coregroup mask or outside the coregroup. Use this relation to reduce the number of iterations needed to find all the CPUs that share the same coregroup Use a temporary mask to iterate through the CPUs that may share coregroup mask. Also instead of setting one CPU at a time into cpu_coregroup_mask, copy the SMT4/SMT8/submask at one shot. 
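The "one CPU at a time" helper being replaced here, set_cpus_related(), is not shown in this excerpt; it is roughly the following, which makes the contrast with or_cpumasks_related() from the earlier patch easy to see:

/* Rough shape of set_cpus_related(); illustrative, taken from context rather than this series. */
static void set_cpus_related(int i, int j, struct cpumask *(*get_cpumask)(int))
{
	cpumask_set_cpu(i, get_cpumask(j));	/* one bit ...      */
	cpumask_set_cpu(j, get_cpumask(i));	/* ... at a time    */
}

or_cpumasks_related(), by contrast, ORs the whole submask (the SMT4/SMT8 siblings) into each destination mask with a single cpumask_or() per CPU, which is what lets the loops above and below skip entire cores at once.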
Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200921095653.9701-12-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 45619433c43a..0dc1b8591cc8 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1339,19 +1339,33 @@ static inline void add_cpu_to_smallcore_masks(int cpu) static void update_coregroup_mask(int cpu) { - int first_thread = cpu_first_thread_sibling(cpu); + struct cpumask *(*submask_fn)(int) = cpu_sibling_mask; + cpumask_var_t mask; int coregroup_id = cpu_to_coregroup_id(cpu); int i; - cpumask_set_cpu(cpu, cpu_coregroup_mask(cpu)); - for_each_cpu_and(i, cpu_online_mask, cpu_cpu_mask(cpu)) { - int fcpu = cpu_first_thread_sibling(i); + alloc_cpumask_var_node(&mask, GFP_KERNEL, cpu_to_node(cpu)); + cpumask_and(mask, cpu_online_mask, cpu_cpu_mask(cpu)); + + if (shared_caches) + submask_fn = cpu_l2_cache_mask; + + /* Update coregroup mask with all the CPUs that are part of submask */ + or_cpumasks_related(cpu, cpu, submask_fn, cpu_coregroup_mask); + + /* Skip all CPUs already part of coregroup mask */ + cpumask_andnot(mask, mask, cpu_coregroup_mask(cpu)); - if (fcpu == first_thread) - set_cpus_related(cpu, i, cpu_coregroup_mask); - else if (coregroup_id == cpu_to_coregroup_id(i)) - set_cpus_related(cpu, i, cpu_coregroup_mask); + for_each_cpu(i, mask) { + /* Skip all CPUs not part of this coregroup */ + if (coregroup_id == cpu_to_coregroup_id(i)) { + or_cpumasks_related(cpu, i, submask_fn, cpu_coregroup_mask); + cpumask_andnot(mask, mask, submask_fn(i)); + } else { + cpumask_andnot(mask, mask, cpu_coregroup_mask(i)); + } } + free_cpumask_var(mask); } static void add_cpu_to_masks(int cpu) -- cgit v1.2.3 From 3b6c3adbb2fa42749c3d38cfc4d4d0b7e096bb7b Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Mon, 21 Sep 2020 03:10:04 -0400 Subject: powerpc/perf: Exclude pmc5/6 from the irrelevant PMU group constraints PMU counter support functions enforces event constraints for group of events to check if all events in a group can be monitored. Incase of event codes using PMC5 and PMC6 ( 500fa and 600f4 respectively ), not all constraints are applicable, say the threshold or sample bits. But current code includes pmc5 and pmc6 in some group constraints (like IC_DC Qualifier bits) which is actually not applicable and hence results in those events not getting counted when scheduled along with group of other events. Patch fixes this by excluding PMC5/6 from constraints which are not relevant for it. Fixes: 7ffd948 ("powerpc/perf: factor out power8 pmu functions") Signed-off-by: Athira Rajeev Reviewed-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1600672204-1610-1-git-send-email-atrajeev@linux.vnet.ibm.com --- arch/powerpc/perf/isa207-common.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 964437adec18..2848904df638 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -288,6 +288,15 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) mask |= CNST_PMC_MASK(pmc); value |= CNST_PMC_VAL(pmc); + + /* + * PMC5 and PMC6 are used to count cycles and instructions and + * they do not support most of the constraint bits. 
Add a check + * to exclude PMC5/6 from most of the constraints except for + * EBB/BHRB. + */ + if (pmc >= 5) + goto ebb_bhrb; } if (pmc <= 4) { @@ -357,6 +366,7 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) } } +ebb_bhrb: if (!pmc && ebb) /* EBB events must specify the PMC */ return -1; -- cgit v1.2.3 From bd59380c5ba4147dcbaad3e582b55ccfd120b764 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Thu, 20 Aug 2020 14:45:12 +1000 Subject: powerpc/rtas: Restrict RTAS requests from userspace A number of userspace utilities depend on making calls to RTAS to retrieve information and update various things. The existing API through which we expose RTAS to userspace exposes more RTAS functionality than we actually need, through the sys_rtas syscall, which allows root (or anyone with CAP_SYS_ADMIN) to make any RTAS call they want with arbitrary arguments. Many RTAS calls take the address of a buffer as an argument, and it's up to the caller to specify the physical address of the buffer as an argument. We allocate a buffer (the "RMO buffer") in the Real Memory Area that RTAS can access, and then expose the physical address and size of this buffer in /proc/powerpc/rtas/rmo_buffer. Userspace is expected to read this address, poke at the buffer using /dev/mem, and pass an address in the RMO buffer to the RTAS call. However, there's nothing stopping the caller from specifying whatever address they want in the RTAS call, and it's easy to construct a series of RTAS calls that can overwrite arbitrary bytes (even without /dev/mem access). Additionally, there are some RTAS calls that do potentially dangerous things and for which there are no legitimate userspace use cases. In the past, this would not have been a particularly big deal as it was assumed that root could modify all system state freely, but with Secure Boot and lockdown we need to care about this. We can't fundamentally change the ABI at this point, however we can address this by implementing a filter that checks RTAS calls against a list of permitted calls and forces the caller to use addresses within the RMO buffer. The list is based off the list of calls that are used by the librtas userspace library, and has been tested with a number of existing userspace RTAS utilities. For compatibility with any applications we are not aware of that require other calls, the filter can be turned off at build time. Cc: stable@vger.kernel.org Reported-by: Daniel Axtens Signed-off-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200820044512.7543-1-ajd@linux.ibm.com --- arch/powerpc/Kconfig | 13 ++++ arch/powerpc/kernel/rtas.c | 153 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 166 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index fe0e6b317cc2..6c76caa950e1 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -1004,6 +1004,19 @@ config PPC_SECVAR_SYSFS read/write operations on these variables. Say Y if you have secure boot enabled and want to expose variables to userspace. +config PPC_RTAS_FILTER + bool "Enable filtering of RTAS syscalls" + default y + depends on PPC_RTAS + help + The RTAS syscall API has security issues that could be used to + compromise system integrity. This option enforces restrictions on the + RTAS calls and arguments passed by userspace programs to mitigate + these issues. + + Say Y unless you know what you are doing and the filter is causing + problems for you. 
+ endmenu config ISA_DMA_API diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 806d554ce357..954f41676f69 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -992,6 +992,147 @@ struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log, return NULL; } +#ifdef CONFIG_PPC_RTAS_FILTER + +/* + * The sys_rtas syscall, as originally designed, allows root to pass + * arbitrary physical addresses to RTAS calls. A number of RTAS calls + * can be abused to write to arbitrary memory and do other things that + * are potentially harmful to system integrity, and thus should only + * be used inside the kernel and not exposed to userspace. + * + * All known legitimate users of the sys_rtas syscall will only ever + * pass addresses that fall within the RMO buffer, and use a known + * subset of RTAS calls. + * + * Accordingly, we filter RTAS requests to check that the call is + * permitted, and that provided pointers fall within the RMO buffer. + * The rtas_filters list contains an entry for each permitted call, + * with the indexes of the parameters which are expected to contain + * addresses and sizes of buffers allocated inside the RMO buffer. + */ +struct rtas_filter { + const char *name; + int token; + /* Indexes into the args buffer, -1 if not used */ + int buf_idx1; + int size_idx1; + int buf_idx2; + int size_idx2; + + int fixed_size; +}; + +static struct rtas_filter rtas_filters[] __ro_after_init = { + { "ibm,activate-firmware", -1, -1, -1, -1, -1 }, + { "ibm,configure-connector", -1, 0, -1, 1, -1, 4096 }, /* Special cased */ + { "display-character", -1, -1, -1, -1, -1 }, + { "ibm,display-message", -1, 0, -1, -1, -1 }, + { "ibm,errinjct", -1, 2, -1, -1, -1, 1024 }, + { "ibm,close-errinjct", -1, -1, -1, -1, -1 }, + { "ibm,open-errinct", -1, -1, -1, -1, -1 }, + { "ibm,get-config-addr-info2", -1, -1, -1, -1, -1 }, + { "ibm,get-dynamic-sensor-state", -1, 1, -1, -1, -1 }, + { "ibm,get-indices", -1, 2, 3, -1, -1 }, + { "get-power-level", -1, -1, -1, -1, -1 }, + { "get-sensor-state", -1, -1, -1, -1, -1 }, + { "ibm,get-system-parameter", -1, 1, 2, -1, -1 }, + { "get-time-of-day", -1, -1, -1, -1, -1 }, + { "ibm,get-vpd", -1, 0, -1, 1, 2 }, + { "ibm,lpar-perftools", -1, 2, 3, -1, -1 }, + { "ibm,platform-dump", -1, 4, 5, -1, -1 }, + { "ibm,read-slot-reset-state", -1, -1, -1, -1, -1 }, + { "ibm,scan-log-dump", -1, 0, 1, -1, -1 }, + { "ibm,set-dynamic-indicator", -1, 2, -1, -1, -1 }, + { "ibm,set-eeh-option", -1, -1, -1, -1, -1 }, + { "set-indicator", -1, -1, -1, -1, -1 }, + { "set-power-level", -1, -1, -1, -1, -1 }, + { "set-time-for-power-on", -1, -1, -1, -1, -1 }, + { "ibm,set-system-parameter", -1, 1, -1, -1, -1 }, + { "set-time-of-day", -1, -1, -1, -1, -1 }, + { "ibm,suspend-me", -1, -1, -1, -1, -1 }, + { "ibm,update-nodes", -1, 0, -1, -1, -1, 4096 }, + { "ibm,update-properties", -1, 0, -1, -1, -1, 4096 }, + { "ibm,physical-attestation", -1, 0, 1, -1, -1 }, +}; + +static bool in_rmo_buf(u32 base, u32 end) +{ + return base >= rtas_rmo_buf && + base < (rtas_rmo_buf + RTAS_RMOBUF_MAX) && + base <= end && + end >= rtas_rmo_buf && + end < (rtas_rmo_buf + RTAS_RMOBUF_MAX); +} + +static bool block_rtas_call(int token, int nargs, + struct rtas_args *args) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) { + struct rtas_filter *f = &rtas_filters[i]; + u32 base, size, end; + + if (token != f->token) + continue; + + if (f->buf_idx1 != -1) { + base = be32_to_cpu(args->args[f->buf_idx1]); + if (f->size_idx1 != -1) + size = 
be32_to_cpu(args->args[f->size_idx1]); + else if (f->fixed_size) + size = f->fixed_size; + else + size = 1; + + end = base + size - 1; + if (!in_rmo_buf(base, end)) + goto err; + } + + if (f->buf_idx2 != -1) { + base = be32_to_cpu(args->args[f->buf_idx2]); + if (f->size_idx2 != -1) + size = be32_to_cpu(args->args[f->size_idx2]); + else if (f->fixed_size) + size = f->fixed_size; + else + size = 1; + end = base + size - 1; + + /* + * Special case for ibm,configure-connector where the + * address can be 0 + */ + if (!strcmp(f->name, "ibm,configure-connector") && + base == 0) + return false; + + if (!in_rmo_buf(base, end)) + goto err; + } + + return false; + } + +err: + pr_err_ratelimited("sys_rtas: RTAS call blocked - exploit attempt?\n"); + pr_err_ratelimited("sys_rtas: token=0x%x, nargs=%d (called by %s)\n", + token, nargs, current->comm); + return true; +} + +#else + +static bool block_rtas_call(int token, int nargs, + struct rtas_args *args) +{ + return false; +} + +#endif /* CONFIG_PPC_RTAS_FILTER */ + /* We assume to be passed big endian arguments */ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) { @@ -1029,6 +1170,9 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) args.rets = &args.args[nargs]; memset(args.rets, 0, nret * sizeof(rtas_arg_t)); + if (block_rtas_call(token, nargs, &args)) + return -EINVAL; + /* Need to handle ibm,suspend_me call specially */ if (token == ibm_suspend_me_token) { @@ -1090,6 +1234,9 @@ void __init rtas_initialize(void) unsigned long rtas_region = RTAS_INSTANTIATE_MAX; u32 base, size, entry; int no_base, no_size, no_entry; +#ifdef CONFIG_PPC_RTAS_FILTER + int i; +#endif /* Get RTAS dev node and fill up our "rtas" structure with infos * about it. @@ -1129,6 +1276,12 @@ void __init rtas_initialize(void) #ifdef CONFIG_RTAS_ERROR_LOGGING rtas_last_error_token = rtas_token("rtas-last-error"); #endif + +#ifdef CONFIG_PPC_RTAS_FILTER + for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) { + rtas_filters[i].token = rtas_token(rtas_filters[i].name); + } +#endif } int __init early_init_dt_scan_rtas(unsigned long node, -- cgit v1.2.3 From 72cdd117c449896c707fc6cfe5b90978160697d0 Mon Sep 17 00:00:00 2001 From: Scott Cheloha Date: Wed, 16 Sep 2020 09:51:22 -0500 Subject: pseries/hotplug-memory: hot-add: skip redundant LMB lookup During memory hot-add, dlpar_add_lmb() calls memory_add_physaddr_to_nid() to determine which node id (nid) to use when later calling __add_memory(). This is wasteful. On pseries, memory_add_physaddr_to_nid() finds an appropriate nid for a given address by looking up the LMB containing the address and then passing that LMB to of_drconf_to_nid_single() to get the nid. In dlpar_add_lmb() we get this address from the LMB itself. In short, we have a pointer to an LMB and then we are searching for that LMB *again* in order to find its nid. If we call of_drconf_to_nid_single() directly from dlpar_add_lmb() we can skip the redundant lookup. The only error handling we need to duplicate from memory_add_physaddr_to_nid() is the fallback to the default nid when drconf_to_nid_single() returns -1 (NUMA_NO_NODE) or an invalid nid. Skipping the extra lookup makes hot-add operations faster, especially on machines with many LMBs. Consider an LPAR with 126976 LMBs. 
In one test, hot-adding 126000 LMBs on an upatched kernel took ~3.5 hours while a patched kernel completed the same operation in ~2 hours: Unpatched (12450 seconds): Sep 9 04:06:31 ltc-brazos1 drmgr[810169]: drmgr: -c mem -a -q 126000 Sep 9 04:06:31 ltc-brazos1 kernel: pseries-hotplug-mem: Attempting to hot-add 126000 LMB(s) [...] Sep 9 07:34:01 ltc-brazos1 kernel: pseries-hotplug-mem: Memory at 20000000 (drc index 80000002) was hot-added Patched (7065 seconds): Sep 8 21:49:57 ltc-brazos1 drmgr[877703]: drmgr: -c mem -a -q 126000 Sep 8 21:49:57 ltc-brazos1 kernel: pseries-hotplug-mem: Attempting to hot-add 126000 LMB(s) [...] Sep 8 23:27:42 ltc-brazos1 kernel: pseries-hotplug-mem: Memory at 20000000 (drc index 80000002) was hot-added It should be noted that the speedup grows more substantial when hot-adding LMBs at the end of the drconf range. This is because we are skipping a linear LMB search. To see the distinction, consider smaller hot-add test on the same LPAR. A perf-stat run with 10 iterations showed that hot-adding 4096 LMBs completed less than 1 second faster on a patched kernel: Unpatched: Performance counter stats for 'drmgr -c mem -a -q 4096' (10 runs): 104,753.42 msec task-clock # 0.992 CPUs utilized ( +- 0.55% ) 4,708 context-switches # 0.045 K/sec ( +- 0.69% ) 2,444 cpu-migrations # 0.023 K/sec ( +- 1.25% ) 394 page-faults # 0.004 K/sec ( +- 0.22% ) 445,902,503,057 cycles # 4.257 GHz ( +- 0.55% ) (66.67%) 8,558,376,740 stalled-cycles-frontend # 1.92% frontend cycles idle ( +- 0.88% ) (49.99%) 300,346,181,651 stalled-cycles-backend # 67.36% backend cycles idle ( +- 0.76% ) (50.01%) 258,091,488,691 instructions # 0.58 insn per cycle # 1.16 stalled cycles per insn ( +- 0.22% ) (66.67%) 70,568,169,256 branches # 673.660 M/sec ( +- 0.17% ) (50.01%) 3,100,725,426 branch-misses # 4.39% of all branches ( +- 0.20% ) (49.99%) 105.583 +- 0.589 seconds time elapsed ( +- 0.56% ) Patched: Performance counter stats for 'drmgr -c mem -a -q 4096' (10 runs): 104,055.69 msec task-clock # 0.993 CPUs utilized ( +- 0.32% ) 4,606 context-switches # 0.044 K/sec ( +- 0.20% ) 2,463 cpu-migrations # 0.024 K/sec ( +- 0.93% ) 394 page-faults # 0.004 K/sec ( +- 0.25% ) 442,951,129,921 cycles # 4.257 GHz ( +- 0.32% ) (66.66%) 8,710,413,329 stalled-cycles-frontend # 1.97% frontend cycles idle ( +- 0.47% ) (50.06%) 299,656,905,836 stalled-cycles-backend # 67.65% backend cycles idle ( +- 0.39% ) (50.02%) 252,731,168,193 instructions # 0.57 insn per cycle # 1.19 stalled cycles per insn ( +- 0.20% ) (66.66%) 68,902,851,121 branches # 662.173 M/sec ( +- 0.13% ) (49.94%) 3,100,242,882 branch-misses # 4.50% of all branches ( +- 0.15% ) (49.98%) 104.829 +- 0.325 seconds time elapsed ( +- 0.31% ) This is consistent. An add-by-count hot-add operation adds LMBs greedily, so LMBs near the start of the drconf range are considered first. On an otherwise idle LPAR with so many LMBs we would expect to find the LMBs we need near the start of the drconf range, hence the smaller speedup. 
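To make the redundancy concrete, here is a rough sketch of the kind of walk memory_add_physaddr_to_nid() implies on pseries, compared with the single of_drconf_to_nid_single() call the patch makes on the LMB it already holds. The function name and body below are illustrative only; the real implementation differs in detail.

#include <asm/drmem.h>
#include <asm/topology.h>

/* Hypothetical illustration of the redundant path, not the real function body. */
static int addr_to_nid_by_lmb_walk(u64 addr)
{
	struct drmem_lmb *lmb;

	for_each_drmem_lmb(lmb) {	/* linear walk over (up to) 126976 LMBs */
		if (addr >= lmb->base_addr &&
		    addr < lmb->base_addr + drmem_lmb_size())
			return of_drconf_to_nid_single(lmb);
	}
	return NUMA_NO_NODE;
}

dlpar_add_lmb() starts from a struct drmem_lmb pointer, so the whole walk above only rediscovers the argument it was given.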
Signed-off-by: Scott Cheloha Reviewed-by: Laurent Dufour Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200916145122.3408129-1-cheloha@linux.ibm.com --- arch/powerpc/include/asm/topology.h | 3 +++ arch/powerpc/mm/numa.c | 2 +- arch/powerpc/platforms/pseries/hotplug-memory.c | 6 ++++-- 3 files changed, 8 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index e45219f74be0..8728590f514a 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -86,6 +86,9 @@ static inline int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) #endif /* CONFIG_NUMA */ +struct drmem_lmb; +int of_drconf_to_nid_single(struct drmem_lmb *lmb); + #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR) extern int find_and_online_cpu_nid(int cpu); extern int cpu_to_coregroup_id(int cpu); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index b725fb66e913..8335399b7509 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -430,7 +430,7 @@ static int of_get_assoc_arrays(struct assoc_arrays *aa) * This is like of_node_to_nid_single() for memory represented in the * ibm,dynamic-reconfiguration-memory node. */ -static int of_drconf_to_nid_single(struct drmem_lmb *lmb) +int of_drconf_to_nid_single(struct drmem_lmb *lmb) { struct assoc_arrays aa = { .arrays = NULL }; int default_nid = NUMA_NO_NODE; diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 0ea976d1cac4..9a533acf8ad0 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -611,8 +611,10 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) block_sz = memory_block_size_bytes(); - /* Find the node id for this address. */ - nid = memory_add_physaddr_to_nid(lmb->base_addr); + /* Find the node id for this LMB. Fake one if necessary. */ + nid = of_drconf_to_nid_single(lmb); + if (nid < 0 || !node_possible(nid)) + nid = first_online_node; /* Add the memory */ rc = __add_memory(nid, lmb->base_addr, block_sz); -- cgit v1.2.3 From 269e583357df32d77368903214f10f43fa5d7a5f Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Wed, 7 Oct 2020 15:09:02 +1100 Subject: powerpc/eeh: Delete eeh_pe->config_addr The eeh_pe->config_addr field was supposed to be removed in commit 35d64734b643 ("powerpc/eeh: Clean up PE addressing") which made it largely unused. Finish the job. 
Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201007040903.819081-1-oohall@gmail.com --- arch/powerpc/include/asm/eeh.h | 1 - arch/powerpc/kernel/eeh.c | 2 +- arch/powerpc/kernel/eeh_pe.c | 4 ++-- 3 files changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index dd6a4ac6c713..b1a5bba2e0b9 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -73,7 +73,6 @@ struct pci_dn; struct eeh_pe { int type; /* PE type: PHB/Bus/Device */ int state; /* PE EEH dependent mode */ - int config_addr; /* Traditional PCI address */ int addr; /* PE configuration address */ struct pci_controller *phb; /* Associated PHB */ struct pci_bus *bus; /* Top PCI bus for bus PE */ diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 87de8b798b2d..0e160dffcb86 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -466,7 +466,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev) return 0; } - if (!pe->addr && !pe->config_addr) { + if (!pe->addr) { eeh_stats.no_cfg_addr++; return 0; } diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 61b7d4019051..845e024321d4 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -354,8 +354,8 @@ int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent) pr_err("%s: out of memory!\n", __func__); return -ENOMEM; } - pe->addr = edev->pe_config_addr; - pe->config_addr = edev->bdfn; + + pe->addr = edev->pe_config_addr; /* * Put the new EEH PE into hierarchy tree. If the parent -- cgit v1.2.3 From 8175bd580e629dcf9cc507794da774a6b8d3a9bd Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Wed, 7 Oct 2020 15:09:03 +1100 Subject: powerpc/pseries/eeh: Fix use of uninitialised variable If the RTAS call to query the PE address for a device fails we jump the err: label where an error message is printed along with the return code. However, the printed return code is from the "ret" variable which isn't set at that point since we assigned the result to "addr" instead. 
Fix this by consistently using the "ret" variable for the result of the RTAS call helpers an dropping the "addr" local variable" Fixes: 98ba956f6a38 ("powerpc/pseries/eeh: Rework device EEH PE determination") Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201007040903.819081-2-oohall@gmail.com --- arch/powerpc/platforms/pseries/eeh_pseries.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index d8fd5f7b2143..cf024fa37bda 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -363,7 +363,6 @@ void pseries_eeh_init_edev(struct pci_dn *pdn) { struct eeh_pe pe, *parent; struct eeh_dev *edev; - int addr; u32 pcie_flags; int ret; @@ -422,8 +421,8 @@ void pseries_eeh_init_edev(struct pci_dn *pdn) } /* first up, find the pe_config_addr for the PE containing the device */ - addr = pseries_eeh_get_pe_config_addr(pdn); - if (addr < 0) { + ret = pseries_eeh_get_pe_config_addr(pdn); + if (ret < 0) { eeh_edev_dbg(edev, "Unable to find pe_config_addr\n"); goto err; } @@ -431,7 +430,7 @@ void pseries_eeh_init_edev(struct pci_dn *pdn) /* Try enable EEH on the fake PE */ memset(&pe, 0, sizeof(struct eeh_pe)); pe.phb = pdn->phb; - pe.addr = addr; + pe.addr = ret; eeh_edev_dbg(edev, "Enabling EEH on device\n"); ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE); @@ -440,7 +439,7 @@ void pseries_eeh_init_edev(struct pci_dn *pdn) goto err; } - edev->pe_config_addr = addr; + edev->pe_config_addr = pe.addr; eeh_add_flag(EEH_ENABLED); -- cgit v1.2.3 From 0f9866f7e85765bbda86666df56c92f377c3bc10 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Sat, 3 Oct 2020 13:19:39 +0530 Subject: powerpc/perf/hv-gpci: Fix starting index value Commit 9e9f60108423f ("powerpc/perf/{hv-gpci, hv-common}: generate requests with counters annotated") adds a framework for defining gpci counters. In this patch, they adds starting_index value as '0xffffffffffffffff'. which is wrong as starting_index is of size 32 bits. Because of this, incase we try to run hv-gpci event we get error. 
In power9 machine: command#: perf stat -e hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/ -C 0 -I 1000 event syntax error: '..bie_count_and_time_tlbie_instructions_issued/' \___ value too big for format, maximum is 4294967295 This patch fix this issue and changes starting_index value to '0xffffffff' After this patch: command#: perf stat -e hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/ -C 0 -I 1000 1.000085786 1,024 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/ 2.000287818 1,024 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/ 2.439113909 17,408 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/ Fixes: 9e9f60108423 ("powerpc/perf/{hv-gpci, hv-common}: generate requests with counters annotated") Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201003074943.338618-1-kjain@linux.ibm.com --- arch/powerpc/perf/hv-gpci-requests.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/perf/hv-gpci-requests.h b/arch/powerpc/perf/hv-gpci-requests.h index e608f9db12dd..8965b4463d43 100644 --- a/arch/powerpc/perf/hv-gpci-requests.h +++ b/arch/powerpc/perf/hv-gpci-requests.h @@ -95,7 +95,7 @@ REQUEST(__field(0, 8, partition_id) #define REQUEST_NAME system_performance_capabilities #define REQUEST_NUM 0x40 -#define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff" +#define REQUEST_IDX_KIND "starting_index=0xffffffff" #include I(REQUEST_BEGIN) REQUEST(__field(0, 1, perf_collect_privileged) __field(0x1, 1, capability_mask) @@ -223,7 +223,7 @@ REQUEST(__field(0, 2, partition_id) #define REQUEST_NAME system_hypervisor_times #define REQUEST_NUM 0xF0 -#define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff" +#define REQUEST_IDX_KIND "starting_index=0xffffffff" #include I(REQUEST_BEGIN) REQUEST(__count(0, 8, time_spent_to_dispatch_virtual_processors) __count(0x8, 8, time_spent_processing_virtual_processor_timers) @@ -234,7 +234,7 @@ REQUEST(__count(0, 8, time_spent_to_dispatch_virtual_processors) #define REQUEST_NAME system_tlbie_count_and_time #define REQUEST_NUM 0xF4 -#define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff" +#define REQUEST_IDX_KIND "starting_index=0xffffffff" #include I(REQUEST_BEGIN) REQUEST(__count(0, 8, tlbie_instructions_issued) /* -- cgit v1.2.3 From dcb5cdf60a1fbbdb3b4dd2abc562206481f09ef1 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Sat, 3 Oct 2020 13:19:42 +0530 Subject: powerpc/perf/hv-gpci: Add cpu hotplug support Patch here adds cpu hotplug functions to hv_gpci pmu. A new cpuhp_state "CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE" enum is added. The online callback function updates the cpumask only if its empty. As the primary intention of adding hotplug support is to designate a CPU to make HCALL to collect the counter data. The offline function test and clear corresponding cpu in a cpumask and update cpumask to any other active cpu. 
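For contrast, a driver that does not need a fixed, named slot in the cpuhp_state enum can register a dynamic state instead. A minimal sketch follows; the my_pmu_* names are placeholders and not part of this patch:

#include <linux/cpuhotplug.h>

static int my_pmu_online(unsigned int cpu)  { /* claim @cpu if none is designated yet */ return 0; }
static int my_pmu_offline(unsigned int cpu) { /* migrate events off @cpu if it was designated */ return 0; }

static int my_pmu_hotplug_init(void)
{
	/*
	 * CPUHP_AP_ONLINE_DYN allocates a state slot at runtime and returns the
	 * allocated state number on success, so no new enum entry is needed.
	 */
	return cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "perf/powerpc/my_pmu:online",
				 my_pmu_online, my_pmu_offline);
}

hv_gpci adds a fixed CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE entry instead, keeping it next to the other powerpc perf PMU states in cpuhotplug.h.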
Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201003074943.338618-4-kjain@linux.ibm.com --- arch/powerpc/perf/hv-gpci.c | 46 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/cpuhotplug.h | 1 + 2 files changed, 47 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index 1667315b82e9..1bca0c1a6cfe 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -48,6 +48,8 @@ EVENT_DEFINE_RANGE_FORMAT(length, config1, 24, 31); /* u32, byte offset */ EVENT_DEFINE_RANGE_FORMAT(offset, config1, 32, 63); +static cpumask_t hv_gpci_cpumask; + static struct attribute *format_attrs[] = { &format_attr_request.attr, &format_attr_starting_index.attr, @@ -266,6 +268,45 @@ static struct pmu h_gpci_pmu = { .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; +static int ppc_hv_gpci_cpu_online(unsigned int cpu) +{ + if (cpumask_empty(&hv_gpci_cpumask)) + cpumask_set_cpu(cpu, &hv_gpci_cpumask); + + return 0; +} + +static int ppc_hv_gpci_cpu_offline(unsigned int cpu) +{ + int target; + + /* Check if exiting cpu is used for collecting gpci events */ + if (!cpumask_test_and_clear_cpu(cpu, &hv_gpci_cpumask)) + return 0; + + /* Find a new cpu to collect gpci events */ + target = cpumask_last(cpu_active_mask); + + if (target < 0 || target >= nr_cpu_ids) { + pr_err("hv_gpci: CPU hotplug init failed\n"); + return -1; + } + + /* Migrate gpci events to the new target */ + cpumask_set_cpu(target, &hv_gpci_cpumask); + perf_pmu_migrate_context(&h_gpci_pmu, cpu, target); + + return 0; +} + +static int hv_gpci_cpu_hotplug_init(void) +{ + return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE, + "perf/powerpc/hv_gcpi:online", + ppc_hv_gpci_cpu_online, + ppc_hv_gpci_cpu_offline); +} + static int hv_gpci_init(void) { int r; @@ -286,6 +327,11 @@ static int hv_gpci_init(void) return -ENODEV; } + /* init cpuhotplug */ + r = hv_gpci_cpu_hotplug_init(); + if (r) + return r; + /* sampling not supported */ h_gpci_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 3215023d4852..5d08ed922510 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -183,6 +183,7 @@ enum cpuhp_state { CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE, CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE, CPUHP_AP_PERF_POWERPC_HV_24x7_ONLINE, + CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE, CPUHP_AP_WATCHDOG_ONLINE, CPUHP_AP_WORKQUEUE_ONLINE, CPUHP_AP_RCUTREE_ONLINE, -- cgit v1.2.3 From 09b791d95559ef82542063333ecaa2ac9d57118e Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Sat, 3 Oct 2020 13:19:43 +0530 Subject: powerpc/hv-gpci: Add sysfs files inside hv-gpci device to show cpumask Patch here adds a cpumask attr to hv_gpci pmu along with ABI documentation. Primary use to expose the cpumask is for the perf tool which has the capability to parse the driver sysfs folder and understand the cpumask file. Having cpumask file will reduce the number of perf command line parameters (will avoid "-C" option in the perf tool command line). It can also notify the user which is the current cpu used to retrieve the counter data. 
command:# cat /sys/devices/hv_gpci/cpumask 0 Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201003074943.338618-5-kjain@linux.ibm.com --- .../ABI/testing/sysfs-bus-event_source-devices-hv_gpci | 7 +++++++ arch/powerpc/perf/hv-gpci.c | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'arch') diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci index ed989869d116..6a023b42486c 100644 --- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci @@ -72,3 +72,10 @@ Contact: Linux on PowerPC Developer List Description: A number indicating the latest version of the gpci interface that the kernel is aware of. + +What: /sys/devices/hv_gpci/cpumask +Date: October 2020 +Contact: Linux on PowerPC Developer List +Description: read only + This sysfs file exposes the cpumask which is designated to make + HCALLs to retrieve hv-gpci pmu event counter data. diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index 1bca0c1a6cfe..d48413e28c39 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -96,7 +96,15 @@ static ssize_t kernel_version_show(struct device *dev, return sprintf(page, "0x%x\n", COUNTER_INFO_VERSION_CURRENT); } +static ssize_t cpumask_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return cpumap_print_to_pagebuf(true, buf, &hv_gpci_cpumask); +} + static DEVICE_ATTR_RO(kernel_version); +static DEVICE_ATTR_RO(cpumask); + HV_CAPS_ATTR(version, "0x%x\n"); HV_CAPS_ATTR(ga, "%d\n"); HV_CAPS_ATTR(expanded, "%d\n"); @@ -113,6 +121,15 @@ static struct attribute *interface_attrs[] = { NULL, }; +static struct attribute *cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group cpumask_attr_group = { + .attrs = cpumask_attrs, +}; + static struct attribute_group interface_group = { .name = "interface", .attrs = interface_attrs, @@ -122,6 +139,7 @@ static const struct attribute_group *attr_groups[] = { &format_group, &event_group, &interface_group, + &cpumask_attr_group, NULL, }; -- cgit v1.2.3 From 792254a77201453d9a77479e63dc216ad90462d2 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 7 Oct 2020 18:06:05 +1000 Subject: powerpc/security: Fix link stack flush instruction The inline execution path for the hardware assisted branch flush instruction failed to set CTR to the correct value before bcctr, causing a crash when the feature is enabled. 
Fixes: 4d24e21cc694 ("powerpc/security: Allow for processors that flush the link stack using the special bcctr") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201007080605.64423-1-npiggin@gmail.com --- arch/powerpc/include/asm/asm-prototypes.h | 4 +++- arch/powerpc/kernel/entry_64.S | 8 ++++++-- arch/powerpc/kernel/security.c | 34 ++++++++++++++++++++++--------- 3 files changed, 33 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 4957119604c7..d0b832cbbec8 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -145,7 +145,9 @@ void _kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr); void _kvmppc_save_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr); /* Patch sites */ -extern s32 patch__call_flush_branch_caches; +extern s32 patch__call_flush_branch_caches1; +extern s32 patch__call_flush_branch_caches2; +extern s32 patch__call_flush_branch_caches3; extern s32 patch__flush_count_cache_return; extern s32 patch__flush_link_stack_return; extern s32 patch__call_kvm_flush_link_stack; diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 733e40eba4eb..2f3846192ec7 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -430,7 +430,11 @@ _ASM_NOKPROBE_SYMBOL(save_nvgprs); #define FLUSH_COUNT_CACHE \ 1: nop; \ - patch_site 1b, patch__call_flush_branch_caches + patch_site 1b, patch__call_flush_branch_caches1; \ +1: nop; \ + patch_site 1b, patch__call_flush_branch_caches2; \ +1: nop; \ + patch_site 1b, patch__call_flush_branch_caches3 .macro nops number .rept \number @@ -512,7 +516,7 @@ _GLOBAL(_switch) kuap_check_amr r9, r10 - FLUSH_COUNT_CACHE + FLUSH_COUNT_CACHE /* Clobbers r9, ctr */ /* * On SMP kernels, care must be taken because a task may be diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index c9876aab3142..e4e1a94ccf6a 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -430,30 +430,44 @@ device_initcall(stf_barrier_debugfs_init); static void update_branch_cache_flush(void) { + u32 *site; + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + site = &patch__call_kvm_flush_link_stack; // This controls the branch from guest_exit_cont to kvm_flush_link_stack if (link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) { - patch_instruction_site(&patch__call_kvm_flush_link_stack, - ppc_inst(PPC_INST_NOP)); + patch_instruction_site(site, ppc_inst(PPC_INST_NOP)); } else { // Could use HW flush, but that could also flush count cache - patch_branch_site(&patch__call_kvm_flush_link_stack, - (u64)&kvm_flush_link_stack, BRANCH_SET_LINK); + patch_branch_site(site, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK); } #endif + // Patch out the bcctr first, then nop the rest + site = &patch__call_flush_branch_caches3; + patch_instruction_site(site, ppc_inst(PPC_INST_NOP)); + site = &patch__call_flush_branch_caches2; + patch_instruction_site(site, ppc_inst(PPC_INST_NOP)); + site = &patch__call_flush_branch_caches1; + patch_instruction_site(site, ppc_inst(PPC_INST_NOP)); + // This controls the branch from _switch to flush_branch_caches if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE && link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) { - patch_instruction_site(&patch__call_flush_branch_caches, - ppc_inst(PPC_INST_NOP)); + // Nothing to be done + } else if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW 
&& link_stack_flush_type == BRANCH_CACHE_FLUSH_HW) { - patch_instruction_site(&patch__call_flush_branch_caches, - ppc_inst(PPC_INST_BCCTR_FLUSH)); + // Patch in the bcctr last + site = &patch__call_flush_branch_caches1; + patch_instruction_site(site, ppc_inst(0x39207fff)); // li r9,0x7fff + site = &patch__call_flush_branch_caches2; + patch_instruction_site(site, ppc_inst(0x7d2903a6)); // mtctr r9 + site = &patch__call_flush_branch_caches3; + patch_instruction_site(site, ppc_inst(PPC_INST_BCCTR_FLUSH)); + } else { - patch_branch_site(&patch__call_flush_branch_caches, - (u64)&flush_branch_caches, BRANCH_SET_LINK); + patch_branch_site(site, (u64)&flush_branch_caches, BRANCH_SET_LINK); // If we just need to flush the link stack, early return if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE) { -- cgit v1.2.3 From ec72024e35dddb88a81e40071c87ceb18b5ee835 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 7 Oct 2020 17:18:33 +0530 Subject: powerpc/drmem: Make lmb_size 64 bit Similar to commit 89c140bbaeee ("pseries: Fix 64 bit logical memory block panic") make sure different variables tracking lmb_size are updated to be 64 bit. This was found by code audit. Cc: stable@vger.kernel.org Signed-off-by: Aneesh Kumar K.V Acked-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201007114836.282468-2-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/drmem.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index 030a19d92213..bf2402fed3e0 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -20,7 +20,7 @@ struct drmem_lmb { struct drmem_lmb_info { struct drmem_lmb *lmbs; int n_lmbs; - u32 lmb_size; + u64 lmb_size; }; extern struct drmem_lmb_info *drmem_info; @@ -80,7 +80,7 @@ struct of_drconf_cell_v2 { #define DRCONF_MEM_RESERVED 0x00000080 #define DRCONF_MEM_HOTREMOVABLE 0x00000100 -static inline u32 drmem_lmb_size(void) +static inline u64 drmem_lmb_size(void) { return drmem_info->lmb_size; } -- cgit v1.2.3 From 301d2ea6572386245c5d2d2dc85c3b5a737b85ac Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 7 Oct 2020 17:18:34 +0530 Subject: powerpc/memhotplug: Make lmb size 64bit Similar to commit 89c140bbaeee ("pseries: Fix 64 bit logical memory block panic") make sure different variables tracking lmb_size are updated to be 64 bit. This was found by code audit. 
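The hazard is plain integer truncation. A tiny standalone illustration (the 4 GiB value is hypothetical, chosen only because it no longer fits in 32 bits):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t lmb_size  = 4ULL * 1024 * 1024 * 1024;	/* hypothetical 4 GiB LMB size */
	uint32_t truncated = (uint32_t)lmb_size;	/* what a 32-bit field would keep */

	/* Prints "u64: 4294967296, u32: 0": the size silently becomes zero. */
	printf("u64: %llu, u32: %u\n", (unsigned long long)lmb_size, truncated);
	return 0;
}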
Cc: stable@vger.kernel.org Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201007114836.282468-3-aneesh.kumar@linux.ibm.com --- arch/powerpc/platforms/pseries/hotplug-memory.c | 43 +++++++++++++++++-------- 1 file changed, 29 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 9a533acf8ad0..ca5589a1ec7f 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -277,7 +277,7 @@ static int dlpar_offline_lmb(struct drmem_lmb *lmb) return dlpar_change_lmb_state(lmb, false); } -static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size) +static int pseries_remove_memblock(unsigned long base, unsigned long memblock_size) { unsigned long block_sz, start_pfn; int sections_per_block; @@ -308,10 +308,11 @@ out: static int pseries_remove_mem_node(struct device_node *np) { - const __be32 *regs; + const __be32 *prop; unsigned long base; - unsigned int lmb_size; + unsigned long lmb_size; int ret = -EINVAL; + int addr_cells, size_cells; /* * Check to see if we are actually removing memory @@ -322,12 +323,19 @@ static int pseries_remove_mem_node(struct device_node *np) /* * Find the base address and size of the memblock */ - regs = of_get_property(np, "reg", NULL); - if (!regs) + prop = of_get_property(np, "reg", NULL); + if (!prop) return ret; - base = be64_to_cpu(*(unsigned long *)regs); - lmb_size = be32_to_cpu(regs[3]); + addr_cells = of_n_addr_cells(np); + size_cells = of_n_size_cells(np); + + /* + * "reg" property represents (addr,size) tuple. + */ + base = of_read_number(prop, addr_cells); + prop += addr_cells; + lmb_size = of_read_number(prop, size_cells); pseries_remove_memblock(base, lmb_size); return 0; @@ -564,7 +572,7 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) #else static inline int pseries_remove_memblock(unsigned long base, - unsigned int memblock_size) + unsigned long memblock_size) { return -EOPNOTSUPP; } @@ -888,10 +896,11 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog) static int pseries_add_mem_node(struct device_node *np) { - const __be32 *regs; + const __be32 *prop; unsigned long base; - unsigned int lmb_size; + unsigned long lmb_size; int ret = -EINVAL; + int addr_cells, size_cells; /* * Check to see if we are actually adding memory @@ -902,12 +911,18 @@ static int pseries_add_mem_node(struct device_node *np) /* * Find the base and size of the memblock */ - regs = of_get_property(np, "reg", NULL); - if (!regs) + prop = of_get_property(np, "reg", NULL); + if (!prop) return ret; - base = be64_to_cpu(*(unsigned long *)regs); - lmb_size = be32_to_cpu(regs[3]); + addr_cells = of_n_addr_cells(np); + size_cells = of_n_size_cells(np); + /* + * "reg" property represents (addr,size) tuple. + */ + base = of_read_number(prop, addr_cells); + prop += addr_cells; + lmb_size = of_read_number(prop, size_cells); /* * Update memory region to represent the memory add -- cgit v1.2.3 From 950805f4d90eda14325ceab56b0f00d034baa8bc Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 7 Oct 2020 17:18:35 +0530 Subject: powerpc/book3s64/radix: Make radix_mem_block_size 64bit Similar to commit 89c140bbaeee ("pseries: Fix 64 bit logical memory block panic") make sure different variables tracking lmb_size are updated to be 64 bit. 
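The "reg" and "ibm,lmb-size" parsing in the surrounding hunks hinges on of_read_number(), which folds a run of 32-bit cells into one 64-bit value. A simplified model is sketched below; read_cells() is an illustrative stand-in, and the real of_read_number() operates on big-endian cells straight from the device tree.

#include <stdint.h>

/*
 * Simplified model of of_read_number(): fold 'cells' 32-bit cells (assumed
 * already converted from big-endian) into a single 64-bit number.
 */
static uint64_t read_cells(const uint32_t *cell, int cells)
{
	uint64_t r = 0;

	while (cells--)
		r = (r << 32) | *cell++;
	return r;
}

/*
 * Example: a reg = <0x0 0x20000000  0x0 0x10000000> property with
 * #address-cells = <2> and #size-cells = <2> decodes as:
 *   base = read_cells(prop, 2)      -> 0x20000000
 *   size = read_cells(prop + 2, 2)  -> 0x10000000 (256 MB)
 */

Respecting #address-cells/#size-cells this way is what makes the parsing work for both 1-cell and 2-cell sizes, instead of assuming a fixed __be64 layout.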
Fixes: af9d00e93a4f ("powerpc/mm/radix: Create separate mappings for hot-plugged memory") Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201007114836.282468-4-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/book3s/64/mmu.h | 2 +- arch/powerpc/mm/book3s64/radix_pgtable.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index ddc414ab3c4d..e0b52940e43c 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -70,7 +70,7 @@ extern unsigned int mmu_base_pid; /* * memory block size used with radix translation. */ -extern unsigned int __ro_after_init radix_mem_block_size; +extern unsigned long __ro_after_init radix_mem_block_size; #define PRTB_SIZE_SHIFT (mmu_pid_bits + 4) #define PRTB_ENTRIES (1ul << mmu_pid_bits) diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 5c8adeb8c955..78c5afe98359 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -34,7 +34,7 @@ unsigned int mmu_pid_bits; unsigned int mmu_base_pid; -unsigned int radix_mem_block_size __ro_after_init; +unsigned long radix_mem_block_size __ro_after_init; static __ref void *early_alloc_pgtable(unsigned long size, int nid, unsigned long region_start, unsigned long region_end) -- cgit v1.2.3 From fbf2f134c8c312d3166534a8bd6a1aaee6e9c7ef Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 7 Oct 2020 17:18:36 +0530 Subject: powerpc/lmb-size: Use addr #size-cells value when fetching lmb-size Make it consistent with other usages. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201007114836.282468-5-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/book3s64/radix_pgtable.c | 7 ++++--- arch/powerpc/platforms/pseries/hotplug-memory.c | 13 +++++++++---- 2 files changed, 13 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 78c5afe98359..f8e9eb49d46b 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -498,7 +498,7 @@ static int __init probe_memory_block_size(unsigned long node, const char *uname, depth, void *data) { unsigned long *mem_block_size = (unsigned long *)data; - const __be64 *prop; + const __be32 *prop; int len; if (depth != 1) @@ -508,13 +508,14 @@ static int __init probe_memory_block_size(unsigned long node, const char *uname, return 0; prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len); - if (!prop || len < sizeof(__be64)) + + if (!prop || len < dt_root_size_cells * sizeof(__be32)) /* * Nothing in the device tree */ *mem_block_size = MIN_MEMORY_BLOCK_SIZE; else - *mem_block_size = be64_to_cpup(prop); + *mem_block_size = of_read_number(prop, dt_root_size_cells); return 1; } diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index ca5589a1ec7f..4e18653a3804 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -30,12 +30,17 @@ unsigned long pseries_memory_block_size(void) np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); if (np) { - const __be64 *size; + int len; + int size_cells; + const __be32 *prop; - size = of_get_property(np, "ibm,lmb-size", NULL); - if 
(size) - memblock_size = be64_to_cpup(size); + size_cells = of_n_size_cells(np); + + prop = of_get_property(np, "ibm,lmb-size", &len); + if (prop && len >= size_cells * sizeof(__be32)) + memblock_size = of_read_number(prop, size_cells); of_node_put(np); + } else if (machine_is(pseries)) { /* This fallback really only applies to pseries */ unsigned int memzero_size = 0; -- cgit v1.2.3 From 13135b461cf205941308984bd3271ec7d403dc40 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Mon, 14 Sep 2020 02:49:04 +0530 Subject: powerpc/papr_scm: Add PAPR command family to pass-through command-set Add NVDIMM_FAMILY_PAPR to the list of valid 'dimm_family_mask' acceptable by papr_scm. This is needed as since commit 92fe2aa859f5 ("libnvdimm: Validate command family indices") libnvdimm performs a validation of 'nd_cmd_pkg.nd_family' received as part of ND_CMD_CALL processing to ensure only known command families can use the general ND_CMD_CALL pass-through functionality. Without this change the ND_CMD_CALL pass-through targeting NVDIMM_FAMILY_PAPR error out with -EINVAL. Fixes: 92fe2aa859f5 ("libnvdimm: Validate command family indices") Signed-off-by: Vaibhav Jain Reviewed-by: Ira Weiny Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200913211904.24472-1-vaibhav@linux.ibm.com --- arch/powerpc/platforms/pseries/papr_scm.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index a95aa425e7d4..835163f54244 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -898,6 +898,9 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) p->bus_desc.of_node = p->pdev->dev.of_node; p->bus_desc.provider_name = kstrdup(p->pdev->name, GFP_KERNEL); + /* Set the dimm command family mask to accept PDSMs */ + set_bit(NVDIMM_FAMILY_PAPR, &p->bus_desc.dimm_family_mask); + if (!p->bus_desc.provider_name) return -ENOMEM; -- cgit v1.2.3 From ca1d3443b4dd1e8f152bd6c881ddb3eb2996179a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 29 Sep 2020 06:48:31 +0000 Subject: powerpc: Remove SYNC on non 6xx SYNC is usefull for Powerpc 601 only. On everything else, SYNC is empty. Remove it from code that is not made to run on 6xx. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/27951fa6c9a8f80724d1bc81a6117ac32343a55d.1601362098.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_40x.S | 1 - arch/powerpc/kernel/head_booke.h | 1 - arch/powerpc/kernel/misc_64.S | 1 - 3 files changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 5b282d9965a5..44c9018aed1b 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -72,7 +72,6 @@ turn_on_mmu: lis r0,start_here@h ori r0,r0,start_here@l mtspr SPRN_SRR0,r0 - SYNC rfi /* enables MMU */ b . 
/* prevent prefetch past rfi */ diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 18f87bf9e32b..71c359d438b5 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -176,7 +176,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) #endif mtspr SPRN_SRR1,r10 mtspr SPRN_SRR0,r11 - SYNC RFI /* jump to handler, enable MMU */ 99: b ret_from_kernel_syscall .endm diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 7bb46ad98207..070465825c21 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -365,7 +365,6 @@ _GLOBAL(kexec_smp_wait) li r4,KEXEC_STATE_REAL_MODE stb r4,PACAKEXECSTATE(r13) - SYNC b kexec_wait -- cgit v1.2.3 From e42a64002a507bf61e57106ed5323b1854371563 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 29 Sep 2020 06:48:33 +0000 Subject: powerpc: Remove CONFIG_PPC601_SYNC_FIX This config option isn't in any defconfig. The very first versions of Powerpc 601 have a bug which requires additional sync before and/or after some instructions. This was more than 25 years ago and time has come to retire those buggy versions of the 601 from the kernel. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/55b46bff16705b1ae7bf0a60ccd522b1010ebf75.1601362098.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ppc_asm.h | 6 ------ arch/powerpc/platforms/Kconfig | 15 --------------- 2 files changed, 21 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index b4cc6608131c..0b9dc814b81c 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -382,15 +382,9 @@ n: #endif /* various errata or part fixups */ -#ifdef CONFIG_PPC601_SYNC_FIX -#define SYNC sync; isync -#define SYNC_601 sync -#define ISYNC_601 isync -#else #define SYNC #define SYNC_601 #define ISYNC_601 -#endif #if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_FSL_BOOK3E) #define MFTB(dest) \ diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index b439b027a42f..7a5e8f4541e3 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -199,21 +199,6 @@ source "drivers/cpuidle/Kconfig" endmenu -config PPC601_SYNC_FIX - bool "Workarounds for PPC601 bugs" - depends on PPC_BOOK3S_601 && PPC_PMAC - default y - help - Some versions of the PPC601 (the first PowerPC chip) have bugs which - mean that extra synchronization instructions are required near - certain instructions, typically those that make major changes to the - CPU state. These extra instructions reduce performance slightly. - If you say N here, these extra instructions will not be included, - resulting in a kernel which will run faster but may not run at all - on some systems with the PPC601 chip. - - If in doubt, say Y here. - config TAU bool "On-chip CPU temperature sensor support" depends on PPC_BOOK3S_32 -- cgit v1.2.3 From d2a5cd83ee984c0e9fc172d2df9591c264261a52 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 29 Sep 2020 06:48:34 +0000 Subject: powerpc: Drop SYNC_601() ISYNC_601() and SYNC() Those macros are now empty at all time. Drop them. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/7990bb63fc53e460bfa94f8040184881d9e6fbc3.1601362098.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ppc_asm.h | 4 ---- arch/powerpc/kernel/entry_32.S | 17 +---------------- arch/powerpc/kernel/fpu.S | 1 - arch/powerpc/kernel/head_32.S | 9 --------- arch/powerpc/kernel/head_32.h | 1 - arch/powerpc/kernel/l2cr_6xx.S | 3 +-- arch/powerpc/mm/book3s32/hash_low.S | 12 ------------ 7 files changed, 2 insertions(+), 45 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 0b9dc814b81c..67a421b81a50 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -382,10 +382,6 @@ n: #endif /* various errata or part fixups */ -#define SYNC -#define SYNC_601 -#define ISYNC_601 - #if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_FSL_BOOK3E) #define MFTB(dest) \ 90: mfspr dest, SPRN_TBRL; \ diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index f4d0af8e1136..f25ea188ecd3 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -234,7 +234,6 @@ transfer_to_handler_cont: mtspr SPRN_SRR0,r11 mtspr SPRN_SRR1,r10 mtlr r9 - SYNC RFI /* jump to handler, enable MMU */ #if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) @@ -264,7 +263,6 @@ _ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont) LOAD_REG_IMMEDIATE(r0, MSR_KERNEL) mtspr SPRN_SRR0,r12 mtspr SPRN_SRR1,r0 - SYNC RFI reenable_mmu: @@ -323,7 +321,6 @@ stack_ovf: #endif mtspr SPRN_SRR0,r9 mtspr SPRN_SRR1,r10 - SYNC RFI _ASM_NOKPROBE_SYMBOL(stack_ovf) #endif @@ -411,7 +408,6 @@ ret_from_syscall: /* disable interrupts so current_thread_info()->flags can't change */ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) /* doesn't include MSR_EE */ /* Note: We don't bother telling lockdep about it */ - SYNC mtmsr r10 lwz r9,TI_FLAGS(r2) li r8,-MAX_ERRNO @@ -474,7 +470,6 @@ syscall_exit_finish: #endif mtspr SPRN_SRR0,r7 mtspr SPRN_SRR1,r8 - SYNC RFI _ASM_NOKPROBE_SYMBOL(syscall_exit_finish) #ifdef CONFIG_44x @@ -567,7 +562,6 @@ syscall_exit_work: * lockdep as we are supposed to have IRQs on at this point */ ori r10,r10,MSR_EE - SYNC mtmsr r10 /* Save NVGPRS if they're not saved already */ @@ -606,7 +600,6 @@ ret_from_kernel_syscall: #endif mtspr SPRN_SRR0, r9 mtspr SPRN_SRR1, r10 - SYNC RFI _ASM_NOKPROBE_SYMBOL(ret_from_kernel_syscall) @@ -810,7 +803,6 @@ fast_exception_return: REST_GPR(9, r11) REST_GPR(12, r11) lwz r11,GPR11(r11) - SYNC RFI _ASM_NOKPROBE_SYMBOL(fast_exception_return) @@ -872,7 +864,6 @@ ret_from_except: * from the interrupt. */ /* Note: We don't bother telling lockdep about it */ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) - SYNC /* Some chip revs have problems here... */ mtmsr r10 /* disable interrupts */ lwz r3,_MSR(r1) /* Returning to user mode? */ @@ -1035,7 +1026,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) * exc_exit_restart below. -- paulus */ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL & ~MSR_RI) - SYNC mtmsr r10 /* clear the RI bit */ .globl exc_exit_restart exc_exit_restart: @@ -1046,7 +1036,6 @@ exc_exit_restart: lwz r1,GPR1(r1) .globl exc_exit_restart_end exc_exit_restart_end: - SYNC RFI _ASM_NOKPROBE_SYMBOL(exc_exit_restart) _ASM_NOKPROBE_SYMBOL(exc_exit_restart_end) @@ -1274,7 +1263,6 @@ do_resched: /* r10 contains MSR_KERNEL here */ mfmsr r10 #endif ori r10,r10,MSR_EE - SYNC mtmsr r10 /* hard-enable interrupts */ bl schedule recheck: @@ -1283,7 +1271,6 @@ recheck: * TI_FLAGS aren't advertised. 
*/ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) - SYNC mtmsr r10 /* disable interrupts */ lwz r9,TI_FLAGS(r2) andi. r0,r9,_TIF_NEED_RESCHED @@ -1292,7 +1279,6 @@ recheck: beq restore_user do_user_signal: /* r10 contains MSR_KERNEL here */ ori r10,r10,MSR_EE - SYNC mtmsr r10 /* hard-enable interrupts */ /* save r13-r31 in the exception frame, if not already done */ lwz r3,_TRAP(r1) @@ -1382,8 +1368,7 @@ _GLOBAL(enter_rtas) mfmsr r9 stw r9,8(r1) LOAD_REG_IMMEDIATE(r0,MSR_KERNEL) - SYNC /* disable interrupts so SRR0/1 */ - mtmsr r0 /* don't get trashed */ + mtmsr r0 /* disable interrupts so SRR0/1 don't get trashed */ li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR) mtlr r6 stw r7, THREAD + RTAS_SP(r2) diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 825893d4cb59..3ff9a8fafa46 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -87,7 +87,6 @@ BEGIN_FTR_SECTION oris r5,r5,MSR_VSX@h END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif - SYNC MTMSRD(r5) /* enable use of fpu now */ isync /* enable use of FP after return */ diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 2bd0aa3a4cc7..48cde60334a2 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -219,7 +219,6 @@ turn_on_mmu: lis r0,start_here@h ori r0,r0,start_here@l mtspr SPRN_SRR0,r0 - SYNC RFI /* enables MMU */ /* @@ -784,14 +783,12 @@ fast_hash_page_return: mtcr r11 lwz r11, THR11(r10) mfspr r10, SPRN_SPRG_SCRATCH0 - SYNC RFI 1: /* ISI */ mtcr r11 mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r10, SPRN_SPRG_SCRATCH0 - SYNC RFI stack_overflow: @@ -882,7 +879,6 @@ __secondary_start_pmac_0: set to map the 0xf0000000 - 0xffffffff region */ mfmsr r0 rlwinm r0,r0,0,28,26 /* clear DR (0x10) */ - SYNC mtmsr r0 isync @@ -930,7 +926,6 @@ __secondary_start: ori r3,r3,start_secondary@l mtspr SPRN_SRR0,r3 mtspr SPRN_SRR1,r4 - SYNC RFI #endif /* CONFIG_SMP */ @@ -1074,7 +1069,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) .align 4 mtspr SPRN_SRR0,r4 mtspr SPRN_SRR1,r3 - SYNC RFI /* Load up the kernel context */ 2: bl load_up_mmu @@ -1099,7 +1093,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) ori r3,r3,start_kernel@l mtspr SPRN_SRR0,r3 mtspr SPRN_SRR1,r4 - SYNC RFI /* @@ -1217,7 +1210,6 @@ _ENTRY(update_bats) .align 4 mtspr SPRN_SRR0, r4 mtspr SPRN_SRR1, r3 - SYNC RFI 1: bl clear_bats lis r3, BATS@ha @@ -1237,7 +1229,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) mtmsr r3 mtspr SPRN_SRR0, r7 mtspr SPRN_SRR1, r6 - SYNC RFI flush_tlbs: diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index cc36998c5541..7c767765071d 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -222,7 +222,6 @@ #endif mtspr SPRN_SRR1,r10 mtspr SPRN_SRR0,r11 - SYNC RFI /* jump to handler, enable MMU */ 99: b ret_from_kernel_syscall .endm diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S index 5f07aa5e9851..225511d73bef 100644 --- a/arch/powerpc/kernel/l2cr_6xx.S +++ b/arch/powerpc/kernel/l2cr_6xx.S @@ -256,7 +256,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450) sync /* Restore MSR (restores EE and DR bits to original state) */ - SYNC mtmsr r7 isync @@ -377,7 +376,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L3CR) 1: bdnz 1b /* Restore MSR (restores EE and DR bits to original state) */ -4: SYNC +4: mtmsr r7 isync blr diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index 1690d369688b..3143de6ae769 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -199,11 
+199,9 @@ _GLOBAL(add_hash_page) * covered by a BAT). -- paulus */ mfmsr r9 - SYNC rlwinm r0,r9,0,17,15 /* clear bit 16 (MSR_EE) */ rlwinm r0,r0,0,28,26 /* clear MSR_DR */ mtmsr r0 - SYNC_601 isync #ifdef CONFIG_SMP @@ -262,7 +260,6 @@ _GLOBAL(add_hash_page) /* reenable interrupts and DR */ mtmsr r9 - SYNC_601 isync lwz r0,4(r1) @@ -506,11 +503,9 @@ _GLOBAL(flush_hash_pages) * covered by a BAT). -- paulus */ mfmsr r10 - SYNC rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ rlwinm r0,r0,0,28,26 /* clear MSR_DR */ mtmsr r0 - SYNC_601 isync /* First find a PTE in the range that has _PAGE_HASHPTE set */ @@ -629,7 +624,6 @@ _GLOBAL(flush_hash_pages) #endif 19: mtmsr r10 - SYNC_601 isync blr EXPORT_SYMBOL(flush_hash_pages) @@ -643,11 +637,9 @@ _GLOBAL(_tlbie) lwz r8,TASK_CPU(r2) oris r8,r8,11 mfmsr r10 - SYNC rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ rlwinm r0,r0,0,28,26 /* clear DR */ mtmsr r0 - SYNC_601 isync lis r9,mmu_hash_lock@h ori r9,r9,mmu_hash_lock@l @@ -664,7 +656,6 @@ _GLOBAL(_tlbie) li r0,0 stw r0,0(r9) /* clear mmu_hash_lock */ mtmsr r10 - SYNC_601 isync #else /* CONFIG_SMP */ tlbie r3 @@ -681,11 +672,9 @@ _GLOBAL(_tlbia) lwz r8,TASK_CPU(r2) oris r8,r8,10 mfmsr r10 - SYNC rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ rlwinm r0,r0,0,28,26 /* clear DR */ mtmsr r0 - SYNC_601 isync lis r9,mmu_hash_lock@h ori r9,r9,mmu_hash_lock@l @@ -709,7 +698,6 @@ _GLOBAL(_tlbia) li r0,0 stw r0,0(r9) /* clear mmu_hash_lock */ mtmsr r10 - SYNC_601 isync #endif /* CONFIG_SMP */ blr -- cgit v1.2.3 From f0ed73f3fa2cdca65973659689ec9e46d99a5f60 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 29 Sep 2020 06:48:35 +0000 Subject: powerpc: Remove PowerPC 601 Powerpc 601 is 25 years old. It is not selected by any defconfig. It requires a lot of special handling as it deviates from the standard 6xx. Retire it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/00a6948d659e017f8ca63437d1384222c3aede57.1601362098.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/cputable.c | 15 --------------- arch/powerpc/platforms/Kconfig.cputype | 11 ++--------- 2 files changed, 2 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index b5bc2edef440..492c0b36aff6 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -609,21 +609,6 @@ static struct cpu_spec __initdata cpu_specs[] = { #endif /* CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_PPC32 -#ifdef CONFIG_PPC_BOOK3S_601 - { /* 601 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00010000, - .cpu_name = "601", - .cpu_features = CPU_FTRS_PPC601, - .cpu_user_features = COMMON_USER | PPC_FEATURE_601_INSTR | - PPC_FEATURE_UNIFIED_CACHE | PPC_FEATURE_NO_TB, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_generic, - .platform = "ppc601", - }, -#endif /* CONFIG_PPC_BOOK3S_601 */ #ifdef CONFIG_PPC_BOOK3S_6xx { /* 603 */ .pvr_mask = 0xffff0000, diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index e74ec220b5d6..c194c4ae8bc7 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -20,7 +20,7 @@ choice depends on PPC32 help There are five families of 32 bit PowerPC chips supported. 
- The most common ones are the desktop and server CPUs (601, 603, + The most common ones are the desktop and server CPUs (603, 604, 740, 750, 74xx) CPUs from Freescale and IBM, with their embedded 512x/52xx/82xx/83xx/86xx counterparts. The other embedded parts, namely 4xx, 8xx, e200 (55xx) and e500 @@ -30,7 +30,7 @@ choice If unsure, select 52xx/6xx/7xx/74xx/82xx/83xx/86xx. config PPC_BOOK3S_6xx - bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx except 601" + bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx" select PPC_BOOK3S_32 select PPC_FPU select PPC_HAVE_PMU_SUPPORT @@ -38,13 +38,6 @@ config PPC_BOOK3S_6xx select PPC_HAVE_KUAP select HAVE_ARCH_VMAP_STACK if !ADB_PMU -config PPC_BOOK3S_601 - bool "PowerPC 601" - select PPC_BOOK3S_32 - select PPC_FPU - select PPC_HAVE_KUAP - select HAVE_ARCH_VMAP_STACK - config PPC_85xx bool "Freescale 85xx" select E500 -- cgit v1.2.3 From 8b14e1dff067195dca7a42321771437cb33a99e9 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 29 Sep 2020 06:48:36 +0000 Subject: powerpc: Remove support for PowerPC 601 PowerPC 601 has been retired. Remove all associated specific code. CPU_FTRS_PPC601 has CPU_FTR_COHERENT_ICACHE and CPU_FTR_COMMON. CPU_FTR_COMMON is already present via other CPU_FTRS. None of the remaining CPU selects CPU_FTR_COHERENT_ICACHE. So CPU_FTRS_PPC601 can be removed from the possible features, hence can be removed completely. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/60b725d55e21beec3335175c20b77903ff98284f.1601362098.git.christophe.leroy@csgroup.eu --- arch/powerpc/boot/util.S | 15 +-------- arch/powerpc/include/asm/cputable.h | 12 ++----- arch/powerpc/include/asm/ppc_asm.h | 3 +- arch/powerpc/include/asm/ptrace.h | 4 --- arch/powerpc/include/asm/time.h | 2 +- arch/powerpc/include/asm/timex.h | 3 -- arch/powerpc/kernel/btext.c | 8 +---- arch/powerpc/kernel/entry_32.S | 18 ---------- arch/powerpc/kernel/head_32.S | 44 ++---------------------- arch/powerpc/kernel/setup_32.c | 2 +- arch/powerpc/kernel/traps.c | 4 --- arch/powerpc/kernel/vdso32/datapage.S | 2 -- arch/powerpc/kernel/vdso32/vdso32.lds.S | 2 -- arch/powerpc/mm/book3s32/mmu.c | 39 +++------------------- arch/powerpc/mm/ptdump/bats.c | 59 --------------------------------- arch/powerpc/platforms/powermac/setup.c | 2 +- arch/powerpc/platforms/powermac/smp.c | 4 --- 17 files changed, 17 insertions(+), 206 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/boot/util.S b/arch/powerpc/boot/util.S index f11f0589a669..d03cdb7606dc 100644 --- a/arch/powerpc/boot/util.S +++ b/arch/powerpc/boot/util.S @@ -18,7 +18,7 @@ .text -/* udelay (on non-601 processors) needs to know the period of the +/* udelay needs to know the period of the * timebase in nanoseconds. This used to be hardcoded to be 60ns * (period of 66MHz/4). Now a variable is used that is initialized to * 60 for backward compatibility, but it can be overridden as necessary @@ -37,19 +37,6 @@ timebase_period_ns: */ .globl udelay udelay: - mfspr r4,SPRN_PVR - srwi r4,r4,16 - cmpwi 0,r4,1 /* 601 ? */ - bne .Ludelay_not_601 -00: li r0,86 /* Instructions / microsecond? */ - mtctr r0 -10: addi r0,r0,0 /* NOP */ - bdnz 10b - subic. 
r3,r3,1 - bne 00b - blr - -.Ludelay_not_601: mulli r4,r3,1000 /* nanoseconds */ /* Change r4 to be the number of ticks using: * (nanoseconds + (timebase_period_ns - 1 )) / timebase_period_ns diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index a1a300c1a20e..93bc70d4c9a1 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -295,8 +295,6 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTR_MAYBE_CAN_NAP 0 #endif -#define CPU_FTRS_PPC601 (CPU_FTR_COMMON | \ - CPU_FTR_COHERENT_ICACHE) #define CPU_FTRS_603 (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE | CPU_FTR_NOEXECUTE) #define CPU_FTRS_604 (CPU_FTR_COMMON | CPU_FTR_PPC_LE) @@ -512,10 +510,8 @@ static inline void cpu_feature_keys_init(void) { } #else enum { CPU_FTRS_POSSIBLE = -#ifdef CONFIG_PPC_BOOK3S_601 - CPU_FTRS_PPC601 | -#elif defined(CONFIG_PPC_BOOK3S_32) - CPU_FTRS_PPC601 | CPU_FTRS_603 | CPU_FTRS_604 | CPU_FTRS_740_NOTAU | +#ifdef CONFIG_PPC_BOOK3S_32 + CPU_FTRS_603 | CPU_FTRS_604 | CPU_FTRS_740_NOTAU | CPU_FTRS_740 | CPU_FTRS_750 | CPU_FTRS_750FX1 | CPU_FTRS_750FX2 | CPU_FTRS_750FX | CPU_FTRS_750GX | CPU_FTRS_7400_NOTAU | CPU_FTRS_7400 | CPU_FTRS_7450_20 | @@ -590,9 +586,7 @@ enum { #else enum { CPU_FTRS_ALWAYS = -#ifdef CONFIG_PPC_BOOK3S_601 - CPU_FTRS_PPC601 & -#elif defined(CONFIG_PPC_BOOK3S_32) +#ifdef CONFIG_PPC_BOOK3S_32 CPU_FTRS_603 & CPU_FTRS_604 & CPU_FTRS_740_NOTAU & CPU_FTRS_740 & CPU_FTRS_750 & CPU_FTRS_750FX1 & CPU_FTRS_750FX2 & CPU_FTRS_750FX & CPU_FTRS_750GX & diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 67a421b81a50..511786f0e40d 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -401,8 +401,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96) #define MFTBU(dest) mfspr dest, SPRN_TBRU #endif -/* tlbsync is not implemented on 601 */ -#if !defined(CONFIG_SMP) || defined(CONFIG_PPC_BOOK3S_601) +#ifndef CONFIG_SMP #define TLBSYNC #else #define TLBSYNC tlbsync; sync diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 155a197c0aa1..e2c778c176a3 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -243,11 +243,7 @@ static inline void set_trap_norestart(struct pt_regs *regs) } #define arch_has_single_step() (1) -#ifndef CONFIG_PPC_BOOK3S_601 #define arch_has_block_step() (true) -#else -#define arch_has_block_step() (false) -#endif #define ARCH_HAS_USER_SINGLE_STEP_REPORT /* diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index cb326720a8a1..ce065589192a 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -39,7 +39,7 @@ struct div_result { }; /* Accessor functions for the timebase (RTC on 601) registers. 
*/ -#define __USE_RTC() (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) +#define __USE_RTC() (0) #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h index 6047402b0a4d..95988870a57b 100644 --- a/arch/powerpc/include/asm/timex.h +++ b/arch/powerpc/include/asm/timex.h @@ -17,9 +17,6 @@ typedef unsigned long cycles_t; static inline cycles_t get_cycles(void) { - if (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) - return 0; - return mftb(); } diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index 02300edc6989..b609fb39dba8 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -95,18 +95,12 @@ void __init btext_prepare_BAT(void) boot_text_mapped = 0; return; } - if (PVR_VER(mfspr(SPRN_PVR)) != 1) { + { /* 603, 604, G3, G4, ... */ lowbits = addr & ~0xFF000000UL; addr &= 0xFF000000UL; disp_BAT[0] = vaddr | (BL_16M<<2) | 2; disp_BAT[1] = addr | (_PAGE_NO_CACHE | _PAGE_GUARDED | BPP_RW); - } else { - /* 601 */ - lowbits = addr & ~0xFF800000UL; - addr &= 0xFF800000UL; - disp_BAT[0] = vaddr | (_PAGE_NO_CACHE | PP_RWXX) | 4; - disp_BAT[1] = addr | BL_8M | 0x40; } logicalDisplayBase = (void *) (vaddr + lowbits); } diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index f25ea188ecd3..8cdc8bcde703 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -811,19 +811,11 @@ _ASM_NOKPROBE_SYMBOL(fast_exception_return) 1: lis r3,exc_exit_restart_end@ha addi r3,r3,exc_exit_restart_end@l cmplw r12,r3 -#ifdef CONFIG_PPC_BOOK3S_601 - bge 2b -#else bge 3f -#endif lis r4,exc_exit_restart@ha addi r4,r4,exc_exit_restart@l cmplw r12,r4 -#ifdef CONFIG_PPC_BOOK3S_601 - blt 2b -#else blt 3f -#endif lis r3,fee_restarts@ha tophys(r3,r3) lwz r5,fee_restarts@l(r3) @@ -840,7 +832,6 @@ fee_restarts: /* aargh, a nonrecoverable interrupt, panic */ /* aargh, we don't know which trap this is */ -/* but the 601 doesn't implement the RI bit, so assume it's OK */ 3: li r10,-1 stw r10,_TRAP(r11) @@ -1302,19 +1293,11 @@ nonrecoverable: lis r10,exc_exit_restart_end@ha addi r10,r10,exc_exit_restart_end@l cmplw r12,r10 -#ifdef CONFIG_PPC_BOOK3S_601 - bgelr -#else bge 3f -#endif lis r11,exc_exit_restart@ha addi r11,r11,exc_exit_restart@l cmplw r12,r11 -#ifdef CONFIG_PPC_BOOK3S_601 - bltlr -#else blt 3f -#endif lis r10,ee_restarts@ha lwz r12,ee_restarts@l(r10) addi r12,r12,1 @@ -1322,7 +1305,6 @@ nonrecoverable: mr r12,r11 /* restart at exc_exit_restart */ blr 3: /* OK, we can't recover, kill this process */ - /* but the 601 doesn't implement the RI bit, so assume it's OK */ lwz r3,_TRAP(r1) andi. 
r0,r3,1 beq 5f diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 48cde60334a2..b14524d4534c 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -34,16 +34,6 @@ #include "head_32.h" -/* 601 only have IBAT */ -#ifdef CONFIG_PPC_BOOK3S_601 -#define LOAD_BAT(n, reg, RA, RB) \ - li RA,0; \ - mtspr SPRN_IBAT##n##U,RA; \ - lwz RA,(n*16)+0(reg); \ - lwz RB,(n*16)+4(reg); \ - mtspr SPRN_IBAT##n##U,RA; \ - mtspr SPRN_IBAT##n##L,RB -#else #define LOAD_BAT(n, reg, RA, RB) \ /* see the comment for clear_bats() -- Cort */ \ li RA,0; \ @@ -57,7 +47,6 @@ lwz RB,(n*16)+12(reg); \ mtspr SPRN_DBAT##n##U,RA; \ mtspr SPRN_DBAT##n##L,RB -#endif __HEAD .stabs "arch/powerpc/kernel/",N_SO,0,0,0f @@ -432,7 +421,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) SystemCall: SYSCALL_ENTRY 0xc00 -/* Single step - not used on 601 */ EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD) @@ -974,8 +962,7 @@ load_up_mmu: lwz r6,_SDR1@l(r6) mtspr SPRN_SDR1,r6 -/* Load the BAT registers with the values set up by MMU_init. - MMU_init takes care of whether we're on a 601 or not. */ +/* Load the BAT registers with the values set up by MMU_init. */ lis r3,BATS@ha addi r3,r3,BATS@l tophys(r3,r3) @@ -1152,7 +1139,6 @@ EXPORT_SYMBOL(switch_mmu_context) clear_bats: li r10,0 -#ifndef CONFIG_PPC_BOOK3S_601 mtspr SPRN_DBAT0U,r10 mtspr SPRN_DBAT0L,r10 mtspr SPRN_DBAT1U,r10 @@ -1161,7 +1147,6 @@ clear_bats: mtspr SPRN_DBAT2L,r10 mtspr SPRN_DBAT3U,r10 mtspr SPRN_DBAT3L,r10 -#endif mtspr SPRN_IBAT0U,r10 mtspr SPRN_IBAT0L,r10 mtspr SPRN_IBAT1U,r10 @@ -1252,26 +1237,9 @@ mmu_off: sync RFI -/* - * On 601, we use 3 BATs to map up to 24M of RAM at _PAGE_OFFSET - * (we keep one for debugging) and on others, we use one 256M BAT. - */ +/* We use one BAT to map up to 256M of RAM at _PAGE_OFFSET */ initial_bats: lis r11,PAGE_OFFSET@h -#ifdef CONFIG_PPC_BOOK3S_601 - ori r11,r11,4 /* set up BAT registers for 601 */ - li r8,0x7f /* valid, block length = 8MB */ - mtspr SPRN_IBAT0U,r11 /* N.B. 601 has valid bit in */ - mtspr SPRN_IBAT0L,r8 /* lower BAT register */ - addis r11,r11,0x800000@h - addis r8,r8,0x800000@h - mtspr SPRN_IBAT1U,r11 - mtspr SPRN_IBAT1L,r8 - addis r11,r11,0x800000@h - addis r8,r8,0x800000@h - mtspr SPRN_IBAT2U,r11 - mtspr SPRN_IBAT2L,r8 -#else tophys(r8,r11) #ifdef CONFIG_SMP ori r8,r8,0x12 /* R/W access, M=1 */ @@ -1280,11 +1248,10 @@ initial_bats: #endif /* CONFIG_SMP */ ori r11,r11,BL_256M<<2|0x2 /* set up BAT registers for 604 */ - mtspr SPRN_DBAT0L,r8 /* N.B. 6xx (not 601) have valid */ + mtspr SPRN_DBAT0L,r8 /* N.B. 
6xx have valid */ mtspr SPRN_DBAT0U,r11 /* bit in upper BAT register */ mtspr SPRN_IBAT0L,r8 mtspr SPRN_IBAT0U,r11 -#endif isync blr @@ -1302,13 +1269,8 @@ setup_disp_bat: beqlr lwz r11,0(r8) lwz r8,4(r8) -#ifndef CONFIG_PPC_BOOK3S_601 mtspr SPRN_DBAT3L,r8 mtspr SPRN_DBAT3U,r11 -#else - mtspr SPRN_IBAT3L,r8 - mtspr SPRN_IBAT3U,r11 -#endif blr #endif /* CONFIG_BOOTX_TEXT */ diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 1823706ae076..057d6b8e9bb0 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -223,6 +223,6 @@ __init void initialize_cache_info(void) dcache_bsize = cur_cpu_spec->dcache_bsize; icache_bsize = cur_cpu_spec->icache_bsize; ucache_bsize = 0; - if (IS_ENABLED(CONFIG_PPC_BOOK3S_601) || IS_ENABLED(CONFIG_E200)) + if (IS_ENABLED(CONFIG_E200)) ucache_bsize = icache_bsize = dcache_bsize; } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index d1ebe152f210..c5f39f13e96e 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -529,9 +529,6 @@ out: * Check if the NIP corresponds to the address of a sync * instruction for which there is an entry in the exception * table. - * Note that the 601 only takes a machine check on TEA - * (transfer error ack) signal assertion, and does not - * set any of the top 16 bits of SRR1. * -- paulus. */ static inline int check_io_access(struct pt_regs *regs) @@ -796,7 +793,6 @@ int machine_check_generic(struct pt_regs *regs) case 0x80000: pr_cont("Machine check signal\n"); break; - case 0: /* for 601 */ case 0x40000: case 0x140000: /* 7450 MSS error and TEA */ pr_cont("Transfer error ack signal\n"); diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S index 217bb630f8f9..1d23e2771dba 100644 --- a/arch/powerpc/kernel/vdso32/datapage.S +++ b/arch/powerpc/kernel/vdso32/datapage.S @@ -47,7 +47,6 @@ V_FUNCTION_END(__kernel_get_syscall_map) * * returns the timebase frequency in HZ */ -#ifndef CONFIG_PPC_BOOK3S_601 V_FUNCTION_BEGIN(__kernel_get_tbfreq) .cfi_startproc mflr r12 @@ -60,4 +59,3 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq) blr .cfi_endproc V_FUNCTION_END(__kernel_get_tbfreq) -#endif diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S index 5206c2eb2a1d..7eadac74c7f9 100644 --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -144,13 +144,11 @@ VERSION __kernel_datapage_offset; __kernel_get_syscall_map; -#ifndef CONFIG_PPC_BOOK3S_601 __kernel_gettimeofday; __kernel_clock_gettime; __kernel_clock_getres; __kernel_time; __kernel_get_tbfreq; -#endif __kernel_sync_dicache; __kernel_sync_dicache_p5; __kernel_sigtramp32; diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index d426eaf76bb0..771d607f1a3d 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -74,14 +74,7 @@ static int find_free_bat(void) { int b; - if (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) { - for (b = 0; b < 4; b++) { - struct ppc_bat *bat = BATS[b]; - - if (!(bat[0].batl & 0x40)) - return b; - } - } else { + { int n = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4; for (b = 0; b < n; b++) { @@ -97,7 +90,7 @@ static int find_free_bat(void) /* * This function calculates the size of the larger block usable to map the * beginning of an area based on the start address and size of that area: - * - max block size is 8M on 601 and 256 on other 6xx. + * - max block size is 256 on 6xx. 
* - base address must be aligned to the block size. So the maximum block size * is identified by the lowest bit set to 1 in the base address (for instance * if base is 0x16000000, max size is 0x02000000). @@ -106,7 +99,7 @@ static int find_free_bat(void) */ static unsigned int block_size(unsigned long base, unsigned long top) { - unsigned int max_size = IS_ENABLED(CONFIG_PPC_BOOK3S_601) ? SZ_8M : SZ_256M; + unsigned int max_size = SZ_256M; unsigned int base_shift = (ffs(base) - 1) & 31; unsigned int block_shift = (fls(top - base) - 1) & 31; @@ -117,7 +110,6 @@ static unsigned int block_size(unsigned long base, unsigned long top) * Set up one of the IBAT (block address translation) register pairs. * The parameters are not checked; in particular size must be a power * of 2 between 128k and 256M. - * Only for 603+ ... */ static void setibat(int index, unsigned long virt, phys_addr_t phys, unsigned int size, pgprot_t prot) @@ -214,9 +206,6 @@ void mmu_mark_initmem_nx(void) unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; unsigned long size; - if (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) - return; - for (i = 0; i < nb - 1 && base < top && top - base > (128 << 10);) { size = block_size(base, top); setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT); @@ -253,9 +242,6 @@ void mmu_mark_rodata_ro(void) int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4; int i; - if (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) - return; - for (i = 0; i < nb; i++) { struct ppc_bat *bat = BATS[i]; @@ -294,8 +280,7 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys, flags &= ~_PAGE_COHERENT; bl = (size >> 17) - 1; - if (!IS_ENABLED(CONFIG_PPC_BOOK3S_601)) { - /* 603, 604, etc. */ + { /* Do DBAT first */ wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | _PAGE_COHERENT | _PAGE_GUARDED); @@ -312,16 +297,6 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys, bat[0] = bat[1]; else bat[0].batu = bat[0].batl = 0; - } else { - /* 601 cpu */ - if (bl > BL_8M) - bl = BL_8M; - wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE - | _PAGE_COHERENT); - wimgxpp |= (flags & _PAGE_RW)? - ((flags & _PAGE_USER)? PP_RWRW: PP_RWXX): PP_RXRX; - bat->batu = virt | wimgxpp | 4; /* Ks=0, Ku=1 */ - bat->batl = phys | bl | 0x40; /* V=1 */ } bat_addrs[index].start = virt; @@ -474,11 +449,7 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base, */ BUG_ON(first_memblock_base != 0); - /* 601 can only access 16MB at the moment */ - if (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) - memblock_set_current_limit(min_t(u64, first_memblock_size, 0x01000000)); - else /* Anything else has 256M mapped */ - memblock_set_current_limit(min_t(u64, first_memblock_size, 0x10000000)); + memblock_set_current_limit(min_t(u64, first_memblock_size, SZ_256M)); } void __init print_system_hash_info(void) diff --git a/arch/powerpc/mm/ptdump/bats.c b/arch/powerpc/mm/ptdump/bats.c index e29b338d499f..c4c628b03cf8 100644 --- a/arch/powerpc/mm/ptdump/bats.c +++ b/arch/powerpc/mm/ptdump/bats.c @@ -12,62 +12,6 @@ #include "ptdump.h" -static char *pp_601(int k, int pp) -{ - if (pp == 0) - return k ? " " : "rwx"; - if (pp == 1) - return k ? 
"r x" : "rwx"; - if (pp == 2) - return "rwx"; - return "r x"; -} - -static void bat_show_601(struct seq_file *m, int idx, u32 lower, u32 upper) -{ - u32 blpi = upper & 0xfffe0000; - u32 k = (upper >> 2) & 3; - u32 pp = upper & 3; - phys_addr_t pbn = PHYS_BAT_ADDR(lower); - u32 bsm = lower & 0x3ff; - u32 size = (bsm + 1) << 17; - - seq_printf(m, "%d: ", idx); - if (!(lower & 0x40)) { - seq_puts(m, " -\n"); - return; - } - - seq_printf(m, "0x%08x-0x%08x ", blpi, blpi + size - 1); -#ifdef CONFIG_PHYS_64BIT - seq_printf(m, "0x%016llx ", pbn); -#else - seq_printf(m, "0x%08x ", pbn); -#endif - pt_dump_size(m, size); - - seq_printf(m, "Kernel %s User %s", pp_601(k & 2, pp), pp_601(k & 1, pp)); - - seq_puts(m, lower & _PAGE_WRITETHRU ? "w " : " "); - seq_puts(m, lower & _PAGE_NO_CACHE ? "i " : " "); - seq_puts(m, lower & _PAGE_COHERENT ? "m " : " "); - seq_puts(m, "\n"); -} - -#define BAT_SHOW_601(_m, _n, _l, _u) bat_show_601(_m, _n, mfspr(_l), mfspr(_u)) - -static int bats_show_601(struct seq_file *m, void *v) -{ - seq_puts(m, "---[ Block Address Translation ]---\n"); - - BAT_SHOW_601(m, 0, SPRN_IBAT0L, SPRN_IBAT0U); - BAT_SHOW_601(m, 1, SPRN_IBAT1L, SPRN_IBAT1U); - BAT_SHOW_601(m, 2, SPRN_IBAT2L, SPRN_IBAT2U); - BAT_SHOW_601(m, 3, SPRN_IBAT3L, SPRN_IBAT3U); - - return 0; -} - static void bat_show_603(struct seq_file *m, int idx, u32 lower, u32 upper, bool is_d) { u32 bepi = upper & 0xfffe0000; @@ -146,9 +90,6 @@ static int bats_show_603(struct seq_file *m, void *v) static int bats_open(struct inode *inode, struct file *file) { - if (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) - return single_open(file, bats_show_601, NULL); - return single_open(file, bats_show_603, NULL); } diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index f002b0fa69b8..2e2cc0c75d87 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -284,7 +284,7 @@ static void __init pmac_setup_arch(void) /* 604, G3, G4 etc. */ loops_per_jiffy = *fp / HZ; else - /* 601, 603, etc. */ + /* 603, etc. */ loops_per_jiffy = *fp / (2 * HZ); of_node_put(cpu); break; diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index a6fedcfb714f..74ebe664b016 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -270,10 +270,6 @@ static void __init smp_psurge_probe(void) int i, ncpus; struct device_node *dn; - /* We don't do SMP on the PPC601 -- paulus */ - if (PVR_VER(mfspr(SPRN_PVR)) == 1) - return; - /* * The powersurge cpu board can be used in the generation * of powermacs that have a socket for an upgradeable cpu card, -- cgit v1.2.3 From 2e38ea486615bddbc7a42d002aee93a3a9e7a36f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 29 Sep 2020 06:48:37 +0000 Subject: powerpc: Tidy up a bit after removal of PowerPC 601. The removal of the 601 left some standalone blocks from former if/else. Drop the { } and re-indent. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/31c4cd093963f22831bf388449056ee045533d3b.1601362098.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/btext.c | 11 ++++------- arch/powerpc/mm/book3s32/mmu.c | 45 +++++++++++++++++++----------------------- 2 files changed, 24 insertions(+), 32 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index b609fb39dba8..c22a8e0dbc93 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -95,13 +95,10 @@ void __init btext_prepare_BAT(void) boot_text_mapped = 0; return; } - { - /* 603, 604, G3, G4, ... */ - lowbits = addr & ~0xFF000000UL; - addr &= 0xFF000000UL; - disp_BAT[0] = vaddr | (BL_16M<<2) | 2; - disp_BAT[1] = addr | (_PAGE_NO_CACHE | _PAGE_GUARDED | BPP_RW); - } + lowbits = addr & ~0xFF000000UL; + addr &= 0xFF000000UL; + disp_BAT[0] = vaddr | (BL_16M<<2) | 2; + disp_BAT[1] = addr | (_PAGE_NO_CACHE | _PAGE_GUARDED | BPP_RW); logicalDisplayBase = (void *) (vaddr + lowbits); } #endif diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 771d607f1a3d..741e4fc990c7 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -73,16 +73,13 @@ unsigned long p_block_mapped(phys_addr_t pa) static int find_free_bat(void) { int b; + int n = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4; - { - int n = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4; + for (b = 0; b < n; b++) { + struct ppc_bat *bat = BATS[b]; - for (b = 0; b < n; b++) { - struct ppc_bat *bat = BATS[b]; - - if (!(bat[1].batu & 3)) - return b; - } + if (!(bat[1].batu & 3)) + return b; } return -1; } @@ -280,24 +277,22 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys, flags &= ~_PAGE_COHERENT; bl = (size >> 17) - 1; - { - /* Do DBAT first */ - wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE - | _PAGE_COHERENT | _PAGE_GUARDED); - wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX; - bat[1].batu = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */ - bat[1].batl = BAT_PHYS_ADDR(phys) | wimgxpp; - if (flags & _PAGE_USER) - bat[1].batu |= 1; /* Vp = 1 */ - if (flags & _PAGE_GUARDED) { - /* G bit must be zero in IBATs */ - flags &= ~_PAGE_EXEC; - } - if (flags & _PAGE_EXEC) - bat[0] = bat[1]; - else - bat[0].batu = bat[0].batl = 0; + /* Do DBAT first */ + wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE + | _PAGE_COHERENT | _PAGE_GUARDED); + wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX; + bat[1].batu = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */ + bat[1].batl = BAT_PHYS_ADDR(phys) | wimgxpp; + if (flags & _PAGE_USER) + bat[1].batu |= 1; /* Vp = 1 */ + if (flags & _PAGE_GUARDED) { + /* G bit must be zero in IBATs */ + flags &= ~_PAGE_EXEC; } + if (flags & _PAGE_EXEC) + bat[0] = bat[1]; + else + bat[0].batu = bat[0].batl = 0; bat_addrs[index].start = virt; bat_addrs[index].limit = virt + ((bl + 1) << 17) - 1; -- cgit v1.2.3 From a4c5a355422920bcbfe3fd1f01aead2d3a2a820c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 29 Sep 2020 06:48:38 +0000 Subject: powerpc: Remove __USE_RTC() Now that PowerPC 601 is gone, __USE_RTC() is never true. Remove it. 
That also leads to removing get_rtc() and get_rtcl() Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4757e1ed21fe1968c761ae081d1f3d790a9673f8.1601362098.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/time.h | 28 +--------------------- arch/powerpc/kernel/time.c | 52 +++++++---------------------------------- 2 files changed, 9 insertions(+), 71 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index ce065589192a..caf68a4bc19e 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -38,9 +38,6 @@ struct div_result { u64 result_low; }; -/* Accessor functions for the timebase (RTC on 601) registers. */ -#define __USE_RTC() (0) - #ifdef CONFIG_PPC64 /* For compatibility, get_tbl() is defined as get_tb() on ppc64 */ @@ -59,25 +56,6 @@ static inline unsigned int get_tbu(void) } #endif /* !CONFIG_PPC64 */ -static inline unsigned int get_rtcl(void) -{ - unsigned int rtcl; - - asm volatile("mfrtcl %0" : "=r" (rtcl)); - return rtcl; -} - -static inline u64 get_rtc(void) -{ - unsigned int hi, lo, hi2; - - do { - asm volatile("mfrtcu %0; mfrtcl %1; mfrtcu %2" - : "=r" (hi), "=r" (lo), "=r" (hi2)); - } while (hi2 != hi); - return (u64)hi * 1000000000 + lo; -} - static inline u64 get_vtb(void) { #ifdef CONFIG_PPC_BOOK3S_64 @@ -109,7 +87,7 @@ static inline u64 get_tb(void) static inline u64 get_tb_or_rtc(void) { - return __USE_RTC() ? get_rtc() : get_tb(); + return get_tb(); } static inline void set_tb(unsigned int upper, unsigned int lower) @@ -153,10 +131,6 @@ static inline void set_dec(u64 val) static inline unsigned long tb_ticks_since(unsigned long tstamp) { - if (__USE_RTC()) { - int delta = get_rtcl() - (unsigned int) tstamp; - return delta < 0 ? delta + 1000000000 : delta; - } return get_tbl() - tstamp; } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index f85539ebb513..13c820c15d37 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -75,15 +75,6 @@ #include #include -static u64 rtc_read(struct clocksource *); -static struct clocksource clocksource_rtc = { - .name = "rtc", - .rating = 400, - .flags = CLOCK_SOURCE_IS_CONTINUOUS, - .mask = CLOCKSOURCE_MASK(64), - .read = rtc_read, -}; - static u64 timebase_read(struct clocksource *); static struct clocksource clocksource_timebase = { .name = "timebase", @@ -447,19 +438,9 @@ void vtime_flush(struct task_struct *tsk) void __delay(unsigned long loops) { unsigned long start; - int diff; spin_begin(); - if (__USE_RTC()) { - start = get_rtcl(); - do { - /* the RTCL register wraps at 1000000000 */ - diff = get_rtcl() - start; - if (diff < 0) - diff += 1000000000; - spin_cpu_relax(); - } while (diff < loops); - } else if (tb_invalid) { + if (tb_invalid) { /* * TB is in error state and isn't ticking anymore. * HMI handler was unable to recover from TB error. 
@@ -696,8 +677,6 @@ EXPORT_SYMBOL_GPL(tb_to_ns); */ notrace unsigned long long sched_clock(void) { - if (__USE_RTC()) - return get_rtc(); return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift; } @@ -847,11 +826,6 @@ void read_persistent_clock64(struct timespec64 *ts) } /* clocksource code */ -static notrace u64 rtc_read(struct clocksource *cs) -{ - return (u64)get_rtc(); -} - static notrace u64 timebase_read(struct clocksource *cs) { return (u64)get_tb(); @@ -948,12 +922,7 @@ void update_vsyscall_tz(void) static void __init clocksource_init(void) { - struct clocksource *clock; - - if (__USE_RTC()) - clock = &clocksource_rtc; - else - clock = &clocksource_timebase; + struct clocksource *clock = &clocksource_timebase; if (clocksource_register_hz(clock, tb_ticks_per_sec)) { printk(KERN_ERR "clocksource: %s is already registered\n", @@ -1071,17 +1040,12 @@ void __init time_init(void) u64 scale; unsigned shift; - if (__USE_RTC()) { - /* 601 processor: dec counts down by 128 every 128ns */ - ppc_tb_freq = 1000000000; - } else { - /* Normal PowerPC with timebase register */ - ppc_md.calibrate_decr(); - printk(KERN_DEBUG "time_init: decrementer frequency = %lu.%.6lu MHz\n", - ppc_tb_freq / 1000000, ppc_tb_freq % 1000000); - printk(KERN_DEBUG "time_init: processor frequency = %lu.%.6lu MHz\n", - ppc_proc_freq / 1000000, ppc_proc_freq % 1000000); - } + /* Normal PowerPC with timebase register */ + ppc_md.calibrate_decr(); + printk(KERN_DEBUG "time_init: decrementer frequency = %lu.%.6lu MHz\n", + ppc_tb_freq / 1000000, ppc_tb_freq % 1000000); + printk(KERN_DEBUG "time_init: processor frequency = %lu.%.6lu MHz\n", + ppc_proc_freq / 1000000, ppc_proc_freq % 1000000); tb_ticks_per_jiffy = ppc_tb_freq / HZ; tb_ticks_per_sec = ppc_tb_freq; -- cgit v1.2.3 From 6601ec1c2ba929430f5585ce7f9d9960b0e0a01d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 29 Sep 2020 06:48:39 +0000 Subject: powerpc: Remove get_tb_or_rtc() 601 is gone, get_tb_or_rtc() is equivalent to get_tb(). Replace the former by the later. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3e8a13ee83418630c753c30cb722ae682d5b2d39.1601362098.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/time.h | 5 ----- arch/powerpc/kernel/irq.c | 2 +- arch/powerpc/kernel/time.c | 6 +++--- 3 files changed, 4 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index caf68a4bc19e..410ed72eef1c 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -85,11 +85,6 @@ static inline u64 get_tb(void) } #endif /* !CONFIG_PPC64 */ -static inline u64 get_tb_or_rtc(void) -{ - return get_tb(); -} - static inline void set_tb(unsigned int upper, unsigned int lower) { mtspr(SPRN_TBWL, 0); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 631e6d236c97..7d0f7682d01d 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -104,7 +104,7 @@ static inline notrace unsigned long get_irq_happened(void) static inline notrace int decrementer_check_overflow(void) { - u64 now = get_tb_or_rtc(); + u64 now = get_tb(); u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); return now >= *next_tb; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 13c820c15d37..760ea359a7f7 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -595,7 +595,7 @@ void timer_interrupt(struct pt_regs *regs) irq_work_run(); } - now = get_tb_or_rtc(); + now = get_tb(); if (now >= *next_tb) { *next_tb = ~(u64)0; if (evt->event_handler) @@ -937,7 +937,7 @@ static void __init clocksource_init(void) static int decrementer_set_next_event(unsigned long evt, struct clock_event_device *dev) { - __this_cpu_write(decrementers_next_tb, get_tb_or_rtc() + evt); + __this_cpu_write(decrementers_next_tb, get_tb() + evt); set_dec(evt); /* We may have raced with new irq work */ @@ -1071,7 +1071,7 @@ void __init time_init(void) tb_to_ns_scale = scale; tb_to_ns_shift = shift; /* Save the current timebase to pretty up CONFIG_PRINTK_TIME */ - boot_tb = get_tb_or_rtc(); + boot_tb = get_tb(); /* If platform provided a timezone (pmac), we correct the time */ if (timezone_offset) { -- cgit v1.2.3 From 63f9d9df5ed0d4f3a2c0cd08730e1cae1edd11bf Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 1 Oct 2020 10:59:19 +0000 Subject: powerpc/time: Remove ifdef in get_dec() and set_dec() Move SPRN_PIT definition in reg.h. This allows to remove ifdef in get_dec() and set_dec() and makes them more readable. 
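A rough, self-contained sketch of the pattern being applied here (the CONFIG_ values, the IS_ENABLED() shim and the mtspr()/SPRN_ stand-ins below are illustrative, not the kernel's definitions): IS_ENABLED() evaluates to a compile-time constant, so the compiler still parses and type-checks every branch and then discards the dead ones, which is what lets the #ifdef blocks collapse into plain if/else.

/* Standalone illustration of trading #ifdef for IS_ENABLED()-style
 * compile-time constants; all values below are stand-ins. */
#include <stdio.h>
#include <stdint.h>

#define CONFIG_40x   0               /* pretend we build for a classic 6xx */
#define CONFIG_BOOKE 0
#define IS_ENABLED(option) (option)  /* the real kernel macro is fancier */

enum { SPRN_PIT, SPRN_DEC, SPR_COUNT };
static uint64_t spr[SPR_COUNT];

static void mtspr(int reg, uint64_t val) { spr[reg] = val; }

static void set_dec(uint64_t val)
{
	if (IS_ENABLED(CONFIG_40x))
		mtspr(SPRN_PIT, (uint32_t)val);
	else if (IS_ENABLED(CONFIG_BOOKE))
		mtspr(SPRN_DEC, val);
	else
		mtspr(SPRN_DEC, val - 1);  /* classic decrementer takes val - 1 */
}

int main(void)
{
	set_dec(100);
	printf("SPRN_DEC = %llu\n", (unsigned long long)spr[SPRN_DEC]);
	return 0;
}

Compared with #ifdef, a config change can no longer bit-rot an unbuilt branch, since every branch is compiled every time.
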
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3c9a6eb0fc040868ac59be66f338d08fd017668d.1601549945.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/reg.h | 2 ++ arch/powerpc/include/asm/reg_booke.h | 1 - arch/powerpc/include/asm/time.h | 23 ++++++++++------------- 3 files changed, 12 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index d25c357a873c..788058af1d44 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -521,6 +521,8 @@ #define SPRN_TSCR 0x399 /* Thread Switch Control Register */ #define SPRN_DEC 0x016 /* Decrement Register */ +#define SPRN_PIT 0x3DB /* Programmable Interval Timer (40x/BOOKE) */ + #define SPRN_DER 0x095 /* Debug Enable Register */ #define DER_RSTE 0x40000000 /* Reset Interrupt */ #define DER_CHSTPE 0x20000000 /* Check Stop */ diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index ff30f1076162..29a948e0c0f2 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -174,7 +174,6 @@ #define SPRN_L1CSR1 0x3F3 /* L1 Cache Control and Status Register 1 */ #define SPRN_MMUCSR0 0x3F4 /* MMU Control and Status Register 0 */ #define SPRN_MMUCFG 0x3F7 /* MMU Configuration Register */ -#define SPRN_PIT 0x3DB /* Programmable Interval Timer */ #define SPRN_BUCSR 0x3F5 /* Branch Unit Control and Status */ #define SPRN_L2CSR0 0x3F9 /* L2 Data Cache Control and Status Register 0 */ #define SPRN_L2CSR1 0x3FA /* L2 Data Cache Control and Status Register 1 */ diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 410ed72eef1c..a0c8ae4cb27c 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -100,11 +100,10 @@ static inline void set_tb(unsigned int upper, unsigned int lower) */ static inline u64 get_dec(void) { -#if defined(CONFIG_40x) - return (mfspr(SPRN_PIT)); -#else - return (mfspr(SPRN_DEC)); -#endif + if (IS_ENABLED(CONFIG_40x)) + return mfspr(SPRN_PIT); + + return mfspr(SPRN_DEC); } /* @@ -114,14 +113,12 @@ static inline u64 get_dec(void) */ static inline void set_dec(u64 val) { -#if defined(CONFIG_40x) - mtspr(SPRN_PIT, (u32) val); -#else -#ifndef CONFIG_BOOKE - --val; -#endif - mtspr(SPRN_DEC, val); -#endif /* not 40x */ + if (IS_ENABLED(CONFIG_40x)) + mtspr(SPRN_PIT, (u32)val); + else if (IS_ENABLED(CONFIG_BOOKE)) + mtspr(SPRN_DEC, val); + else + mtspr(SPRN_DEC, val - 1); } static inline unsigned long tb_ticks_since(unsigned long tstamp) -- cgit v1.2.3 From 69a1593abdbcf03a76367320d929a8ae7a5e3d71 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 1 Oct 2020 15:35:38 +0000 Subject: powerpc/32s: Setup the early hash table at all time. At the time being, an early hash table is set up when CONFIG_KASAN is selected. There is nothing wrong with setting such an early hash table all the time, even if it is not used. This is a statically allocated 256 kB table which lies in the init data section. This makes the code simpler and may in the future allow to setup early IO mappings with fixmap instead of hard coding BATs. Put create_hpte() and flush_hash_pages() in the .ref.text section in order to avoid warning for the reference to early_hash[]. This reference is removed by MMU_init_hw_patch() before init memory is freed. 
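A rough standalone sketch of the kind of table the patch introduces (the 256 kB size and the self-alignment mirror the patch; the SDR1-style packing is a simplification and the rest is illustrative): aligning the table to its own size keeps the low bits of its base address zero, so the base can simply be OR'ed with the size code when the register is programmed, much as the early hash setup does.

/* Standalone illustration: a statically allocated table aligned to its
 * own size; sizes mirror the patch, the register packing is simplified. */
#include <stdio.h>
#include <stdint.h>

#define SZ_256K (256u * 1024u)

/* In the kernel this array is __initdata; here it is just a big static. */
static uint8_t early_hash[SZ_256K] __attribute__((aligned(SZ_256K)));

int main(void)
{
	uintptr_t base = (uintptr_t)early_hash;
	uintptr_t packed = base | 0x3;   /* base OR'ed with a size code */

	/* Because the base is size-aligned, OR-ing in low mask bits
	 * cannot corrupt the address part. */
	printf("base is 256K aligned: %s\n", (base % SZ_256K) == 0 ? "yes" : "no");
	printf("packed value: 0x%lx\n", (unsigned long)packed);
	return 0;
}

In the kernel the array additionally lives in init data, which is why create_hpte() and flush_hash_pages() move to .ref.text: they reference init memory on purpose, and the reference goes away before that memory is freed.
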
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b8f8101c368b8a6451844a58d7bd7d83c14cf2aa.1601566529.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_32.S | 13 +++++-------- arch/powerpc/mm/book3s32/hash_low.S | 9 +++++++-- arch/powerpc/mm/book3s32/mmu.c | 14 +++++--------- arch/powerpc/mm/kasan/kasan_init_32.c | 19 ------------------- 4 files changed, 17 insertions(+), 38 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index b14524d4534c..6dc77419147e 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -155,9 +155,9 @@ __after_mmu_off: bl initial_bats bl load_segment_registers -#ifdef CONFIG_KASAN +BEGIN_MMU_FTR_SECTION bl early_hash_table -#endif +END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) #if defined(CONFIG_BOOTX_TEXT) bl setup_disp_bat #endif @@ -936,7 +936,6 @@ _ENTRY(__restore_cpu_setup) * Load stuff into the MMU. Intended to be called with * IR=0 and DR=0. */ -#ifdef CONFIG_KASAN early_hash_table: sync /* Force all PTE updates to finish */ isync @@ -947,8 +946,10 @@ early_hash_table: lis r6, early_hash - PAGE_OFFSET@h ori r6, r6, 3 /* 256kB table */ mtspr SPRN_SDR1, r6 + lis r6, early_hash@h + lis r3, Hash@ha + stw r6, Hash@l(r3) blr -#endif load_up_mmu: sync /* Force all PTE updates to finish */ @@ -1037,11 +1038,7 @@ start_here: bl machine_init bl __save_cpu_setup bl MMU_init -#ifdef CONFIG_KASAN -BEGIN_MMU_FTR_SECTION bl MMU_init_hw_patch -END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) -#endif /* * Go back to running unmapped so we can load up new values diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index 3143de6ae769..b2c912e517b9 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -15,6 +15,7 @@ */ #include +#include #include #include #include @@ -284,9 +285,9 @@ _ASM_NOKPROBE_SYMBOL(add_hash_page) * * For speed, 4 of the instructions get patched once the size and * physical address of the hash table are known. These definitions - * of Hash_base and Hash_bits below are just an example. + * of Hash_base and Hash_bits below are for the early hash table. */ -Hash_base = 0xc0180000 +Hash_base = early_hash Hash_bits = 12 /* e.g. 256kB hash table */ Hash_msk = (((1 << Hash_bits) - 1) * 64) @@ -307,6 +308,7 @@ Hash_msk = (((1 << Hash_bits) - 1) * 64) #define HASH_LEFT 31-(LG_PTEG_SIZE+Hash_bits-1) #define HASH_RIGHT 31-LG_PTEG_SIZE +__REF _GLOBAL(create_hpte) /* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */ rlwinm r8,r5,32-9,30,30 /* _PAGE_RW -> PP msb */ @@ -473,6 +475,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) sync /* make sure pte updates get to memory */ blr + .previous _ASM_NOKPROBE_SYMBOL(create_hpte) .section .bss @@ -493,6 +496,7 @@ htab_hash_searches: * * We assume that there is a hash table in use (Hash != 0). 
*/ +__REF _GLOBAL(flush_hash_pages) /* * We disable interrupts here, even on UP, because we want @@ -626,6 +630,7 @@ _GLOBAL(flush_hash_pages) 19: mtmsr r10 isync blr + .previous EXPORT_SYMBOL(flush_hash_pages) _ASM_NOKPROBE_SYMBOL(flush_hash_pages) diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 741e4fc990c7..a59e7ec98180 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -31,6 +31,8 @@ #include +u8 __initdata early_hash[SZ_256K] __aligned(SZ_256K) = {0}; + struct hash_pte *Hash; static unsigned long Hash_size, Hash_mask; unsigned long _SDR1; @@ -395,15 +397,6 @@ void __init MMU_init_hw(void) hash_mb2 = hash_mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg; if (lg_n_hpteg > 16) hash_mb2 = 16 - LG_HPTEG_SIZE; - - /* - * When KASAN is selected, there is already an early temporary hash - * table and the switch to the final hash table is done later. - */ - if (IS_ENABLED(CONFIG_KASAN)) - return; - - MMU_init_hw_patch(); } void __init MMU_init_hw_patch(void) @@ -411,6 +404,9 @@ void __init MMU_init_hw_patch(void) unsigned int hmask = Hash_mask >> (16 - LG_HPTEG_SIZE); unsigned int hash = (unsigned int)Hash - PAGE_OFFSET; + if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) + return; + if (ppc_md.progress) ppc_md.progress("hash:patch", 0x345); if (ppc_md.progress) diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index 929716ea21e9..59f61efc43af 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -174,22 +174,6 @@ void __init kasan_late_init(void) kasan_unmap_early_shadow_vmalloc(); } -#ifdef CONFIG_PPC_BOOK3S_32 -u8 __initdata early_hash[256 << 10] __aligned(256 << 10) = {0}; - -static void __init kasan_early_hash_table(void) -{ - unsigned int hash = __pa(early_hash); - - modify_instruction_site(&patch__hash_page_A0, 0xffff, hash >> 16); - modify_instruction_site(&patch__flush_hash_A0, 0xffff, hash >> 16); - - Hash = (struct hash_pte *)early_hash; -} -#else -static void __init kasan_early_hash_table(void) {} -#endif - void __init kasan_early_init(void) { unsigned long addr = KASAN_SHADOW_START; @@ -205,7 +189,4 @@ void __init kasan_early_init(void) next = pgd_addr_end(addr, end); pmd_populate_kernel(&init_mm, pmd, kasan_early_shadow_pte); } while (pmd++, addr = next, addr != end); - - if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) - kasan_early_hash_table(); } -- cgit v1.2.3 From 533090e5a980ad80bbe0961b4ed45a9bcf55cc0c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 6 Oct 2020 09:05:26 +0000 Subject: powerpc/32s: Rename head_32.S to head_book3s_32.S Unlike PPC64 which had a single head_64.S, PPC32 are multiple ones. There is the head_32.S, selected by default based on the value of BITS and overridden based on some CONFIG_ values. This leads to thinking that it may be selected by different types of PPC32 platform but indeed it ends up being selected by book3s/32 only. Make that explicit by: - Not doing any default selection based on BITS. - Renaming head_32.S to head_book3s_32.S. - Get head_book3s_32.S selected only by CONFIG_PPC_BOOK3S_32. 
Signed-off-by: Christophe Leroy [mpe: Fix head_$(BITS).o reference in arch/powerpc/Makefile] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/319d379f696412681c66a987cc75e6abf8f958d2.1601975100.git.christophe.leroy@csgroup.eu --- arch/powerpc/Makefile | 3 +- arch/powerpc/kernel/Makefile | 3 +- arch/powerpc/kernel/head_32.S | 1366 ---------------------------------- arch/powerpc/kernel/head_book3s_32.S | 1366 ++++++++++++++++++++++++++++++++++ 4 files changed, 1370 insertions(+), 1368 deletions(-) delete mode 100644 arch/powerpc/kernel/head_32.S create mode 100644 arch/powerpc/kernel/head_book3s_32.S (limited to 'arch') diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 3e8da9cf2eb9..c4f9dbd12577 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -264,7 +264,8 @@ KBUILD_CFLAGS += $(cpu-as-y) KBUILD_AFLAGS += $(aflags-y) KBUILD_CFLAGS += $(cflags-y) -head-y := arch/powerpc/kernel/head_$(BITS).o +head-$(CONFIG_PPC64) := arch/powerpc/kernel/head_64.o +head-$(CONFIG_PPC_BOOK3S_32) := arch/powerpc/kernel/head_book3s_32.o head-$(CONFIG_PPC_8xx) := arch/powerpc/kernel/head_8xx.o head-$(CONFIG_40x) := arch/powerpc/kernel/head_40x.o head-$(CONFIG_44x) := arch/powerpc/kernel/head_44x.o diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index a5550c2b24c4..bf0bf1b900d2 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -95,7 +95,8 @@ obj-$(CONFIG_PPC_FSL_BOOK3E) += cpu_setup_fsl_booke.o obj-$(CONFIG_PPC_DOORBELL) += dbell.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o -extra-y := head_$(BITS).o +extra-$(CONFIG_PPC64) := head_64.o +extra-$(CONFIG_PPC_BOOK3S_32) := head_book3s_32.o extra-$(CONFIG_40x) := head_40x.o extra-$(CONFIG_44x) := head_44x.o extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S deleted file mode 100644 index 6dc77419147e..000000000000 --- a/arch/powerpc/kernel/head_32.S +++ /dev/null @@ -1,1366 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * PowerPC version - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP - * Copyright (C) 1996 Cort Dougan - * Adapted for Power Macintosh by Paul Mackerras. - * Low-level exception handlers and MMU support - * rewritten by Paul Mackerras. - * Copyright (C) 1996 Paul Mackerras. - * MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net). - * - * This file contains the low-level support and setup for the - * PowerPC platform, including trap and interrupt dispatch. - * (The PPC 8xx embedded CPUs use head_8xx.S instead.) - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "head_32.h" - -#define LOAD_BAT(n, reg, RA, RB) \ - /* see the comment for clear_bats() -- Cort */ \ - li RA,0; \ - mtspr SPRN_IBAT##n##U,RA; \ - mtspr SPRN_DBAT##n##U,RA; \ - lwz RA,(n*16)+0(reg); \ - lwz RB,(n*16)+4(reg); \ - mtspr SPRN_IBAT##n##U,RA; \ - mtspr SPRN_IBAT##n##L,RB; \ - lwz RA,(n*16)+8(reg); \ - lwz RB,(n*16)+12(reg); \ - mtspr SPRN_DBAT##n##U,RA; \ - mtspr SPRN_DBAT##n##L,RB - - __HEAD - .stabs "arch/powerpc/kernel/",N_SO,0,0,0f - .stabs "head_32.S",N_SO,0,0,0f -0: -_ENTRY(_stext); - -/* - * _start is defined this way because the XCOFF loader in the OpenFirmware - * on the powermac expects the entry point to be a procedure descriptor. 
- */ -_ENTRY(_start); - /* - * These are here for legacy reasons, the kernel used to - * need to look like a coff function entry for the pmac - * but we're always started by some kind of bootloader now. - * -- Cort - */ - nop /* used by __secondary_hold on prep (mtx) and chrp smp */ - nop /* used by __secondary_hold on prep (mtx) and chrp smp */ - nop - -/* PMAC - * Enter here with the kernel text, data and bss loaded starting at - * 0, running with virtual == physical mapping. - * r5 points to the prom entry point (the client interface handler - * address). Address translation is turned on, with the prom - * managing the hash table. Interrupts are disabled. The stack - * pointer (r1) points to just below the end of the half-meg region - * from 0x380000 - 0x400000, which is mapped in already. - * - * If we are booted from MacOS via BootX, we enter with the kernel - * image loaded somewhere, and the following values in registers: - * r3: 'BooX' (0x426f6f58) - * r4: virtual address of boot_infos_t - * r5: 0 - * - * PREP - * This is jumped to on prep systems right after the kernel is relocated - * to its proper place in memory by the boot loader. The expected layout - * of the regs is: - * r3: ptr to residual data - * r4: initrd_start or if no initrd then 0 - * r5: initrd_end - unused if r4 is 0 - * r6: Start of command line string - * r7: End of command line string - * - * This just gets a minimal mmu environment setup so we can call - * start_here() to do the real work. - * -- Cort - */ - - .globl __start -__start: -/* - * We have to do any OF calls before we map ourselves to KERNELBASE, - * because OF may have I/O devices mapped into that area - * (particularly on CHRP). - */ - cmpwi 0,r5,0 - beq 1f - -#ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE - /* find out where we are now */ - bcl 20,31,$+4 -0: mflr r8 /* r8 = runtime addr here */ - addis r8,r8,(_stext - 0b)@ha - addi r8,r8,(_stext - 0b)@l /* current runtime base addr */ - bl prom_init -#endif /* CONFIG_PPC_OF_BOOT_TRAMPOLINE */ - - /* We never return. We also hit that trap if trying to boot - * from OF while CONFIG_PPC_OF_BOOT_TRAMPOLINE isn't selected */ - trap - -/* - * Check for BootX signature when supporting PowerMac and branch to - * appropriate trampoline if it's present - */ -#ifdef CONFIG_PPC_PMAC -1: lis r31,0x426f - ori r31,r31,0x6f58 - cmpw 0,r3,r31 - bne 1f - bl bootx_init - trap -#endif /* CONFIG_PPC_PMAC */ - -1: mr r31,r3 /* save device tree ptr */ - li r24,0 /* cpu # */ - -/* - * early_init() does the early machine identification and does - * the necessary low-level setup and clears the BSS - * -- Cort - */ - bl early_init - -/* Switch MMU off, clear BATs and flush TLB. At this point, r3 contains - * the physical address we are running at, returned by early_init() - */ - bl mmu_off -__after_mmu_off: - bl clear_bats - bl flush_tlbs - - bl initial_bats - bl load_segment_registers -BEGIN_MMU_FTR_SECTION - bl early_hash_table -END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) -#if defined(CONFIG_BOOTX_TEXT) - bl setup_disp_bat -#endif -#ifdef CONFIG_PPC_EARLY_DEBUG_CPM - bl setup_cpm_bat -#endif -#ifdef CONFIG_PPC_EARLY_DEBUG_USBGECKO - bl setup_usbgecko_bat -#endif - -/* - * Call setup_cpu for CPU 0 and initialize 6xx Idle - */ - bl reloc_offset - li r24,0 /* cpu# */ - bl call_setup_cpu /* Call setup_cpu for this CPU */ -#ifdef CONFIG_PPC_BOOK3S_32 - bl reloc_offset - bl init_idle_6xx -#endif /* CONFIG_PPC_BOOK3S_32 */ - - -/* - * We need to run with _start at physical address 0. 
- * On CHRP, we are loaded at 0x10000 since OF on CHRP uses - * the exception vectors at 0 (and therefore this copy - * overwrites OF's exception vectors with our own). - * The MMU is off at this point. - */ - bl reloc_offset - mr r26,r3 - addis r4,r3,KERNELBASE@h /* current address of _start */ - lis r5,PHYSICAL_START@h - cmplw 0,r4,r5 /* already running at PHYSICAL_START? */ - bne relocate_kernel -/* - * we now have the 1st 16M of ram mapped with the bats. - * prep needs the mmu to be turned on here, but pmac already has it on. - * this shouldn't bother the pmac since it just gets turned on again - * as we jump to our code at KERNELBASE. -- Cort - * Actually no, pmac doesn't have it on any more. BootX enters with MMU - * off, and in other cases, we now turn it off before changing BATs above. - */ -turn_on_mmu: - mfmsr r0 - ori r0,r0,MSR_DR|MSR_IR|MSR_RI - mtspr SPRN_SRR1,r0 - lis r0,start_here@h - ori r0,r0,start_here@l - mtspr SPRN_SRR0,r0 - RFI /* enables MMU */ - -/* - * We need __secondary_hold as a place to hold the other cpus on - * an SMP machine, even when we are running a UP kernel. - */ - . = 0xc0 /* for prep bootloader */ - li r3,1 /* MTX only has 1 cpu */ - .globl __secondary_hold -__secondary_hold: - /* tell the master we're here */ - stw r3,__secondary_hold_acknowledge@l(0) -#ifdef CONFIG_SMP -100: lwz r4,0(0) - /* wait until we're told to start */ - cmpw 0,r4,r3 - bne 100b - /* our cpu # was at addr 0 - go */ - mr r24,r3 /* cpu # */ - b __secondary_start -#else - b . -#endif /* CONFIG_SMP */ - - .globl __secondary_hold_spinloop -__secondary_hold_spinloop: - .long 0 - .globl __secondary_hold_acknowledge -__secondary_hold_acknowledge: - .long -1 - -/* System reset */ -/* core99 pmac starts the seconary here by changing the vector, and - putting it back to what it was (unknown_exception) when done. */ - EXCEPTION(0x100, Reset, unknown_exception, EXC_XFER_STD) - -/* Machine check */ -/* - * On CHRP, this is complicated by the fact that we could get a - * machine check inside RTAS, and we have no guarantee that certain - * critical registers will have the values we expect. The set of - * registers that might have bad values includes all the GPRs - * and all the BATs. We indicate that we are in RTAS by putting - * a non-zero value, the address of the exception frame to use, - * in thread.rtas_sp. The machine check handler checks thread.rtas_sp - * and uses its value if it is non-zero. - * (Other exception handlers assume that r1 is a valid kernel stack - * pointer when we take an exception from supervisor mode.) - * -- paulus. - */ - . = 0x200 - DO_KVM 0x200 -MachineCheck: - EXCEPTION_PROLOG_0 -#ifdef CONFIG_PPC_CHRP - mfspr r11, SPRN_SPRG_THREAD - lwz r11, RTAS_SP(r11) - cmpwi cr1, r11, 0 - bne cr1, 7f -#endif /* CONFIG_PPC_CHRP */ - EXCEPTION_PROLOG_1 for_rtas=1 -7: EXCEPTION_PROLOG_2 - addi r3,r1,STACK_FRAME_OVERHEAD -#ifdef CONFIG_PPC_CHRP -#ifdef CONFIG_VMAP_STACK - mfspr r4, SPRN_SPRG_THREAD - tovirt(r4, r4) - lwz r4, RTAS_SP(r4) - cmpwi cr1, r4, 0 -#endif - beq cr1, machine_check_tramp - twi 31, 0, 0 -#else - b machine_check_tramp -#endif - -/* Data access exception. */ - . = 0x300 - DO_KVM 0x300 -DataAccess: -#ifdef CONFIG_VMAP_STACK - mtspr SPRN_SPRG_SCRATCH0,r10 - mfspr r10, SPRN_SPRG_THREAD -BEGIN_MMU_FTR_SECTION - stw r11, THR11(r10) - mfspr r10, SPRN_DSISR - mfcr r11 -#ifdef CONFIG_PPC_KUAP - andis. r10, r10, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h -#else - andis. 
r10, r10, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h -#endif - mfspr r10, SPRN_SPRG_THREAD - beq hash_page_dsi -.Lhash_page_dsi_cont: - mtcr r11 - lwz r11, THR11(r10) -END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) - mtspr SPRN_SPRG_SCRATCH1,r11 - mfspr r11, SPRN_DAR - stw r11, DAR(r10) - mfspr r11, SPRN_DSISR - stw r11, DSISR(r10) - mfspr r11, SPRN_SRR0 - stw r11, SRR0(r10) - mfspr r11, SPRN_SRR1 /* check whether user or kernel */ - stw r11, SRR1(r10) - mfcr r10 - andi. r11, r11, MSR_PR - - EXCEPTION_PROLOG_1 - b handle_page_fault_tramp_1 -#else /* CONFIG_VMAP_STACK */ - EXCEPTION_PROLOG handle_dar_dsisr=1 - get_and_save_dar_dsisr_on_stack r4, r5, r11 -BEGIN_MMU_FTR_SECTION -#ifdef CONFIG_PPC_KUAP - andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h -#else - andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h -#endif - bne handle_page_fault_tramp_2 /* if not, try to put a PTE */ - rlwinm r3, r5, 32 - 15, 21, 21 /* DSISR_STORE -> _PAGE_RW */ - bl hash_page - b handle_page_fault_tramp_1 -FTR_SECTION_ELSE - b handle_page_fault_tramp_2 -ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) -#endif /* CONFIG_VMAP_STACK */ - -/* Instruction access exception. */ - . = 0x400 - DO_KVM 0x400 -InstructionAccess: -#ifdef CONFIG_VMAP_STACK - mtspr SPRN_SPRG_SCRATCH0,r10 - mtspr SPRN_SPRG_SCRATCH1,r11 - mfspr r10, SPRN_SPRG_THREAD - mfspr r11, SPRN_SRR0 - stw r11, SRR0(r10) - mfspr r11, SPRN_SRR1 /* check whether user or kernel */ - stw r11, SRR1(r10) - mfcr r10 -BEGIN_MMU_FTR_SECTION - andis. r11, r11, SRR1_ISI_NOPT@h /* no pte found? */ - bne hash_page_isi -.Lhash_page_isi_cont: - mfspr r11, SPRN_SRR1 /* check whether user or kernel */ -END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) - andi. r11, r11, MSR_PR - - EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 -#else /* CONFIG_VMAP_STACK */ - EXCEPTION_PROLOG - andis. r0,r9,SRR1_ISI_NOPT@h /* no pte found? */ - beq 1f /* if so, try to put a PTE */ - li r3,0 /* into the hash table */ - mr r4,r12 /* SRR0 is fault address */ -BEGIN_MMU_FTR_SECTION - bl hash_page -END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) -#endif /* CONFIG_VMAP_STACK */ -1: mr r4,r12 - andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ - stw r4, _DAR(r11) - EXC_XFER_LITE(0x400, handle_page_fault) - -/* External interrupt */ - EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) - -/* Alignment exception */ - . = 0x600 - DO_KVM 0x600 -Alignment: - EXCEPTION_PROLOG handle_dar_dsisr=1 - save_dar_dsisr_on_stack r4, r5, r11 - addi r3,r1,STACK_FRAME_OVERHEAD - b alignment_exception_tramp - -/* Program check exception */ - EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) - -/* Floating-point unavailable */ - . = 0x800 - DO_KVM 0x800 -FPUnavailable: -BEGIN_FTR_SECTION -/* - * Certain Freescale cores don't have a FPU and treat fp instructions - * as a FP Unavailable exception. Redirect to illegal/emulation handling. - */ - b ProgramCheck -END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) - EXCEPTION_PROLOG - beq 1f - bl load_up_fpu /* if from user, just load it up */ - b fast_exception_return -1: addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_LITE(0x800, kernel_fp_unavailable_exception) - -/* Decrementer */ - EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) - - EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_STD) - EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_STD) - -/* System call */ - . 
= 0xc00 - DO_KVM 0xc00 -SystemCall: - SYSCALL_ENTRY 0xc00 - - EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) - EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD) - -/* - * The Altivec unavailable trap is at 0x0f20. Foo. - * We effectively remap it to 0x3000. - * We include an altivec unavailable exception vector even if - * not configured for Altivec, so that you can't panic a - * non-altivec kernel running on a machine with altivec just - * by executing an altivec instruction. - */ - . = 0xf00 - DO_KVM 0xf00 - b PerformanceMonitor - - . = 0xf20 - DO_KVM 0xf20 - b AltiVecUnavailable - -/* - * Handle TLB miss for instruction on 603/603e. - * Note: we get an alternate set of r0 - r3 to use automatically. - */ - . = 0x1000 -InstructionTLBMiss: -/* - * r0: scratch - * r1: linux style pte ( later becomes ppc hardware pte ) - * r2: ptr to linux-style pte - * r3: scratch - */ - /* Get PTE (linux-style) and check access */ - mfspr r3,SPRN_IMISS -#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) - lis r1, TASK_SIZE@h /* check if kernel address */ - cmplw 0,r1,r3 -#endif - mfspr r2, SPRN_SPRG_PGDIR -#ifdef CONFIG_SWAP - li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC -#else - li r1,_PAGE_PRESENT | _PAGE_EXEC -#endif -#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) - bgt- 112f - lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ - addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ -#endif -112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ - lwz r2,0(r2) /* get pmd entry */ - rlwinm. r2,r2,0,0,19 /* extract address of pte page */ - beq- InstructionAddressInvalid /* return if no mapping */ - rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ - lwz r0,0(r2) /* get linux-style pte */ - andc. r1,r1,r0 /* check access & ~permission */ - bne- InstructionAddressInvalid /* return if access not permitted */ - /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */ - ori r1, r1, 0xe06 /* clear out reserved bits */ - andc r1, r0, r1 /* PP = user? 1 : 0 */ -BEGIN_FTR_SECTION - rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ -END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) - mtspr SPRN_RPA,r1 - tlbli r3 - mfspr r3,SPRN_SRR1 /* Need to restore CR0 */ - mtcrf 0x80,r3 - rfi -InstructionAddressInvalid: - mfspr r3,SPRN_SRR1 - rlwinm r1,r3,9,6,6 /* Get load/store bit */ - - addis r1,r1,0x2000 - mtspr SPRN_DSISR,r1 /* (shouldn't be needed) */ - andi. r2,r3,0xFFFF /* Clear upper bits of SRR1 */ - or r2,r2,r1 - mtspr SPRN_SRR1,r2 - mfspr r1,SPRN_IMISS /* Get failing address */ - rlwinm. r2,r2,0,31,31 /* Check for little endian access */ - rlwimi r2,r2,1,30,30 /* change 1 -> 3 */ - xor r1,r1,r2 - mtspr SPRN_DAR,r1 /* Set fault address */ - mfmsr r0 /* Restore "normal" registers */ - xoris r0,r0,MSR_TGPR>>16 - mtcrf 0x80,r3 /* Restore CR0 */ - mtmsr r0 - b InstructionAccess - -/* - * Handle TLB miss for DATA Load operation on 603/603e - */ - . 
= 0x1100 -DataLoadTLBMiss: -/* - * r0: scratch - * r1: linux style pte ( later becomes ppc hardware pte ) - * r2: ptr to linux-style pte - * r3: scratch - */ - /* Get PTE (linux-style) and check access */ - mfspr r3,SPRN_DMISS - lis r1, TASK_SIZE@h /* check if kernel address */ - cmplw 0,r1,r3 - mfspr r2, SPRN_SPRG_PGDIR -#ifdef CONFIG_SWAP - li r1, _PAGE_PRESENT | _PAGE_ACCESSED -#else - li r1, _PAGE_PRESENT -#endif - bgt- 112f - lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ - addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ -112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ - lwz r2,0(r2) /* get pmd entry */ - rlwinm. r2,r2,0,0,19 /* extract address of pte page */ - beq- DataAddressInvalid /* return if no mapping */ - rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ - lwz r0,0(r2) /* get linux-style pte */ - andc. r1,r1,r0 /* check access & ~permission */ - bne- DataAddressInvalid /* return if access not permitted */ - /* - * NOTE! We are assuming this is not an SMP system, otherwise - * we would need to update the pte atomically with lwarx/stwcx. - */ - /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwinm r1,r0,32-9,30,30 /* _PAGE_RW -> PP msb */ - rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ - rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */ - ori r1,r1,0xe04 /* clear out reserved bits */ - andc r1,r0,r1 /* PP = user? rw? 1: 3: 0 */ -BEGIN_FTR_SECTION - rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ -END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) - mtspr SPRN_RPA,r1 - mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ - mtcrf 0x80,r2 -BEGIN_MMU_FTR_SECTION - li r0,1 - mfspr r1,SPRN_SPRG_603_LRU - rlwinm r2,r3,20,27,31 /* Get Address bits 15:19 */ - slw r0,r0,r2 - xor r1,r0,r1 - srw r0,r1,r2 - mtspr SPRN_SPRG_603_LRU,r1 - mfspr r2,SPRN_SRR1 - rlwimi r2,r0,31-14,14,14 - mtspr SPRN_SRR1,r2 -END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) - tlbld r3 - rfi -DataAddressInvalid: - mfspr r3,SPRN_SRR1 - rlwinm r1,r3,9,6,6 /* Get load/store bit */ - addis r1,r1,0x2000 - mtspr SPRN_DSISR,r1 - andi. r2,r3,0xFFFF /* Clear upper bits of SRR1 */ - mtspr SPRN_SRR1,r2 - mfspr r1,SPRN_DMISS /* Get failing address */ - rlwinm. r2,r2,0,31,31 /* Check for little endian access */ - beq 20f /* Jump if big endian */ - xori r1,r1,3 -20: mtspr SPRN_DAR,r1 /* Set fault address */ - mfmsr r0 /* Restore "normal" registers */ - xoris r0,r0,MSR_TGPR>>16 - mtcrf 0x80,r3 /* Restore CR0 */ - mtmsr r0 - b DataAccess - -/* - * Handle TLB miss for DATA Store on 603/603e - */ - . = 0x1200 -DataStoreTLBMiss: -/* - * r0: scratch - * r1: linux style pte ( later becomes ppc hardware pte ) - * r2: ptr to linux-style pte - * r3: scratch - */ - /* Get PTE (linux-style) and check access */ - mfspr r3,SPRN_DMISS - lis r1, TASK_SIZE@h /* check if kernel address */ - cmplw 0,r1,r3 - mfspr r2, SPRN_SPRG_PGDIR -#ifdef CONFIG_SWAP - li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED -#else - li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT -#endif - bgt- 112f - lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ - addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ -112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ - lwz r2,0(r2) /* get pmd entry */ - rlwinm. r2,r2,0,0,19 /* extract address of pte page */ - beq- DataAddressInvalid /* return if no mapping */ - rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ - lwz r0,0(r2) /* get linux-style pte */ - andc. 
r1,r1,r0 /* check access & ~permission */ - bne- DataAddressInvalid /* return if access not permitted */ - /* - * NOTE! We are assuming this is not an SMP system, otherwise - * we would need to update the pte atomically with lwarx/stwcx. - */ - /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */ - li r1,0xe06 /* clear out reserved bits & PP msb */ - andc r1,r0,r1 /* PP = user? 1: 0 */ -BEGIN_FTR_SECTION - rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ -END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) - mtspr SPRN_RPA,r1 - mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ - mtcrf 0x80,r2 -BEGIN_MMU_FTR_SECTION - li r0,1 - mfspr r1,SPRN_SPRG_603_LRU - rlwinm r2,r3,20,27,31 /* Get Address bits 15:19 */ - slw r0,r0,r2 - xor r1,r0,r1 - srw r0,r1,r2 - mtspr SPRN_SPRG_603_LRU,r1 - mfspr r2,SPRN_SRR1 - rlwimi r2,r0,31-14,14,14 - mtspr SPRN_SRR1,r2 -END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) - tlbld r3 - rfi - -#ifndef CONFIG_ALTIVEC -#define altivec_assist_exception unknown_exception -#endif - -#ifndef CONFIG_TAU_INT -#define TAUException unknown_exception -#endif - - EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_STD) - EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_STD) - EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_STD) - EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD) - EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_STD) - EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_STD) - - . 
= 0x3000 - -machine_check_tramp: - EXC_XFER_STD(0x200, machine_check_exception) - -alignment_exception_tramp: - EXC_XFER_STD(0x600, alignment_exception) - -handle_page_fault_tramp_1: -#ifdef CONFIG_VMAP_STACK - EXCEPTION_PROLOG_2 handle_dar_dsisr=1 -#endif - lwz r4, _DAR(r11) - lwz r5, _DSISR(r11) - /* fall through */ -handle_page_fault_tramp_2: - EXC_XFER_LITE(0x300, handle_page_fault) - -#ifdef CONFIG_VMAP_STACK -.macro save_regs_thread thread - stw r0, THR0(\thread) - stw r3, THR3(\thread) - stw r4, THR4(\thread) - stw r5, THR5(\thread) - stw r6, THR6(\thread) - stw r8, THR8(\thread) - stw r9, THR9(\thread) - mflr r0 - stw r0, THLR(\thread) - mfctr r0 - stw r0, THCTR(\thread) -.endm - -.macro restore_regs_thread thread - lwz r0, THLR(\thread) - mtlr r0 - lwz r0, THCTR(\thread) - mtctr r0 - lwz r0, THR0(\thread) - lwz r3, THR3(\thread) - lwz r4, THR4(\thread) - lwz r5, THR5(\thread) - lwz r6, THR6(\thread) - lwz r8, THR8(\thread) - lwz r9, THR9(\thread) -.endm - -hash_page_dsi: - save_regs_thread r10 - mfdsisr r3 - mfdar r4 - mfsrr0 r5 - mfsrr1 r9 - rlwinm r3, r3, 32 - 15, _PAGE_RW /* DSISR_STORE -> _PAGE_RW */ - bl hash_page - mfspr r10, SPRN_SPRG_THREAD - restore_regs_thread r10 - b .Lhash_page_dsi_cont - -hash_page_isi: - mr r11, r10 - mfspr r10, SPRN_SPRG_THREAD - save_regs_thread r10 - li r3, 0 - lwz r4, SRR0(r10) - lwz r9, SRR1(r10) - bl hash_page - mfspr r10, SPRN_SPRG_THREAD - restore_regs_thread r10 - mr r10, r11 - b .Lhash_page_isi_cont - - .globl fast_hash_page_return -fast_hash_page_return: - andis. r10, r9, SRR1_ISI_NOPT@h /* Set on ISI, cleared on DSI */ - mfspr r10, SPRN_SPRG_THREAD - restore_regs_thread r10 - bne 1f - - /* DSI */ - mtcr r11 - lwz r11, THR11(r10) - mfspr r10, SPRN_SPRG_SCRATCH0 - RFI - -1: /* ISI */ - mtcr r11 - mfspr r11, SPRN_SPRG_SCRATCH1 - mfspr r10, SPRN_SPRG_SCRATCH0 - RFI - -stack_overflow: - vmap_stack_overflow_exception -#endif - -AltiVecUnavailable: - EXCEPTION_PROLOG -#ifdef CONFIG_ALTIVEC - beq 1f - bl load_up_altivec /* if from user, just load it up */ - b fast_exception_return -#endif /* CONFIG_ALTIVEC */ -1: addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_LITE(0xf20, altivec_unavailable_exception) - -PerformanceMonitor: - EXCEPTION_PROLOG - addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_STD(0xf00, performance_monitor_exception) - - -/* - * This code is jumped to from the startup code to copy - * the kernel image to physical address PHYSICAL_START. - */ -relocate_kernel: - addis r9,r26,klimit@ha /* fetch klimit */ - lwz r25,klimit@l(r9) - addis r25,r25,-KERNELBASE@h - lis r3,PHYSICAL_START@h /* Destination base address */ - li r6,0 /* Destination offset */ - li r5,0x4000 /* # bytes of memory to copy */ - bl copy_and_flush /* copy the first 0x4000 bytes */ - addi r0,r3,4f@l /* jump to the address of 4f */ - mtctr r0 /* in copy and do the rest. */ - bctr /* jump to the copy */ -4: mr r5,r25 - bl copy_and_flush /* copy the rest */ - b turn_on_mmu - -/* - * Copy routine used to copy the kernel to start at physical address 0 - * and flush and invalidate the caches as needed. - * r3 = dest addr, r4 = source addr, r5 = copy limit, r6 = start offset - * on exit, r3, r4, r5 are unchanged, r6 is updated to be >= r5. 
- */ -_ENTRY(copy_and_flush) - addi r5,r5,-4 - addi r6,r6,-4 -4: li r0,L1_CACHE_BYTES/4 - mtctr r0 -3: addi r6,r6,4 /* copy a cache line */ - lwzx r0,r6,r4 - stwx r0,r6,r3 - bdnz 3b - dcbst r6,r3 /* write it to memory */ - sync - icbi r6,r3 /* flush the icache line */ - cmplw 0,r6,r5 - blt 4b - sync /* additional sync needed on g4 */ - isync - addi r5,r5,4 - addi r6,r6,4 - blr - -#ifdef CONFIG_SMP - .globl __secondary_start_mpc86xx -__secondary_start_mpc86xx: - mfspr r3, SPRN_PIR - stw r3, __secondary_hold_acknowledge@l(0) - mr r24, r3 /* cpu # */ - b __secondary_start - - .globl __secondary_start_pmac_0 -__secondary_start_pmac_0: - /* NB the entries for cpus 0, 1, 2 must each occupy 8 bytes. */ - li r24,0 - b 1f - li r24,1 - b 1f - li r24,2 - b 1f - li r24,3 -1: - /* on powersurge, we come in here with IR=0 and DR=1, and DBAT 0 - set to map the 0xf0000000 - 0xffffffff region */ - mfmsr r0 - rlwinm r0,r0,0,28,26 /* clear DR (0x10) */ - mtmsr r0 - isync - - .globl __secondary_start -__secondary_start: - /* Copy some CPU settings from CPU 0 */ - bl __restore_cpu_setup - - lis r3,-KERNELBASE@h - mr r4,r24 - bl call_setup_cpu /* Call setup_cpu for this CPU */ -#ifdef CONFIG_PPC_BOOK3S_32 - lis r3,-KERNELBASE@h - bl init_idle_6xx -#endif /* CONFIG_PPC_BOOK3S_32 */ - - /* get current's stack and current */ - lis r2,secondary_current@ha - tophys(r2,r2) - lwz r2,secondary_current@l(r2) - tophys(r1,r2) - lwz r1,TASK_STACK(r1) - - /* stack */ - addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD - li r0,0 - tophys(r3,r1) - stw r0,0(r3) - - /* load up the MMU */ - bl load_segment_registers - bl load_up_mmu - - /* ptr to phys current thread */ - tophys(r4,r2) - addi r4,r4,THREAD /* phys address of our thread_struct */ - mtspr SPRN_SPRG_THREAD,r4 - lis r4, (swapper_pg_dir - PAGE_OFFSET)@h - ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l - mtspr SPRN_SPRG_PGDIR, r4 - - /* enable MMU and jump to start_secondary */ - li r4,MSR_KERNEL - lis r3,start_secondary@h - ori r3,r3,start_secondary@l - mtspr SPRN_SRR0,r3 - mtspr SPRN_SRR1,r4 - RFI -#endif /* CONFIG_SMP */ - -#ifdef CONFIG_KVM_BOOK3S_HANDLER -#include "../kvm/book3s_rmhandlers.S" -#endif - -/* - * Those generic dummy functions are kept for CPUs not - * included in CONFIG_PPC_BOOK3S_32 - */ -#if !defined(CONFIG_PPC_BOOK3S_32) -_ENTRY(__save_cpu_setup) - blr -_ENTRY(__restore_cpu_setup) - blr -#endif /* !defined(CONFIG_PPC_BOOK3S_32) */ - -/* - * Load stuff into the MMU. Intended to be called with - * IR=0 and DR=0. - */ -early_hash_table: - sync /* Force all PTE updates to finish */ - isync - tlbia /* Clear all TLB entries */ - sync /* wait for tlbia/tlbie to finish */ - TLBSYNC /* ... on all CPUs */ - /* Load the SDR1 register (hash table base & size) */ - lis r6, early_hash - PAGE_OFFSET@h - ori r6, r6, 3 /* 256kB table */ - mtspr SPRN_SDR1, r6 - lis r6, early_hash@h - lis r3, Hash@ha - stw r6, Hash@l(r3) - blr - -load_up_mmu: - sync /* Force all PTE updates to finish */ - isync - tlbia /* Clear all TLB entries */ - sync /* wait for tlbia/tlbie to finish */ - TLBSYNC /* ... on all CPUs */ - /* Load the SDR1 register (hash table base & size) */ - lis r6,_SDR1@ha - tophys(r6,r6) - lwz r6,_SDR1@l(r6) - mtspr SPRN_SDR1,r6 - -/* Load the BAT registers with the values set up by MMU_init. 
*/ - lis r3,BATS@ha - addi r3,r3,BATS@l - tophys(r3,r3) - LOAD_BAT(0,r3,r4,r5) - LOAD_BAT(1,r3,r4,r5) - LOAD_BAT(2,r3,r4,r5) - LOAD_BAT(3,r3,r4,r5) -BEGIN_MMU_FTR_SECTION - LOAD_BAT(4,r3,r4,r5) - LOAD_BAT(5,r3,r4,r5) - LOAD_BAT(6,r3,r4,r5) - LOAD_BAT(7,r3,r4,r5) -END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) - blr - -_GLOBAL(load_segment_registers) - li r0, NUM_USER_SEGMENTS /* load up user segment register values */ - mtctr r0 /* for context 0 */ - li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */ -#ifdef CONFIG_PPC_KUEP - oris r3, r3, SR_NX@h /* Set Nx */ -#endif -#ifdef CONFIG_PPC_KUAP - oris r3, r3, SR_KS@h /* Set Ks */ -#endif - li r4, 0 -3: mtsrin r3, r4 - addi r3, r3, 0x111 /* increment VSID */ - addis r4, r4, 0x1000 /* address of next segment */ - bdnz 3b - li r0, 16 - NUM_USER_SEGMENTS /* load up kernel segment registers */ - mtctr r0 /* for context 0 */ - rlwinm r3, r3, 0, ~SR_NX /* Nx = 0 */ - rlwinm r3, r3, 0, ~SR_KS /* Ks = 0 */ - oris r3, r3, SR_KP@h /* Kp = 1 */ -3: mtsrin r3, r4 - addi r3, r3, 0x111 /* increment VSID */ - addis r4, r4, 0x1000 /* address of next segment */ - bdnz 3b - blr - -/* - * This is where the main kernel code starts. - */ -start_here: - /* ptr to current */ - lis r2,init_task@h - ori r2,r2,init_task@l - /* Set up for using our exception vectors */ - /* ptr to phys current thread */ - tophys(r4,r2) - addi r4,r4,THREAD /* init task's THREAD */ - mtspr SPRN_SPRG_THREAD,r4 - lis r4, (swapper_pg_dir - PAGE_OFFSET)@h - ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l - mtspr SPRN_SPRG_PGDIR, r4 - - /* stack */ - lis r1,init_thread_union@ha - addi r1,r1,init_thread_union@l - li r0,0 - stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) -/* - * Do early platform-specific initialization, - * and set up the MMU. - */ -#ifdef CONFIG_KASAN - bl kasan_early_init -#endif - li r3,0 - mr r4,r31 - bl machine_init - bl __save_cpu_setup - bl MMU_init - bl MMU_init_hw_patch - -/* - * Go back to running unmapped so we can load up new values - * for SDR1 (hash table pointer) and the segment registers - * and change to using our exception vectors. - */ - lis r4,2f@h - ori r4,r4,2f@l - tophys(r4,r4) - li r3,MSR_KERNEL & ~(MSR_IR|MSR_DR) - - .align 4 - mtspr SPRN_SRR0,r4 - mtspr SPRN_SRR1,r3 - RFI -/* Load up the kernel context */ -2: bl load_up_mmu - -#ifdef CONFIG_BDI_SWITCH - /* Add helper information for the Abatron bdiGDB debugger. - * We do this here because we know the mmu is disabled, and - * will be enabled for real in just a few instructions. - */ - lis r5, abatron_pteptrs@h - ori r5, r5, abatron_pteptrs@l - stw r5, 0xf0(0) /* This much match your Abatron config */ - lis r6, swapper_pg_dir@h - ori r6, r6, swapper_pg_dir@l - tophys(r5, r5) - stw r6, 0(r5) -#endif /* CONFIG_BDI_SWITCH */ - -/* Now turn on the MMU for real! */ - li r4,MSR_KERNEL - lis r3,start_kernel@h - ori r3,r3,start_kernel@l - mtspr SPRN_SRR0,r3 - mtspr SPRN_SRR1,r4 - RFI - -/* - * void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next); - * - * Set up the segment registers for a new context. - */ -_ENTRY(switch_mmu_context) - lwz r3,MMCONTEXTID(r4) - cmpwi cr0,r3,0 - blt- 4f - mulli r3,r3,897 /* multiply context by skew factor */ - rlwinm r3,r3,4,8,27 /* VSID = (context & 0xfffff) << 4 */ -#ifdef CONFIG_PPC_KUEP - oris r3, r3, SR_NX@h /* Set Nx */ -#endif -#ifdef CONFIG_PPC_KUAP - oris r3, r3, SR_KS@h /* Set Ks */ -#endif - li r0,NUM_USER_SEGMENTS - mtctr r0 - - lwz r4, MM_PGD(r4) -#ifdef CONFIG_BDI_SWITCH - /* Context switch the PTE pointer for the Abatron BDI2000. 
- * The PGDIR is passed as second argument. - */ - lis r5, abatron_pteptrs@ha - stw r4, abatron_pteptrs@l + 0x4(r5) -#endif - tophys(r4, r4) - mtspr SPRN_SPRG_PGDIR, r4 - li r4,0 - isync -3: - mtsrin r3,r4 - addi r3,r3,0x111 /* next VSID */ - rlwinm r3,r3,0,8,3 /* clear out any overflow from VSID field */ - addis r4,r4,0x1000 /* address of next segment */ - bdnz 3b - sync - isync - blr -4: trap - EMIT_BUG_ENTRY 4b,__FILE__,__LINE__,0 - blr -EXPORT_SYMBOL(switch_mmu_context) - -/* - * An undocumented "feature" of 604e requires that the v bit - * be cleared before changing BAT values. - * - * Also, newer IBM firmware does not clear bat3 and 4 so - * this makes sure it's done. - * -- Cort - */ -clear_bats: - li r10,0 - - mtspr SPRN_DBAT0U,r10 - mtspr SPRN_DBAT0L,r10 - mtspr SPRN_DBAT1U,r10 - mtspr SPRN_DBAT1L,r10 - mtspr SPRN_DBAT2U,r10 - mtspr SPRN_DBAT2L,r10 - mtspr SPRN_DBAT3U,r10 - mtspr SPRN_DBAT3L,r10 - mtspr SPRN_IBAT0U,r10 - mtspr SPRN_IBAT0L,r10 - mtspr SPRN_IBAT1U,r10 - mtspr SPRN_IBAT1L,r10 - mtspr SPRN_IBAT2U,r10 - mtspr SPRN_IBAT2L,r10 - mtspr SPRN_IBAT3U,r10 - mtspr SPRN_IBAT3L,r10 -BEGIN_MMU_FTR_SECTION - /* Here's a tweak: at this point, CPU setup have - * not been called yet, so HIGH_BAT_EN may not be - * set in HID0 for the 745x processors. However, it - * seems that doesn't affect our ability to actually - * write to these SPRs. - */ - mtspr SPRN_DBAT4U,r10 - mtspr SPRN_DBAT4L,r10 - mtspr SPRN_DBAT5U,r10 - mtspr SPRN_DBAT5L,r10 - mtspr SPRN_DBAT6U,r10 - mtspr SPRN_DBAT6L,r10 - mtspr SPRN_DBAT7U,r10 - mtspr SPRN_DBAT7L,r10 - mtspr SPRN_IBAT4U,r10 - mtspr SPRN_IBAT4L,r10 - mtspr SPRN_IBAT5U,r10 - mtspr SPRN_IBAT5L,r10 - mtspr SPRN_IBAT6U,r10 - mtspr SPRN_IBAT6L,r10 - mtspr SPRN_IBAT7U,r10 - mtspr SPRN_IBAT7L,r10 -END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) - blr - -_ENTRY(update_bats) - lis r4, 1f@h - ori r4, r4, 1f@l - tophys(r4, r4) - mfmsr r6 - mflr r7 - li r3, MSR_KERNEL & ~(MSR_IR | MSR_DR) - rlwinm r0, r6, 0, ~MSR_RI - rlwinm r0, r0, 0, ~MSR_EE - mtmsr r0 - - .align 4 - mtspr SPRN_SRR0, r4 - mtspr SPRN_SRR1, r3 - RFI -1: bl clear_bats - lis r3, BATS@ha - addi r3, r3, BATS@l - tophys(r3, r3) - LOAD_BAT(0, r3, r4, r5) - LOAD_BAT(1, r3, r4, r5) - LOAD_BAT(2, r3, r4, r5) - LOAD_BAT(3, r3, r4, r5) -BEGIN_MMU_FTR_SECTION - LOAD_BAT(4, r3, r4, r5) - LOAD_BAT(5, r3, r4, r5) - LOAD_BAT(6, r3, r4, r5) - LOAD_BAT(7, r3, r4, r5) -END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) - li r3, MSR_KERNEL & ~(MSR_IR | MSR_DR | MSR_RI) - mtmsr r3 - mtspr SPRN_SRR0, r7 - mtspr SPRN_SRR1, r6 - RFI - -flush_tlbs: - lis r10, 0x40 -1: addic. r10, r10, -0x1000 - tlbie r10 - bgt 1b - sync - blr - -mmu_off: - addi r4, r3, __after_mmu_off - _start - mfmsr r3 - andi. r0,r3,MSR_DR|MSR_IR /* MMU enabled? */ - beqlr - andc r3,r3,r0 - - .align 4 - mtspr SPRN_SRR0,r4 - mtspr SPRN_SRR1,r3 - sync - RFI - -/* We use one BAT to map up to 256M of RAM at _PAGE_OFFSET */ -initial_bats: - lis r11,PAGE_OFFSET@h - tophys(r8,r11) -#ifdef CONFIG_SMP - ori r8,r8,0x12 /* R/W access, M=1 */ -#else - ori r8,r8,2 /* R/W access */ -#endif /* CONFIG_SMP */ - ori r11,r11,BL_256M<<2|0x2 /* set up BAT registers for 604 */ - - mtspr SPRN_DBAT0L,r8 /* N.B. 
6xx have valid */
- mtspr SPRN_DBAT0U,r11 /* bit in upper BAT register */
- mtspr SPRN_IBAT0L,r8
- mtspr SPRN_IBAT0U,r11
- isync
- blr
-
-#ifdef CONFIG_BOOTX_TEXT
-setup_disp_bat:
- /*
- * setup the display bat prepared for us in prom.c
- */
- mflr r8
- bl reloc_offset
- mtlr r8
- addis r8,r3,disp_BAT@ha
- addi r8,r8,disp_BAT@l
- cmpwi cr0,r8,0
- beqlr
- lwz r11,0(r8)
- lwz r8,4(r8)
- mtspr SPRN_DBAT3L,r8
- mtspr SPRN_DBAT3U,r11
- blr
-#endif /* CONFIG_BOOTX_TEXT */
-
-#ifdef CONFIG_PPC_EARLY_DEBUG_CPM
-setup_cpm_bat:
- lis r8, 0xf000
- ori r8, r8, 0x002a
- mtspr SPRN_DBAT1L, r8
-
- lis r11, 0xf000
- ori r11, r11, (BL_1M << 2) | 2
- mtspr SPRN_DBAT1U, r11
-
- blr
-#endif
-
-#ifdef CONFIG_PPC_EARLY_DEBUG_USBGECKO
-setup_usbgecko_bat:
- /* prepare a BAT for early io */
-#if defined(CONFIG_GAMECUBE)
- lis r8, 0x0c00
-#elif defined(CONFIG_WII)
- lis r8, 0x0d00
-#else
-#error Invalid platform for USB Gecko based early debugging.
-#endif
- /*
- * The virtual address used must match the virtual address
- * associated to the fixmap entry FIX_EARLY_DEBUG_BASE.
- */
- lis r11, 0xfffe /* top 128K */
- ori r8, r8, 0x002a /* uncached, guarded ,rw */
- ori r11, r11, 0x2 /* 128K, Vs=1, Vp=0 */
- mtspr SPRN_DBAT1L, r8
- mtspr SPRN_DBAT1U, r11
- blr
-#endif
-
-#ifdef CONFIG_8260
-/* Jump into the system reset for the rom.
- * We first disable the MMU, and then jump to the ROM reset address.
- *
- * r3 is the board info structure, r4 is the location for starting.
- * I use this for building a small kernel that can load other kernels,
- * rather than trying to write or rely on a rom monitor that can tftp load.
- */
- .globl m8260_gorom
-m8260_gorom:
- mfmsr r0
- rlwinm r0,r0,0,17,15 /* clear MSR_EE in r0 */
- sync
- mtmsr r0
- sync
- mfspr r11, SPRN_HID0
- lis r10, 0
- ori r10,r10,HID0_ICE|HID0_DCE
- andc r11, r11, r10
- mtspr SPRN_HID0, r11
- isync
- li r5, MSR_ME|MSR_RI
- lis r6,2f@h
- addis r6,r6,-KERNELBASE@h
- ori r6,r6,2f@l
- mtspr SPRN_SRR0,r6
- mtspr SPRN_SRR1,r5
- isync
- sync
- rfi
-2:
- mtlr r4
- blr
-#endif
-
-
-/*
- * We put a few things here that have to be page-aligned.
- * This stuff goes at the beginning of the data segment,
- * which is page-aligned.
- */
- .data
- .globl sdata
-sdata:
- .globl empty_zero_page
-empty_zero_page:
- .space 4096
-EXPORT_SYMBOL(empty_zero_page)
-
- .globl swapper_pg_dir
-swapper_pg_dir:
- .space PGD_TABLE_SIZE
-
-/* Room for two PTE pointers, usually the kernel and current user pointers
- * to their respective root page table.
- */
-abatron_pteptrs:
- .space 8
diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S
new file mode 100644
index 000000000000..b7b554533e0d
--- /dev/null
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -0,0 +1,1366 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * PowerPC version
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
+ * Copyright (C) 1996 Cort Dougan
+ * Adapted for Power Macintosh by Paul Mackerras.
+ * Low-level exception handlers and MMU support
+ * rewritten by Paul Mackerras.
+ * Copyright (C) 1996 Paul Mackerras.
+ * MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
+ *
+ * This file contains the low-level support and setup for the
+ * PowerPC platform, including trap and interrupt dispatch.
+ * (The PPC 8xx embedded CPUs use head_8xx.S instead.)
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "head_32.h" + +#define LOAD_BAT(n, reg, RA, RB) \ + /* see the comment for clear_bats() -- Cort */ \ + li RA,0; \ + mtspr SPRN_IBAT##n##U,RA; \ + mtspr SPRN_DBAT##n##U,RA; \ + lwz RA,(n*16)+0(reg); \ + lwz RB,(n*16)+4(reg); \ + mtspr SPRN_IBAT##n##U,RA; \ + mtspr SPRN_IBAT##n##L,RB; \ + lwz RA,(n*16)+8(reg); \ + lwz RB,(n*16)+12(reg); \ + mtspr SPRN_DBAT##n##U,RA; \ + mtspr SPRN_DBAT##n##L,RB + + __HEAD + .stabs "arch/powerpc/kernel/",N_SO,0,0,0f + .stabs "head_book3s_32.S",N_SO,0,0,0f +0: +_ENTRY(_stext); + +/* + * _start is defined this way because the XCOFF loader in the OpenFirmware + * on the powermac expects the entry point to be a procedure descriptor. + */ +_ENTRY(_start); + /* + * These are here for legacy reasons, the kernel used to + * need to look like a coff function entry for the pmac + * but we're always started by some kind of bootloader now. + * -- Cort + */ + nop /* used by __secondary_hold on prep (mtx) and chrp smp */ + nop /* used by __secondary_hold on prep (mtx) and chrp smp */ + nop + +/* PMAC + * Enter here with the kernel text, data and bss loaded starting at + * 0, running with virtual == physical mapping. + * r5 points to the prom entry point (the client interface handler + * address). Address translation is turned on, with the prom + * managing the hash table. Interrupts are disabled. The stack + * pointer (r1) points to just below the end of the half-meg region + * from 0x380000 - 0x400000, which is mapped in already. + * + * If we are booted from MacOS via BootX, we enter with the kernel + * image loaded somewhere, and the following values in registers: + * r3: 'BooX' (0x426f6f58) + * r4: virtual address of boot_infos_t + * r5: 0 + * + * PREP + * This is jumped to on prep systems right after the kernel is relocated + * to its proper place in memory by the boot loader. The expected layout + * of the regs is: + * r3: ptr to residual data + * r4: initrd_start or if no initrd then 0 + * r5: initrd_end - unused if r4 is 0 + * r6: Start of command line string + * r7: End of command line string + * + * This just gets a minimal mmu environment setup so we can call + * start_here() to do the real work. + * -- Cort + */ + + .globl __start +__start: +/* + * We have to do any OF calls before we map ourselves to KERNELBASE, + * because OF may have I/O devices mapped into that area + * (particularly on CHRP). + */ + cmpwi 0,r5,0 + beq 1f + +#ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE + /* find out where we are now */ + bcl 20,31,$+4 +0: mflr r8 /* r8 = runtime addr here */ + addis r8,r8,(_stext - 0b)@ha + addi r8,r8,(_stext - 0b)@l /* current runtime base addr */ + bl prom_init +#endif /* CONFIG_PPC_OF_BOOT_TRAMPOLINE */ + + /* We never return. We also hit that trap if trying to boot + * from OF while CONFIG_PPC_OF_BOOT_TRAMPOLINE isn't selected */ + trap + +/* + * Check for BootX signature when supporting PowerMac and branch to + * appropriate trampoline if it's present + */ +#ifdef CONFIG_PPC_PMAC +1: lis r31,0x426f + ori r31,r31,0x6f58 + cmpw 0,r3,r31 + bne 1f + bl bootx_init + trap +#endif /* CONFIG_PPC_PMAC */ + +1: mr r31,r3 /* save device tree ptr */ + li r24,0 /* cpu # */ + +/* + * early_init() does the early machine identification and does + * the necessary low-level setup and clears the BSS + * -- Cort + */ + bl early_init + +/* Switch MMU off, clear BATs and flush TLB. 
At this point, r3 contains + * the physical address we are running at, returned by early_init() + */ + bl mmu_off +__after_mmu_off: + bl clear_bats + bl flush_tlbs + + bl initial_bats + bl load_segment_registers +BEGIN_MMU_FTR_SECTION + bl early_hash_table +END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) +#if defined(CONFIG_BOOTX_TEXT) + bl setup_disp_bat +#endif +#ifdef CONFIG_PPC_EARLY_DEBUG_CPM + bl setup_cpm_bat +#endif +#ifdef CONFIG_PPC_EARLY_DEBUG_USBGECKO + bl setup_usbgecko_bat +#endif + +/* + * Call setup_cpu for CPU 0 and initialize 6xx Idle + */ + bl reloc_offset + li r24,0 /* cpu# */ + bl call_setup_cpu /* Call setup_cpu for this CPU */ +#ifdef CONFIG_PPC_BOOK3S_32 + bl reloc_offset + bl init_idle_6xx +#endif /* CONFIG_PPC_BOOK3S_32 */ + + +/* + * We need to run with _start at physical address 0. + * On CHRP, we are loaded at 0x10000 since OF on CHRP uses + * the exception vectors at 0 (and therefore this copy + * overwrites OF's exception vectors with our own). + * The MMU is off at this point. + */ + bl reloc_offset + mr r26,r3 + addis r4,r3,KERNELBASE@h /* current address of _start */ + lis r5,PHYSICAL_START@h + cmplw 0,r4,r5 /* already running at PHYSICAL_START? */ + bne relocate_kernel +/* + * we now have the 1st 16M of ram mapped with the bats. + * prep needs the mmu to be turned on here, but pmac already has it on. + * this shouldn't bother the pmac since it just gets turned on again + * as we jump to our code at KERNELBASE. -- Cort + * Actually no, pmac doesn't have it on any more. BootX enters with MMU + * off, and in other cases, we now turn it off before changing BATs above. + */ +turn_on_mmu: + mfmsr r0 + ori r0,r0,MSR_DR|MSR_IR|MSR_RI + mtspr SPRN_SRR1,r0 + lis r0,start_here@h + ori r0,r0,start_here@l + mtspr SPRN_SRR0,r0 + RFI /* enables MMU */ + +/* + * We need __secondary_hold as a place to hold the other cpus on + * an SMP machine, even when we are running a UP kernel. + */ + . = 0xc0 /* for prep bootloader */ + li r3,1 /* MTX only has 1 cpu */ + .globl __secondary_hold +__secondary_hold: + /* tell the master we're here */ + stw r3,__secondary_hold_acknowledge@l(0) +#ifdef CONFIG_SMP +100: lwz r4,0(0) + /* wait until we're told to start */ + cmpw 0,r4,r3 + bne 100b + /* our cpu # was at addr 0 - go */ + mr r24,r3 /* cpu # */ + b __secondary_start +#else + b . +#endif /* CONFIG_SMP */ + + .globl __secondary_hold_spinloop +__secondary_hold_spinloop: + .long 0 + .globl __secondary_hold_acknowledge +__secondary_hold_acknowledge: + .long -1 + +/* System reset */ +/* core99 pmac starts the seconary here by changing the vector, and + putting it back to what it was (unknown_exception) when done. */ + EXCEPTION(0x100, Reset, unknown_exception, EXC_XFER_STD) + +/* Machine check */ +/* + * On CHRP, this is complicated by the fact that we could get a + * machine check inside RTAS, and we have no guarantee that certain + * critical registers will have the values we expect. The set of + * registers that might have bad values includes all the GPRs + * and all the BATs. We indicate that we are in RTAS by putting + * a non-zero value, the address of the exception frame to use, + * in thread.rtas_sp. The machine check handler checks thread.rtas_sp + * and uses its value if it is non-zero. + * (Other exception handlers assume that r1 is a valid kernel stack + * pointer when we take an exception from supervisor mode.) + * -- paulus. + */ + . 
= 0x200 + DO_KVM 0x200 +MachineCheck: + EXCEPTION_PROLOG_0 +#ifdef CONFIG_PPC_CHRP + mfspr r11, SPRN_SPRG_THREAD + lwz r11, RTAS_SP(r11) + cmpwi cr1, r11, 0 + bne cr1, 7f +#endif /* CONFIG_PPC_CHRP */ + EXCEPTION_PROLOG_1 for_rtas=1 +7: EXCEPTION_PROLOG_2 + addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef CONFIG_PPC_CHRP +#ifdef CONFIG_VMAP_STACK + mfspr r4, SPRN_SPRG_THREAD + tovirt(r4, r4) + lwz r4, RTAS_SP(r4) + cmpwi cr1, r4, 0 +#endif + beq cr1, machine_check_tramp + twi 31, 0, 0 +#else + b machine_check_tramp +#endif + +/* Data access exception. */ + . = 0x300 + DO_KVM 0x300 +DataAccess: +#ifdef CONFIG_VMAP_STACK + mtspr SPRN_SPRG_SCRATCH0,r10 + mfspr r10, SPRN_SPRG_THREAD +BEGIN_MMU_FTR_SECTION + stw r11, THR11(r10) + mfspr r10, SPRN_DSISR + mfcr r11 +#ifdef CONFIG_PPC_KUAP + andis. r10, r10, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h +#else + andis. r10, r10, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h +#endif + mfspr r10, SPRN_SPRG_THREAD + beq hash_page_dsi +.Lhash_page_dsi_cont: + mtcr r11 + lwz r11, THR11(r10) +END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) + mtspr SPRN_SPRG_SCRATCH1,r11 + mfspr r11, SPRN_DAR + stw r11, DAR(r10) + mfspr r11, SPRN_DSISR + stw r11, DSISR(r10) + mfspr r11, SPRN_SRR0 + stw r11, SRR0(r10) + mfspr r11, SPRN_SRR1 /* check whether user or kernel */ + stw r11, SRR1(r10) + mfcr r10 + andi. r11, r11, MSR_PR + + EXCEPTION_PROLOG_1 + b handle_page_fault_tramp_1 +#else /* CONFIG_VMAP_STACK */ + EXCEPTION_PROLOG handle_dar_dsisr=1 + get_and_save_dar_dsisr_on_stack r4, r5, r11 +BEGIN_MMU_FTR_SECTION +#ifdef CONFIG_PPC_KUAP + andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h +#else + andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h +#endif + bne handle_page_fault_tramp_2 /* if not, try to put a PTE */ + rlwinm r3, r5, 32 - 15, 21, 21 /* DSISR_STORE -> _PAGE_RW */ + bl hash_page + b handle_page_fault_tramp_1 +FTR_SECTION_ELSE + b handle_page_fault_tramp_2 +ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) +#endif /* CONFIG_VMAP_STACK */ + +/* Instruction access exception. */ + . = 0x400 + DO_KVM 0x400 +InstructionAccess: +#ifdef CONFIG_VMAP_STACK + mtspr SPRN_SPRG_SCRATCH0,r10 + mtspr SPRN_SPRG_SCRATCH1,r11 + mfspr r10, SPRN_SPRG_THREAD + mfspr r11, SPRN_SRR0 + stw r11, SRR0(r10) + mfspr r11, SPRN_SRR1 /* check whether user or kernel */ + stw r11, SRR1(r10) + mfcr r10 +BEGIN_MMU_FTR_SECTION + andis. r11, r11, SRR1_ISI_NOPT@h /* no pte found? */ + bne hash_page_isi +.Lhash_page_isi_cont: + mfspr r11, SPRN_SRR1 /* check whether user or kernel */ +END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) + andi. r11, r11, MSR_PR + + EXCEPTION_PROLOG_1 + EXCEPTION_PROLOG_2 +#else /* CONFIG_VMAP_STACK */ + EXCEPTION_PROLOG + andis. r0,r9,SRR1_ISI_NOPT@h /* no pte found? */ + beq 1f /* if so, try to put a PTE */ + li r3,0 /* into the hash table */ + mr r4,r12 /* SRR0 is fault address */ +BEGIN_MMU_FTR_SECTION + bl hash_page +END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) +#endif /* CONFIG_VMAP_STACK */ +1: mr r4,r12 + andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ + stw r4, _DAR(r11) + EXC_XFER_LITE(0x400, handle_page_fault) + +/* External interrupt */ + EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) + +/* Alignment exception */ + . 
= 0x600 + DO_KVM 0x600 +Alignment: + EXCEPTION_PROLOG handle_dar_dsisr=1 + save_dar_dsisr_on_stack r4, r5, r11 + addi r3,r1,STACK_FRAME_OVERHEAD + b alignment_exception_tramp + +/* Program check exception */ + EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) + +/* Floating-point unavailable */ + . = 0x800 + DO_KVM 0x800 +FPUnavailable: +BEGIN_FTR_SECTION +/* + * Certain Freescale cores don't have a FPU and treat fp instructions + * as a FP Unavailable exception. Redirect to illegal/emulation handling. + */ + b ProgramCheck +END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) + EXCEPTION_PROLOG + beq 1f + bl load_up_fpu /* if from user, just load it up */ + b fast_exception_return +1: addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_LITE(0x800, kernel_fp_unavailable_exception) + +/* Decrementer */ + EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) + + EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_STD) + EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_STD) + +/* System call */ + . = 0xc00 + DO_KVM 0xc00 +SystemCall: + SYSCALL_ENTRY 0xc00 + + EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) + EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD) + +/* + * The Altivec unavailable trap is at 0x0f20. Foo. + * We effectively remap it to 0x3000. + * We include an altivec unavailable exception vector even if + * not configured for Altivec, so that you can't panic a + * non-altivec kernel running on a machine with altivec just + * by executing an altivec instruction. + */ + . = 0xf00 + DO_KVM 0xf00 + b PerformanceMonitor + + . = 0xf20 + DO_KVM 0xf20 + b AltiVecUnavailable + +/* + * Handle TLB miss for instruction on 603/603e. + * Note: we get an alternate set of r0 - r3 to use automatically. + */ + . = 0x1000 +InstructionTLBMiss: +/* + * r0: scratch + * r1: linux style pte ( later becomes ppc hardware pte ) + * r2: ptr to linux-style pte + * r3: scratch + */ + /* Get PTE (linux-style) and check access */ + mfspr r3,SPRN_IMISS +#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) + lis r1, TASK_SIZE@h /* check if kernel address */ + cmplw 0,r1,r3 +#endif + mfspr r2, SPRN_SPRG_PGDIR +#ifdef CONFIG_SWAP + li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC +#else + li r1,_PAGE_PRESENT | _PAGE_EXEC +#endif +#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) + bgt- 112f + lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ + addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ +#endif +112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r2) /* get pmd entry */ + rlwinm. r2,r2,0,0,19 /* extract address of pte page */ + beq- InstructionAddressInvalid /* return if no mapping */ + rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ + lwz r0,0(r2) /* get linux-style pte */ + andc. r1,r1,r0 /* check access & ~permission */ + bne- InstructionAddressInvalid /* return if access not permitted */ + /* Convert linux-style PTE to low word of PPC-style PTE */ + rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */ + ori r1, r1, 0xe06 /* clear out reserved bits */ + andc r1, r0, r1 /* PP = user? 
1 : 0 */ +BEGIN_FTR_SECTION + rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ +END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) + mtspr SPRN_RPA,r1 + tlbli r3 + mfspr r3,SPRN_SRR1 /* Need to restore CR0 */ + mtcrf 0x80,r3 + rfi +InstructionAddressInvalid: + mfspr r3,SPRN_SRR1 + rlwinm r1,r3,9,6,6 /* Get load/store bit */ + + addis r1,r1,0x2000 + mtspr SPRN_DSISR,r1 /* (shouldn't be needed) */ + andi. r2,r3,0xFFFF /* Clear upper bits of SRR1 */ + or r2,r2,r1 + mtspr SPRN_SRR1,r2 + mfspr r1,SPRN_IMISS /* Get failing address */ + rlwinm. r2,r2,0,31,31 /* Check for little endian access */ + rlwimi r2,r2,1,30,30 /* change 1 -> 3 */ + xor r1,r1,r2 + mtspr SPRN_DAR,r1 /* Set fault address */ + mfmsr r0 /* Restore "normal" registers */ + xoris r0,r0,MSR_TGPR>>16 + mtcrf 0x80,r3 /* Restore CR0 */ + mtmsr r0 + b InstructionAccess + +/* + * Handle TLB miss for DATA Load operation on 603/603e + */ + . = 0x1100 +DataLoadTLBMiss: +/* + * r0: scratch + * r1: linux style pte ( later becomes ppc hardware pte ) + * r2: ptr to linux-style pte + * r3: scratch + */ + /* Get PTE (linux-style) and check access */ + mfspr r3,SPRN_DMISS + lis r1, TASK_SIZE@h /* check if kernel address */ + cmplw 0,r1,r3 + mfspr r2, SPRN_SPRG_PGDIR +#ifdef CONFIG_SWAP + li r1, _PAGE_PRESENT | _PAGE_ACCESSED +#else + li r1, _PAGE_PRESENT +#endif + bgt- 112f + lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ + addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ +112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r2) /* get pmd entry */ + rlwinm. r2,r2,0,0,19 /* extract address of pte page */ + beq- DataAddressInvalid /* return if no mapping */ + rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ + lwz r0,0(r2) /* get linux-style pte */ + andc. r1,r1,r0 /* check access & ~permission */ + bne- DataAddressInvalid /* return if access not permitted */ + /* + * NOTE! We are assuming this is not an SMP system, otherwise + * we would need to update the pte atomically with lwarx/stwcx. + */ + /* Convert linux-style PTE to low word of PPC-style PTE */ + rlwinm r1,r0,32-9,30,30 /* _PAGE_RW -> PP msb */ + rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ + rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */ + ori r1,r1,0xe04 /* clear out reserved bits */ + andc r1,r0,r1 /* PP = user? rw? 1: 3: 0 */ +BEGIN_FTR_SECTION + rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ +END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) + mtspr SPRN_RPA,r1 + mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ + mtcrf 0x80,r2 +BEGIN_MMU_FTR_SECTION + li r0,1 + mfspr r1,SPRN_SPRG_603_LRU + rlwinm r2,r3,20,27,31 /* Get Address bits 15:19 */ + slw r0,r0,r2 + xor r1,r0,r1 + srw r0,r1,r2 + mtspr SPRN_SPRG_603_LRU,r1 + mfspr r2,SPRN_SRR1 + rlwimi r2,r0,31-14,14,14 + mtspr SPRN_SRR1,r2 +END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) + tlbld r3 + rfi +DataAddressInvalid: + mfspr r3,SPRN_SRR1 + rlwinm r1,r3,9,6,6 /* Get load/store bit */ + addis r1,r1,0x2000 + mtspr SPRN_DSISR,r1 + andi. r2,r3,0xFFFF /* Clear upper bits of SRR1 */ + mtspr SPRN_SRR1,r2 + mfspr r1,SPRN_DMISS /* Get failing address */ + rlwinm. r2,r2,0,31,31 /* Check for little endian access */ + beq 20f /* Jump if big endian */ + xori r1,r1,3 +20: mtspr SPRN_DAR,r1 /* Set fault address */ + mfmsr r0 /* Restore "normal" registers */ + xoris r0,r0,MSR_TGPR>>16 + mtcrf 0x80,r3 /* Restore CR0 */ + mtmsr r0 + b DataAccess + +/* + * Handle TLB miss for DATA Store on 603/603e + */ + . 
= 0x1200 +DataStoreTLBMiss: +/* + * r0: scratch + * r1: linux style pte ( later becomes ppc hardware pte ) + * r2: ptr to linux-style pte + * r3: scratch + */ + /* Get PTE (linux-style) and check access */ + mfspr r3,SPRN_DMISS + lis r1, TASK_SIZE@h /* check if kernel address */ + cmplw 0,r1,r3 + mfspr r2, SPRN_SPRG_PGDIR +#ifdef CONFIG_SWAP + li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED +#else + li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT +#endif + bgt- 112f + lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ + addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ +112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r2) /* get pmd entry */ + rlwinm. r2,r2,0,0,19 /* extract address of pte page */ + beq- DataAddressInvalid /* return if no mapping */ + rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ + lwz r0,0(r2) /* get linux-style pte */ + andc. r1,r1,r0 /* check access & ~permission */ + bne- DataAddressInvalid /* return if access not permitted */ + /* + * NOTE! We are assuming this is not an SMP system, otherwise + * we would need to update the pte atomically with lwarx/stwcx. + */ + /* Convert linux-style PTE to low word of PPC-style PTE */ + rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */ + li r1,0xe06 /* clear out reserved bits & PP msb */ + andc r1,r0,r1 /* PP = user? 1: 0 */ +BEGIN_FTR_SECTION + rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ +END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) + mtspr SPRN_RPA,r1 + mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ + mtcrf 0x80,r2 +BEGIN_MMU_FTR_SECTION + li r0,1 + mfspr r1,SPRN_SPRG_603_LRU + rlwinm r2,r3,20,27,31 /* Get Address bits 15:19 */ + slw r0,r0,r2 + xor r1,r0,r1 + srw r0,r1,r2 + mtspr SPRN_SPRG_603_LRU,r1 + mfspr r2,SPRN_SRR1 + rlwimi r2,r0,31-14,14,14 + mtspr SPRN_SRR1,r2 +END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) + tlbld r3 + rfi + +#ifndef CONFIG_ALTIVEC +#define altivec_assist_exception unknown_exception +#endif + +#ifndef CONFIG_TAU_INT +#define TAUException unknown_exception +#endif + + EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_STD) + EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_STD) + EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_STD) + EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD) + EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_STD) + EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_STD) + 
EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_STD) + + . = 0x3000 + +machine_check_tramp: + EXC_XFER_STD(0x200, machine_check_exception) + +alignment_exception_tramp: + EXC_XFER_STD(0x600, alignment_exception) + +handle_page_fault_tramp_1: +#ifdef CONFIG_VMAP_STACK + EXCEPTION_PROLOG_2 handle_dar_dsisr=1 +#endif + lwz r4, _DAR(r11) + lwz r5, _DSISR(r11) + /* fall through */ +handle_page_fault_tramp_2: + EXC_XFER_LITE(0x300, handle_page_fault) + +#ifdef CONFIG_VMAP_STACK +.macro save_regs_thread thread + stw r0, THR0(\thread) + stw r3, THR3(\thread) + stw r4, THR4(\thread) + stw r5, THR5(\thread) + stw r6, THR6(\thread) + stw r8, THR8(\thread) + stw r9, THR9(\thread) + mflr r0 + stw r0, THLR(\thread) + mfctr r0 + stw r0, THCTR(\thread) +.endm + +.macro restore_regs_thread thread + lwz r0, THLR(\thread) + mtlr r0 + lwz r0, THCTR(\thread) + mtctr r0 + lwz r0, THR0(\thread) + lwz r3, THR3(\thread) + lwz r4, THR4(\thread) + lwz r5, THR5(\thread) + lwz r6, THR6(\thread) + lwz r8, THR8(\thread) + lwz r9, THR9(\thread) +.endm + +hash_page_dsi: + save_regs_thread r10 + mfdsisr r3 + mfdar r4 + mfsrr0 r5 + mfsrr1 r9 + rlwinm r3, r3, 32 - 15, _PAGE_RW /* DSISR_STORE -> _PAGE_RW */ + bl hash_page + mfspr r10, SPRN_SPRG_THREAD + restore_regs_thread r10 + b .Lhash_page_dsi_cont + +hash_page_isi: + mr r11, r10 + mfspr r10, SPRN_SPRG_THREAD + save_regs_thread r10 + li r3, 0 + lwz r4, SRR0(r10) + lwz r9, SRR1(r10) + bl hash_page + mfspr r10, SPRN_SPRG_THREAD + restore_regs_thread r10 + mr r10, r11 + b .Lhash_page_isi_cont + + .globl fast_hash_page_return +fast_hash_page_return: + andis. r10, r9, SRR1_ISI_NOPT@h /* Set on ISI, cleared on DSI */ + mfspr r10, SPRN_SPRG_THREAD + restore_regs_thread r10 + bne 1f + + /* DSI */ + mtcr r11 + lwz r11, THR11(r10) + mfspr r10, SPRN_SPRG_SCRATCH0 + RFI + +1: /* ISI */ + mtcr r11 + mfspr r11, SPRN_SPRG_SCRATCH1 + mfspr r10, SPRN_SPRG_SCRATCH0 + RFI + +stack_overflow: + vmap_stack_overflow_exception +#endif + +AltiVecUnavailable: + EXCEPTION_PROLOG +#ifdef CONFIG_ALTIVEC + beq 1f + bl load_up_altivec /* if from user, just load it up */ + b fast_exception_return +#endif /* CONFIG_ALTIVEC */ +1: addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_LITE(0xf20, altivec_unavailable_exception) + +PerformanceMonitor: + EXCEPTION_PROLOG + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_STD(0xf00, performance_monitor_exception) + + +/* + * This code is jumped to from the startup code to copy + * the kernel image to physical address PHYSICAL_START. + */ +relocate_kernel: + addis r9,r26,klimit@ha /* fetch klimit */ + lwz r25,klimit@l(r9) + addis r25,r25,-KERNELBASE@h + lis r3,PHYSICAL_START@h /* Destination base address */ + li r6,0 /* Destination offset */ + li r5,0x4000 /* # bytes of memory to copy */ + bl copy_and_flush /* copy the first 0x4000 bytes */ + addi r0,r3,4f@l /* jump to the address of 4f */ + mtctr r0 /* in copy and do the rest. */ + bctr /* jump to the copy */ +4: mr r5,r25 + bl copy_and_flush /* copy the rest */ + b turn_on_mmu + +/* + * Copy routine used to copy the kernel to start at physical address 0 + * and flush and invalidate the caches as needed. 
+ * r3 = dest addr, r4 = source addr, r5 = copy limit, r6 = start offset + * on exit, r3, r4, r5 are unchanged, r6 is updated to be >= r5. + */ +_ENTRY(copy_and_flush) + addi r5,r5,-4 + addi r6,r6,-4 +4: li r0,L1_CACHE_BYTES/4 + mtctr r0 +3: addi r6,r6,4 /* copy a cache line */ + lwzx r0,r6,r4 + stwx r0,r6,r3 + bdnz 3b + dcbst r6,r3 /* write it to memory */ + sync + icbi r6,r3 /* flush the icache line */ + cmplw 0,r6,r5 + blt 4b + sync /* additional sync needed on g4 */ + isync + addi r5,r5,4 + addi r6,r6,4 + blr + +#ifdef CONFIG_SMP + .globl __secondary_start_mpc86xx +__secondary_start_mpc86xx: + mfspr r3, SPRN_PIR + stw r3, __secondary_hold_acknowledge@l(0) + mr r24, r3 /* cpu # */ + b __secondary_start + + .globl __secondary_start_pmac_0 +__secondary_start_pmac_0: + /* NB the entries for cpus 0, 1, 2 must each occupy 8 bytes. */ + li r24,0 + b 1f + li r24,1 + b 1f + li r24,2 + b 1f + li r24,3 +1: + /* on powersurge, we come in here with IR=0 and DR=1, and DBAT 0 + set to map the 0xf0000000 - 0xffffffff region */ + mfmsr r0 + rlwinm r0,r0,0,28,26 /* clear DR (0x10) */ + mtmsr r0 + isync + + .globl __secondary_start +__secondary_start: + /* Copy some CPU settings from CPU 0 */ + bl __restore_cpu_setup + + lis r3,-KERNELBASE@h + mr r4,r24 + bl call_setup_cpu /* Call setup_cpu for this CPU */ +#ifdef CONFIG_PPC_BOOK3S_32 + lis r3,-KERNELBASE@h + bl init_idle_6xx +#endif /* CONFIG_PPC_BOOK3S_32 */ + + /* get current's stack and current */ + lis r2,secondary_current@ha + tophys(r2,r2) + lwz r2,secondary_current@l(r2) + tophys(r1,r2) + lwz r1,TASK_STACK(r1) + + /* stack */ + addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + li r0,0 + tophys(r3,r1) + stw r0,0(r3) + + /* load up the MMU */ + bl load_segment_registers + bl load_up_mmu + + /* ptr to phys current thread */ + tophys(r4,r2) + addi r4,r4,THREAD /* phys address of our thread_struct */ + mtspr SPRN_SPRG_THREAD,r4 + lis r4, (swapper_pg_dir - PAGE_OFFSET)@h + ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l + mtspr SPRN_SPRG_PGDIR, r4 + + /* enable MMU and jump to start_secondary */ + li r4,MSR_KERNEL + lis r3,start_secondary@h + ori r3,r3,start_secondary@l + mtspr SPRN_SRR0,r3 + mtspr SPRN_SRR1,r4 + RFI +#endif /* CONFIG_SMP */ + +#ifdef CONFIG_KVM_BOOK3S_HANDLER +#include "../kvm/book3s_rmhandlers.S" +#endif + +/* + * Those generic dummy functions are kept for CPUs not + * included in CONFIG_PPC_BOOK3S_32 + */ +#if !defined(CONFIG_PPC_BOOK3S_32) +_ENTRY(__save_cpu_setup) + blr +_ENTRY(__restore_cpu_setup) + blr +#endif /* !defined(CONFIG_PPC_BOOK3S_32) */ + +/* + * Load stuff into the MMU. Intended to be called with + * IR=0 and DR=0. + */ +early_hash_table: + sync /* Force all PTE updates to finish */ + isync + tlbia /* Clear all TLB entries */ + sync /* wait for tlbia/tlbie to finish */ + TLBSYNC /* ... on all CPUs */ + /* Load the SDR1 register (hash table base & size) */ + lis r6, early_hash - PAGE_OFFSET@h + ori r6, r6, 3 /* 256kB table */ + mtspr SPRN_SDR1, r6 + lis r6, early_hash@h + lis r3, Hash@ha + stw r6, Hash@l(r3) + blr + +load_up_mmu: + sync /* Force all PTE updates to finish */ + isync + tlbia /* Clear all TLB entries */ + sync /* wait for tlbia/tlbie to finish */ + TLBSYNC /* ... on all CPUs */ + /* Load the SDR1 register (hash table base & size) */ + lis r6,_SDR1@ha + tophys(r6,r6) + lwz r6,_SDR1@l(r6) + mtspr SPRN_SDR1,r6 + +/* Load the BAT registers with the values set up by MMU_init. 
*/ + lis r3,BATS@ha + addi r3,r3,BATS@l + tophys(r3,r3) + LOAD_BAT(0,r3,r4,r5) + LOAD_BAT(1,r3,r4,r5) + LOAD_BAT(2,r3,r4,r5) + LOAD_BAT(3,r3,r4,r5) +BEGIN_MMU_FTR_SECTION + LOAD_BAT(4,r3,r4,r5) + LOAD_BAT(5,r3,r4,r5) + LOAD_BAT(6,r3,r4,r5) + LOAD_BAT(7,r3,r4,r5) +END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) + blr + +_GLOBAL(load_segment_registers) + li r0, NUM_USER_SEGMENTS /* load up user segment register values */ + mtctr r0 /* for context 0 */ + li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */ +#ifdef CONFIG_PPC_KUEP + oris r3, r3, SR_NX@h /* Set Nx */ +#endif +#ifdef CONFIG_PPC_KUAP + oris r3, r3, SR_KS@h /* Set Ks */ +#endif + li r4, 0 +3: mtsrin r3, r4 + addi r3, r3, 0x111 /* increment VSID */ + addis r4, r4, 0x1000 /* address of next segment */ + bdnz 3b + li r0, 16 - NUM_USER_SEGMENTS /* load up kernel segment registers */ + mtctr r0 /* for context 0 */ + rlwinm r3, r3, 0, ~SR_NX /* Nx = 0 */ + rlwinm r3, r3, 0, ~SR_KS /* Ks = 0 */ + oris r3, r3, SR_KP@h /* Kp = 1 */ +3: mtsrin r3, r4 + addi r3, r3, 0x111 /* increment VSID */ + addis r4, r4, 0x1000 /* address of next segment */ + bdnz 3b + blr + +/* + * This is where the main kernel code starts. + */ +start_here: + /* ptr to current */ + lis r2,init_task@h + ori r2,r2,init_task@l + /* Set up for using our exception vectors */ + /* ptr to phys current thread */ + tophys(r4,r2) + addi r4,r4,THREAD /* init task's THREAD */ + mtspr SPRN_SPRG_THREAD,r4 + lis r4, (swapper_pg_dir - PAGE_OFFSET)@h + ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l + mtspr SPRN_SPRG_PGDIR, r4 + + /* stack */ + lis r1,init_thread_union@ha + addi r1,r1,init_thread_union@l + li r0,0 + stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) +/* + * Do early platform-specific initialization, + * and set up the MMU. + */ +#ifdef CONFIG_KASAN + bl kasan_early_init +#endif + li r3,0 + mr r4,r31 + bl machine_init + bl __save_cpu_setup + bl MMU_init + bl MMU_init_hw_patch + +/* + * Go back to running unmapped so we can load up new values + * for SDR1 (hash table pointer) and the segment registers + * and change to using our exception vectors. + */ + lis r4,2f@h + ori r4,r4,2f@l + tophys(r4,r4) + li r3,MSR_KERNEL & ~(MSR_IR|MSR_DR) + + .align 4 + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r3 + RFI +/* Load up the kernel context */ +2: bl load_up_mmu + +#ifdef CONFIG_BDI_SWITCH + /* Add helper information for the Abatron bdiGDB debugger. + * We do this here because we know the mmu is disabled, and + * will be enabled for real in just a few instructions. + */ + lis r5, abatron_pteptrs@h + ori r5, r5, abatron_pteptrs@l + stw r5, 0xf0(0) /* This much match your Abatron config */ + lis r6, swapper_pg_dir@h + ori r6, r6, swapper_pg_dir@l + tophys(r5, r5) + stw r6, 0(r5) +#endif /* CONFIG_BDI_SWITCH */ + +/* Now turn on the MMU for real! */ + li r4,MSR_KERNEL + lis r3,start_kernel@h + ori r3,r3,start_kernel@l + mtspr SPRN_SRR0,r3 + mtspr SPRN_SRR1,r4 + RFI + +/* + * void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next); + * + * Set up the segment registers for a new context. + */ +_ENTRY(switch_mmu_context) + lwz r3,MMCONTEXTID(r4) + cmpwi cr0,r3,0 + blt- 4f + mulli r3,r3,897 /* multiply context by skew factor */ + rlwinm r3,r3,4,8,27 /* VSID = (context & 0xfffff) << 4 */ +#ifdef CONFIG_PPC_KUEP + oris r3, r3, SR_NX@h /* Set Nx */ +#endif +#ifdef CONFIG_PPC_KUAP + oris r3, r3, SR_KS@h /* Set Ks */ +#endif + li r0,NUM_USER_SEGMENTS + mtctr r0 + + lwz r4, MM_PGD(r4) +#ifdef CONFIG_BDI_SWITCH + /* Context switch the PTE pointer for the Abatron BDI2000. 
+ * The PGDIR is passed as second argument. + */ + lis r5, abatron_pteptrs@ha + stw r4, abatron_pteptrs@l + 0x4(r5) +#endif + tophys(r4, r4) + mtspr SPRN_SPRG_PGDIR, r4 + li r4,0 + isync +3: + mtsrin r3,r4 + addi r3,r3,0x111 /* next VSID */ + rlwinm r3,r3,0,8,3 /* clear out any overflow from VSID field */ + addis r4,r4,0x1000 /* address of next segment */ + bdnz 3b + sync + isync + blr +4: trap + EMIT_BUG_ENTRY 4b,__FILE__,__LINE__,0 + blr +EXPORT_SYMBOL(switch_mmu_context) + +/* + * An undocumented "feature" of 604e requires that the v bit + * be cleared before changing BAT values. + * + * Also, newer IBM firmware does not clear bat3 and 4 so + * this makes sure it's done. + * -- Cort + */ +clear_bats: + li r10,0 + + mtspr SPRN_DBAT0U,r10 + mtspr SPRN_DBAT0L,r10 + mtspr SPRN_DBAT1U,r10 + mtspr SPRN_DBAT1L,r10 + mtspr SPRN_DBAT2U,r10 + mtspr SPRN_DBAT2L,r10 + mtspr SPRN_DBAT3U,r10 + mtspr SPRN_DBAT3L,r10 + mtspr SPRN_IBAT0U,r10 + mtspr SPRN_IBAT0L,r10 + mtspr SPRN_IBAT1U,r10 + mtspr SPRN_IBAT1L,r10 + mtspr SPRN_IBAT2U,r10 + mtspr SPRN_IBAT2L,r10 + mtspr SPRN_IBAT3U,r10 + mtspr SPRN_IBAT3L,r10 +BEGIN_MMU_FTR_SECTION + /* Here's a tweak: at this point, CPU setup have + * not been called yet, so HIGH_BAT_EN may not be + * set in HID0 for the 745x processors. However, it + * seems that doesn't affect our ability to actually + * write to these SPRs. + */ + mtspr SPRN_DBAT4U,r10 + mtspr SPRN_DBAT4L,r10 + mtspr SPRN_DBAT5U,r10 + mtspr SPRN_DBAT5L,r10 + mtspr SPRN_DBAT6U,r10 + mtspr SPRN_DBAT6L,r10 + mtspr SPRN_DBAT7U,r10 + mtspr SPRN_DBAT7L,r10 + mtspr SPRN_IBAT4U,r10 + mtspr SPRN_IBAT4L,r10 + mtspr SPRN_IBAT5U,r10 + mtspr SPRN_IBAT5L,r10 + mtspr SPRN_IBAT6U,r10 + mtspr SPRN_IBAT6L,r10 + mtspr SPRN_IBAT7U,r10 + mtspr SPRN_IBAT7L,r10 +END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) + blr + +_ENTRY(update_bats) + lis r4, 1f@h + ori r4, r4, 1f@l + tophys(r4, r4) + mfmsr r6 + mflr r7 + li r3, MSR_KERNEL & ~(MSR_IR | MSR_DR) + rlwinm r0, r6, 0, ~MSR_RI + rlwinm r0, r0, 0, ~MSR_EE + mtmsr r0 + + .align 4 + mtspr SPRN_SRR0, r4 + mtspr SPRN_SRR1, r3 + RFI +1: bl clear_bats + lis r3, BATS@ha + addi r3, r3, BATS@l + tophys(r3, r3) + LOAD_BAT(0, r3, r4, r5) + LOAD_BAT(1, r3, r4, r5) + LOAD_BAT(2, r3, r4, r5) + LOAD_BAT(3, r3, r4, r5) +BEGIN_MMU_FTR_SECTION + LOAD_BAT(4, r3, r4, r5) + LOAD_BAT(5, r3, r4, r5) + LOAD_BAT(6, r3, r4, r5) + LOAD_BAT(7, r3, r4, r5) +END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) + li r3, MSR_KERNEL & ~(MSR_IR | MSR_DR | MSR_RI) + mtmsr r3 + mtspr SPRN_SRR0, r7 + mtspr SPRN_SRR1, r6 + RFI + +flush_tlbs: + lis r10, 0x40 +1: addic. r10, r10, -0x1000 + tlbie r10 + bgt 1b + sync + blr + +mmu_off: + addi r4, r3, __after_mmu_off - _start + mfmsr r3 + andi. r0,r3,MSR_DR|MSR_IR /* MMU enabled? */ + beqlr + andc r3,r3,r0 + + .align 4 + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r3 + sync + RFI + +/* We use one BAT to map up to 256M of RAM at _PAGE_OFFSET */ +initial_bats: + lis r11,PAGE_OFFSET@h + tophys(r8,r11) +#ifdef CONFIG_SMP + ori r8,r8,0x12 /* R/W access, M=1 */ +#else + ori r8,r8,2 /* R/W access */ +#endif /* CONFIG_SMP */ + ori r11,r11,BL_256M<<2|0x2 /* set up BAT registers for 604 */ + + mtspr SPRN_DBAT0L,r8 /* N.B. 
6xx have valid */ + mtspr SPRN_DBAT0U,r11 /* bit in upper BAT register */ + mtspr SPRN_IBAT0L,r8 + mtspr SPRN_IBAT0U,r11 + isync + blr + +#ifdef CONFIG_BOOTX_TEXT +setup_disp_bat: + /* + * setup the display bat prepared for us in prom.c + */ + mflr r8 + bl reloc_offset + mtlr r8 + addis r8,r3,disp_BAT@ha + addi r8,r8,disp_BAT@l + cmpwi cr0,r8,0 + beqlr + lwz r11,0(r8) + lwz r8,4(r8) + mtspr SPRN_DBAT3L,r8 + mtspr SPRN_DBAT3U,r11 + blr +#endif /* CONFIG_BOOTX_TEXT */ + +#ifdef CONFIG_PPC_EARLY_DEBUG_CPM +setup_cpm_bat: + lis r8, 0xf000 + ori r8, r8, 0x002a + mtspr SPRN_DBAT1L, r8 + + lis r11, 0xf000 + ori r11, r11, (BL_1M << 2) | 2 + mtspr SPRN_DBAT1U, r11 + + blr +#endif + +#ifdef CONFIG_PPC_EARLY_DEBUG_USBGECKO +setup_usbgecko_bat: + /* prepare a BAT for early io */ +#if defined(CONFIG_GAMECUBE) + lis r8, 0x0c00 +#elif defined(CONFIG_WII) + lis r8, 0x0d00 +#else +#error Invalid platform for USB Gecko based early debugging. +#endif + /* + * The virtual address used must match the virtual address + * associated to the fixmap entry FIX_EARLY_DEBUG_BASE. + */ + lis r11, 0xfffe /* top 128K */ + ori r8, r8, 0x002a /* uncached, guarded ,rw */ + ori r11, r11, 0x2 /* 128K, Vs=1, Vp=0 */ + mtspr SPRN_DBAT1L, r8 + mtspr SPRN_DBAT1U, r11 + blr +#endif + +#ifdef CONFIG_8260 +/* Jump into the system reset for the rom. + * We first disable the MMU, and then jump to the ROM reset address. + * + * r3 is the board info structure, r4 is the location for starting. + * I use this for building a small kernel that can load other kernels, + * rather than trying to write or rely on a rom monitor that can tftp load. + */ + .globl m8260_gorom +m8260_gorom: + mfmsr r0 + rlwinm r0,r0,0,17,15 /* clear MSR_EE in r0 */ + sync + mtmsr r0 + sync + mfspr r11, SPRN_HID0 + lis r10, 0 + ori r10,r10,HID0_ICE|HID0_DCE + andc r11, r11, r10 + mtspr SPRN_HID0, r11 + isync + li r5, MSR_ME|MSR_RI + lis r6,2f@h + addis r6,r6,-KERNELBASE@h + ori r6,r6,2f@l + mtspr SPRN_SRR0,r6 + mtspr SPRN_SRR1,r5 + isync + sync + rfi +2: + mtlr r4 + blr +#endif + + +/* + * We put a few things here that have to be page-aligned. + * This stuff goes at the beginning of the data segment, + * which is page-aligned. + */ + .data + .globl sdata +sdata: + .globl empty_zero_page +empty_zero_page: + .space 4096 +EXPORT_SYMBOL(empty_zero_page) + + .globl swapper_pg_dir +swapper_pg_dir: + .space PGD_TABLE_SIZE + +/* Room for two PTE pointers, usually the kernel and current user pointers + * to their respective root page table. + */ +abatron_pteptrs: + .space 8 -- cgit v1.2.3 From 865418795a1dea1c2b58a5fd7b6bdcb93e0c36b8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 6 Oct 2020 09:05:37 +0000 Subject: powerpc/32s: Remove #ifdef CONFIG_PPC_BOOK3S_32 in head_book3s_32.S head_book3s_32.S is only built when CONFIG_PPC_BOOK3S_32 is selected. Remove all conditions based on CONFIG_PPC_BOOK3S_32 in the file. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1b68632425d8866d147aea9005004e4594672211.1601975100.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_book3s_32.S | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index b7b554533e0d..5eb9eedac920 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -174,10 +174,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) bl reloc_offset li r24,0 /* cpu# */ bl call_setup_cpu /* Call setup_cpu for this CPU */ -#ifdef CONFIG_PPC_BOOK3S_32 bl reloc_offset bl init_idle_6xx -#endif /* CONFIG_PPC_BOOK3S_32 */ /* @@ -878,10 +876,8 @@ __secondary_start: lis r3,-KERNELBASE@h mr r4,r24 bl call_setup_cpu /* Call setup_cpu for this CPU */ -#ifdef CONFIG_PPC_BOOK3S_32 lis r3,-KERNELBASE@h bl init_idle_6xx -#endif /* CONFIG_PPC_BOOK3S_32 */ /* get current's stack and current */ lis r2,secondary_current@ha @@ -921,17 +917,6 @@ __secondary_start: #include "../kvm/book3s_rmhandlers.S" #endif -/* - * Those generic dummy functions are kept for CPUs not - * included in CONFIG_PPC_BOOK3S_32 - */ -#if !defined(CONFIG_PPC_BOOK3S_32) -_ENTRY(__save_cpu_setup) - blr -_ENTRY(__restore_cpu_setup) - blr -#endif /* !defined(CONFIG_PPC_BOOK3S_32) */ - /* * Load stuff into the MMU. Intended to be called with * IR=0 and DR=0. -- cgit v1.2.3 From 15c102153e722cc6e0729764a7068c209a7469cd Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 1 Oct 2020 12:42:39 +0000 Subject: powerpc/time: Rename mftbl() to mftb() On PPC64, we have mftb(). On PPC32, we have mftbl() and an #define mftb() mftbl(). mftb() and mftbl() are equivalent, their purpose is to read the content of SPRN_TRBL, as returned by 'mftb' simplified instruction. binutils seems to define 'mftbl' instruction as an equivalent of 'mftb'. However in both 32 bits and 64 bits documentation, only 'mftb' is defined, and when performing a disassembly with objdump, the displayed instruction is 'mftb' No need to have two ways to do the same thing with different names, rename mftbl() to have only mftb(). 
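For illustration only (this sketch is not taken from the kernel headers), the two spellings are interchangeable reads of the lower timebase word, which is why a single mftb() macro is enough. A minimal fragment, assuming a 32-bit build and plain GCC inline asm:

	unsigned long a, b;

	asm volatile("mftb %0"  : "=r" (a));	/* the documented mnemonic */
	asm volatile("mftbl %0" : "=r" (b));	/* binutils alias, same timebase read */

Both assemble to the same read of the lower timebase register, so nothing is lost by dropping the mftbl() spelling.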
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/94dc68d3d9ef9eb549796d4b938b6ba0305a049b.1601556145.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/reg.h | 5 ++--- arch/powerpc/include/asm/time.h | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 788058af1d44..c66dcdb47c44 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1439,19 +1439,18 @@ static inline void msr_check_and_clear(unsigned long bits) #else /* __powerpc64__ */ #if defined(CONFIG_PPC_8xx) -#define mftbl() ({unsigned long rval; \ +#define mftb() ({unsigned long rval; \ asm volatile("mftbl %0" : "=r" (rval)); rval;}) #define mftbu() ({unsigned long rval; \ asm volatile("mftbu %0" : "=r" (rval)); rval;}) #else -#define mftbl() ({unsigned long rval; \ +#define mftb() ({unsigned long rval; \ asm volatile("mfspr %0, %1" : "=r" (rval) : \ "i" (SPRN_TBRL)); rval;}) #define mftbu() ({unsigned long rval; \ asm volatile("mfspr %0, %1" : "=r" (rval) : \ "i" (SPRN_TBRU)); rval;}) #endif -#define mftb() mftbl() #endif /* !__powerpc64__ */ #define mttbl(v) asm volatile("mttbl %0":: "r"(v)) diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index a0c8ae4cb27c..6e681160981b 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -47,7 +47,7 @@ struct div_result { static inline unsigned long get_tbl(void) { - return mftbl(); + return mftb(); } static inline unsigned int get_tbu(void) -- cgit v1.2.3 From ff125fbcd45d1706861579dbe66e31f5b3f1e779 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 1 Oct 2020 12:42:40 +0000 Subject: powerpc/time: Make mftb() common to PPC32 and PPC64 No need to have two versions that are identical. CONFIG_PPC_CELL is only selected by PPC64 targets. CONFIG_E500 is the only PPC64 target selecting CONFIG_FSL_BOOK3E. 
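For context, the Cell/E500 variant being kept here works around an erratum where a timebase read can apparently return zero spuriously; conceptually it just retries until a non-zero value comes back. A rough, hand-written sketch of that idea (has_cell_tb_bug is an assumed stand-in for the CPU_FTR_CELL_TB_BUG feature test, not the macro from reg.h):

	#include <stdbool.h>

	static bool has_cell_tb_bug;	/* assumed flag mirroring the CPU feature test */

	static unsigned long read_tb(void)
	{
		unsigned long tb;

		do {
			asm volatile("mftb %0" : "=r" (tb));	/* timebase read */
		} while (has_cell_tb_bug && tb == 0);		/* retry on the erratum */

		return tb;
	}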
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/6bf23ec744aab4ba63506a011f6a145ea35d620d.1601556145.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/reg.h | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index c66dcdb47c44..f877a576b338 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1419,8 +1419,7 @@ static inline void msr_check_and_clear(unsigned long bits) __msr_check_and_clear(bits); } -#ifdef __powerpc64__ -#if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_FSL_BOOK3E) +#if defined(CONFIG_PPC_CELL) || defined(CONFIG_E500) #define mftb() ({unsigned long rval; \ asm volatile( \ "90: mfspr %0, %2;\n" \ @@ -1430,28 +1429,23 @@ static inline void msr_check_and_clear(unsigned long bits) : "=r" (rval) \ : "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL) : "cr0"); \ rval;}) +#elif defined(CONFIG_PPC_8xx) +#define mftb() ({unsigned long rval; \ + asm volatile("mftbl %0" : "=r" (rval)); rval;}) #else #define mftb() ({unsigned long rval; \ asm volatile("mfspr %0, %1" : \ "=r" (rval) : "i" (SPRN_TBRL)); rval;}) #endif /* !CONFIG_PPC_CELL */ -#else /* __powerpc64__ */ - #if defined(CONFIG_PPC_8xx) -#define mftb() ({unsigned long rval; \ - asm volatile("mftbl %0" : "=r" (rval)); rval;}) #define mftbu() ({unsigned long rval; \ asm volatile("mftbu %0" : "=r" (rval)); rval;}) #else -#define mftb() ({unsigned long rval; \ - asm volatile("mfspr %0, %1" : "=r" (rval) : \ - "i" (SPRN_TBRL)); rval;}) #define mftbu() ({unsigned long rval; \ asm volatile("mfspr %0, %1" : "=r" (rval) : \ "i" (SPRN_TBRU)); rval;}) #endif -#endif /* !__powerpc64__ */ #define mttbl(v) asm volatile("mttbl %0":: "r"(v)) #define mttbu(v) asm volatile("mttbu %0":: "r"(v)) -- cgit v1.2.3 From 942e89115b588b4b5df86930b5302a5c07b820ba Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 1 Oct 2020 12:42:41 +0000 Subject: powerpc/time: Avoid using get_tbl() and get_tbu() internally get_tbl() is confusing as it returns the content of TBL register on PPC32 but the concatenation of TBL and TBU on PPC64. Use mftb() instead. Do the same with get_tbu() for consistency allthough it's name is less confusing. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/41573406a4eab98838decaa91649086fef1e6119.1601556145.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/delay.h | 2 +- arch/powerpc/include/asm/time.h | 8 ++++---- arch/powerpc/kernel/time.c | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/delay.h b/arch/powerpc/include/asm/delay.h index 66963f7d3e64..51bb8c1476c7 100644 --- a/arch/powerpc/include/asm/delay.h +++ b/arch/powerpc/include/asm/delay.h @@ -54,7 +54,7 @@ extern void udelay(unsigned long usecs); ({ \ typeof(condition) __ret; \ unsigned long __loops = tb_ticks_per_usec * timeout; \ - unsigned long __start = get_tbl(); \ + unsigned long __start = mftb(); \ \ if (delay) { \ while (!(__ret = (condition)) && \ diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 6e681160981b..c83116ec79c3 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -76,9 +76,9 @@ static inline u64 get_tb(void) unsigned int tbhi, tblo, tbhi2; do { - tbhi = get_tbu(); - tblo = get_tbl(); - tbhi2 = get_tbu(); + tbhi = mftbu(); + tblo = mftb(); + tbhi2 = mftbu(); } while (tbhi != tbhi2); return ((u64)tbhi << 32) | tblo; @@ -123,7 +123,7 @@ static inline void set_dec(u64 val) static inline unsigned long tb_ticks_since(unsigned long tstamp) { - return get_tbl() - tstamp; + return mftb() - tstamp; } #define mulhwu(x,y) \ diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 760ea359a7f7..74efe46f5532 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -448,8 +448,8 @@ void __delay(unsigned long loops) */ spin_cpu_relax(); } else { - start = get_tbl(); - while (get_tbl() - start < loops) + start = mftb(); + while (mftb() - start < loops) spin_cpu_relax(); } spin_end(); -- cgit v1.2.3 From e8d5bf30eafc37e31ce68bc7ccf1db970fe3cd04 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 1 Oct 2020 12:42:42 +0000 Subject: powerpc/time: Remove get_tbu() get_tbu() is redundant with mftbu() and is not used anymore. Remove it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1746e2d11ea90c3f45877e1fcc6c79ce96cf6b98.1601556145.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/time.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index c83116ec79c3..3dd2deb623da 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -49,11 +49,6 @@ static inline unsigned long get_tbl(void) { return mftb(); } - -static inline unsigned int get_tbu(void) -{ - return mftbu(); -} #endif /* !CONFIG_PPC64 */ static inline u64 get_vtb(void) -- cgit v1.2.3 From 1156a6285cd38e5a6987ddee3758e7954c56cb3d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 1 Oct 2020 12:42:43 +0000 Subject: powerpc/time: Make get_tbl() common to PPC32 and PPC64 On PPC64, get_tbl() is defined as an alias of get_tb() which return the result of mftb(). That exactly the same as what the PPC32 version does. We don't need two versions. Remove the PPC64 definition of get_tbl() and use the PPC32 version for both. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a8eaabb87d69534e533ebac805163e08146e05bd.1601556145.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/time.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 3dd2deb623da..6ec0409d10f4 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -38,18 +38,11 @@ struct div_result { u64 result_low; }; -#ifdef CONFIG_PPC64 - /* For compatibility, get_tbl() is defined as get_tb() on ppc64 */ -#define get_tbl get_tb - -#else - static inline unsigned long get_tbl(void) { return mftb(); } -#endif /* !CONFIG_PPC64 */ static inline u64 get_vtb(void) { -- cgit v1.2.3 From 9686e431c683ee7b8aca0f3985c244aee3d9f30d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 1 Oct 2020 12:42:44 +0000 Subject: powerpc/time: Make get_tb() common to PPC32 and PPC64 mftbu() is always defined now, so the #ifdef can be removed and replaced by an IS_ENABLED(CONFIG_PPC64) inside the PPC32 version of get_tb(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/47e49717d2643169ffcbe5d507f184cf49f0fe95.1601556145.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/time.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 6ec0409d10f4..2f566c1a754c 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -53,16 +53,13 @@ static inline u64 get_vtb(void) return 0; } -#ifdef CONFIG_PPC64 -static inline u64 get_tb(void) -{ - return mftb(); -} -#else /* CONFIG_PPC64 */ static inline u64 get_tb(void) { unsigned int tbhi, tblo, tbhi2; + if (IS_ENABLED(CONFIG_PPC64)) + return mftb(); + do { tbhi = mftbu(); tblo = mftb(); @@ -71,7 +68,6 @@ static inline u64 get_tb(void) return ((u64)tbhi << 32) | tblo; } -#endif /* !CONFIG_PPC64 */ static inline void set_tb(unsigned int upper, unsigned int lower) { -- cgit v1.2.3 From ffd0b25ca049a477cb757e5bcf2d5e1664d12e5d Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Wed, 14 Oct 2020 14:28:11 -0400 Subject: Revert "powerpc/pci: unmap legacy INTx interrupts when a PHB is removed" This reverts commit 3a3181e16fbde752007759f8759d25e0ff1fc425 which causes memory corruptions on POWER9 powernv. 
eg: pci_bus 0035:08: busn_res: [bus 08-0c] is released ============================================================================= BUG kmalloc-16 (Tainted: G W O ): Object already free ----------------------------------------------------------------------------- Disabling lock debugging due to kernel taint INFO: Allocated in pcibios_scan_phb+0x104/0x3e0 age=1960714 cpu=4 pid=1 __slab_alloc+0xa4/0xf0 __kmalloc+0x294/0x330 pcibios_scan_phb+0x104/0x3e0 pcibios_init+0x84/0x124 do_one_initcall+0xac/0x528 kernel_init_freeable+0x35c/0x3fc kernel_init+0x24/0x148 ret_from_kernel_thread+0x5c/0x80 INFO: Freed in pcibios_remove_bus+0x70/0x90 age=0 cpu=16 pid=1717146 kfree+0x49c/0x510 pcibios_remove_bus+0x70/0x90 pci_remove_bus+0xe4/0x110 pci_remove_bus_device+0x74/0x170 pci_remove_bus_device+0x4c/0x170 pci_stop_and_remove_bus_device_locked+0x34/0x50 remove_store+0xc0/0xe0 dev_attr_store+0x30/0x50 sysfs_kf_write+0x68/0xb0 kernfs_fop_write+0x114/0x260 vfs_write+0xe4/0x260 ksys_write+0x74/0x130 system_call_exception+0xf8/0x1d0 system_call_common+0xe8/0x218 INFO: Slab 0x0000000099caaf22 objects=178 used=174 fp=0x00000000006a64b0 flags=0x7fff8000000201 INFO: Object 0x00000000f360132d @offset=30192 fp=0x0000000000000000 Signed-off-by: Qian Cai Acked-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201014182811.12027-1-cai@lca.pw --- arch/powerpc/include/asm/pci-bridge.h | 6 -- arch/powerpc/kernel/pci-common.c | 114 ---------------------------------- 2 files changed, 120 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index d21e070352dc..d2a2a14e56f9 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -48,9 +48,6 @@ struct pci_controller_ops { /* * Structure of a PCI controller (host bridge) - * - * @irq_count: number of interrupt mappings - * @irq_map: interrupt mappings */ struct pci_controller { struct pci_bus *bus; @@ -130,9 +127,6 @@ struct pci_controller { void *private_data; struct npu *npu; - - unsigned int irq_count; - unsigned int *irq_map; }; /* These are used for config access before all the PCI probing diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index deb831f0ae13..be108616a721 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -353,115 +353,6 @@ struct pci_controller *pci_find_controller_for_domain(int domain_nr) return NULL; } -/* - * Assumption is made on the interrupt parent. All interrupt-map - * entries are considered to have the same parent. 
- */ -static int pcibios_irq_map_count(struct pci_controller *phb) -{ - const __be32 *imap; - int imaplen; - struct device_node *parent; - u32 intsize, addrsize, parintsize, paraddrsize; - - if (of_property_read_u32(phb->dn, "#interrupt-cells", &intsize)) - return 0; - if (of_property_read_u32(phb->dn, "#address-cells", &addrsize)) - return 0; - - imap = of_get_property(phb->dn, "interrupt-map", &imaplen); - if (!imap) { - pr_debug("%pOF : no interrupt-map\n", phb->dn); - return 0; - } - imaplen /= sizeof(u32); - pr_debug("%pOF : imaplen=%d\n", phb->dn, imaplen); - - if (imaplen < (addrsize + intsize + 1)) - return 0; - - imap += intsize + addrsize; - parent = of_find_node_by_phandle(be32_to_cpup(imap)); - if (!parent) { - pr_debug("%pOF : no imap parent found !\n", phb->dn); - return 0; - } - - if (of_property_read_u32(parent, "#interrupt-cells", &parintsize)) { - pr_debug("%pOF : parent lacks #interrupt-cells!\n", phb->dn); - return 0; - } - - if (of_property_read_u32(parent, "#address-cells", ¶ddrsize)) - paraddrsize = 0; - - return imaplen / (addrsize + intsize + 1 + paraddrsize + parintsize); -} - -static void pcibios_irq_map_init(struct pci_controller *phb) -{ - phb->irq_count = pcibios_irq_map_count(phb); - if (phb->irq_count < PCI_NUM_INTX) - phb->irq_count = PCI_NUM_INTX; - - pr_debug("%pOF : interrupt map #%d\n", phb->dn, phb->irq_count); - - phb->irq_map = kcalloc(phb->irq_count, sizeof(unsigned int), - GFP_KERNEL); -} - -static void pci_irq_map_register(struct pci_dev *pdev, unsigned int virq) -{ - struct pci_controller *phb = pci_bus_to_host(pdev->bus); - int i; - - if (!phb->irq_map) - return; - - for (i = 0; i < phb->irq_count; i++) { - /* - * Look for an empty or an equivalent slot, as INTx - * interrupts can be shared between adapters. - */ - if (phb->irq_map[i] == virq || !phb->irq_map[i]) { - phb->irq_map[i] = virq; - break; - } - } - - if (i == phb->irq_count) - pr_err("PCI:%s all platform interrupts mapped\n", - pci_name(pdev)); -} - -/* - * Clearing the mapped interrupts will also clear the underlying - * mappings of the ESB pages of the interrupts when under XIVE. It is - * a requirement of PowerVM to clear all memory mappings before - * removing a PHB. - */ -static void pci_irq_map_dispose(struct pci_bus *bus) -{ - struct pci_controller *phb = pci_bus_to_host(bus); - int i; - - if (!phb->irq_map) - return; - - pr_debug("PCI: Clearing interrupt mappings for PHB %04x:%02x...\n", - pci_domain_nr(bus), bus->number); - for (i = 0; i < phb->irq_count; i++) - irq_dispose_mapping(phb->irq_map[i]); - - kfree(phb->irq_map); -} - -void pcibios_remove_bus(struct pci_bus *bus) -{ - pci_irq_map_dispose(bus); -} -EXPORT_SYMBOL_GPL(pcibios_remove_bus); - /* * Reads the interrupt pin to determine if interrupt is use by card. * If the interrupt is used, then gets the interrupt line from the @@ -510,8 +401,6 @@ static int pci_read_irq_line(struct pci_dev *pci_dev) pci_dev->irq = virq; - /* Record all interrut mappings for later removal of a PHB */ - pci_irq_map_register(pci_dev, virq); return 0; } @@ -1665,9 +1554,6 @@ void pcibios_scan_phb(struct pci_controller *hose) pr_debug("PCI: Scanning PHB %pOF\n", node); - /* Allocate interrupt mappings array */ - pcibios_irq_map_init(hose); - /* Get some IO space for the new PHB */ pcibios_setup_phb_io_space(hose); -- cgit v1.2.3