From ef78f2dd2398ce8ed9eeaab9c9f8af2e15f5d870 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 23 Oct 2020 13:08:38 +1100 Subject: powerpc/85xx: Fix declaration made after definition Currently the clang build of corenet64_smp_defconfig fails with: arch/powerpc/platforms/85xx/corenet_generic.c:210:1: error: attribute declaration must precede definition machine_arch_initcall(corenet_generic, corenet_gen_publish_devices); Fix it by moving the initcall definition prior to the machine definition, and directly below the function it calls, which is the usual style anyway. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201023020838.3274226-1-mpe@ellerman.id.au --- arch/powerpc/platforms/85xx/corenet_generic.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index 6aa8defb5857..8d6029099848 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -106,6 +106,7 @@ int __init corenet_gen_publish_devices(void) { return of_platform_bus_probe(NULL, of_device_ids, NULL); } +machine_arch_initcall(corenet_generic, corenet_gen_publish_devices); static const char * const boards[] __initconst = { "fsl,P2041RDB", @@ -206,5 +207,3 @@ define_machine(corenet_generic) { .power_save = e500_idle, #endif }; - -machine_arch_initcall(corenet_generic, corenet_gen_publish_devices); -- cgit v1.2.3 From a40fdaf1420d6e6bda0dd2df1e6806013e58dbe1 Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Tue, 10 Nov 2020 21:07:52 -0500 Subject: Revert "powerpc/pseries/hotplug-cpu: Remove double free in error path" This reverts commit a0ff72f9f5a780341e7ff5e9ba50a0dad5fa1980. Since the commit b015f6bc9547 ("powerpc/pseries: Add cpu DLPAR support for drc-info property"), the 'cpu_drcs' wouldn't be double freed when the 'cpus' node not found. So we needn't apply this patch, otherwise, the memory will be leaked. Fixes: a0ff72f9f5a7 ("powerpc/pseries/hotplug-cpu: Remove double free in error path") Reported-by: Hulk Robot Signed-off-by: Zhang Xiaoxu [mpe: Caused by me applying a patch to a function that had changed in the interim] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201111020752.1686139-1-zhangxiaoxu5@huawei.com --- arch/powerpc/platforms/pseries/hotplug-cpu.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index f2837e33bf5d..4bb1c9f2bb11 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -743,6 +743,7 @@ static int dlpar_cpu_add_by_count(u32 cpus_to_add) parent = of_find_node_by_path("/cpus"); if (!parent) { pr_warn("Could not find CPU root node in device tree\n"); + kfree(cpu_drcs); return -1; } -- cgit v1.2.3 From 027717a45ca251a7ba67a63db359994836962cd2 Mon Sep 17 00:00:00 2001 From: Kaixu Xia Date: Tue, 10 Nov 2020 19:19:30 +0800 Subject: powerpc/powernv/sriov: fix unsigned int win compared to less than zero Fix coccicheck warning: arch/powerpc/platforms/powernv/pci-sriov.c:443:7-10: WARNING: Unsigned expression compared with zero: win < 0 arch/powerpc/platforms/powernv/pci-sriov.c:462:7-10: WARNING: Unsigned expression compared with zero: win < 0 Fixes: 39efc03e3ee8 ("powerpc/powernv/sriov: Move M64 BAR allocation into a helper") Reported-by: Tosk Robot Signed-off-by: Kaixu Xia Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1605007170-22171-1-git-send-email-kaixuxia@tencent.com --- arch/powerpc/platforms/powernv/pci-sriov.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c index c4434f20f42f..28aac933a439 100644 --- a/arch/powerpc/platforms/powernv/pci-sriov.c +++ b/arch/powerpc/platforms/powernv/pci-sriov.c @@ -422,7 +422,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) { struct pnv_iov_data *iov; struct pnv_phb *phb; - unsigned int win; + int win; struct resource *res; int i, j; int64_t rc; -- cgit v1.2.3 From c74cf7a3d59a21b290fe0468f5b470d0b8ee37df Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 11 Nov 2020 15:53:15 +0100 Subject: powerpc/powernv/memtrace: Don't leak kernel memory to user space We currently leak kernel memory to user space, because memory offlining doesn't do any implicit clearing of memory and we are missing explicit clearing of memory. Let's keep it simple and clear pages before removing the linear mapping. Reproduced in QEMU/TCG with 10 GiB of main memory: [root@localhost ~]# dd obs=9G if=/dev/urandom of=/dev/null [... wait until "free -m" used counter no longer changes and cancel] 19665802+0 records in 1+0 records out 9663676416 bytes (9.7 GB, 9.0 GiB) copied, 135.548 s, 71.3 MB/s [root@localhost ~]# cat /sys/devices/system/memory/block_size_bytes 40000000 [root@localhost ~]# echo 0x40000000 > /sys/kernel/debug/powerpc/memtrace/enable [ 402.978663][ T1086] page:000000001bc4bc74 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x24900 [ 402.980063][ T1086] flags: 0x7ffff000001000(reserved) [ 402.980415][ T1086] raw: 007ffff000001000 c00c000000924008 c00c000000924008 0000000000000000 [ 402.980627][ T1086] raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000 [ 402.980845][ T1086] page dumped because: unmovable page [ 402.989608][ T1086] Offlined Pages 16384 [ 403.324155][ T1086] memtrace: Allocated trace memory on node 0 at 0x0000000200000000 Before this patch: [root@localhost ~]# hexdump -C /sys/kernel/debug/powerpc/memtrace/00000000/trace | head 00000000 c8 25 72 51 4d 26 36 c5 5c c2 56 15 d5 1a cd 10 |.%rQM&6.\.V.....| 00000010 19 b9 50 b2 cb e3 60 b8 ec 0a f3 ec 4b 3c 39 f0 |..P...`.....K<9.|$ 00000020 4e 5a 4c cf bd 26 19 ff 37 79 13 67 24 b7 b8 57 |NZL..&..7y.g$..W|$ 00000030 98 3e f5 be 6f 14 6a bd a4 52 bc 6e e9 e0 c1 5d |.>..o.j..R.n...]|$ 00000040 76 b3 ae b5 88 d7 da e3 64 23 85 2c 10 88 07 b6 |v.......d#.,....|$ 00000050 9a d8 91 de f7 50 27 69 2e 64 9c 6f d3 19 45 79 |.....P'i.d.o..Ey|$ 00000060 6a 6f 8a 61 71 19 1f c7 f1 df 28 26 ca 0f 84 55 |jo.aq.....(&...U|$ 00000070 01 3f be e4 e2 e1 da ff 7b 8c 8e 32 37 b4 24 53 |.?......{..27.$S|$ 00000080 1b 70 30 45 56 e6 8c c4 0e b5 4c fb 9f dd 88 06 |.p0EV.....L.....|$ 00000090 ef c4 18 79 f1 60 b1 5c 79 59 4d f4 36 d7 4a 5c |...y.`.\yYM.6.J\|$ After this patch: [root@localhost ~]# hexdump -C /sys/kernel/debug/powerpc/memtrace/00000000/trace | head 00000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| * 40000000 Fixes: 9d5171a8f248 ("powerpc/powernv: Enable removal of memory for in memory tracing") Cc: stable@vger.kernel.org # v4.14+ Reported-by: Michael Ellerman Signed-off-by: David Hildenbrand Reviewed-by: Oscar Salvador Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201111145322.15793-2-david@redhat.com --- arch/powerpc/platforms/powernv/memtrace.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index 6828108486f8..eea1f94482ff 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -67,6 +67,23 @@ static int change_memblock_state(struct memory_block *mem, void *arg) return 0; } +static void memtrace_clear_range(unsigned long start_pfn, + unsigned long nr_pages) +{ + unsigned long pfn; + + /* + * As pages are offline, we cannot trust the memmap anymore. As HIGHMEM + * does not apply, avoid passing around "struct page" and use + * clear_page() instead directly. + */ + for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) { + if (IS_ALIGNED(pfn, PAGES_PER_SECTION)) + cond_resched(); + clear_page(__va(PFN_PHYS(pfn))); + } +} + /* called with device_hotplug_lock held */ static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages) { @@ -111,6 +128,11 @@ static u64 memtrace_alloc_node(u32 nid, u64 size) lock_device_hotplug(); for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) { if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) { + /* + * Clear the range while we still have a linear + * mapping. + */ + memtrace_clear_range(base_pfn, nr_pages); /* * Remove memory in memory block size chunks so that * iomem resources are always split to the same size and -- cgit v1.2.3 From d6718941a2767fb383e105d257d2105fe4f15f0e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 11 Nov 2020 15:53:16 +0100 Subject: powerpc/powernv/memtrace: Fix crashing the kernel when enabling concurrently It's very easy to crash the kernel right now by simply trying to enable memtrace concurrently, hammering on the "enable" interface loop.sh: #!/bin/bash dmesg --console-off while true; do echo 0x40000000 > /sys/kernel/debug/powerpc/memtrace/enable done [root@localhost ~]# loop.sh & [root@localhost ~]# loop.sh & Resulting quickly in a kernel crash. Let's properly protect using a mutex. Fixes: 9d5171a8f248 ("powerpc/powernv: Enable removal of memory for in memory tracing") Cc: stable@vger.kernel.org# v4.14+ Signed-off-by: David Hildenbrand Reviewed-by: Oscar Salvador Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201111145322.15793-3-david@redhat.com --- arch/powerpc/platforms/powernv/memtrace.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index eea1f94482ff..0e42fe2d7b6a 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -30,6 +30,7 @@ struct memtrace_entry { char name[16]; }; +static DEFINE_MUTEX(memtrace_mutex); static u64 memtrace_size; static struct memtrace_entry *memtrace_array; @@ -279,6 +280,7 @@ static int memtrace_online(void) static int memtrace_enable_set(void *data, u64 val) { + int rc = -EAGAIN; u64 bytes; /* @@ -291,25 +293,31 @@ static int memtrace_enable_set(void *data, u64 val) return -EINVAL; } + mutex_lock(&memtrace_mutex); + /* Re-add/online previously removed/offlined memory */ if (memtrace_size) { if (memtrace_online()) - return -EAGAIN; + goto out_unlock; } - if (!val) - return 0; + if (!val) { + rc = 0; + goto out_unlock; + } /* Offline and remove memory */ if (memtrace_init_regions_runtime(val)) - return -EINVAL; + goto out_unlock; if (memtrace_init_debugfs()) - return -EINVAL; + goto out_unlock; memtrace_size = val; - - return 0; + rc = 0; +out_unlock: + mutex_unlock(&memtrace_mutex); + return rc; } static int memtrace_enable_get(void *data, u64 *val) -- cgit v1.2.3 From 0bd4b96d99108b7ea9bac0573957483be7781d70 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 11 Nov 2020 15:53:22 +0100 Subject: powernv/memtrace: don't abuse memory hot(un)plug infrastructure for memory allocations Let's use alloc_contig_pages() for allocating memory and remove the linear mapping manually via arch_remove_linear_mapping(). Mark all pages PG_offline, such that they will definitely not get touched - e.g., when hibernating. When freeing memory, try to revert what we did. The original idea was discussed in: https://lkml.kernel.org/r/48340e96-7e6b-736f-9e23-d3111b915b6e@redhat.com This is similar to CONFIG_DEBUG_PAGEALLOC handling on other architectures, whereby only single pages are unmapped from the linear mapping. Let's mimic what memory hot(un)plug would do with the linear mapping. We now need MEMORY_HOTPLUG and CONTIG_ALLOC as dependencies. Add a TODO that we want to use __GFP_ZERO for clearing once alloc_contig_pages() understands that. Tested with in QEMU/TCG with 10 GiB of main memory: [root@localhost ~]# echo 0x40000000 > /sys/kernel/debug/powerpc/memtrace/enable [ 105.903043][ T1080] memtrace: Allocated trace memory on node 0 at 0x0000000080000000 [root@localhost ~]# echo 0x40000000 > /sys/kernel/debug/powerpc/memtrace/enable [ 145.042493][ T1080] radix-mmu: Mapped 0x0000000080000000-0x00000000c0000000 with 64.0 KiB pages [ 145.049019][ T1080] memtrace: Freed trace memory back on node 0 [ 145.333960][ T1080] memtrace: Allocated trace memory on node 0 at 0x0000000080000000 [root@localhost ~]# echo 0x80000000 > /sys/kernel/debug/powerpc/memtrace/enable [ 213.606916][ T1080] radix-mmu: Mapped 0x0000000080000000-0x00000000c0000000 with 64.0 KiB pages [ 213.613855][ T1080] memtrace: Freed trace memory back on node 0 [ 214.185094][ T1080] memtrace: Allocated trace memory on node 0 at 0x0000000080000000 [root@localhost ~]# echo 0x100000000 > /sys/kernel/debug/powerpc/memtrace/enable [ 234.874872][ T1080] radix-mmu: Mapped 0x0000000080000000-0x0000000100000000 with 64.0 KiB pages [ 234.886974][ T1080] memtrace: Freed trace memory back on node 0 [ 234.890153][ T1080] memtrace: Failed to allocate trace memory on node 0 [root@localhost ~]# echo 0x40000000 > /sys/kernel/debug/powerpc/memtrace/enable [ 259.490196][ T1080] memtrace: Allocated trace memory on node 0 at 0x0000000080000000 I also made sure allocated memory is properly zeroed. Note 1: We currently won't be allocating from ZONE_MOVABLE - because our pages are not movable. However, as we don't run with any memory hot(un)plug mechanism around, we could make an exception to increase the chance of allocations succeeding. Note 2: PG_reserved isn't sufficient. E.g., kernel_page_present() used along PG_reserved in hibernation code will always return "true" on powerpc, resulting in the pages getting touched. It's too generic - e.g., indicates boot allocations. Note 3: For now, we keep using memory_block_size_bytes() as minimum granularity. Suggested-by: Michal Hocko Signed-off-by: David Hildenbrand Reviewed-by: Oscar Salvador Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201111145322.15793-9-david@redhat.com --- arch/powerpc/platforms/powernv/Kconfig | 8 +- arch/powerpc/platforms/powernv/memtrace.c | 163 +++++++++++------------------- 2 files changed, 62 insertions(+), 109 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig index 938803eab0ad..619b093a0657 100644 --- a/arch/powerpc/platforms/powernv/Kconfig +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -27,11 +27,11 @@ config OPAL_PRD recovery diagnostics on OpenPower machines config PPC_MEMTRACE - bool "Enable removal of RAM from kernel mappings for tracing" - depends on PPC_POWERNV && MEMORY_HOTREMOVE + bool "Enable runtime allocation of RAM for tracing" + depends on PPC_POWERNV && MEMORY_HOTPLUG && CONTIG_ALLOC help - Enabling this option allows for the removal of memory (RAM) - from the kernel mappings to be used for hardware tracing. + Enabling this option allows for runtime allocation of memory (RAM) + for hardware tracing. config PPC_VAS bool "IBM Virtual Accelerator Switchboard (VAS)" diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index 0e42fe2d7b6a..5fc9408bb0b3 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -51,33 +51,12 @@ static const struct file_operations memtrace_fops = { .open = simple_open, }; -static int check_memblock_online(struct memory_block *mem, void *arg) -{ - if (mem->state != MEM_ONLINE) - return -1; - - return 0; -} - -static int change_memblock_state(struct memory_block *mem, void *arg) -{ - unsigned long state = (unsigned long)arg; - - mem->state = state; - - return 0; -} - static void memtrace_clear_range(unsigned long start_pfn, unsigned long nr_pages) { unsigned long pfn; - /* - * As pages are offline, we cannot trust the memmap anymore. As HIGHMEM - * does not apply, avoid passing around "struct page" and use - * clear_page() instead directly. - */ + /* As HIGHMEM does not apply, use clear_page() directly. */ for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) { if (IS_ALIGNED(pfn, PAGES_PER_SECTION)) cond_resched(); @@ -85,72 +64,39 @@ static void memtrace_clear_range(unsigned long start_pfn, } } -/* called with device_hotplug_lock held */ -static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages) -{ - const unsigned long start = PFN_PHYS(start_pfn); - const unsigned long size = PFN_PHYS(nr_pages); - - if (walk_memory_blocks(start, size, NULL, check_memblock_online)) - return false; - - walk_memory_blocks(start, size, (void *)MEM_GOING_OFFLINE, - change_memblock_state); - - if (offline_pages(start_pfn, nr_pages)) { - walk_memory_blocks(start, size, (void *)MEM_ONLINE, - change_memblock_state); - return false; - } - - walk_memory_blocks(start, size, (void *)MEM_OFFLINE, - change_memblock_state); - - - return true; -} - static u64 memtrace_alloc_node(u32 nid, u64 size) { - u64 start_pfn, end_pfn, nr_pages, pfn; - u64 base_pfn; - u64 bytes = memory_block_size_bytes(); + const unsigned long nr_pages = PHYS_PFN(size); + unsigned long pfn, start_pfn; + struct page *page; - if (!node_spanned_pages(nid)) + /* + * Trace memory needs to be aligned to the size, which is guaranteed + * by alloc_contig_pages(). + */ + page = alloc_contig_pages(nr_pages, GFP_KERNEL | __GFP_THISNODE | + __GFP_NOWARN, nid, NULL); + if (!page) return 0; + start_pfn = page_to_pfn(page); - start_pfn = node_start_pfn(nid); - end_pfn = node_end_pfn(nid); - nr_pages = size >> PAGE_SHIFT; - - /* Trace memory needs to be aligned to the size */ - end_pfn = round_down(end_pfn - nr_pages, nr_pages); - - lock_device_hotplug(); - for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) { - if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) { - /* - * Clear the range while we still have a linear - * mapping. - */ - memtrace_clear_range(base_pfn, nr_pages); - /* - * Remove memory in memory block size chunks so that - * iomem resources are always split to the same size and - * we never try to remove memory that spans two iomem - * resources. - */ - end_pfn = base_pfn + nr_pages; - for (pfn = base_pfn; pfn < end_pfn; pfn += bytes>> PAGE_SHIFT) { - __remove_memory(nid, pfn << PAGE_SHIFT, bytes); - } - unlock_device_hotplug(); - return base_pfn << PAGE_SHIFT; - } - } - unlock_device_hotplug(); + /* + * Clear the range while we still have a linear mapping. + * + * TODO: use __GFP_ZERO with alloc_contig_pages() once supported. + */ + memtrace_clear_range(start_pfn, nr_pages); - return 0; + /* + * Set pages PageOffline(), to indicate that nobody (e.g., hibernation, + * dumping, ...) should be touching these pages. + */ + for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) + __SetPageOffline(pfn_to_page(pfn)); + + arch_remove_linear_mapping(PFN_PHYS(start_pfn), size); + + return PFN_PHYS(start_pfn); } static int memtrace_init_regions_runtime(u64 size) @@ -220,16 +166,30 @@ static int memtrace_init_debugfs(void) return ret; } -static int online_mem_block(struct memory_block *mem, void *arg) +static int memtrace_free(int nid, u64 start, u64 size) { - return device_online(&mem->dev); + struct mhp_params params = { .pgprot = PAGE_KERNEL }; + const unsigned long nr_pages = PHYS_PFN(size); + const unsigned long start_pfn = PHYS_PFN(start); + unsigned long pfn; + int ret; + + ret = arch_create_linear_mapping(nid, start, size, ¶ms); + if (ret) + return ret; + + for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) + __ClearPageOffline(pfn_to_page(pfn)); + + free_contig_range(start_pfn, nr_pages); + return 0; } /* - * Iterate through the chunks of memory we have removed from the kernel - * and attempt to add them back to the kernel. + * Iterate through the chunks of memory we allocated and attempt to expose + * them back to the kernel. */ -static int memtrace_online(void) +static int memtrace_free_regions(void) { int i, ret = 0; struct memtrace_entry *ent; @@ -237,7 +197,7 @@ static int memtrace_online(void) for (i = memtrace_array_nr - 1; i >= 0; i--) { ent = &memtrace_array[i]; - /* We have onlined this chunk previously */ + /* We have freed this chunk previously */ if (ent->nid == NUMA_NO_NODE) continue; @@ -247,30 +207,25 @@ static int memtrace_online(void) ent->mem = 0; } - if (add_memory(ent->nid, ent->start, ent->size, MHP_NONE)) { - pr_err("Failed to add trace memory to node %d\n", + if (memtrace_free(ent->nid, ent->start, ent->size)) { + pr_err("Failed to free trace memory on node %d\n", ent->nid); ret += 1; continue; } - lock_device_hotplug(); - walk_memory_blocks(ent->start, ent->size, NULL, - online_mem_block); - unlock_device_hotplug(); - /* - * Memory was added successfully so clean up references to it - * so on reentry we can tell that this chunk was added. + * Memory was freed successfully so clean up references to it + * so on reentry we can tell that this chunk was freed. */ debugfs_remove_recursive(ent->dir); - pr_info("Added trace memory back to node %d\n", ent->nid); + pr_info("Freed trace memory back on node %d\n", ent->nid); ent->size = ent->start = ent->nid = NUMA_NO_NODE; } if (ret) return ret; - /* If all chunks of memory were added successfully, reset globals */ + /* If all chunks of memory were freed successfully, reset globals */ kfree(memtrace_array); memtrace_array = NULL; memtrace_size = 0; @@ -295,18 +250,16 @@ static int memtrace_enable_set(void *data, u64 val) mutex_lock(&memtrace_mutex); - /* Re-add/online previously removed/offlined memory */ - if (memtrace_size) { - if (memtrace_online()) - goto out_unlock; - } + /* Free all previously allocated memory. */ + if (memtrace_size && memtrace_free_regions()) + goto out_unlock; if (!val) { rc = 0; goto out_unlock; } - /* Offline and remove memory */ + /* Allocate memory. */ if (memtrace_init_regions_runtime(val)) goto out_unlock; -- cgit v1.2.3 From b6254ced4da6cf28d49fbffe24ee4b3286dcb3f4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 18 Aug 2020 17:19:17 +0000 Subject: powerpc/signal: Don't manage floating point regs when no FPU There is no point in copying floating point regs when there is no FPU and MATH_EMULATION is not selected. Create a new CONFIG_PPC_FPU_REGS bool that is selected by CONFIG_MATH_EMULATION and CONFIG_PPC_FPU, and use it to opt out everything related to fp_state in thread_struct. The asm const used only by fpu.S are opted out with CONFIG_PPC_FPU as fpu.S build is conditionnal to CONFIG_PPC_FPU. The following app spends approx 8.1 seconds system time on an 8xx without the patch, and 7.0 seconds with the patch (13.5% reduction). On an 832x, it spends approx 2.6 seconds system time without the patch and 2.1 seconds with the patch (19% reduction). void sigusr1(int sig) { } int main(int argc, char **argv) { int i = 100000; signal(SIGUSR1, sigusr1); for (;i--;) raise(SIGUSR1); exit(0); } Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/7569070083e6cd5b279bb5023da601aba3c06f3c.1597770847.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/processor.h | 2 ++ arch/powerpc/kernel/asm-offsets.c | 2 ++ arch/powerpc/kernel/process.c | 4 ++++ arch/powerpc/kernel/ptrace/Makefile | 4 ++-- arch/powerpc/kernel/ptrace/ptrace-decl.h | 14 ++++++++++++++ arch/powerpc/kernel/ptrace/ptrace-view.c | 2 ++ arch/powerpc/kernel/signal.h | 14 +++++++++++++- arch/powerpc/kernel/signal_32.c | 4 ++++ arch/powerpc/kernel/traps.c | 2 ++ arch/powerpc/platforms/Kconfig.cputype | 4 ++++ 11 files changed, 50 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index aad8532a718e..8d12da224cb9 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -422,6 +422,7 @@ config HUGETLB_PAGE_SIZE_VARIABLE config MATH_EMULATION bool "Math emulation" depends on 4xx || PPC_8xx || PPC_MPC832x || BOOKE + select PPC_FPU_REGS help Some PowerPC chips designed for embedded applications do not have a floating-point unit and therefore do not implement the diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 333e3b6c76fb..0792530bedef 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -164,8 +164,10 @@ struct thread_struct { #endif /* Debug Registers */ struct debug_reg debug; +#ifdef CONFIG_PPC_FPU_REGS struct thread_fp_state fp_state; struct thread_fp_state *fp_save_area; +#endif int fpexc_mode; /* floating-point exception mode */ unsigned int align_ctl; /* alignment handling control */ #ifdef CONFIG_HAVE_HW_BREAKPOINT diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index a2dcb8ed79b9..81d68494d026 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -110,9 +110,11 @@ int main(void) #ifdef CONFIG_BOOKE OFFSET(THREAD_NORMSAVES, thread_struct, normsave[0]); #endif +#ifdef CONFIG_PPC_FPU OFFSET(THREAD_FPEXC_MODE, thread_struct, fpexc_mode); OFFSET(THREAD_FPSTATE, thread_struct, fp_state.fpr); OFFSET(THREAD_FPSAVEAREA, thread_struct, fp_save_area); +#endif OFFSET(FPSTATE_FPSCR, thread_fp_state, fpscr); OFFSET(THREAD_LOAD_FP, thread_struct, load_fp); #ifdef CONFIG_ALTIVEC diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index d421a2c7f822..ba2c987b8403 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1730,7 +1730,9 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, p->thread.ptrace_bps[i] = NULL; #endif +#ifdef CONFIG_PPC_FPU_REGS p->thread.fp_save_area = NULL; +#endif #ifdef CONFIG_ALTIVEC p->thread.vr_save_area = NULL; #endif @@ -1855,8 +1857,10 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) #endif current->thread.load_slb = 0; current->thread.load_fp = 0; +#ifdef CONFIG_PPC_FPU_REGS memset(¤t->thread.fp_state, 0, sizeof(current->thread.fp_state)); current->thread.fp_save_area = NULL; +#endif #ifdef CONFIG_ALTIVEC memset(¤t->thread.vr_state, 0, sizeof(current->thread.vr_state)); current->thread.vr_state.vscr.u[3] = 0x00010000; /* Java mode disabled */ diff --git a/arch/powerpc/kernel/ptrace/Makefile b/arch/powerpc/kernel/ptrace/Makefile index 77abd1a5a508..8ebc11d1168d 100644 --- a/arch/powerpc/kernel/ptrace/Makefile +++ b/arch/powerpc/kernel/ptrace/Makefile @@ -6,11 +6,11 @@ CFLAGS_ptrace-view.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' obj-y += ptrace.o ptrace-view.o -obj-y += ptrace-fpu.o +obj-$(CONFIG_PPC_FPU_REGS) += ptrace-fpu.o obj-$(CONFIG_COMPAT) += ptrace32.o obj-$(CONFIG_VSX) += ptrace-vsx.o ifneq ($(CONFIG_VSX),y) -obj-y += ptrace-novsx.o +obj-$(CONFIG_PPC_FPU_REGS) += ptrace-novsx.o endif obj-$(CONFIG_ALTIVEC) += ptrace-altivec.o obj-$(CONFIG_SPE) += ptrace-spe.o diff --git a/arch/powerpc/kernel/ptrace/ptrace-decl.h b/arch/powerpc/kernel/ptrace/ptrace-decl.h index eafe5f0f6289..3487f2c9735c 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-decl.h +++ b/arch/powerpc/kernel/ptrace/ptrace-decl.h @@ -165,8 +165,22 @@ int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data); extern const struct user_regset_view user_ppc_native_view; /* ptrace-fpu */ +#ifdef CONFIG_PPC_FPU_REGS int ptrace_get_fpr(struct task_struct *child, int index, unsigned long *data); int ptrace_put_fpr(struct task_struct *child, int index, unsigned long data); +#else +static inline int +ptrace_get_fpr(struct task_struct *child, int index, unsigned long *data) +{ + return -EIO; +} + +static inline int +ptrace_put_fpr(struct task_struct *child, int index, unsigned long data) +{ + return -EIO; +} +#endif /* ptrace-(no)adv */ void ppc_gethwdinfo(struct ppc_debug_info *dbginfo); diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index 142d58337f40..00a765f00d31 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -521,11 +521,13 @@ static const struct user_regset native_regsets[] = { .size = sizeof(long), .align = sizeof(long), .regset_get = gpr_get, .set = gpr_set }, +#ifdef CONFIG_PPC_FPU_REGS [REGSET_FPR] = { .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, .size = sizeof(double), .align = sizeof(double), .regset_get = fpr_get, .set = fpr_set }, +#endif #ifdef CONFIG_ALTIVEC [REGSET_VMX] = { .core_note_type = NT_PPC_VMX, .n = 34, diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index 4626d39cc0f0..6c2a33ab042c 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -34,7 +34,7 @@ unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task); unsigned long copy_ckfpr_to_user(void __user *to, struct task_struct *task); unsigned long copy_fpr_from_user(struct task_struct *task, void __user *from); unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from); -#else +#elif defined(CONFIG_PPC_FPU_REGS) static inline unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task) { @@ -63,6 +63,18 @@ copy_ckfpr_from_user(struct task_struct *task, void __user *from) ELF_NFPREG * sizeof(double)); } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ +#else +static inline unsigned long +copy_fpr_to_user(void __user *to, struct task_struct *task) +{ + return 0; +} + +static inline unsigned long +copy_fpr_from_user(struct task_struct *task, void __user *from) +{ + return 0; +} #endif #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 96950f189b5a..7b291707eb31 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -814,7 +814,9 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, } regs->link = tramp; +#ifdef CONFIG_PPC_FPU_REGS tsk->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */ +#endif /* create a stack frame for the caller of the handler */ newsp = ((unsigned long)rt_sf) - (__SIGNAL_FRAMESIZE + 16); @@ -1271,7 +1273,9 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, regs->link = tramp; +#ifdef CONFIG_PPC_FPU_REGS tsk->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */ +#endif /* create a stack frame for the caller of the handler */ newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE; diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 5006dcbe1d9f..5b39baa61590 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1190,7 +1190,9 @@ static void parse_fpe(struct pt_regs *regs) flush_fp_to_thread(current); +#ifdef CONFIG_PPC_FPU_REGS code = __parse_fpscr(current->thread.fp_state.fpscr); +#endif _exception(SIGFPE, regs, code, regs->nip); } diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index c194c4ae8bc7..3e36e3712deb 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -218,9 +218,13 @@ config PPC_E500MC such as e5500/e6500), and must be disabled for running on e500v1 or e500v2. +config PPC_FPU_REGS + bool + config PPC_FPU bool default y if PPC64 + select PPC_FPU_REGS config FSL_EMB_PERFMON bool "Freescale Embedded Perfmon" -- cgit v1.2.3 From 7d68c89169508064c460a1208f38ed0589d226fa Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 18 Aug 2020 17:19:18 +0000 Subject: powerpc/32s: Allow deselecting CONFIG_PPC_FPU on mpc832x The e300c2 core which is embedded in mpc832x CPU doesn't have an FPU. Make it possible to not select CONFIG_PPC_FPU when building a kernel dedicated to that target. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/fcdc60d85baf80eaa0a7f3261d9d889282068216.1597770847.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_book3s_32.S | 4 ++++ arch/powerpc/platforms/Kconfig.cputype | 11 +++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index ed25d9991233..bbcc84c5cf5f 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -394,6 +394,7 @@ Alignment: . = 0x800 DO_KVM 0x800 FPUnavailable: +#ifdef CONFIG_PPC_FPU BEGIN_FTR_SECTION /* * Certain Freescale cores don't have a FPU and treat fp instructions @@ -407,6 +408,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) b fast_exception_return 1: addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_LITE(0x800, kernel_fp_unavailable_exception) +#else + b ProgramCheck +#endif /* Decrementer */ EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 3e36e3712deb..44ab03fbcadc 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -32,7 +32,7 @@ choice config PPC_BOOK3S_6xx bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx" select PPC_BOOK3S_32 - select PPC_FPU + imply PPC_FPU select PPC_HAVE_PMU_SUPPORT select PPC_HAVE_KUEP select PPC_HAVE_KUAP @@ -222,9 +222,16 @@ config PPC_FPU_REGS bool config PPC_FPU - bool + bool "Support for Floating Point Unit (FPU)" if PPC_MPC832x default y if PPC64 select PPC_FPU_REGS + help + This must be enabled to support the Floating Point Unit + Most 6xx have an FPU but e300c2 core (mpc832x) don't have + an FPU, so when building an embedded kernel for that target + you can disable FPU support. + + If unsure say Y. config FSL_EMB_PERFMON bool "Freescale Embedded Perfmon" -- cgit v1.2.3 From c8754c517e37270a01b0561ad46ee647a721a09b Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Mon, 30 Nov 2020 16:29:49 +0100 Subject: powerpc/pseries: Define PCI bus speed for Gen4 and Gen5 Update bus speed definition for PCI Gen4 and 5. Signed-off-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201130152949.26467-1-fbarrat@linux.ibm.com --- arch/powerpc/platforms/pseries/pci.c | 51 +++++++++++++++--------------------- 1 file changed, 21 insertions(+), 30 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index 911534b89c85..72a4d4167849 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -290,6 +290,25 @@ static void fixup_winbond_82c105(struct pci_dev* dev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105, fixup_winbond_82c105); +static enum pci_bus_speed prop_to_pci_speed(u32 prop) +{ + switch (prop) { + case 0x01: + return PCIE_SPEED_2_5GT; + case 0x02: + return PCIE_SPEED_5_0GT; + case 0x04: + return PCIE_SPEED_8_0GT; + case 0x08: + return PCIE_SPEED_16_0GT; + case 0x10: + return PCIE_SPEED_32_0GT; + default: + pr_debug("Unexpected PCI link speed property value\n"); + return PCI_SPEED_UNKNOWN; + } +} + int pseries_root_bridge_prepare(struct pci_host_bridge *bridge) { struct device_node *dn, *pdn; @@ -322,35 +341,7 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge) return 0; } - switch (pcie_link_speed_stats[0]) { - case 0x01: - bus->max_bus_speed = PCIE_SPEED_2_5GT; - break; - case 0x02: - bus->max_bus_speed = PCIE_SPEED_5_0GT; - break; - case 0x04: - bus->max_bus_speed = PCIE_SPEED_8_0GT; - break; - default: - bus->max_bus_speed = PCI_SPEED_UNKNOWN; - break; - } - - switch (pcie_link_speed_stats[1]) { - case 0x01: - bus->cur_bus_speed = PCIE_SPEED_2_5GT; - break; - case 0x02: - bus->cur_bus_speed = PCIE_SPEED_5_0GT; - break; - case 0x04: - bus->cur_bus_speed = PCIE_SPEED_8_0GT; - break; - default: - bus->cur_bus_speed = PCI_SPEED_UNKNOWN; - break; - } - + bus->max_bus_speed = prop_to_pci_speed(pcie_link_speed_stats[0]); + bus->cur_bus_speed = prop_to_pci_speed(pcie_link_speed_stats[1]); return 0; } -- cgit v1.2.3 From 92cc6bf01c7f4c5cfefd1963985c0064687ebeda Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Wed, 2 Dec 2020 10:34:53 +0530 Subject: powerpc: Refactor is_kvm_guest() declaration to new header Only code/declaration movement, in anticipation of doing a KVM-aware vcpu_is_preempted(). No additional changes. Signed-off-by: Srikar Dronamraju Acked-by: Waiman Long Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201202050456.164005-2-srikar@linux.vnet.ibm.com --- arch/powerpc/include/asm/firmware.h | 6 ------ arch/powerpc/include/asm/kvm_guest.h | 15 +++++++++++++++ arch/powerpc/include/asm/kvm_para.h | 2 +- arch/powerpc/kernel/firmware.c | 1 + arch/powerpc/platforms/pseries/smp.c | 1 + 5 files changed, 18 insertions(+), 7 deletions(-) create mode 100644 arch/powerpc/include/asm/kvm_guest.h (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 0b295bdb201e..aa6a5ef5d483 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -134,12 +134,6 @@ extern int ibm_nmi_interlock_token; extern unsigned int __start___fw_ftr_fixup, __stop___fw_ftr_fixup; -#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST) -bool is_kvm_guest(void); -#else -static inline bool is_kvm_guest(void) { return false; } -#endif - #ifdef CONFIG_PPC_PSERIES void pseries_probe_fw_features(void); #else diff --git a/arch/powerpc/include/asm/kvm_guest.h b/arch/powerpc/include/asm/kvm_guest.h new file mode 100644 index 000000000000..d2c946dbbd2c --- /dev/null +++ b/arch/powerpc/include/asm/kvm_guest.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 IBM Corporation + */ + +#ifndef _ASM_POWERPC_KVM_GUEST_H_ +#define _ASM_POWERPC_KVM_GUEST_H_ + +#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST) +bool is_kvm_guest(void); +#else +static inline bool is_kvm_guest(void) { return false; } +#endif + +#endif /* _ASM_POWERPC_KVM_GUEST_H_ */ diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 744612054c94..abe1b5e82547 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -8,7 +8,7 @@ #ifndef __POWERPC_KVM_PARA_H__ #define __POWERPC_KVM_PARA_H__ -#include +#include #include diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c index fe48d319d490..5f48e5ad24cd 100644 --- a/arch/powerpc/kernel/firmware.c +++ b/arch/powerpc/kernel/firmware.c @@ -14,6 +14,7 @@ #include #include +#include #ifdef CONFIG_PPC64 unsigned long powerpc_firmware_features __read_mostly; diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index 92922491a81c..d578732c545d 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -42,6 +42,7 @@ #include #include #include +#include #include "pseries.h" -- cgit v1.2.3 From 16520a858a995742c2d2248e86a6026bd0316562 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Wed, 2 Dec 2020 10:34:54 +0530 Subject: powerpc: Rename is_kvm_guest() to check_kvm_guest() We want to reuse the is_kvm_guest() name in a subsequent patch but with a new body. Hence rename is_kvm_guest() to check_kvm_guest(). No additional changes. Signed-off-by: Srikar Dronamraju Acked-by: Waiman Long Signed-off-by: kernel test robot # int -> bool fix [mpe: Fold in fix from lkp to use true/false not 0/1] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201202050456.164005-3-srikar@linux.vnet.ibm.com --- arch/powerpc/include/asm/kvm_guest.h | 4 ++-- arch/powerpc/include/asm/kvm_para.h | 2 +- arch/powerpc/kernel/firmware.c | 8 ++++---- arch/powerpc/platforms/pseries/smp.c | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/include/asm/kvm_guest.h b/arch/powerpc/include/asm/kvm_guest.h index d2c946dbbd2c..d7749ecb30d4 100644 --- a/arch/powerpc/include/asm/kvm_guest.h +++ b/arch/powerpc/include/asm/kvm_guest.h @@ -7,9 +7,9 @@ #define _ASM_POWERPC_KVM_GUEST_H_ #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST) -bool is_kvm_guest(void); +bool check_kvm_guest(void); #else -static inline bool is_kvm_guest(void) { return false; } +static inline bool check_kvm_guest(void) { return false; } #endif #endif /* _ASM_POWERPC_KVM_GUEST_H_ */ diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index abe1b5e82547..6fba06b6cfdb 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -14,7 +14,7 @@ static inline int kvm_para_available(void) { - return IS_ENABLED(CONFIG_KVM_GUEST) && is_kvm_guest(); + return IS_ENABLED(CONFIG_KVM_GUEST) && check_kvm_guest(); } static inline unsigned int kvm_arch_para_features(void) diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c index 5f48e5ad24cd..c3140c6084c9 100644 --- a/arch/powerpc/kernel/firmware.c +++ b/arch/powerpc/kernel/firmware.c @@ -22,17 +22,17 @@ EXPORT_SYMBOL_GPL(powerpc_firmware_features); #endif #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST) -bool is_kvm_guest(void) +bool check_kvm_guest(void) { struct device_node *hyper_node; hyper_node = of_find_node_by_path("/hypervisor"); if (!hyper_node) - return 0; + return false; if (!of_device_is_compatible(hyper_node, "linux,kvm")) - return 0; + return false; - return 1; + return true; } #endif diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index d578732c545d..c70b4be9f0a5 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -211,7 +211,7 @@ static __init void pSeries_smp_probe(void) if (!cpu_has_feature(CPU_FTR_SMT)) return; - if (is_kvm_guest()) { + if (check_kvm_guest()) { /* * KVM emulates doorbells by disabling FSCR[MSGP] so msgsndp * faults to the hypervisor which then reads the instruction -- cgit v1.2.3 From 6d247e4d264961aa3b871290f9b11a48d5a567f2 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Thu, 26 Nov 2020 17:59:50 +0100 Subject: powerpc/ps3: make system bus's remove and shutdown callbacks return void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver core ignores the return value of struct device_driver::remove because there is only little that can be done. For the shutdown callback it's ps3_system_bus_shutdown() which ignores the return value. To simplify the quest to make struct device_driver::remove return void, let struct ps3_system_bus_driver::remove return void, too. All users already unconditionally return 0, this commit makes it obvious that returning an error code is a bad idea and ensures future users behave accordingly. Signed-off-by: Uwe Kleine-König Reviewed-by: Geert Uytterhoeven Acked-by: Takashi Iwai Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201126165950.2554997-2-u.kleine-koenig@pengutronix.de --- arch/powerpc/include/asm/ps3.h | 4 ++-- arch/powerpc/platforms/ps3/system-bus.c | 5 ++--- drivers/block/ps3disk.c | 3 +-- drivers/block/ps3vram.c | 3 +-- drivers/char/ps3flash.c | 3 +-- drivers/net/ethernet/toshiba/ps3_gelic_net.c | 3 +-- drivers/ps3/ps3-lpm.c | 3 +-- drivers/ps3/ps3-vuart.c | 10 ++++------ drivers/scsi/ps3rom.c | 3 +-- drivers/usb/host/ehci-ps3.c | 4 +--- drivers/usb/host/ohci-ps3.c | 4 +--- drivers/video/fbdev/ps3fb.c | 4 +--- sound/ppc/snd_ps3.c | 3 +-- 13 files changed, 18 insertions(+), 34 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h index cb89e4bf55ce..e646c7f218bc 100644 --- a/arch/powerpc/include/asm/ps3.h +++ b/arch/powerpc/include/asm/ps3.h @@ -378,8 +378,8 @@ struct ps3_system_bus_driver { enum ps3_match_sub_id match_sub_id; struct device_driver core; int (*probe)(struct ps3_system_bus_device *); - int (*remove)(struct ps3_system_bus_device *); - int (*shutdown)(struct ps3_system_bus_device *); + void (*remove)(struct ps3_system_bus_device *); + void (*shutdown)(struct ps3_system_bus_device *); /* int (*suspend)(struct ps3_system_bus_device *, pm_message_t); */ /* int (*resume)(struct ps3_system_bus_device *); */ }; diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index c62aaa29a9d5..b431f41c6cb5 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -382,7 +382,6 @@ static int ps3_system_bus_probe(struct device *_dev) static int ps3_system_bus_remove(struct device *_dev) { - int result = 0; struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); struct ps3_system_bus_driver *drv; @@ -393,13 +392,13 @@ static int ps3_system_bus_remove(struct device *_dev) BUG_ON(!drv); if (drv->remove) - result = drv->remove(dev); + drv->remove(dev); else dev_dbg(&dev->core, "%s:%d %s: no remove method\n", __func__, __LINE__, drv->core.name); pr_debug(" <- %s:%d: %s\n", __func__, __LINE__, dev_name(&dev->core)); - return result; + return 0; } static void ps3_system_bus_shutdown(struct device *_dev) diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index 7b55811c2a81..ba3ece56cbb3 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -507,7 +507,7 @@ fail: return error; } -static int ps3disk_remove(struct ps3_system_bus_device *_dev) +static void ps3disk_remove(struct ps3_system_bus_device *_dev) { struct ps3_storage_device *dev = to_ps3_storage_device(&_dev->core); struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd); @@ -526,7 +526,6 @@ static int ps3disk_remove(struct ps3_system_bus_device *_dev) kfree(dev->bounce_buf); kfree(priv); ps3_system_bus_set_drvdata(_dev, NULL); - return 0; } static struct ps3_system_bus_driver ps3disk = { diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index 1088798c8dd0..b71d28372ef3 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -797,7 +797,7 @@ fail: return error; } -static int ps3vram_remove(struct ps3_system_bus_device *dev) +static void ps3vram_remove(struct ps3_system_bus_device *dev) { struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev); @@ -817,7 +817,6 @@ static int ps3vram_remove(struct ps3_system_bus_device *dev) free_pages((unsigned long) priv->xdr_buf, get_order(XDR_BUF_SIZE)); kfree(priv); ps3_system_bus_set_drvdata(dev, NULL); - return 0; } static struct ps3_system_bus_driver ps3vram = { diff --git a/drivers/char/ps3flash.c b/drivers/char/ps3flash.c index 1a07fee33f66..23871cde41fb 100644 --- a/drivers/char/ps3flash.c +++ b/drivers/char/ps3flash.c @@ -403,7 +403,7 @@ fail: return error; } -static int ps3flash_remove(struct ps3_system_bus_device *_dev) +static void ps3flash_remove(struct ps3_system_bus_device *_dev) { struct ps3_storage_device *dev = to_ps3_storage_device(&_dev->core); @@ -413,7 +413,6 @@ static int ps3flash_remove(struct ps3_system_bus_device *_dev) kfree(ps3_system_bus_get_drvdata(&dev->sbd)); ps3_system_bus_set_drvdata(&dev->sbd, NULL); ps3flash_dev = NULL; - return 0; } diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c index d9a5722f561b..3d1fc8d2ca66 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c @@ -1791,7 +1791,7 @@ fail_open: * ps3_gelic_driver_remove - remove a device from the control of this driver */ -static int ps3_gelic_driver_remove(struct ps3_system_bus_device *dev) +static void ps3_gelic_driver_remove(struct ps3_system_bus_device *dev) { struct gelic_card *card = ps3_system_bus_get_drvdata(dev); struct net_device *netdev0; @@ -1840,7 +1840,6 @@ static int ps3_gelic_driver_remove(struct ps3_system_bus_device *dev) ps3_close_hv_device(dev); pr_debug("%s: done\n", __func__); - return 0; } static struct ps3_system_bus_driver ps3_gelic_driver = { diff --git a/drivers/ps3/ps3-lpm.c b/drivers/ps3/ps3-lpm.c index e54aa2d82f50..65512b6cc6fd 100644 --- a/drivers/ps3/ps3-lpm.c +++ b/drivers/ps3/ps3-lpm.c @@ -1196,7 +1196,7 @@ static int ps3_lpm_probe(struct ps3_system_bus_device *dev) return 0; } -static int ps3_lpm_remove(struct ps3_system_bus_device *dev) +static void ps3_lpm_remove(struct ps3_system_bus_device *dev) { dev_dbg(&dev->core, " -> %s:%u:\n", __func__, __LINE__); @@ -1206,7 +1206,6 @@ static int ps3_lpm_remove(struct ps3_system_bus_device *dev) lpm_priv = NULL; dev_info(&dev->core, " <- %s:%u:\n", __func__, __LINE__); - return 0; } static struct ps3_system_bus_driver ps3_lpm_driver = { diff --git a/drivers/ps3/ps3-vuart.c b/drivers/ps3/ps3-vuart.c index 4ed131eaff51..e34ae6a442c7 100644 --- a/drivers/ps3/ps3-vuart.c +++ b/drivers/ps3/ps3-vuart.c @@ -1102,7 +1102,7 @@ static int ps3_vuart_cleanup(struct ps3_system_bus_device *dev) * device can no longer be used. */ -static int ps3_vuart_remove(struct ps3_system_bus_device *dev) +static void ps3_vuart_remove(struct ps3_system_bus_device *dev) { struct ps3_vuart_port_priv *priv = to_port_priv(dev); struct ps3_vuart_port_driver *drv; @@ -1118,7 +1118,7 @@ static int ps3_vuart_remove(struct ps3_system_bus_device *dev) dev_dbg(&dev->core, "%s:%d: no driver bound\n", __func__, __LINE__); mutex_unlock(&vuart_bus_priv.probe_mutex); - return 0; + return; } drv = ps3_system_bus_dev_to_vuart_drv(dev); @@ -1141,7 +1141,6 @@ static int ps3_vuart_remove(struct ps3_system_bus_device *dev) dev_dbg(&dev->core, " <- %s:%d\n", __func__, __LINE__); mutex_unlock(&vuart_bus_priv.probe_mutex); - return 0; } /** @@ -1154,7 +1153,7 @@ static int ps3_vuart_remove(struct ps3_system_bus_device *dev) * sequence. */ -static int ps3_vuart_shutdown(struct ps3_system_bus_device *dev) +static void ps3_vuart_shutdown(struct ps3_system_bus_device *dev) { struct ps3_vuart_port_driver *drv; @@ -1169,7 +1168,7 @@ static int ps3_vuart_shutdown(struct ps3_system_bus_device *dev) dev_dbg(&dev->core, "%s:%d: no driver bound\n", __func__, __LINE__); mutex_unlock(&vuart_bus_priv.probe_mutex); - return 0; + return; } drv = ps3_system_bus_dev_to_vuart_drv(dev); @@ -1193,7 +1192,6 @@ static int ps3_vuart_shutdown(struct ps3_system_bus_device *dev) dev_dbg(&dev->core, " <- %s:%d\n", __func__, __LINE__); mutex_unlock(&vuart_bus_priv.probe_mutex); - return 0; } static int __init ps3_vuart_bus_init(void) diff --git a/drivers/scsi/ps3rom.c b/drivers/scsi/ps3rom.c index f75c0b5cd587..ccb5771f1cb7 100644 --- a/drivers/scsi/ps3rom.c +++ b/drivers/scsi/ps3rom.c @@ -402,7 +402,7 @@ fail_free_bounce: return error; } -static int ps3rom_remove(struct ps3_system_bus_device *_dev) +static void ps3rom_remove(struct ps3_system_bus_device *_dev) { struct ps3_storage_device *dev = to_ps3_storage_device(&_dev->core); struct Scsi_Host *host = ps3_system_bus_get_drvdata(&dev->sbd); @@ -412,7 +412,6 @@ static int ps3rom_remove(struct ps3_system_bus_device *_dev) scsi_host_put(host); ps3_system_bus_set_drvdata(&dev->sbd, NULL); kfree(dev->bounce_buf); - return 0; } static struct ps3_system_bus_driver ps3rom = { diff --git a/drivers/usb/host/ehci-ps3.c b/drivers/usb/host/ehci-ps3.c index fb52133c3557..98568b046a1a 100644 --- a/drivers/usb/host/ehci-ps3.c +++ b/drivers/usb/host/ehci-ps3.c @@ -200,7 +200,7 @@ fail_start: return result; } -static int ps3_ehci_remove(struct ps3_system_bus_device *dev) +static void ps3_ehci_remove(struct ps3_system_bus_device *dev) { unsigned int tmp; struct usb_hcd *hcd = ps3_system_bus_get_drvdata(dev); @@ -227,8 +227,6 @@ static int ps3_ehci_remove(struct ps3_system_bus_device *dev) ps3_dma_region_free(dev->d_region); ps3_close_hv_device(dev); - - return 0; } static int __init ps3_ehci_driver_register(struct ps3_system_bus_driver *drv) diff --git a/drivers/usb/host/ohci-ps3.c b/drivers/usb/host/ohci-ps3.c index f77cd6af0ccf..4f5af929c3e4 100644 --- a/drivers/usb/host/ohci-ps3.c +++ b/drivers/usb/host/ohci-ps3.c @@ -184,7 +184,7 @@ fail_start: return result; } -static int ps3_ohci_remove(struct ps3_system_bus_device *dev) +static void ps3_ohci_remove(struct ps3_system_bus_device *dev) { unsigned int tmp; struct usb_hcd *hcd = ps3_system_bus_get_drvdata(dev); @@ -212,8 +212,6 @@ static int ps3_ohci_remove(struct ps3_system_bus_device *dev) ps3_dma_region_free(dev->d_region); ps3_close_hv_device(dev); - - return 0; } static int __init ps3_ohci_driver_register(struct ps3_system_bus_driver *drv) diff --git a/drivers/video/fbdev/ps3fb.c b/drivers/video/fbdev/ps3fb.c index 203c254f8f6c..2fe08b67eda7 100644 --- a/drivers/video/fbdev/ps3fb.c +++ b/drivers/video/fbdev/ps3fb.c @@ -1208,7 +1208,7 @@ err: return retval; } -static int ps3fb_shutdown(struct ps3_system_bus_device *dev) +static void ps3fb_shutdown(struct ps3_system_bus_device *dev) { struct fb_info *info = ps3_system_bus_get_drvdata(dev); u64 xdr_lpar = ps3_mm_phys_to_lpar(__pa(ps3fb_videomemory.address)); @@ -1241,8 +1241,6 @@ static int ps3fb_shutdown(struct ps3_system_bus_device *dev) lv1_gpu_memory_free(ps3fb.memory_handle); ps3_close_hv_device(dev); dev_dbg(&dev->core, " <- %s:%d\n", __func__, __LINE__); - - return 0; } static struct ps3_system_bus_driver ps3fb_driver = { diff --git a/sound/ppc/snd_ps3.c b/sound/ppc/snd_ps3.c index 6ab796a5d936..8e44fa5d4dc7 100644 --- a/sound/ppc/snd_ps3.c +++ b/sound/ppc/snd_ps3.c @@ -1049,7 +1049,7 @@ clean_open: }; /* snd_ps3_probe */ /* called when module removal */ -static int snd_ps3_driver_remove(struct ps3_system_bus_device *dev) +static void snd_ps3_driver_remove(struct ps3_system_bus_device *dev) { int ret; pr_info("%s:start id=%d\n", __func__, dev->match_id); @@ -1075,7 +1075,6 @@ static int snd_ps3_driver_remove(struct ps3_system_bus_device *dev) lv1_gpu_device_unmap(2); ps3_close_hv_device(dev); pr_info("%s:end id=%d\n", __func__, dev->match_id); - return 0; } /* snd_ps3_remove */ static struct ps3_system_bus_driver snd_ps3_bus_driver_info = { -- cgit v1.2.3 From f4b239e4c6bddf63d00cd460eabb933232dbc326 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 28 Nov 2020 17:07:25 +1000 Subject: powerpc/64s/powernv: Ratelimit harmless HMI error printing Harmless HMI errors can be triggered by guests in some cases, and don't contain much useful information anyway. Ratelimit these to avoid flooding the console/logs. Signed-off-by: Nicholas Piggin [mpe: Use dedicated ratelimit state, not printk_ratelimit()] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201128070728.825934-6-npiggin@gmail.com --- arch/powerpc/platforms/powernv/opal-hmi.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c index 3e1f064a18db..f0c1830deb51 100644 --- a/arch/powerpc/platforms/powernv/opal-hmi.c +++ b/arch/powerpc/platforms/powernv/opal-hmi.c @@ -213,6 +213,8 @@ static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt) "A hypervisor resource error occurred", "CAPP recovery process is in progress", }; + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); /* Print things out */ if (hmi_evt->version < OpalHMIEvt_V1) { @@ -240,19 +242,22 @@ static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt) break; } - printk("%s%s Hypervisor Maintenance interrupt [%s]\n", - level, sevstr, - hmi_evt->disposition == OpalHMI_DISPOSITION_RECOVERED ? - "Recovered" : "Not recovered"); - error_info = hmi_evt->type < ARRAY_SIZE(hmi_error_types) ? - hmi_error_types[hmi_evt->type] - : "Unknown"; - printk("%s Error detail: %s\n", level, error_info); - printk("%s HMER: %016llx\n", level, be64_to_cpu(hmi_evt->hmer)); - if ((hmi_evt->type == OpalHMI_ERROR_TFAC) || - (hmi_evt->type == OpalHMI_ERROR_TFMR_PARITY)) - printk("%s TFMR: %016llx\n", level, + if (hmi_evt->severity != OpalHMI_SEV_NO_ERROR || __ratelimit(&rs)) { + printk("%s%s Hypervisor Maintenance interrupt [%s]\n", + level, sevstr, + hmi_evt->disposition == OpalHMI_DISPOSITION_RECOVERED ? + "Recovered" : "Not recovered"); + error_info = hmi_evt->type < ARRAY_SIZE(hmi_error_types) ? + hmi_error_types[hmi_evt->type] + : "Unknown"; + printk("%s Error detail: %s\n", level, error_info); + printk("%s HMER: %016llx\n", level, + be64_to_cpu(hmi_evt->hmer)); + if ((hmi_evt->type == OpalHMI_ERROR_TFAC) || + (hmi_evt->type == OpalHMI_ERROR_TFMR_PARITY)) + printk("%s TFMR: %016llx\n", level, be64_to_cpu(hmi_evt->tfmr)); + } if (hmi_evt->version < OpalHMIEvt_V2) return; -- cgit v1.2.3 From 82f70a05108c98aea4f140067c44a606262d2af7 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 28 Nov 2020 17:07:26 +1000 Subject: powerpc/64s/pseries: Add ERAT specific machine check handler Don't treat ERAT MCEs as SLB, don't save the SLB and use a specific ERAT flush to recover it. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201128070728.825934-7-npiggin@gmail.com --- arch/powerpc/include/asm/mce.h | 1 + arch/powerpc/kernel/mce_power.c | 2 +- arch/powerpc/platforms/pseries/ras.c | 5 ++++- 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index 89aa8248a57d..e6c27ae843dc 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -228,6 +228,7 @@ int mce_register_notifier(struct notifier_block *nb); int mce_unregister_notifier(struct notifier_block *nb); #ifdef CONFIG_PPC_BOOK3S_64 void flush_and_reload_slb(void); +void flush_erat(void); long __machine_check_early_realmode_p7(struct pt_regs *regs); long __machine_check_early_realmode_p8(struct pt_regs *regs); long __machine_check_early_realmode_p9(struct pt_regs *regs); diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 1372ce3f7bdd..667104d4c455 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -97,7 +97,7 @@ void flush_and_reload_slb(void) } #endif -static void flush_erat(void) +void flush_erat(void) { #ifdef CONFIG_PPC_BOOK3S_64 if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) { diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index b2b245b25edb..149cec2212e6 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -526,8 +526,11 @@ static int mce_handle_err_realmode(int disposition, u8 error_type) #ifdef CONFIG_PPC_BOOK3S_64 if (disposition == RTAS_DISP_NOT_RECOVERED) { switch (error_type) { - case MC_ERROR_TYPE_SLB: case MC_ERROR_TYPE_ERAT: + flush_erat(); + disposition = RTAS_DISP_FULLY_RECOVERED; + break; + case MC_ERROR_TYPE_SLB: /* * Store the old slb content in paca before flushing. * Print this when we go to virtual mode. -- cgit v1.2.3 From 227ae625522c65c4535cabe407f47abc058585ed Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 27 Nov 2020 10:14:05 +0530 Subject: powerpc/book3s64/kuap/kuep: Add PPC_PKEY config on book3s64 The config CONFIG_PPC_PKEY is used to select the base support that is required for PPC_MEM_KEYS, KUAP, and KUEP. Adding this dependency reduces the code complexity(in terms of #ifdefs) and enables us to move some of the initialization code to pkeys.c Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201127044424.40686-4-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/book3s/64/kup-radix.h | 4 ++-- arch/powerpc/include/asm/book3s/64/mmu.h | 2 +- arch/powerpc/include/asm/ptrace.h | 7 ++++++- arch/powerpc/kernel/asm-offsets.c | 3 +++ arch/powerpc/mm/book3s64/Makefile | 2 +- arch/powerpc/mm/book3s64/pkeys.c | 24 +++++++++++++++--------- arch/powerpc/platforms/Kconfig.cputype | 5 +++++ 7 files changed, 33 insertions(+), 14 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h index a39e2d193fdc..2fb8ee7b1e2a 100644 --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -16,7 +16,7 @@ #ifdef CONFIG_PPC_KUAP BEGIN_MMU_FTR_SECTION_NESTED(67) mfspr \gpr1, SPRN_AMR - ld \gpr2, STACK_REGS_KUAP(r1) + ld \gpr2, STACK_REGS_AMR(r1) cmpd \gpr1, \gpr2 beq 998f isync @@ -48,7 +48,7 @@ bne \msr_pr_cr, 99f .endif mfspr \gpr1, SPRN_AMR - std \gpr1, STACK_REGS_KUAP(r1) + std \gpr1, STACK_REGS_AMR(r1) li \gpr2, (AMR_KUAP_BLOCKED >> AMR_KUAP_SHIFT) sldi \gpr2, \gpr2, AMR_KUAP_SHIFT cmpd \use_cr, \gpr1, \gpr2 diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index ad0837d8076d..d0365914686e 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -199,7 +199,7 @@ extern int mmu_io_psize; void mmu_early_init_devtree(void); void hash__early_init_devtree(void); void radix__early_init_devtree(void); -#ifdef CONFIG_PPC_MEM_KEYS +#ifdef CONFIG_PPC_PKEY void pkey_early_init_devtree(void); #else static inline void pkey_early_init_devtree(void) {} diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 297d30fed945..0aeba52b5ca8 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -53,9 +53,14 @@ struct pt_regs #ifdef CONFIG_PPC64 unsigned long ppr; #endif + union { #ifdef CONFIG_PPC_KUAP - unsigned long kuap; + unsigned long kuap; #endif +#ifdef CONFIG_PPC_PKEY + unsigned long amr; +#endif + }; }; unsigned long __pad[2]; /* Maintain 16 byte interrupt stack alignment */ }; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index d4331d451c71..a2e01b7b9eeb 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -356,6 +356,9 @@ int main(void) STACK_PT_REGS_OFFSET(_PPR, ppr); #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_PPC_PKEY + STACK_PT_REGS_OFFSET(STACK_REGS_AMR, amr); +#endif #ifdef CONFIG_PPC_KUAP STACK_PT_REGS_OFFSET(STACK_REGS_KUAP, kuap); #endif diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile index fd393b8be14f..1b56d3af47d4 100644 --- a/arch/powerpc/mm/book3s64/Makefile +++ b/arch/powerpc/mm/book3s64/Makefile @@ -17,7 +17,7 @@ endif obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hash_hugepage.o obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage_prot.o obj-$(CONFIG_SPAPR_TCE_IOMMU) += iommu_api.o -obj-$(CONFIG_PPC_MEM_KEYS) += pkeys.o +obj-$(CONFIG_PPC_PKEY) += pkeys.o # Instrumenting the SLB fault path can lead to duplicate SLB entries KCOV_INSTRUMENT_slb.o := n diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index b1d091a97611..7dc71f85683d 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -89,12 +89,14 @@ static int scan_pkey_feature(void) } } +#ifdef CONFIG_PPC_MEM_KEYS /* * Adjust the upper limit, based on the number of bits supported by * arch-neutral code. */ pkeys_total = min_t(int, pkeys_total, ((ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) + 1)); +#endif return pkeys_total; } @@ -102,6 +104,7 @@ void __init pkey_early_init_devtree(void) { int pkeys_total, i; +#ifdef CONFIG_PPC_MEM_KEYS /* * We define PKEY_DISABLE_EXECUTE in addition to the arch-neutral * generic defines for PKEY_DISABLE_ACCESS and PKEY_DISABLE_WRITE. @@ -117,7 +120,7 @@ void __init pkey_early_init_devtree(void) BUILD_BUG_ON(__builtin_clzl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) + __builtin_popcountl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) != (sizeof(u64) * BITS_PER_BYTE)); - +#endif /* * Only P7 and above supports SPRN_AMR update with MSR[PR] = 1 */ @@ -223,14 +226,6 @@ out: return; } -void pkey_mm_init(struct mm_struct *mm) -{ - if (!mmu_has_feature(MMU_FTR_PKEY)) - return; - mm_pkey_allocation_map(mm) = initial_allocation_mask; - mm->context.execute_only_pkey = execute_only_key; -} - static inline u64 read_amr(void) { return mfspr(SPRN_AMR); @@ -257,6 +252,15 @@ static inline void write_iamr(u64 value) mtspr(SPRN_IAMR, value); } +#ifdef CONFIG_PPC_MEM_KEYS +void pkey_mm_init(struct mm_struct *mm) +{ + if (!mmu_has_feature(MMU_FTR_PKEY)) + return; + mm_pkey_allocation_map(mm) = initial_allocation_mask; + mm->context.execute_only_pkey = execute_only_key; +} + static inline void init_amr(int pkey, u8 init_bits) { u64 new_amr_bits = (((u64)init_bits & 0x3UL) << pkeyshift(pkey)); @@ -445,3 +449,5 @@ void arch_dup_pkeys(struct mm_struct *oldmm, struct mm_struct *mm) mm_pkey_allocation_map(mm) = mm_pkey_allocation_map(oldmm); mm->context.execute_only_pkey = oldmm->context.execute_only_pkey; } + +#endif /* CONFIG_PPC_MEM_KEYS */ diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 44ab03fbcadc..60162b65909c 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -406,6 +406,11 @@ config PPC_KUAP_DEBUG Add extra debugging for Kernel Userspace Access Protection (KUAP) If you're unsure, say N. +config PPC_PKEY + def_bool y + depends on PPC_BOOK3S_64 + depends on PPC_MEM_KEYS || PPC_KUAP || PPC_KUEP + config ARCH_ENABLE_HUGEPAGE_MIGRATION def_bool y depends on PPC_BOOK3S_64 && HUGETLB_PAGE && MIGRATION -- cgit v1.2.3 From fc1347b5feb685073ce2108c68cd8147340be016 Mon Sep 17 00:00:00 2001 From: Christophe Lombard Date: Wed, 25 Nov 2020 16:50:09 +0100 Subject: ocxl: Assign a register set to a Logical Partition Platform specific function to assign a register set to a Logical Partition. The "ibm,mmio-atsd" property, provided by the firmware, contains the 16 base ATSD physical addresses (ATSD0 through ATSD15) of the set of MMIO registers (XTS MMIO ATSDx LPARID/AVA/launch/status register). For the time being, the ATSD0 set of registers is used by default. Signed-off-by: Christophe Lombard Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201125155013.39955-2-clombard@linux.vnet.ibm.com --- arch/powerpc/include/asm/pnv-ocxl.h | 3 +++ arch/powerpc/platforms/powernv/ocxl.c | 45 +++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h index d37ededca3ee..60c3c74427d9 100644 --- a/arch/powerpc/include/asm/pnv-ocxl.h +++ b/arch/powerpc/include/asm/pnv-ocxl.h @@ -28,4 +28,7 @@ int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask, void **p void pnv_ocxl_spa_release(void *platform_data); int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle); +int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid, + uint64_t lpcr, void __iomem **arva); +void pnv_ocxl_unmap_lpar(void __iomem *arva); #endif /* _ASM_PNV_OCXL_H */ diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c index ecdad219d704..57fc1062677b 100644 --- a/arch/powerpc/platforms/powernv/ocxl.c +++ b/arch/powerpc/platforms/powernv/ocxl.c @@ -483,3 +483,48 @@ int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle) return rc; } EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache); + +int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid, + uint64_t lpcr, void __iomem **arva) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + u64 mmio_atsd; + int rc; + + /* ATSD physical address. + * ATSD LAUNCH register: write access initiates a shoot down to + * initiate the TLB Invalidate command. + */ + rc = of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", + 0, &mmio_atsd); + if (rc) { + dev_info(&dev->dev, "No available ATSD found\n"); + return rc; + } + + /* Assign a register set to a Logical Partition and MMIO ATSD + * LPARID register to the required value. + */ + rc = opal_npu_map_lpar(phb->opal_id, pci_dev_id(dev), + lparid, lpcr); + if (rc) { + dev_err(&dev->dev, "Error mapping device to LPAR: %d\n", rc); + return rc; + } + + *arva = ioremap(mmio_atsd, 24); + if (!(*arva)) { + dev_warn(&dev->dev, "ioremap failed - mmio_atsd: %#llx\n", mmio_atsd); + rc = -ENOMEM; + } + + return rc; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_map_lpar); + +void pnv_ocxl_unmap_lpar(void __iomem *arva) +{ + iounmap(arva); +} +EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_lpar); -- cgit v1.2.3 From 19b311ca51e108b6d8d679496af8635fdc1984a8 Mon Sep 17 00:00:00 2001 From: Christophe Lombard Date: Wed, 25 Nov 2020 16:50:10 +0100 Subject: ocxl: Initiate a TLB invalidate command When a TLB Invalidate is required for the Logical Partition, the following sequence has to be performed: 1. Load MMIO ATSD AVA register with the necessary value, if required. 2. Write the MMIO ATSD launch register to initiate the TLB Invalidate command. 3. Poll the MMIO ATSD status register to determine when the TLB Invalidate has been completed. Signed-off-by: Christophe Lombard Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201125155013.39955-3-clombard@linux.vnet.ibm.com --- arch/powerpc/include/asm/pnv-ocxl.h | 51 ++++++++++++++++++++++++++ arch/powerpc/platforms/powernv/ocxl.c | 69 +++++++++++++++++++++++++++++++++++ 2 files changed, 120 insertions(+) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h index 60c3c74427d9..9acd1fbf1197 100644 --- a/arch/powerpc/include/asm/pnv-ocxl.h +++ b/arch/powerpc/include/asm/pnv-ocxl.h @@ -3,12 +3,59 @@ #ifndef _ASM_PNV_OCXL_H #define _ASM_PNV_OCXL_H +#include #include #define PNV_OCXL_TL_MAX_TEMPLATE 63 #define PNV_OCXL_TL_BITS_PER_RATE 4 #define PNV_OCXL_TL_RATE_BUF_SIZE ((PNV_OCXL_TL_MAX_TEMPLATE+1) * PNV_OCXL_TL_BITS_PER_RATE / 8) +#define PNV_OCXL_ATSD_TIMEOUT 1 + +/* TLB Management Instructions */ +#define PNV_OCXL_ATSD_LNCH 0x00 +/* Radix Invalidate */ +#define PNV_OCXL_ATSD_LNCH_R PPC_BIT(0) +/* Radix Invalidation Control + * 0b00 Just invalidate TLB. + * 0b01 Invalidate just Page Walk Cache. + * 0b10 Invalidate TLB, Page Walk Cache, and any + * caching of Partition and Process Table Entries. + */ +#define PNV_OCXL_ATSD_LNCH_RIC PPC_BITMASK(1, 2) +/* Number and Page Size of translations to be invalidated */ +#define PNV_OCXL_ATSD_LNCH_LP PPC_BITMASK(3, 10) +/* Invalidation Criteria + * 0b00 Invalidate just the target VA. + * 0b01 Invalidate matching PID. + */ +#define PNV_OCXL_ATSD_LNCH_IS PPC_BITMASK(11, 12) +/* 0b1: Process Scope, 0b0: Partition Scope */ +#define PNV_OCXL_ATSD_LNCH_PRS PPC_BIT(13) +/* Invalidation Flag */ +#define PNV_OCXL_ATSD_LNCH_B PPC_BIT(14) +/* Actual Page Size to be invalidated + * 000 4KB + * 101 64KB + * 001 2MB + * 010 1GB + */ +#define PNV_OCXL_ATSD_LNCH_AP PPC_BITMASK(15, 17) +/* Defines the large page select + * L=0b0 for 4KB pages + * L=0b1 for large pages) + */ +#define PNV_OCXL_ATSD_LNCH_L PPC_BIT(18) +/* Process ID */ +#define PNV_OCXL_ATSD_LNCH_PID PPC_BITMASK(19, 38) +/* NoFlush – Assumed to be 0b0 */ +#define PNV_OCXL_ATSD_LNCH_F PPC_BIT(39) +#define PNV_OCXL_ATSD_LNCH_OCAPI_SLBI PPC_BIT(40) +#define PNV_OCXL_ATSD_LNCH_OCAPI_SINGLETON PPC_BIT(41) +#define PNV_OCXL_ATSD_AVA 0x08 +#define PNV_OCXL_ATSD_AVA_AVA PPC_BITMASK(0, 51) +#define PNV_OCXL_ATSD_STAT 0x10 + int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled, u16 *supported); int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count); @@ -31,4 +78,8 @@ int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle); int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid, uint64_t lpcr, void __iomem **arva); void pnv_ocxl_unmap_lpar(void __iomem *arva); +void pnv_ocxl_tlb_invalidate(void __iomem *arva, + unsigned long pid, + unsigned long addr, + unsigned long page_size); #endif /* _ASM_PNV_OCXL_H */ diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c index 57fc1062677b..9105efcf242a 100644 --- a/arch/powerpc/platforms/powernv/ocxl.c +++ b/arch/powerpc/platforms/powernv/ocxl.c @@ -528,3 +528,72 @@ void pnv_ocxl_unmap_lpar(void __iomem *arva) iounmap(arva); } EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_lpar); + +void pnv_ocxl_tlb_invalidate(void __iomem *arva, + unsigned long pid, + unsigned long addr, + unsigned long page_size) +{ + unsigned long timeout = jiffies + (HZ * PNV_OCXL_ATSD_TIMEOUT); + u64 val = 0ull; + int pend; + u8 size; + + if (!(arva)) + return; + + if (addr) { + /* load Abbreviated Virtual Address register with + * the necessary value + */ + val |= FIELD_PREP(PNV_OCXL_ATSD_AVA_AVA, addr >> (63-51)); + out_be64(arva + PNV_OCXL_ATSD_AVA, val); + } + + /* Write access initiates a shoot down to initiate the + * TLB Invalidate command + */ + val = PNV_OCXL_ATSD_LNCH_R; + val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_RIC, 0b10); + if (addr) + val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b00); + else { + val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b01); + val |= PNV_OCXL_ATSD_LNCH_OCAPI_SINGLETON; + } + val |= PNV_OCXL_ATSD_LNCH_PRS; + /* Actual Page Size to be invalidated + * 000 4KB + * 101 64KB + * 001 2MB + * 010 1GB + */ + size = 0b101; + if (page_size == 0x1000) + size = 0b000; + if (page_size == 0x200000) + size = 0b001; + if (page_size == 0x40000000) + size = 0b010; + val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_AP, size); + val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_PID, pid); + out_be64(arva + PNV_OCXL_ATSD_LNCH, val); + + /* Poll the ATSD status register to determine when the + * TLB Invalidate has been completed. + */ + val = in_be64(arva + PNV_OCXL_ATSD_STAT); + pend = val >> 63; + + while (pend) { + if (time_after_eq(jiffies, timeout)) { + pr_err("%s - Timeout while reading XTS MMIO ATSD status register (val=%#llx, pidr=0x%lx)\n", + __func__, val, pid); + return; + } + cpu_relax(); + val = in_be64(arva + PNV_OCXL_ATSD_STAT); + pend = val >> 63; + } +} +EXPORT_SYMBOL_GPL(pnv_ocxl_tlb_invalidate); -- cgit v1.2.3 From 6c58b1b41b19c00099e4771ee55e21eb9aa245c1 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Thu, 9 Apr 2020 16:13:37 +1000 Subject: powernv/pci: Print an error when device enable is blocked If the platform decides to block enabling the device nothing is printed currently. This can lead to some confusion since the dmesg output will usually print an error with no context e.g. e1000e: probe of 0022:01:00.0 failed with error -22 This shouldn't be spammy since pci_enable_device() already prints a messages when it succeeds. Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200409061337.9187-1-oohall@gmail.com --- arch/powerpc/platforms/powernv/pci-ioda.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 2b4ceb5e6ce4..c4f72cdc9b51 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2613,8 +2613,10 @@ static bool pnv_pci_enable_device_hook(struct pci_dev *dev) return true; pdn = pci_get_pdn(dev); - if (!pdn || pdn->pe_number == IODA_INVALID_PE) + if (!pdn || pdn->pe_number == IODA_INVALID_PE) { + pci_err(dev, "pci_enable_device() blocked, no PE assigned.\n"); return false; + } return true; } -- cgit v1.2.3 From b1198a88230f2ce50c271e22b82a8b8610b2eea9 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Sun, 22 Nov 2020 18:38:28 +1100 Subject: powerpc/powernv/npu: Do not attempt NPU2 setup on POWER8NVL NPU We execute certain NPU2 setup code (such as mapping an LPID to a device in NPU2) unconditionally if an Nvlink bridge is detected. However this cannot succeed on POWER8NVL machines and errors appear in dmesg. This is harmless as skiboot returns an error and the only place we check it is vfio-pci but that code does not get called on P8+ either. This adds a check if pnv_npu2_xxx helpers are called on a machine with NPU2 which initializes pnv_phb::npu in pnv_npu2_init(); pnv_phb::npu==NULL on POWER8/NVL (Naples). While at this, fix NULL derefencing in pnv_npu_peers_take_ownership/ pnv_npu_peers_release_ownership which occurs when GPUs on mentioned P8s cause EEH which happens if "vfio-pci" disables devices using the D3 power state; the vfio-pci's disable_idle_d3 module parameter controls this and must be set on Naples. The EEH handling clears the entire pnv_ioda_pe struct in pnv_ioda_free_pe() hence the NULL derefencing. We cannot recover from that but at least we stop crashing. Tested on - POWER9 pvr=004e1201, Ubuntu 19.04 host, Ubuntu 18.04 vm, NVIDIA GV100 10de:1db1 driver 418.39 - POWER8 pvr=004c0100, RHEL 7.6 host, Ubuntu 16.10 vm, NVIDIA P100 10de:15f9 driver 396.47 Fixes: 1b785611e119 ("powerpc/powernv/npu: Add release_ownership hook") Cc: stable@vger.kernel.org # 5.0 Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201122073828.15446-1-aik@ozlabs.ru --- arch/powerpc/platforms/powernv/npu-dma.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index abeaa533b976..b711dc3262a3 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -385,7 +385,8 @@ static void pnv_npu_peers_take_ownership(struct iommu_table_group *table_group) for (i = 0; i < npucomp->pe_num; ++i) { struct pnv_ioda_pe *pe = npucomp->pe[i]; - if (!pe->table_group.ops->take_ownership) + if (!pe->table_group.ops || + !pe->table_group.ops->take_ownership) continue; pe->table_group.ops->take_ownership(&pe->table_group); } @@ -401,7 +402,8 @@ static void pnv_npu_peers_release_ownership( for (i = 0; i < npucomp->pe_num; ++i) { struct pnv_ioda_pe *pe = npucomp->pe[i]; - if (!pe->table_group.ops->release_ownership) + if (!pe->table_group.ops || + !pe->table_group.ops->release_ownership) continue; pe->table_group.ops->release_ownership(&pe->table_group); } @@ -623,6 +625,11 @@ int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid, return -ENODEV; hose = pci_bus_to_host(npdev->bus); + if (hose->npu == NULL) { + dev_info_once(&npdev->dev, "Nvlink1 does not support contexts"); + return 0; + } + nphb = hose->private_data; dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=%u\n", @@ -670,6 +677,11 @@ int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev) return -ENODEV; hose = pci_bus_to_host(npdev->bus); + if (hose->npu == NULL) { + dev_info_once(&npdev->dev, "Nvlink1 does not support contexts"); + return 0; + } + nphb = hose->private_data; dev_dbg(&gpdev->dev, "destroy context opalid=%llu\n", -- cgit v1.2.3 From 39c8bf2b3cc166a2a75111e4941cc5f7efbddc35 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 17 Nov 2020 05:07:58 +0000 Subject: powerpc: Retire e200 core (mpc555x processor) There is no defconfig selecting CONFIG_E200, and no platform. e200 is an earlier version of booke, a predecessor of e500, with some particularities like an unified cache instead of both an instruction cache and a data cache. Remove it. Signed-off-by: Christophe Leroy Acked-by: Scott Wood Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/34ebc3ba2c768d97f363bd5f2deea2356e9ae127.1605589460.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/cputable.h | 11 ------ arch/powerpc/include/asm/mmu.h | 2 +- arch/powerpc/include/asm/reg.h | 5 --- arch/powerpc/include/asm/reg_booke.h | 12 ------- arch/powerpc/kernel/cpu_setup_fsl_booke.S | 9 ----- arch/powerpc/kernel/cputable.c | 46 ------------------------- arch/powerpc/kernel/head_booke.h | 3 +- arch/powerpc/kernel/head_fsl_booke.S | 57 +------------------------------ arch/powerpc/kernel/setup_32.c | 2 -- arch/powerpc/kernel/traps.c | 25 -------------- arch/powerpc/mm/nohash/fsl_booke.c | 12 ++----- arch/powerpc/platforms/Kconfig.cputype | 13 +++---- 12 files changed, 11 insertions(+), 186 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 845a338c8d3f..8a4e1ed8a4a2 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -41,7 +41,6 @@ extern int machine_check_4xx(struct pt_regs *regs); extern int machine_check_440A(struct pt_regs *regs); extern int machine_check_e500mc(struct pt_regs *regs); extern int machine_check_e500(struct pt_regs *regs); -extern int machine_check_e200(struct pt_regs *regs); extern int machine_check_47x(struct pt_regs *regs); int machine_check_8xx(struct pt_regs *regs); int machine_check_83xx(struct pt_regs *regs); @@ -381,10 +380,6 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTRS_440x6 (CPU_FTR_NOEXECUTE | \ CPU_FTR_INDEXED_DCR) #define CPU_FTRS_47X (CPU_FTRS_440x6) -#define CPU_FTRS_E200 (CPU_FTR_SPE_COMP | \ - CPU_FTR_COHERENT_ICACHE | \ - CPU_FTR_NOEXECUTE | \ - CPU_FTR_DEBUG_LVL_EXC) #define CPU_FTRS_E500 (CPU_FTR_MAYBE_CAN_DOZE | \ CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | \ CPU_FTR_NOEXECUTE) @@ -529,9 +524,6 @@ enum { #elif defined(CONFIG_44x) CPU_FTRS_44X | CPU_FTRS_440x6 | #endif -#ifdef CONFIG_E200 - CPU_FTRS_E200 | -#endif #ifdef CONFIG_E500 CPU_FTRS_E500 | CPU_FTRS_E500_2 | #endif @@ -601,9 +593,6 @@ enum { #elif defined(CONFIG_44x) CPU_FTRS_44X & CPU_FTRS_440x6 & #endif -#ifdef CONFIG_E200 - CPU_FTRS_E200 & -#endif #ifdef CONFIG_E500 CPU_FTRS_E500 & CPU_FTRS_E500_2 & #endif diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 60aa420f414d..620e8fe6f8fd 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -171,7 +171,7 @@ enum { #elif defined(CONFIG_44x) MMU_FTR_TYPE_44x | #endif -#if defined(CONFIG_E200) || defined(CONFIG_E500) +#ifdef CONFIG_E500 MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | MMU_FTR_USE_TLBILX | #endif #ifdef CONFIG_PPC_BOOK3S_32 diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index ee645e790446..b9492f2b0608 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1233,14 +1233,9 @@ #define SPRN_SPRG_WSCRATCH_MC SPRN_SPRG1 #define SPRN_SPRG_RSCRATCH4 SPRN_SPRG7R #define SPRN_SPRG_WSCRATCH4 SPRN_SPRG7W -#ifdef CONFIG_E200 -#define SPRN_SPRG_RSCRATCH_DBG SPRN_SPRG6R -#define SPRN_SPRG_WSCRATCH_DBG SPRN_SPRG6W -#else #define SPRN_SPRG_RSCRATCH_DBG SPRN_SPRG9 #define SPRN_SPRG_WSCRATCH_DBG SPRN_SPRG9 #endif -#endif #ifdef CONFIG_PPC_8xx #define SPRN_SPRG_SCRATCH0 SPRN_SPRG0 diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index 29a948e0c0f2..262782f08fd4 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -281,18 +281,6 @@ #define MSRP_PMMP 0x00000004 /* Protect MSR[PMM] */ #endif -#ifdef CONFIG_E200 -#define MCSR_MCP 0x80000000UL /* Machine Check Input Pin */ -#define MCSR_CP_PERR 0x20000000UL /* Cache Push Parity Error */ -#define MCSR_CPERR 0x10000000UL /* Cache Parity Error */ -#define MCSR_EXCP_ERR 0x08000000UL /* ISI, ITLB, or Bus Error on 1st insn - fetch for an exception handler */ -#define MCSR_BUS_IRERR 0x00000010UL /* Read Bus Error on instruction fetch*/ -#define MCSR_BUS_DRERR 0x00000008UL /* Read Bus Error on data load */ -#define MCSR_BUS_WRERR 0x00000004UL /* Write Bus Error on buffered - store or cache line push */ -#endif - /* Bit definitions for the HID1 */ #ifdef CONFIG_E500 /* e500v1/v2 */ diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 1d308780e0d3..4bf33f1b4193 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -108,15 +108,6 @@ _GLOBAL(__setup_cpu_e6500) #endif /* CONFIG_PPC_E500MC */ #ifdef CONFIG_PPC32 -#ifdef CONFIG_E200 -_GLOBAL(__setup_cpu_e200) - /* enable dedicated debug exception handling resources (Debug APU) */ - mfspr r3,SPRN_HID0 - ori r3,r3,HID0_DAPUEN@l - mtspr SPRN_HID0,r3 - b __setup_e200_ivors -#endif /* CONFIG_E200 */ - #ifdef CONFIG_E500 #ifndef CONFIG_PPC_E500MC _GLOBAL(__setup_cpu_e500v1) diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index b552e22dcddd..f2fcd29aab23 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -36,7 +36,6 @@ const char *powerpc_base_platform; * and ppc64 */ #ifdef CONFIG_PPC32 -extern void __setup_cpu_e200(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_e500v1(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_e500v2(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_e500mc(unsigned long offset, struct cpu_spec* spec); @@ -1902,51 +1901,6 @@ static struct cpu_spec __initdata cpu_specs[] = { } #endif /* CONFIG_PPC_47x */ #endif /* CONFIG_44x */ -#ifdef CONFIG_E200 - { /* e200z5 */ - .pvr_mask = 0xfff00000, - .pvr_value = 0x81000000, - .cpu_name = "e200z5", - /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */ - .cpu_features = CPU_FTRS_E200, - .cpu_user_features = COMMON_USER_BOOKE | - PPC_FEATURE_HAS_EFP_SINGLE | - PPC_FEATURE_UNIFIED_CACHE, - .mmu_features = MMU_FTR_TYPE_FSL_E, - .dcache_bsize = 32, - .machine_check = machine_check_e200, - .platform = "ppc5554", - }, - { /* e200z6 */ - .pvr_mask = 0xfff00000, - .pvr_value = 0x81100000, - .cpu_name = "e200z6", - /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */ - .cpu_features = CPU_FTRS_E200, - .cpu_user_features = COMMON_USER_BOOKE | - PPC_FEATURE_HAS_SPE_COMP | - PPC_FEATURE_HAS_EFP_SINGLE_COMP | - PPC_FEATURE_UNIFIED_CACHE, - .mmu_features = MMU_FTR_TYPE_FSL_E, - .dcache_bsize = 32, - .machine_check = machine_check_e200, - .platform = "ppc5554", - }, - { /* default match */ - .pvr_mask = 0x00000000, - .pvr_value = 0x00000000, - .cpu_name = "(generic E200 PPC)", - .cpu_features = CPU_FTRS_E200, - .cpu_user_features = COMMON_USER_BOOKE | - PPC_FEATURE_HAS_EFP_SINGLE | - PPC_FEATURE_UNIFIED_CACHE, - .mmu_features = MMU_FTR_TYPE_FSL_E, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_e200, - .machine_check = machine_check_e200, - .platform = "ppc5554", - } -#endif /* CONFIG_E200 */ #endif /* CONFIG_PPC32 */ #ifdef CONFIG_E500 #ifdef CONFIG_PPC32 diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index e26d35de27e5..74e230c200fb 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -185,7 +185,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) * * On 40x critical is the only additional level * On 44x/e500 we have critical and machine check - * On e200 we have critical and debug (machine check occurs via critical) * * Additionally we reserve a SPRG for each priority level so we can free up a * GPR to use as the base for indirect access to the exception stacks. This @@ -201,7 +200,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) #define MC_STACK_BASE mcheckirq_ctx #define CRIT_STACK_BASE critirq_ctx -/* only on e500mc/e200 */ +/* only on e500mc */ #define DBG_STACK_BASE dbgirq_ctx #define EXC_LVL_FRAME_OVERHEAD (THREAD_SIZE - INT_FRAME_SIZE - EXC_LVL_SIZE) diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 586a6ac501e9..fdd4d274c245 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -187,9 +187,6 @@ set_ivor: /* Setup the defaults for TLB entries */ li r2,(MAS4_TSIZED(BOOK3E_PAGESZ_4K))@l -#ifdef CONFIG_E200 - oris r2,r2,MAS4_TLBSELD(1)@h -#endif mtspr SPRN_MAS4, r2 #if !defined(CONFIG_BDI_SWITCH) @@ -362,13 +359,7 @@ interrupt_base: CRITICAL_EXCEPTION(0x0100, CRITICAL, CriticalInput, unknown_exception) /* Machine Check Interrupt */ -#ifdef CONFIG_E200 - /* no RFMCI, MCSRRs on E200 */ - CRITICAL_EXCEPTION(0x0200, MACHINE_CHECK, MachineCheck, \ - machine_check_exception) -#else MCHECK_EXCEPTION(0x0200, MachineCheck, machine_check_exception) -#endif /* Data Storage Interrupt */ START_EXCEPTION(DataStorage) @@ -399,15 +390,9 @@ interrupt_base: /* Floating Point Unavailable Interrupt */ #ifdef CONFIG_PPC_FPU FP_UNAVAILABLE_EXCEPTION -#else -#ifdef CONFIG_E200 - /* E200 treats 'normal' floating point instructions as FP Unavail exception */ - EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \ - program_check_exception, EXC_XFER_STD) #else EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \ unknown_exception, EXC_XFER_STD) -#endif #endif /* System Call Interrupt */ @@ -625,7 +610,7 @@ END_BTB_FLUSH_SECTION mfspr r10, SPRN_SPRG_RSCRATCH0 b InstructionStorage -/* Define SPE handlers for e200 and e500v2 */ +/* Define SPE handlers for e500v2 */ #ifdef CONFIG_SPE /* SPE Unavailable */ START_EXCEPTION(SPEUnavailable) @@ -807,31 +792,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) #endif 3: mtspr SPRN_MAS2, r12 -#ifdef CONFIG_E200 - /* Round robin TLB1 entries assignment */ - mfspr r12, SPRN_MAS0 - - /* Extract TLB1CFG(NENTRY) */ - mfspr r11, SPRN_TLB1CFG - andi. r11, r11, 0xfff - - /* Extract MAS0(NV) */ - andi. r13, r12, 0xfff - addi r13, r13, 1 - cmpw 0, r13, r11 - addi r12, r12, 1 - - /* check if we need to wrap */ - blt 7f - - /* wrap back to first free tlbcam entry */ - lis r13, tlbcam_index@ha - lwz r13, tlbcam_index@l(r13) - rlwimi r12, r13, 0, 20, 31 -7: - mtspr SPRN_MAS0,r12 -#endif /* CONFIG_E200 */ - tlb_write_entry: tlbwe @@ -933,21 +893,6 @@ get_phys_addr: * Global functions */ -#ifdef CONFIG_E200 -/* Adjust or setup IVORs for e200 */ -_GLOBAL(__setup_e200_ivors) - li r3,DebugDebug@l - mtspr SPRN_IVOR15,r3 - li r3,SPEUnavailable@l - mtspr SPRN_IVOR32,r3 - li r3,SPEFloatingPointData@l - mtspr SPRN_IVOR33,r3 - li r3,SPEFloatingPointRound@l - mtspr SPRN_IVOR34,r3 - sync - blr -#endif - #ifdef CONFIG_E500 #ifndef CONFIG_PPC_E500MC /* Adjust or setup IVORs for e500v1/v2 */ diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 057d6b8e9bb0..416e2c7a8b0a 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -223,6 +223,4 @@ __init void initialize_cache_info(void) dcache_bsize = cur_cpu_spec->dcache_bsize; icache_bsize = cur_cpu_spec->icache_bsize; ucache_bsize = 0; - if (IS_ENABLED(CONFIG_E200)) - ucache_bsize = icache_bsize = dcache_bsize; } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 46419ae4d17e..3ec7b443fe6b 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -751,31 +751,6 @@ int machine_check_generic(struct pt_regs *regs) { return 0; } -#elif defined(CONFIG_E200) -int machine_check_e200(struct pt_regs *regs) -{ - unsigned long reason = mfspr(SPRN_MCSR); - - printk("Machine check in kernel mode.\n"); - printk("Caused by (from MCSR=%lx): ", reason); - - if (reason & MCSR_MCP) - pr_cont("Machine Check Signal\n"); - if (reason & MCSR_CP_PERR) - pr_cont("Cache Push Parity Error\n"); - if (reason & MCSR_CPERR) - pr_cont("Cache Parity Error\n"); - if (reason & MCSR_EXCP_ERR) - pr_cont("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n"); - if (reason & MCSR_BUS_IRERR) - pr_cont("Bus - Read Bus Error on instruction fetch\n"); - if (reason & MCSR_BUS_DRERR) - pr_cont("Bus - Read Bus Error on data load\n"); - if (reason & MCSR_BUS_WRERR) - pr_cont("Bus - Write Bus Error on buffered store or cache line push\n"); - - return 0; -} #elif defined(CONFIG_PPC32) int machine_check_generic(struct pt_regs *regs) { diff --git a/arch/powerpc/mm/nohash/fsl_booke.c b/arch/powerpc/mm/nohash/fsl_booke.c index 36bda962d3b3..03dacbe940e5 100644 --- a/arch/powerpc/mm/nohash/fsl_booke.c +++ b/arch/powerpc/mm/nohash/fsl_booke.c @@ -223,15 +223,9 @@ void flush_instruction_cache(void) { unsigned long tmp; - if (IS_ENABLED(CONFIG_E200)) { - tmp = mfspr(SPRN_L1CSR0); - tmp |= L1CSR0_CFI | L1CSR0_CLFC; - mtspr(SPRN_L1CSR0, tmp); - } else { - tmp = mfspr(SPRN_L1CSR1); - tmp |= L1CSR1_ICFI | L1CSR1_ICLFR; - mtspr(SPRN_L1CSR1, tmp); - } + tmp = mfspr(SPRN_L1CSR1); + tmp |= L1CSR1_ICFI | L1CSR1_ICLFR; + mtspr(SPRN_L1CSR1, tmp); isync(); } diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 60162b65909c..45ce09db8f46 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -23,7 +23,7 @@ choice The most common ones are the desktop and server CPUs (603, 604, 740, 750, 74xx) CPUs from Freescale and IBM, with their embedded 512x/52xx/82xx/83xx/86xx counterparts. - The other embedded parts, namely 4xx, 8xx, e200 (55xx) and e500 + The other embedded parts, namely 4xx, 8xx and e500 (85xx) each form a family of their own that is not compatible with the others. @@ -66,9 +66,6 @@ config 44x select HAVE_PCI select PHYS_64BIT -config E200 - bool "Freescale e200" - endchoice choice @@ -258,12 +255,12 @@ config 4xx config BOOKE bool - depends on E200 || E500 || 44x || PPC_BOOK3E + depends on E500 || 44x || PPC_BOOK3E default y config FSL_BOOKE bool - depends on (E200 || E500) && PPC32 + depends on E500 && PPC32 default y # this is for common code between PPC32 & PPC64 FSL BOOKE @@ -328,7 +325,7 @@ config VSX config SPE_POSSIBLE def_bool y - depends on E200 || (E500 && !PPC_E500MC) + depends on E500 && !PPC_E500MC config SPE bool "SPE Support" @@ -480,7 +477,7 @@ config NR_CPUS config NOT_COHERENT_CACHE bool - depends on 4xx || PPC_8xx || E200 || PPC_MPC512x || \ + depends on 4xx || PPC_8xx || PPC_MPC512x || \ GAMECUBE_COMMON || AMIGAONE select ARCH_HAS_DMA_PREP_COHERENT select ARCH_HAS_SYNC_DMA_FOR_DEVICE -- cgit v1.2.3 From 250ad7a45b1e58d580decfb935fc063c4cf56f91 Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Mon, 7 Dec 2020 12:05:19 +1100 Subject: powerpc/powernv/idle: Restore CIABR after idle for Power9 On Power9, CIABR is lost after idle. This means that instruction breakpoints set by xmon which use CIABR do not work. Fix this by restoring CIABR after idle. Signed-off-by: Jordan Niethe Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207010519.15597-2-jniethe5@gmail.com --- arch/powerpc/platforms/powernv/idle.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 1ed7c5286487..e6f461812856 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -589,6 +589,7 @@ struct p9_sprs { u64 spurr; u64 dscr; u64 wort; + u64 ciabr; u64 mmcra; u32 mmcr0; @@ -668,6 +669,7 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) sprs.spurr = mfspr(SPRN_SPURR); sprs.dscr = mfspr(SPRN_DSCR); sprs.wort = mfspr(SPRN_WORT); + sprs.ciabr = mfspr(SPRN_CIABR); sprs.mmcra = mfspr(SPRN_MMCRA); sprs.mmcr0 = mfspr(SPRN_MMCR0); @@ -785,6 +787,7 @@ core_woken: mtspr(SPRN_SPURR, sprs.spurr); mtspr(SPRN_DSCR, sprs.dscr); mtspr(SPRN_WORT, sprs.wort); + mtspr(SPRN_CIABR, sprs.ciabr); mtspr(SPRN_MMCRA, sprs.mmcra); mtspr(SPRN_MMCR0, sprs.mmcr0); -- cgit v1.2.3 From 7049b288ea8c95f270ec8fe643e3c3187938d5af Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:35 -0600 Subject: powerpc/rtas: rtas_ibm_suspend_me -> rtas_ibm_suspend_me_unsafe The pseries partition suspend sequence requires that all active CPUs call H_JOIN, which suspends all but one of them with interrupts disabled. The "chosen" CPU is then to call ibm,suspend-me to complete the suspend. Upon returning from ibm,suspend-me, the chosen CPU is to use H_PROD to wake the joined CPUs. Using on_each_cpu() for this, as rtas_ibm_suspend_me() does to implement partition migration, is susceptible to deadlock with other users of on_each_cpu() and with users of stop_machine APIs. The callback passed to on_each_cpu() is not allowed to synchronize with other CPUs in the way it is used here. Complicating the fix is the fact that rtas_ibm_suspend_me() also occupies the function name that should be used to provide a more conventional wrapper for ibm,suspend-me. Rename rtas_ibm_suspend_me() to rtas_ibm_suspend_me_unsafe() to free up the name and indicate that it should not gain users. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-4-nathanl@linux.ibm.com --- arch/powerpc/include/asm/rtas.h | 2 +- arch/powerpc/kernel/rtas.c | 6 +++--- arch/powerpc/platforms/pseries/mobility.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index f060181a0d32..8436ed01567b 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -257,7 +257,7 @@ extern int rtas_set_indicator_fast(int indicator, int index, int new_value); extern void rtas_progress(char *s, unsigned short hex); extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data); extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data); -extern int rtas_ibm_suspend_me(u64 handle); +int rtas_ibm_suspend_me_unsafe(u64 handle); struct rtc_time; extern time64_t rtas_get_boot_time(void); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 4ed64aba37d6..0a8e5dc2c108 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -843,7 +843,7 @@ static void rtas_percpu_suspend_me(void *info) __rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1); } -int rtas_ibm_suspend_me(u64 handle) +int rtas_ibm_suspend_me_unsafe(u64 handle) { long state; long rc; @@ -949,7 +949,7 @@ int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...) } #else /* CONFIG_PPC_PSERIES */ -int rtas_ibm_suspend_me(u64 handle) +int rtas_ibm_suspend_me_unsafe(u64 handle) { return -ENOSYS; } @@ -1185,7 +1185,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) int rc = 0; u64 handle = ((u64)be32_to_cpu(args.args[0]) << 32) | be32_to_cpu(args.args[1]); - rc = rtas_ibm_suspend_me(handle); + rc = rtas_ibm_suspend_me_unsafe(handle); if (rc == -EAGAIN) args.rets[0] = cpu_to_be32(RTAS_NOT_SUSPENDABLE); else if (rc == -EIO) diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 2f73cb5bf12d..6ff642e84c6a 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -370,7 +370,7 @@ static ssize_t migration_store(struct class *class, return rc; do { - rc = rtas_ibm_suspend_me(streamid); + rc = rtas_ibm_suspend_me_unsafe(streamid); if (rc == -EAGAIN) ssleep(1); } while (rc == -EAGAIN); -- cgit v1.2.3 From b06a6717873560e9dd1c07357781fc2b27545701 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:39 -0600 Subject: powerpc/pseries/mobility: don't error on absence of ibm, update-nodes Treat the absence of the ibm,update-nodes function as benign instead of reporting an error. If the platform does not provide that facility, it's not a problem for Linux. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-8-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/mobility.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 6ff642e84c6a..e66359b00297 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -261,7 +261,7 @@ int pseries_devicetree_update(s32 scope) update_nodes_token = rtas_token("ibm,update-nodes"); if (update_nodes_token == RTAS_UNKNOWN_SERVICE) - return -EINVAL; + return 0; rtas_buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); if (!rtas_buf) -- cgit v1.2.3 From aa5e5c9b556a2e5f68a915e4b5dfa5c6bda47c64 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:40 -0600 Subject: powerpc/pseries/mobility: add missing break to default case update_dt_node() has a switch statement where the default case lacks a break statement. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-9-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/mobility.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index e66359b00297..527a64e2d89f 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -213,6 +213,7 @@ static int update_dt_node(__be32 phandle, s32 scope) } prop_data += vd; + break; } cond_resched(); -- cgit v1.2.3 From 2d5be6f16c4ba5c27d06704976daf55f3236a236 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:41 -0600 Subject: powerpc/pseries/mobility: error message improvements - Convert printk(KERN_ERR) to pr_err(). - Include errno in property update failure message. - Remove reference to "Post-mobility" from device tree update message: with pr_err() it will have a "mobility:" prefix. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-10-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/mobility.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 527a64e2d89f..31d81b7da961 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -208,8 +208,8 @@ static int update_dt_node(__be32 phandle, s32 scope) rc = update_dt_property(dn, &prop, prop_name, vd, prop_data); if (rc) { - printk(KERN_ERR "Could not update %s" - " property\n", prop_name); + pr_err("updating %s property failed: %d\n", + prop_name, rc); } prop_data += vd; @@ -343,8 +343,7 @@ void post_mobility_fixup(void) rc = pseries_devicetree_update(MIGRATION_SCOPE); if (rc) - printk(KERN_ERR "Post-mobility device tree update " - "failed: %d\n", rc); + pr_err("device tree update failed: %d\n", rc); cacheinfo_rebuild(); -- cgit v1.2.3 From c3ae9781d5a64093f161e6cc5dfefb0773106ca9 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:42 -0600 Subject: powerpc/pseries/mobility: use rtas_activate_firmware() on resume It's incorrect to abort post-suspend processing if ibm,activate-firmware isn't available. Use rtas_activate_firmware(), which logs this condition appropriately and allows us to proceed. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-11-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/mobility.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 31d81b7da961..01ac7c03558e 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -312,21 +312,8 @@ int pseries_devicetree_update(s32 scope) void post_mobility_fixup(void) { int rc; - int activate_fw_token; - activate_fw_token = rtas_token("ibm,activate-firmware"); - if (activate_fw_token == RTAS_UNKNOWN_SERVICE) { - printk(KERN_ERR "Could not make post-mobility " - "activate-fw call.\n"); - return; - } - - do { - rc = rtas_call(activate_fw_token, 0, 1, NULL); - } while (rtas_busy_delay(rc)); - - if (rc) - printk(KERN_ERR "Post-mobility activate-fw failed: %d\n", rc); + rtas_activate_firmware(); /* * We don't want CPUs to go online/offline while the device -- cgit v1.2.3 From d9213319b84ee8393475c38361c84151d5c33415 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:43 -0600 Subject: powerpc/pseries/mobility: extract VASI session polling logic The behavior of rtas_ibm_suspend_me_unsafe() is to return -EAGAIN to the caller until the specified VASI suspend session state makes the transition from H_VASI_ENABLED to H_VASI_SUSPENDING. In the interest of separating concerns to prepare for a new implementation of the join/suspend sequence, extract VASI session polling logic into a couple of local functions. Waiting for the session state to reach H_VASI_SUSPENDING before calling rtas_ibm_suspend_me_unsafe() ensures that we will never get an EAGAIN result necessitating a retry. No user-visible change in behavior is intended. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-12-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/mobility.c | 69 ++++++++++++++++++++++++++++--- 1 file changed, 64 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 01ac7c03558e..573ed48b43d8 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -345,6 +345,66 @@ void post_mobility_fixup(void) return; } +static int poll_vasi_state(u64 handle, unsigned long *res) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long hvrc; + int ret; + + hvrc = plpar_hcall(H_VASI_STATE, retbuf, handle); + switch (hvrc) { + case H_SUCCESS: + ret = 0; + *res = retbuf[0]; + break; + case H_PARAMETER: + ret = -EINVAL; + break; + case H_FUNCTION: + ret = -EOPNOTSUPP; + break; + case H_HARDWARE: + default: + pr_err("unexpected H_VASI_STATE result %ld\n", hvrc); + ret = -EIO; + break; + } + return ret; +} + +static int wait_for_vasi_session_suspending(u64 handle) +{ + unsigned long state; + int ret; + + /* + * Wait for transition from H_VASI_ENABLED to + * H_VASI_SUSPENDING. Treat anything else as an error. + */ + while (true) { + ret = poll_vasi_state(handle, &state); + + if (ret != 0 || state == H_VASI_SUSPENDING) { + break; + } else if (state == H_VASI_ENABLED) { + ssleep(1); + } else { + pr_err("unexpected H_VASI_STATE result %lu\n", state); + ret = -EIO; + break; + } + } + + /* + * Proceed even if H_VASI_STATE is unavailable. If H_JOIN or + * ibm,suspend-me are also unimplemented, we'll recover then. + */ + if (ret == -EOPNOTSUPP) + ret = 0; + + return ret; +} + static ssize_t migration_store(struct class *class, struct class_attribute *attr, const char *buf, size_t count) @@ -356,12 +416,11 @@ static ssize_t migration_store(struct class *class, if (rc) return rc; - do { - rc = rtas_ibm_suspend_me_unsafe(streamid); - if (rc == -EAGAIN) - ssleep(1); - } while (rc == -EAGAIN); + rc = wait_for_vasi_session_suspending(streamid); + if (rc) + return rc; + rc = rtas_ibm_suspend_me_unsafe(streamid); if (rc) return rc; -- cgit v1.2.3 From 9327dc0aeef36a3cbb9d94f79b79cc4f91ff8a41 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:44 -0600 Subject: powerpc/pseries/mobility: use stop_machine for join/suspend The partition suspend sequence as specified in the platform architecture requires that all active processor threads call H_JOIN, which: - suspends the calling thread until it is the target of an H_PROD; or - immediately returns H_CONTINUE, if the calling thread is the last to call H_JOIN. This thread is expected to call ibm,suspend-me to completely suspend the partition. Upon returning from ibm,suspend-me the calling thread must wake all others using H_PROD. rtas_ibm_suspend_me_unsafe() uses on_each_cpu() to implement this protocol, but because of its synchronizing nature this is susceptible to deadlock versus users of stop_machine() or other callers of on_each_cpu(). Not only is stop_machine() intended for use cases like this, it handles error propagation and allows us to keep the data shared between CPUs minimal: a single atomic counter which ensures exactly one CPU will wake the others from their joined states. Switch the migration code to use stop_machine() and a less complex local implementation of the H_JOIN/ibm,suspend-me logic, which carries additional benefits: - more informative error reporting, appropriately ratelimited - resets the lockup detector / watchdog on resume to prevent lockup warnings when the OS has been suspended for a time exceeding the threshold. Fixes: 91dc182ca6e2 ("[PATCH] powerpc: special-case ibm,suspend-me RTAS call") Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-13-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/mobility.c | 132 ++++++++++++++++++++++++++++-- 1 file changed, 125 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 573ed48b43d8..5a3951626a96 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -12,9 +12,11 @@ #include #include #include +#include #include #include #include +#include #include #include #include @@ -405,6 +407,128 @@ static int wait_for_vasi_session_suspending(u64 handle) return ret; } +static void prod_single(unsigned int target_cpu) +{ + long hvrc; + int hwid; + + hwid = get_hard_smp_processor_id(target_cpu); + hvrc = plpar_hcall_norets(H_PROD, hwid); + if (hvrc == H_SUCCESS) + return; + pr_err_ratelimited("H_PROD of CPU %u (hwid %d) error: %ld\n", + target_cpu, hwid, hvrc); +} + +static void prod_others(void) +{ + unsigned int cpu; + + for_each_online_cpu(cpu) { + if (cpu != smp_processor_id()) + prod_single(cpu); + } +} + +static u16 clamp_slb_size(void) +{ + u16 prev = mmu_slb_size; + + slb_set_size(SLB_MIN_SIZE); + + return prev; +} + +static int do_suspend(void) +{ + u16 saved_slb_size; + int status; + int ret; + + pr_info("calling ibm,suspend-me on CPU %i\n", smp_processor_id()); + + /* + * The destination processor model may have fewer SLB entries + * than the source. We reduce mmu_slb_size to a safe minimum + * before suspending in order to minimize the possibility of + * programming non-existent entries on the destination. If + * suspend fails, we restore it before returning. On success + * the OF reconfig path will update it from the new device + * tree after resuming on the destination. + */ + saved_slb_size = clamp_slb_size(); + + ret = rtas_ibm_suspend_me(&status); + if (ret != 0) { + pr_err("ibm,suspend-me error: %d\n", status); + slb_set_size(saved_slb_size); + } + + return ret; +} + +static int do_join(void *arg) +{ + atomic_t *counter = arg; + long hvrc; + int ret; + + /* Must ensure MSR.EE off for H_JOIN. */ + hard_irq_disable(); + hvrc = plpar_hcall_norets(H_JOIN); + + switch (hvrc) { + case H_CONTINUE: + /* + * All other CPUs are offline or in H_JOIN. This CPU + * attempts the suspend. + */ + ret = do_suspend(); + break; + case H_SUCCESS: + /* + * The suspend is complete and this cpu has received a + * prod. + */ + ret = 0; + break; + case H_BAD_MODE: + case H_HARDWARE: + default: + ret = -EIO; + pr_err_ratelimited("H_JOIN error %ld on CPU %i\n", + hvrc, smp_processor_id()); + break; + } + + if (atomic_inc_return(counter) == 1) { + pr_info("CPU %u waking all threads\n", smp_processor_id()); + prod_others(); + } + /* + * Execution may have been suspended for several seconds, so + * reset the watchdog. + */ + touch_nmi_watchdog(); + return ret; +} + +static int pseries_migrate_partition(u64 handle) +{ + atomic_t counter = ATOMIC_INIT(0); + int ret; + + ret = wait_for_vasi_session_suspending(handle); + if (ret) + return ret; + + ret = stop_machine(do_join, &counter, cpu_online_mask); + if (ret == 0) + post_mobility_fixup(); + + return ret; +} + static ssize_t migration_store(struct class *class, struct class_attribute *attr, const char *buf, size_t count) @@ -416,16 +540,10 @@ static ssize_t migration_store(struct class *class, if (rc) return rc; - rc = wait_for_vasi_session_suspending(streamid); + rc = pseries_migrate_partition(streamid); if (rc) return rc; - rc = rtas_ibm_suspend_me_unsafe(streamid); - if (rc) - return rc; - - post_mobility_fixup(); - return count; } -- cgit v1.2.3 From 37cddc7d6cf4568a7fb69aeff6f26e4c8a3bc0f7 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:45 -0600 Subject: powerpc/pseries/mobility: signal suspend cancellation to platform If we're returning an error to user space, use H_VASI_SIGNAL to send a cancellation request to the platform. This isn't strictly required but it communicates that Linux will not attempt to complete the suspend, which allows the various entities involved to promptly end the operation in progress. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-14-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/mobility.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 5a3951626a96..f234a7ed87aa 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -513,6 +513,35 @@ static int do_join(void *arg) return ret; } +/* + * Abort reason code byte 0. We use only the 'Migrating partition' value. + */ +enum vasi_aborting_entity { + ORCHESTRATOR = 1, + VSP_SOURCE = 2, + PARTITION_FIRMWARE = 3, + PLATFORM_FIRMWARE = 4, + VSP_TARGET = 5, + MIGRATING_PARTITION = 6, +}; + +static void pseries_cancel_migration(u64 handle, int err) +{ + u32 reason_code; + u32 detail; + u8 entity; + long hvrc; + + entity = MIGRATING_PARTITION; + detail = abs(err) & 0xffffff; + reason_code = (entity << 24) | detail; + + hvrc = plpar_hcall_norets(H_VASI_SIGNAL, handle, + H_VASI_SIGNAL_CANCEL, reason_code); + if (hvrc) + pr_err("H_VASI_SIGNAL error: %ld\n", hvrc); +} + static int pseries_migrate_partition(u64 handle) { atomic_t counter = ATOMIC_INIT(0); @@ -525,6 +554,8 @@ static int pseries_migrate_partition(u64 handle) ret = stop_machine(do_join, &counter, cpu_online_mask); if (ret == 0) post_mobility_fixup(); + else + pseries_cancel_migration(handle, ret); return ret; } -- cgit v1.2.3 From aeca35b9a52b0e0d019a5244fbaab699f753b443 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:46 -0600 Subject: powerpc/pseries/mobility: retry partition suspend after error This is a mitigation for the relatively rare occurrence where a virtual IOA can be in a transient state that prevents the suspend/migration from succeeding, resulting in an error from ibm,suspend-me. If the join/suspend sequence returns an error, it is acceptable to retry as long as the VASI suspend session state is still "Suspending" (i.e. the platform is still waiting for the OS to suspend). Retry a few times on suspend failure while this condition holds, progressively increasing the delay between attempts. We don't want to retry indefinitey because firmware emits an error log event on each unsuccessful attempt. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-15-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/mobility.c | 59 +++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index f234a7ed87aa..fe7e35cdc9d5 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -542,16 +542,71 @@ static void pseries_cancel_migration(u64 handle, int err) pr_err("H_VASI_SIGNAL error: %ld\n", hvrc); } +static int pseries_suspend(u64 handle) +{ + const unsigned int max_attempts = 5; + unsigned int retry_interval_ms = 1; + unsigned int attempt = 1; + int ret; + + while (true) { + atomic_t counter = ATOMIC_INIT(0); + unsigned long vasi_state; + int vasi_err; + + ret = stop_machine(do_join, &counter, cpu_online_mask); + if (ret == 0) + break; + /* + * Encountered an error. If the VASI stream is still + * in Suspending state, it's likely a transient + * condition related to some device in the partition + * and we can retry in the hope that the cause has + * cleared after some delay. + * + * A better design would allow drivers etc to prepare + * for the suspend and avoid conditions which prevent + * the suspend from succeeding. For now, we have this + * mitigation. + */ + pr_notice("Partition suspend attempt %u of %u error: %d\n", + attempt, max_attempts, ret); + + if (attempt == max_attempts) + break; + + vasi_err = poll_vasi_state(handle, &vasi_state); + if (vasi_err == 0) { + if (vasi_state != H_VASI_SUSPENDING) { + pr_notice("VASI state %lu after failed suspend\n", + vasi_state); + break; + } + } else if (vasi_err != -EOPNOTSUPP) { + pr_err("VASI state poll error: %d", vasi_err); + break; + } + + pr_notice("Will retry partition suspend after %u ms\n", + retry_interval_ms); + + msleep(retry_interval_ms); + retry_interval_ms *= 10; + attempt++; + } + + return ret; +} + static int pseries_migrate_partition(u64 handle) { - atomic_t counter = ATOMIC_INIT(0); int ret; ret = wait_for_vasi_session_suspending(handle); if (ret) return ret; - ret = stop_machine(do_join, &counter, cpu_online_mask); + ret = pseries_suspend(handle); if (ret == 0) post_mobility_fixup(); else -- cgit v1.2.3 From 4d756894ba75f1afe7945ccafe9afebff50484b6 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:47 -0600 Subject: powerpc/rtas: dispatch partition migration requests to pseries sys_rtas() cannot call ibm,suspend-me directly in the same way it handles other inputs. Instead it must dispatch the request to code that can first perform the H_JOIN sequence before any call to ibm,suspend-me can succeed. Over time kernel/rtas.c has accreted a fair amount of platform-specific code to implement this. Since a different, more robust implementation of the suspend sequence is now in the pseries platform code, we want to dispatch the request there. Note that invoking ibm,suspend-me via the RTAS syscall is all but deprecated; this change preserves ABI compatibility for old programs while providing to them the benefit of the new partition suspend implementation. This is a behavior change in that the kernel performs the device tree update and firmware activation before returning, but experimentation indicates this is tolerated fine by legacy user space. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-16-nathanl@linux.ibm.com --- arch/powerpc/include/asm/rtas.h | 5 +++++ arch/powerpc/kernel/rtas.c | 2 +- arch/powerpc/platforms/pseries/mobility.c | 5 +++++ 3 files changed, 11 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index fdefe6a974eb..3b52d8574fcc 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -279,8 +279,13 @@ extern time64_t last_rtas_event; extern int clobbering_unread_rtas_event(void); extern int pseries_devicetree_update(s32 scope); extern void post_mobility_fixup(void); +int rtas_syscall_dispatch_ibm_suspend_me(u64 handle); #else static inline int clobbering_unread_rtas_event(void) { return 0; } +static inline int rtas_syscall_dispatch_ibm_suspend_me(u64 handle) +{ + return -EINVAL; +} #endif #ifdef CONFIG_PPC_RTAS_DAEMON diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 3a740ae933f8..d4b048571728 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1272,7 +1272,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) int rc = 0; u64 handle = ((u64)be32_to_cpu(args.args[0]) << 32) | be32_to_cpu(args.args[1]); - rc = rtas_ibm_suspend_me_unsafe(handle); + rc = rtas_syscall_dispatch_ibm_suspend_me(handle); if (rc == -EAGAIN) args.rets[0] = cpu_to_be32(RTAS_NOT_SUSPENDABLE); else if (rc == -EIO) diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index fe7e35cdc9d5..e670180f311d 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -615,6 +615,11 @@ static int pseries_migrate_partition(u64 handle) return ret; } +int rtas_syscall_dispatch_ibm_suspend_me(u64 handle) +{ + return pseries_migrate_partition(handle); +} + static ssize_t migration_store(struct class *class, struct class_attribute *attr, const char *buf, size_t count) -- cgit v1.2.3 From 52719fce3f4c7a8ac9eaa191e8d75a697f9fbcbc Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:49 -0600 Subject: powerpc/pseries/hibernation: drop pseries_suspend_begin() from suspend ops There are three ways pseries_suspend_begin() can be reached: 1. When "mem" is written to /sys/power/state: kobj_attr_store() -> state_store() -> pm_suspend() -> suspend_devices_and_enter() -> pseries_suspend_begin() This never works because there is no way to supply a valid stream id using this interface, and H_VASI_STATE is called with a stream id of zero. So this call path is useless at best. 2. When a stream id is written to /sys/devices/system/power/hibernate. pseries_suspend_begin() is polled directly from store_hibernate() until the stream is in the "Suspending" state (i.e. the platform is ready for the OS to suspend execution): dev_attr_store() -> store_hibernate() -> pseries_suspend_begin() 3. When a stream id is written to /sys/devices/system/power/hibernate (continued). After #2, pseries_suspend_begin() is called once again from the pm core: dev_attr_store() -> store_hibernate() -> pm_suspend() -> suspend_devices_and_enter() -> pseries_suspend_begin() This is redundant because the VASI suspend state is already known to be Suspending. The begin() callback of platform_suspend_ops is optional, so we can simply remove that assignment with no loss of function. Fixes: 32d8ad4e621d ("powerpc/pseries: Partition hibernation support") Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-18-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/suspend.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 81e0ac58d620..3eaa9d59dc7a 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -187,7 +187,6 @@ static struct bus_type suspend_subsys = { static const struct platform_suspend_ops pseries_suspend_ops = { .valid = suspend_valid_only_mem, - .begin = pseries_suspend_begin, .prepare_late = pseries_prepare_late, .enter = pseries_suspend_enter, }; -- cgit v1.2.3 From a10a5a17f4ac4f84fcc26162d43b53e2a4e1009a Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:50 -0600 Subject: powerpc/pseries/hibernation: pass stream id via function arguments There is no need for the stream id to be a file-global variable; pass it from hibernate_store() to pseries_suspend_begin() for the H_VASI_STATE call. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-19-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/suspend.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 3eaa9d59dc7a..232621f33510 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -15,7 +15,6 @@ #include #include "../../kernel/cacheinfo.h" -static u64 stream_id; static struct device suspend_dev; static DECLARE_COMPLETION(suspend_work); static struct rtas_suspend_me_data suspend_data; @@ -29,7 +28,7 @@ static atomic_t suspending; * Return value: * 0 on success / other on failure **/ -static int pseries_suspend_begin(suspend_state_t state) +static int pseries_suspend_begin(u64 stream_id) { long vasi_state, rc; unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; @@ -132,6 +131,7 @@ static ssize_t store_hibernate(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { + u64 stream_id; int rc; if (!capable(CAP_SYS_ADMIN)) @@ -140,7 +140,7 @@ static ssize_t store_hibernate(struct device *dev, stream_id = simple_strtoul(buf, NULL, 16); do { - rc = pseries_suspend_begin(PM_SUSPEND_MEM); + rc = pseries_suspend_begin(stream_id); if (rc == -EAGAIN) ssleep(1); } while (rc == -EAGAIN); @@ -148,8 +148,6 @@ static ssize_t store_hibernate(struct device *dev, if (!rc) rc = pm_suspend(PM_SUSPEND_MEM); - stream_id = 0; - if (!rc) rc = count; -- cgit v1.2.3 From ed22bb8d39fa7f3980afc6e16d2a891847367d33 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:51 -0600 Subject: powerpc/pseries/hibernation: remove pseries_suspend_cpu() Since commit 48f6e7f6d948 ("powerpc/pseries: remove cede offline state for CPUs"), ppc_md.suspend_disable_cpu() is no longer used and all CPUs (save one) are placed into true offline state as opposed to H_JOIN. So pseries_suspend_cpu() is effectively unused; remove it. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-20-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/suspend.c | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 232621f33510..3315d698d5ab 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -48,20 +48,6 @@ static int pseries_suspend_begin(u64 stream_id) vasi_state); return -EIO; } - - return 0; -} - -/** - * pseries_suspend_cpu - Suspend a single CPU - * - * Makes the H_JOIN call to suspend the CPU - * - **/ -static int pseries_suspend_cpu(void) -{ - if (atomic_read(&suspending)) - return rtas_suspend_cpu(&suspend_data); return 0; } @@ -235,7 +221,6 @@ static int __init pseries_suspend_init(void) if ((rc = pseries_suspend_sysfs_register(&suspend_dev))) return rc; - ppc_md.suspend_disable_cpu = pseries_suspend_cpu; ppc_md.suspend_enable_irqs = pseries_suspend_enable_irqs; suspend_set_ops(&pseries_suspend_ops); return 0; -- cgit v1.2.3 From 366fb13bf13b029c4d43bf19382f7aea69bfa4b7 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:54 -0600 Subject: powerpc/pseries/hibernation: switch to rtas_ibm_suspend_me() rtas_suspend_last_cpu() and related code perform a lot of work that isn't relevant to the hibernation workflow. All other CPUs are offline when called so there is no need to place them in H_JOIN or prod them on resume, nor is there need for retries or operations on shared state. Call the rtas_ibm_suspend_me() wrapper function directly from pseries_suspend_enter() instead of using rtas_suspend_last_cpu(). Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-23-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/suspend.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 3315d698d5ab..703728cb95ec 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -76,11 +76,7 @@ static void pseries_suspend_enable_irqs(void) **/ static int pseries_suspend_enter(suspend_state_t state) { - int rc = rtas_suspend_last_cpu(&suspend_data); - - atomic_set(&suspending, 0); - atomic_set(&suspend_data.done, 1); - return rc; + return rtas_ibm_suspend_me(NULL); } /** -- cgit v1.2.3 From b866459489fe8ef0e92cde3cbd6bbb1af6c4e99b Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:56 -0600 Subject: powerpc/pseries/hibernation: remove redundant cacheinfo update Partitions with cache nodes in the device tree can encounter the following warning on resume: CPU 0 already accounted in PowerPC,POWER9@0(Data) WARNING: CPU: 0 PID: 3177 at arch/powerpc/kernel/cacheinfo.c:197 cacheinfo_cpu_online+0x640/0x820 These calls to cacheinfo_cpu_offline/online have been redundant since commit e610a466d16a ("powerpc/pseries/mobility: rebuild cacheinfo hierarchy post-migration"). Fixes: e610a466d16a ("powerpc/pseries/mobility: rebuild cacheinfo hierarchy post-migration") Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-25-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/suspend.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 703728cb95ec..6a94cc0deb88 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -13,7 +13,6 @@ #include #include #include -#include "../../kernel/cacheinfo.h" static struct device suspend_dev; static DECLARE_COMPLETION(suspend_work); @@ -63,9 +62,7 @@ static void pseries_suspend_enable_irqs(void) * Update configuration which can be modified based on device tree * changes during resume. */ - cacheinfo_cpu_offline(smp_processor_id()); post_mobility_fixup(); - cacheinfo_cpu_online(smp_processor_id()); } /** -- cgit v1.2.3 From fa53bcdb7413e7c40170106781f6b5bb9d74db84 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:57 -0600 Subject: powerpc/pseries/hibernation: perform post-suspend fixups later The pseries hibernate code calls post_mobility_fixup() which is sort of a dumping ground of fixups that need to run after resuming from suspend regardless of whether suspend was a hibernation or a migration. Calling post_mobility_fixup() from pseries_suspend_enable_irqs() runs this code early in resume with devices suspended and only one CPU up, while the much more commonly used migration case runs these fixups in a more typical process context. Call post_mobility_fixup() after the suspend core returns a success status to the hibernate sysfs store method and remove pseries_suspend_enable_irqs(). Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-26-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/suspend.c | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 6a94cc0deb88..589a91730db8 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -50,21 +50,6 @@ static int pseries_suspend_begin(u64 stream_id) return 0; } -/** - * pseries_suspend_enable_irqs - * - * Post suspend configuration updates - * - **/ -static void pseries_suspend_enable_irqs(void) -{ - /* - * Update configuration which can be modified based on device tree - * changes during resume. - */ - post_mobility_fixup(); -} - /** * pseries_suspend_enter - Final phase of hibernation * @@ -127,8 +112,11 @@ static ssize_t store_hibernate(struct device *dev, if (!rc) rc = pm_suspend(PM_SUSPEND_MEM); - if (!rc) + if (!rc) { rc = count; + post_mobility_fixup(); + } + return rc; } @@ -214,7 +202,6 @@ static int __init pseries_suspend_init(void) if ((rc = pseries_suspend_sysfs_register(&suspend_dev))) return rc; - ppc_md.suspend_enable_irqs = pseries_suspend_enable_irqs; suspend_set_ops(&pseries_suspend_ops); return 0; } -- cgit v1.2.3 From d102f8312e1ea5e8bf84fceebf99186f22d16fc6 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:51:58 -0600 Subject: powerpc/pseries/hibernation: remove prepare_late() callback The pseries hibernate code no longer calls into the original join/suspend code in kernel/rtas.c, so pseries_prepare_late() and related code don't accomplish anything now. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-27-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/suspend.c | 25 ------------------------- 1 file changed, 25 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 589a91730db8..1b902cbf85c5 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -15,9 +15,6 @@ #include static struct device suspend_dev; -static DECLARE_COMPLETION(suspend_work); -static struct rtas_suspend_me_data suspend_data; -static atomic_t suspending; /** * pseries_suspend_begin - First phase of hibernation @@ -61,23 +58,6 @@ static int pseries_suspend_enter(suspend_state_t state) return rtas_ibm_suspend_me(NULL); } -/** - * pseries_prepare_late - Prepare to suspend all other CPUs - * - * Return value: - * 0 on success / other on failure - **/ -static int pseries_prepare_late(void) -{ - atomic_set(&suspending, 1); - atomic_set(&suspend_data.working, 0); - atomic_set(&suspend_data.done, 0); - atomic_set(&suspend_data.error, 0); - suspend_data.complete = &suspend_work; - reinit_completion(&suspend_work); - return 0; -} - /** * store_hibernate - Initiate partition hibernation * @dev: subsys root device @@ -152,7 +132,6 @@ static struct bus_type suspend_subsys = { static const struct platform_suspend_ops pseries_suspend_ops = { .valid = suspend_valid_only_mem, - .prepare_late = pseries_prepare_late, .enter = pseries_suspend_enter, }; @@ -195,10 +174,6 @@ static int __init pseries_suspend_init(void) if (!firmware_has_feature(FW_FEATURE_LPAR)) return 0; - suspend_data.token = rtas_token("ibm,suspend-me"); - if (suspend_data.token == RTAS_UNKNOWN_SERVICE) - return 0; - if ((rc = pseries_suspend_sysfs_register(&suspend_dev))) return rc; -- cgit v1.2.3 From 2efd7f6eb9b7107e469837d8452e750d7d080a5d Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Dec 2020 15:52:00 -0600 Subject: powerpc/pseries/mobility: refactor node lookup during DT update In pseries_devicetree_update(), with each call to ibm,update-nodes the partition firmware communicates the node to be deleted or updated by placing its phandle in the work buffer. Each of delete_dt_node(), update_dt_node(), and add_dt_node() have duplicate lookups using the phandle value and corresponding refcount management. Move the lookup and of_node_put() into pseries_devicetree_update(), and emit a warning on any failed lookups. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201207215200.1785968-29-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/mobility.c | 49 +++++++++++-------------------- 1 file changed, 17 insertions(+), 32 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index e670180f311d..ea4d6a660e0d 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -61,18 +61,10 @@ static int mobility_rtas_call(int token, char *buf, s32 scope) return rc; } -static int delete_dt_node(__be32 phandle) +static int delete_dt_node(struct device_node *dn) { - struct device_node *dn; - - dn = of_find_node_by_phandle(be32_to_cpu(phandle)); - if (!dn) - return -ENOENT; - pr_debug("removing node %pOFfp\n", dn); - dlpar_detach_node(dn); - of_node_put(dn); return 0; } @@ -137,10 +129,9 @@ static int update_dt_property(struct device_node *dn, struct property **prop, return 0; } -static int update_dt_node(__be32 phandle, s32 scope) +static int update_dt_node(struct device_node *dn, s32 scope) { struct update_props_workarea *upwa; - struct device_node *dn; struct property *prop = NULL; int i, rc, rtas_rc; char *prop_data; @@ -157,14 +148,8 @@ static int update_dt_node(__be32 phandle, s32 scope) if (!rtas_buf) return -ENOMEM; - dn = of_find_node_by_phandle(be32_to_cpu(phandle)); - if (!dn) { - kfree(rtas_buf); - return -ENOENT; - } - upwa = (struct update_props_workarea *)&rtas_buf[0]; - upwa->phandle = phandle; + upwa->phandle = cpu_to_be32(dn->phandle); do { rtas_rc = mobility_rtas_call(update_properties_token, rtas_buf, @@ -224,26 +209,18 @@ static int update_dt_node(__be32 phandle, s32 scope) cond_resched(); } while (rtas_rc == 1); - of_node_put(dn); kfree(rtas_buf); return 0; } -static int add_dt_node(__be32 parent_phandle, __be32 drc_index) +static int add_dt_node(struct device_node *parent_dn, __be32 drc_index) { struct device_node *dn; - struct device_node *parent_dn; int rc; - parent_dn = of_find_node_by_phandle(be32_to_cpu(parent_phandle)); - if (!parent_dn) - return -ENOENT; - dn = dlpar_configure_connector(drc_index, parent_dn); - if (!dn) { - of_node_put(parent_dn); + if (!dn) return -ENOENT; - } rc = dlpar_attach_node(dn, parent_dn); if (rc) @@ -251,7 +228,6 @@ static int add_dt_node(__be32 parent_phandle, __be32 drc_index) pr_debug("added node %pOFfp\n", dn); - of_node_put(parent_dn); return rc; } @@ -284,22 +260,31 @@ int pseries_devicetree_update(s32 scope) data++; for (i = 0; i < node_count; i++) { + struct device_node *np; __be32 phandle = *data++; __be32 drc_index; + np = of_find_node_by_phandle(be32_to_cpu(phandle)); + if (!np) { + pr_warn("Failed lookup: phandle 0x%x for action 0x%x\n", + be32_to_cpu(phandle), action); + continue; + } + switch (action) { case DELETE_DT_NODE: - delete_dt_node(phandle); + delete_dt_node(np); break; case UPDATE_DT_NODE: - update_dt_node(phandle, scope); + update_dt_node(np, scope); break; case ADD_DT_NODE: drc_index = *data++; - add_dt_node(phandle, drc_index); + add_dt_node(np, drc_index); break; } + of_node_put(np); cond_resched(); } } -- cgit v1.2.3 From db972a3787d12b1ce9ba7a31ec376d8a79e04c47 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 8 Dec 2020 05:24:19 +0000 Subject: powerpc/powermac: Fix low_sleep_handler with CONFIG_VMAP_STACK low_sleep_handler() can't restore the context from standard stack because the stack can hardly be accessed with MMU OFF. Store everything in a global storage area instead of storing a pointer to the stack in that global storage area. To avoid a complete churn of the function, still use r1 as the pointer to the storage area during restore. Fixes: cd08f109e262 ("powerpc/32s: Enable CONFIG_VMAP_STACK") Reported-by: Giuseppe Sacco Signed-off-by: Christophe Leroy Tested-by: Giuseppe Sacco Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e3e0d8042a3ba75cb4a9546c19c408b5b5b28994.1607404931.git.christophe.leroy@csgroup.eu --- arch/powerpc/platforms/Kconfig.cputype | 2 +- arch/powerpc/platforms/powermac/sleep.S | 132 ++++++++++++++------------------ 2 files changed, 60 insertions(+), 74 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 45ce09db8f46..aa071663b9a9 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -36,7 +36,7 @@ config PPC_BOOK3S_6xx select PPC_HAVE_PMU_SUPPORT select PPC_HAVE_KUEP select PPC_HAVE_KUAP - select HAVE_ARCH_VMAP_STACK if !ADB_PMU + select HAVE_ARCH_VMAP_STACK config PPC_85xx bool "Freescale 85xx" diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S index 7e0f8ba6e54a..d497a60003d2 100644 --- a/arch/powerpc/platforms/powermac/sleep.S +++ b/arch/powerpc/platforms/powermac/sleep.S @@ -44,7 +44,8 @@ #define SL_TB 0xa0 #define SL_R2 0xa8 #define SL_CR 0xac -#define SL_R12 0xb0 /* r12 to r31 */ +#define SL_LR 0xb0 +#define SL_R12 0xb4 /* r12 to r31 */ #define SL_SIZE (SL_R12 + 80) .section .text @@ -63,105 +64,107 @@ _GLOBAL(low_sleep_handler) blr #else mflr r0 - stw r0,4(r1) - stwu r1,-SL_SIZE(r1) + lis r11,sleep_storage@ha + addi r11,r11,sleep_storage@l + stw r0,SL_LR(r11) mfcr r0 - stw r0,SL_CR(r1) - stw r2,SL_R2(r1) - stmw r12,SL_R12(r1) + stw r0,SL_CR(r11) + stw r1,SL_SP(r11) + stw r2,SL_R2(r11) + stmw r12,SL_R12(r11) /* Save MSR & SDR1 */ mfmsr r4 - stw r4,SL_MSR(r1) + stw r4,SL_MSR(r11) mfsdr1 r4 - stw r4,SL_SDR1(r1) + stw r4,SL_SDR1(r11) /* Get a stable timebase and save it */ 1: mftbu r4 - stw r4,SL_TB(r1) + stw r4,SL_TB(r11) mftb r5 - stw r5,SL_TB+4(r1) + stw r5,SL_TB+4(r11) mftbu r3 cmpw r3,r4 bne 1b /* Save SPRGs */ mfsprg r4,0 - stw r4,SL_SPRG0(r1) + stw r4,SL_SPRG0(r11) mfsprg r4,1 - stw r4,SL_SPRG0+4(r1) + stw r4,SL_SPRG0+4(r11) mfsprg r4,2 - stw r4,SL_SPRG0+8(r1) + stw r4,SL_SPRG0+8(r11) mfsprg r4,3 - stw r4,SL_SPRG0+12(r1) + stw r4,SL_SPRG0+12(r11) /* Save BATs */ mfdbatu r4,0 - stw r4,SL_DBAT0(r1) + stw r4,SL_DBAT0(r11) mfdbatl r4,0 - stw r4,SL_DBAT0+4(r1) + stw r4,SL_DBAT0+4(r11) mfdbatu r4,1 - stw r4,SL_DBAT1(r1) + stw r4,SL_DBAT1(r11) mfdbatl r4,1 - stw r4,SL_DBAT1+4(r1) + stw r4,SL_DBAT1+4(r11) mfdbatu r4,2 - stw r4,SL_DBAT2(r1) + stw r4,SL_DBAT2(r11) mfdbatl r4,2 - stw r4,SL_DBAT2+4(r1) + stw r4,SL_DBAT2+4(r11) mfdbatu r4,3 - stw r4,SL_DBAT3(r1) + stw r4,SL_DBAT3(r11) mfdbatl r4,3 - stw r4,SL_DBAT3+4(r1) + stw r4,SL_DBAT3+4(r11) mfibatu r4,0 - stw r4,SL_IBAT0(r1) + stw r4,SL_IBAT0(r11) mfibatl r4,0 - stw r4,SL_IBAT0+4(r1) + stw r4,SL_IBAT0+4(r11) mfibatu r4,1 - stw r4,SL_IBAT1(r1) + stw r4,SL_IBAT1(r11) mfibatl r4,1 - stw r4,SL_IBAT1+4(r1) + stw r4,SL_IBAT1+4(r11) mfibatu r4,2 - stw r4,SL_IBAT2(r1) + stw r4,SL_IBAT2(r11) mfibatl r4,2 - stw r4,SL_IBAT2+4(r1) + stw r4,SL_IBAT2+4(r11) mfibatu r4,3 - stw r4,SL_IBAT3(r1) + stw r4,SL_IBAT3(r11) mfibatl r4,3 - stw r4,SL_IBAT3+4(r1) + stw r4,SL_IBAT3+4(r11) BEGIN_MMU_FTR_SECTION mfspr r4,SPRN_DBAT4U - stw r4,SL_DBAT4(r1) + stw r4,SL_DBAT4(r11) mfspr r4,SPRN_DBAT4L - stw r4,SL_DBAT4+4(r1) + stw r4,SL_DBAT4+4(r11) mfspr r4,SPRN_DBAT5U - stw r4,SL_DBAT5(r1) + stw r4,SL_DBAT5(r11) mfspr r4,SPRN_DBAT5L - stw r4,SL_DBAT5+4(r1) + stw r4,SL_DBAT5+4(r11) mfspr r4,SPRN_DBAT6U - stw r4,SL_DBAT6(r1) + stw r4,SL_DBAT6(r11) mfspr r4,SPRN_DBAT6L - stw r4,SL_DBAT6+4(r1) + stw r4,SL_DBAT6+4(r11) mfspr r4,SPRN_DBAT7U - stw r4,SL_DBAT7(r1) + stw r4,SL_DBAT7(r11) mfspr r4,SPRN_DBAT7L - stw r4,SL_DBAT7+4(r1) + stw r4,SL_DBAT7+4(r11) mfspr r4,SPRN_IBAT4U - stw r4,SL_IBAT4(r1) + stw r4,SL_IBAT4(r11) mfspr r4,SPRN_IBAT4L - stw r4,SL_IBAT4+4(r1) + stw r4,SL_IBAT4+4(r11) mfspr r4,SPRN_IBAT5U - stw r4,SL_IBAT5(r1) + stw r4,SL_IBAT5(r11) mfspr r4,SPRN_IBAT5L - stw r4,SL_IBAT5+4(r1) + stw r4,SL_IBAT5+4(r11) mfspr r4,SPRN_IBAT6U - stw r4,SL_IBAT6(r1) + stw r4,SL_IBAT6(r11) mfspr r4,SPRN_IBAT6L - stw r4,SL_IBAT6+4(r1) + stw r4,SL_IBAT6+4(r11) mfspr r4,SPRN_IBAT7U - stw r4,SL_IBAT7(r1) + stw r4,SL_IBAT7(r11) mfspr r4,SPRN_IBAT7L - stw r4,SL_IBAT7+4(r1) + stw r4,SL_IBAT7+4(r11) END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) /* Backup various CPU config stuffs */ @@ -180,9 +183,9 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) lis r5,grackle_wake_up@ha addi r5,r5,grackle_wake_up@l tophys(r5,r5) - stw r5,SL_PC(r1) + stw r5,SL_PC(r11) lis r4,KERNELBASE@h - tophys(r5,r1) + tophys(r5,r11) addi r5,r5,SL_PC lis r6,MAGIC@ha addi r6,r6,MAGIC@l @@ -194,12 +197,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) tophys(r3,r3) stw r3,0x80(r4) stw r5,0x84(r4) - /* Store a pointer to our backup storage into - * a kernel global - */ - lis r3,sleep_storage@ha - addi r3,r3,sleep_storage@l - stw r5,0(r3) .globl low_cpu_offline_self low_cpu_offline_self: @@ -279,7 +276,7 @@ _GLOBAL(core99_wake_up) lis r3,sleep_storage@ha addi r3,r3,sleep_storage@l tophys(r3,r3) - lwz r1,0(r3) + addi r1,r3,SL_PC /* Pass thru to older resume code ... */ _ASM_NOKPROBE_SYMBOL(core99_wake_up) @@ -399,13 +396,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) blt 1b sync - /* restore the MSR and turn on the MMU */ - lwz r3,SL_MSR(r1) - bl turn_on_mmu - - /* get back the stack pointer */ - tovirt(r1,r1) - /* Restore TB */ li r3,0 mttbl r3 @@ -419,28 +409,24 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) mtcr r0 lwz r2,SL_R2(r1) lmw r12,SL_R12(r1) - addi r1,r1,SL_SIZE - lwz r0,4(r1) - mtlr r0 - blr -_ASM_NOKPROBE_SYMBOL(grackle_wake_up) -turn_on_mmu: - mflr r4 - tovirt(r4,r4) + /* restore the MSR and SP and turn on the MMU and return */ + lwz r3,SL_MSR(r1) + lwz r4,SL_LR(r1) + lwz r1,SL_SP(r1) mtsrr0 r4 mtsrr1 r3 sync isync rfi -_ASM_NOKPROBE_SYMBOL(turn_on_mmu) +_ASM_NOKPROBE_SYMBOL(grackle_wake_up) #endif /* defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ) */ - .section .data + .section .bss .balign L1_CACHE_BYTES sleep_storage: - .long 0 + .space SL_SIZE .balign L1_CACHE_BYTES, 0 #endif /* CONFIG_PPC_BOOK3S_32 */ -- cgit v1.2.3 From a6a50d8495d098b6459166c3707ab251d3dc9e06 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 22 Oct 2020 06:29:42 +0000 Subject: powerpc/32s: Remove CONFIG_PPC_BOOK3S_6xx As 601 is gone, CONFIG_PPC_BOO3S_6xx and CONFIG_PPC_BOOK3S_32 are dedundant. Remove CONFIG_PPC_BOOK3S_6xx. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f18c16af37f6f77b577bed8d9e12831b695617ae.1603348103.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/cputable.c | 4 ++-- arch/powerpc/platforms/Kconfig.cputype | 6 +----- 2 files changed, 3 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index f2fcd29aab23..086eadd5c3c8 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -611,7 +611,7 @@ static struct cpu_spec __initdata cpu_specs[] = { #endif /* CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_PPC32 -#ifdef CONFIG_PPC_BOOK3S_6xx +#ifdef CONFIG_PPC_BOOK3S_32 { /* 603 */ .pvr_mask = 0xffff0000, .pvr_value = 0x00030000, @@ -1241,7 +1241,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_generic, .platform = "ppc603", }, -#endif /* CONFIG_PPC_BOOK3S_6xx */ +#endif /* CONFIG_PPC_BOOK3S_32 */ #ifdef CONFIG_PPC_8xx { /* 8xx */ .pvr_mask = 0xffff0000, diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index aa071663b9a9..14a8481f36a8 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -11,9 +11,6 @@ config PPC64 This option selects whether a 32-bit or a 64-bit kernel will be built. -config PPC_BOOK3S_32 - bool - menu "Processor support" choice prompt "Processor Type" @@ -29,9 +26,8 @@ choice If unsure, select 52xx/6xx/7xx/74xx/82xx/83xx/86xx. -config PPC_BOOK3S_6xx +config PPC_BOOK3S_32 bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx" - select PPC_BOOK3S_32 imply PPC_FPU select PPC_HAVE_PMU_SUPPORT select PPC_HAVE_KUEP -- cgit v1.2.3 From 44e9754d63c7b419874e4c18c0b5e7a770e058c6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 22 Oct 2020 06:29:44 +0000 Subject: powerpc/32s: Make support for 603 and 604+ selectable book3s/32 has two main families: - CPU with 603 cores that don't have HASH PTE table and perform SW TLB loading. - Other CPUs based on 604+ cores that have HASH PTE table. This leads to some complex logic and additionnal code to support both. This makes sense for distribution kernels that aim at running on any CPU, but when you are fine tuning a kernel for an embedded 603 based board you don't need all the HASH logic. Allow selection of support for each family, in order to opt out unneeded parts of code. At least one must be selected. Note that some of the CPU supporting HASH also support SW TLB loading, however it is not supported by Linux kernel at the time being, because they do not have alternate registers in the TLB miss exception handlers. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/8dde0cdb629a71abc29b0d85a52a86e920376cb6.1603348103.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/cputable.h | 8 ++++++-- arch/powerpc/include/asm/mmu.h | 5 ++++- arch/powerpc/kernel/cputable.c | 6 ++++++ arch/powerpc/platforms/Kconfig.cputype | 16 ++++++++++++++++ 4 files changed, 32 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index b088c3443a99..5f21a5bab467 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -502,7 +502,7 @@ static inline void cpu_feature_keys_init(void) { } #else enum { CPU_FTRS_POSSIBLE = -#ifdef CONFIG_PPC_BOOK3S_32 +#ifdef CONFIG_PPC_BOOK3S_604 CPU_FTRS_604 | CPU_FTRS_740_NOTAU | CPU_FTRS_740 | CPU_FTRS_750 | CPU_FTRS_750FX1 | CPU_FTRS_750FX2 | CPU_FTRS_750FX | CPU_FTRS_750GX | @@ -511,6 +511,8 @@ enum { CPU_FTRS_7455_20 | CPU_FTRS_7455 | CPU_FTRS_7447_10 | CPU_FTRS_7447 | CPU_FTRS_7447A | CPU_FTRS_CLASSIC32 | +#endif +#ifdef CONFIG_PPC_BOOK3S_603 CPU_FTRS_603 | CPU_FTRS_82XX | CPU_FTRS_G2_LE | CPU_FTRS_E300 | CPU_FTRS_E300C2 | #endif @@ -572,7 +574,7 @@ enum { #else enum { CPU_FTRS_ALWAYS = -#ifdef CONFIG_PPC_BOOK3S_32 +#ifdef CONFIG_PPC_BOOK3S_604 CPU_FTRS_604 & CPU_FTRS_740_NOTAU & CPU_FTRS_740 & CPU_FTRS_750 & CPU_FTRS_750FX1 & CPU_FTRS_750FX2 & CPU_FTRS_750FX & CPU_FTRS_750GX & @@ -581,6 +583,8 @@ enum { CPU_FTRS_7455_20 & CPU_FTRS_7455 & CPU_FTRS_7447_10 & CPU_FTRS_7447 & CPU_FTRS_7447A & CPU_FTRS_CLASSIC32 & +#endif +#ifdef CONFIG_PPC_BOOK3S_603 CPU_FTRS_603 & CPU_FTRS_82XX & CPU_FTRS_G2_LE & CPU_FTRS_E300 & CPU_FTRS_E300C2 & #endif diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index ebf50286a924..80b27f5d9648 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -157,7 +157,7 @@ DECLARE_PER_CPU(int, next_tlbcam_idx); enum { MMU_FTRS_POSSIBLE = -#ifdef CONFIG_PPC_BOOK3S +#if defined(CONFIG_PPC_BOOK3S_64) || defined(CONFIG_PPC_BOOK3S_604) MMU_FTR_HPTE_TABLE | #endif #ifdef CONFIG_PPC_8xx @@ -206,6 +206,9 @@ enum { 0, }; +#if defined(CONFIG_PPC_BOOK3S_604) && !defined(CONFIG_PPC_BOOK3S_603) +#define MMU_FTRS_ALWAYS MMU_FTR_HPTE_TABLE +#endif #ifdef CONFIG_PPC_8xx #define MMU_FTRS_ALWAYS MMU_FTR_TYPE_8xx #endif diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 5bf971f7c325..65f35ec052d4 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -612,6 +612,7 @@ static struct cpu_spec __initdata cpu_specs[] = { #ifdef CONFIG_PPC32 #ifdef CONFIG_PPC_BOOK3S_32 +#ifdef CONFIG_PPC_BOOK3S_604 { /* 604 */ .pvr_mask = 0xffff0000, .pvr_value = 0x00040000, @@ -1101,6 +1102,8 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_generic, .platform = "ppc7450", }, +#endif /* CONFIG_PPC_BOOK3S_604 */ +#ifdef CONFIG_PPC_BOOK3S_603 { /* 603 */ .pvr_mask = 0xffff0000, .pvr_value = 0x00030000, @@ -1229,6 +1232,8 @@ static struct cpu_spec __initdata cpu_specs[] = { .platform = "ppc603", }, #endif +#endif /* CONFIG_PPC_BOOK3S_603 */ +#ifdef CONFIG_PPC_BOOK3S_604 { /* default match, we assume split I/D cache & TB (non-601)... */ .pvr_mask = 0x00000000, .pvr_value = 0x00000000, @@ -1241,6 +1246,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_generic, .platform = "ppc603", }, +#endif /* CONFIG_PPC_BOOK3S_604 */ #endif /* CONFIG_PPC_BOOK3S_32 */ #ifdef CONFIG_PPC_8xx { /* 8xx */ diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 14a8481f36a8..3ce907523b1e 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -64,6 +64,22 @@ config 44x endchoice +config PPC_BOOK3S_603 + bool "Support for 603 SW loaded TLB" + depends on PPC_BOOK3S_32 + default y + help + Provide support for processors based on the 603 cores. Those + processors don't have a HASH MMU and provide SW TLB loading. + +config PPC_BOOK3S_604 + bool "Support for 604+ HASH MMU" if PPC_BOOK3S_603 + depends on PPC_BOOK3S_32 + default y + help + Provide support for processors not based on the 603 cores. + Those processors have a HASH MMU. + choice prompt "Processor Type" depends on PPC64 -- cgit v1.2.3 From 1e78f723d6a52966bfe3804209dbf404fdc9d3bb Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 4 Dec 2020 10:11:34 +0000 Subject: powerpc/8xx: Fix early debug when SMC1 is relocated When SMC1 is relocated and early debug is selected, the board hangs is ppc_md.setup_arch(). This is because ones the microcode has been loaded and SMC1 relocated, early debug writes in the weed. To allow smooth continuation, the SMC1 parameter RAM set up by the bootloader have to be copied into the new location. Fixes: 43db76f41824 ("powerpc/8xx: Add microcode patch to move SMC parameter RAM.") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b2f71f39eca543f1e4ec06596f09a8b12235c701.1607076683.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/cpm1.h | 1 + arch/powerpc/platforms/8xx/micropatch.c | 11 +++++++++++ 2 files changed, 12 insertions(+) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/include/asm/cpm1.h b/arch/powerpc/include/asm/cpm1.h index a116fe931789..3bdd74739cb8 100644 --- a/arch/powerpc/include/asm/cpm1.h +++ b/arch/powerpc/include/asm/cpm1.h @@ -68,6 +68,7 @@ extern void cpm_reset(void); #define PROFF_SPI ((uint)0x0180) #define PROFF_SCC3 ((uint)0x0200) #define PROFF_SMC1 ((uint)0x0280) +#define PROFF_DSP1 ((uint)0x02c0) #define PROFF_SCC4 ((uint)0x0300) #define PROFF_SMC2 ((uint)0x0380) diff --git a/arch/powerpc/platforms/8xx/micropatch.c b/arch/powerpc/platforms/8xx/micropatch.c index aed4bc75f352..aef179fcbd4f 100644 --- a/arch/powerpc/platforms/8xx/micropatch.c +++ b/arch/powerpc/platforms/8xx/micropatch.c @@ -360,6 +360,17 @@ void __init cpm_load_patch(cpm8xx_t *cp) if (IS_ENABLED(CONFIG_SMC_UCODE_PATCH)) { smc_uart_t *smp; + if (IS_ENABLED(CONFIG_PPC_EARLY_DEBUG_CPM)) { + int i; + + for (i = 0; i < sizeof(*smp); i += 4) { + u32 __iomem *src = (u32 __iomem *)&cp->cp_dparam[PROFF_SMC1 + i]; + u32 __iomem *dst = (u32 __iomem *)&cp->cp_dparam[PROFF_DSP1 + i]; + + out_be32(dst, in_be32(src)); + } + } + smp = (smc_uart_t *)&cp->cp_dparam[PROFF_SMC1]; out_be16(&smp->smc_rpbase, 0x1ec0); smp = (smc_uart_t *)&cp->cp_dparam[PROFF_SMC2]; -- cgit v1.2.3 From bccc58986a2f98e3af349c85c5f49aac7fb19ef2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 24 Nov 2020 15:24:55 +0000 Subject: powerpc/8xx: Always pin kernel text TLB There is no big poing in not pinning kernel text anymore, as now we can keep pinned TLB even with things like DEBUG_PAGEALLOC. Remove CONFIG_PIN_TLB_TEXT, making it always right. Signed-off-by: Christophe Leroy [mpe: Drop ifdef around mmu_pin_tlb() to fix build errors] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/203b89de491e1379f1677a2685211b7c32adfff0.1606231483.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 3 +-- arch/powerpc/kernel/head_8xx.S | 22 +++------------------- arch/powerpc/mm/nohash/8xx.c | 3 +-- arch/powerpc/platforms/8xx/Kconfig | 7 ------- 4 files changed, 5 insertions(+), 30 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index d0d16c3e1b4b..2b8e47d1aa91 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -808,8 +808,7 @@ config DATA_SHIFT_BOOL bool "Set custom data alignment" depends on ADVANCED_OPTIONS depends on STRICT_KERNEL_RWX || DEBUG_PAGEALLOC - depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && \ - (!PIN_TLB_TEXT || !STRICT_KERNEL_RWX)) + depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && !STRICT_KERNEL_RWX) help This option allows you to set the kernel data alignment. When RAM is mapped by blocks, the alignment needs to fit the size and diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 66ee62f30d36..e5d57018e1a0 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -42,15 +42,6 @@ #endif .endm -/* - * We need an ITLB miss handler for kernel addresses if: - * - Either we have modules - * - Or we have not pinned the first 8M - */ -#if defined(CONFIG_MODULES) || !defined(CONFIG_PIN_TLB_TEXT) -#define ITLB_MISS_KERNEL 1 -#endif - /* * Value for the bits that have fixed value in RPN entries. * Also used for tagging DAR for DTLBerror. @@ -209,12 +200,12 @@ InstructionTLBMiss: mfspr r10, SPRN_SRR0 /* Get effective address of fault */ INVALIDATE_ADJACENT_PAGES_CPU15(r10) mtspr SPRN_MD_EPN, r10 -#ifdef ITLB_MISS_KERNEL +#ifdef CONFIG_MODULES mfcr r11 compare_to_kernel_boundary r10, r10 #endif mfspr r10, SPRN_M_TWB /* Get level 1 table */ -#ifdef ITLB_MISS_KERNEL +#ifdef CONFIG_MODULES blt+ 3f rlwinm r10, r10, 0, 20, 31 oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha @@ -618,10 +609,6 @@ start_here: lis r0, (MD_TWAM | MD_RSV4I)@h mtspr SPRN_MD_CTR, r0 #endif -#ifndef CONFIG_PIN_TLB_TEXT - li r0, 0 - mtspr SPRN_MI_CTR, r0 -#endif #if !defined(CONFIG_PIN_TLB_DATA) && !defined(CONFIG_PIN_TLB_IMMR) lis r0, MD_TWAM@h mtspr SPRN_MD_CTR, r0 @@ -717,7 +704,6 @@ initial_mmu: mtspr SPRN_DER, r8 blr -#ifdef CONFIG_PIN_TLB _GLOBAL(mmu_pin_tlb) lis r9, (1f - PAGE_OFFSET)@h ori r9, r9, (1f - PAGE_OFFSET)@l @@ -739,7 +725,6 @@ _GLOBAL(mmu_pin_tlb) mtspr SPRN_MD_CTR, r6 tlbia -#ifdef CONFIG_PIN_TLB_TEXT LOAD_REG_IMMEDIATE(r5, 28 << 8) LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED) @@ -760,7 +745,7 @@ _GLOBAL(mmu_pin_tlb) bdnzt lt, 2b lis r0, MI_RSV4I@h mtspr SPRN_MI_CTR, r0 -#endif + LOAD_REG_IMMEDIATE(r5, 28 << 8 | MD_TWAM) #ifdef CONFIG_PIN_TLB_DATA LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) @@ -818,7 +803,6 @@ _GLOBAL(mmu_pin_tlb) mtspr SPRN_SRR1, r10 mtspr SPRN_SRR0, r11 rfi -#endif /* CONFIG_PIN_TLB */ /* * We put a few things here that have to be page-aligned. diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 231ca95f9ffb..19a3eec1d8c5 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -186,8 +186,7 @@ void mmu_mark_initmem_nx(void) mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, false); mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false); - if (IS_ENABLED(CONFIG_PIN_TLB_TEXT)) - mmu_pin_tlb(block_mapped_ram, false); + mmu_pin_tlb(block_mapped_ram, false); } #ifdef CONFIG_STRICT_KERNEL_RWX diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig index abb2b45b2789..60cc5b537a98 100644 --- a/arch/powerpc/platforms/8xx/Kconfig +++ b/arch/powerpc/platforms/8xx/Kconfig @@ -194,13 +194,6 @@ config PIN_TLB_IMMR CONFIG_PIN_TLB_DATA is also selected, it will reduce CONFIG_PIN_TLB_DATA to 24 Mbytes. -config PIN_TLB_TEXT - bool "Pinned TLB for TEXT" - depends on PIN_TLB - default y - help - This pins kernel text with 8M pages. - endmenu endmenu -- cgit v1.2.3 From 59d512e4374b2d8a6ad341475dc94c4a4bdec7d3 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 7 Nov 2020 11:43:36 +1000 Subject: powerpc/64: irq replay remove decrementer overflow check This is way to catch some cases of decrementer overflow, when the decrementer has underflowed an odd number of times, while MSR[EE] was disabled. With a typical small decrementer, a timer that fires when MSR[EE] is disabled will be "lost" if MSR[EE] remains disabled for between 4.3 and 8.6 seconds after the timer expires. In any case, the decrementer interrupt would be taken at 8.6 seconds and the timer would be found at that point. So this check is for catching extreme latency events, and it prevents those latencies from being a further few seconds long. It's not obvious this is a good tradeoff. This is already a watchdog magnitude event and that situation is not improved a significantly with this check. For large decrementers, it's useless. Therefore remove this check, which avoids a mftb when enabling hard disabled interrupts (e.g., when enabling after coming from hardware interrupt handlers). Perhaps more importantly, it also removes the clunky MSR[EE] vs PACA_IRQ_HARD_DIS incoherency in soft-interrupt replay which simplifies the code. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201107014336.2337337-1-npiggin@gmail.com --- arch/powerpc/kernel/irq.c | 53 +++-------------------------------- arch/powerpc/kernel/time.c | 9 ++---- arch/powerpc/platforms/powernv/opal.c | 2 +- 3 files changed, 8 insertions(+), 56 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 7d0f7682d01d..6b1eca53e36c 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -102,14 +102,6 @@ static inline notrace unsigned long get_irq_happened(void) return happened; } -static inline notrace int decrementer_check_overflow(void) -{ - u64 now = get_tb(); - u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); - - return now >= *next_tb; -} - #ifdef CONFIG_PPC_BOOK3E /* This is called whenever we are re-enabling interrupts @@ -142,35 +134,6 @@ notrace unsigned int __check_irq_replay(void) trace_hardirqs_on(); trace_hardirqs_off(); - /* - * We are always hard disabled here, but PACA_IRQ_HARD_DIS may - * not be set, which means interrupts have only just been hard - * disabled as part of the local_irq_restore or interrupt return - * code. In that case, skip the decrementr check becaus it's - * expensive to read the TB. - * - * HARD_DIS then gets cleared here, but it's reconciled later. - * Either local_irq_disable will replay the interrupt and that - * will reconcile state like other hard interrupts. Or interrupt - * retur will replay the interrupt and in that case it sets - * PACA_IRQ_HARD_DIS by hand (see comments in entry_64.S). - */ - if (happened & PACA_IRQ_HARD_DIS) { - local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; - - /* - * We may have missed a decrementer interrupt if hard disabled. - * Check the decrementer register in case we had a rollover - * while hard disabled. - */ - if (!(happened & PACA_IRQ_DEC)) { - if (decrementer_check_overflow()) { - local_paca->irq_happened |= PACA_IRQ_DEC; - happened |= PACA_IRQ_DEC; - } - } - } - if (happened & PACA_IRQ_DEC) { local_paca->irq_happened &= ~PACA_IRQ_DEC; return 0x900; @@ -186,6 +149,9 @@ notrace unsigned int __check_irq_replay(void) return 0x280; } + if (happened & PACA_IRQ_HARD_DIS) + local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; + /* There should be nothing left ! */ BUG_ON(local_paca->irq_happened != 0); @@ -229,18 +195,6 @@ again: if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) WARN_ON_ONCE(mfmsr() & MSR_EE); - if (happened & PACA_IRQ_HARD_DIS) { - /* - * We may have missed a decrementer interrupt if hard disabled. - * Check the decrementer register in case we had a rollover - * while hard disabled. - */ - if (!(happened & PACA_IRQ_DEC)) { - if (decrementer_check_overflow()) - happened |= PACA_IRQ_DEC; - } - } - /* * Force the delivery of pending soft-disabled interrupts on PS3. * Any HV call will have this side effect. @@ -345,6 +299,7 @@ notrace void arch_local_irq_restore(unsigned long mask) if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) WARN_ON_ONCE(!(mfmsr() & MSR_EE)); __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; } else { /* * We should already be hard disabled here. We had bugs diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 92481463f9dc..5f9c323049eb 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -553,14 +553,11 @@ void timer_interrupt(struct pt_regs *regs) struct pt_regs *old_regs; u64 now; - /* Some implementations of hotplug will get timer interrupts while - * offline, just ignore these and we also need to set - * decrementers_next_tb as MAX to make sure __check_irq_replay - * don't replay timer interrupt when return, otherwise we'll trap - * here infinitely :( + /* + * Some implementations of hotplug will get timer interrupts while + * offline, just ignore these. */ if (unlikely(!cpu_online(smp_processor_id()))) { - *next_tb = ~(u64)0; set_dec(decrementer_max); return; } diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index d95954ad4c0a..c61c3b62c8c6 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -731,7 +731,7 @@ int opal_hmi_exception_early2(struct pt_regs *regs) return 1; } -/* HMI exception handler called in virtual mode during check_irq_replay. */ +/* HMI exception handler called in virtual mode when irqs are next enabled. */ int opal_handle_hmi_exception(struct pt_regs *regs) { /* -- cgit v1.2.3 From 20e9de85edae3a5866f29b6cce87c9ec66d62a1b Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Fri, 11 Dec 2020 15:59:54 +0100 Subject: powerpc/pseries/memhotplug: Quieten some DLPAR operations When attempting to remove by index a set of LMBs a lot of messages are displayed on the console, even when everything goes fine: pseries-hotplug-mem: Attempting to hot-remove LMB, drc index 8000002d Offlined Pages 4096 pseries-hotplug-mem: Memory at 2d0000000 was hot-removed The 2 messages prefixed by "pseries-hotplug-mem" are not really helpful for the end user, they should be debug outputs. In case of error, because some of the LMB's pages couldn't be offlined, the following is displayed on the console: pseries-hotplug-mem: Attempting to hot-remove LMB, drc index 8000003e pseries-hotplug-mem: Failed to hot-remove memory at 3e0000000 dlpar: Could not handle DLPAR request "memory remove index 0x8000003e" Again, the 2 messages prefixed by "pseries-hotplug-mem" are useless, and the generic DLPAR prefixed message should be enough. These 2 first changes are mainly triggered by the changes introduced in drmgr: https://groups.google.com/g/powerpc-utils-devel/c/Y6ef4NB3EzM/m/9cu5JHRxAQAJ Also, when adding a bunch of LMBs, a message is displayed in the console per LMB like these ones: pseries-hotplug-mem: Memory at 7e0000000 (drc index 8000007e) was hot-added pseries-hotplug-mem: Memory at 7f0000000 (drc index 8000007f) was hot-added pseries-hotplug-mem: Memory at 800000000 (drc index 80000080) was hot-added pseries-hotplug-mem: Memory at 810000000 (drc index 80000081) was hot-added When adding 1TB of memory and LMB size is 256MB, this leads to 4096 messages to be displayed on the console. These messages are not really helpful for the end user, so moving them to the DEBUG level. Signed-off-by: Laurent Dufour [mpe: Tweak change log wording] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201211145954.90143-1-ldufour@linux.ibm.com --- arch/powerpc/platforms/pseries/hotplug-memory.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 7efe6ec5d14a..8377f1f7c78e 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -479,7 +479,7 @@ static int dlpar_memory_remove_by_index(u32 drc_index) int lmb_found; int rc; - pr_info("Attempting to hot-remove LMB, drc index %x\n", drc_index); + pr_debug("Attempting to hot-remove LMB, drc index %x\n", drc_index); lmb_found = 0; for_each_drmem_lmb(lmb) { @@ -497,10 +497,10 @@ static int dlpar_memory_remove_by_index(u32 drc_index) rc = -EINVAL; if (rc) - pr_info("Failed to hot-remove memory at %llx\n", - lmb->base_addr); + pr_debug("Failed to hot-remove memory at %llx\n", + lmb->base_addr); else - pr_info("Memory at %llx was hot-removed\n", lmb->base_addr); + pr_debug("Memory at %llx was hot-removed\n", lmb->base_addr); return rc; } @@ -717,8 +717,8 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add) if (!drmem_lmb_reserved(lmb)) continue; - pr_info("Memory at %llx (drc index %x) was hot-added\n", - lmb->base_addr, lmb->drc_index); + pr_debug("Memory at %llx (drc index %x) was hot-added\n", + lmb->base_addr, lmb->drc_index); drmem_remove_lmb_reservation(lmb); } rc = 0; -- cgit v1.2.3 From c88017cf2af614409da69934c1738ed5ff2f7022 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Fri, 11 Dec 2020 13:11:41 +1100 Subject: powerpc/powernv: Rate limit opal-elog read failure message Sometimes we can't read an error log from OPAL, and we print an error message accordingly. But the OPAL userspace tools seem to like retrying a lot, in which case we flood the kernel log with a lot of messages. Change pr_err() to pr_err_ratelimited() to help with this. Signed-off-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201211021140.28402-1-ajd@linux.ibm.com --- arch/powerpc/platforms/powernv/opal-elog.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 37b380eef41a..5821b0fa8614 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -171,8 +171,8 @@ static ssize_t raw_attr_read(struct file *filep, struct kobject *kobj, opal_rc = opal_read_elog(__pa(elog->buffer), elog->size, elog->id); if (opal_rc != OPAL_SUCCESS) { - pr_err("ELOG: log read failed for log-id=%llx\n", - elog->id); + pr_err_ratelimited("ELOG: log read failed for log-id=%llx\n", + elog->id); kfree(elog->buffer); elog->buffer = NULL; return -EIO; -- cgit v1.2.3