author     Bharata B Rao <bharata@linux.ibm.com>     2020-07-09 15:19:24 +0200
committer  Michael Ellerman <mpe@ellerman.id.au>     2020-07-20 14:57:56 +0200
commit     d6d6ebfc5dbb4008be21baa4ec2ad45606578966
tree       3c390a7e93880692b0a8553da11bb894738ef09d /arch/powerpc/mm/book3s64
parent     powerpc/mm/radix: Free PUD table when freeing pagetable
powerpc/mm/radix: Remove split_kernel_mapping()
We split the page table mapping on memory unplug if the linear range
was mapped with a huge page mapping (e.g. 1G). The page table
splitting code has a few issues:
1. Recursive locking
--------------------
The memory unplug path takes cpu_hotplug_lock and calls stop_machine()
to split the mappings. However, stop_machine() takes cpu_hotplug_lock
again, causing a deadlock.
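
This nesting is structural: stop_machine() itself takes
cpu_hotplug_lock before scheduling its callback. A simplified sketch
of its shape (mirroring kernel/stop_machine.c of this era, details
elided):

	int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
	{
		int ret;

		/* cpus_read_lock() is the reader side of cpu_hotplug_lock,
		 * which the unplug path above already holds. */
		cpus_read_lock();
		ret = stop_machine_cpuslocked(fn, data, cpus);
		cpus_read_unlock();
		return ret;
	}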
2. BUG: sleeping function called from in_atomic() context
---------------------------------------------------------
The memory unplug path (remove_pagetable) takes the
init_mm.page_table_lock spinlock and later calls stop_machine(), which
does wait_for_completion().
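
Schematically, the offending sequence looks like this (an illustrative
sketch only; fn and data stand in for the real callback and argument):

	spin_lock(&init_mm.page_table_lock);	/* enter atomic context */
	stop_machine(fn, data, NULL);		/* sleeps in wait_for_completion() */
	spin_unlock(&init_mm.page_table_lock);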
3. Bad unlock unbalance
-----------------------
The memory unplug path takes the init_mm.page_table_lock spinlock and
calls stop_machine(). The stop_machine() callback runs in a different
thread context (the migration thread), which tries to release and
reacquire the ptl. Releasing the ptl from a thread other than the one
that acquired it causes a bad unlock unbalance.
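
The pattern is visible in stop_machine_change_mapping(), removed by
the diff below; condensed to just the locking:

	static int __meminit stop_machine_change_mapping(void *data)
	{
		/* Runs in the migration thread, not in the task that
		 * acquired init_mm.page_table_lock. */
		spin_unlock(&init_mm.page_table_lock);
		/* ... remap the rest of the huge mapping ... */
		spin_lock(&init_mm.page_table_lock);
		return 0;
	}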
These problems can be avoided if we avoid mapping hot-plugged memory
with 1G mappings, thereby removing the need to split them during
unplug. The kernel always ensures that the minimum unplug request is
SUBSECTION_SIZE for device memory and SECTION_SIZE for regular memory.
In preparation for such a change, remove the page table splitting
support.
This is essentially a revert of
commit 4dd5f8a99e791 ("powerpc/mm/radix: Split linear mapping on hot-unplug")
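
With splitting gone, the remove path only has to handle fully covered
leaf entries. Condensed from the diff below (PMD level shown; the PUD
and P4D levels are analogous):

	if (pmd_is_leaf(*pmd)) {
		/* Warn and skip instead of splitting when the range
		 * does not cover the whole leaf mapping. */
		if (!IS_ALIGNED(addr, PMD_SIZE) ||
		    !IS_ALIGNED(next, PMD_SIZE)) {
			WARN_ONCE(1, "%s: unaligned range\n", __func__);
			continue;
		}
		pte_clear(&init_mm, addr, (pte_t *)pmd);
		continue;
	}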
Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20200709131925.922266-4-aneesh.kumar@linux.ibm.com
Diffstat (limited to 'arch/powerpc/mm/book3s64')
-rw-r--r--	arch/powerpc/mm/book3s64/radix_pgtable.c | 95
1 file changed, 19 insertions(+), 76 deletions(-)
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 46ad2da3087a..d5a01b9aadc9 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -15,7 +15,6 @@
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
 #include <linux/string_helpers.h>
-#include <linux/stop_machine.h>
 
 #include <asm/pgalloc.h>
 #include <asm/mmu_context.h>
@@ -722,32 +721,6 @@ static void free_pud_table(pud_t *pud_start, p4d_t *p4d)
 	p4d_clear(p4d);
 }
 
-struct change_mapping_params {
-	pte_t *pte;
-	unsigned long start;
-	unsigned long end;
-	unsigned long aligned_start;
-	unsigned long aligned_end;
-};
-
-static int __meminit stop_machine_change_mapping(void *data)
-{
-	struct change_mapping_params *params =
-			(struct change_mapping_params *)data;
-
-	if (!data)
-		return -1;
-
-	spin_unlock(&init_mm.page_table_lock);
-	pte_clear(&init_mm, params->aligned_start, params->pte);
-	create_physical_mapping(__pa(params->aligned_start),
-				__pa(params->start), -1, PAGE_KERNEL);
-	create_physical_mapping(__pa(params->end), __pa(params->aligned_end),
-				-1, PAGE_KERNEL);
-	spin_lock(&init_mm.page_table_lock);
-	return 0;
-}
-
 static void remove_pte_table(pte_t *pte_start, unsigned long addr,
 			     unsigned long end)
 {
@@ -776,52 +749,6 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr,
 	}
 }
 
-/*
- * clear the pte and potentially split the mapping helper
- */
-static void __meminit split_kernel_mapping(unsigned long addr, unsigned long end,
-				unsigned long size, pte_t *pte)
-{
-	unsigned long mask = ~(size - 1);
-	unsigned long aligned_start = addr & mask;
-	unsigned long aligned_end = addr + size;
-	struct change_mapping_params params;
-	bool split_region = false;
-
-	if ((end - addr) < size) {
-		/*
-		 * We're going to clear the PTE, but not flushed
-		 * the mapping, time to remap and flush. The
-		 * effects if visible outside the processor or
-		 * if we are running in code close to the
-		 * mapping we cleared, we are in trouble.
-		 */
-		if (overlaps_kernel_text(aligned_start, addr) ||
-			overlaps_kernel_text(end, aligned_end)) {
-			/*
-			 * Hack, just return, don't pte_clear
-			 */
-			WARN_ONCE(1, "Linear mapping %lx->%lx overlaps kernel "
-				  "text, not splitting\n", addr, end);
-			return;
-		}
-		split_region = true;
-	}
-
-	if (split_region) {
-		params.pte = pte;
-		params.start = addr;
-		params.end = end;
-		params.aligned_start = addr & ~(size - 1);
-		params.aligned_end = min_t(unsigned long, aligned_end,
-				(unsigned long)__va(memblock_end_of_DRAM()));
-		stop_machine(stop_machine_change_mapping, &params, NULL);
-		return;
-	}
-
-	pte_clear(&init_mm, addr, pte);
-}
-
 static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
 			     unsigned long end)
 {
@@ -837,7 +764,12 @@ static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
 			continue;
 
 		if (pmd_is_leaf(*pmd)) {
-			split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd);
+			if (!IS_ALIGNED(addr, PMD_SIZE) ||
+			    !IS_ALIGNED(next, PMD_SIZE)) {
+				WARN_ONCE(1, "%s: unaligned range\n", __func__);
+				continue;
+			}
+			pte_clear(&init_mm, addr, (pte_t *)pmd);
 			continue;
 		}
 
@@ -862,7 +794,12 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr,
 			continue;
 
 		if (pud_is_leaf(*pud)) {
-			split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud);
+			if (!IS_ALIGNED(addr, PUD_SIZE) ||
+			    !IS_ALIGNED(next, PUD_SIZE)) {
+				WARN_ONCE(1, "%s: unaligned range\n", __func__);
+				continue;
+			}
+			pte_clear(&init_mm, addr, (pte_t *)pud);
 			continue;
 		}
 
@@ -890,7 +827,13 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end)
 			continue;
 
 		if (p4d_is_leaf(*p4d)) {
-			split_kernel_mapping(addr, end, P4D_SIZE, (pte_t *)p4d);
+			if (!IS_ALIGNED(addr, P4D_SIZE) ||
+			    !IS_ALIGNED(next, P4D_SIZE)) {
+				WARN_ONCE(1, "%s: unaligned range\n", __func__);
+				continue;
+			}
+
+			pte_clear(&init_mm, addr, (pte_t *)pgd);
 			continue;
 		}