diff options
Diffstat (limited to 'arch/powerpc/mm')
30 files changed, 736 insertions, 451 deletions
diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index 877d880890fe..2702e8762c0d 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -81,7 +81,7 @@ _GLOBAL(hash_page) rlwinm. r8,r8,0,0,20 /* extract pt base address */ #endif #ifdef CONFIG_SMP - beq- hash_page_out /* return if no mapping */ + beq- .Lhash_page_out /* return if no mapping */ #else /* XXX it seems like the 601 will give a machine fault on the rfi if its alignment is wrong (bottom 4 bits of address are @@ -109,11 +109,11 @@ _GLOBAL(hash_page) #if (PTE_FLAGS_OFFSET != 0) addi r8,r8,PTE_FLAGS_OFFSET #endif -retry: +.Lretry: lwarx r6,0,r8 /* get linux-style pte, flag word */ andc. r5,r3,r6 /* check access & ~permission */ #ifdef CONFIG_SMP - bne- hash_page_out /* return if access not permitted */ + bne- .Lhash_page_out /* return if access not permitted */ #else bnelr- #endif @@ -128,7 +128,7 @@ retry: #endif /* CONFIG_SMP */ #endif /* CONFIG_PTE_64BIT */ stwcx. r5,0,r8 /* attempt to update PTE */ - bne- retry /* retry if someone got there first */ + bne- .Lretry /* retry if someone got there first */ mfsrin r3,r4 /* get segment reg for segment */ #ifndef CONFIG_VMAP_STACK @@ -156,13 +156,14 @@ retry: #endif #ifdef CONFIG_SMP -hash_page_out: +.Lhash_page_out: eieio lis r8, (mmu_hash_lock - PAGE_OFFSET)@ha li r0,0 stw r0, (mmu_hash_lock - PAGE_OFFSET)@l(r8) blr #endif /* CONFIG_SMP */ +_ASM_NOKPROBE_SYMBOL(hash_page) /* * Add an entry for a particular page to the hash table. @@ -267,6 +268,7 @@ _GLOBAL(add_hash_page) lwz r0,4(r1) mtlr r0 blr +_ASM_NOKPROBE_SYMBOL(add_hash_page) /* * This routine adds a hardware PTE to the hash table. @@ -360,7 +362,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) 1: LDPTEu r6,HPTE_SIZE(r4) /* get next PTE */ CMPPTE 0,r6,r5 bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ - beq+ found_slot + beq+ .Lfound_slot patch_site 0f, patch__hash_page_B /* Search the secondary PTEG for a matching PTE */ @@ -372,7 +374,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) 2: LDPTEu r6,HPTE_SIZE(r4) CMPPTE 0,r6,r5 bdnzf 2,2b - beq+ found_slot + beq+ .Lfound_slot xori r5,r5,PTE_H /* clear H bit again */ /* Search the primary PTEG for an empty slot */ @@ -381,7 +383,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) 1: LDPTEu r6,HPTE_SIZE(r4) /* get next PTE */ TST_V(r6) /* test valid bit */ bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ - beq+ found_empty + beq+ .Lfound_empty /* update counter of times that the primary PTEG is full */ lis r4, (primary_pteg_full - PAGE_OFFSET)@ha @@ -399,7 +401,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) 2: LDPTEu r6,HPTE_SIZE(r4) TST_V(r6) bdnzf 2,2b - beq+ found_empty + beq+ .Lfound_empty xori r5,r5,PTE_H /* clear H bit again */ /* @@ -437,9 +439,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) #ifndef CONFIG_SMP /* Store PTE in PTEG */ -found_empty: +.Lfound_empty: STPTE r5,0(r4) -found_slot: +.Lfound_slot: STPTE r8,HPTE_SIZE/2(r4) #else /* CONFIG_SMP */ @@ -460,8 +462,8 @@ found_slot: * We do however have to make sure that the PTE is never in an invalid * state with the V bit set. */ -found_empty: -found_slot: +.Lfound_empty: +.Lfound_slot: CLR_V(r5,r0) /* clear V (valid) bit in PTE */ STPTE r5,0(r4) sync @@ -474,6 +476,7 @@ found_slot: sync /* make sure pte updates get to memory */ blr +_ASM_NOKPROBE_SYMBOL(create_hpte) .section .bss .align 2 @@ -630,6 +633,7 @@ _GLOBAL(flush_hash_pages) isync blr EXPORT_SYMBOL(flush_hash_pages) +_ASM_NOKPROBE_SYMBOL(flush_hash_pages) /* * Flush an entry from the TLB @@ -667,6 +671,7 @@ _GLOBAL(_tlbie) sync #endif /* CONFIG_SMP */ blr +_ASM_NOKPROBE_SYMBOL(_tlbie) /* * Flush the entire TLB. 603/603e only @@ -708,3 +713,4 @@ _GLOBAL(_tlbia) isync #endif /* CONFIG_SMP */ blr +_ASM_NOKPROBE_SYMBOL(_tlbia) diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 39ba53ca5bb5..a6dcc708eee3 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -170,6 +170,12 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) pr_debug("RAM mapped without BATs\n"); return base; } + if (debug_pagealloc_enabled()) { + if (base >= border) + return base; + if (top >= border) + top = border; + } if (!strict_kernel_rwx_enabled() || base >= border || top <= border) return __mmu_mapin_ram(base, top); @@ -187,6 +193,7 @@ void mmu_mark_initmem_nx(void) int i; unsigned long base = (unsigned long)_stext - PAGE_OFFSET; unsigned long top = (unsigned long)_etext - PAGE_OFFSET; + unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; unsigned long size; if (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) @@ -201,9 +208,10 @@ void mmu_mark_initmem_nx(void) size = block_size(base, top); size = max(size, 128UL << 10); if ((top - base) > size) { - if (strict_kernel_rwx_enabled()) - pr_warn("Kernel _etext not properly aligned\n"); size <<= 1; + if (strict_kernel_rwx_enabled() && base + size > border) + pr_warn("Some RW data is getting mapped X. " + "Adjust CONFIG_DATA_SHIFT to avoid that.\n"); } setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT); base += size; diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index 9cd15937e88a..8b4b0a602158 100644 --- a/arch/powerpc/mm/book3s64/hash_pgtable.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -365,17 +365,6 @@ pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm, * hash fault look at them. */ memset(pgtable, 0, PTE_FRAG_SIZE); - /* - * Serialize against find_current_mm_pte variants which does lock-less - * lookup in page tables with local interrupts disabled. For huge pages - * it casts pmd_t to pte_t. Since format of pte_t is different from - * pmd_t we want to prevent transit from pmd pointing to page table - * to pmd pointing to huge page (and back) while interrupts are disabled. - * We clear pmd to possibly replace it with page table pointer in - * different code paths. So make sure we wait for the parallel - * find_curren_mm_pte to finish. - */ - serialize_against_pte_lookup(mm); return old_pmd; } diff --git a/arch/powerpc/mm/book3s64/hash_tlb.c b/arch/powerpc/mm/book3s64/hash_tlb.c index 4a70d8dd39cd..0fbf3dc9f2c2 100644 --- a/arch/powerpc/mm/book3s64/hash_tlb.c +++ b/arch/powerpc/mm/book3s64/hash_tlb.c @@ -176,7 +176,6 @@ void hash__tlb_flush(struct mmu_gather *tlb) * from the hash table (and the TLB). But keeps * the linux PTEs intact. * - * @mm : mm_struct of the target address space (generally init_mm) * @start : starting address * @end : ending address (not included in the flush) * @@ -189,17 +188,14 @@ void hash__tlb_flush(struct mmu_gather *tlb) * Because of that usage pattern, it is implemented for small size rather * than speed. */ -void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, - unsigned long end) +void __flush_hash_table_range(unsigned long start, unsigned long end) { - bool is_thp; int hugepage_shift; unsigned long flags; - start = _ALIGN_DOWN(start, PAGE_SIZE); - end = _ALIGN_UP(end, PAGE_SIZE); + start = ALIGN_DOWN(start, PAGE_SIZE); + end = ALIGN(end, PAGE_SIZE); - BUG_ON(!mm->pgd); /* * Note: Normally, we should only ever use a batch within a @@ -212,21 +208,15 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, local_irq_save(flags); arch_enter_lazy_mmu_mode(); for (; start < end; start += PAGE_SIZE) { - pte_t *ptep = find_current_mm_pte(mm->pgd, start, &is_thp, - &hugepage_shift); + pte_t *ptep = find_init_mm_pte(start, &hugepage_shift); unsigned long pte; if (ptep == NULL) continue; pte = pte_val(*ptep); - if (is_thp) - trace_hugepage_invalidate(start, pte); if (!(pte & H_PAGE_HASHPTE)) continue; - if (unlikely(is_thp)) - hpte_do_hugepage_flush(mm, start, (pmd_t *)ptep, pte); - else - hpte_need_flush(mm, start, ptep, pte, hugepage_shift); + hpte_need_flush(&init_mm, start, ptep, pte, hugepage_shift); } arch_leave_lazy_mmu_mode(); local_irq_restore(flags); @@ -238,7 +228,7 @@ void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr) pte_t *start_pte; unsigned long flags; - addr = _ALIGN_DOWN(addr, PMD_SIZE); + addr = ALIGN_DOWN(addr, PMD_SIZE); /* * Note: Normally, we should only ever use a batch within a * PTE locked section. This violates the rule, but will work diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 8ed2411c3f39..0124003e60d0 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -66,6 +66,9 @@ #include <mm/mmu_decl.h> +#include "internal.h" + + #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) #else @@ -870,6 +873,9 @@ static void __init htab_initialize(void) printk(KERN_INFO "Using 1TB segments\n"); } + if (stress_slb_enabled) + static_branch_enable(&stress_slb_key); + /* * Calculate the required size of the htab. We want the number of * PTEGs to equal one half the number of real pages. @@ -1350,8 +1356,15 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, goto bail; } - /* Add _PAGE_PRESENT to the required access perm */ - access |= _PAGE_PRESENT; + /* + * Add _PAGE_PRESENT to the required access perm. If there are parallel + * updates to the pte that can possibly clear _PAGE_PTE, catch that too. + * + * We can safely use the return pte address in rest of the function + * because we do set H_PAGE_BUSY which prevents further updates to pte + * from generic code. + */ + access |= _PAGE_PRESENT | _PAGE_PTE; /* * Pre-check access permissions (will be re-checked atomically @@ -1539,14 +1552,11 @@ static bool should_hash_preload(struct mm_struct *mm, unsigned long ea) } #endif -static void hash_preload(struct mm_struct *mm, unsigned long ea, +static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea, bool is_exec, unsigned long trap) { - int hugepage_shift; unsigned long vsid; pgd_t *pgdir; - pte_t *ptep; - unsigned long flags; int rc, ssize, update_flags = 0; unsigned long access = _PAGE_PRESENT | _PAGE_READ | (is_exec ? _PAGE_EXEC : 0); @@ -1568,30 +1578,18 @@ static void hash_preload(struct mm_struct *mm, unsigned long ea, vsid = get_user_vsid(&mm->context, ea, ssize); if (!vsid) return; - /* - * Hash doesn't like irqs. Walking linux page table with irq disabled - * saves us from holding multiple locks. - */ - local_irq_save(flags); - /* - * THP pages use update_mmu_cache_pmd. We don't do - * hash preload there. Hence can ignore THP here - */ - ptep = find_current_mm_pte(pgdir, ea, NULL, &hugepage_shift); - if (!ptep) - goto out_exit; - - WARN_ON(hugepage_shift); #ifdef CONFIG_PPC_64K_PAGES /* If either H_PAGE_4K_PFN or cache inhibited is set (and we are on * a 64K kernel), then we don't preload, hash_page() will take * care of it once we actually try to access the page. * That way we don't have to duplicate all of the logic for segment * page size demotion here + * Called with PTL held, hence can be sure the value won't change in + * between. */ if ((pte_val(*ptep) & H_PAGE_4K_PFN) || pte_ci(*ptep)) - goto out_exit; + return; #endif /* CONFIG_PPC_64K_PAGES */ /* Is that local to this CPU ? */ @@ -1616,8 +1614,6 @@ static void hash_preload(struct mm_struct *mm, unsigned long ea, mm_ctx_user_psize(&mm->context), mm_ctx_user_psize(&mm->context), pte_val(*ptep)); -out_exit: - local_irq_restore(flags); } /* @@ -1638,10 +1634,8 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, unsigned long trap; bool is_exec; - if (radix_enabled()) { - prefetch((void *)address); + if (radix_enabled()) return; - } /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ if (!pte_young(*ptep) || address >= TASK_SIZE) @@ -1668,32 +1662,8 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, return; } - hash_preload(vma->vm_mm, address, is_exec, trap); -} - -#ifdef CONFIG_PPC_MEM_KEYS -/* - * Return the protection key associated with the given address and the - * mm_struct. - */ -u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address) -{ - pte_t *ptep; - u16 pkey = 0; - unsigned long flags; - - if (!mm || !mm->pgd) - return 0; - - local_irq_save(flags); - ptep = find_linux_pte(mm->pgd, address, NULL, NULL); - if (ptep) - pkey = pte_to_pkey_bits(pte_val(READ_ONCE(*ptep))); - local_irq_restore(flags); - - return pkey; + hash_preload(vma->vm_mm, ptep, address, is_exec, trap); } -#endif /* CONFIG_PPC_MEM_KEYS */ #ifdef CONFIG_PPC_TRANSACTIONAL_MEM static inline void tm_flush_hash_page(int local) diff --git a/arch/powerpc/mm/book3s64/internal.h b/arch/powerpc/mm/book3s64/internal.h new file mode 100644 index 000000000000..7eda0d30d765 --- /dev/null +++ b/arch/powerpc/mm/book3s64/internal.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef ARCH_POWERPC_MM_BOOK3S64_INTERNAL_H +#define ARCH_POWERPC_MM_BOOK3S64_INTERNAL_H + +#include <linux/jump_label.h> + +extern bool stress_slb_enabled; + +DECLARE_STATIC_KEY_FALSE(stress_slb_key); + +static inline bool stress_slb(void) +{ + return static_branch_unlikely(&stress_slb_key); +} + +#endif /* ARCH_POWERPC_MM_BOOK3S64_INTERNAL_H */ diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index e0bb69c616e4..c58ad1049909 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -109,15 +109,25 @@ pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, old_pmd = pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, _PAGE_INVALID); flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + return __pmd(old_pmd); +} + +pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp, int full) +{ + pmd_t pmd; + VM_BUG_ON(addr & ~HPAGE_PMD_MASK); + VM_BUG_ON((pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) && + !pmd_devmap(*pmdp)) || !pmd_present(*pmdp)); + pmd = pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp); /* - * This ensures that generic code that rely on IRQ disabling - * to prevent a parallel THP split work as expected. - * - * Marking the entry with _PAGE_INVALID && ~_PAGE_PRESENT requires - * a special case check in pmd_access_permitted. + * if it not a fullmm flush, then we can possibly end up converting + * this PMD pte entry to a regular level 0 PTE by a parallel page fault. + * Make sure we flush the tlb in this case. */ - serialize_against_pte_lookup(vma->vm_mm); - return __pmd(old_pmd); + if (!full) + flush_pmd_tlb_range(vma, addr, addr + HPAGE_PMD_SIZE); + return pmd; } static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot) @@ -146,19 +156,6 @@ pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) pmdv &= _HPAGE_CHG_MASK; return pmd_set_protbits(__pmd(pmdv), newprot); } - -/* - * This is called at the end of handling a user page fault, when the - * fault has been handled by updating a HUGE PMD entry in the linux page tables. - * We use it to preload an HPTE into the hash table corresponding to - * the updated linux HUGE PMD entry. - */ -void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, - pmd_t *pmd) -{ - if (radix_enabled()) - prefetch((void *)addr); -} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* For use by kexec */ diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 97891ca0d428..8acb96de0e48 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -267,7 +267,7 @@ static int __meminit create_physical_mapping(unsigned long start, pgprot_t prot; int psize; - start = _ALIGN_UP(start, PAGE_SIZE); + start = ALIGN(start, PAGE_SIZE); for (addr = start; addr < end; addr += mapping_size) { unsigned long gap, previous_size; int rc; @@ -970,7 +970,13 @@ pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addre pmd = *pmdp; pmd_clear(pmdp); - /*FIXME!! Verify whether we need this kick below */ + /* + * pmdp collapse_flush need to ensure that there are no parallel gup + * walk after this call. This is needed so that we can have stable + * page ref count when collapsing a page. We don't allow a collapse page + * if we have gup taken on the page. We can ensure that by sending IPI + * because gup walk happens with IRQ disabled. + */ serialize_against_pte_lookup(vma->vm_mm); radix__flush_tlb_collapsed_pmd(vma->vm_mm, address); @@ -1031,17 +1037,6 @@ pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm, old = radix__pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0); old_pmd = __pmd(old); - /* - * Serialize against find_current_mm_pte which does lock-less - * lookup in page tables with local interrupts disabled. For huge pages - * it casts pmd_t to pte_t. Since format of pte_t is different from - * pmd_t we want to prevent transit from pmd pointing to page table - * to pmd pointing to huge page (and back) while interrupts are disabled. - * We clear pmd to possibly replace it with page table pointer in - * different code paths. So make sure we wait for the parallel - * find_current_mm_pte to finish. - */ - serialize_against_pte_lookup(mm); return old_pmd; } diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 758ade2c2b6e..b5cc9b23cf02 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -884,9 +884,7 @@ is_local: if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { hstart = (start + PMD_SIZE - 1) & PMD_MASK; hend = end & PMD_MASK; - if (hstart == hend) - hflush = false; - else + if (hstart < hend) hflush = true; } diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index 716204aee3da..8141e8b40ee5 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -25,6 +25,9 @@ #include <asm/udbg.h> #include <asm/code-patching.h> +#include "internal.h" + + enum slb_index { LINEAR_INDEX = 0, /* Kernel linear map (0xc000000000000000) */ KSTACK_INDEX = 1, /* Kernel stack map */ @@ -54,6 +57,17 @@ static inline unsigned long mk_vsid_data(unsigned long ea, int ssize, return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags); } +bool stress_slb_enabled __initdata; + +static int __init parse_stress_slb(char *p) +{ + stress_slb_enabled = true; + return 0; +} +early_param("stress_slb", parse_stress_slb); + +__ro_after_init DEFINE_STATIC_KEY_FALSE(stress_slb_key); + static void assert_slb_presence(bool present, unsigned long ea) { #ifdef CONFIG_DEBUG_VM @@ -68,7 +82,7 @@ static void assert_slb_presence(bool present, unsigned long ea) * slbfee. requires bit 24 (PPC bit 39) be clear in RB. Hardware * ignores all other bits from 0-27, so just clear them all. */ - ea &= ~((1UL << 28) - 1); + ea &= ~((1UL << SID_SHIFT) - 1); asm volatile(__PPC_SLBFEE_DOT(%0, %1) : "=r"(tmp) : "r"(ea) : "cr0"); WARN_ON(present == (tmp == 0)); @@ -153,14 +167,42 @@ void slb_flush_all_realmode(void) asm volatile("slbmte %0,%0; slbia" : : "r" (0)); } +static __always_inline void __slb_flush_and_restore_bolted(bool preserve_kernel_lookaside) +{ + struct slb_shadow *p = get_slb_shadow(); + unsigned long ksp_esid_data, ksp_vsid_data; + u32 ih; + + /* + * SLBIA IH=1 on ISA v2.05 and newer processors may preserve lookaside + * information created with Class=0 entries, which we use for kernel + * SLB entries (the SLB entries themselves are still invalidated). + * + * Older processors will ignore this optimisation. Over-invalidation + * is fine because we never rely on lookaside information existing. + */ + if (preserve_kernel_lookaside) + ih = 1; + else + ih = 0; + + ksp_esid_data = be64_to_cpu(p->save_area[KSTACK_INDEX].esid); + ksp_vsid_data = be64_to_cpu(p->save_area[KSTACK_INDEX].vsid); + + asm volatile(PPC_SLBIA(%0)" \n" + "slbmte %1, %2 \n" + :: "i" (ih), + "r" (ksp_vsid_data), + "r" (ksp_esid_data) + : "memory"); +} + /* * This flushes non-bolted entries, it can be run in virtual mode. Must * be called with interrupts disabled. */ void slb_flush_and_restore_bolted(void) { - struct slb_shadow *p = get_slb_shadow(); - BUILD_BUG_ON(SLB_NUM_BOLTED != 2); WARN_ON(!irqs_disabled()); @@ -171,13 +213,10 @@ void slb_flush_and_restore_bolted(void) */ hard_irq_disable(); - asm volatile("isync\n" - "slbia\n" - "slbmte %0, %1\n" - "isync\n" - :: "r" (be64_to_cpu(p->save_area[KSTACK_INDEX].vsid)), - "r" (be64_to_cpu(p->save_area[KSTACK_INDEX].esid)) - : "memory"); + isync(); + __slb_flush_and_restore_bolted(false); + isync(); + assert_slb_presence(true, get_paca()->kstack); get_paca()->slb_cache_ptr = 0; @@ -400,6 +439,30 @@ void preload_new_slb_context(unsigned long start, unsigned long sp) local_irq_enable(); } +static void slb_cache_slbie_kernel(unsigned int index) +{ + unsigned long slbie_data = get_paca()->slb_cache[index]; + unsigned long ksp = get_paca()->kstack; + + slbie_data <<= SID_SHIFT; + slbie_data |= 0xc000000000000000ULL; + if ((ksp & slb_esid_mask(mmu_kernel_ssize)) == slbie_data) + return; + slbie_data |= mmu_kernel_ssize << SLBIE_SSIZE_SHIFT; + + asm volatile("slbie %0" : : "r" (slbie_data)); +} + +static void slb_cache_slbie_user(unsigned int index) +{ + unsigned long slbie_data = get_paca()->slb_cache[index]; + + slbie_data <<= SID_SHIFT; + slbie_data |= user_segment_size(slbie_data) << SLBIE_SSIZE_SHIFT; + slbie_data |= SLBIE_C; /* user slbs have C=1 */ + + asm volatile("slbie %0" : : "r" (slbie_data)); +} /* Flush all user entries from the segment table of the current processor. */ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) @@ -414,8 +477,14 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) * which would update the slb_cache/slb_cache_ptr fields in the PACA. */ hard_irq_disable(); - asm volatile("isync" : : : "memory"); - if (cpu_has_feature(CPU_FTR_ARCH_300)) { + isync(); + if (stress_slb()) { + __slb_flush_and_restore_bolted(false); + isync(); + get_paca()->slb_cache_ptr = 0; + get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1; + + } else if (cpu_has_feature(CPU_FTR_ARCH_300)) { /* * SLBIA IH=3 invalidates all Class=1 SLBEs and their * associated lookaside structures, which matches what @@ -423,47 +492,29 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) * cache. */ asm volatile(PPC_SLBIA(3)); + } else { unsigned long offset = get_paca()->slb_cache_ptr; if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) && offset <= SLB_CACHE_ENTRIES) { - unsigned long slbie_data = 0; - - for (i = 0; i < offset; i++) { - unsigned long ea; - - ea = (unsigned long) - get_paca()->slb_cache[i] << SID_SHIFT; - /* - * Could assert_slb_presence(true) here, but - * hypervisor or machine check could have come - * in and removed the entry at this point. - */ - - slbie_data = ea; - slbie_data |= user_segment_size(slbie_data) - << SLBIE_SSIZE_SHIFT; - slbie_data |= SLBIE_C; /* user slbs have C=1 */ - asm volatile("slbie %0" : : "r" (slbie_data)); - } + /* + * Could assert_slb_presence(true) here, but + * hypervisor or machine check could have come + * in and removed the entry at this point. + */ + + for (i = 0; i < offset; i++) + slb_cache_slbie_user(i); /* Workaround POWER5 < DD2.1 issue */ if (!cpu_has_feature(CPU_FTR_ARCH_207S) && offset == 1) - asm volatile("slbie %0" : : "r" (slbie_data)); + slb_cache_slbie_user(0); } else { - struct slb_shadow *p = get_slb_shadow(); - unsigned long ksp_esid_data = - be64_to_cpu(p->save_area[KSTACK_INDEX].esid); - unsigned long ksp_vsid_data = - be64_to_cpu(p->save_area[KSTACK_INDEX].vsid); - - asm volatile(PPC_SLBIA(1) "\n" - "slbmte %0,%1\n" - "isync" - :: "r"(ksp_vsid_data), - "r"(ksp_esid_data)); + /* Flush but retain kernel lookaside information */ + __slb_flush_and_restore_bolted(true); + isync(); get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1; } @@ -503,7 +554,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) * address accesses by the kernel (user mode won't happen until * rfid, which is safe). */ - asm volatile("isync" : : : "memory"); + isync(); } void slb_set_size(u16 size) @@ -571,6 +622,9 @@ static void slb_cache_update(unsigned long esid_data) if (cpu_has_feature(CPU_FTR_ARCH_300)) return; /* ISAv3.0B and later does not use slb_cache */ + if (stress_slb()) + return; + /* * Now update slb cache entries */ @@ -580,7 +634,7 @@ static void slb_cache_update(unsigned long esid_data) * We have space in slb cache for optimized switch_slb(). * Top 36 bits from esid_data as per ISA */ - local_paca->slb_cache[slb_cache_index++] = esid_data >> 28; + local_paca->slb_cache[slb_cache_index++] = esid_data >> SID_SHIFT; local_paca->slb_cache_ptr++; } else { /* @@ -671,6 +725,28 @@ static long slb_insert_entry(unsigned long ea, unsigned long context, * accesses user memory before it returns to userspace with rfid. */ assert_slb_presence(false, ea); + if (stress_slb()) { + int slb_cache_index = local_paca->slb_cache_ptr; + + /* + * stress_slb() does not use slb cache, repurpose as a + * cache of inserted (non-bolted) kernel SLB entries. All + * non-bolted kernel entries are flushed on any user fault, + * or if there are already 3 non-boled kernel entries. + */ + BUILD_BUG_ON(SLB_CACHE_ENTRIES < 3); + if (!kernel || slb_cache_index == 3) { + int i; + + for (i = 0; i < slb_cache_index; i++) + slb_cache_slbie_kernel(i); + slb_cache_index = 0; + } + + if (kernel) + local_paca->slb_cache[slb_cache_index++] = esid_data >> SID_SHIFT; + local_paca->slb_cache_ptr = slb_cache_index; + } asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data)); barrier(); diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 84af6c8eecf7..2393ed9d84bb 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -41,18 +41,19 @@ #include <asm/siginfo.h> #include <asm/debug.h> #include <asm/kup.h> +#include <asm/inst.h> /* * Check whether the instruction inst is a store using * an update addressing form which will update r1. */ -static bool store_updates_sp(unsigned int inst) +static bool store_updates_sp(struct ppc_inst inst) { /* check for 1 in the rA field */ - if (((inst >> 16) & 0x1f) != 1) + if (((ppc_inst_val(inst) >> 16) & 0x1f) != 1) return false; /* check major opcode */ - switch (inst >> 26) { + switch (ppc_inst_primary_opcode(inst)) { case OP_STWU: case OP_STBU: case OP_STHU: @@ -60,10 +61,10 @@ static bool store_updates_sp(unsigned int inst) case OP_STFDU: return true; case OP_STD: /* std or stdu */ - return (inst & 3) == 1; + return (ppc_inst_val(inst) & 3) == 1; case OP_31: /* check minor opcode */ - switch ((inst >> 1) & 0x3ff) { + switch ((ppc_inst_val(inst) >> 1) & 0x3ff) { case OP_31_XOP_STDUX: case OP_31_XOP_STWUX: case OP_31_XOP_STBUX: @@ -118,9 +119,34 @@ static noinline int bad_area(struct pt_regs *regs, unsigned long address) return __bad_area(regs, address, SEGV_MAPERR); } -static int bad_key_fault_exception(struct pt_regs *regs, unsigned long address, - int pkey) +#ifdef CONFIG_PPC_MEM_KEYS +static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address, + struct vm_area_struct *vma) { + struct mm_struct *mm = current->mm; + int pkey; + + /* + * We don't try to fetch the pkey from page table because reading + * page table without locking doesn't guarantee stable pte value. + * Hence the pkey value that we return to userspace can be different + * from the pkey that actually caused access error. + * + * It does *not* guarantee that the VMA we find here + * was the one that we faulted on. + * + * 1. T1 : mprotect_key(foo, PAGE_SIZE, pkey=4); + * 2. T1 : set AMR to deny access to pkey=4, touches, page + * 3. T1 : faults... + * 4. T2: mprotect_key(foo, PAGE_SIZE, pkey=5); + * 5. T1 : enters fault handler, takes mmap_sem, etc... + * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really + * faulted on a pte with its pkey=4. + */ + pkey = vma_pkey(vma); + + up_read(&mm->mmap_sem); + /* * If we are in kernel mode, bail out with a SEGV, this will * be caught by the assembly which will restore the non-volatile @@ -133,6 +159,7 @@ static int bad_key_fault_exception(struct pt_regs *regs, unsigned long address, return 0; } +#endif static noinline int bad_access(struct pt_regs *regs, unsigned long address) { @@ -255,7 +282,7 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address, * expand to 1MB without further checks. */ if (address + 0x100000 < vma->vm_end) { - unsigned int __user *nip = (unsigned int __user *)regs->nip; + struct ppc_inst __user *nip = (struct ppc_inst __user *)regs->nip; /* get user regs even if this fault is in kernel mode */ struct pt_regs *uregs = current->thread.regs; if (uregs == NULL) @@ -278,9 +305,9 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address, if ((flags & FAULT_FLAG_WRITE) && (flags & FAULT_FLAG_USER) && access_ok(nip, sizeof(*nip))) { - unsigned int inst; + struct ppc_inst inst; - if (!probe_user_read(&inst, nip, sizeof(inst))) + if (!probe_user_read_inst(&inst, nip)) return !store_updates_sp(inst); *must_retry = true; } @@ -289,8 +316,23 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address, return false; } -static bool access_error(bool is_write, bool is_exec, - struct vm_area_struct *vma) +#ifdef CONFIG_PPC_MEM_KEYS +static bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey, + struct vm_area_struct *vma) +{ + /* + * Make sure to check the VMA so that we do not perform + * faults just to hit a pkey fault as soon as we fill in a + * page. Only called for current mm, hence foreign == 0 + */ + if (!arch_vma_access_permitted(vma, is_write, is_exec, 0)) + return true; + + return false; +} +#endif + +static bool access_error(bool is_write, bool is_exec, struct vm_area_struct *vma) { /* * Allow execution from readable areas if the MMU does not @@ -483,10 +525,6 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address, perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); - if (error_code & DSISR_KEYFAULT) - return bad_key_fault_exception(regs, address, - get_mm_addr_key(mm, address)); - /* * We want to do this outside mmap_sem, because reading code around nip * can result in fault, which will cause a deadlock when called with @@ -555,6 +593,13 @@ retry: return bad_area(regs, address); good_area: + +#ifdef CONFIG_PPC_MEM_KEYS + if (unlikely(access_pkey_error(is_write, is_exec, + (error_code & DSISR_KEYFAULT), vma))) + return bad_access_pkey(regs, address, vma); +#endif /* CONFIG_PPC_MEM_KEYS */ + if (unlikely(access_error(is_write, is_exec, vma))) return bad_access(regs, address); @@ -565,21 +610,6 @@ good_area: */ fault = handle_mm_fault(vma, address, flags); -#ifdef CONFIG_PPC_MEM_KEYS - /* - * we skipped checking for access error due to key earlier. - * Check that using handle_mm_fault error return. - */ - if (unlikely(fault & VM_FAULT_SIGSEGV) && - !arch_vma_access_permitted(vma, is_write, is_exec, 0)) { - - int pkey = vma_pkey(vma); - - up_read(&mm->mmap_sem); - return bad_key_fault_exception(regs, address, pkey); - } -#endif /* CONFIG_PPC_MEM_KEYS */ - major |= fault & VM_FAULT_MAJOR; if (fault_signal_pending(fault, regs)) diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index f122d0f2c295..5b3d01404266 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -30,7 +30,8 @@ bool hugetlb_disabled = false; #define hugepd_none(hpd) (hpd_val(hpd) == 0) -#define PTE_T_ORDER (__builtin_ffs(sizeof(pte_t)) - __builtin_ffs(sizeof(void *))) +#define PTE_T_ORDER (__builtin_ffs(sizeof(pte_basic_t)) - \ + __builtin_ffs(sizeof(void *))) pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz) { @@ -53,24 +54,17 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, if (pshift >= pdshift) { cachep = PGT_CACHE(PTE_T_ORDER); num_hugepd = 1 << (pshift - pdshift); - new = NULL; - } else if (IS_ENABLED(CONFIG_PPC_8xx)) { - cachep = NULL; - num_hugepd = 1; - new = pte_alloc_one(mm); } else { cachep = PGT_CACHE(pdshift - pshift); num_hugepd = 1; - new = NULL; } - if (!cachep && !new) { + if (!cachep) { WARN_ONCE(1, "No page table cache created for hugetlb tables"); return -ENOMEM; } - if (cachep) - new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL)); + new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL)); BUG_ON(pshift > HUGEPD_SHIFT_MASK); BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK); @@ -101,10 +95,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, if (i < num_hugepd) { for (i = i - 1 ; i >= 0; i--, hpdp--) *hpdp = __hugepd(0); - if (cachep) - kmem_cache_free(cachep, new); - else - pte_free(mm, new); + kmem_cache_free(cachep, new); } else { kmemleak_ignore(new); } @@ -190,6 +181,9 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz if (!hpdp) return NULL; + if (IS_ENABLED(CONFIG_PPC_8xx) && sz == SZ_512K) + return pte_alloc_map(mm, (pmd_t *)hpdp, addr); + BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp)); if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, @@ -255,7 +249,7 @@ int __init alloc_bootmem_huge_page(struct hstate *h) struct hugepd_freelist { struct rcu_head rcu; unsigned int index; - void *ptes[0]; + void *ptes[]; }; static DEFINE_PER_CPU(struct hugepd_freelist *, hugepd_freelist_cur); @@ -332,13 +326,20 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif if (shift >= pdshift) hugepd_free(tlb, hugepte); - else if (IS_ENABLED(CONFIG_PPC_8xx)) - pgtable_free_tlb(tlb, hugepte, 0); else pgtable_free_tlb(tlb, hugepte, get_hugepd_cache_index(pdshift - shift)); } +static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, unsigned long addr) +{ + pgtable_t token = pmd_pgtable(*pmd); + + pmd_clear(pmd); + pte_free_tlb(tlb, token, addr); + mm_dec_nr_ptes(tlb->mm); +} + static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) @@ -354,11 +355,17 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, pmd = pmd_offset(pud, addr); next = pmd_addr_end(addr, end); if (!is_hugepd(__hugepd(pmd_val(*pmd)))) { + if (pmd_none_or_clear_bad(pmd)) + continue; + /* * if it is not hugepd pointer, we should already find * it cleared. */ - WARN_ON(!pmd_none_or_clear_bad(pmd)); + WARN_ON(!IS_ENABLED(CONFIG_PPC_8xx)); + + hugetlb_free_pte_range(tlb, pmd, addr); + continue; } /* diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 872df48ae41b..36c39bd37256 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -96,11 +96,13 @@ static void __init MMU_setup(void) if (strstr(boot_command_line, "noltlbs")) { __map_without_ltlbs = 1; } - if (debug_pagealloc_enabled()) { - __map_without_bats = 1; + if (IS_ENABLED(CONFIG_PPC_8xx)) + return; + + if (debug_pagealloc_enabled()) __map_without_ltlbs = 1; - } - if (strict_kernel_rwx_enabled() && !IS_ENABLED(CONFIG_PPC_8xx)) + + if (strict_kernel_rwx_enabled()) __map_without_ltlbs = 1; } @@ -170,8 +172,6 @@ void __init MMU_init(void) btext_unmap(); #endif - kasan_mmu_init(); - setup_kup(); /* Shortly after that, the entire linear mapping will be available */ diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 4002ced3596f..c7ce4ec5060e 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -203,7 +203,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; /* Align to the page size of the linear mapping. */ - start = _ALIGN_DOWN(start, page_size); + start = ALIGN_DOWN(start, page_size); pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node); @@ -292,7 +292,7 @@ void __ref vmemmap_free(unsigned long start, unsigned long end, unsigned long alt_start = ~0, alt_end = ~0; unsigned long base_pfn; - start = _ALIGN_DOWN(start, page_size); + start = ALIGN_DOWN(start, page_size); if (altmap) { alt_start = altmap->base_pfn; alt_end = altmap->base_pfn + altmap->reserve + diff --git a/arch/powerpc/mm/kasan/8xx.c b/arch/powerpc/mm/kasan/8xx.c new file mode 100644 index 000000000000..db4ef44af22f --- /dev/null +++ b/arch/powerpc/mm/kasan/8xx.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define DISABLE_BRANCH_PROFILING + +#include <linux/kasan.h> +#include <linux/memblock.h> +#include <linux/hugetlb.h> +#include <asm/pgalloc.h> + +static int __init +kasan_init_shadow_8M(unsigned long k_start, unsigned long k_end, void *block) +{ + pmd_t *pmd = pmd_ptr_k(k_start); + unsigned long k_cur, k_next; + + for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd += 2, block += SZ_8M) { + pte_basic_t *new; + + k_next = pgd_addr_end(k_cur, k_end); + k_next = pgd_addr_end(k_next, k_end); + if ((void *)pmd_page_vaddr(*pmd) != kasan_early_shadow_pte) + continue; + + new = memblock_alloc(sizeof(pte_basic_t), SZ_4K); + if (!new) + return -ENOMEM; + + *new = pte_val(pte_mkhuge(pfn_pte(PHYS_PFN(__pa(block)), PAGE_KERNEL))); + + hugepd_populate_kernel((hugepd_t *)pmd, (pte_t *)new, PAGE_SHIFT_8M); + hugepd_populate_kernel((hugepd_t *)pmd + 1, (pte_t *)new, PAGE_SHIFT_8M); + } + return 0; +} + +int __init kasan_init_region(void *start, size_t size) +{ + unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start); + unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size); + unsigned long k_cur; + int ret; + void *block; + + block = memblock_alloc(k_end - k_start, SZ_8M); + if (!block) + return -ENOMEM; + + if (IS_ALIGNED(k_start, SZ_8M)) { + kasan_init_shadow_8M(k_start, ALIGN_DOWN(k_end, SZ_8M), block); + k_cur = ALIGN_DOWN(k_end, SZ_8M); + if (k_cur == k_end) + goto finish; + } else { + k_cur = k_start; + } + + ret = kasan_init_shadow_page_tables(k_start, k_end); + if (ret) + return ret; + + for (; k_cur < k_end; k_cur += PAGE_SIZE) { + pmd_t *pmd = pmd_ptr_k(k_cur); + void *va = block + k_cur - k_start; + pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL); + + if (k_cur < ALIGN_DOWN(k_end, SZ_512K)) + pte = pte_mkhuge(pte); + + __set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0); + } +finish: + flush_tlb_kernel_range(k_start, k_end); + return 0; +} diff --git a/arch/powerpc/mm/kasan/Makefile b/arch/powerpc/mm/kasan/Makefile index 6577897673dd..bb1a5408b86b 100644 --- a/arch/powerpc/mm/kasan/Makefile +++ b/arch/powerpc/mm/kasan/Makefile @@ -3,3 +3,5 @@ KASAN_SANITIZE := n obj-$(CONFIG_PPC32) += kasan_init_32.o +obj-$(CONFIG_PPC_8xx) += 8xx.o +obj-$(CONFIG_PPC_BOOK3S_32) += book3s_32.o diff --git a/arch/powerpc/mm/kasan/book3s_32.c b/arch/powerpc/mm/kasan/book3s_32.c new file mode 100644 index 000000000000..4bc491a4a1fd --- /dev/null +++ b/arch/powerpc/mm/kasan/book3s_32.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define DISABLE_BRANCH_PROFILING + +#include <linux/kasan.h> +#include <linux/memblock.h> +#include <asm/pgalloc.h> +#include <mm/mmu_decl.h> + +int __init kasan_init_region(void *start, size_t size) +{ + unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start); + unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size); + unsigned long k_cur = k_start; + int k_size = k_end - k_start; + int k_size_base = 1 << (ffs(k_size) - 1); + int ret; + void *block; + + block = memblock_alloc(k_size, k_size_base); + + if (block && k_size_base >= SZ_128K && k_start == ALIGN(k_start, k_size_base)) { + int k_size_more = 1 << (ffs(k_size - k_size_base) - 1); + + setbat(-1, k_start, __pa(block), k_size_base, PAGE_KERNEL); + if (k_size_more >= SZ_128K) + setbat(-1, k_start + k_size_base, __pa(block) + k_size_base, + k_size_more, PAGE_KERNEL); + if (v_block_mapped(k_start)) + k_cur = k_start + k_size_base; + if (v_block_mapped(k_start + k_size_base)) + k_cur = k_start + k_size_base + k_size_more; + + update_bats(); + } + + if (!block) + block = memblock_alloc(k_size, PAGE_SIZE); + if (!block) + return -ENOMEM; + + ret = kasan_init_shadow_page_tables(k_start, k_end); + if (ret) + return ret; + + kasan_update_early_region(k_start, k_cur, __pte(0)); + + for (; k_cur < k_end; k_cur += PAGE_SIZE) { + pmd_t *pmd = pmd_ptr_k(k_cur); + void *va = block + k_cur - k_start; + pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL); + + __set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0); + } + flush_tlb_kernel_range(k_start, k_end); + return 0; +} diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index c99aa8cbaac5..c42085801c04 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -5,9 +5,7 @@ #include <linux/kasan.h> #include <linux/printk.h> #include <linux/memblock.h> -#include <linux/moduleloader.h> #include <linux/sched/task.h> -#include <linux/vmalloc.h> #include <asm/pgalloc.h> #include <asm/code-patching.h> #include <mm/mmu_decl.h> @@ -30,40 +28,31 @@ static void __init kasan_populate_pte(pte_t *ptep, pgprot_t prot) __set_pte_at(&init_mm, va, ptep, pfn_pte(PHYS_PFN(pa), prot), 0); } -static int __init kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end) +int __init kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end) { pmd_t *pmd; unsigned long k_cur, k_next; - pte_t *new = NULL; pmd = pmd_ptr_k(k_start); for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd++) { + pte_t *new; + k_next = pgd_addr_end(k_cur, k_end); if ((void *)pmd_page_vaddr(*pmd) != kasan_early_shadow_pte) continue; - if (!new) - new = memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE); + new = memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE); if (!new) return -ENOMEM; kasan_populate_pte(new, PAGE_KERNEL); - - smp_wmb(); /* See comment in __pte_alloc */ - - spin_lock(&init_mm.page_table_lock); - /* Has another populated it ? */ - if (likely((void *)pmd_page_vaddr(*pmd) == kasan_early_shadow_pte)) { - pmd_populate_kernel(&init_mm, pmd, new); - new = NULL; - } - spin_unlock(&init_mm.page_table_lock); + pmd_populate_kernel(&init_mm, pmd, new); } return 0; } -static int __init kasan_init_region(void *start, size_t size) +int __init __weak kasan_init_region(void *start, size_t size) { unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start); unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size); @@ -76,75 +65,63 @@ static int __init kasan_init_region(void *start, size_t size) return ret; block = memblock_alloc(k_end - k_start, PAGE_SIZE); + if (!block) + return -ENOMEM; for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) { pmd_t *pmd = pmd_ptr_k(k_cur); void *va = block + k_cur - k_start; pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL); - if (!va) - return -ENOMEM; - __set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0); } flush_tlb_kernel_range(k_start, k_end); return 0; } -static void __init kasan_remap_early_shadow_ro(void) +void __init +kasan_update_early_region(unsigned long k_start, unsigned long k_end, pte_t pte) { - pgprot_t prot = kasan_prot_ro(); - unsigned long k_start = KASAN_SHADOW_START; - unsigned long k_end = KASAN_SHADOW_END; unsigned long k_cur; phys_addr_t pa = __pa(kasan_early_shadow_page); - kasan_populate_pte(kasan_early_shadow_pte, prot); - - for (k_cur = k_start & PAGE_MASK; k_cur != k_end; k_cur += PAGE_SIZE) { + for (k_cur = k_start; k_cur != k_end; k_cur += PAGE_SIZE) { pmd_t *pmd = pmd_ptr_k(k_cur); pte_t *ptep = pte_offset_kernel(pmd, k_cur); if ((pte_val(*ptep) & PTE_RPN_MASK) != pa) continue; - __set_pte_at(&init_mm, k_cur, ptep, pfn_pte(PHYS_PFN(pa), prot), 0); + __set_pte_at(&init_mm, k_cur, ptep, pte, 0); } - flush_tlb_kernel_range(KASAN_SHADOW_START, KASAN_SHADOW_END); + + flush_tlb_kernel_range(k_start, k_end); } -static void __init kasan_unmap_early_shadow_vmalloc(void) +static void __init kasan_remap_early_shadow_ro(void) { - unsigned long k_start = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_START); - unsigned long k_end = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_END); - unsigned long k_cur; + pgprot_t prot = kasan_prot_ro(); phys_addr_t pa = __pa(kasan_early_shadow_page); - for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) { - pmd_t *pmd = pmd_ptr_k(k_cur); - pte_t *ptep = pte_offset_kernel(pmd, k_cur); + kasan_populate_pte(kasan_early_shadow_pte, prot); - if ((pte_val(*ptep) & PTE_RPN_MASK) != pa) - continue; + kasan_update_early_region(KASAN_SHADOW_START, KASAN_SHADOW_END, + pfn_pte(PHYS_PFN(pa), prot)); +} - __set_pte_at(&init_mm, k_cur, ptep, __pte(0), 0); - } - flush_tlb_kernel_range(k_start, k_end); +static void __init kasan_unmap_early_shadow_vmalloc(void) +{ + unsigned long k_start = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_START); + unsigned long k_end = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_END); + + kasan_update_early_region(k_start, k_end, __pte(0)); } -void __init kasan_mmu_init(void) +static void __init kasan_mmu_init(void) { int ret; struct memblock_region *reg; - if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE) || - IS_ENABLED(CONFIG_KASAN_VMALLOC)) { - ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END); - - if (ret) - panic("kasan: kasan_init_shadow_page_tables() failed"); - } - for_each_memblock(memory, reg) { phys_addr_t base = reg->base; phys_addr_t top = min(base + reg->size, total_lowmem); @@ -156,10 +133,21 @@ void __init kasan_mmu_init(void) if (ret) panic("kasan: kasan_init_region() failed"); } + + if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE) || + IS_ENABLED(CONFIG_KASAN_VMALLOC)) { + ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END); + + if (ret) + panic("kasan: kasan_init_shadow_page_tables() failed"); + } + } void __init kasan_init(void) { + kasan_mmu_init(); + kasan_remap_early_shadow_ro(); clear_page(kasan_early_shadow_page); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 7cebb9c818d3..5f7fe13211e9 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -32,6 +32,7 @@ #include <linux/vmalloc.h> #include <linux/memremap.h> #include <linux/dma-direct.h> +#include <linux/kprobes.h> #include <asm/pgalloc.h> #include <asm/prom.h> @@ -465,6 +466,7 @@ static void flush_dcache_icache_phys(unsigned long physaddr) : "r" (nb), "r" (msr), "i" (bytes), "r" (msr0) : "ctr", "memory"); } +NOKPROBE_SYMBOL(flush_dcache_icache_phys) #endif // !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64) /* diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 7097e07a209a..1b6d39e9baed 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -182,6 +182,10 @@ static inline void mmu_mark_initmem_nx(void) { } static inline void mmu_mark_rodata_ro(void) { } #endif +#ifdef CONFIG_PPC_8xx +void __init mmu_mapin_immr(void); +#endif + #ifdef CONFIG_PPC_DEBUG_WX void ptdump_check_wx(void); #else diff --git a/arch/powerpc/mm/nohash/40x.c b/arch/powerpc/mm/nohash/40x.c index 82862723ab42..4eaf462cda30 100644 --- a/arch/powerpc/mm/nohash/40x.c +++ b/arch/powerpc/mm/nohash/40x.c @@ -102,7 +102,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) while (s >= LARGE_PAGE_SIZE_16M) { pmd_t *pmdp; - unsigned long val = p | _PMD_SIZE_16M | _PAGE_EXEC | _PAGE_HWWRITE; + unsigned long val = p | _PMD_SIZE_16M | _PAGE_EXEC | _PAGE_RW; pmdp = pmd_ptr_k(v); *pmdp++ = __pmd(val); @@ -117,7 +117,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) while (s >= LARGE_PAGE_SIZE_4M) { pmd_t *pmdp; - unsigned long val = p | _PMD_SIZE_4M | _PAGE_EXEC | _PAGE_HWWRITE; + unsigned long val = p | _PMD_SIZE_4M | _PAGE_EXEC | _PAGE_RW; pmdp = pmd_ptr_k(v); *pmdp = __pmd(val); diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index d83a12c5bc7f..286441bbbe49 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -9,8 +9,11 @@ #include <linux/memblock.h> #include <linux/mmu_context.h> +#include <linux/hugetlb.h> #include <asm/fixmap.h> #include <asm/code-patching.h> +#include <asm/inst.h> +#include <asm/pgalloc.h> #include <mm/mmu_decl.h> @@ -54,158 +57,148 @@ unsigned long p_block_mapped(phys_addr_t pa) return 0; } -#define LARGE_PAGE_SIZE_8M (1<<23) - -/* - * MMU_init_hw does the chip-specific initialization of the MMU hardware. - */ -void __init MMU_init_hw(void) +static pte_t __init *early_hugepd_alloc_kernel(hugepd_t *pmdp, unsigned long va) { - /* PIN up to the 3 first 8Mb after IMMR in DTLB table */ - if (IS_ENABLED(CONFIG_PIN_TLB_DATA)) { - unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000; - unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY; - int i = IS_ENABLED(CONFIG_PIN_TLB_IMMR) ? 29 : 28; - unsigned long addr = 0; - unsigned long mem = total_lowmem; - - for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) { - mtspr(SPRN_MD_CTR, ctr | (i << 8)); - mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID); - mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID); - mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT); - addr += LARGE_PAGE_SIZE_8M; - mem -= LARGE_PAGE_SIZE_8M; - } + if (hpd_val(*pmdp) == 0) { + pte_t *ptep = memblock_alloc(sizeof(pte_basic_t), SZ_4K); + + if (!ptep) + return NULL; + + hugepd_populate_kernel((hugepd_t *)pmdp, ptep, PAGE_SHIFT_8M); + hugepd_populate_kernel((hugepd_t *)pmdp + 1, ptep, PAGE_SHIFT_8M); } + return hugepte_offset(*(hugepd_t *)pmdp, va, PGDIR_SHIFT); } -static void __init mmu_mapin_immr(void) +static int __ref __early_map_kernel_hugepage(unsigned long va, phys_addr_t pa, + pgprot_t prot, int psize, bool new) { - unsigned long p = PHYS_IMMR_BASE; - unsigned long v = VIRT_IMMR_BASE; - int offset; + pmd_t *pmdp = pmd_ptr_k(va); + pte_t *ptep; + + if (WARN_ON(psize != MMU_PAGE_512K && psize != MMU_PAGE_8M)) + return -EINVAL; + + if (new) { + if (WARN_ON(slab_is_available())) + return -EINVAL; + + if (psize == MMU_PAGE_512K) + ptep = early_pte_alloc_kernel(pmdp, va); + else + ptep = early_hugepd_alloc_kernel((hugepd_t *)pmdp, va); + } else { + if (psize == MMU_PAGE_512K) + ptep = pte_offset_kernel(pmdp, va); + else + ptep = hugepte_offset(*(hugepd_t *)pmdp, va, PGDIR_SHIFT); + } + + if (WARN_ON(!ptep)) + return -ENOMEM; - for (offset = 0; offset < IMMR_SIZE; offset += PAGE_SIZE) - map_kernel_page(v + offset, p + offset, PAGE_KERNEL_NCG); + /* The PTE should never be already present */ + if (new && WARN_ON(pte_present(*ptep) && pgprot_val(prot))) + return -EINVAL; + + set_huge_pte_at(&init_mm, va, ptep, pte_mkhuge(pfn_pte(pa >> PAGE_SHIFT, prot))); + + return 0; } -static void mmu_patch_cmp_limit(s32 *site, unsigned long mapped) +/* + * MMU_init_hw does the chip-specific initialization of the MMU hardware. + */ +void __init MMU_init_hw(void) { - modify_instruction_site(site, 0xffff, (unsigned long)__va(mapped) >> 16); } -static void mmu_patch_addis(s32 *site, long simm) +static bool immr_is_mapped __initdata; + +void __init mmu_mapin_immr(void) { - unsigned int instr = *(unsigned int *)patch_site_addr(site); + if (immr_is_mapped) + return; + + immr_is_mapped = true; - instr &= 0xffff0000; - instr |= ((unsigned long)simm) >> 16; - patch_instruction_site(site, instr); + __early_map_kernel_hugepage(VIRT_IMMR_BASE, PHYS_IMMR_BASE, + PAGE_KERNEL_NCG, MMU_PAGE_512K, true); } -static void mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, pgprot_t prot) +static void mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, + pgprot_t prot, bool new) { - unsigned long s = offset; - unsigned long v = PAGE_OFFSET + s; - phys_addr_t p = memstart_addr + s; - - for (; s < top; s += PAGE_SIZE) { - map_kernel_page(v, p, prot); - v += PAGE_SIZE; - p += PAGE_SIZE; - } + unsigned long v = PAGE_OFFSET + offset; + unsigned long p = offset; + + WARN_ON(!IS_ALIGNED(offset, SZ_512K) || !IS_ALIGNED(top, SZ_512K)); + + for (; p < ALIGN(p, SZ_8M) && p < top; p += SZ_512K, v += SZ_512K) + __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_512K, new); + for (; p < ALIGN_DOWN(top, SZ_8M) && p < top; p += SZ_8M, v += SZ_8M) + __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_8M, new); + for (; p < ALIGN_DOWN(top, SZ_512K) && p < top; p += SZ_512K, v += SZ_512K) + __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_512K, new); + + if (!new) + flush_tlb_kernel_range(PAGE_OFFSET + v, PAGE_OFFSET + top); } unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) { - unsigned long mapped; - - if (__map_without_ltlbs) { - mapped = 0; - mmu_mapin_immr(); - if (!IS_ENABLED(CONFIG_PIN_TLB_IMMR)) - patch_instruction_site(&patch__dtlbmiss_immr_jmp, PPC_INST_NOP); - if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) - mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, 0); + unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M); + unsigned long sinittext = __pa(_sinittext); + bool strict_boundary = strict_kernel_rwx_enabled() || debug_pagealloc_enabled(); + unsigned long boundary = strict_boundary ? sinittext : etext8; + unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M); + + WARN_ON(top < einittext8); + + mmu_mapin_immr(); + + if (__map_without_ltlbs) + return 0; + + mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, true); + if (debug_pagealloc_enabled()) { + top = boundary; } else { - unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M); - - mapped = top & ~(LARGE_PAGE_SIZE_8M - 1); - if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) - mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, einittext8); - - /* - * Populate page tables to: - * - have them appear in /sys/kernel/debug/kernel_page_tables - * - allow the BDI to find the pages when they are not PINNED - */ - mmu_mapin_ram_chunk(0, einittext8, PAGE_KERNEL_X); - mmu_mapin_ram_chunk(einittext8, mapped, PAGE_KERNEL); - mmu_mapin_immr(); + mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL_TEXT, true); + mmu_mapin_ram_chunk(einittext8, top, PAGE_KERNEL, true); } - mmu_patch_cmp_limit(&patch__dtlbmiss_linmem_top, mapped); - mmu_patch_cmp_limit(&patch__fixupdar_linmem_top, mapped); + if (top > SZ_32M) + memblock_set_current_limit(top); - /* If the size of RAM is not an exact power of two, we may not - * have covered RAM in its entirety with 8 MiB - * pages. Consequently, restrict the top end of RAM currently - * allocable so that calls to the MEMBLOCK to allocate PTEs for "tail" - * coverage with normal-sized pages (or other reasons) do not - * attempt to allocate outside the allowed range. - */ - if (mapped) - memblock_set_current_limit(mapped); + block_mapped_ram = top; - block_mapped_ram = mapped; - - return mapped; + return top; } void mmu_mark_initmem_nx(void) { - if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) && CONFIG_ETEXT_SHIFT < 23) - mmu_patch_addis(&patch__itlbmiss_linmem_top8, - -((long)_etext & ~(LARGE_PAGE_SIZE_8M - 1))); - if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) { - unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M); - unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M); - unsigned long etext = __pa(_etext); - - mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, __pa(_etext)); - - /* Update page tables for PTDUMP and BDI */ - mmu_mapin_ram_chunk(0, einittext8, __pgprot(0)); - if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) { - mmu_mapin_ram_chunk(0, etext, PAGE_KERNEL_TEXT); - mmu_mapin_ram_chunk(etext, einittext8, PAGE_KERNEL); - } else { - mmu_mapin_ram_chunk(0, etext8, PAGE_KERNEL_TEXT); - mmu_mapin_ram_chunk(etext8, einittext8, PAGE_KERNEL); - } - } - _tlbil_all(); + unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M); + unsigned long sinittext = __pa(_sinittext); + unsigned long boundary = strict_kernel_rwx_enabled() ? sinittext : etext8; + unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M); + + mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, false); + mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false); + + if (IS_ENABLED(CONFIG_PIN_TLB_TEXT)) + mmu_pin_tlb(block_mapped_ram, false); } #ifdef CONFIG_STRICT_KERNEL_RWX void mmu_mark_rodata_ro(void) { unsigned long sinittext = __pa(_sinittext); - unsigned long etext = __pa(_etext); - - if (CONFIG_DATA_SHIFT < 23) - mmu_patch_addis(&patch__dtlbmiss_romem_top8, - -__pa(((unsigned long)_sinittext) & - ~(LARGE_PAGE_SIZE_8M - 1))); - mmu_patch_addis(&patch__dtlbmiss_romem_top, -__pa(_sinittext)); - - _tlbil_all(); - /* Update page tables for PTDUMP and BDI */ - mmu_mapin_ram_chunk(0, sinittext, __pgprot(0)); - mmu_mapin_ram_chunk(0, etext, PAGE_KERNEL_ROX); - mmu_mapin_ram_chunk(etext, sinittext, PAGE_KERNEL_RO); + mmu_mapin_ram_chunk(0, sinittext, PAGE_KERNEL_ROX, false); + if (IS_ENABLED(CONFIG_PIN_TLB_DATA)) + mmu_pin_tlb(block_mapped_ram, true); } #endif @@ -218,7 +211,7 @@ void __init setup_initial_memory_limit(phys_addr_t first_memblock_base, BUG_ON(first_memblock_base != 0); /* 8xx can only access 32MB at the moment */ - memblock_set_current_limit(min_t(u64, first_memblock_size, 0x02000000)); + memblock_set_current_limit(min_t(u64, first_memblock_size, SZ_32M)); } /* diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index c2499271f6c1..cea5b4e25a24 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -100,7 +100,7 @@ static pte_t set_pte_filter_hash(pte_t pte) { return pte; } * as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so * instead we "filter out" the exec permission for non clean pages. */ -static pte_t set_pte_filter(pte_t pte) +static inline pte_t set_pte_filter(pte_t pte) { struct page *pg; @@ -249,16 +249,42 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, #else /* - * Not used on non book3s64 platforms. But 8xx - * can possibly use tsize derived from hstate. + * Not used on non book3s64 platforms. + * 8xx compares it with mmu_virtual_psize to + * know if it is a huge page or not. */ - psize = 0; + psize = MMU_PAGE_COUNT; #endif __ptep_set_access_flags(vma, ptep, pte, addr, psize); } return changed; #endif } + +#if defined(CONFIG_PPC_8xx) +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) +{ + pmd_t *pmd = pmd_ptr(mm, addr); + pte_basic_t val; + pte_basic_t *entry = &ptep->pte; + int num = is_hugepd(*((hugepd_t *)pmd)) ? 1 : SZ_512K / SZ_4K; + int i; + + /* + * Make sure hardware valid bit is not set. We don't do + * tlb flush for this update. + */ + VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep)); + + pte = pte_mkpte(pte); + + pte = set_pte_filter(pte); + + val = pte_val(pte); + for (i = 0; i < num; i++, entry++, val += SZ_4K) + *entry = val; +} +#endif #endif /* CONFIG_HUGETLB_PAGE */ #ifdef CONFIG_DEBUG_VM diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index f62de06e3d07..05902bbff8d6 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -29,11 +29,27 @@ #include <asm/fixmap.h> #include <asm/setup.h> #include <asm/sections.h> +#include <asm/early_ioremap.h> #include <mm/mmu_decl.h> extern char etext[], _stext[], _sinittext[], _einittext[]; +static u8 early_fixmap_pagetable[FIXMAP_PTE_SIZE] __page_aligned_data; + +notrace void __init early_ioremap_init(void) +{ + unsigned long addr = ALIGN_DOWN(FIXADDR_START, PGDIR_SIZE); + pte_t *ptep = (pte_t *)early_fixmap_pagetable; + pmd_t *pmdp = pmd_ptr_k(addr); + + for (; (s32)(FIXADDR_TOP - addr) > 0; + addr += PGDIR_SIZE, ptep += PTRS_PER_PTE, pmdp++) + pmd_populate_kernel(&init_mm, pmdp, ptep); + + early_ioremap_setup(); +} + static void __init *early_alloc_pgtable(unsigned long size) { void *ptr = memblock_alloc(size, size); @@ -45,7 +61,7 @@ static void __init *early_alloc_pgtable(unsigned long size) return ptr; } -static pte_t __init *early_pte_alloc_kernel(pmd_t *pmdp, unsigned long va) +pte_t __init *early_pte_alloc_kernel(pmd_t *pmdp, unsigned long va) { if (pmd_none(*pmdp)) { pte_t *ptep = early_alloc_pgtable(PTE_FRAG_SIZE); @@ -169,7 +185,7 @@ void mark_initmem_nx(void) unsigned long numpages = PFN_UP((unsigned long)_einittext) - PFN_DOWN((unsigned long)_sinittext); - if (v_block_mapped((unsigned long)_stext + 1)) + if (v_block_mapped((unsigned long)_sinittext)) mmu_mark_initmem_nx(); else change_page_attr(page, numpages, PAGE_KERNEL); @@ -181,7 +197,7 @@ void mark_rodata_ro(void) struct page *page; unsigned long numpages; - if (v_block_mapped((unsigned long)_sinittext)) { + if (v_block_mapped((unsigned long)_stext + 1)) { mmu_mark_rodata_ro(); ptdump_check_wx(); return; diff --git a/arch/powerpc/mm/ptdump/8xx.c b/arch/powerpc/mm/ptdump/8xx.c index 9e2d8e847d6e..4bc350736c1d 100644 --- a/arch/powerpc/mm/ptdump/8xx.c +++ b/arch/powerpc/mm/ptdump/8xx.c @@ -11,6 +11,11 @@ static const struct flag_info flag_array[] = { { + .mask = _PAGE_HUGE, + .val = _PAGE_HUGE, + .set = "huge", + .clear = " ", + }, { .mask = _PAGE_SH, .val = 0, .set = "user", diff --git a/arch/powerpc/mm/ptdump/bats.c b/arch/powerpc/mm/ptdump/bats.c index d3a5d6b318d1..cebb58c7e289 100644 --- a/arch/powerpc/mm/ptdump/bats.c +++ b/arch/powerpc/mm/ptdump/bats.c @@ -10,15 +10,17 @@ #include <asm/pgtable.h> #include <asm/cpu_has_feature.h> +#include "ptdump.h" + static char *pp_601(int k, int pp) { if (pp == 0) - return k ? "NA" : "RWX"; + return k ? " " : "rwx"; if (pp == 1) - return k ? "ROX" : "RWX"; + return k ? "r x" : "rwx"; if (pp == 2) - return k ? "RWX" : "RWX"; - return k ? "ROX" : "ROX"; + return "rwx"; + return "r x"; } static void bat_show_601(struct seq_file *m, int idx, u32 lower, u32 upper) @@ -42,15 +44,13 @@ static void bat_show_601(struct seq_file *m, int idx, u32 lower, u32 upper) #else seq_printf(m, "0x%08x ", pbn); #endif + pt_dump_size(m, size); seq_printf(m, "Kernel %s User %s", pp_601(k & 2, pp), pp_601(k & 1, pp)); - if (lower & _PAGE_WRITETHRU) - seq_puts(m, "write through "); - if (lower & _PAGE_NO_CACHE) - seq_puts(m, "no cache "); - if (lower & _PAGE_COHERENT) - seq_puts(m, "coherent "); + seq_puts(m, lower & _PAGE_WRITETHRU ? "w " : " "); + seq_puts(m, lower & _PAGE_NO_CACHE ? "i " : " "); + seq_puts(m, lower & _PAGE_COHERENT ? "m " : " "); seq_puts(m, "\n"); } @@ -88,6 +88,7 @@ static void bat_show_603(struct seq_file *m, int idx, u32 lower, u32 upper, bool #else seq_printf(m, "0x%08x ", brpn); #endif + pt_dump_size(m, size); if (k == 1) seq_puts(m, "User "); @@ -97,20 +98,16 @@ static void bat_show_603(struct seq_file *m, int idx, u32 lower, u32 upper, bool seq_puts(m, "Kernel/User "); if (lower & BPP_RX) - seq_puts(m, is_d ? "RO " : "EXEC "); + seq_puts(m, is_d ? "r " : " x "); else if (lower & BPP_RW) - seq_puts(m, is_d ? "RW " : "EXEC "); + seq_puts(m, is_d ? "rw " : " x "); else - seq_puts(m, is_d ? "NA " : "NX "); - - if (lower & _PAGE_WRITETHRU) - seq_puts(m, "write through "); - if (lower & _PAGE_NO_CACHE) - seq_puts(m, "no cache "); - if (lower & _PAGE_COHERENT) - seq_puts(m, "coherent "); - if (lower & _PAGE_GUARDED) - seq_puts(m, "guarded "); + seq_puts(m, is_d ? " " : " "); + + seq_puts(m, lower & _PAGE_WRITETHRU ? "w " : " "); + seq_puts(m, lower & _PAGE_NO_CACHE ? "i " : " "); + seq_puts(m, lower & _PAGE_COHERENT ? "m " : " "); + seq_puts(m, lower & _PAGE_GUARDED ? "g " : " "); seq_puts(m, "\n"); } diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index b3fead0230c1..3209f78297ad 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -23,6 +23,7 @@ #include <linux/const.h> #include <asm/page.h> #include <asm/pgalloc.h> +#include <asm/hugetlb.h> #include <mm/mmu_decl.h> @@ -60,6 +61,7 @@ struct pg_state { unsigned long start_address; unsigned long start_pa; unsigned long last_pa; + unsigned long page_size; unsigned int level; u64 current_flags; bool check_wx; @@ -112,6 +114,19 @@ static struct addr_marker address_markers[] = { seq_putc(m, c); \ }) +void pt_dump_size(struct seq_file *m, unsigned long size) +{ + static const char units[] = "KMGTPE"; + const char *unit = units; + + /* Work out what appropriate unit to use */ + while (!(size & 1023) && unit[1]) { + size >>= 10; + unit++; + } + pt_dump_seq_printf(m, "%9lu%c ", size, *unit); +} + static void dump_flag_info(struct pg_state *st, const struct flag_info *flag, u64 pte, int num) { @@ -146,8 +161,6 @@ static void dump_flag_info(struct pg_state *st, const struct flag_info static void dump_addr(struct pg_state *st, unsigned long addr) { - static const char units[] = "KMGTPE"; - const char *unit = units; unsigned long delta; #ifdef CONFIG_PPC64 @@ -157,20 +170,14 @@ static void dump_addr(struct pg_state *st, unsigned long addr) #endif pt_dump_seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1); - if (st->start_pa == st->last_pa && st->start_address + PAGE_SIZE != addr) { + if (st->start_pa == st->last_pa && st->start_address + st->page_size != addr) { pt_dump_seq_printf(st->seq, "[" REG "]", st->start_pa); - delta = PAGE_SIZE >> 10; + delta = st->page_size >> 10; } else { pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa); delta = (addr - st->start_address) >> 10; } - /* Work out what appropriate unit to use */ - while (!(delta & 1023) && unit[1]) { - delta >>= 10; - unit++; - } - pt_dump_seq_printf(st->seq, "%9lu%c", delta, *unit); - + pt_dump_size(st->seq, delta); } static void note_prot_wx(struct pg_state *st, unsigned long addr) @@ -190,7 +197,7 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr) } static void note_page(struct pg_state *st, unsigned long addr, - unsigned int level, u64 val) + unsigned int level, u64 val, unsigned long page_size) { u64 flag = val & pg_level[level].mask; u64 pa = val & PTE_RPN_MASK; @@ -202,6 +209,7 @@ static void note_page(struct pg_state *st, unsigned long addr, st->start_address = addr; st->start_pa = pa; st->last_pa = pa; + st->page_size = page_size; pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); /* * Dump the section of virtual memory when: @@ -213,7 +221,7 @@ static void note_page(struct pg_state *st, unsigned long addr, */ } else if (flag != st->current_flags || level != st->level || addr >= st->marker[1].start_address || - (pa != st->last_pa + PAGE_SIZE && + (pa != st->last_pa + st->page_size && (pa != st->start_pa || st->start_pa != st->last_pa))) { /* Check the PTE flags */ @@ -241,6 +249,7 @@ static void note_page(struct pg_state *st, unsigned long addr, st->start_address = addr; st->start_pa = pa; st->last_pa = pa; + st->page_size = page_size; st->current_flags = flag; st->level = level; } else { @@ -256,11 +265,31 @@ static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) for (i = 0; i < PTRS_PER_PTE; i++, pte++) { addr = start + i * PAGE_SIZE; - note_page(st, addr, 4, pte_val(*pte)); + note_page(st, addr, 4, pte_val(*pte), PAGE_SIZE); } } +static void walk_hugepd(struct pg_state *st, hugepd_t *phpd, unsigned long start, + int pdshift, int level) +{ +#ifdef CONFIG_ARCH_HAS_HUGEPD + unsigned int i; + int shift = hugepd_shift(*phpd); + int ptrs_per_hpd = pdshift - shift > 0 ? 1 << (pdshift - shift) : 1; + + if (start & ((1 << shift) - 1)) + return; + + for (i = 0; i < ptrs_per_hpd; i++) { + unsigned long addr = start + (i << shift); + pte_t *pte = hugepte_offset(*phpd, addr, pdshift); + + note_page(st, addr, level + 1, pte_val(*pte), 1 << shift); + } +#endif +} + static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) { pmd_t *pmd = pmd_offset(pud, 0); @@ -273,7 +302,7 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) /* pmd exists */ walk_pte(st, pmd, addr); else - note_page(st, addr, 3, pmd_val(*pmd)); + note_page(st, addr, 3, pmd_val(*pmd), PMD_SIZE); } } @@ -289,7 +318,7 @@ static void walk_pud(struct pg_state *st, p4d_t *p4d, unsigned long start) /* pud exists */ walk_pmd(st, pud, addr); else - note_page(st, addr, 2, pud_val(*pud)); + note_page(st, addr, 2, pud_val(*pud), PUD_SIZE); } } @@ -306,11 +335,13 @@ static void walk_pagetables(struct pg_state *st) for (i = pgd_index(addr); i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) { p4d_t *p4d = p4d_offset(pgd, 0); - if (!p4d_none(*p4d) && !p4d_is_leaf(*p4d)) - /* pgd exists */ - walk_pud(st, p4d, addr); + if (p4d_none(*p4d) || p4d_is_leaf(*p4d)) + note_page(st, addr, 1, p4d_val(*p4d), PGDIR_SIZE); + else if (is_hugepd(__hugepd(p4d_val(*p4d)))) + walk_hugepd(st, (hugepd_t *)p4d, addr, PGDIR_SHIFT, 1); else - note_page(st, addr, 1, p4d_val(*p4d)); + /* p4d exists */ + walk_pud(st, p4d, addr); } } @@ -365,7 +396,7 @@ static int ptdump_show(struct seq_file *m, void *v) /* Traverse kernel page tables */ walk_pagetables(&st); - note_page(&st, 0, 0, 0); + note_page(&st, 0, 0, 0, 0); return 0; } diff --git a/arch/powerpc/mm/ptdump/ptdump.h b/arch/powerpc/mm/ptdump/ptdump.h index 5d513636de73..154efae96ae0 100644 --- a/arch/powerpc/mm/ptdump/ptdump.h +++ b/arch/powerpc/mm/ptdump/ptdump.h @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/types.h> +#include <linux/seq_file.h> struct flag_info { u64 mask; @@ -17,3 +18,5 @@ struct pgtable_level { }; extern struct pgtable_level pg_level[5]; + +void pt_dump_size(struct seq_file *m, unsigned long delta); diff --git a/arch/powerpc/mm/ptdump/shared.c b/arch/powerpc/mm/ptdump/shared.c index f7ed2f187cb0..784f8df17f73 100644 --- a/arch/powerpc/mm/ptdump/shared.c +++ b/arch/powerpc/mm/ptdump/shared.c @@ -31,6 +31,11 @@ static const struct flag_info flag_array[] = { .set = "present", .clear = " ", }, { + .mask = _PAGE_COHERENT, + .val = _PAGE_COHERENT, + .set = "coherent", + .clear = " ", + }, { .mask = _PAGE_GUARDED, .val = _PAGE_GUARDED, .set = "guarded", diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index dffe1a45b6ed..82b45b1cb973 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -478,7 +478,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, /* If hint, make sure it matches our alignment restrictions */ if (!fixed && addr) { - addr = _ALIGN_UP(addr, page_size); + addr = ALIGN(addr, page_size); slice_dbg(" aligned addr=%lx\n", addr); /* Ignore hint if it's too large or overlaps a VMA */ if (addr > high_limit - len || addr < mmap_min_addr || |