Diffstat (limited to 'arch/powerpc/mm')
27 files changed, 422 insertions(+), 538 deletions(-)
diff --git a/arch/powerpc/mm/book3s32/Makefile b/arch/powerpc/mm/book3s32/Makefile index 1732eaa740a9..3f972db17761 100644 --- a/arch/powerpc/mm/book3s32/Makefile +++ b/arch/powerpc/mm/book3s32/Makefile @@ -6,4 +6,4 @@ ifdef CONFIG_KASAN CFLAGS_mmu.o += -DDISABLE_BRANCH_PROFILING endif -obj-y += mmu.o hash_low.o mmu_context.o tlb.o +obj-y += mmu.o hash_low.o mmu_context.o tlb.o nohash_low.o diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index b2c912e517b9..0e6dc830c38b 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -26,12 +26,11 @@ #include <asm/feature-fixups.h> #include <asm/code-patching-asm.h> -#ifdef CONFIG_SMP - .section .bss - .align 2 -mmu_hash_lock: - .space 4 -#endif /* CONFIG_SMP */ +#ifdef CONFIG_PTE_64BIT +#define PTE_FLAGS_OFFSET 4 /* offset of PTE flags, in bytes */ +#else +#define PTE_FLAGS_OFFSET 0 +#endif /* * Load a PTE into the hash table, if possible. @@ -65,13 +64,14 @@ _GLOBAL(hash_page) /* Get PTE (linux-style) and check access */ lis r0, TASK_SIZE@h /* check if kernel address */ cmplw 0,r4,r0 + mfspr r8,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */ - mfspr r5, SPRN_SPRG_PGDIR /* phys page-table root */ + lwz r5,PGDIR(r8) /* virt page-table root */ blt+ 112f /* assume user more likely */ - lis r5, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ - addi r5 ,r5 ,(swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ + lis r5,swapper_pg_dir@ha /* if kernel address, use */ + addi r5,r5,swapper_pg_dir@l /* kernel page table */ rlwimi r3,r9,32-12,29,29 /* MSR_PR -> _PAGE_USER */ -112: +112: tophys(r5, r5) #ifndef CONFIG_PTE_64BIT rlwimi r5,r4,12,20,29 /* insert top 10 bits of address */ lwz r8,0(r5) /* get pmd entry */ @@ -94,25 +94,33 @@ _GLOBAL(hash_page) rlwimi r8,r4,22,20,29 /* insert next 10 bits of address */ #else rlwimi r8,r4,23,20,28 /* compute pte address */ + /* + * If PTE_64BIT is set, the low word is the flags word; use that + * word for locking since it contains all the interesting bits. + */ + addi r8,r8,PTE_FLAGS_OFFSET #endif - rlwinm r0,r3,32-3,24,24 /* _PAGE_RW access -> _PAGE_DIRTY */ - ori r0,r0,_PAGE_ACCESSED|_PAGE_HASHPTE /* * Update the linux PTE atomically. We do the lwarx up-front * because almost always, there won't be a permission violation * and there won't already be an HPTE, and thus we will have * to update the PTE to set _PAGE_HASHPTE. -- paulus. - * - * If PTE_64BIT is set, the low word is the flags word; use that - * word for locking since it contains all the interesting bits. */ -#if (PTE_FLAGS_OFFSET != 0) - addi r8,r8,PTE_FLAGS_OFFSET -#endif .Lretry: lwarx r6,0,r8 /* get linux-style pte, flag word */ +#ifdef CONFIG_PPC_KUAP + mfsrin r5,r4 + rlwinm r0,r9,28,_PAGE_RW /* MSR[PR] => _PAGE_RW */ + rlwinm r5,r5,12,_PAGE_RW /* Ks => _PAGE_RW */ + andc r5,r5,r0 /* Ks & ~MSR[PR] */ + andc r5,r6,r5 /* Clear _PAGE_RW when Ks = 1 && MSR[PR] = 0 */ + andc. r5,r3,r5 /* check access & ~permission */ +#else andc. 
r5,r3,r6 /* check access & ~permission */ +#endif + rlwinm r0,r3,32-3,24,24 /* _PAGE_RW access -> _PAGE_DIRTY */ + ori r0,r0,_PAGE_ACCESSED|_PAGE_HASHPTE #ifdef CONFIG_SMP bne- .Lhash_page_out /* return if access not permitted */ #else @@ -179,12 +187,6 @@ _GLOBAL(add_hash_page) mflr r0 stw r0,4(r1) - /* Convert context and va to VSID */ - mulli r3,r3,897*16 /* multiply context by context skew */ - rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */ - mulli r0,r0,0x111 /* multiply by ESID skew */ - add r3,r3,r0 /* note create_hpte trims to 24 bits */ - #ifdef CONFIG_SMP lwz r8,TASK_CPU(r2) /* to go in mmu_hash_lock */ oris r8,r8,12 @@ -248,6 +250,12 @@ _GLOBAL(add_hash_page) stwcx. r5,0,r8 bne- 1b + /* Convert context and va to VSID */ + mulli r3,r3,897*16 /* multiply context by context skew */ + rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */ + mulli r0,r0,0x111 /* multiply by ESID skew */ + add r3,r3,r0 /* note create_hpte trims to 24 bits */ + bl create_hpte 9: @@ -350,11 +358,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) beq+ 10f /* no PTE: go look for an empty slot */ tlbie r4 - lis r4, (htab_hash_searches - PAGE_OFFSET)@ha - lwz r6, (htab_hash_searches - PAGE_OFFSET)@l(r4) - addi r6,r6,1 /* count how many searches we do */ - stw r6, (htab_hash_searches - PAGE_OFFSET)@l(r4) - /* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */ mtctr r0 addi r4,r3,-HPTE_SIZE @@ -384,12 +387,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ beq+ .Lfound_empty - /* update counter of times that the primary PTEG is full */ - lis r4, (primary_pteg_full - PAGE_OFFSET)@ha - lwz r6, (primary_pteg_full - PAGE_OFFSET)@l(r4) - addi r6,r6,1 - stw r6, (primary_pteg_full - PAGE_OFFSET)@l(r4) - patch_site 0f, patch__hash_page_C /* Search the secondary PTEG for an empty slot */ ori r5,r5,PTE_H /* set H (secondary hash) bit */ @@ -411,30 +408,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) * and we know there is a definite (although small) speed * advantage to putting the PTE in the primary PTEG, we always * put the PTE in the primary PTEG. - * - * In addition, we skip any slot that is mapping kernel text in - * order to avoid a deadlock when not using BAT mappings if - * trying to hash in the kernel hash code itself after it has - * already taken the hash table lock. This works in conjunction - * with pre-faulting of the kernel text. - * - * If the hash table bucket is full of kernel text entries, we'll - * lockup here but that shouldn't happen */ -1: lis r4, (next_slot - PAGE_OFFSET)@ha /* get next evict slot */ + lis r4, (next_slot - PAGE_OFFSET)@ha /* get next evict slot */ lwz r6, (next_slot - PAGE_OFFSET)@l(r4) addi r6,r6,HPTE_SIZE /* search for candidate */ andi. r6,r6,7*HPTE_SIZE stw r6,next_slot@l(r4) add r4,r3,r6 - LDPTE r0,HPTE_SIZE/2(r4) /* get PTE second word */ - clrrwi r0,r0,12 - lis r6,etext@h - ori r6,r6,etext@l /* get etext */ - tophys(r6,r6) - cmpl cr0,r0,r6 /* compare and try again */ - blt 1b #ifndef CONFIG_SMP /* Store PTE in PTEG */ @@ -482,10 +463,6 @@ _ASM_NOKPROBE_SYMBOL(create_hpte) .align 2 next_slot: .space 4 -primary_pteg_full: - .space 4 -htab_hash_searches: - .space 4 .previous /* @@ -517,8 +494,9 @@ _GLOBAL(flush_hash_pages) rlwimi r5,r4,22,20,29 #else rlwimi r5,r4,23,20,28 + addi r5,r5,PTE_FLAGS_OFFSET #endif -1: lwz r0,PTE_FLAGS_OFFSET(r5) +1: lwz r0,0(r5) cmpwi cr1,r6,1 andi. r0,r0,_PAGE_HASHPTE bne 2f @@ -562,9 +540,6 @@ _GLOBAL(flush_hash_pages) * already clear, we're done (for this pte). 
If not, * clear it (atomically) and proceed. -- paulus. */ -#if (PTE_FLAGS_OFFSET != 0) - addi r5,r5,PTE_FLAGS_OFFSET -#endif 33: lwarx r8,0,r5 /* fetch the pte flags word */ andi. r0,r8,_PAGE_HASHPTE beq 8f /* done if HASHPTE is already clear */ @@ -633,77 +608,3 @@ _GLOBAL(flush_hash_pages) .previous EXPORT_SYMBOL(flush_hash_pages) _ASM_NOKPROBE_SYMBOL(flush_hash_pages) - -/* - * Flush an entry from the TLB - */ -_GLOBAL(_tlbie) -#ifdef CONFIG_SMP - lwz r8,TASK_CPU(r2) - oris r8,r8,11 - mfmsr r10 - rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ - rlwinm r0,r0,0,28,26 /* clear DR */ - mtmsr r0 - isync - lis r9,mmu_hash_lock@h - ori r9,r9,mmu_hash_lock@l - tophys(r9,r9) -10: lwarx r7,0,r9 - cmpwi 0,r7,0 - bne- 10b - stwcx. r8,0,r9 - bne- 10b - eieio - tlbie r3 - sync - TLBSYNC - li r0,0 - stw r0,0(r9) /* clear mmu_hash_lock */ - mtmsr r10 - isync -#else /* CONFIG_SMP */ - tlbie r3 - sync -#endif /* CONFIG_SMP */ - blr -_ASM_NOKPROBE_SYMBOL(_tlbie) - -/* - * Flush the entire TLB. 603/603e only - */ -_GLOBAL(_tlbia) -#if defined(CONFIG_SMP) - lwz r8,TASK_CPU(r2) - oris r8,r8,10 - mfmsr r10 - rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ - rlwinm r0,r0,0,28,26 /* clear DR */ - mtmsr r0 - isync - lis r9,mmu_hash_lock@h - ori r9,r9,mmu_hash_lock@l - tophys(r9,r9) -10: lwarx r7,0,r9 - cmpwi 0,r7,0 - bne- 10b - stwcx. r8,0,r9 - bne- 10b -#endif /* CONFIG_SMP */ - li r5, 32 - lis r4, KERNELBASE@h - mtctr r5 - sync -0: tlbie r4 - addi r4, r4, 0x1000 - bdnz 0b - sync -#ifdef CONFIG_SMP - TLBSYNC - li r0,0 - stw r0,0(r9) /* clear mmu_hash_lock */ - mtmsr r10 - isync -#endif /* CONFIG_SMP */ - blr -_ASM_NOKPROBE_SYMBOL(_tlbia) diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index a59e7ec98180..859e5bd603ac 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -33,19 +33,23 @@ u8 __initdata early_hash[SZ_256K] __aligned(SZ_256K) = {0}; -struct hash_pte *Hash; -static unsigned long Hash_size, Hash_mask; -unsigned long _SDR1; -static unsigned int hash_mb, hash_mb2; +static struct hash_pte __initdata *Hash = (struct hash_pte *)early_hash; +static unsigned long __initdata Hash_size, Hash_mask; +static unsigned int __initdata hash_mb, hash_mb2; +unsigned long __initdata _SDR1; struct ppc_bat BATS[8][2]; /* 8 pairs of IBAT, DBAT */ -struct batrange { /* stores address ranges mapped by BATs */ +static struct batrange { /* stores address ranges mapped by BATs */ unsigned long start; unsigned long limit; phys_addr_t phys; } bat_addrs[8]; +#ifdef CONFIG_SMP +unsigned long mmu_hash_lock; +#endif + /* * Return PA for this VA if it is mapped by a BAT, or 0 */ @@ -157,11 +161,9 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) unsigned long done; unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; - if (__map_without_bats) { - pr_debug("RAM mapped without BATs\n"); - return base; - } - if (debug_pagealloc_enabled()) { + + if (debug_pagealloc_enabled() || __map_without_bats) { + pr_debug_once("Read-Write memory mapped without BATs\n"); if (base >= border) return base; if (top >= border) @@ -304,11 +306,11 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys, /* * Preload a translation in the hash table */ -void hash_preload(struct mm_struct *mm, unsigned long ea) +static void hash_preload(struct mm_struct *mm, unsigned long ea) { pmd_t *pmd; - if (!Hash) + if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) return; pmd = pmd_off(mm, ea); if (!pmd_none(*pmd)) @@ -469,3 +471,7 @@ void __init setup_kuap(bool 
disabled) pr_warn("KUAP cannot be disabled yet on 6xx when compiled in\n"); } #endif + +void __init early_init_mmu(void) +{ +} diff --git a/arch/powerpc/mm/book3s32/nohash_low.S b/arch/powerpc/mm/book3s32/nohash_low.S new file mode 100644 index 000000000000..19f418b0ed2d --- /dev/null +++ b/arch/powerpc/mm/book3s32/nohash_low.S @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * This file contains low-level assembler routines for managing + * the PowerPC 603 tlb invalidation. + */ + +#include <asm/page.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> + +/* + * Flush an entry from the TLB + */ +#ifdef CONFIG_SMP +_GLOBAL(_tlbie) + lwz r8,TASK_CPU(r2) + oris r8,r8,11 + mfmsr r10 + rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ + rlwinm r0,r0,0,28,26 /* clear DR */ + mtmsr r0 + isync + lis r9,mmu_hash_lock@h + ori r9,r9,mmu_hash_lock@l + tophys(r9,r9) +10: lwarx r7,0,r9 + cmpwi 0,r7,0 + bne- 10b + stwcx. r8,0,r9 + bne- 10b + eieio + tlbie r3 + sync + TLBSYNC + li r0,0 + stw r0,0(r9) /* clear mmu_hash_lock */ + mtmsr r10 + isync + blr +_ASM_NOKPROBE_SYMBOL(_tlbie) +#endif /* CONFIG_SMP */ + +/* + * Flush the entire TLB. 603/603e only + */ +_GLOBAL(_tlbia) +#if defined(CONFIG_SMP) + lwz r8,TASK_CPU(r2) + oris r8,r8,10 + mfmsr r10 + rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ + rlwinm r0,r0,0,28,26 /* clear DR */ + mtmsr r0 + isync + lis r9,mmu_hash_lock@h + ori r9,r9,mmu_hash_lock@l + tophys(r9,r9) +10: lwarx r7,0,r9 + cmpwi 0,r7,0 + bne- 10b + stwcx. r8,0,r9 + bne- 10b +#endif /* CONFIG_SMP */ + li r5, 32 + lis r4, KERNELBASE@h + mtctr r5 + sync +0: tlbie r4 + addi r4, r4, 0x1000 + bdnz 0b + sync +#ifdef CONFIG_SMP + TLBSYNC + li r0,0 + stw r0,0(r9) /* clear mmu_hash_lock */ + mtmsr r10 + isync +#endif /* CONFIG_SMP */ + blr +_ASM_NOKPROBE_SYMBOL(_tlbia) diff --git a/arch/powerpc/mm/book3s32/tlb.c b/arch/powerpc/mm/book3s32/tlb.c index b6c7427daa6f..19f0ef950d77 100644 --- a/arch/powerpc/mm/book3s32/tlb.c +++ b/arch/powerpc/mm/book3s32/tlb.c @@ -30,35 +30,6 @@ #include <mm/mmu_decl.h> /* - * Called when unmapping pages to flush entries from the TLB/hash table. - */ -void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr) -{ - unsigned long ptephys; - - if (Hash) { - ptephys = __pa(ptep) & PAGE_MASK; - flush_hash_pages(mm->context.id, addr, ptephys, 1); - } -} -EXPORT_SYMBOL(flush_hash_entry); - -/* - * Called at the end of a mmu_gather operation to make sure the - * TLB flush is completely done. - */ -void tlb_flush(struct mmu_gather *tlb) -{ - if (!Hash) { - /* - * 603 needs to flush the whole TLB here since - * it doesn't use a hash table. - */ - _tlbia(); - } -} - -/* * TLB flushing: * * - flush_tlb_mm(mm) flushes the specified mm context TLB's @@ -71,8 +42,12 @@ void tlb_flush(struct mmu_gather *tlb) * -- Cort */ -static void flush_range(struct mm_struct *mm, unsigned long start, - unsigned long end) +/* + * For each address in the range, find the pte for the address + * and check _PAGE_HASHPTE bit; if it is set, find and destroy + * the corresponding HPTE. 
+ */ +void hash__flush_range(struct mm_struct *mm, unsigned long start, unsigned long end) { pmd_t *pmd; unsigned long pmd_end; @@ -80,13 +55,6 @@ static void flush_range(struct mm_struct *mm, unsigned long start, unsigned int ctx = mm->context.id; start &= PAGE_MASK; - if (!Hash) { - if (end - start <= PAGE_SIZE) - _tlbie(start); - else - _tlbia(); - return; - } if (start >= end) return; end = (end - 1) | ~PAGE_MASK; @@ -105,28 +73,15 @@ static void flush_range(struct mm_struct *mm, unsigned long start, ++pmd; } } - -/* - * Flush kernel TLB entries in the given range - */ -void flush_tlb_kernel_range(unsigned long start, unsigned long end) -{ - flush_range(&init_mm, start, end); -} -EXPORT_SYMBOL(flush_tlb_kernel_range); +EXPORT_SYMBOL(hash__flush_range); /* * Flush all the (user) entries for the address space described by mm. */ -void flush_tlb_mm(struct mm_struct *mm) +void hash__flush_tlb_mm(struct mm_struct *mm) { struct vm_area_struct *mp; - if (!Hash) { - _tlbia(); - return; - } - /* * It is safe to go down the mm's list of vmas when called * from dup_mmap, holding mmap_lock. It would also be safe from @@ -134,38 +89,18 @@ void flush_tlb_mm(struct mm_struct *mm) * but it seems dup_mmap is the only SMP case which gets here. */ for (mp = mm->mmap; mp != NULL; mp = mp->vm_next) - flush_range(mp->vm_mm, mp->vm_start, mp->vm_end); + hash__flush_range(mp->vm_mm, mp->vm_start, mp->vm_end); } -EXPORT_SYMBOL(flush_tlb_mm); +EXPORT_SYMBOL(hash__flush_tlb_mm); -void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) +void hash__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) { struct mm_struct *mm; pmd_t *pmd; - if (!Hash) { - _tlbie(vmaddr); - return; - } mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm; pmd = pmd_off(mm, vmaddr); if (!pmd_none(*pmd)) flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1); } -EXPORT_SYMBOL(flush_tlb_page); - -/* - * For each address in the range, find the pte for the address - * and check _PAGE_HASHPTE bit; if it is set, find and destroy - * the corresponding HPTE. - */ -void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) -{ - flush_range(vma->vm_mm, start, end); -} -EXPORT_SYMBOL(flush_tlb_range); - -void __init early_init_mmu(void) -{ -} +EXPORT_SYMBOL(hash__flush_tlb_page); diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile index fd393b8be14f..1b56d3af47d4 100644 --- a/arch/powerpc/mm/book3s64/Makefile +++ b/arch/powerpc/mm/book3s64/Makefile @@ -17,7 +17,7 @@ endif obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hash_hugepage.o obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage_prot.o obj-$(CONFIG_SPAPR_TCE_IOMMU) += iommu_api.o -obj-$(CONFIG_PPC_MEM_KEYS) += pkeys.o +obj-$(CONFIG_PPC_PKEY) += pkeys.o # Instrumenting the SLB fault path can lead to duplicate SLB entries KCOV_INSTRUMENT_slb.o := n diff --git a/arch/powerpc/mm/book3s64/hash_4k.c b/arch/powerpc/mm/book3s64/hash_4k.c index 22e787123cdf..7de1a8a0c62a 100644 --- a/arch/powerpc/mm/book3s64/hash_4k.c +++ b/arch/powerpc/mm/book3s64/hash_4k.c @@ -54,7 +54,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, * PP bits. 
_PAGE_USER is already PP bit 0x2, so we only * need to add in 0x1 if it's a read-only user page */ - rflags = htab_convert_pte_flags(new_pte); + rflags = htab_convert_pte_flags(new_pte, flags); rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE); if (cpu_has_feature(CPU_FTR_NOEXECUTE) && diff --git a/arch/powerpc/mm/book3s64/hash_64k.c b/arch/powerpc/mm/book3s64/hash_64k.c index 7084ce2951e6..998c6817ed47 100644 --- a/arch/powerpc/mm/book3s64/hash_64k.c +++ b/arch/powerpc/mm/book3s64/hash_64k.c @@ -72,7 +72,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, * Handle the subpage protection bits */ subpg_pte = new_pte & ~subpg_prot; - rflags = htab_convert_pte_flags(subpg_pte); + rflags = htab_convert_pte_flags(subpg_pte, flags); if (cpu_has_feature(CPU_FTR_NOEXECUTE) && !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { @@ -260,7 +260,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access, new_pte |= _PAGE_DIRTY; } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); - rflags = htab_convert_pte_flags(new_pte); + rflags = htab_convert_pte_flags(new_pte, flags); rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE); if (cpu_has_feature(CPU_FTR_NOEXECUTE) && diff --git a/arch/powerpc/mm/book3s64/hash_hugepage.c b/arch/powerpc/mm/book3s64/hash_hugepage.c index 440823797de7..c0fabe6c5a12 100644 --- a/arch/powerpc/mm/book3s64/hash_hugepage.c +++ b/arch/powerpc/mm/book3s64/hash_hugepage.c @@ -57,7 +57,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, if (!(old_pmd & (H_PAGE_THP_HUGE | _PAGE_DEVMAP))) return 0; - rflags = htab_convert_pte_flags(new_pmd); + rflags = htab_convert_pte_flags(new_pmd, flags); #if 0 if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { diff --git a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c index 964467b3a776..b5e9fff8c217 100644 --- a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c +++ b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c @@ -70,7 +70,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, if (old_pte & (H_PAGE_THP_HUGE | _PAGE_DEVMAP)) return 0; - rflags = htab_convert_pte_flags(new_pte); + rflags = htab_convert_pte_flags(new_pte, flags); if (unlikely(mmu_psize == MMU_PAGE_16G)) offset = PTRS_PER_PUD; else diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index fd9c7f91b092..567e0c6b3978 100644 --- a/arch/powerpc/mm/book3s64/hash_pgtable.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -443,7 +443,7 @@ void hash__mark_initmem_nx(void) start = (unsigned long)__init_begin; end = (unsigned long)__init_end; - pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL)); + pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL), HPTE_USE_KERNEL_KEY); WARN_ON(!hash__change_memory_range(start, end, pp)); } diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 24702c0a92e0..73b06adb6eeb 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -112,6 +112,7 @@ int mmu_linear_psize = MMU_PAGE_4K; EXPORT_SYMBOL_GPL(mmu_linear_psize); int mmu_virtual_psize = MMU_PAGE_4K; int mmu_vmalloc_psize = MMU_PAGE_4K; +EXPORT_SYMBOL_GPL(mmu_vmalloc_psize); #ifdef CONFIG_SPARSEMEM_VMEMMAP int mmu_vmemmap_psize = MMU_PAGE_4K; #endif @@ -186,7 +187,7 @@ static struct mmu_psize_def mmu_psize_defaults_gp[] = { * - We make sure R is always set and never lost * - C is _PAGE_DIRTY, and *should* always be set for a writeable mapping 
*/ -unsigned long htab_convert_pte_flags(unsigned long pteflags) +unsigned long htab_convert_pte_flags(unsigned long pteflags, unsigned long flags) { unsigned long rflags = 0; @@ -240,7 +241,7 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) */ rflags |= HPTE_R_M; - rflags |= pte_to_hpte_pkey_bits(pteflags); + rflags |= pte_to_hpte_pkey_bits(pteflags, flags); return rflags; } @@ -255,7 +256,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, shift = mmu_psize_defs[psize].shift; step = 1 << shift; - prot = htab_convert_pte_flags(prot); + prot = htab_convert_pte_flags(prot, HPTE_USE_KERNEL_KEY); DBG("htab_bolt_mapping(%lx..%lx -> %lx (%lx,%d,%d)\n", vstart, vend, pstart, prot, psize, ssize); @@ -845,7 +846,6 @@ int hash__remove_section_mapping(unsigned long start, unsigned long end) { int rc = htab_remove_mapping(start, end, mmu_linear_psize, mmu_kernel_ssize); - WARN_ON(rc < 0); if (resize_hpt_for_hotplug(memblock_phys_mem_size()) == -ENOSPC) pr_warn("Hash collision while resizing HPT\n"); @@ -1317,12 +1317,14 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, vsid = get_kernel_vsid(ea, mmu_kernel_ssize); psize = mmu_vmalloc_psize; ssize = mmu_kernel_ssize; + flags |= HPTE_USE_KERNEL_KEY; break; case IO_REGION_ID: vsid = get_kernel_vsid(ea, mmu_kernel_ssize); psize = mmu_io_psize; ssize = mmu_kernel_ssize; + flags |= HPTE_USE_KERNEL_KEY; break; default: /* @@ -1901,7 +1903,7 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi) unsigned long hash; unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize); unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize); - unsigned long mode = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL)); + unsigned long mode = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL), HPTE_USE_KERNEL_KEY); long ret; hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize); diff --git a/arch/powerpc/mm/book3s64/iommu_api.c b/arch/powerpc/mm/book3s64/iommu_api.c index 563faa10bb66..685d7bb3d26f 100644 --- a/arch/powerpc/mm/book3s64/iommu_api.c +++ b/arch/powerpc/mm/book3s64/iommu_api.c @@ -263,7 +263,7 @@ long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem) goto unlock_exit; /* Are there still mappings? 
*/ - if (atomic_cmpxchg(&mem->mapped, 1, 0) != 1) { + if (atomic64_cmpxchg(&mem->mapped, 1, 0) != 1) { ++mem->used; ret = -EBUSY; goto unlock_exit; diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index e18ae50a275c..5b3a3bae21aa 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -136,12 +136,18 @@ static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot) return __pmd(pmd_val(pmd) | pgprot_val(pgprot)); } +/* + * At some point we should be able to get rid of + * pmd_mkhuge() and mk_huge_pmd() when we update all the + * other archs to mark the pmd huge in pfn_pmd() + */ pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot) { unsigned long pmdv; pmdv = (pfn << PAGE_SHIFT) & PTE_RPN_MASK; - return pmd_set_protbits(__pmd(pmdv), pgprot); + + return __pmd_mkhuge(pmd_set_protbits(__pmd(pmdv), pgprot)); } pmd_t mk_pmd(struct page *page, pgprot_t pgprot) diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index b1d091a97611..f1c6f264ed91 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -9,9 +9,12 @@ #include <asm/mmu_context.h> #include <asm/mmu.h> #include <asm/setup.h> +#include <asm/smp.h> + #include <linux/pkeys.h> #include <linux/of_fdt.h> + int num_pkey; /* Max number of pkeys supported */ /* * Keys marked in the reservation list cannot be allocated by userspace @@ -25,8 +28,8 @@ static u32 initial_allocation_mask __ro_after_init; * Even if we allocate keys with sys_pkey_alloc(), we need to make sure * other thread still find the access denied using the same keys. */ -static u64 default_amr = ~0x0UL; -static u64 default_iamr = 0x5555555555555555UL; +u64 default_amr __ro_after_init = ~0x0UL; +u64 default_iamr __ro_after_init = 0x5555555555555555UL; u64 default_uamor __ro_after_init; /* * Key used to implement PROT_EXEC mmap. Denies READ/WRITE @@ -89,12 +92,14 @@ static int scan_pkey_feature(void) } } +#ifdef CONFIG_PPC_MEM_KEYS /* * Adjust the upper limit, based on the number of bits supported by * arch-neutral code. */ pkeys_total = min_t(int, pkeys_total, ((ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) + 1)); +#endif return pkeys_total; } @@ -102,6 +107,7 @@ void __init pkey_early_init_devtree(void) { int pkeys_total, i; +#ifdef CONFIG_PPC_MEM_KEYS /* * We define PKEY_DISABLE_EXECUTE in addition to the arch-neutral * generic defines for PKEY_DISABLE_ACCESS and PKEY_DISABLE_WRITE. @@ -117,7 +123,7 @@ void __init pkey_early_init_devtree(void) BUILD_BUG_ON(__builtin_clzl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) + __builtin_popcountl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) != (sizeof(u64) * BITS_PER_BYTE)); - +#endif /* * Only P7 and above supports SPRN_AMR update with MSR[PR] = 1 */ @@ -179,6 +185,27 @@ void __init pkey_early_init_devtree(void) default_uamor &= ~(0x3ul << pkeyshift(execute_only_key)); } + if (unlikely(num_pkey <= 3)) { + /* + * Insufficient number of keys to support + * KUAP/KUEP feature. + */ + disable_kuep = true; + disable_kuap = true; + WARN(1, "Disabling kernel user protection due to low (%d) max supported keys\n", num_pkey); + } else { + /* handle key which is used by kernel for KAUP */ + reserved_allocation_mask |= (0x1 << 3); + /* + * Mark access for kup_key in default amr so that + * we continue to operate with that AMR in + * copy_to/from_user(). + */ + default_amr &= ~(0x3ul << pkeyshift(3)); + default_iamr &= ~(0x1ul << pkeyshift(3)); + default_uamor &= ~(0x3ul << pkeyshift(3)); + } + /* * Allow access for only key 0. 
And prevent any other modification. */ @@ -223,54 +250,92 @@ out: return; } -void pkey_mm_init(struct mm_struct *mm) +#ifdef CONFIG_PPC_KUEP +void setup_kuep(bool disabled) { - if (!mmu_has_feature(MMU_FTR_PKEY)) + if (disabled) return; - mm_pkey_allocation_map(mm) = initial_allocation_mask; - mm->context.execute_only_pkey = execute_only_key; + /* + * On hash if PKEY feature is not enabled, disable KUAP too. + */ + if (!early_radix_enabled() && !early_mmu_has_feature(MMU_FTR_PKEY)) + return; + + if (smp_processor_id() == boot_cpuid) { + pr_info("Activating Kernel Userspace Execution Prevention\n"); + cur_cpu_spec->mmu_features |= MMU_FTR_BOOK3S_KUEP; + } + + /* + * Radix always uses key0 of the IAMR to determine if an access is + * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction + * fetch. + */ + mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED); + isync(); } +#endif -static inline u64 read_amr(void) +#ifdef CONFIG_PPC_KUAP +void setup_kuap(bool disabled) { - return mfspr(SPRN_AMR); + if (disabled) + return; + /* + * On hash if PKEY feature is not enabled, disable KUAP too. + */ + if (!early_radix_enabled() && !early_mmu_has_feature(MMU_FTR_PKEY)) + return; + + if (smp_processor_id() == boot_cpuid) { + pr_info("Activating Kernel Userspace Access Prevention\n"); + cur_cpu_spec->mmu_features |= MMU_FTR_BOOK3S_KUAP; + } + + /* + * Set the default kernel AMR values on all cpus. + */ + mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); + isync(); } +#endif -static inline void write_amr(u64 value) +static inline void update_current_thread_amr(u64 value) { - mtspr(SPRN_AMR, value); + current->thread.regs->amr = value; } -static inline u64 read_iamr(void) +static inline void update_current_thread_iamr(u64 value) { if (!likely(pkey_execute_disable_supported)) - return 0x0UL; + return; - return mfspr(SPRN_IAMR); + current->thread.regs->iamr = value; } -static inline void write_iamr(u64 value) +#ifdef CONFIG_PPC_MEM_KEYS +void pkey_mm_init(struct mm_struct *mm) { - if (!likely(pkey_execute_disable_supported)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return; - - mtspr(SPRN_IAMR, value); + mm_pkey_allocation_map(mm) = initial_allocation_mask; + mm->context.execute_only_pkey = execute_only_key; } static inline void init_amr(int pkey, u8 init_bits) { u64 new_amr_bits = (((u64)init_bits & 0x3UL) << pkeyshift(pkey)); - u64 old_amr = read_amr() & ~((u64)(0x3ul) << pkeyshift(pkey)); + u64 old_amr = current_thread_amr() & ~((u64)(0x3ul) << pkeyshift(pkey)); - write_amr(old_amr | new_amr_bits); + update_current_thread_amr(old_amr | new_amr_bits); } static inline void init_iamr(int pkey, u8 init_bits) { u64 new_iamr_bits = (((u64)init_bits & 0x1UL) << pkeyshift(pkey)); - u64 old_iamr = read_iamr() & ~((u64)(0x1ul) << pkeyshift(pkey)); + u64 old_iamr = current_thread_iamr() & ~((u64)(0x1ul) << pkeyshift(pkey)); - write_iamr(old_iamr | new_iamr_bits); + update_current_thread_iamr(old_iamr | new_iamr_bits); } /* @@ -313,42 +378,6 @@ int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, return 0; } -void thread_pkey_regs_save(struct thread_struct *thread) -{ - if (!mmu_has_feature(MMU_FTR_PKEY)) - return; - - /* - * TODO: Skip saving registers if @thread hasn't used any keys yet. 
- */ - thread->amr = read_amr(); - thread->iamr = read_iamr(); -} - -void thread_pkey_regs_restore(struct thread_struct *new_thread, - struct thread_struct *old_thread) -{ - if (!mmu_has_feature(MMU_FTR_PKEY)) - return; - - if (old_thread->amr != new_thread->amr) - write_amr(new_thread->amr); - if (old_thread->iamr != new_thread->iamr) - write_iamr(new_thread->iamr); -} - -void thread_pkey_regs_init(struct thread_struct *thread) -{ - if (!mmu_has_feature(MMU_FTR_PKEY)) - return; - - thread->amr = default_amr; - thread->iamr = default_iamr; - - write_amr(default_amr); - write_iamr(default_iamr); -} - int execute_only_pkey(struct mm_struct *mm) { return mm->context.execute_only_pkey; @@ -397,9 +426,9 @@ static bool pkey_access_permitted(int pkey, bool write, bool execute) pkey_shift = pkeyshift(pkey); if (execute) - return !(read_iamr() & (IAMR_EX_BIT << pkey_shift)); + return !(current_thread_iamr() & (IAMR_EX_BIT << pkey_shift)); - amr = read_amr(); + amr = current_thread_amr(); if (write) return !(amr & (AMR_WR_BIT << pkey_shift)); @@ -445,3 +474,5 @@ void arch_dup_pkeys(struct mm_struct *oldmm, struct mm_struct *mm) mm_pkey_allocation_map(mm) = mm_pkey_allocation_map(oldmm); mm->context.execute_only_pkey = oldmm->context.execute_only_pkey; } + +#endif /* CONFIG_PPC_MEM_KEYS */ diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 3adcf730f478..98f0b243c1ab 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -589,48 +589,6 @@ static void radix_init_amor(void) mtspr(SPRN_AMOR, (3ul << 62)); } -#ifdef CONFIG_PPC_KUEP -void setup_kuep(bool disabled) -{ - if (disabled || !early_radix_enabled()) - return; - - if (smp_processor_id() == boot_cpuid) { - pr_info("Activating Kernel Userspace Execution Prevention\n"); - cur_cpu_spec->mmu_features |= MMU_FTR_KUEP; - } - - /* - * Radix always uses key0 of the IAMR to determine if an access is - * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction - * fetch. - */ - mtspr(SPRN_IAMR, (1ul << 62)); -} -#endif - -#ifdef CONFIG_PPC_KUAP -void setup_kuap(bool disabled) -{ - if (disabled || !early_radix_enabled()) - return; - - if (smp_processor_id() == boot_cpuid) { - pr_info("Activating Kernel Userspace Access Prevention\n"); - cur_cpu_spec->mmu_features |= MMU_FTR_RADIX_KUAP; - } - - /* Make sure userspace can't change the AMR */ - mtspr(SPRN_UAMOR, 0); - - /* - * Set the default kernel AMR values on all cpus. - */ - mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); - isync(); -} -#endif - void __init radix__early_init_mmu(void) { unsigned long lpcr; @@ -721,6 +679,9 @@ void radix__early_init_mmu_secondary(void) radix__switch_mmu_context(NULL, &init_mm); tlbiel_all(); + + /* Make sure userspace can't change the AMR */ + mtspr(SPRN_UAMOR, 0); } void radix__mmu_cleanup_all(void) diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index b487b489d4b6..fb66d154b26c 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -56,14 +56,21 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) if (early_cpu_has_feature(CPU_FTR_HVMODE)) { /* MSR[HV] should flush partition scope translations first. 
*/ tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0); - for (set = 1; set < num_sets; set++) - tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0); + + if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) { + for (set = 1; set < num_sets; set++) + tlbiel_radix_set_isa300(set, is, 0, + RIC_FLUSH_TLB, 0); + } } /* Flush process scoped entries. */ tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1); - for (set = 1; set < num_sets; set++) - tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1); + + if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) { + for (set = 1; set < num_sets; set++) + tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1); + } ppc_after_tlbiel_barrier(); } @@ -300,9 +307,11 @@ static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric) return; } - /* For the remaining sets, just flush the TLB */ - for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++) - __tlbiel_pid(pid, set, RIC_FLUSH_TLB); + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + /* For the remaining sets, just flush the TLB */ + for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++) + __tlbiel_pid(pid, set, RIC_FLUSH_TLB); + } ppc_after_tlbiel_barrier(); asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory"); diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index c30fcbfa0e32..584567970c11 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -28,35 +28,8 @@ #include "internal.h" -enum slb_index { - LINEAR_INDEX = 0, /* Kernel linear map (0xc000000000000000) */ - KSTACK_INDEX = 1, /* Kernel stack map */ -}; - static long slb_allocate_user(struct mm_struct *mm, unsigned long ea); -#define slb_esid_mask(ssize) \ - (((ssize) == MMU_SEGSIZE_256M)? ESID_MASK: ESID_MASK_1T) - -static inline unsigned long mk_esid_data(unsigned long ea, int ssize, - enum slb_index index) -{ - return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | index; -} - -static inline unsigned long __mk_vsid_data(unsigned long vsid, int ssize, - unsigned long flags) -{ - return (vsid << slb_vsid_shift(ssize)) | flags | - ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT); -} - -static inline unsigned long mk_vsid_data(unsigned long ea, int ssize, - unsigned long flags) -{ - return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags); -} - bool stress_slb_enabled __initdata; static int __init parse_stress_slb(char *p) @@ -255,7 +228,6 @@ void slb_dump_contents(struct slb_entry *slb_ptr) return; pr_err("SLB contents of cpu 0x%x\n", smp_processor_id()); - pr_err("Last SLB entry inserted at slot %d\n", get_paca()->stab_rr); for (i = 0; i < mmu_slb_size; i++) { e = slb_ptr->esid; @@ -265,34 +237,38 @@ void slb_dump_contents(struct slb_entry *slb_ptr) if (!e && !v) continue; - pr_err("%02d %016lx %016lx\n", i, e, v); + pr_err("%02d %016lx %016lx %s\n", i, e, v, + (e & SLB_ESID_V) ? "VALID" : "NOT VALID"); - if (!(e & SLB_ESID_V)) { - pr_err("\n"); + if (!(e & SLB_ESID_V)) continue; - } + llp = v & SLB_VSID_LLP; if (v & SLB_VSID_B_1T) { - pr_err(" 1T ESID=%9lx VSID=%13lx LLP:%3lx\n", + pr_err(" 1T ESID=%9lx VSID=%13lx LLP:%3lx\n", GET_ESID_1T(e), (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T, llp); } else { - pr_err(" 256M ESID=%9lx VSID=%13lx LLP:%3lx\n", + pr_err(" 256M ESID=%9lx VSID=%13lx LLP:%3lx\n", GET_ESID(e), (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT, llp); } } - pr_err("----------------------------------\n"); - - /* Dump slb cache entires as well. 
*/ - pr_err("SLB cache ptr value = %d\n", get_paca()->slb_save_cache_ptr); - pr_err("Valid SLB cache entries:\n"); - n = min_t(int, get_paca()->slb_save_cache_ptr, SLB_CACHE_ENTRIES); - for (i = 0; i < n; i++) - pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]); - pr_err("Rest of SLB cache entries:\n"); - for (i = n; i < SLB_CACHE_ENTRIES; i++) - pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]); + + if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) { + /* RR is not so useful as it's often not used for allocation */ + pr_err("SLB RR allocator index %d\n", get_paca()->stab_rr); + + /* Dump slb cache entires as well. */ + pr_err("SLB cache ptr value = %d\n", get_paca()->slb_save_cache_ptr); + pr_err("Valid SLB cache entries:\n"); + n = min_t(int, get_paca()->slb_save_cache_ptr, SLB_CACHE_ENTRIES); + for (i = 0; i < n; i++) + pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]); + pr_err("Rest of SLB cache entries:\n"); + for (i = n; i < SLB_CACHE_ENTRIES; i++) + pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]); + } } void slb_vmalloc_update(void) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 0add963a849b..8961b44f350c 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -210,28 +210,26 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, return true; } - if (!is_exec && address < TASK_SIZE && (error_code & DSISR_PROTFAULT) && - !search_exception_tables(regs->nip)) { - pr_crit_ratelimited("Kernel attempted to access user page (%lx) - exploit attempt? (uid: %d)\n", - address, - from_kuid(&init_user_ns, current_uid())); - } - // Kernel fault on kernel address is bad if (address >= TASK_SIZE) return true; - // Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad - if (!search_exception_tables(regs->nip)) - return true; + // Read/write fault blocked by KUAP is bad, it can never succeed. + if (bad_kuap_fault(regs, address, is_write)) { + pr_crit_ratelimited("Kernel attempted to %s user page (%lx) - exploit attempt? (uid: %d)\n", + is_write ? "write" : "read", address, + from_kuid(&init_user_ns, current_uid())); - // Read/write fault in a valid region (the exception table search passed - // above), but blocked by KUAP is bad, it can never succeed. - if (bad_kuap_fault(regs, address, is_write)) - return true; + // Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad + if (!search_exception_tables(regs->nip)) + return true; + + // Read/write fault in a valid region (the exception table search passed + // above), but blocked by KUAP is bad, it can never succeed. + return WARN(true, "Bug: %s fault blocked by KUAP!", is_write ? "Write" : "Read"); + } - // What's left? Kernel fault on user in well defined regions (extable - // matched), and allowed by KUAP in the faulting context. + // What's left? Kernel fault on user and allowed by KUAP in the faulting context. return false; } @@ -303,7 +301,6 @@ static inline void cmo_account_page_fault(void) static inline void cmo_account_page_fault(void) { } #endif /* CONFIG_PPC_SMLPAR */ -#ifdef CONFIG_PPC_BOOK3S static void sanity_check_fault(bool is_write, bool is_user, unsigned long error_code, unsigned long address) { @@ -320,6 +317,9 @@ static void sanity_check_fault(bool is_write, bool is_user, return; } + if (!IS_ENABLED(CONFIG_PPC_BOOK3S)) + return; + /* * For hash translation mode, we should never get a * PROTFAULT. 
Any update to pte to reduce access will result in us @@ -354,10 +354,6 @@ static void sanity_check_fault(bool is_write, bool is_user, WARN_ON_ONCE(error_code & DSISR_PROTFAULT); } -#else -static void sanity_check_fault(bool is_write, bool is_user, - unsigned long error_code, unsigned long address) { } -#endif /* CONFIG_PPC_BOOK3S */ /* * Define the correct "is_write" bit in error_code based @@ -365,17 +361,19 @@ static void sanity_check_fault(bool is_write, bool is_user, */ #if (defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) #define page_fault_is_write(__err) ((__err) & ESR_DST) -#define page_fault_is_bad(__err) (0) #else #define page_fault_is_write(__err) ((__err) & DSISR_ISSTORE) -#if defined(CONFIG_PPC_8xx) +#endif + +#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) +#define page_fault_is_bad(__err) (0) +#elif defined(CONFIG_PPC_8xx) #define page_fault_is_bad(__err) ((__err) & DSISR_NOEXEC_OR_G) #elif defined(CONFIG_PPC64) #define page_fault_is_bad(__err) ((__err) & DSISR_BAD_FAULT_64S) #else #define page_fault_is_bad(__err) ((__err) & DSISR_BAD_FAULT_32S) #endif -#endif /* * For 600- and 800-family processors, the error_code parameter is DSISR @@ -547,10 +545,20 @@ NOKPROBE_SYMBOL(__do_page_fault); int do_page_fault(struct pt_regs *regs, unsigned long address, unsigned long error_code) { + const struct exception_table_entry *entry; enum ctx_state prev_state = exception_enter(); int rc = __do_page_fault(regs, address, error_code); exception_exit(prev_state); - return rc; + if (likely(!rc)) + return 0; + + entry = search_exception_tables(regs->nip); + if (unlikely(!entry)) + return rc; + + instruction_pointer_set(regs, extable_fixup(entry)); + + return 0; } NOKPROBE_SYMBOL(do_page_fault); @@ -559,17 +567,10 @@ NOKPROBE_SYMBOL(do_page_fault); * It is called from the DSI and ISI handlers in head.S and from some * of the procedures in traps.c. */ -void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) +void __bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) { - const struct exception_table_entry *entry; int is_write = page_fault_is_write(regs->dsisr); - /* Are we prepared to handle this fault? */ - if ((entry = search_exception_tables(regs->nip)) != NULL) { - regs->nip = extable_fixup(entry); - return; - } - /* kernel has accessed a bad area */ switch (TRAP(regs)) { @@ -603,3 +604,15 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) die("Kernel access of bad area", regs, sig); } + +void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) +{ + const struct exception_table_entry *entry; + + /* Are we prepared to handle this fault? 
*/ + entry = search_exception_tables(instruction_pointer(regs)); + if (entry) + instruction_pointer_set(regs, extable_fixup(entry)); + else + __bad_page_fault(regs, address, sig); +} diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 36c3800769fb..8b3cc4d688e8 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -294,6 +294,21 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte) static inline void hugepd_free(struct mmu_gather *tlb, void *hugepte) {} #endif +/* Return true when the entry to be freed maps more than the area being freed */ +static bool range_is_outside_limits(unsigned long start, unsigned long end, + unsigned long floor, unsigned long ceiling, + unsigned long mask) +{ + if ((start & mask) < floor) + return true; + if (ceiling) { + ceiling &= mask; + if (!ceiling) + return true; + } + return end - 1 > ceiling - 1; +} + static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift, unsigned long start, unsigned long end, unsigned long floor, unsigned long ceiling) @@ -309,15 +324,7 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif if (shift > pdshift) num_hugepd = 1 << (shift - pdshift); - start &= pdmask; - if (start < floor) - return; - if (ceiling) { - ceiling &= pdmask; - if (! ceiling) - return; - } - if (end - 1 > ceiling - 1) + if (range_is_outside_limits(start, end, floor, ceiling, pdmask)) return; for (i = 0; i < num_hugepd; i++, hpdp++) @@ -334,18 +341,9 @@ static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { - unsigned long start = addr; pgtable_t token = pmd_pgtable(*pmd); - start &= PMD_MASK; - if (start < floor) - return; - if (ceiling) { - ceiling &= PMD_MASK; - if (!ceiling) - return; - } - if (end - 1 > ceiling - 1) + if (range_is_outside_limits(addr, end, floor, ceiling, PMD_MASK)) return; pmd_clear(pmd); @@ -395,20 +393,12 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, addr, next, floor, ceiling); } while (addr = next, addr != end); - start &= PUD_MASK; - if (start < floor) - return; - if (ceiling) { - ceiling &= PUD_MASK; - if (!ceiling) - return; - } - if (end - 1 > ceiling - 1) + if (range_is_outside_limits(start, end, floor, ceiling, PUD_MASK)) return; - pmd = pmd_offset(pud, start); + pmd = pmd_offset(pud, start & PUD_MASK); pud_clear(pud); - pmd_free_tlb(tlb, pmd, start); + pmd_free_tlb(tlb, pmd, start & PUD_MASK); mm_dec_nr_pmds(tlb->mm); } @@ -446,20 +436,12 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d, } } while (addr = next, addr != end); - start &= PGDIR_MASK; - if (start < floor) - return; - if (ceiling) { - ceiling &= PGDIR_MASK; - if (!ceiling) - return; - } - if (end - 1 > ceiling - 1) + if (range_is_outside_limits(start, end, floor, ceiling, PGDIR_MASK)) return; - pud = pud_offset(p4d, start); + pud = pud_offset(p4d, start & PGDIR_MASK); p4d_clear(p4d); - pud_free_tlb(tlb, pud, start); + pud_free_tlb(tlb, pud, start & PGDIR_MASK); mm_dec_nr_puds(tlb->mm); } diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c index 8e0d792ac296..3a82f89827a5 100644 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c @@ -28,8 +28,8 @@ EXPORT_SYMBOL_GPL(kernstart_addr); unsigned long kernstart_virt_addr __ro_after_init = KERNELBASE; EXPORT_SYMBOL_GPL(kernstart_virt_addr); -static bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP); -static bool 
disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP); +bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP); +bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP); static int __init parse_nosmep(char *p) { @@ -47,12 +47,6 @@ static int __init parse_nosmap(char *p) } early_param("nosmap", parse_nosmap); -void __ref setup_kup(void) -{ - setup_kuep(disable_kuep); - setup_kuap(disable_kuap); -} - #define CTOR(shift) static void ctor_##shift(void *addr) \ { \ memset(addr, 0, sizeof(void *) << (shift)); \ diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 25284fdb300c..afab328d0887 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -54,11 +54,7 @@ #include <mm/mmu_decl.h> -#ifndef CPU_FTR_COHERENT_ICACHE -#define CPU_FTR_COHERENT_ICACHE 0 /* XXX for now */ -#define CPU_FTR_NOEXECUTE 0 -#endif - +static DEFINE_MUTEX(linear_mapping_mutex); unsigned long long memory_limit; bool init_mem_is_free; @@ -116,46 +112,70 @@ static void flush_dcache_range_chunked(unsigned long start, unsigned long stop, } } -int __ref arch_add_memory(int nid, u64 start, u64 size, - struct mhp_params *params) +int __ref arch_create_linear_mapping(int nid, u64 start, u64 size, + struct mhp_params *params) { - unsigned long start_pfn = start >> PAGE_SHIFT; - unsigned long nr_pages = size >> PAGE_SHIFT; int rc; start = (unsigned long)__va(start); + mutex_lock(&linear_mapping_mutex); rc = create_section_mapping(start, start + size, nid, params->pgprot); + mutex_unlock(&linear_mapping_mutex); if (rc) { - pr_warn("Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n", + pr_warn("Unable to create linear mapping for 0x%llx..0x%llx: %d\n", start, start + size, rc); return -EFAULT; } - - return __add_pages(nid, start_pfn, nr_pages, params); + return 0; } -void __ref arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap) +void __ref arch_remove_linear_mapping(u64 start, u64 size) { - unsigned long start_pfn = start >> PAGE_SHIFT; - unsigned long nr_pages = size >> PAGE_SHIFT; int ret; - __remove_pages(start_pfn, nr_pages, altmap); - /* Remove htab bolted mappings for this section of memory */ start = (unsigned long)__va(start); flush_dcache_range_chunked(start, start + size, FLUSH_CHUNK_SIZE); + mutex_lock(&linear_mapping_mutex); ret = remove_section_mapping(start, start + size); - WARN_ON_ONCE(ret); + mutex_unlock(&linear_mapping_mutex); + if (ret) + pr_warn("Unable to remove linear mapping for 0x%llx..0x%llx: %d\n", + start, start + size, ret); /* Ensure all vmalloc mappings are flushed in case they also * hit that section of memory */ vm_unmap_aliases(); } + +int __ref arch_add_memory(int nid, u64 start, u64 size, + struct mhp_params *params) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + int rc; + + rc = arch_create_linear_mapping(nid, start, size, params); + if (rc) + return rc; + rc = __add_pages(nid, start_pfn, nr_pages, params); + if (rc) + arch_remove_linear_mapping(start, size); + return rc; +} + +void __ref arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + + __remove_pages(start_pfn, nr_pages, altmap); + arch_remove_linear_mapping(start, size); +} #endif #ifndef CONFIG_NEED_MULTIPLE_NODES @@ -525,7 +545,7 @@ void __flush_dcache_icache(void *p) * space occurs, before returning to user space. 
*/ - if (cpu_has_feature(MMU_FTR_TYPE_44x)) + if (mmu_has_feature(MMU_FTR_TYPE_44x)) return; invalidate_icache_range(addr, addr + PAGE_SIZE); diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 1b6d39e9baed..998810e68562 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -82,17 +82,12 @@ static inline void print_system_hash_info(void) {} #else /* CONFIG_PPC_MMU_NOHASH */ -extern void _tlbie(unsigned long address); -extern void _tlbia(void); - void print_system_hash_info(void); #endif /* CONFIG_PPC_MMU_NOHASH */ #ifdef CONFIG_PPC32 -void hash_preload(struct mm_struct *mm, unsigned long ea); - extern void mapin_ram(void); extern void setbat(int index, unsigned long virt, phys_addr_t phys, unsigned int size, pgprot_t prot); @@ -101,7 +96,6 @@ extern int __map_without_bats; extern unsigned int rtas_data, rtas_size; struct hash_pte; -extern struct hash_pte *Hash; extern u8 early_hash[]; #endif /* CONFIG_PPC32 */ diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 231ca95f9ffb..19a3eec1d8c5 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -186,8 +186,7 @@ void mmu_mark_initmem_nx(void) mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, false); mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false); - if (IS_ENABLED(CONFIG_PIN_TLB_TEXT)) - mmu_pin_tlb(block_mapped_ram, false); + mmu_pin_tlb(block_mapped_ram, false); } #ifdef CONFIG_STRICT_KERNEL_RWX diff --git a/arch/powerpc/mm/nohash/fsl_booke.c b/arch/powerpc/mm/nohash/fsl_booke.c index 36bda962d3b3..03dacbe940e5 100644 --- a/arch/powerpc/mm/nohash/fsl_booke.c +++ b/arch/powerpc/mm/nohash/fsl_booke.c @@ -223,15 +223,9 @@ void flush_instruction_cache(void) { unsigned long tmp; - if (IS_ENABLED(CONFIG_E200)) { - tmp = mfspr(SPRN_L1CSR0); - tmp |= L1CSR0_CFI | L1CSR0_CLFC; - mtspr(SPRN_L1CSR0, tmp); - } else { - tmp = mfspr(SPRN_L1CSR1); - tmp |= L1CSR1_ICFI | L1CSR1_ICLFR; - mtspr(SPRN_L1CSR1, tmp); - } + tmp = mfspr(SPRN_L1CSR1); + tmp |= L1CSR1_ICFI | L1CSR1_ICLFR; + mtspr(SPRN_L1CSR1, tmp); isync(); } diff --git a/arch/powerpc/mm/nohash/tlb_low.S b/arch/powerpc/mm/nohash/tlb_low.S index eaeee402f96e..68797e072f55 100644 --- a/arch/powerpc/mm/nohash/tlb_low.S +++ b/arch/powerpc/mm/nohash/tlb_low.S @@ -92,36 +92,25 @@ _GLOBAL(__tlbil_va) tlbsx. r6,0,r3 bne 10f sync -BEGIN_MMU_FTR_SECTION - b 2f -END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) +#ifndef CONFIG_PPC_47x /* On 440 There are only 64 TLB entries, so r3 < 64, which means bit * 22, is clear. Since 22 is the V bit in the TLB_PAGEID, loading this * value will invalidate the TLB entry. */ tlbwe r6,r6,PPC44x_TLB_PAGEID - isync -10: wrtee r10 - blr -2: -#ifdef CONFIG_PPC_47x +#else oris r7,r6,0x8000 /* specify way explicitly */ clrrwi r4,r3,12 /* get an EPN for the hashing with V = 0 */ ori r4,r4,PPC47x_TLBE_SIZE tlbwe r4,r7,0 /* write it */ +#endif /* !CONFIG_PPC_47x */ isync - wrtee r10 +10: wrtee r10 blr -#else /* CONFIG_PPC_47x */ -1: trap - EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0; -#endif /* !CONFIG_PPC_47x */ _GLOBAL(_tlbil_all) _GLOBAL(_tlbil_pid) -BEGIN_MMU_FTR_SECTION - b 2f -END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) +#ifndef CONFIG_PPC_47x li r3,0 sync @@ -136,8 +125,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) isync blr -2: -#ifdef CONFIG_PPC_47x +#else /* 476 variant. 
There's not simple way to do this, hopefully we'll * try to limit the amount of such full invalidates */ @@ -179,11 +167,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) b 1b /* Then loop */ 1: isync /* Sync shadows */ wrtee r11 -#else /* CONFIG_PPC_47x */ -1: trap - EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0; -#endif /* !CONFIG_PPC_47x */ blr +#endif /* !CONFIG_PPC_47x */ #ifdef CONFIG_PPC_47x diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 079159e97bca..e0ec67a16887 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -84,7 +84,7 @@ int __ref map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot) pg = pte_alloc_kernel(pd, va); else pg = early_pte_alloc_kernel(pd, va); - if (pg != 0) { + if (pg) { err = 0; /* The PTE should never be already set nor present in the * hash table @@ -112,10 +112,6 @@ static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top) ktext = ((char *)v >= _stext && (char *)v < etext) || ((char *)v >= _sinittext && (char *)v < _einittext); map_kernel_page(v, p, ktext ? PAGE_KERNEL_TEXT : PAGE_KERNEL); -#ifdef CONFIG_PPC_BOOK3S_32 - if (ktext) - hash_preload(&init_mm, v); -#endif v += PAGE_SIZE; p += PAGE_SIZE; } |