Diffstat (limited to 'arch'): 225 files changed, 5666 insertions, 4924 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index e307f777d942..ae7391627054 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -66,7 +66,7 @@ config NEED_PER_CPU_PAGE_FIRST_CHUNK config NR_IRQS int "Number of virtual interrupt numbers" - range 32 32768 + range 32 1048576 default "512" help This defines the number of virtual interrupt numbers the kernel @@ -87,7 +87,7 @@ config PPC_WATCHDOG help This is a placeholder when the powerpc hardlockup detector watchdog is selected (arch/powerpc/kernel/watchdog.c). It is - seleted via the generic lockup detector menu which is why we + selected via the generic lockup detector menu which is why we have no standalone config option for it here. config STACKTRACE_SUPPORT @@ -177,6 +177,7 @@ config PPC select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL + select GENERIC_GETTIMEOFDAY select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU select HAVE_ARCH_JUMP_LABEL @@ -207,6 +208,7 @@ config PPC select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER select HAVE_GCC_PLUGINS if GCC_VERSION >= 50200 # plugin support on gcc <= 5.1 is buggy on PPC + select HAVE_GENERIC_VDSO select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) select HAVE_IDE select HAVE_IOREMAP_PROT @@ -312,6 +314,10 @@ config GENERIC_BUG default y depends on BUG +config GENERIC_BUG_RELATIVE_POINTERS + def_bool y + depends on GENERIC_BUG + config SYS_SUPPORTS_APM_EMULATION default y if PMAC_APM_EMU bool @@ -418,6 +424,7 @@ config HUGETLB_PAGE_SIZE_VARIABLE config MATH_EMULATION bool "Math emulation" depends on 4xx || PPC_8xx || PPC_MPC832x || BOOKE + select PPC_FPU_REGS help Some PowerPC chips designed for embedded applications do not have a floating-point unit and therefore do not implement the @@ -657,9 +664,15 @@ config IRQ_ALL_CPUS reported with SMP Power Macintoshes with this option enabled. config NUMA - bool "NUMA support" - depends on PPC64 - default y if SMP && PPC_PSERIES + bool "NUMA Memory Allocation and Scheduler Support" + depends on PPC64 && SMP + default y if PPC_PSERIES || PPC_POWERNV + help + Enable NUMA (Non-Uniform Memory Access) support. + + The kernel will try to allocate memory used by a CPU on the + local memory controller of the CPU and add some more + NUMA awareness to the kernel. config NODES_SHIFT int @@ -793,8 +806,7 @@ config DATA_SHIFT_BOOL bool "Set custom data alignment" depends on ADVANCED_OPTIONS depends on STRICT_KERNEL_RWX || DEBUG_PAGEALLOC - depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && \ - (!PIN_TLB_TEXT || !STRICT_KERNEL_RWX)) + depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && !STRICT_KERNEL_RWX) help This option allows you to set the kernel data alignment. 
When RAM is mapped by blocks, the alignment needs to fit the size and diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 5c8c06215dd4..08cf0eade56a 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -374,6 +374,11 @@ ppc64le_allmodconfig: $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/le.config \ -f $(srctree)/Makefile allmodconfig +PHONY += ppc64le_allnoconfig +ppc64le_allnoconfig: + $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/ppc64le.config \ + -f $(srctree)/Makefile allnoconfig + PHONY += ppc64_book3e_allmodconfig ppc64_book3e_allmodconfig: $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/85xx-64bit.config \ @@ -405,18 +410,24 @@ PHONY += install install: $(Q)$(MAKE) $(build)=$(boot) install -PHONY += vdso_install -vdso_install: -ifdef CONFIG_PPC64 - $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso64 $@ -endif -ifdef CONFIG_VDSO32 - $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso32 $@ -endif - archclean: $(Q)$(MAKE) $(clean)=$(boot) +ifeq ($(KBUILD_EXTMOD),) +# We need to generate vdso-offsets.h before compiling certain files in kernel/. +# In order to do that, we should use the archprepare target, but we can't since +# asm-offsets.h is included in some files used to generate vdso-offsets.h, and +# asm-offsets.h is built in prepare0, for which archprepare is a dependency. +# Therefore we need to generate the header after prepare0 has been made, hence +# this hack. +prepare: vdso_prepare +vdso_prepare: prepare0 + $(if $(CONFIG_VDSO32),$(Q)$(MAKE) \ + $(build)=arch/powerpc/kernel/vdso32 include/generated/vdso32-offsets.h) + $(if $(CONFIG_PPC64),$(Q)$(MAKE) \ + $(build)=arch/powerpc/kernel/vdso64 include/generated/vdso64-offsets.h) +endif + archprepare: checkbin archheaders: diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 1659963a8f1d..ec0b2186e41c 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -21,7 +21,11 @@ all: $(obj)/zImage ifdef CROSS32_COMPILE +ifdef CONFIG_CC_IS_CLANG + BOOTCC := $(CROSS32_COMPILE)clang +else BOOTCC := $(CROSS32_COMPILE)gcc +endif BOOTAR := $(CROSS32_COMPILE)ar else BOOTCC := $(CC) diff --git a/arch/powerpc/boot/ps3.c b/arch/powerpc/boot/ps3.c index 6e4efbdb6b7c..f157717ae814 100644 --- a/arch/powerpc/boot/ps3.c +++ b/arch/powerpc/boot/ps3.c @@ -21,13 +21,6 @@ extern int lv1_get_logical_ppe_id(u64 *out_1); extern int lv1_get_repository_node_value(u64 in_1, u64 in_2, u64 in_3, u64 in_4, u64 in_5, u64 *out_1, u64 *out_2); -#ifdef DEBUG -#define DBG(fmt...) printf(fmt) -#else -static inline int __attribute__ ((format (printf, 1, 2))) DBG( - const char *fmt, ...) 
{return 0;} -#endif - BSS_STACK(4096); /* A buffer that may be edited by tools operating on a zImage binary so as to diff --git a/arch/powerpc/boot/util.S b/arch/powerpc/boot/util.S index d03cdb7606dc..6a92376daf3f 100644 --- a/arch/powerpc/boot/util.S +++ b/arch/powerpc/boot/util.S @@ -42,14 +42,11 @@ udelay: * (nanoseconds + (timebase_period_ns - 1 )) / timebase_period_ns * timebase_period_ns defaults to 60 (16.6MHz) */ mflr r5 - bl 0f + bcl 20,31,0f 0: mflr r6 mtlr r5 - lis r5,0b@ha - addi r5,r5,0b@l - subf r5,r5,r6 /* In case we're relocated */ - addis r5,r5,timebase_period_ns@ha - lwz r5,timebase_period_ns@l(r5) + addis r5,r6,(timebase_period_ns-0b)@ha + lwz r5,(timebase_period_ns-0b)@l(r5) add r4,r4,r5 addi r4,r4,-1 divw r4,r4,r5 /* BUS ticks */ diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index cd58a62e810d..41fa0a8715e3 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -46,6 +46,8 @@ compression=.gz uboot_comp=gzip pie= format= +notext= +rodynamic= # cross-compilation prefix CROSS= @@ -353,6 +355,8 @@ epapr) platformo="$object/pseries-head.o $object/epapr.o $object/epapr-wrapper.o" link_address='0x20000000' pie=-pie + notext='-z notext' + rodynamic=$(if ${CROSS}ld -V 2>&1 | grep -q LLD ; then echo "-z rodynamic"; fi) ;; mvme5100) platformo="$object/fixed-head.o $object/mvme5100.o" @@ -493,7 +497,7 @@ if [ "$platform" != "miboot" ]; then text_start="-Ttext $link_address" fi #link everything - ${CROSS}ld -m $format -T $lds $text_start $pie $nodl -o "$ofile" $map \ + ${CROSS}ld -m $format -T $lds $text_start $pie $nodl $rodynamic $notext -o "$ofile" $map \ $platformo $tmp $object/wrapper.a rm $tmp fi diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S index a21f3a76e06f..d6f072865627 100644 --- a/arch/powerpc/boot/zImage.lds.S +++ b/arch/powerpc/boot/zImage.lds.S @@ -34,6 +34,17 @@ SECTIONS __dynamic_start = .; *(.dynamic) } + +#ifdef CONFIG_PPC64_BOOT_WRAPPER + . = ALIGN(256); + .got : + { + __toc_start = .; + *(.got) + *(.toc) + } +#endif + .hash : { *(.hash) } .interp : { *(.interp) } .rela.dyn : @@ -76,16 +87,6 @@ SECTIONS _esm_blob_end = .; } -#ifdef CONFIG_PPC64_BOOT_WRAPPER - . = ALIGN(256); - .got : - { - __toc_start = .; - *(.got) - *(.toc) - } -#endif - . = ALIGN(4096); .bss : { diff --git a/arch/powerpc/configs/disable-werror.config b/arch/powerpc/configs/disable-werror.config new file mode 100644 index 000000000000..6ea12a12432c --- /dev/null +++ b/arch/powerpc/configs/disable-werror.config @@ -0,0 +1 @@ +CONFIG_PPC_DISABLE_WERROR=y diff --git a/arch/powerpc/configs/ppc64le.config b/arch/powerpc/configs/ppc64le.config new file mode 100644 index 000000000000..14dca1062c1b --- /dev/null +++ b/arch/powerpc/configs/ppc64le.config @@ -0,0 +1,2 @@ +CONFIG_PPC64=y +CONFIG_CPU_LITTLE_ENDIAN=y diff --git a/arch/powerpc/configs/security.config b/arch/powerpc/configs/security.config new file mode 100644 index 000000000000..1c91a35c6a73 --- /dev/null +++ b/arch/powerpc/configs/security.config @@ -0,0 +1,15 @@ +# This is the equivalent of booting with lockdown=integrity +CONFIG_SECURITY=y +CONFIG_SECURITYFS=y +CONFIG_SECURITY_LOCKDOWN_LSM=y +CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y +CONFIG_LOCK_DOWN_KERNEL_FORCE_INTEGRITY=y + +# These are some general, reasonably inexpensive hardening options +CONFIG_HARDENED_USERCOPY=y +CONFIG_FORTIFY_SOURCE=y +CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y + +# UBSAN bounds checking is very cheap and good for hardening +CONFIG_UBSAN=y +# CONFIG_UBSAN_MISC is not set
\ No newline at end of file diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index 8a55eb8cc97b..61c6e8b200e8 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -10,6 +10,7 @@ #include <linux/types.h> #include <asm/cmpxchg.h> #include <asm/barrier.h> +#include <asm/asm-const.h> /* * Since *_return_relaxed and {cmp}xchg_relaxed are implemented with @@ -26,14 +27,14 @@ static __inline__ int atomic_read(const atomic_t *v) { int t; - __asm__ __volatile__("lwz%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter)); + __asm__ __volatile__("lwz%U1%X1 %0,%1" : "=r"(t) : "m"UPD_CONSTR(v->counter)); return t; } static __inline__ void atomic_set(atomic_t *v, int i) { - __asm__ __volatile__("stw%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i)); + __asm__ __volatile__("stw%U0%X0 %1,%0" : "=m"UPD_CONSTR(v->counter) : "r"(i)); } #define ATOMIC_OP(op, asm_op) \ @@ -316,14 +317,14 @@ static __inline__ s64 atomic64_read(const atomic64_t *v) { s64 t; - __asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter)); + __asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : "m"UPD_CONSTR(v->counter)); return t; } static __inline__ void atomic64_set(atomic64_t *v, s64 i) { - __asm__ __volatile__("std%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i)); + __asm__ __volatile__("std%U0%X0 %1,%0" : "=m"UPD_CONSTR(v->counter) : "r"(i)); } #define ATOMIC64_OP(op, asm_op) \ diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index f53c42380832..aecfde829d5d 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -40,7 +40,7 @@ #define wmb() __asm__ __volatile__ ("sync" : : : "memory") /* The sub-arch has lwsync */ -#if defined(__powerpc64__) || defined(CONFIG_PPC_E500MC) +#if defined(CONFIG_PPC64) || defined(CONFIG_PPC_E500MC) # define SMPWMB LWSYNC #else # define SMPWMB eieio diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index 4a4d3afd5340..299ab33505a6 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -216,15 +216,34 @@ static inline void arch___clear_bit_unlock(int nr, volatile unsigned long *addr) */ static inline int fls(unsigned int x) { - return 32 - __builtin_clz(x); + int lz; + + if (__builtin_constant_p(x)) + return x ? 32 - __builtin_clz(x) : 0; + asm("cntlzw %0,%1" : "=r" (lz) : "r" (x)); + return 32 - lz; } #include <asm-generic/bitops/builtin-__fls.h> +/* + * 64-bit can do this using one cntlzd (count leading zeroes doubleword) + * instruction; for 32-bit we use the generic version, which does two + * 32-bit fls calls. + */ +#ifdef CONFIG_PPC64 static inline int fls64(__u64 x) { - return 64 - __builtin_clzll(x); + int lz; + + if (__builtin_constant_p(x)) + return x ? 
64 - __builtin_clzll(x) : 0; + asm("cntlzd %0,%1" : "=r" (lz) : "r" (x)); + return 64 - lz; } +#else +#include <asm-generic/bitops/fls64.h> +#endif #ifdef CONFIG_PPC64 unsigned int __arch_hweight8(unsigned int w); diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 32fd4452e960..a0117a9d5b06 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -183,11 +183,7 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) unsigned long begin = regs->kuap & 0xf0000000; unsigned long end = regs->kuap << 28; - if (!is_write) - return false; - - return WARN(address < begin || address >= end, - "Bug: write fault blocked by segment registers !"); + return is_write && (address < begin || address >= end); } #endif /* CONFIG_PPC_KUAP */ diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h index 2e277ca0170f..685c589e723f 100644 --- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h @@ -90,10 +90,11 @@ struct hash_pte { typedef struct { unsigned long id; - unsigned long vdso_base; + void __user *vdso; } mm_context_t; void update_bats(void); +static inline void cleanup_cpu_mmu_context(void) { }; /* patch sites */ extern s32 patch__hash_page_A0, patch__hash_page_A1, patch__hash_page_A2; diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 1376be95e975..415ae29fa73a 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -240,8 +240,14 @@ extern void add_hash_page(unsigned context, unsigned long va, unsigned long pmdval); /* Flush an entry from the TLB/hash table */ -extern void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, - unsigned long address); +static inline void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr) +{ + if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) { + unsigned long ptephys = __pa(ptep) & PAGE_MASK; + + flush_hash_pages(mm->context.id, addr, ptephys, 1); + } +} /* * PTE updates. 
This function is called whenever an existing @@ -293,10 +299,9 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, { unsigned long old; old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0); - if (old & _PAGE_HASHPTE) { - unsigned long ptephys = __pa(ptep) & PAGE_MASK; - flush_hash_pages(mm->context.id, addr, ptephys, 1); - } + if (old & _PAGE_HASHPTE) + flush_hash_entry(mm, ptep, addr); + return (old & _PAGE_ACCESSED) != 0; } #define ptep_test_and_clear_young(__vma, __addr, __ptep) \ @@ -524,9 +529,9 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, if (pte_val(*ptep) & _PAGE_HASHPTE) flush_hash_entry(mm, ptep, addr); __asm__ __volatile__("\ - stw%U0%X0 %2,%0\n\ + stw%X0 %2,%0\n\ eieio\n\ - stw%U0%X0 %L2,%1" + stw%X1 %L2,%1" : "=m" (*ptep), "=m" (*((unsigned char *)ptep+4)) : "r" (pte) : "memory"); diff --git a/arch/powerpc/include/asm/book3s/32/tlbflush.h b/arch/powerpc/include/asm/book3s/32/tlbflush.h index 068085b709fb..d941c06d4f2e 100644 --- a/arch/powerpc/include/asm/book3s/32/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/32/tlbflush.h @@ -6,12 +6,69 @@ /* * TLB flushing for "classic" hash-MMU 32-bit CPUs, 6xx, 7xx, 7xxx */ -extern void flush_tlb_mm(struct mm_struct *mm); -extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); -extern void flush_tlb_page_nohash(struct vm_area_struct *vma, unsigned long addr); -extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end); -extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); +void hash__flush_tlb_mm(struct mm_struct *mm); +void hash__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); +void hash__flush_range(struct mm_struct *mm, unsigned long start, unsigned long end); + +#ifdef CONFIG_SMP +void _tlbie(unsigned long address); +#else +static inline void _tlbie(unsigned long address) +{ + asm volatile ("tlbie %0; sync" : : "r" (address) : "memory"); +} +#endif +void _tlbia(void); + +/* + * Called at the end of a mmu_gather operation to make sure the + * TLB flush is completely done. + */ +static inline void tlb_flush(struct mmu_gather *tlb) +{ + /* 603 needs to flush the whole TLB here since it doesn't use a hash table. 
*/ + if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) + _tlbia(); +} + +static inline void flush_range(struct mm_struct *mm, unsigned long start, unsigned long end) +{ + start &= PAGE_MASK; + if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) + hash__flush_range(mm, start, end); + else if (end - start <= PAGE_SIZE) + _tlbie(start); + else + _tlbia(); +} + +static inline void flush_tlb_mm(struct mm_struct *mm) +{ + if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) + hash__flush_tlb_mm(mm); + else + _tlbia(); +} + +static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) +{ + if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) + hash__flush_tlb_page(vma, vmaddr); + else + _tlbie(vmaddr); +} + +static inline void +flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) +{ + flush_range(vma->vm_mm, start, end); +} + +static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + flush_range(&init_mm, start, end); +} + static inline void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) { diff --git a/arch/powerpc/include/asm/book3s/64/hash-pkey.h b/arch/powerpc/include/asm/book3s/64/hash-pkey.h index 795010897e5d..f1e60d579f6c 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-pkey.h +++ b/arch/powerpc/include/asm/book3s/64/hash-pkey.h @@ -2,6 +2,9 @@ #ifndef _ASM_POWERPC_BOOK3S_64_HASH_PKEY_H #define _ASM_POWERPC_BOOK3S_64_HASH_PKEY_H +/* We use key 3 for KERNEL */ +#define HASH_DEFAULT_KERNEL_KEY (HPTE_R_KEY_BIT0 | HPTE_R_KEY_BIT1) + static inline u64 hash__vmflag_to_pte_pkey_bits(u64 vm_flags) { return (((vm_flags & VM_PKEY_BIT0) ? H_PTE_PKEY_BIT0 : 0x0UL) | @@ -11,13 +14,23 @@ static inline u64 hash__vmflag_to_pte_pkey_bits(u64 vm_flags) ((vm_flags & VM_PKEY_BIT4) ? H_PTE_PKEY_BIT4 : 0x0UL)); } -static inline u64 pte_to_hpte_pkey_bits(u64 pteflags) +static inline u64 pte_to_hpte_pkey_bits(u64 pteflags, unsigned long flags) { - return (((pteflags & H_PTE_PKEY_BIT4) ? HPTE_R_KEY_BIT4 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT3) ? HPTE_R_KEY_BIT3 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT2) ? HPTE_R_KEY_BIT2 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT1) ? HPTE_R_KEY_BIT1 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT0) ? HPTE_R_KEY_BIT0 : 0x0UL)); + unsigned long pte_pkey; + + pte_pkey = (((pteflags & H_PTE_PKEY_BIT4) ? HPTE_R_KEY_BIT4 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT3) ? HPTE_R_KEY_BIT3 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT2) ? HPTE_R_KEY_BIT2 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT1) ? HPTE_R_KEY_BIT1 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT0) ? 
HPTE_R_KEY_BIT0 : 0x0UL)); + + if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP) || + mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) { + if ((pte_pkey == 0) && (flags & HPTE_USE_KERNEL_KEY)) + return HASH_DEFAULT_KERNEL_KEY; + } + + return pte_pkey; } static inline u16 hash__pte_to_pkey_bits(u64 pteflags) diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 73ad038ed10b..d959b0195ad9 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -145,7 +145,7 @@ extern void hash__mark_initmem_nx(void); extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long pte, int huge); -extern unsigned long htab_convert_pte_flags(unsigned long pteflags); +unsigned long htab_convert_pte_flags(unsigned long pteflags, unsigned long flags); /* Atomic PTE updates */ static inline unsigned long hash__pte_update(struct mm_struct *mm, unsigned long addr, diff --git a/arch/powerpc/include/asm/book3s/64/kexec.h b/arch/powerpc/include/asm/book3s/64/kexec.h index 6b5c3a248ba2..d4b9d476ecba 100644 --- a/arch/powerpc/include/asm/book3s/64/kexec.h +++ b/arch/powerpc/include/asm/book3s/64/kexec.h @@ -3,6 +3,7 @@ #ifndef _ASM_POWERPC_BOOK3S_64_KEXEC_H_ #define _ASM_POWERPC_BOOK3S_64_KEXEC_H_ +#include <asm/plpar_wrappers.h> #define reset_sprs reset_sprs static inline void reset_sprs(void) @@ -14,6 +15,10 @@ static inline void reset_sprs(void) if (cpu_has_feature(CPU_FTR_ARCH_207S)) { mtspr(SPRN_IAMR, 0); + if (cpu_has_feature(CPU_FTR_HVMODE)) + mtspr(SPRN_CIABR, 0); + else + plpar_set_ciabr(0); } /* Do we need isync()? We are going via a kexec reset */ diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h deleted file mode 100644 index a39e2d193fdc..000000000000 --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h +++ /dev/null @@ -1,205 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H -#define _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H - -#include <linux/const.h> -#include <asm/reg.h> - -#define AMR_KUAP_BLOCK_READ UL(0x4000000000000000) -#define AMR_KUAP_BLOCK_WRITE UL(0x8000000000000000) -#define AMR_KUAP_BLOCKED (AMR_KUAP_BLOCK_READ | AMR_KUAP_BLOCK_WRITE) -#define AMR_KUAP_SHIFT 62 - -#ifdef __ASSEMBLY__ - -.macro kuap_restore_amr gpr1, gpr2 -#ifdef CONFIG_PPC_KUAP - BEGIN_MMU_FTR_SECTION_NESTED(67) - mfspr \gpr1, SPRN_AMR - ld \gpr2, STACK_REGS_KUAP(r1) - cmpd \gpr1, \gpr2 - beq 998f - isync - mtspr SPRN_AMR, \gpr2 - /* No isync required, see kuap_restore_amr() */ -998: - END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) -#endif -.endm - -#ifdef CONFIG_PPC_KUAP -.macro kuap_check_amr gpr1, gpr2 -#ifdef CONFIG_PPC_KUAP_DEBUG - BEGIN_MMU_FTR_SECTION_NESTED(67) - mfspr \gpr1, SPRN_AMR - li \gpr2, (AMR_KUAP_BLOCKED >> AMR_KUAP_SHIFT) - sldi \gpr2, \gpr2, AMR_KUAP_SHIFT -999: tdne \gpr1, \gpr2 - EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) - END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) -#endif -.endm -#endif - -.macro kuap_save_amr_and_lock gpr1, gpr2, use_cr, msr_pr_cr -#ifdef CONFIG_PPC_KUAP - BEGIN_MMU_FTR_SECTION_NESTED(67) - .ifnb \msr_pr_cr - bne \msr_pr_cr, 99f - .endif - mfspr \gpr1, SPRN_AMR - std \gpr1, STACK_REGS_KUAP(r1) - li \gpr2, (AMR_KUAP_BLOCKED >> AMR_KUAP_SHIFT) - sldi \gpr2, \gpr2, AMR_KUAP_SHIFT - cmpd \use_cr, \gpr1, \gpr2 - beq \use_cr, 99f - // We don't isync here because we very recently entered via rfid - mtspr SPRN_AMR, \gpr2 - isync -99: - 
END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) -#endif -.endm - -#else /* !__ASSEMBLY__ */ - -#include <linux/jump_label.h> - -DECLARE_STATIC_KEY_FALSE(uaccess_flush_key); - -#ifdef CONFIG_PPC_KUAP - -#include <asm/mmu.h> -#include <asm/ptrace.h> - -static inline void kuap_restore_amr(struct pt_regs *regs, unsigned long amr) -{ - if (mmu_has_feature(MMU_FTR_RADIX_KUAP) && unlikely(regs->kuap != amr)) { - isync(); - mtspr(SPRN_AMR, regs->kuap); - /* - * No isync required here because we are about to RFI back to - * previous context before any user accesses would be made, - * which is a CSI. - */ - } -} - -static inline unsigned long kuap_get_and_check_amr(void) -{ - if (mmu_has_feature(MMU_FTR_RADIX_KUAP)) { - unsigned long amr = mfspr(SPRN_AMR); - if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) /* kuap_check_amr() */ - WARN_ON_ONCE(amr != AMR_KUAP_BLOCKED); - return amr; - } - return 0; -} - -static inline void kuap_check_amr(void) -{ - if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && mmu_has_feature(MMU_FTR_RADIX_KUAP)) - WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED); -} - -/* - * We support individually allowing read or write, but we don't support nesting - * because that would require an expensive read/modify write of the AMR. - */ - -static inline unsigned long get_kuap(void) -{ - /* - * We return AMR_KUAP_BLOCKED when we don't support KUAP because - * prevent_user_access_return needs to return AMR_KUAP_BLOCKED to - * cause restore_user_access to do a flush. - * - * This has no effect in terms of actually blocking things on hash, - * so it doesn't break anything. - */ - if (!early_mmu_has_feature(MMU_FTR_RADIX_KUAP)) - return AMR_KUAP_BLOCKED; - - return mfspr(SPRN_AMR); -} - -static inline void set_kuap(unsigned long value) -{ - if (!early_mmu_has_feature(MMU_FTR_RADIX_KUAP)) - return; - - /* - * ISA v3.0B says we need a CSI (Context Synchronising Instruction) both - * before and after the move to AMR. See table 6 on page 1134. - */ - isync(); - mtspr(SPRN_AMR, value); - isync(); -} - -static inline bool -bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) -{ - return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) && - (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)), - "Bug: %s fault blocked by AMR!", is_write ? 
"Write" : "Read"); -} -#else /* CONFIG_PPC_KUAP */ -static inline void kuap_restore_amr(struct pt_regs *regs, unsigned long amr) { } - -static inline unsigned long kuap_get_and_check_amr(void) -{ - return 0UL; -} - -static inline unsigned long get_kuap(void) -{ - return AMR_KUAP_BLOCKED; -} - -static inline void set_kuap(unsigned long value) { } -#endif /* !CONFIG_PPC_KUAP */ - -static __always_inline void allow_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) -{ - // This is written so we can resolve to a single case at build time - BUILD_BUG_ON(!__builtin_constant_p(dir)); - if (dir == KUAP_READ) - set_kuap(AMR_KUAP_BLOCK_WRITE); - else if (dir == KUAP_WRITE) - set_kuap(AMR_KUAP_BLOCK_READ); - else if (dir == KUAP_READ_WRITE) - set_kuap(0); - else - BUILD_BUG(); -} - -static inline void prevent_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) -{ - set_kuap(AMR_KUAP_BLOCKED); - if (static_branch_unlikely(&uaccess_flush_key)) - do_uaccess_flush(); -} - -static inline unsigned long prevent_user_access_return(void) -{ - unsigned long flags = get_kuap(); - - set_kuap(AMR_KUAP_BLOCKED); - if (static_branch_unlikely(&uaccess_flush_key)) - do_uaccess_flush(); - - return flags; -} - -static inline void restore_user_access(unsigned long flags) -{ - set_kuap(flags); - if (static_branch_unlikely(&uaccess_flush_key) && flags == AMR_KUAP_BLOCKED) - do_uaccess_flush(); -} -#endif /* __ASSEMBLY__ */ - -#endif /* _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H */ diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h new file mode 100644 index 000000000000..f50f72e535aa --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -0,0 +1,442 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_BOOK3S_64_KUP_H +#define _ASM_POWERPC_BOOK3S_64_KUP_H + +#include <linux/const.h> +#include <asm/reg.h> + +#define AMR_KUAP_BLOCK_READ UL(0x5455555555555555) +#define AMR_KUAP_BLOCK_WRITE UL(0xa8aaaaaaaaaaaaaa) +#define AMR_KUEP_BLOCKED UL(0x5455555555555555) +#define AMR_KUAP_BLOCKED (AMR_KUAP_BLOCK_READ | AMR_KUAP_BLOCK_WRITE) + +#ifdef __ASSEMBLY__ + +.macro kuap_user_restore gpr1, gpr2 +#if defined(CONFIG_PPC_PKEY) + BEGIN_MMU_FTR_SECTION_NESTED(67) + b 100f // skip_restore_amr + END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_PKEY, 67) + /* + * AMR and IAMR are going to be different when + * returning to userspace. + */ + ld \gpr1, STACK_REGS_AMR(r1) + + /* + * If kuap feature is not enabled, do the mtspr + * only if AMR value is different. + */ + BEGIN_MMU_FTR_SECTION_NESTED(68) + mfspr \gpr2, SPRN_AMR + cmpd \gpr1, \gpr2 + beq 99f + END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_BOOK3S_KUAP, 68) + + isync + mtspr SPRN_AMR, \gpr1 +99: + /* + * Restore IAMR only when returning to userspace + */ + ld \gpr1, STACK_REGS_IAMR(r1) + + /* + * If kuep feature is not enabled, do the mtspr + * only if IAMR value is different. + */ + BEGIN_MMU_FTR_SECTION_NESTED(69) + mfspr \gpr2, SPRN_IAMR + cmpd \gpr1, \gpr2 + beq 100f + END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_BOOK3S_KUEP, 69) + + isync + mtspr SPRN_IAMR, \gpr1 + +100: //skip_restore_amr + /* No isync required, see kuap_user_restore() */ +#endif +.endm + +.macro kuap_kernel_restore gpr1, gpr2 +#if defined(CONFIG_PPC_PKEY) + + BEGIN_MMU_FTR_SECTION_NESTED(67) + /* + * AMR is going to be mostly the same since we are + * returning to the kernel. Compare and do a mtspr. 
+ */ + ld \gpr2, STACK_REGS_AMR(r1) + mfspr \gpr1, SPRN_AMR + cmpd \gpr1, \gpr2 + beq 100f + isync + mtspr SPRN_AMR, \gpr2 + /* + * No isync required, see kuap_restore_amr() + * No need to restore IAMR when returning to kernel space. + */ +100: + END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_BOOK3S_KUAP, 67) +#endif +.endm + +#ifdef CONFIG_PPC_KUAP +.macro kuap_check_amr gpr1, gpr2 +#ifdef CONFIG_PPC_KUAP_DEBUG + BEGIN_MMU_FTR_SECTION_NESTED(67) + mfspr \gpr1, SPRN_AMR + /* Prevent access to userspace using any key values */ + LOAD_REG_IMMEDIATE(\gpr2, AMR_KUAP_BLOCKED) +999: tdne \gpr1, \gpr2 + EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) + END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_BOOK3S_KUAP, 67) +#endif +.endm +#endif + +/* + * if (pkey) { + * + * save AMR -> stack; + * if (kuap) { + * if (AMR != BLOCKED) + * KUAP_BLOCKED -> AMR; + * } + * if (from_user) { + * save IAMR -> stack; + * if (kuep) { + * KUEP_BLOCKED ->IAMR + * } + * } + * return; + * } + * + * if (kuap) { + * if (from_kernel) { + * save AMR -> stack; + * if (AMR != BLOCKED) + * KUAP_BLOCKED -> AMR; + * } + * + * } + */ +.macro kuap_save_amr_and_lock gpr1, gpr2, use_cr, msr_pr_cr +#if defined(CONFIG_PPC_PKEY) + + /* + * if both pkey and kuap is disabled, nothing to do + */ + BEGIN_MMU_FTR_SECTION_NESTED(68) + b 100f // skip_save_amr + END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_PKEY | MMU_FTR_BOOK3S_KUAP, 68) + + /* + * if pkey is disabled and we are entering from userspace + * don't do anything. + */ + BEGIN_MMU_FTR_SECTION_NESTED(67) + .ifnb \msr_pr_cr + /* + * Without pkey we are not changing AMR outside the kernel + * hence skip this completely. + */ + bne \msr_pr_cr, 100f // from userspace + .endif + END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_PKEY, 67) + + /* + * pkey is enabled or pkey is disabled but entering from kernel + */ + mfspr \gpr1, SPRN_AMR + std \gpr1, STACK_REGS_AMR(r1) + + /* + * update kernel AMR with AMR_KUAP_BLOCKED only + * if KUAP feature is enabled + */ + BEGIN_MMU_FTR_SECTION_NESTED(69) + LOAD_REG_IMMEDIATE(\gpr2, AMR_KUAP_BLOCKED) + cmpd \use_cr, \gpr1, \gpr2 + beq \use_cr, 102f + /* + * We don't isync here because we very recently entered via an interrupt + */ + mtspr SPRN_AMR, \gpr2 + isync +102: + END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_BOOK3S_KUAP, 69) + + /* + * if entering from kernel we don't need save IAMR + */ + .ifnb \msr_pr_cr + beq \msr_pr_cr, 100f // from kernel space + mfspr \gpr1, SPRN_IAMR + std \gpr1, STACK_REGS_IAMR(r1) + + /* + * update kernel IAMR with AMR_KUEP_BLOCKED only + * if KUEP feature is enabled + */ + BEGIN_MMU_FTR_SECTION_NESTED(70) + LOAD_REG_IMMEDIATE(\gpr2, AMR_KUEP_BLOCKED) + mtspr SPRN_IAMR, \gpr2 + isync + END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_BOOK3S_KUEP, 70) + .endif + +100: // skip_save_amr +#endif +.endm + +#else /* !__ASSEMBLY__ */ + +#include <linux/jump_label.h> + +DECLARE_STATIC_KEY_FALSE(uaccess_flush_key); + +#ifdef CONFIG_PPC_PKEY + +#include <asm/mmu.h> +#include <asm/ptrace.h> + +/* + * For kernel thread that doesn't have thread.regs return + * default AMR/IAMR values. 
+ */ +static inline u64 current_thread_amr(void) +{ + if (current->thread.regs) + return current->thread.regs->amr; + return AMR_KUAP_BLOCKED; +} + +static inline u64 current_thread_iamr(void) +{ + if (current->thread.regs) + return current->thread.regs->iamr; + return AMR_KUEP_BLOCKED; +} +#endif /* CONFIG_PPC_PKEY */ + +#ifdef CONFIG_PPC_KUAP + +static inline void kuap_user_restore(struct pt_regs *regs) +{ + bool restore_amr = false, restore_iamr = false; + unsigned long amr, iamr; + + if (!mmu_has_feature(MMU_FTR_PKEY)) + return; + + if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { + amr = mfspr(SPRN_AMR); + if (amr != regs->amr) + restore_amr = true; + } else { + restore_amr = true; + } + + if (!mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) { + iamr = mfspr(SPRN_IAMR); + if (iamr != regs->iamr) + restore_iamr = true; + } else { + restore_iamr = true; + } + + + if (restore_amr || restore_iamr) { + isync(); + if (restore_amr) + mtspr(SPRN_AMR, regs->amr); + if (restore_iamr) + mtspr(SPRN_IAMR, regs->iamr); + } + /* + * No isync required here because we are about to rfi + * back to previous context before any user accesses + * would be made, which is a CSI. + */ +} + +static inline void kuap_kernel_restore(struct pt_regs *regs, + unsigned long amr) +{ + if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { + if (unlikely(regs->amr != amr)) { + isync(); + mtspr(SPRN_AMR, regs->amr); + /* + * No isync required here because we are about to rfi + * back to previous context before any user accesses + * would be made, which is a CSI. + */ + } + } + /* + * No need to restore IAMR when returning to kernel space. + */ +} + +static inline unsigned long kuap_get_and_check_amr(void) +{ + if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { + unsigned long amr = mfspr(SPRN_AMR); + if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) /* kuap_check_amr() */ + WARN_ON_ONCE(amr != AMR_KUAP_BLOCKED); + return amr; + } + return 0; +} + +#else /* CONFIG_PPC_PKEY */ + +static inline void kuap_user_restore(struct pt_regs *regs) +{ +} + +static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) +{ +} + +static inline unsigned long kuap_get_and_check_amr(void) +{ + return 0; +} + +#endif /* CONFIG_PPC_PKEY */ + + +#ifdef CONFIG_PPC_KUAP + +static inline void kuap_check_amr(void) +{ + if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) + WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED); +} + +/* + * We support individually allowing read or write, but we don't support nesting + * because that would require an expensive read/modify write of the AMR. + */ + +static inline unsigned long get_kuap(void) +{ + /* + * We return AMR_KUAP_BLOCKED when we don't support KUAP because + * prevent_user_access_return needs to return AMR_KUAP_BLOCKED to + * cause restore_user_access to do a flush. + * + * This has no effect in terms of actually blocking things on hash, + * so it doesn't break anything. + */ + if (!early_mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) + return AMR_KUAP_BLOCKED; + + return mfspr(SPRN_AMR); +} + +static inline void set_kuap(unsigned long value) +{ + if (!early_mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) + return; + + /* + * ISA v3.0B says we need a CSI (Context Synchronising Instruction) both + * before and after the move to AMR. See table 6 on page 1134. 
+ */ + isync(); + mtspr(SPRN_AMR, value); + isync(); +} + +static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address, + bool is_write) +{ + if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) + return false; + /* + * For radix this will be a storage protection fault (DSISR_PROTFAULT). + * For hash this will be a key fault (DSISR_KEYFAULT) + */ + /* + * We do have exception table entry, but accessing the + * userspace results in fault. This could be because we + * didn't unlock the AMR or access is denied by userspace + * using a key value that blocks access. We are only interested + * in catching the use case of accessing without unlocking + * the AMR. Hence check for BLOCK_WRITE/READ against AMR. + */ + if (is_write) { + return (regs->amr & AMR_KUAP_BLOCK_WRITE) == AMR_KUAP_BLOCK_WRITE; + } + return (regs->amr & AMR_KUAP_BLOCK_READ) == AMR_KUAP_BLOCK_READ; +} + +static __always_inline void allow_user_access(void __user *to, const void __user *from, + unsigned long size, unsigned long dir) +{ + unsigned long thread_amr = 0; + + // This is written so we can resolve to a single case at build time + BUILD_BUG_ON(!__builtin_constant_p(dir)); + + if (mmu_has_feature(MMU_FTR_PKEY)) + thread_amr = current_thread_amr(); + + if (dir == KUAP_READ) + set_kuap(thread_amr | AMR_KUAP_BLOCK_WRITE); + else if (dir == KUAP_WRITE) + set_kuap(thread_amr | AMR_KUAP_BLOCK_READ); + else if (dir == KUAP_READ_WRITE) + set_kuap(thread_amr); + else + BUILD_BUG(); +} + +#else /* CONFIG_PPC_KUAP */ + +static inline unsigned long get_kuap(void) +{ + return AMR_KUAP_BLOCKED; +} + +static inline void set_kuap(unsigned long value) { } + +static __always_inline void allow_user_access(void __user *to, const void __user *from, + unsigned long size, unsigned long dir) +{ } + +#endif /* !CONFIG_PPC_KUAP */ + +static inline void prevent_user_access(void __user *to, const void __user *from, + unsigned long size, unsigned long dir) +{ + set_kuap(AMR_KUAP_BLOCKED); + if (static_branch_unlikely(&uaccess_flush_key)) + do_uaccess_flush(); +} + +static inline unsigned long prevent_user_access_return(void) +{ + unsigned long flags = get_kuap(); + + set_kuap(AMR_KUAP_BLOCKED); + if (static_branch_unlikely(&uaccess_flush_key)) + do_uaccess_flush(); + + return flags; +} + +static inline void restore_user_access(unsigned long flags) +{ + set_kuap(flags); + if (static_branch_unlikely(&uaccess_flush_key) && flags == AMR_KUAP_BLOCKED) + do_uaccess_flush(); +} +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_POWERPC_BOOK3S_64_KUP_H */ diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 683a9c7d1b03..066b1d34c7bc 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -452,6 +452,7 @@ static inline unsigned long hpt_hash(unsigned long vpn, #define HPTE_LOCAL_UPDATE 0x1 #define HPTE_NOHPTE_UPDATE 0x2 +#define HPTE_USE_KERNEL_KEY 0x4 extern int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, @@ -842,6 +843,32 @@ static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize) unsigned htab_shift_for_mem_size(unsigned long mem_size); -#endif /* __ASSEMBLY__ */ +enum slb_index { + LINEAR_INDEX = 0, /* Kernel linear map (0xc000000000000000) */ + KSTACK_INDEX = 1, /* Kernel stack map */ +}; +#define slb_esid_mask(ssize) \ + (((ssize) == MMU_SEGSIZE_256M) ? 
ESID_MASK : ESID_MASK_1T) + +static inline unsigned long mk_esid_data(unsigned long ea, int ssize, + enum slb_index index) +{ + return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | index; +} + +static inline unsigned long __mk_vsid_data(unsigned long vsid, int ssize, + unsigned long flags) +{ + return (vsid << slb_vsid_shift(ssize)) | flags | + ((unsigned long)ssize << SLB_VSID_SSIZE_SHIFT); +} + +static inline unsigned long mk_vsid_data(unsigned long ea, int ssize, + unsigned long flags) +{ + return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags); +} + +#endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_MMU_HASH_H_ */ diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 750918451dd2..995bbcdd0ef8 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -111,7 +111,7 @@ typedef struct { struct hash_mm_context *hash_context; - unsigned long vdso_base; + void __user *vdso; /* * pagetable fragment support */ @@ -199,7 +199,7 @@ extern int mmu_io_psize; void mmu_early_init_devtree(void); void hash__early_init_devtree(void); void radix__early_init_devtree(void); -#ifdef CONFIG_PPC_MEM_KEYS +#ifdef CONFIG_PPC_PKEY void pkey_early_init_devtree(void); #else static inline void pkey_early_init_devtree(void) {} diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index cd3feeac6e87..a39886681629 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1231,13 +1231,28 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) return hash__pmd_same(pmd_a, pmd_b); } -static inline pmd_t pmd_mkhuge(pmd_t pmd) +static inline pmd_t __pmd_mkhuge(pmd_t pmd) { if (radix_enabled()) return radix__pmd_mkhuge(pmd); return hash__pmd_mkhuge(pmd); } +/* + * pfn_pmd return a pmd_t that can be used as pmd pte entry. 
+ */ +static inline pmd_t pmd_mkhuge(pmd_t pmd) +{ +#ifdef CONFIG_DEBUG_VM + if (radix_enabled()) + WARN_ON((pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE)) == 0); + else + WARN_ON((pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE | H_PAGE_THP_HUGE)) != + cpu_to_be64(_PAGE_PTE | H_PAGE_THP_HUGE)); +#endif + return pmd; +} + #define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS extern int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, diff --git a/arch/powerpc/include/asm/book3s/64/pkeys.h b/arch/powerpc/include/asm/book3s/64/pkeys.h index b7d9f4267bcd..3b8640498f5b 100644 --- a/arch/powerpc/include/asm/book3s/64/pkeys.h +++ b/arch/powerpc/include/asm/book3s/64/pkeys.h @@ -6,6 +6,8 @@ #include <asm/book3s/64/hash-pkey.h> extern u64 __ro_after_init default_uamor; +extern u64 __ro_after_init default_amr; +extern u64 __ro_after_init default_iamr; static inline u64 vmflag_to_pte_pkey_bits(u64 vm_flags) { diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 338f36cd9934..464f8ca8a5c9 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -12,7 +12,7 @@ #ifdef CONFIG_DEBUG_BUGVERBOSE .macro EMIT_BUG_ENTRY addr,file,line,flags .section __bug_table,"aw" -5001: PPC_LONG \addr, 5002f +5001: .4byte \addr - 5001b, 5002f - 5001b .short \line, \flags .org 5001b+BUG_ENTRY_SIZE .previous @@ -23,7 +23,7 @@ #else .macro EMIT_BUG_ENTRY addr,file,line,flags .section __bug_table,"aw" -5001: PPC_LONG \addr +5001: .4byte \addr - 5001b .short \flags .org 5001b+BUG_ENTRY_SIZE .previous @@ -36,14 +36,14 @@ #ifdef CONFIG_DEBUG_BUGVERBOSE #define _EMIT_BUG_ENTRY \ ".section __bug_table,\"aw\"\n" \ - "2:\t" PPC_LONG "1b, %0\n" \ + "2:\t.4byte 1b - 2b, %0 - 2b\n" \ "\t.short %1, %2\n" \ ".org 2b+%3\n" \ ".previous\n" #else #define _EMIT_BUG_ENTRY \ ".section __bug_table,\"aw\"\n" \ - "2:\t" PPC_LONG "1b\n" \ + "2:\t.4byte 1b - 2b\n" \ "\t.short %2\n" \ ".org 2b+%3\n" \ ".previous\n" @@ -113,6 +113,7 @@ struct pt_regs; extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long); extern void bad_page_fault(struct pt_regs *, unsigned long, int); +void __bad_page_fault(struct pt_regs *regs, unsigned long address, int sig); extern void _exception(int, struct pt_regs *, int, unsigned long); extern void _exception_pkey(struct pt_regs *, unsigned long, int); extern void die(const char *, struct pt_regs *, long); diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index 82f099ba2411..d5da7ddbf0fc 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -163,7 +163,7 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) */ __wsum __csum_partial(const void *buff, int len, __wsum sum); -static inline __wsum csum_partial(const void *buff, int len, __wsum sum) +static __always_inline __wsum csum_partial(const void *buff, int len, __wsum sum) { if (__builtin_constant_p(len) && len <= 16 && (len & 1) == 0) { if (len == 2) diff --git a/arch/powerpc/include/asm/clocksource.h b/arch/powerpc/include/asm/clocksource.h new file mode 100644 index 000000000000..0a26ef13a34a --- /dev/null +++ b/arch/powerpc/include/asm/clocksource.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_CLOCKSOURCE_H +#define _ASM_POWERPC_CLOCKSOURCE_H + +#include <asm/vdso/clocksource.h> + +#endif /* _ASM_POWERPC_CLOCKSOURCE_H */ diff --git a/arch/powerpc/include/asm/cpm1.h b/arch/powerpc/include/asm/cpm1.h index a116fe931789..3bdd74739cb8 100644 --- 
a/arch/powerpc/include/asm/cpm1.h +++ b/arch/powerpc/include/asm/cpm1.h @@ -68,6 +68,7 @@ extern void cpm_reset(void); #define PROFF_SPI ((uint)0x0180) #define PROFF_SCC3 ((uint)0x0200) #define PROFF_SMC1 ((uint)0x0280) +#define PROFF_DSP1 ((uint)0x02c0) #define PROFF_SCC4 ((uint)0x0300) #define PROFF_SMC2 ((uint)0x0380) diff --git a/arch/powerpc/include/asm/cpu_setup_power.h b/arch/powerpc/include/asm/cpu_setup_power.h new file mode 100644 index 000000000000..24be9131f803 --- /dev/null +++ b/arch/powerpc/include/asm/cpu_setup_power.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2020 IBM Corporation + */ +void __setup_cpu_power7(unsigned long offset, struct cpu_spec *spec); +void __restore_cpu_power7(void); +void __setup_cpu_power8(unsigned long offset, struct cpu_spec *spec); +void __restore_cpu_power8(void); +void __setup_cpu_power9(unsigned long offset, struct cpu_spec *spec); +void __restore_cpu_power9(void); +void __setup_cpu_power10(unsigned long offset, struct cpu_spec *spec); +void __restore_cpu_power10(void); diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 3d2f94afc13a..5f21a5bab467 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -41,7 +41,6 @@ extern int machine_check_4xx(struct pt_regs *regs); extern int machine_check_440A(struct pt_regs *regs); extern int machine_check_e500mc(struct pt_regs *regs); extern int machine_check_e500(struct pt_regs *regs); -extern int machine_check_e200(struct pt_regs *regs); extern int machine_check_47x(struct pt_regs *regs); int machine_check_8xx(struct pt_regs *regs); int machine_check_83xx(struct pt_regs *regs); @@ -137,7 +136,7 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTR_DBELL ASM_CONST(0x00000004) #define CPU_FTR_CAN_NAP ASM_CONST(0x00000008) #define CPU_FTR_DEBUG_LVL_EXC ASM_CONST(0x00000010) -#define CPU_FTR_NODSISRALIGN ASM_CONST(0x00000020) +// ASM_CONST(0x00000020) Free #define CPU_FTR_FPU_UNAVAILABLE ASM_CONST(0x00000040) #define CPU_FTR_LWSYNC ASM_CONST(0x00000080) #define CPU_FTR_NOEXECUTE ASM_CONST(0x00000100) @@ -219,9 +218,7 @@ static inline void cpu_feature_keys_init(void) { } #ifndef __ASSEMBLY__ -#define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_NOEXECUTE | CPU_FTR_NODSISRALIGN) - -#define MMU_FTR_PPCAS_ARCH_V2 (MMU_FTR_TLBIEL | MMU_FTR_16M_PAGE) +#define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_NOEXECUTE) /* We only set the altivec features if the kernel was compiled with altivec * support @@ -369,7 +366,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX) #define CPU_FTRS_82XX (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_NOEXECUTE) #define CPU_FTRS_G2_LE (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \ - CPU_FTR_MAYBE_CAN_NAP) + CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NOEXECUTE) #define CPU_FTRS_E300 (CPU_FTR_MAYBE_CAN_DOZE | \ CPU_FTR_MAYBE_CAN_NAP | \ CPU_FTR_COMMON | CPU_FTR_NOEXECUTE) @@ -378,38 +375,33 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_COMMON | CPU_FTR_FPU_UNAVAILABLE | CPU_FTR_NOEXECUTE) #define CPU_FTRS_CLASSIC32 (CPU_FTR_COMMON) #define CPU_FTRS_8XX (CPU_FTR_NOEXECUTE) -#define CPU_FTRS_40X (CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) -#define CPU_FTRS_44X (CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) -#define CPU_FTRS_440x6 (CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE | \ +#define CPU_FTRS_40X (CPU_FTR_NOEXECUTE) +#define CPU_FTRS_44X (CPU_FTR_NOEXECUTE) +#define CPU_FTRS_440x6 (CPU_FTR_NOEXECUTE | \ CPU_FTR_INDEXED_DCR) 
#define CPU_FTRS_47X (CPU_FTRS_440x6) -#define CPU_FTRS_E200 (CPU_FTR_SPE_COMP | \ - CPU_FTR_NODSISRALIGN | CPU_FTR_COHERENT_ICACHE | \ - CPU_FTR_NOEXECUTE | \ - CPU_FTR_DEBUG_LVL_EXC) #define CPU_FTRS_E500 (CPU_FTR_MAYBE_CAN_DOZE | \ - CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NODSISRALIGN | \ + CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | \ CPU_FTR_NOEXECUTE) #define CPU_FTRS_E500_2 (CPU_FTR_MAYBE_CAN_DOZE | \ CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | \ - CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) -#define CPU_FTRS_E500MC (CPU_FTR_NODSISRALIGN | \ + CPU_FTR_NOEXECUTE) +#define CPU_FTRS_E500MC ( \ CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ CPU_FTR_DBELL | CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV) /* * e5500/e6500 erratum A-006958 is a timebase bug that can use the * same workaround as CPU_FTR_CELL_TB_BUG. */ -#define CPU_FTRS_E5500 (CPU_FTR_NODSISRALIGN | \ +#define CPU_FTRS_E5500 ( \ CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV | CPU_FTR_CELL_TB_BUG) -#define CPU_FTRS_E6500 (CPU_FTR_NODSISRALIGN | \ +#define CPU_FTRS_E6500 ( \ CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV | CPU_FTR_ALTIVEC_COMP | \ CPU_FTR_CELL_TB_BUG | CPU_FTR_SMT) -#define CPU_FTRS_GENERIC_32 (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN) /* 64-bit CPUs */ #define CPU_FTRS_PPC970 (CPU_FTR_LWSYNC | \ @@ -489,7 +481,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_PURR | CPU_FTR_REAL_LE | CPU_FTR_DABRX) #define CPU_FTRS_COMPATIBLE (CPU_FTR_PPCAS_ARCH_V2) -#ifdef __powerpc64__ +#ifdef CONFIG_PPC64 #ifdef CONFIG_PPC_BOOK3E #define CPU_FTRS_POSSIBLE (CPU_FTRS_E6500 | CPU_FTRS_E5500) #else @@ -510,18 +502,19 @@ static inline void cpu_feature_keys_init(void) { } #else enum { CPU_FTRS_POSSIBLE = -#ifdef CONFIG_PPC_BOOK3S_32 - CPU_FTRS_603 | CPU_FTRS_604 | CPU_FTRS_740_NOTAU | +#ifdef CONFIG_PPC_BOOK3S_604 + CPU_FTRS_604 | CPU_FTRS_740_NOTAU | CPU_FTRS_740 | CPU_FTRS_750 | CPU_FTRS_750FX1 | CPU_FTRS_750FX2 | CPU_FTRS_750FX | CPU_FTRS_750GX | CPU_FTRS_7400_NOTAU | CPU_FTRS_7400 | CPU_FTRS_7450_20 | CPU_FTRS_7450_21 | CPU_FTRS_7450_23 | CPU_FTRS_7455_1 | CPU_FTRS_7455_20 | CPU_FTRS_7455 | CPU_FTRS_7447_10 | - CPU_FTRS_7447 | CPU_FTRS_7447A | CPU_FTRS_82XX | - CPU_FTRS_G2_LE | CPU_FTRS_E300 | CPU_FTRS_E300C2 | + CPU_FTRS_7447 | CPU_FTRS_7447A | CPU_FTRS_CLASSIC32 | -#else - CPU_FTRS_GENERIC_32 | +#endif +#ifdef CONFIG_PPC_BOOK3S_603 + CPU_FTRS_603 | CPU_FTRS_82XX | + CPU_FTRS_G2_LE | CPU_FTRS_E300 | CPU_FTRS_E300C2 | #endif #ifdef CONFIG_PPC_8xx CPU_FTRS_8XX | @@ -529,14 +522,10 @@ enum { #ifdef CONFIG_40x CPU_FTRS_40X | #endif -#ifdef CONFIG_44x - CPU_FTRS_44X | CPU_FTRS_440x6 | -#endif #ifdef CONFIG_PPC_47x CPU_FTRS_47X | CPU_FTR_476_DD2 | -#endif -#ifdef CONFIG_E200 - CPU_FTRS_E200 | +#elif defined(CONFIG_44x) + CPU_FTRS_44X | CPU_FTRS_440x6 | #endif #ifdef CONFIG_E500 CPU_FTRS_E500 | CPU_FTRS_E500_2 | @@ -548,7 +537,7 @@ enum { }; #endif /* __powerpc64__ */ -#ifdef __powerpc64__ +#ifdef CONFIG_PPC64 #ifdef CONFIG_PPC_BOOK3E #define CPU_FTRS_ALWAYS (CPU_FTRS_E6500 & CPU_FTRS_E5500) #else @@ -557,7 +546,6 @@ enum { #define CPU_FTRS_DT_CPU_BASE \ (CPU_FTR_LWSYNC | \ CPU_FTR_FPU_UNAVAILABLE | \ - CPU_FTR_NODSISRALIGN | \ CPU_FTR_NOEXECUTE | \ CPU_FTR_COHERENT_ICACHE | \ CPU_FTR_STCX_CHECKS_ADDRESS | \ @@ -586,18 +574,19 @@ enum { #else enum { CPU_FTRS_ALWAYS = -#ifdef CONFIG_PPC_BOOK3S_32 - CPU_FTRS_603 & CPU_FTRS_604 & CPU_FTRS_740_NOTAU & +#ifdef 
CONFIG_PPC_BOOK3S_604 + CPU_FTRS_604 & CPU_FTRS_740_NOTAU & CPU_FTRS_740 & CPU_FTRS_750 & CPU_FTRS_750FX1 & CPU_FTRS_750FX2 & CPU_FTRS_750FX & CPU_FTRS_750GX & CPU_FTRS_7400_NOTAU & CPU_FTRS_7400 & CPU_FTRS_7450_20 & CPU_FTRS_7450_21 & CPU_FTRS_7450_23 & CPU_FTRS_7455_1 & CPU_FTRS_7455_20 & CPU_FTRS_7455 & CPU_FTRS_7447_10 & - CPU_FTRS_7447 & CPU_FTRS_7447A & CPU_FTRS_82XX & - CPU_FTRS_G2_LE & CPU_FTRS_E300 & CPU_FTRS_E300C2 & + CPU_FTRS_7447 & CPU_FTRS_7447A & CPU_FTRS_CLASSIC32 & -#else - CPU_FTRS_GENERIC_32 & +#endif +#ifdef CONFIG_PPC_BOOK3S_603 + CPU_FTRS_603 & CPU_FTRS_82XX & + CPU_FTRS_G2_LE & CPU_FTRS_E300 & CPU_FTRS_E300C2 & #endif #ifdef CONFIG_PPC_8xx CPU_FTRS_8XX & @@ -605,12 +594,11 @@ enum { #ifdef CONFIG_40x CPU_FTRS_40X & #endif -#ifdef CONFIG_44x +#ifdef CONFIG_PPC_47x + CPU_FTRS_47X & +#elif defined(CONFIG_44x) CPU_FTRS_44X & CPU_FTRS_440x6 & #endif -#ifdef CONFIG_E200 - CPU_FTRS_E200 & -#endif #ifdef CONFIG_E500 CPU_FTRS_E500 & CPU_FTRS_E500_2 & #endif diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index 53ed2ca40151..b8425e3cfd81 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -168,8 +168,8 @@ do { \ /* Cache size items */ \ NEW_AUX_ENT(AT_DCACHEBSIZE, dcache_bsize); \ NEW_AUX_ENT(AT_ICACHEBSIZE, icache_bsize); \ - NEW_AUX_ENT(AT_UCACHEBSIZE, ucache_bsize); \ - VDSO_AUX_ENT(AT_SYSINFO_EHDR, current->mm->context.vdso_base); \ + NEW_AUX_ENT(AT_UCACHEBSIZE, 0); \ + VDSO_AUX_ENT(AT_SYSINFO_EHDR, (unsigned long)current->mm->context.vdso);\ ARCH_DLINFO_CACHE_GEOMETRY; \ } while (0) diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index fbd406cd6916..f6d2acb57425 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -36,6 +36,24 @@ label##2: \ .align 2; \ label##3: + +#ifndef CONFIG_CC_IS_CLANG +#define CHECK_ALT_SIZE(else_size, body_size) \ + .ifgt (else_size) - (body_size); \ + .error "Feature section else case larger than body"; \ + .endif; +#else +/* + * If we use the ifgt syntax above, clang's assembler complains about the + * expression being non-absolute when the code appears in an inline assembly + * statement. + * As a workaround use an .org directive that has no effect if the else case + * instructions are smaller than the body, but fails otherwise. + */ +#define CHECK_ALT_SIZE(else_size, body_size) \ + .org . 
+ ((else_size) > (body_size)); +#endif + #define MAKE_FTR_SECTION_ENTRY(msk, val, label, sect) \ label##4: \ .popsection; \ @@ -48,9 +66,7 @@ label##5: \ FTR_ENTRY_OFFSET label##2b-label##5b; \ FTR_ENTRY_OFFSET label##3b-label##5b; \ FTR_ENTRY_OFFSET label##4b-label##5b; \ - .ifgt (label##4b- label##3b)-(label##2b- label##1b); \ - .error "Feature section else case larger than body"; \ - .endif; \ + CHECK_ALT_SIZE((label##4b-label##3b), (label##2b-label##1b)); \ .popsection; @@ -100,6 +116,9 @@ label##5: \ #define END_MMU_FTR_SECTION_NESTED_IFSET(msk, label) \ END_MMU_FTR_SECTION_NESTED((msk), (msk), label) +#define END_MMU_FTR_SECTION_NESTED_IFCLR(msk, label) \ + END_MMU_FTR_SECTION_NESTED((msk), 0, label) + #define END_MMU_FTR_SECTION_IFSET(msk) END_MMU_FTR_SECTION((msk), (msk)) #define END_MMU_FTR_SECTION_IFCLR(msk) END_MMU_FTR_SECTION((msk), 0) diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 0b295bdb201e..aa6a5ef5d483 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -134,12 +134,6 @@ extern int ibm_nmi_interlock_token; extern unsigned int __start___fw_ftr_fixup, __stop___fw_ftr_fixup; -#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST) -bool is_kvm_guest(void); -#else -static inline bool is_kvm_guest(void) { return false; } -#endif - #ifdef CONFIG_PPC_PSERIES void pseries_probe_fw_features(void); #else diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index c1fbccb04390..c98f5141e3fc 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -155,6 +155,14 @@ #define H_VASI_RESUMED 5 #define H_VASI_COMPLETED 6 +/* VASI signal codes. Only the Cancel code is valid for H_VASI_SIGNAL. */ +#define H_VASI_SIGNAL_CANCEL 1 +#define H_VASI_SIGNAL_ABORT 2 +#define H_VASI_SIGNAL_SUSPEND 3 +#define H_VASI_SIGNAL_COMPLETE 4 +#define H_VASI_SIGNAL_ENABLE 5 +#define H_VASI_SIGNAL_FAILOVER 6 + /* Each control block has to be on a 4K boundary */ #define H_CB_ALIGNMENT 4096 @@ -261,6 +269,7 @@ #define H_ADD_CONN 0x284 #define H_DEL_CONN 0x288 #define H_JOIN 0x298 +#define H_VASI_SIGNAL 0x2A0 #define H_VASI_STATE 0x2A4 #define H_VIOCTL 0x2A8 #define H_ENABLE_CRQ 0x2B0 diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 58635960403c..273edd208ec5 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -122,7 +122,7 @@ static inline u##size name(const volatile u##size __iomem *addr) \ { \ u##size ret; \ __asm__ __volatile__("sync;"#insn"%U1%X1 %0,%1;twi 0,%0,0;isync"\ - : "=r" (ret) : "m" (*addr) : "memory"); \ + : "=r" (ret) : "m"UPD_CONSTR (*addr) : "memory"); \ return ret; \ } @@ -130,7 +130,7 @@ static inline u##size name(const volatile u##size __iomem *addr) \ static inline void name(volatile u##size __iomem *addr, u##size val) \ { \ __asm__ __volatile__("sync;"#insn"%U0%X0 %1,%0" \ - : "=m" (*addr) : "r" (val) : "memory"); \ + : "=m"UPD_CONSTR (*addr) : "r" (val) : "memory"); \ mmiowb_set_pending(); \ } @@ -302,41 +302,56 @@ static inline unsigned char __raw_readb(const volatile void __iomem *addr) { return *(volatile unsigned char __force *)PCI_FIX_ADDR(addr); } +#define __raw_readb __raw_readb + static inline unsigned short __raw_readw(const volatile void __iomem *addr) { return *(volatile unsigned short __force *)PCI_FIX_ADDR(addr); } +#define __raw_readw __raw_readw + static inline unsigned int __raw_readl(const volatile void __iomem *addr) { return *(volatile unsigned int __force 
*)PCI_FIX_ADDR(addr); } +#define __raw_readl __raw_readl + static inline void __raw_writeb(unsigned char v, volatile void __iomem *addr) { *(volatile unsigned char __force *)PCI_FIX_ADDR(addr) = v; } +#define __raw_writeb __raw_writeb + static inline void __raw_writew(unsigned short v, volatile void __iomem *addr) { *(volatile unsigned short __force *)PCI_FIX_ADDR(addr) = v; } +#define __raw_writew __raw_writew + static inline void __raw_writel(unsigned int v, volatile void __iomem *addr) { *(volatile unsigned int __force *)PCI_FIX_ADDR(addr) = v; } +#define __raw_writel __raw_writel #ifdef __powerpc64__ static inline unsigned long __raw_readq(const volatile void __iomem *addr) { return *(volatile unsigned long __force *)PCI_FIX_ADDR(addr); } +#define __raw_readq __raw_readq + static inline void __raw_writeq(unsigned long v, volatile void __iomem *addr) { *(volatile unsigned long __force *)PCI_FIX_ADDR(addr) = v; } +#define __raw_writeq __raw_writeq static inline void __raw_writeq_be(unsigned long v, volatile void __iomem *addr) { __raw_writeq((__force unsigned long)cpu_to_be64(v), addr); } +#define __raw_writeq_be __raw_writeq_be /* * Real mode versions of the above. Those instructions are only supposed @@ -609,10 +624,37 @@ static inline void name at \ /* Some drivers check for the presence of readq & writeq with * a #ifdef, so we make them happy here. */ +#define readb readb +#define readw readw +#define readl readl +#define writeb writeb +#define writew writew +#define writel writel +#define readsb readsb +#define readsw readsw +#define readsl readsl +#define writesb writesb +#define writesw writesw +#define writesl writesl +#define inb inb +#define inw inw +#define inl inl +#define outb outb +#define outw outw +#define outl outl +#define insb insb +#define insw insw +#define insl insl +#define outsb outsb +#define outsw outsw +#define outsl outsl #ifdef __powerpc64__ #define readq readq #define writeq writeq #endif +#define memset_io memset_io +#define memcpy_fromio memcpy_fromio +#define memcpy_toio memcpy_toio /* * Convert a physical pointer to a virtual kernel pointer for /dev/mem @@ -637,7 +679,106 @@ static inline void name at \ #define writel_relaxed(v, addr) writel(v, addr) #define writeq_relaxed(v, addr) writeq(v, addr) +#ifdef CONFIG_GENERIC_IOMAP #include <asm-generic/iomap.h> +#else +/* + * Here comes the implementation of the IOMAP interfaces. 
+ */ +static inline unsigned int ioread16be(const void __iomem *addr) +{ + return readw_be(addr); +} +#define ioread16be ioread16be + +static inline unsigned int ioread32be(const void __iomem *addr) +{ + return readl_be(addr); +} +#define ioread32be ioread32be + +#ifdef __powerpc64__ +static inline u64 ioread64_lo_hi(const void __iomem *addr) +{ + return readq(addr); +} +#define ioread64_lo_hi ioread64_lo_hi + +static inline u64 ioread64_hi_lo(const void __iomem *addr) +{ + return readq(addr); +} +#define ioread64_hi_lo ioread64_hi_lo + +static inline u64 ioread64be(const void __iomem *addr) +{ + return readq_be(addr); +} +#define ioread64be ioread64be + +static inline u64 ioread64be_lo_hi(const void __iomem *addr) +{ + return readq_be(addr); +} +#define ioread64be_lo_hi ioread64be_lo_hi + +static inline u64 ioread64be_hi_lo(const void __iomem *addr) +{ + return readq_be(addr); +} +#define ioread64be_hi_lo ioread64be_hi_lo +#endif /* __powerpc64__ */ + +static inline void iowrite16be(u16 val, void __iomem *addr) +{ + writew_be(val, addr); +} +#define iowrite16be iowrite16be + +static inline void iowrite32be(u32 val, void __iomem *addr) +{ + writel_be(val, addr); +} +#define iowrite32be iowrite32be + +#ifdef __powerpc64__ +static inline void iowrite64_lo_hi(u64 val, void __iomem *addr) +{ + writeq(val, addr); +} +#define iowrite64_lo_hi iowrite64_lo_hi + +static inline void iowrite64_hi_lo(u64 val, void __iomem *addr) +{ + writeq(val, addr); +} +#define iowrite64_hi_lo iowrite64_hi_lo + +static inline void iowrite64be(u64 val, void __iomem *addr) +{ + writeq_be(val, addr); +} +#define iowrite64be iowrite64be + +static inline void iowrite64be_lo_hi(u64 val, void __iomem *addr) +{ + writeq_be(val, addr); +} +#define iowrite64be_lo_hi iowrite64be_lo_hi + +static inline void iowrite64be_hi_lo(u64 val, void __iomem *addr) +{ + writeq_be(val, addr); +} +#define iowrite64be_hi_lo iowrite64be_hi_lo +#endif /* __powerpc64__ */ + +struct pci_dev; +void pci_iounmap(struct pci_dev *dev, void __iomem *addr); +#define pci_iounmap pci_iounmap +void __iomem *ioport_map(unsigned long port, unsigned int len); +#define ioport_map ioport_map +#endif static inline void iosync(void) { @@ -670,7 +811,6 @@ static inline void iosync(void) #define IO_SPACE_LIMIT ~(0UL) - /** * ioremap - map bus memory into CPU space * @address: bus address of the memory @@ -706,7 +846,13 @@ extern void __iomem *ioremap(phys_addr_t address, unsigned long size); extern void __iomem *ioremap_prot(phys_addr_t address, unsigned long size, unsigned long flags); extern void __iomem *ioremap_wc(phys_addr_t address, unsigned long size); +#define ioremap_wc ioremap_wc + +#ifdef CONFIG_PPC32 void __iomem *ioremap_wt(phys_addr_t address, unsigned long size); +#define ioremap_wt ioremap_wt +#endif + void __iomem *ioremap_coherent(phys_addr_t address, unsigned long size); #define ioremap_uc(addr, size) ioremap((addr), (size)) #define ioremap_cache(addr, size) \ @@ -766,6 +912,7 @@ static inline unsigned long virt_to_phys(volatile void * address) return __pa((unsigned long)address); } +#define virt_to_phys virt_to_phys /** * phys_to_virt - map physical address to virtual @@ -783,6 +930,7 @@ static inline void * phys_to_virt(unsigned long address) { return (void *)__va(address); } +#define phys_to_virt phys_to_virt /* * Change "struct page" to physical address. 
@@ -810,6 +958,7 @@ static inline unsigned long virt_to_bus(volatile void * address) return 0; return __pa(address) + PCI_DRAM_OFFSET; } +#define virt_to_bus virt_to_bus static inline void * bus_to_virt(unsigned long address) { @@ -817,6 +966,7 @@ static inline void * bus_to_virt(unsigned long address) return NULL; return __va(address - PCI_DRAM_OFFSET); } +#define bus_to_virt bus_to_virt #define page_to_bus(page) (page_to_phys(page) + PCI_DRAM_OFFSET) @@ -855,6 +1005,8 @@ static inline void * bus_to_virt(unsigned long address) #define clrsetbits_8(addr, clear, set) clrsetbits(8, addr, clear, set) +#include <asm-generic/io.h> + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_IO_H */ diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 0d93331d0fab..bf221a2a523e 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -15,11 +15,13 @@ #define KUAP_CURRENT (KUAP_CURRENT_READ | KUAP_CURRENT_WRITE) #ifdef CONFIG_PPC_BOOK3S_64 -#include <asm/book3s/64/kup-radix.h> +#include <asm/book3s/64/kup.h> #endif + #ifdef CONFIG_PPC_8xx #include <asm/nohash/32/kup-8xx.h> #endif + #ifdef CONFIG_PPC_BOOK3S_32 #include <asm/book3s/32/kup.h> #endif @@ -42,9 +44,10 @@ #else /* !__ASSEMBLY__ */ -#include <linux/pgtable.h> +extern bool disable_kuep; +extern bool disable_kuap; -void setup_kup(void); +#include <linux/pgtable.h> #ifdef CONFIG_PPC_KUEP void setup_kuep(bool disabled); @@ -80,6 +83,12 @@ static inline void restore_user_access(unsigned long flags) { } #endif /* CONFIG_PPC_BOOK3S_64 */ #endif /* CONFIG_PPC_KUAP */ +static __always_inline void setup_kup(void) +{ + setup_kuep(disable_kuep); + setup_kuap(disable_kuap); +} + static inline void allow_read_from_user(const void __user *from, unsigned long size) { allow_user_access(NULL, from, size, KUAP_READ); diff --git a/arch/powerpc/include/asm/kvm_guest.h b/arch/powerpc/include/asm/kvm_guest.h new file mode 100644 index 000000000000..2fca299f7e19 --- /dev/null +++ b/arch/powerpc/include/asm/kvm_guest.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 IBM Corporation + */ + +#ifndef _ASM_POWERPC_KVM_GUEST_H_ +#define _ASM_POWERPC_KVM_GUEST_H_ + +#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST) +#include <linux/jump_label.h> + +DECLARE_STATIC_KEY_FALSE(kvm_guest); + +static inline bool is_kvm_guest(void) +{ + return static_branch_unlikely(&kvm_guest); +} + +bool check_kvm_guest(void); +#else +static inline bool is_kvm_guest(void) { return false; } +static inline bool check_kvm_guest(void) { return false; } +#endif + +#endif /* _ASM_POWERPC_KVM_GUEST_H_ */ diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 744612054c94..abe1b5e82547 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -8,7 +8,7 @@ #ifndef __POWERPC_KVM_PARA_H__ #define __POWERPC_KVM_PARA_H__ -#include <asm/firmware.h> +#include <asm/kvm_guest.h> #include <uapi/asm/kvm_para.h> diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 475687f24f4a..cf6ebbc16cb4 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -207,7 +207,6 @@ struct machdep_calls { void (*suspend_disable_irqs)(void); void (*suspend_enable_irqs)(void); #endif - int (*suspend_disable_cpu)(void); #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE ssize_t (*cpu_probe)(const char *, size_t); diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index 
89aa8248a57d..e6c27ae843dc 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -228,6 +228,7 @@ int mce_register_notifier(struct notifier_block *nb); int mce_unregister_notifier(struct notifier_block *nb); #ifdef CONFIG_PPC_BOOK3S_64 void flush_and_reload_slb(void); +void flush_erat(void); long __machine_check_early_realmode_p7(struct pt_regs *regs); long __machine_check_early_realmode_p8(struct pt_regs *regs); long __machine_check_early_realmode_p9(struct pt_regs *regs); diff --git a/arch/powerpc/include/asm/mm-arch-hooks.h b/arch/powerpc/include/asm/mm-arch-hooks.h deleted file mode 100644 index dce274be824a..000000000000 --- a/arch/powerpc/include/asm/mm-arch-hooks.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Architecture specific mm hooks - * - * Copyright (C) 2015, IBM Corporation - * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> - */ - -#ifndef _ASM_POWERPC_MM_ARCH_HOOKS_H -#define _ASM_POWERPC_MM_ARCH_HOOKS_H - -static inline void arch_remap(struct mm_struct *mm, - unsigned long old_start, unsigned long old_end, - unsigned long new_start, unsigned long new_end) -{ - /* - * mremap() doesn't allow moving multiple vmas so we can limit the - * check to old_start == vdso_base. - */ - if (old_start == mm->context.vdso_base) - mm->context.vdso_base = new_start; -} -#define arch_remap arch_remap - -#endif /* _ASM_POWERPC_MM_ARCH_HOOKS_H */ diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 255a1837e9f7..80b27f5d9648 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -29,9 +29,18 @@ */ /* - * Support for KUEP feature. + * Supports KUAP feature + * key 0 controlling userspace addresses on radix + * Key 3 on hash */ -#define MMU_FTR_KUEP ASM_CONST(0x00000400) +#define MMU_FTR_BOOK3S_KUAP ASM_CONST(0x00000200) + +/* + * Supports KUEP feature + * key 0 controlling userspace addresses on radix + * Key 3 on hash + */ +#define MMU_FTR_BOOK3S_KUEP ASM_CONST(0x00000400) /* * Support for memory protection keys. 
@@ -120,14 +129,8 @@ */ #define MMU_FTR_1T_SEGMENT ASM_CONST(0x40000000) -/* - * Supports KUAP (key 0 controlling userspace addresses) on radix - */ -#define MMU_FTR_RADIX_KUAP ASM_CONST(0x80000000) - /* MMU feature bit sets for various CPUs */ -#define MMU_FTRS_DEFAULT_HPTE_ARCH_V2 \ - MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2 +#define MMU_FTRS_DEFAULT_HPTE_ARCH_V2 (MMU_FTR_HPTE_TABLE | MMU_FTR_TLBIEL | MMU_FTR_16M_PAGE) #define MMU_FTRS_POWER MMU_FTRS_DEFAULT_HPTE_ARCH_V2 #define MMU_FTRS_PPC970 MMU_FTRS_POWER | MMU_FTR_TLBIE_CROP_VA #define MMU_FTRS_POWER5 MMU_FTRS_POWER | MMU_FTR_LOCKLESS_TLBIE @@ -154,7 +157,7 @@ DECLARE_PER_CPU(int, next_tlbcam_idx); enum { MMU_FTRS_POSSIBLE = -#ifdef CONFIG_PPC_BOOK3S +#if defined(CONFIG_PPC_BOOK3S_64) || defined(CONFIG_PPC_BOOK3S_604) MMU_FTR_HPTE_TABLE | #endif #ifdef CONFIG_PPC_8xx @@ -163,17 +166,19 @@ enum { #ifdef CONFIG_40x MMU_FTR_TYPE_40x | #endif -#ifdef CONFIG_44x +#ifdef CONFIG_PPC_47x + MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL | +#elif defined(CONFIG_44x) MMU_FTR_TYPE_44x | #endif -#if defined(CONFIG_E200) || defined(CONFIG_E500) +#ifdef CONFIG_E500 MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | MMU_FTR_USE_TLBILX | #endif -#ifdef CONFIG_PPC_47x - MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL | -#endif #ifdef CONFIG_PPC_BOOK3S_32 - MMU_FTR_USE_HIGH_BATS | MMU_FTR_NEED_DTLB_SW_LRU | + MMU_FTR_USE_HIGH_BATS | +#endif +#ifdef CONFIG_PPC_83xx + MMU_FTR_NEED_DTLB_SW_LRU | #endif #ifdef CONFIG_PPC_BOOK3E_64 MMU_FTR_USE_TLBRSRV | MMU_FTR_USE_PAIRED_MAS | @@ -187,22 +192,47 @@ enum { #ifdef CONFIG_PPC_RADIX_MMU MMU_FTR_TYPE_RADIX | MMU_FTR_GTSE | +#endif /* CONFIG_PPC_RADIX_MMU */ #ifdef CONFIG_PPC_KUAP - MMU_FTR_RADIX_KUAP | + MMU_FTR_BOOK3S_KUAP | #endif /* CONFIG_PPC_KUAP */ -#endif /* CONFIG_PPC_RADIX_MMU */ #ifdef CONFIG_PPC_MEM_KEYS MMU_FTR_PKEY | #endif #ifdef CONFIG_PPC_KUEP - MMU_FTR_KUEP | + MMU_FTR_BOOK3S_KUEP | #endif /* CONFIG_PPC_KUAP */ 0, }; +#if defined(CONFIG_PPC_BOOK3S_604) && !defined(CONFIG_PPC_BOOK3S_603) +#define MMU_FTRS_ALWAYS MMU_FTR_HPTE_TABLE +#endif +#ifdef CONFIG_PPC_8xx +#define MMU_FTRS_ALWAYS MMU_FTR_TYPE_8xx +#endif +#ifdef CONFIG_40x +#define MMU_FTRS_ALWAYS MMU_FTR_TYPE_40x +#endif +#ifdef CONFIG_PPC_47x +#define MMU_FTRS_ALWAYS MMU_FTR_TYPE_47x +#elif defined(CONFIG_44x) +#define MMU_FTRS_ALWAYS MMU_FTR_TYPE_44x +#endif +#if defined(CONFIG_E200) || defined(CONFIG_E500) +#define MMU_FTRS_ALWAYS MMU_FTR_TYPE_FSL_E +#endif + +#ifndef MMU_FTRS_ALWAYS +#define MMU_FTRS_ALWAYS 0 +#endif + static inline bool early_mmu_has_feature(unsigned long feature) { + if (MMU_FTRS_ALWAYS & feature) + return true; + return !!(MMU_FTRS_POSSIBLE & cur_cpu_spec->mmu_features & feature); } @@ -231,6 +261,9 @@ static __always_inline bool mmu_has_feature(unsigned long feature) } #endif + if (MMU_FTRS_ALWAYS & feature) + return true; + if (!(MMU_FTRS_POSSIBLE & feature)) return false; diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index b42813359f49..d5821834dba9 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -263,8 +263,10 @@ extern void arch_exit_mmap(struct mm_struct *mm); static inline void arch_unmap(struct mm_struct *mm, unsigned long start, unsigned long end) { - if (start <= mm->context.vdso_base && mm->context.vdso_base < end) - mm->context.vdso_base = 0; + unsigned long vdso_base = (unsigned long)mm->context.vdso - PAGE_SIZE; + + if (start <= vdso_base && vdso_base < end) + 
mm->context.vdso = NULL; } #ifdef CONFIG_PPC_MEM_KEYS @@ -285,7 +287,7 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, #define thread_pkey_regs_init(thread) #define arch_dup_pkeys(oldmm, mm) -static inline u64 pte_to_hpte_pkey_bits(u64 pteflags) +static inline u64 pte_to_hpte_pkey_bits(u64 pteflags, unsigned long flags) { return 0x0UL; } diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index 567cdc557402..17a4a616436f 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -63,8 +63,7 @@ static inline void restore_user_access(unsigned long flags) static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { - return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xff000000), - "Bug: fault blocked by AP register !"); + return !((regs->kuap ^ MD_APG_KUAP) & 0xff000000); } #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/nohash/32/mmu-40x.h b/arch/powerpc/include/asm/nohash/32/mmu-40x.h index 74f4edb5916e..8a8f13a22cf4 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-40x.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-40x.h @@ -57,7 +57,7 @@ typedef struct { unsigned int id; unsigned int active; - unsigned long vdso_base; + void __user *vdso; } mm_context_t; #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/nohash/32/mmu-44x.h b/arch/powerpc/include/asm/nohash/32/mmu-44x.h index 28aa3b339c5e..2d92a39d8f2e 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-44x.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-44x.h @@ -108,7 +108,7 @@ extern unsigned int tlb_44x_index; typedef struct { unsigned int id; unsigned int active; - unsigned long vdso_base; + void __user *vdso; } mm_context_t; /* patch sites */ diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index 0bd1b144eb76..478249959baa 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -181,7 +181,7 @@ void mmu_pin_tlb(unsigned long top, bool readonly); typedef struct { unsigned int id; unsigned int active; - unsigned long vdso_base; + void __user *vdso; void *pte_frag; } mm_context_t; diff --git a/arch/powerpc/include/asm/nohash/mmu-book3e.h b/arch/powerpc/include/asm/nohash/mmu-book3e.h index b41004664312..e43a418d3ccd 100644 --- a/arch/powerpc/include/asm/nohash/mmu-book3e.h +++ b/arch/powerpc/include/asm/nohash/mmu-book3e.h @@ -238,7 +238,7 @@ extern unsigned int tlbcam_index; typedef struct { unsigned int id; unsigned int active; - unsigned long vdso_base; + void __user *vdso; } mm_context_t; /* Page size definitions, common between 32 and 64-bit diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 6277e7596ae5..ac75f4ab0dba 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -192,9 +192,9 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, */ if (IS_ENABLED(CONFIG_PPC32) && IS_ENABLED(CONFIG_PTE_64BIT) && !percpu) { __asm__ __volatile__("\ - stw%U0%X0 %2,%0\n\ + stw%X0 %2,%0\n\ eieio\n\ - stw%U0%X0 %L2,%1" + stw%X1 %L2,%1" : "=m" (*ptep), "=m" (*((unsigned char *)ptep+4)) : "r" (pte) : "memory"); return; diff --git a/arch/powerpc/include/asm/nohash/tlbflush.h b/arch/powerpc/include/asm/nohash/tlbflush.h index b1d8fec29169..1edb7243e515 100644 --- a/arch/powerpc/include/asm/nohash/tlbflush.h +++ 
b/arch/powerpc/include/asm/nohash/tlbflush.h @@ -10,7 +10,6 @@ * - local_flush_tlb_mm(mm, full) flushes the specified mm context on * the local processor * - local_flush_tlb_page(vma, vmaddr) flushes one page on the local processor - * - flush_tlb_page_nohash(vma, vmaddr) flushes one page if SW loaded TLB * - flush_tlb_range(vma, start, end) flushes a range of pages * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages * diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 1dffa3cb16ba..0b63ba7d5917 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -1091,9 +1091,9 @@ enum { OPAL_XIVE_IRQ_TRIGGER_PAGE = 0x00000001, OPAL_XIVE_IRQ_STORE_EOI = 0x00000002, OPAL_XIVE_IRQ_LSI = 0x00000004, - OPAL_XIVE_IRQ_SHIFT_BUG = 0x00000008, - OPAL_XIVE_IRQ_MASK_VIA_FW = 0x00000010, - OPAL_XIVE_IRQ_EOI_VIA_FW = 0x00000020, + OPAL_XIVE_IRQ_SHIFT_BUG = 0x00000008, /* P9 DD1.0 workaround */ + OPAL_XIVE_IRQ_MASK_VIA_FW = 0x00000010, /* P9 DD1.0 workaround */ + OPAL_XIVE_IRQ_EOI_VIA_FW = 0x00000020, /* P9 DD1.0 workaround */ }; /* Flags for OPAL_XIVE_GET/SET_QUEUE_INFO */ diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h index d64dfe3ac712..56f217606327 100644 --- a/arch/powerpc/include/asm/page_32.h +++ b/arch/powerpc/include/asm/page_32.h @@ -16,12 +16,6 @@ #define ARCH_DMA_MINALIGN L1_CACHE_BYTES #endif -#ifdef CONFIG_PTE_64BIT -#define PTE_FLAGS_OFFSET 4 /* offset of PTE flags, in bytes */ -#else -#define PTE_FLAGS_OFFSET 0 -#endif - #if defined(CONFIG_PPC_256K_PAGES) || \ (defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES)) #define PTE_SHIFT (PAGE_SHIFT - PTE_T_LOG2 - 2) /* 1/4 of a page */ diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h index 9362c94fe3aa..edc08f04aef7 100644 --- a/arch/powerpc/include/asm/paravirt.h +++ b/arch/powerpc/include/asm/paravirt.h @@ -10,6 +10,9 @@ #endif #ifdef CONFIG_PPC_SPLPAR +#include <asm/kvm_guest.h> +#include <asm/cputhreads.h> + DECLARE_STATIC_KEY_FALSE(shared_processor); static inline bool is_shared_processor(void) @@ -74,6 +77,21 @@ static inline bool vcpu_is_preempted(int cpu) { if (!is_shared_processor()) return false; + +#ifdef CONFIG_PPC_SPLPAR + if (!is_kvm_guest()) { + int first_cpu = cpu_first_thread_sibling(smp_processor_id()); + + /* + * Preemption can only happen at core granularity. This CPU + * is not preempted if one of the CPU of this core is not + * preempted. 
+ */ + if (cpu_first_thread_sibling(cpu) == first_cpu) + return false; + } +#endif + if (yield_count_of(cpu) & 1) return true; return false; diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index f6acabb6c9be..3b7baba01c92 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -82,6 +82,7 @@ struct power_pmu { #define PPMU_ARCH_207S 0x00000080 /* PMC is architecture v2.07S */ #define PPMU_NO_SIAR 0x00000100 /* Do not use SIAR */ #define PPMU_ARCH_31 0x00000200 /* Has MMCR3, SIER2 and SIER3 */ +#define PPMU_P10_DD1 0x00000400 /* Is power10 DD1 processor version */ /* * Values for flags to get_alternatives() diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h index d37ededca3ee..9acd1fbf1197 100644 --- a/arch/powerpc/include/asm/pnv-ocxl.h +++ b/arch/powerpc/include/asm/pnv-ocxl.h @@ -3,12 +3,59 @@ #ifndef _ASM_PNV_OCXL_H #define _ASM_PNV_OCXL_H +#include <linux/bitfield.h> #include <linux/pci.h> #define PNV_OCXL_TL_MAX_TEMPLATE 63 #define PNV_OCXL_TL_BITS_PER_RATE 4 #define PNV_OCXL_TL_RATE_BUF_SIZE ((PNV_OCXL_TL_MAX_TEMPLATE+1) * PNV_OCXL_TL_BITS_PER_RATE / 8) +#define PNV_OCXL_ATSD_TIMEOUT 1 + +/* TLB Management Instructions */ +#define PNV_OCXL_ATSD_LNCH 0x00 +/* Radix Invalidate */ +#define PNV_OCXL_ATSD_LNCH_R PPC_BIT(0) +/* Radix Invalidation Control + * 0b00 Just invalidate TLB. + * 0b01 Invalidate just Page Walk Cache. + * 0b10 Invalidate TLB, Page Walk Cache, and any + * caching of Partition and Process Table Entries. + */ +#define PNV_OCXL_ATSD_LNCH_RIC PPC_BITMASK(1, 2) +/* Number and Page Size of translations to be invalidated */ +#define PNV_OCXL_ATSD_LNCH_LP PPC_BITMASK(3, 10) +/* Invalidation Criteria + * 0b00 Invalidate just the target VA. + * 0b01 Invalidate matching PID. 
+ */ +#define PNV_OCXL_ATSD_LNCH_IS PPC_BITMASK(11, 12) +/* 0b1: Process Scope, 0b0: Partition Scope */ +#define PNV_OCXL_ATSD_LNCH_PRS PPC_BIT(13) +/* Invalidation Flag */ +#define PNV_OCXL_ATSD_LNCH_B PPC_BIT(14) +/* Actual Page Size to be invalidated + * 000 4KB + * 101 64KB + * 001 2MB + * 010 1GB + */ +#define PNV_OCXL_ATSD_LNCH_AP PPC_BITMASK(15, 17) +/* Defines the large page select + * L=0b0 for 4KB pages + * L=0b1 for large pages) + */ +#define PNV_OCXL_ATSD_LNCH_L PPC_BIT(18) +/* Process ID */ +#define PNV_OCXL_ATSD_LNCH_PID PPC_BITMASK(19, 38) +/* NoFlush – Assumed to be 0b0 */ +#define PNV_OCXL_ATSD_LNCH_F PPC_BIT(39) +#define PNV_OCXL_ATSD_LNCH_OCAPI_SLBI PPC_BIT(40) +#define PNV_OCXL_ATSD_LNCH_OCAPI_SINGLETON PPC_BIT(41) +#define PNV_OCXL_ATSD_AVA 0x08 +#define PNV_OCXL_ATSD_AVA_AVA PPC_BITMASK(0, 51) +#define PNV_OCXL_ATSD_STAT 0x10 + int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled, u16 *supported); int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count); @@ -28,4 +75,11 @@ int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask, void **p void pnv_ocxl_spa_release(void *platform_data); int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle); +int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid, + uint64_t lpcr, void __iomem **arva); +void pnv_ocxl_unmap_lpar(void __iomem *arva); +void pnv_ocxl_tlb_invalidate(void __iomem *arva, + unsigned long pid, + unsigned long addr, + unsigned long page_size); #endif /* _ASM_PNV_OCXL_H */ diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index a6e3700c4566..ed161ef2b3ca 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -78,6 +78,9 @@ #define IMM_L(i) ((uintptr_t)(i) & 0xffff) #define IMM_DS(i) ((uintptr_t)(i) & 0xfffc) +#define IMM_DQ(i) ((uintptr_t)(i) & 0xfff0) +#define IMM_D0(i) (((uintptr_t)(i) >> 16) & 0x3ffff) +#define IMM_D1(i) IMM_L(i) /* * 16-bit immediate helper macros: HA() is for use with sign-extending instrs @@ -230,7 +233,6 @@ #define PPC_INST_POPCNTB_MASK 0xfc0007fe #define PPC_INST_RFEBB 0x4c000124 #define PPC_INST_RFID 0x4c000024 -#define PPC_INST_MFSPR 0x7c0002a6 #define PPC_INST_MFSPR_DSCR 0x7c1102a6 #define PPC_INST_MFSPR_DSCR_MASK 0xfc1ffffe #define PPC_INST_MTSPR_DSCR 0x7c1103a6 @@ -295,6 +297,8 @@ #define __PPC_XB(b) ((((b) & 0x1f) << 11) | (((b) & 0x20) >> 4)) #define __PPC_XS(s) ((((s) & 0x1f) << 21) | (((s) & 0x20) >> 5)) #define __PPC_XT(s) __PPC_XS(s) +#define __PPC_XSP(s) ((((s) & 0x1e) | (((s) >> 5) & 0x1)) << 21) +#define __PPC_XTP(s) __PPC_XSP(s) #define __PPC_T_TLB(t) (((t) & 0x3) << 21) #define __PPC_WC(w) (((w) & 0x3) << 21) #define __PPC_WS(w) (((w) & 0x1f) << 11) @@ -395,6 +399,14 @@ #define PPC_RAW_XVCPSGNDP(t, a, b) ((0xf0000780 | VSX_XX3((t), (a), (b)))) #define PPC_RAW_VPERMXOR(vrt, vra, vrb, vrc) \ ((0x1000002d | ___PPC_RT(vrt) | ___PPC_RA(vra) | ___PPC_RB(vrb) | (((vrc) & 0x1f) << 6))) +#define PPC_RAW_LXVP(xtp, a, i) (0x18000000 | __PPC_XTP(xtp) | ___PPC_RA(a) | IMM_DQ(i)) +#define PPC_RAW_STXVP(xsp, a, i) (0x18000001 | __PPC_XSP(xsp) | ___PPC_RA(a) | IMM_DQ(i)) +#define PPC_RAW_LXVPX(xtp, a, b) (0x7c00029a | __PPC_XTP(xtp) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_STXVPX(xsp, a, b) (0x7c00039a | __PPC_XSP(xsp) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_PLXVP(xtp, i, a, pr) \ + ((PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_D0(i)) << 32 | (0xe8000000 | __PPC_XTP(xtp) | ___PPC_RA(a) | IMM_D1(i))) +#define PPC_RAW_PSTXVP(xsp, i, a, pr) \ 
+ ((PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_D0(i)) << 32 | (0xf8000000 | __PPC_XSP(xsp) | ___PPC_RA(a) | IMM_D1(i))) #define PPC_RAW_NAP (0x4c000364) #define PPC_RAW_SLEEP (0x4c0003a4) #define PPC_RAW_WINKLE (0x4c0003e4) @@ -507,6 +519,8 @@ #define PPC_RAW_NEG(d, a) (0x7c0000d0 | ___PPC_RT(d) | ___PPC_RA(a)) +#define PPC_RAW_MFSPR(d, spr) (0x7c0002a6 | ___PPC_RT(d) | __PPC_SPR(spr)) + /* Deal with instructions that older assemblers aren't aware of */ #define PPC_BCCTR_FLUSH stringify_in_c(.long PPC_INST_BCCTR_FLUSH) #define PPC_CP_ABORT stringify_in_c(.long PPC_RAW_CP_ABORT) diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 511786f0e40d..cfa814824285 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -251,6 +251,8 @@ n: #define _GLOBAL_TOC(name) _GLOBAL(name) +#define DOTSYM(a) a + #endif /* @@ -495,15 +497,9 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96) #endif #ifdef CONFIG_PPC_BOOK3S_64 -#define RFI rfid #define MTMSRD(r) mtmsrd r #define MTMSR_EERI(reg) mtmsrd reg,1 #else -#ifndef CONFIG_40x -#define RFI rfi -#else -#define RFI rfi; b . /* Prevent prefetch past rfi */ -#endif #define MTMSRD(r) mtmsr r #define MTMSR_EERI(reg) mtmsr reg #endif diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index c61c859b51a8..8acc3590c971 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -6,6 +6,8 @@ * Copyright (C) 2001 PPC 64 Team, IBM Corp */ +#include <vdso/processor.h> + #include <asm/reg.h> #ifdef CONFIG_VSX @@ -63,14 +65,6 @@ extern int _chrp_type; #endif /* defined(__KERNEL__) && defined(CONFIG_PPC32) */ -/* Macros for adjusting thread priority (hardware multi-threading) */ -#define HMT_very_low() asm volatile("or 31,31,31 # very low priority") -#define HMT_low() asm volatile("or 1,1,1 # low priority") -#define HMT_medium_low() asm volatile("or 6,6,6 # medium low priority") -#define HMT_medium() asm volatile("or 2,2,2 # medium priority") -#define HMT_medium_high() asm volatile("or 5,5,5 # medium high priority") -#define HMT_high() asm volatile("or 3,3,3 # high priority") - #ifdef __KERNEL__ #ifdef CONFIG_PPC64 @@ -170,8 +164,10 @@ struct thread_struct { #endif /* Debug Registers */ struct debug_reg debug; +#ifdef CONFIG_PPC_FPU_REGS struct thread_fp_state fp_state; struct thread_fp_state *fp_save_area; +#endif int fpexc_mode; /* floating-point exception mode */ unsigned int align_ctl; /* alignment handling control */ #ifdef CONFIG_HAVE_HW_BREAKPOINT @@ -230,10 +226,6 @@ struct thread_struct { struct thread_vr_state ckvr_state; /* Checkpointed VR state */ unsigned long ckvrsave; /* Checkpointed VRSAVE */ #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ -#ifdef CONFIG_PPC_MEM_KEYS - unsigned long amr; - unsigned long iamr; -#endif #ifdef CONFIG_KVM_BOOK3S_32_HANDLER void* kvm_shadow_vcpu; /* KVM internal data */ #endif /* CONFIG_KVM_BOOK3S_32_HANDLER */ @@ -344,7 +336,6 @@ static inline unsigned long __pack_fe01(unsigned int fpmode) } #ifdef CONFIG_PPC64 -#define cpu_relax() do { HMT_low(); HMT_medium(); barrier(); } while (0) #define spin_begin() HMT_low() @@ -363,8 +354,6 @@ do { \ } \ } while (0) -#else -#define cpu_relax() barrier() #endif /* Check that a certain kernel stack pointer is valid in task_struct p */ @@ -398,20 +387,6 @@ static inline void prefetchw(const void *x) #define HAVE_ARCH_PICK_MMAP_LAYOUT -#ifdef CONFIG_PPC64 -static inline unsigned long get_clean_sp(unsigned long sp, int is_32) -{ - if 
(is_32) - return sp & 0x0ffffffffUL; - return sp; -} -#else -static inline unsigned long get_clean_sp(unsigned long sp, int is_32) -{ - return sp; -} -#endif - /* asm stubs */ extern unsigned long isa300_idle_stop_noloss(unsigned long psscr_val); extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val); diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h index cb89e4bf55ce..e646c7f218bc 100644 --- a/arch/powerpc/include/asm/ps3.h +++ b/arch/powerpc/include/asm/ps3.h @@ -378,8 +378,8 @@ struct ps3_system_bus_driver { enum ps3_match_sub_id match_sub_id; struct device_driver core; int (*probe)(struct ps3_system_bus_device *); - int (*remove)(struct ps3_system_bus_device *); - int (*shutdown)(struct ps3_system_bus_device *); + void (*remove)(struct ps3_system_bus_device *); + void (*shutdown)(struct ps3_system_bus_device *); /* int (*suspend)(struct ps3_system_bus_device *, pm_message_t); */ /* int (*resume)(struct ps3_system_bus_device *); */ }; diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index e2c778c176a3..58f9dc060a7b 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -53,11 +53,19 @@ struct pt_regs #ifdef CONFIG_PPC64 unsigned long ppr; #endif + union { #ifdef CONFIG_PPC_KUAP - unsigned long kuap; + unsigned long kuap; +#endif +#ifdef CONFIG_PPC_PKEY + unsigned long amr; +#endif + }; +#ifdef CONFIG_PPC_PKEY + unsigned long iamr; #endif }; - unsigned long __pad[2]; /* Maintain 16 byte interrupt stack alignment */ + unsigned long __pad[4]; /* Maintain 16 byte interrupt stack alignment */ }; }; #endif @@ -171,12 +179,6 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) set_thread_flag(TIF_NOERROR); \ } while(0) -struct task_struct; -extern int ptrace_get_reg(struct task_struct *task, int regno, - unsigned long *data); -extern int ptrace_put_reg(struct task_struct *task, int regno, - unsigned long data); - #define current_pt_regs() \ ((struct pt_regs *)((unsigned long)task_stack_page(current) + THREAD_SIZE) - 1) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index f877a576b338..e40a921d78f9 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -29,7 +29,6 @@ #include <asm/reg_8xx.h> #define MSR_SF_LG 63 /* Enable 64 bit mode */ -#define MSR_ISF_LG 61 /* Interrupt 64b mode valid on 630 */ #define MSR_HV_LG 60 /* Hypervisor state */ #define MSR_TS_T_LG 34 /* Trans Mem state: Transactional */ #define MSR_TS_S_LG 33 /* Trans Mem state: Suspended */ @@ -69,13 +68,11 @@ #ifdef CONFIG_PPC64 #define MSR_SF __MASK(MSR_SF_LG) /* Enable 64 bit mode */ -#define MSR_ISF __MASK(MSR_ISF_LG) /* Interrupt 64b mode valid on 630 */ #define MSR_HV __MASK(MSR_HV_LG) /* Hypervisor state */ #define MSR_S __MASK(MSR_S_LG) /* Secure state */ #else /* so tests for these bits fail on 32-bit */ #define MSR_SF 0 -#define MSR_ISF 0 #define MSR_HV 0 #define MSR_S 0 #endif @@ -134,7 +131,7 @@ #define MSR_64BIT MSR_SF /* Server variant */ -#define __MSR (MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_ISF |MSR_HV) +#define __MSR (MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_HV) #ifdef __BIG_ENDIAN__ #define MSR_ __MSR #define MSR_IDLE (MSR_ME | MSR_SF | MSR_HV) @@ -864,6 +861,7 @@ #define MMCR0_BHRBA 0x00200000UL /* BHRB Access allowed in userspace */ #define MMCR0_EBE 0x00100000UL /* Event based branch enable */ #define MMCR0_PMCC 0x000c0000UL /* PMC control */ +#define MMCR0_PMCCEXT ASM_CONST(0x00000200) /* PMCCEXT control */ 
#define MMCR0_PMCC_U6 0x00080000UL /* PMC1-6 are R/W by user (PR) */ #define MMCR0_PMC1CE 0x00008000UL /* PMC1 count enable*/ #define MMCR0_PMCjCE ASM_CONST(0x00004000) /* PMCj count enable*/ @@ -1203,7 +1201,7 @@ #ifdef CONFIG_PPC_BOOK3S_32 #define SPRN_SPRG_SCRATCH0 SPRN_SPRG0 #define SPRN_SPRG_SCRATCH1 SPRN_SPRG1 -#define SPRN_SPRG_PGDIR SPRN_SPRG2 +#define SPRN_SPRG_SCRATCH2 SPRN_SPRG2 #define SPRN_SPRG_603_LRU SPRN_SPRG4 #endif @@ -1232,14 +1230,9 @@ #define SPRN_SPRG_WSCRATCH_MC SPRN_SPRG1 #define SPRN_SPRG_RSCRATCH4 SPRN_SPRG7R #define SPRN_SPRG_WSCRATCH4 SPRN_SPRG7W -#ifdef CONFIG_E200 -#define SPRN_SPRG_RSCRATCH_DBG SPRN_SPRG6R -#define SPRN_SPRG_WSCRATCH_DBG SPRN_SPRG6W -#else #define SPRN_SPRG_RSCRATCH_DBG SPRN_SPRG9 #define SPRN_SPRG_WSCRATCH_DBG SPRN_SPRG9 #endif -#endif #ifdef CONFIG_PPC_8xx #define SPRN_SPRG_SCRATCH0 SPRN_SPRG0 @@ -1419,37 +1412,6 @@ static inline void msr_check_and_clear(unsigned long bits) __msr_check_and_clear(bits); } -#if defined(CONFIG_PPC_CELL) || defined(CONFIG_E500) -#define mftb() ({unsigned long rval; \ - asm volatile( \ - "90: mfspr %0, %2;\n" \ - ASM_FTR_IFSET( \ - "97: cmpwi %0,0;\n" \ - " beq- 90b;\n", "", %1) \ - : "=r" (rval) \ - : "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL) : "cr0"); \ - rval;}) -#elif defined(CONFIG_PPC_8xx) -#define mftb() ({unsigned long rval; \ - asm volatile("mftbl %0" : "=r" (rval)); rval;}) -#else -#define mftb() ({unsigned long rval; \ - asm volatile("mfspr %0, %1" : \ - "=r" (rval) : "i" (SPRN_TBRL)); rval;}) -#endif /* !CONFIG_PPC_CELL */ - -#if defined(CONFIG_PPC_8xx) -#define mftbu() ({unsigned long rval; \ - asm volatile("mftbu %0" : "=r" (rval)); rval;}) -#else -#define mftbu() ({unsigned long rval; \ - asm volatile("mfspr %0, %1" : "=r" (rval) : \ - "i" (SPRN_TBRU)); rval;}) -#endif - -#define mttbl(v) asm volatile("mttbl %0":: "r"(v)) -#define mttbu(v) asm volatile("mttbu %0":: "r"(v)) - #ifdef CONFIG_PPC32 #define mfsrin(v) ({unsigned int rval; \ asm volatile("mfsrin %0,%1" : "=r" (rval) : "r" (v)); \ diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index 29a948e0c0f2..262782f08fd4 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -281,18 +281,6 @@ #define MSRP_PMMP 0x00000004 /* Protect MSR[PMM] */ #endif -#ifdef CONFIG_E200 -#define MCSR_MCP 0x80000000UL /* Machine Check Input Pin */ -#define MCSR_CP_PERR 0x20000000UL /* Cache Push Parity Error */ -#define MCSR_CPERR 0x10000000UL /* Cache Parity Error */ -#define MCSR_EXCP_ERR 0x08000000UL /* ISI, ITLB, or Bus Error on 1st insn - fetch for an exception handler */ -#define MCSR_BUS_IRERR 0x00000010UL /* Read Bus Error on instruction fetch*/ -#define MCSR_BUS_DRERR 0x00000008UL /* Read Bus Error on data load */ -#define MCSR_BUS_WRERR 0x00000004UL /* Write Bus Error on buffered - store or cache line push */ -#endif - /* Bit definitions for the HID1 */ #ifdef CONFIG_E500 /* e500v1/v2 */ diff --git a/arch/powerpc/include/asm/rtas-types.h b/arch/powerpc/include/asm/rtas-types.h index aa420561bc10..8df6235d64d1 100644 --- a/arch/powerpc/include/asm/rtas-types.h +++ b/arch/powerpc/include/asm/rtas-types.h @@ -23,14 +23,6 @@ struct rtas_t { struct device_node *dev; /* virtual address pointer */ }; -struct rtas_suspend_me_data { - atomic_t working; /* number of cpus accessing this struct */ - atomic_t done; - int token; /* ibm,suspend-me */ - atomic_t error; - struct completion *complete; /* wait on this until working == 0 */ -}; - struct rtas_error_log { /* Byte 0 */ u8 byte0; /* 
Architectural version */ diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 55f9a154c95d..332e1000ca0f 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -23,11 +23,16 @@ #define RTAS_RMOBUF_MAX (64 * 1024) /* RTAS return status codes */ -#define RTAS_NOT_SUSPENDABLE -9004 #define RTAS_BUSY -2 /* RTAS Busy */ #define RTAS_EXTENDED_DELAY_MIN 9900 #define RTAS_EXTENDED_DELAY_MAX 9905 +/* statuses specific to ibm,suspend-me */ +#define RTAS_SUSPEND_ABORTED 9000 /* Suspension aborted */ +#define RTAS_NOT_SUSPENDABLE -9004 /* Partition not suspendable */ +#define RTAS_THREADS_ACTIVE -9005 /* Multiple processor threads active */ +#define RTAS_OUTSTANDING_COPROC -9006 /* Outstanding coprocessor operations */ + /* * In general to call RTAS use rtas_token("string") to lookup * an RTAS token for the given string (e.g. "event-scan"). @@ -242,6 +247,7 @@ extern void __noreturn rtas_restart(char *cmd); extern void rtas_power_off(void); extern void __noreturn rtas_halt(void); extern void rtas_os_term(char *str); +void rtas_activate_firmware(void); extern int rtas_get_sensor(int sensor, int index, int *state); extern int rtas_get_sensor_fast(int sensor, int index, int *state); extern int rtas_get_power_level(int powerdomain, int *level); @@ -250,9 +256,7 @@ extern bool rtas_indicator_present(int token, int *maxindex); extern int rtas_set_indicator(int indicator, int index, int new_value); extern int rtas_set_indicator_fast(int indicator, int index, int new_value); extern void rtas_progress(char *s, unsigned short hex); -extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data); -extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data); -extern int rtas_ibm_suspend_me(u64 handle); +int rtas_ibm_suspend_me(int *fw_status); struct rtc_time; extern time64_t rtas_get_boot_time(void); @@ -272,8 +276,13 @@ extern time64_t last_rtas_event; extern int clobbering_unread_rtas_event(void); extern int pseries_devicetree_update(s32 scope); extern void post_mobility_fixup(void); +int rtas_syscall_dispatch_ibm_suspend_me(u64 handle); #else static inline int clobbering_unread_rtas_event(void) { return 0; } +static inline int rtas_syscall_dispatch_ibm_suspend_me(u64 handle) +{ + return -EINVAL; +} #endif #ifdef CONFIG_PPC_RTAS_DAEMON diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index b2035b2f57ce..c4e2d53acd2b 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -134,6 +134,7 @@ static inline struct cpumask *cpu_smallcore_mask(int cpu) extern int cpu_to_core_id(int cpu); extern bool has_big_cores; +extern bool thread_group_shares_l2; #define cpu_smt_mask cpu_smt_mask #ifdef CONFIG_SCHED_SMT @@ -187,6 +188,7 @@ extern void __cpu_die(unsigned int cpu); /* for UP */ #define hard_smp_processor_id() get_hard_smp_processor_id(0) #define smp_setup_cpu_maps() +#define thread_group_shares_l2 0 static inline void inhibit_secondary_onlining(void) {} static inline void uninhibit_secondary_onlining(void) {} static inline const struct cpumask *cpu_sibling_mask(int cpu) @@ -199,6 +201,10 @@ static inline const struct cpumask *cpu_smallcore_mask(int cpu) return cpumask_of(cpu); } +static inline const struct cpumask *cpu_l2_cache_mask(int cpu) +{ + return cpumask_of(cpu); +} #endif /* CONFIG_SMP */ #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 53115ae61495..3d8a47af7a25 100644 --- a/arch/powerpc/include/asm/thread_info.h 
+++ b/arch/powerpc/include/asm/thread_info.h @@ -77,10 +77,8 @@ struct thread_info { /* how to get the thread information struct from C */ extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); -#ifdef CONFIG_PPC_BOOK3S_64 void arch_setup_new_exec(void); #define arch_setup_new_exec arch_setup_new_exec -#endif #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 2f566c1a754c..8f789b597bae 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -15,6 +15,7 @@ #include <asm/processor.h> #include <asm/cpu_has_feature.h> +#include <asm/vdso/timebase.h> /* time.c */ extern unsigned long tb_ticks_per_jiffy; @@ -38,42 +39,12 @@ struct div_result { u64 result_low; }; -/* For compatibility, get_tbl() is defined as get_tb() on ppc64 */ -static inline unsigned long get_tbl(void) -{ - return mftb(); -} - static inline u64 get_vtb(void) { -#ifdef CONFIG_PPC_BOOK3S_64 if (cpu_has_feature(CPU_FTR_ARCH_207S)) return mfspr(SPRN_VTB); -#endif - return 0; -} - -static inline u64 get_tb(void) -{ - unsigned int tbhi, tblo, tbhi2; - - if (IS_ENABLED(CONFIG_PPC64)) - return mftb(); - do { - tbhi = mftbu(); - tblo = mftb(); - tbhi2 = mftbu(); - } while (tbhi != tbhi2); - - return ((u64)tbhi << 32) | tblo; -} - -static inline void set_tb(unsigned int upper, unsigned int lower) -{ - mtspr(SPRN_TBWL, 0); - mtspr(SPRN_TBWU, upper); - mtspr(SPRN_TBWL, lower); + return 0; } /* Accessor functions for the decrementer register. diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h index 95988870a57b..fa2e76e4093a 100644 --- a/arch/powerpc/include/asm/timex.h +++ b/arch/powerpc/include/asm/timex.h @@ -9,7 +9,7 @@ */ #include <asm/cputable.h> -#include <asm/reg.h> +#include <asm/vdso/timebase.h> #define CLOCK_TICK_RATE 1024000 /* Underlying HZ */ diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index d97f061fecac..160422a439aa 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -40,9 +40,6 @@ extern void tlb_flush(struct mmu_gather *tlb); /* Get the generic bits... 
*/ #include <asm-generic/tlb.h> -extern void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, - unsigned long address); - static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long address) { diff --git a/arch/powerpc/include/asm/vdso.h b/arch/powerpc/include/asm/vdso.h index 2ff884853f97..8542e9bbeead 100644 --- a/arch/powerpc/include/asm/vdso.h +++ b/arch/powerpc/include/asm/vdso.h @@ -1,12 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __PPC64_VDSO_H__ -#define __PPC64_VDSO_H__ - -#ifdef __KERNEL__ - -/* Default link addresses for the vDSOs */ -#define VDSO32_LBASE 0x0 -#define VDSO64_LBASE 0x0 +#ifndef _ASM_POWERPC_VDSO_H +#define _ASM_POWERPC_VDSO_H /* Default map addresses for 32bit vDSO */ #define VDSO32_MBASE 0x100000 @@ -15,10 +9,17 @@ #ifndef __ASSEMBLY__ -/* Offsets relative to thread->vdso_base */ -extern unsigned long vdso64_rt_sigtramp; -extern unsigned long vdso32_sigtramp; -extern unsigned long vdso32_rt_sigtramp; +#ifdef CONFIG_PPC64 +#include <generated/vdso64-offsets.h> +#endif + +#ifdef CONFIG_VDSO32 +#include <generated/vdso32-offsets.h> +#endif + +#define VDSO64_SYMBOL(base, name) ((unsigned long)(base) + (vdso64_offset_##name)) + +#define VDSO32_SYMBOL(base, name) ((unsigned long)(base) + (vdso32_offset_##name)) int vdso_getcpu_init(void); @@ -51,6 +52,4 @@ int vdso_getcpu_init(void); #endif /* __ASSEMBLY__ */ -#endif /* __KERNEL__ */ - -#endif /* __PPC64_VDSO_H__ */ +#endif /* _ASM_POWERPC_VDSO_H */ diff --git a/arch/powerpc/include/asm/vdso/clocksource.h b/arch/powerpc/include/asm/vdso/clocksource.h new file mode 100644 index 000000000000..c1ba56b82ee5 --- /dev/null +++ b/arch/powerpc/include/asm/vdso/clocksource.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_VDSO_CLOCKSOURCE_H +#define _ASM_POWERPC_VDSO_CLOCKSOURCE_H + +#define VDSO_ARCH_CLOCKMODES VDSO_CLOCKMODE_ARCHTIMER + +#endif diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h new file mode 100644 index 000000000000..81671aa365b3 --- /dev/null +++ b/arch/powerpc/include/asm/vdso/gettimeofday.h @@ -0,0 +1,201 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_VDSO_GETTIMEOFDAY_H +#define _ASM_POWERPC_VDSO_GETTIMEOFDAY_H + +#ifdef __ASSEMBLY__ + +#include <asm/ppc_asm.h> + +/* + * The macros sets two stack frames, one for the caller and one for the callee + * because there are no requirement for the caller to set a stack frame when + * calling VDSO so it may have omitted to set one, especially on PPC64 + */ + +.macro cvdso_call funct + .cfi_startproc + PPC_STLU r1, -PPC_MIN_STKFRM(r1) + mflr r0 + .cfi_register lr, r0 + PPC_STLU r1, -PPC_MIN_STKFRM(r1) + PPC_STL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) +#ifdef __powerpc64__ + PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) +#endif + get_datapage r5 + addi r5, r5, VDSO_DATA_OFFSET + bl DOTSYM(\funct) + PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) +#ifdef __powerpc64__ + PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1) +#endif + cmpwi r3, 0 + mtlr r0 + .cfi_restore lr + addi r1, r1, 2 * PPC_MIN_STKFRM + crclr so + beqlr+ + crset so + neg r3, r3 + blr + .cfi_endproc +.endm + +.macro cvdso_call_time funct + .cfi_startproc + PPC_STLU r1, -PPC_MIN_STKFRM(r1) + mflr r0 + .cfi_register lr, r0 + PPC_STLU r1, -PPC_MIN_STKFRM(r1) + PPC_STL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) +#ifdef __powerpc64__ + PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) +#endif + get_datapage r4 + addi r4, r4, VDSO_DATA_OFFSET + bl DOTSYM(\funct) + PPC_LL r0, 
PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) +#ifdef __powerpc64__ + PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1) +#endif + crclr so + mtlr r0 + .cfi_restore lr + addi r1, r1, 2 * PPC_MIN_STKFRM + blr + .cfi_endproc +.endm + +#else + +#include <asm/vdso/timebase.h> +#include <asm/barrier.h> +#include <asm/unistd.h> +#include <uapi/linux/time.h> + +#define VDSO_HAS_CLOCK_GETRES 1 + +#define VDSO_HAS_TIME 1 + +static __always_inline int do_syscall_2(const unsigned long _r0, const unsigned long _r3, + const unsigned long _r4) +{ + register long r0 asm("r0") = _r0; + register unsigned long r3 asm("r3") = _r3; + register unsigned long r4 asm("r4") = _r4; + register int ret asm ("r3"); + + asm volatile( + " sc\n" + " bns+ 1f\n" + " neg %0, %0\n" + "1:\n" + : "=r" (ret), "+r" (r4), "+r" (r0) + : "r" (r3) + : "memory", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cr0", "ctr"); + + return ret; +} + +static __always_inline +int gettimeofday_fallback(struct __kernel_old_timeval *_tv, struct timezone *_tz) +{ + return do_syscall_2(__NR_gettimeofday, (unsigned long)_tv, (unsigned long)_tz); +} + +static __always_inline +int clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + return do_syscall_2(__NR_clock_gettime, _clkid, (unsigned long)_ts); +} + +static __always_inline +int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + return do_syscall_2(__NR_clock_getres, _clkid, (unsigned long)_ts); +} + +#ifdef CONFIG_VDSO32 + +#define BUILD_VDSO32 1 + +static __always_inline +int clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) +{ + return do_syscall_2(__NR_clock_gettime, _clkid, (unsigned long)_ts); +} + +static __always_inline +int clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) +{ + return do_syscall_2(__NR_clock_getres, _clkid, (unsigned long)_ts); +} +#endif + +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, + const struct vdso_data *vd) +{ + return get_tb(); +} + +const struct vdso_data *__arch_get_vdso_data(void); + +static inline bool vdso_clocksource_ok(const struct vdso_data *vd) +{ + return true; +} +#define vdso_clocksource_ok vdso_clocksource_ok + +/* + * powerpc specific delta calculation. + * + * This variant removes the masking of the subtraction because the + * clocksource mask of all VDSO capable clocksources on powerpc is U64_MAX + * which would result in a pointless operation. The compiler cannot + * optimize it away as the mask comes from the vdso data and is not compile + * time constant. 
+ */ +static __always_inline u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult) +{ + return (cycles - last) * mult; +} +#define vdso_calc_delta vdso_calc_delta + +#ifndef __powerpc64__ +static __always_inline u64 vdso_shift_ns(u64 ns, unsigned long shift) +{ + u32 hi = ns >> 32; + u32 lo = ns; + + lo >>= shift; + lo |= hi << (32 - shift); + hi >>= shift; + + if (likely(hi == 0)) + return lo; + + return ((u64)hi << 32) | lo; +} +#define vdso_shift_ns vdso_shift_ns +#endif + +#ifdef __powerpc64__ +int __c_kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts, + const struct vdso_data *vd); +int __c_kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res, + const struct vdso_data *vd); +#else +int __c_kernel_clock_gettime(clockid_t clock, struct old_timespec32 *ts, + const struct vdso_data *vd); +int __c_kernel_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts, + const struct vdso_data *vd); +int __c_kernel_clock_getres(clockid_t clock_id, struct old_timespec32 *res, + const struct vdso_data *vd); +#endif +int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz, + const struct vdso_data *vd); +__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, + const struct vdso_data *vd); +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_POWERPC_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/powerpc/include/asm/vdso/processor.h b/arch/powerpc/include/asm/vdso/processor.h new file mode 100644 index 000000000000..e072577bc7c0 --- /dev/null +++ b/arch/powerpc/include/asm/vdso/processor.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASM_POWERPC_VDSO_PROCESSOR_H +#define _ASM_POWERPC_VDSO_PROCESSOR_H + +#ifndef __ASSEMBLY__ + +/* Macros for adjusting thread priority (hardware multi-threading) */ +#define HMT_very_low() asm volatile("or 31, 31, 31 # very low priority") +#define HMT_low() asm volatile("or 1, 1, 1 # low priority") +#define HMT_medium_low() asm volatile("or 6, 6, 6 # medium low priority") +#define HMT_medium() asm volatile("or 2, 2, 2 # medium priority") +#define HMT_medium_high() asm volatile("or 5, 5, 5 # medium high priority") +#define HMT_high() asm volatile("or 3, 3, 3 # high priority") + +#ifdef CONFIG_PPC64 +#define cpu_relax() do { HMT_low(); HMT_medium(); barrier(); } while (0) +#else +#define cpu_relax() barrier() +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_POWERPC_VDSO_PROCESSOR_H */ diff --git a/arch/powerpc/include/asm/vdso/timebase.h b/arch/powerpc/include/asm/vdso/timebase.h new file mode 100644 index 000000000000..b558b07959ce --- /dev/null +++ b/arch/powerpc/include/asm/vdso/timebase.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Common timebase prototypes and such for all ppc machines. + */ + +#ifndef _ASM_POWERPC_VDSO_TIMEBASE_H +#define _ASM_POWERPC_VDSO_TIMEBASE_H + +#include <asm/reg.h> + +/* + * We use __powerpc64__ here because we want the compat VDSO to use the 32-bit + * version below in the else case of the ifdef. 
+ */ +#if defined(__powerpc64__) && (defined(CONFIG_PPC_CELL) || defined(CONFIG_E500)) +#define mftb() ({unsigned long rval; \ + asm volatile( \ + "90: mfspr %0, %2;\n" \ + ASM_FTR_IFSET( \ + "97: cmpwi %0,0;\n" \ + " beq- 90b;\n", "", %1) \ + : "=r" (rval) \ + : "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL) : "cr0"); \ + rval;}) +#elif defined(CONFIG_PPC_8xx) +#define mftb() ({unsigned long rval; \ + asm volatile("mftbl %0" : "=r" (rval)); rval;}) +#else +#define mftb() ({unsigned long rval; \ + asm volatile("mfspr %0, %1" : \ + "=r" (rval) : "i" (SPRN_TBRL)); rval;}) +#endif /* !CONFIG_PPC_CELL */ + +#if defined(CONFIG_PPC_8xx) +#define mftbu() ({unsigned long rval; \ + asm volatile("mftbu %0" : "=r" (rval)); rval;}) +#else +#define mftbu() ({unsigned long rval; \ + asm volatile("mfspr %0, %1" : "=r" (rval) : \ + "i" (SPRN_TBRU)); rval;}) +#endif + +#define mttbl(v) asm volatile("mttbl %0":: "r"(v)) +#define mttbu(v) asm volatile("mttbu %0":: "r"(v)) + +/* For compatibility, get_tbl() is defined as get_tb() on ppc64 */ +static inline unsigned long get_tbl(void) +{ + return mftb(); +} + +static inline u64 get_tb(void) +{ + unsigned int tbhi, tblo, tbhi2; + + /* + * We use __powerpc64__ here not CONFIG_PPC64 because we want the compat + * VDSO to use the 32-bit compatible version in the while loop below. + */ + if (__is_defined(__powerpc64__)) + return mftb(); + + do { + tbhi = mftbu(); + tblo = mftb(); + tbhi2 = mftbu(); + } while (tbhi != tbhi2); + + return ((u64)tbhi << 32) | tblo; +} + +static inline void set_tb(unsigned int upper, unsigned int lower) +{ + mtspr(SPRN_TBWL, 0); + mtspr(SPRN_TBWU, upper); + mtspr(SPRN_TBWL, lower); +} + +#endif /* _ASM_POWERPC_VDSO_TIMEBASE_H */ diff --git a/arch/powerpc/include/asm/vdso/vsyscall.h b/arch/powerpc/include/asm/vdso/vsyscall.h new file mode 100644 index 000000000000..48cf23f1e273 --- /dev/null +++ b/arch/powerpc/include/asm/vdso/vsyscall.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_VDSO_VSYSCALL_H +#define _ASM_POWERPC_VDSO_VSYSCALL_H + +#ifndef __ASSEMBLY__ + +#include <linux/timekeeper_internal.h> +#include <asm/vdso_datapage.h> + +/* + * Update the vDSO data page to keep in sync with kernel timekeeping. 
+ */ +static __always_inline +struct vdso_data *__arch_get_k_vdso_data(void) +{ + return vdso_data->data; +} +#define __arch_get_k_vdso_data __arch_get_k_vdso_data + +/* The asm-generic header needs to be included after the definitions above */ +#include <asm-generic/vdso/vsyscall.h> + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_POWERPC_VDSO_VSYSCALL_H */ diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h index b9ef6cf50ea5..3f958ecf2beb 100644 --- a/arch/powerpc/include/asm/vdso_datapage.h +++ b/arch/powerpc/include/asm/vdso_datapage.h @@ -36,6 +36,7 @@ #include <linux/unistd.h> #include <linux/time.h> +#include <vdso/datapage.h> #define SYSCALL_MAP_SIZE ((NR_syscalls + 31) / 32) @@ -45,7 +46,7 @@ #ifdef CONFIG_PPC64 -struct vdso_data { +struct vdso_arch_data { __u8 eye_catcher[16]; /* Eyecatcher: SYSTEMCFG:PPC64 0x00 */ struct { /* Systemcfg version numbers */ __u32 major; /* Major number 0x10 */ @@ -59,13 +60,13 @@ struct vdso_data { __u32 processor; /* Processor type 0x1C */ __u64 processorCount; /* # of physical processors 0x20 */ __u64 physicalMemorySize; /* Size of real memory(B) 0x28 */ - __u64 tb_orig_stamp; /* Timebase at boot 0x30 */ + __u64 tb_orig_stamp; /* (NU) Timebase at boot 0x30 */ __u64 tb_ticks_per_sec; /* Timebase tics / sec 0x38 */ - __u64 tb_to_xs; /* Inverse of TB to 2^20 0x40 */ - __u64 stamp_xsec; /* 0x48 */ - __u64 tb_update_count; /* Timebase atomicity ctr 0x50 */ - __u32 tz_minuteswest; /* Minutes west of Greenwich 0x58 */ - __u32 tz_dsttime; /* Type of dst correction 0x5C */ + __u64 tb_to_xs; /* (NU) Inverse of TB to 2^20 0x40 */ + __u64 stamp_xsec; /* (NU) 0x48 */ + __u64 tb_update_count; /* (NU) Timebase atomicity ctr 0x50 */ + __u32 tz_minuteswest; /* (NU) Min. west of Greenwich 0x58 */ + __u32 tz_dsttime; /* (NU) Type of dst correction 0x5C */ __u32 dcache_size; /* L1 d-cache size 0x60 */ __u32 dcache_line_size; /* L1 d-cache line size 0x64 */ __u32 icache_size; /* L1 i-cache size 0x68 */ @@ -78,14 +79,10 @@ struct vdso_data { __u32 icache_block_size; /* L1 i-cache block size */ __u32 dcache_log_block_size; /* L1 d-cache log block size */ __u32 icache_log_block_size; /* L1 i-cache log block size */ - __u32 stamp_sec_fraction; /* fractional seconds of stamp_xtime */ - __s32 wtom_clock_nsec; /* Wall to monotonic clock nsec */ - __s64 wtom_clock_sec; /* Wall to monotonic clock sec */ - __s64 stamp_xtime_sec; /* xtime secs as at tb_orig_stamp */ - __s64 stamp_xtime_nsec; /* xtime nsecs as at tb_orig_stamp */ - __u32 hrtimer_res; /* hrtimer resolution */ - __u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of syscalls */ - __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */ + __u32 syscall_map[SYSCALL_MAP_SIZE]; /* Map of syscalls */ + __u32 compat_syscall_map[SYSCALL_MAP_SIZE]; /* Map of compat syscalls */ + + struct vdso_data data[CS_BASES]; }; #else /* CONFIG_PPC64 */ @@ -93,35 +90,27 @@ struct vdso_data { /* * And here is the simpler 32 bits version */ -struct vdso_data { - __u64 tb_orig_stamp; /* Timebase at boot 0x30 */ +struct vdso_arch_data { __u64 tb_ticks_per_sec; /* Timebase tics / sec 0x38 */ - __u64 tb_to_xs; /* Inverse of TB to 2^20 0x40 */ - __u64 stamp_xsec; /* 0x48 */ - __u32 tb_update_count; /* Timebase atomicity ctr 0x50 */ - __u32 tz_minuteswest; /* Minutes west of Greenwich 0x58 */ - __u32 tz_dsttime; /* Type of dst correction 0x5C */ - __s32 wtom_clock_sec; /* Wall to monotonic clock */ - __s32 wtom_clock_nsec; - __s32 stamp_xtime_sec; /* xtime seconds as at tb_orig_stamp */ - __s32 
stamp_xtime_nsec; /* xtime nsecs as at tb_orig_stamp */ - __u32 stamp_sec_fraction; /* fractional seconds of stamp_xtime */ - __u32 hrtimer_res; /* hrtimer resolution */ - __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */ + __u32 syscall_map[SYSCALL_MAP_SIZE]; /* Map of syscalls */ + __u32 compat_syscall_map[0]; /* No compat syscalls on PPC32 */ + struct vdso_data data[CS_BASES]; }; #endif /* CONFIG_PPC64 */ -extern struct vdso_data *vdso_data; +extern struct vdso_arch_data *vdso_data; #else /* __ASSEMBLY__ */ -.macro get_datapage ptr, tmp +.macro get_datapage ptr bcl 20, 31, .+4 +999: mflr \ptr - addi \ptr, \ptr, (__kernel_datapage_offset - (.-4))@l - lwz \tmp, 0(\ptr) - add \ptr, \tmp, \ptr +#if CONFIG_PPC_PAGE_SHIFT > 14 + addis \ptr, \ptr, (_vdso_datapage - 999b)@ha +#endif + addi \ptr, \ptr, (_vdso_datapage - 999b)@l .endm #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index 309b4d65b74f..9a312b975ca8 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -60,13 +60,13 @@ struct xive_irq_data { }; #define XIVE_IRQ_FLAG_STORE_EOI 0x01 #define XIVE_IRQ_FLAG_LSI 0x02 -#define XIVE_IRQ_FLAG_SHIFT_BUG 0x04 -#define XIVE_IRQ_FLAG_MASK_FW 0x08 -#define XIVE_IRQ_FLAG_EOI_FW 0x10 +/* #define XIVE_IRQ_FLAG_SHIFT_BUG 0x04 */ /* P9 DD1.0 workaround */ +/* #define XIVE_IRQ_FLAG_MASK_FW 0x08 */ /* P9 DD1.0 workaround */ +/* #define XIVE_IRQ_FLAG_EOI_FW 0x10 */ /* P9 DD1.0 workaround */ #define XIVE_IRQ_FLAG_H_INT_ESB 0x20 /* Special flag set by KVM for excalation interrupts */ -#define XIVE_IRQ_NO_EOI 0x80 +#define XIVE_IRQ_FLAG_NO_EOI 0x80 #define XIVE_INVALID_CHIP_ID -1 diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index bf0bf1b900d2..fe2ef598e2ea 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -173,6 +173,9 @@ KCOV_INSTRUMENT_cputable.o := n KCOV_INSTRUMENT_setup_64.o := n KCOV_INSTRUMENT_paca.o := n +CFLAGS_setup_64.o += -fno-stack-protector +CFLAGS_paca.o += -fno-stack-protector + extra-$(CONFIG_PPC_FPU) += fpu.o extra-$(CONFIG_ALTIVEC) += vector.o extra-$(CONFIG_PPC64) += entry_64.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index c2722ff36e98..b12d7c049bfe 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -110,9 +110,11 @@ int main(void) #ifdef CONFIG_BOOKE OFFSET(THREAD_NORMSAVES, thread_struct, normsave[0]); #endif +#ifdef CONFIG_PPC_FPU OFFSET(THREAD_FPEXC_MODE, thread_struct, fpexc_mode); OFFSET(THREAD_FPSTATE, thread_struct, fp_state.fpr); OFFSET(THREAD_FPSAVEAREA, thread_struct, fp_save_area); +#endif OFFSET(FPSTATE_FPSCR, thread_fp_state, fpscr); OFFSET(THREAD_LOAD_FP, thread_struct, load_fp); #ifdef CONFIG_ALTIVEC @@ -354,10 +356,15 @@ int main(void) STACK_PT_REGS_OFFSET(_PPR, ppr); #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_PPC_PKEY + STACK_PT_REGS_OFFSET(STACK_REGS_AMR, amr); + STACK_PT_REGS_OFFSET(STACK_REGS_IAMR, iamr); +#endif #ifdef CONFIG_PPC_KUAP STACK_PT_REGS_OFFSET(STACK_REGS_KUAP, kuap); #endif + #if defined(CONFIG_PPC32) #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) DEFINE(EXC_LVL_SIZE, STACK_EXC_LVL_FRAME_SIZE); @@ -398,47 +405,18 @@ int main(void) #endif /* ! 
CONFIG_PPC64 */ /* datapage offsets for use by vdso */ - OFFSET(CFG_TB_ORIG_STAMP, vdso_data, tb_orig_stamp); - OFFSET(CFG_TB_TICKS_PER_SEC, vdso_data, tb_ticks_per_sec); - OFFSET(CFG_TB_TO_XS, vdso_data, tb_to_xs); - OFFSET(CFG_TB_UPDATE_COUNT, vdso_data, tb_update_count); - OFFSET(CFG_TZ_MINUTEWEST, vdso_data, tz_minuteswest); - OFFSET(CFG_TZ_DSTTIME, vdso_data, tz_dsttime); - OFFSET(CFG_SYSCALL_MAP32, vdso_data, syscall_map_32); - OFFSET(WTOM_CLOCK_SEC, vdso_data, wtom_clock_sec); - OFFSET(WTOM_CLOCK_NSEC, vdso_data, wtom_clock_nsec); - OFFSET(STAMP_XTIME_SEC, vdso_data, stamp_xtime_sec); - OFFSET(STAMP_XTIME_NSEC, vdso_data, stamp_xtime_nsec); - OFFSET(STAMP_SEC_FRAC, vdso_data, stamp_sec_fraction); - OFFSET(CLOCK_HRTIMER_RES, vdso_data, hrtimer_res); + OFFSET(VDSO_DATA_OFFSET, vdso_arch_data, data); + OFFSET(CFG_TB_TICKS_PER_SEC, vdso_arch_data, tb_ticks_per_sec); #ifdef CONFIG_PPC64 - OFFSET(CFG_ICACHE_BLOCKSZ, vdso_data, icache_block_size); - OFFSET(CFG_DCACHE_BLOCKSZ, vdso_data, dcache_block_size); - OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_data, icache_log_block_size); - OFFSET(CFG_DCACHE_LOGBLOCKSZ, vdso_data, dcache_log_block_size); - OFFSET(CFG_SYSCALL_MAP64, vdso_data, syscall_map_64); - OFFSET(TVAL64_TV_SEC, __kernel_old_timeval, tv_sec); - OFFSET(TVAL64_TV_USEC, __kernel_old_timeval, tv_usec); -#endif - OFFSET(TSPC64_TV_SEC, __kernel_timespec, tv_sec); - OFFSET(TSPC64_TV_NSEC, __kernel_timespec, tv_nsec); - OFFSET(TVAL32_TV_SEC, old_timeval32, tv_sec); - OFFSET(TVAL32_TV_USEC, old_timeval32, tv_usec); - OFFSET(TSPC32_TV_SEC, old_timespec32, tv_sec); - OFFSET(TSPC32_TV_NSEC, old_timespec32, tv_nsec); - /* timeval/timezone offsets for use by vdso */ - OFFSET(TZONE_TZ_MINWEST, timezone, tz_minuteswest); - OFFSET(TZONE_TZ_DSTTIME, timezone, tz_dsttime); - - /* Other bits used by the vdso */ - DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); - DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); - DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); - DEFINE(CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE); - DEFINE(CLOCK_MAX, CLOCK_TAI); - DEFINE(NSEC_PER_SEC, NSEC_PER_SEC); - DEFINE(EINVAL, EINVAL); - DEFINE(KTIME_LOW_RES, KTIME_LOW_RES); + OFFSET(CFG_ICACHE_BLOCKSZ, vdso_arch_data, icache_block_size); + OFFSET(CFG_DCACHE_BLOCKSZ, vdso_arch_data, dcache_block_size); + OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_arch_data, icache_log_block_size); + OFFSET(CFG_DCACHE_LOGBLOCKSZ, vdso_arch_data, dcache_log_block_size); + OFFSET(CFG_SYSCALL_MAP64, vdso_arch_data, syscall_map); + OFFSET(CFG_SYSCALL_MAP32, vdso_arch_data, compat_syscall_map); +#else + OFFSET(CFG_SYSCALL_MAP32, vdso_arch_data, syscall_map); +#endif #ifdef CONFIG_BUG DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry)); diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 65ab9fcebd31..6f903e9aa20b 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -655,11 +655,27 @@ static unsigned int index_dir_to_cpu(struct cache_index_dir *index) * On big-core systems, each core has two groups of CPUs each of which * has its own L1-cache. The thread-siblings which share l1-cache with * @cpu can be obtained via cpu_smallcore_mask(). + * + * On some big-core systems, the L2 cache is shared only between some + * groups of siblings. This is already parsed and encoded in + * cpu_l2_cache_mask(). 
+ * + * TODO: cache_lookup_or_instantiate() needs to be made aware of the + * "ibm,thread-groups" property so that cache->shared_cpu_map + * reflects the correct siblings on platforms that have this + * device-tree property. This helper function is only a stop-gap + * solution so that we report the correct siblings to the + * userspace via sysfs. */ -static const struct cpumask *get_big_core_shared_cpu_map(int cpu, struct cache *cache) +static const struct cpumask *get_shared_cpu_map(struct cache_index_dir *index, struct cache *cache) { - if (cache->level == 1) - return cpu_smallcore_mask(cpu); + if (has_big_cores) { + int cpu = index_dir_to_cpu(index); + if (cache->level == 1) + return cpu_smallcore_mask(cpu); + if (cache->level == 2 && thread_group_shares_l2) + return cpu_l2_cache_mask(cpu); + } return &cache->shared_cpu_map; } @@ -670,17 +686,11 @@ show_shared_cpumap(struct kobject *k, struct kobj_attribute *attr, char *buf, bo struct cache_index_dir *index; struct cache *cache; const struct cpumask *mask; - int cpu; index = kobj_to_cache_index_dir(k); cache = index->cache; - if (has_big_cores) { - cpu = index_dir_to_cpu(index); - mask = get_big_core_shared_cpu_map(cpu, cache); - } else { - mask = &cache->shared_cpu_map; - } + mask = get_shared_cpu_map(index, cache); return cpumap_print_to_pagebuf(list, buf, mask); } diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 1d308780e0d3..4bf33f1b4193 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -108,15 +108,6 @@ _GLOBAL(__setup_cpu_e6500) #endif /* CONFIG_PPC_E500MC */ #ifdef CONFIG_PPC32 -#ifdef CONFIG_E200 -_GLOBAL(__setup_cpu_e200) - /* enable dedicated debug exception handling resources (Debug APU) */ - mfspr r3,SPRN_HID0 - ori r3,r3,HID0_DAPUEN@l - mtspr SPRN_HID0,r3 - b __setup_e200_ivors -#endif /* CONFIG_E200 */ - #ifdef CONFIG_E500 #ifndef CONFIG_PPC_E500MC _GLOBAL(__setup_cpu_e500v1) diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S deleted file mode 100644 index 704e8b9501ee..000000000000 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ /dev/null @@ -1,252 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * This file contains low level CPU setup functions. - * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org) - */ - -#include <asm/processor.h> -#include <asm/page.h> -#include <asm/cputable.h> -#include <asm/ppc_asm.h> -#include <asm/asm-offsets.h> -#include <asm/cache.h> -#include <asm/book3s/64/mmu-hash.h> - -/* Entry: r3 = crap, r4 = ptr to cputable entry - * - * Note that we can be called twice for pseudo-PVRs - */ -_GLOBAL(__setup_cpu_power7) - mflr r11 - bl __init_hvmode_206 - mtlr r11 - beqlr - li r0,0 - mtspr SPRN_LPID,r0 - LOAD_REG_IMMEDIATE(r0, PCR_MASK) - mtspr SPRN_PCR,r0 - mfspr r3,SPRN_LPCR - li r4,(LPCR_LPES1 >> LPCR_LPES_SH) - bl __init_LPCR_ISA206 - mtlr r11 - blr - -_GLOBAL(__restore_cpu_power7) - mflr r11 - mfmsr r3 - rldicl. 
r0,r3,4,63 - beqlr - li r0,0 - mtspr SPRN_LPID,r0 - LOAD_REG_IMMEDIATE(r0, PCR_MASK) - mtspr SPRN_PCR,r0 - mfspr r3,SPRN_LPCR - li r4,(LPCR_LPES1 >> LPCR_LPES_SH) - bl __init_LPCR_ISA206 - mtlr r11 - blr - -_GLOBAL(__setup_cpu_power8) - mflr r11 - bl __init_FSCR - bl __init_PMU - bl __init_PMU_ISA207 - bl __init_hvmode_206 - mtlr r11 - beqlr - li r0,0 - mtspr SPRN_LPID,r0 - LOAD_REG_IMMEDIATE(r0, PCR_MASK) - mtspr SPRN_PCR,r0 - mfspr r3,SPRN_LPCR - ori r3, r3, LPCR_PECEDH - li r4,0 /* LPES = 0 */ - bl __init_LPCR_ISA206 - bl __init_HFSCR - bl __init_PMU_HV - bl __init_PMU_HV_ISA207 - mtlr r11 - blr - -_GLOBAL(__restore_cpu_power8) - mflr r11 - bl __init_FSCR - bl __init_PMU - bl __init_PMU_ISA207 - mfmsr r3 - rldicl. r0,r3,4,63 - mtlr r11 - beqlr - li r0,0 - mtspr SPRN_LPID,r0 - LOAD_REG_IMMEDIATE(r0, PCR_MASK) - mtspr SPRN_PCR,r0 - mfspr r3,SPRN_LPCR - ori r3, r3, LPCR_PECEDH - li r4,0 /* LPES = 0 */ - bl __init_LPCR_ISA206 - bl __init_HFSCR - bl __init_PMU_HV - bl __init_PMU_HV_ISA207 - mtlr r11 - blr - -_GLOBAL(__setup_cpu_power10) - mflr r11 - bl __init_FSCR_power10 - bl __init_PMU - bl __init_PMU_ISA31 - b 1f - -_GLOBAL(__setup_cpu_power9) - mflr r11 - bl __init_FSCR_power9 - bl __init_PMU -1: bl __init_hvmode_206 - mtlr r11 - beqlr - li r0,0 - mtspr SPRN_PSSCR,r0 - mtspr SPRN_LPID,r0 - mtspr SPRN_PID,r0 - LOAD_REG_IMMEDIATE(r0, PCR_MASK) - mtspr SPRN_PCR,r0 - mfspr r3,SPRN_LPCR - LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC) - or r3, r3, r4 - LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR) - andc r3, r3, r4 - li r4,0 /* LPES = 0 */ - bl __init_LPCR_ISA300 - bl __init_HFSCR - bl __init_PMU_HV - mtlr r11 - blr - -_GLOBAL(__restore_cpu_power10) - mflr r11 - bl __init_FSCR_power10 - bl __init_PMU - bl __init_PMU_ISA31 - b 1f - -_GLOBAL(__restore_cpu_power9) - mflr r11 - bl __init_FSCR_power9 - bl __init_PMU -1: mfmsr r3 - rldicl. r0,r3,4,63 - mtlr r11 - beqlr - li r0,0 - mtspr SPRN_PSSCR,r0 - mtspr SPRN_LPID,r0 - mtspr SPRN_PID,r0 - LOAD_REG_IMMEDIATE(r0, PCR_MASK) - mtspr SPRN_PCR,r0 - mfspr r3,SPRN_LPCR - LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC) - or r3, r3, r4 - LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR) - andc r3, r3, r4 - li r4,0 /* LPES = 0 */ - bl __init_LPCR_ISA300 - bl __init_HFSCR - bl __init_PMU_HV - mtlr r11 - blr - -__init_hvmode_206: - /* Disable CPU_FTR_HVMODE and exit if MSR:HV is not set */ - mfmsr r3 - rldicl. 
r0,r3,4,63 - bnelr - ld r5,CPU_SPEC_FEATURES(r4) - LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE | CPU_FTR_P9_TM_HV_ASSIST) - andc r5,r5,r6 - std r5,CPU_SPEC_FEATURES(r4) - blr - -__init_LPCR_ISA206: - /* Setup a sane LPCR: - * Called with initial LPCR in R3 and desired LPES 2-bit value in R4 - * - * LPES = 0b01 (HSRR0/1 used for 0x500) - * PECE = 0b111 - * DPFD = 4 - * HDICE = 0 - * VC = 0b100 (VPM0=1, VPM1=0, ISL=0) - * VRMASD = 0b10000 (L=1, LP=00) - * - * Other bits untouched for now - */ - li r5,0x10 - rldimi r3,r5, LPCR_VRMASD_SH, 64-LPCR_VRMASD_SH-5 - - /* POWER9 has no VRMASD */ -__init_LPCR_ISA300: - rldimi r3,r4, LPCR_LPES_SH, 64-LPCR_LPES_SH-2 - ori r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2) - li r5,4 - rldimi r3,r5, LPCR_DPFD_SH, 64-LPCR_DPFD_SH-3 - clrrdi r3,r3,1 /* clear HDICE */ - li r5,4 - rldimi r3,r5, LPCR_VC_SH, 0 - mtspr SPRN_LPCR,r3 - isync - blr - -__init_FSCR_power10: - mfspr r3, SPRN_FSCR - ori r3, r3, FSCR_PREFIX - mtspr SPRN_FSCR, r3 - // fall through - -__init_FSCR_power9: - mfspr r3, SPRN_FSCR - ori r3, r3, FSCR_SCV - mtspr SPRN_FSCR, r3 - // fall through - -__init_FSCR: - mfspr r3,SPRN_FSCR - ori r3,r3,FSCR_TAR|FSCR_EBB - mtspr SPRN_FSCR,r3 - blr - -__init_HFSCR: - mfspr r3,SPRN_HFSCR - ori r3,r3,HFSCR_TAR|HFSCR_TM|HFSCR_BHRB|HFSCR_PM|\ - HFSCR_DSCR|HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB|HFSCR_MSGP - mtspr SPRN_HFSCR,r3 - blr - -__init_PMU_HV: - li r5,0 - mtspr SPRN_MMCRC,r5 - blr - -__init_PMU_HV_ISA207: - li r5,0 - mtspr SPRN_MMCRH,r5 - blr - -__init_PMU: - li r5,0 - mtspr SPRN_MMCRA,r5 - mtspr SPRN_MMCR0,r5 - mtspr SPRN_MMCR1,r5 - mtspr SPRN_MMCR2,r5 - blr - -__init_PMU_ISA207: - li r5,0 - mtspr SPRN_MMCRS,r5 - blr - -__init_PMU_ISA31: - li r5,0 - mtspr SPRN_MMCR3,r5 - LOAD_REG_IMMEDIATE(r5, MMCRA_BHRB_DISABLE) - mtspr SPRN_MMCRA,r5 - blr diff --git a/arch/powerpc/kernel/cpu_setup_power.c b/arch/powerpc/kernel/cpu_setup_power.c new file mode 100644 index 000000000000..3cca88ee96d7 --- /dev/null +++ b/arch/powerpc/kernel/cpu_setup_power.c @@ -0,0 +1,272 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2020, Jordan Niethe, IBM Corporation. + * + * This file contains low level CPU setup functions. + * Originally written in assembly by Benjamin Herrenschmidt & various other + * authors. 
+ */ + +#include <asm/reg.h> +#include <asm/synch.h> +#include <linux/bitops.h> +#include <asm/cputable.h> +#include <asm/cpu_setup_power.h> + +/* Disable CPU_FTR_HVMODE and return false if MSR:HV is not set */ +static bool init_hvmode_206(struct cpu_spec *t) +{ + u64 msr; + + msr = mfmsr(); + if (msr & MSR_HV) + return true; + + t->cpu_features &= ~(CPU_FTR_HVMODE | CPU_FTR_P9_TM_HV_ASSIST); + return false; +} + +static void init_LPCR_ISA300(u64 lpcr, u64 lpes) +{ + /* POWER9 has no VRMASD */ + lpcr |= (lpes << LPCR_LPES_SH) & LPCR_LPES; + lpcr |= LPCR_PECE0|LPCR_PECE1|LPCR_PECE2; + lpcr |= (4ull << LPCR_DPFD_SH) & LPCR_DPFD; + lpcr &= ~LPCR_HDICE; /* clear HDICE */ + lpcr |= (4ull << LPCR_VC_SH); + mtspr(SPRN_LPCR, lpcr); + isync(); +} + +/* + * Setup a sane LPCR: + * Called with initial LPCR and desired LPES 2-bit value + * + * LPES = 0b01 (HSRR0/1 used for 0x500) + * PECE = 0b111 + * DPFD = 4 + * HDICE = 0 + * VC = 0b100 (VPM0=1, VPM1=0, ISL=0) + * VRMASD = 0b10000 (L=1, LP=00) + * + * Other bits untouched for now + */ +static void init_LPCR_ISA206(u64 lpcr, u64 lpes) +{ + lpcr |= (0x10ull << LPCR_VRMASD_SH) & LPCR_VRMASD; + init_LPCR_ISA300(lpcr, lpes); +} + +static void init_FSCR(void) +{ + u64 fscr; + + fscr = mfspr(SPRN_FSCR); + fscr |= FSCR_TAR|FSCR_EBB; + mtspr(SPRN_FSCR, fscr); +} + +static void init_FSCR_power9(void) +{ + u64 fscr; + + fscr = mfspr(SPRN_FSCR); + fscr |= FSCR_SCV; + mtspr(SPRN_FSCR, fscr); + init_FSCR(); +} + +static void init_FSCR_power10(void) +{ + u64 fscr; + + fscr = mfspr(SPRN_FSCR); + fscr |= FSCR_PREFIX; + mtspr(SPRN_FSCR, fscr); + init_FSCR_power9(); +} + +static void init_HFSCR(void) +{ + u64 hfscr; + + hfscr = mfspr(SPRN_HFSCR); + hfscr |= HFSCR_TAR|HFSCR_TM|HFSCR_BHRB|HFSCR_PM|HFSCR_DSCR|\ + HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB|HFSCR_MSGP; + mtspr(SPRN_HFSCR, hfscr); +} + +static void init_PMU_HV(void) +{ + mtspr(SPRN_MMCRC, 0); +} + +static void init_PMU_HV_ISA207(void) +{ + mtspr(SPRN_MMCRH, 0); +} + +static void init_PMU(void) +{ + mtspr(SPRN_MMCRA, 0); + mtspr(SPRN_MMCR0, 0); + mtspr(SPRN_MMCR1, 0); + mtspr(SPRN_MMCR2, 0); +} + +static void init_PMU_ISA207(void) +{ + mtspr(SPRN_MMCRS, 0); +} + +static void init_PMU_ISA31(void) +{ + mtspr(SPRN_MMCR3, 0); + mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE); + mtspr(SPRN_MMCR0, MMCR0_PMCCEXT); +} + +/* + * Note that we can be called twice of pseudo-PVRs. + * The parameter offset is not used. 
+ */ + +void __setup_cpu_power7(unsigned long offset, struct cpu_spec *t) +{ + if (!init_hvmode_206(t)) + return; + + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH); +} + +void __restore_cpu_power7(void) +{ + u64 msr; + + msr = mfmsr(); + if (!(msr & MSR_HV)) + return; + + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH); +} + +void __setup_cpu_power8(unsigned long offset, struct cpu_spec *t) +{ + init_FSCR(); + init_PMU(); + init_PMU_ISA207(); + + if (!init_hvmode_206(t)) + return; + + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */ + init_HFSCR(); + init_PMU_HV(); + init_PMU_HV_ISA207(); +} + +void __restore_cpu_power8(void) +{ + u64 msr; + + init_FSCR(); + init_PMU(); + init_PMU_ISA207(); + + msr = mfmsr(); + if (!(msr & MSR_HV)) + return; + + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */ + init_HFSCR(); + init_PMU_HV(); + init_PMU_HV_ISA207(); +} + +void __setup_cpu_power9(unsigned long offset, struct cpu_spec *t) +{ + init_FSCR_power9(); + init_PMU(); + + if (!init_hvmode_206(t)) + return; + + mtspr(SPRN_PSSCR, 0); + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ + LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); + init_HFSCR(); + init_PMU_HV(); +} + +void __restore_cpu_power9(void) +{ + u64 msr; + + init_FSCR_power9(); + init_PMU(); + + msr = mfmsr(); + if (!(msr & MSR_HV)) + return; + + mtspr(SPRN_PSSCR, 0); + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ + LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); + init_HFSCR(); + init_PMU_HV(); +} + +void __setup_cpu_power10(unsigned long offset, struct cpu_spec *t) +{ + init_FSCR_power10(); + init_PMU(); + init_PMU_ISA31(); + + if (!init_hvmode_206(t)) + return; + + mtspr(SPRN_PSSCR, 0); + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ + LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); + init_HFSCR(); + init_PMU_HV(); +} + +void __restore_cpu_power10(void) +{ + u64 msr; + + init_FSCR_power10(); + init_PMU(); + init_PMU_ISA31(); + + msr = mfmsr(); + if (!(msr & MSR_HV)) + return; + + mtspr(SPRN_PSSCR, 0); + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ + LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); + init_HFSCR(); + init_PMU_HV(); +} diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 29de58d4dfb7..65f35ec052d4 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -36,7 +36,6 @@ const char *powerpc_base_platform; * and ppc64 */ #ifdef CONFIG_PPC32 -extern void __setup_cpu_e200(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_e500v1(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_e500v2(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_e500mc(unsigned long offset, struct cpu_spec* spec); @@ -60,19 +59,15 @@ extern void __setup_cpu_7410(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_745x(unsigned long offset, struct 
cpu_spec* spec); #endif /* CONFIG_PPC32 */ #ifdef CONFIG_PPC64 +#include <asm/cpu_setup_power.h> extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_ppc970MP(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_pa6t(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_pa6t(void); extern void __restore_cpu_ppc970(void); -extern void __setup_cpu_power7(unsigned long offset, struct cpu_spec* spec); -extern void __restore_cpu_power7(void); -extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec); -extern void __restore_cpu_power8(void); -extern void __setup_cpu_power9(unsigned long offset, struct cpu_spec* spec); -extern void __restore_cpu_power9(void); -extern void __setup_cpu_power10(unsigned long offset, struct cpu_spec* spec); -extern void __restore_cpu_power10(void); +extern long __machine_check_early_realmode_p7(struct pt_regs *regs); +extern long __machine_check_early_realmode_p8(struct pt_regs *regs); +extern long __machine_check_early_realmode_p9(struct pt_regs *regs); #endif /* CONFIG_PPC64 */ #if defined(CONFIG_E500) extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); @@ -616,46 +611,8 @@ static struct cpu_spec __initdata cpu_specs[] = { #endif /* CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_PPC32 -#ifdef CONFIG_PPC_BOOK3S_6xx - { /* 603 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00030000, - .cpu_name = "603", - .cpu_features = CPU_FTRS_603, - .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, - .mmu_features = 0, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_603, - .machine_check = machine_check_generic, - .platform = "ppc603", - }, - { /* 603e */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00060000, - .cpu_name = "603e", - .cpu_features = CPU_FTRS_603, - .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, - .mmu_features = 0, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_603, - .machine_check = machine_check_generic, - .platform = "ppc603", - }, - { /* 603ev */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00070000, - .cpu_name = "603ev", - .cpu_features = CPU_FTRS_603, - .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, - .mmu_features = 0, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_603, - .machine_check = machine_check_generic, - .platform = "ppc603", - }, +#ifdef CONFIG_PPC_BOOK3S_32 +#ifdef CONFIG_PPC_BOOK3S_604 { /* 604 */ .pvr_mask = 0xffff0000, .pvr_value = 0x00040000, @@ -1145,6 +1102,47 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_generic, .platform = "ppc7450", }, +#endif /* CONFIG_PPC_BOOK3S_604 */ +#ifdef CONFIG_PPC_BOOK3S_603 + { /* 603 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00030000, + .cpu_name = "603", + .cpu_features = CPU_FTRS_603, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = 0, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_generic, + .platform = "ppc603", + }, + { /* 603e */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00060000, + .cpu_name = "603e", + .cpu_features = CPU_FTRS_603, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = 0, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_generic, + .platform = "ppc603", + }, + { /* 603ev */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00070000, + .cpu_name = "603ev", + .cpu_features = 
CPU_FTRS_603, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = 0, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_generic, + .platform = "ppc603", + }, { /* 82xx (8240, 8245, 8260 are all 603e cores) */ .pvr_mask = 0x7fff0000, .pvr_value = 0x00810000, @@ -1234,6 +1232,8 @@ static struct cpu_spec __initdata cpu_specs[] = { .platform = "ppc603", }, #endif +#endif /* CONFIG_PPC_BOOK3S_603 */ +#ifdef CONFIG_PPC_BOOK3S_604 { /* default match, we assume split I/D cache & TB (non-601)... */ .pvr_mask = 0x00000000, .pvr_value = 0x00000000, @@ -1246,7 +1246,8 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_generic, .platform = "ppc603", }, -#endif /* CONFIG_PPC_BOOK3S_6xx */ +#endif /* CONFIG_PPC_BOOK3S_604 */ +#endif /* CONFIG_PPC_BOOK3S_32 */ #ifdef CONFIG_PPC_8xx { /* 8xx */ .pvr_mask = 0xffff0000, @@ -1540,6 +1541,7 @@ static struct cpu_spec __initdata cpu_specs[] = { #endif /* CONFIG_40x */ #ifdef CONFIG_44x +#ifndef CONFIG_PPC_47x { .pvr_mask = 0xf0000fff, .pvr_value = 0x40000850, @@ -1822,7 +1824,19 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_440A, .platform = "ppc440", }, -#ifdef CONFIG_PPC_47x + { /* default match */ + .pvr_mask = 0x00000000, + .pvr_value = 0x00000000, + .cpu_name = "(generic 44x PPC)", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc440", + } +#else /* CONFIG_PPC_47x */ { /* 476 DD2 core */ .pvr_mask = 0xffffffff, .pvr_value = 0x11a52080, @@ -1879,65 +1893,20 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_47x, .platform = "ppc470", }, -#endif /* CONFIG_PPC_47x */ { /* default match */ .pvr_mask = 0x00000000, .pvr_value = 0x00000000, - .cpu_name = "(generic 44x PPC)", - .cpu_features = CPU_FTRS_44X, + .cpu_name = "(generic 47x PPC)", + .cpu_features = CPU_FTRS_47X, .cpu_user_features = COMMON_USER_BOOKE, - .mmu_features = MMU_FTR_TYPE_44x, + .mmu_features = MMU_FTR_TYPE_47x, .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc440", + .dcache_bsize = 128, + .machine_check = machine_check_47x, + .platform = "ppc470", } +#endif /* CONFIG_PPC_47x */ #endif /* CONFIG_44x */ -#ifdef CONFIG_E200 - { /* e200z5 */ - .pvr_mask = 0xfff00000, - .pvr_value = 0x81000000, - .cpu_name = "e200z5", - /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */ - .cpu_features = CPU_FTRS_E200, - .cpu_user_features = COMMON_USER_BOOKE | - PPC_FEATURE_HAS_EFP_SINGLE | - PPC_FEATURE_UNIFIED_CACHE, - .mmu_features = MMU_FTR_TYPE_FSL_E, - .dcache_bsize = 32, - .machine_check = machine_check_e200, - .platform = "ppc5554", - }, - { /* e200z6 */ - .pvr_mask = 0xfff00000, - .pvr_value = 0x81100000, - .cpu_name = "e200z6", - /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */ - .cpu_features = CPU_FTRS_E200, - .cpu_user_features = COMMON_USER_BOOKE | - PPC_FEATURE_HAS_SPE_COMP | - PPC_FEATURE_HAS_EFP_SINGLE_COMP | - PPC_FEATURE_UNIFIED_CACHE, - .mmu_features = MMU_FTR_TYPE_FSL_E, - .dcache_bsize = 32, - .machine_check = machine_check_e200, - .platform = "ppc5554", - }, - { /* default match */ - .pvr_mask = 0x00000000, - .pvr_value = 0x00000000, - .cpu_name = "(generic E200 PPC)", - .cpu_features = CPU_FTRS_E200, - .cpu_user_features = COMMON_USER_BOOKE | - PPC_FEATURE_HAS_EFP_SINGLE | - PPC_FEATURE_UNIFIED_CACHE, - 
.mmu_features = MMU_FTR_TYPE_FSL_E, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_e200, - .machine_check = machine_check_e200, - .platform = "ppc5554", - } -#endif /* CONFIG_E200 */ #endif /* CONFIG_PPC32 */ #ifdef CONFIG_E500 #ifdef CONFIG_PPC32 diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 1098863e17ee..b5478b72c08c 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -69,7 +69,6 @@ static int hv_mode; static struct { u64 lpcr; - u64 lpcr_clear; u64 hfscr; u64 fscr; u64 pcr; @@ -79,24 +78,7 @@ static void (*init_pmu_registers)(void); static void __restore_cpu_cpufeatures(void) { - u64 lpcr; - - /* - * LPCR is restored by the power on engine already. It can be changed - * after early init e.g., by radix enable, and we have no unified API - * for saving and restoring such SPRs. - * - * This ->restore hook should really be removed from idle and register - * restore moved directly into the idle restore code, because this code - * doesn't know how idle is implemented or what it needs restored here. - * - * The best we can do to accommodate secondary boot and idle restore - * for now is "or" LPCR with existing. - */ - lpcr = mfspr(SPRN_LPCR); - lpcr |= system_registers.lpcr; - lpcr &= ~system_registers.lpcr_clear; - mtspr(SPRN_LPCR, lpcr); + mtspr(SPRN_LPCR, system_registers.lpcr); if (hv_mode) { mtspr(SPRN_LPID, 0); mtspr(SPRN_HFSCR, system_registers.hfscr); @@ -273,13 +255,6 @@ static int __init feat_enable_idle_nap(struct dt_cpu_feature *f) return 1; } -static int __init feat_enable_align_dsisr(struct dt_cpu_feature *f) -{ - cur_cpu_spec->cpu_features &= ~CPU_FTR_NODSISRALIGN; - - return 1; -} - static int __init feat_enable_idle_stop(struct dt_cpu_feature *f) { u64 lpcr; @@ -317,7 +292,6 @@ static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f) { u64 lpcr; - system_registers.lpcr_clear |= (LPCR_ISL | LPCR_UPRT | LPCR_HR); lpcr = mfspr(SPRN_LPCR); lpcr &= ~(LPCR_ISL | LPCR_UPRT | LPCR_HR); mtspr(SPRN_LPCR, lpcr); @@ -454,6 +428,7 @@ static void init_pmu_power10(void) mtspr(SPRN_MMCR3, 0); mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE); + mtspr(SPRN_MMCR0, MMCR0_PMCCEXT); } static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f) @@ -641,7 +616,7 @@ static struct dt_cpu_feature_match __initdata {"tm-suspend-hypervisor-assist", feat_enable, CPU_FTR_P9_TM_HV_ASSIST}, {"tm-suspend-xer-so-bug", feat_enable, CPU_FTR_P9_TM_XER_SO_BUG}, {"idle-nap", feat_enable_idle_nap, 0}, - {"alignment-interrupt-dsisr", feat_enable_align_dsisr, 0}, + /* alignment-interrupt-dsisr ignored */ {"idle-stop", feat_enable_idle_stop, 0}, {"machine-check-power8", feat_enable_mce_power8, 0}, {"performance-monitor-power8", feat_enable_pmu_power8, 0}, diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 8cdc8bcde703..1c9b0ccc2172 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -234,7 +234,10 @@ transfer_to_handler_cont: mtspr SPRN_SRR0,r11 mtspr SPRN_SRR1,r10 mtlr r9 - RFI /* jump to handler, enable MMU */ + rfi /* jump to handler, enable MMU */ +#ifdef CONFIG_40x + b . /* Prevent prefetch past rfi */ +#endif #if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) 4: rlwinm r12,r12,0,~_TLF_NAPPING @@ -263,7 +266,10 @@ _ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont) LOAD_REG_IMMEDIATE(r0, MSR_KERNEL) mtspr SPRN_SRR0,r12 mtspr SPRN_SRR1,r0 - RFI + rfi +#ifdef CONFIG_40x + b . 
/* Prevent prefetch past rfi */ +#endif reenable_mmu: /* @@ -321,7 +327,10 @@ stack_ovf: #endif mtspr SPRN_SRR0,r9 mtspr SPRN_SRR1,r10 - RFI + rfi +#ifdef CONFIG_40x + b . /* Prevent prefetch past rfi */ +#endif _ASM_NOKPROBE_SYMBOL(stack_ovf) #endif @@ -439,15 +448,13 @@ syscall_exit_cont: andis. r10,r0,DBCR0_IDM@h bnel- load_dbcr0 #endif -#ifdef CONFIG_44x -BEGIN_MMU_FTR_SECTION +#ifdef CONFIG_PPC_47x lis r4,icache_44x_need_flush@ha lwz r5,icache_44x_need_flush@l(r4) cmplwi cr0,r5,0 bne- 2f +#endif /* CONFIG_PPC_47x */ 1: -END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_47x) -#endif /* CONFIG_44x */ BEGIN_FTR_SECTION lwarx r7,0,r1 END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) @@ -470,7 +477,10 @@ syscall_exit_finish: #endif mtspr SPRN_SRR0,r7 mtspr SPRN_SRR1,r8 - RFI + rfi +#ifdef CONFIG_40x + b . /* Prevent prefetch past rfi */ +#endif _ASM_NOKPROBE_SYMBOL(syscall_exit_finish) #ifdef CONFIG_44x 2: li r7,0 @@ -600,7 +610,10 @@ ret_from_kernel_syscall: #endif mtspr SPRN_SRR0, r9 mtspr SPRN_SRR1, r10 - RFI + rfi +#ifdef CONFIG_40x + b . /* Prevent prefetch past rfi */ +#endif _ASM_NOKPROBE_SYMBOL(ret_from_kernel_syscall) /* @@ -671,7 +684,7 @@ handle_page_fault: mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD lwz r4,_DAR(r1) - bl bad_page_fault + bl __bad_page_fault b ret_from_except_full #ifdef CONFIG_PPC_BOOK3S_32 @@ -803,7 +816,10 @@ fast_exception_return: REST_GPR(9, r11) REST_GPR(12, r11) lwz r11,GPR11(r11) - RFI + rfi +#ifdef CONFIG_40x + b . /* Prevent prefetch past rfi */ +#endif _ASM_NOKPROBE_SYMBOL(fast_exception_return) #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) @@ -948,10 +964,7 @@ restore_kuap: /* interrupts are hard-disabled at this point */ restore: -#ifdef CONFIG_44x -BEGIN_MMU_FTR_SECTION - b 1f -END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) +#if defined(CONFIG_44x) && !defined(CONFIG_PPC_47x) lis r4,icache_44x_need_flush@ha lwz r5,icache_44x_need_flush@l(r4) cmplwi cr0,r5,0 @@ -1027,7 +1040,7 @@ exc_exit_restart: lwz r1,GPR1(r1) .globl exc_exit_restart_end exc_exit_restart_end: - RFI + rfi _ASM_NOKPROBE_SYMBOL(exc_exit_restart) _ASM_NOKPROBE_SYMBOL(exc_exit_restart_end) @@ -1356,7 +1369,7 @@ _GLOBAL(enter_rtas) stw r7, THREAD + RTAS_SP(r2) mtspr SPRN_SRR0,r8 mtspr SPRN_SRR1,r9 - RFI + rfi 1: tophys_novmstack r9, r1 #ifdef CONFIG_VMAP_STACK li r0, MSR_KERNEL & ~MSR_IR /* can take DTLB miss */ @@ -1371,6 +1384,6 @@ _GLOBAL(enter_rtas) stw r0, THREAD + RTAS_SP(r7) mtspr SPRN_SRR0,r8 mtspr SPRN_SRR1,r9 - RFI /* return to caller */ + rfi /* return to caller */ _ASM_NOKPROBE_SYMBOL(enter_rtas) #endif /* CONFIG_PPC_RTAS */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 2f3846192ec7..aa1af139d947 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -653,8 +653,8 @@ _ASM_NOKPROBE_SYMBOL(fast_interrupt_return) kuap_check_amr r3, r4 ld r5,_MSR(r1) andi. r0,r5,MSR_PR - bne .Lfast_user_interrupt_return - kuap_restore_amr r3, r4 + bne .Lfast_user_interrupt_return_amr + kuap_kernel_restore r3, r4 andi. 
r0,r5,MSR_RI li r3,0 /* 0 return value, no EMULATE_STACK_STORE */ bne+ .Lfast_kernel_interrupt_return @@ -674,6 +674,8 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return) cmpdi r3,0 bne- .Lrestore_nvgprs +.Lfast_user_interrupt_return_amr: + kuap_user_restore r3, r4 .Lfast_user_interrupt_return: ld r11,_NIP(r1) ld r12,_MSR(r1) @@ -967,7 +969,7 @@ _GLOBAL(enter_prom) mtsrr1 r11 rfi #else /* CONFIG_PPC_BOOK3E */ - LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE) + LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_LE) andc r11,r11,r12 mtsrr1 r11 RFI_TO_KERNEL diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index f579ce46eef2..74d07dc0bb48 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -1023,7 +1023,7 @@ storage_fault_common: mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD ld r4,_DAR(r1) - bl bad_page_fault + bl __bad_page_fault b ret_from_except /* diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 4d01f09ecf80..e02ad6fefa46 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1059,7 +1059,7 @@ EXC_COMMON_BEGIN(system_reset_common) ld r10,SOFTE(r1) stb r10,PACAIRQSOFTMASK(r13) - kuap_restore_amr r9, r10 + kuap_kernel_restore r9, r10 EXCEPTION_RESTORE_REGS RFI_TO_USER_OR_KERNEL @@ -2875,7 +2875,7 @@ EXC_COMMON_BEGIN(soft_nmi_common) ld r10,SOFTE(r1) stb r10,PACAIRQSOFTMASK(r13) - kuap_restore_amr r9, r10 + kuap_kernel_restore r9, r10 EXCEPTION_RESTORE_REGS hsrr=0 RFI_TO_KERNEL @@ -3259,7 +3259,7 @@ handle_page_fault: mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD ld r4,_DAR(r1) - bl bad_page_fault + bl __bad_page_fault b interrupt_return /* We have a data breakpoint exception - handle it */ diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c index fe48d319d490..c9e2819b095a 100644 --- a/arch/powerpc/kernel/firmware.c +++ b/arch/powerpc/kernel/firmware.c @@ -14,6 +14,7 @@ #include <linux/of.h> #include <asm/firmware.h> +#include <asm/kvm_guest.h> #ifdef CONFIG_PPC64 unsigned long powerpc_firmware_features __read_mostly; @@ -21,17 +22,19 @@ EXPORT_SYMBOL_GPL(powerpc_firmware_features); #endif #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST) -bool is_kvm_guest(void) +DEFINE_STATIC_KEY_FALSE(kvm_guest); +bool check_kvm_guest(void) { struct device_node *hyper_node; hyper_node = of_find_node_by_path("/hypervisor"); if (!hyper_node) - return 0; + return false; if (!of_device_is_compatible(hyper_node, "linux,kvm")) - return 0; + return false; - return 1; + static_branch_enable(&kvm_guest); + return true; } #endif diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 7c767765071d..541664d95702 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -40,38 +40,31 @@ .macro EXCEPTION_PROLOG_1 for_rtas=0 #ifdef CONFIG_VMAP_STACK - mr r11, r1 + mtspr SPRN_SPRG_SCRATCH2,r1 subi r1, r1, INT_FRAME_SIZE /* use r1 if kernel */ beq 1f mfspr r1,SPRN_SPRG_THREAD lwz r1,TASK_STACK-THREAD(r1) addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE +1: + mtcrf 0x7f, r1 + bt 32 - THREAD_ALIGN_SHIFT, stack_overflow #else subi r11, r1, INT_FRAME_SIZE /* use r1 if kernel */ beq 1f mfspr r11,SPRN_SPRG_THREAD lwz r11,TASK_STACK-THREAD(r11) addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE -#endif -1: - tophys_novmstack r11, r11 -#ifdef CONFIG_VMAP_STACK - mtcrf 0x7f, r1 - bt 32 - THREAD_ALIGN_SHIFT, stack_overflow +1: tophys(r11, r11) #endif .endm .macro EXCEPTION_PROLOG_2 handle_dar_dsisr=0 #ifdef 
CONFIG_VMAP_STACK - mtcr r10 - li r10, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ - mtmsr r10 + li r11, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ + mtmsr r11 isync -#else - stw r10,_CCR(r11) /* save registers */ -#endif - mfspr r10, SPRN_SPRG_SCRATCH0 -#ifdef CONFIG_VMAP_STACK + mfspr r11, SPRN_SPRG_SCRATCH2 stw r11,GPR1(r1) stw r11,0(r1) mr r11, r1 @@ -80,14 +73,12 @@ stw r1,0(r11) tovirt(r1, r11) /* set new kernel sp */ #endif + stw r10,_CCR(r11) /* save registers */ stw r12,GPR12(r11) stw r9,GPR9(r11) - stw r10,GPR10(r11) -#ifdef CONFIG_VMAP_STACK - mfcr r10 - stw r10, _CCR(r11) -#endif + mfspr r10,SPRN_SPRG_SCRATCH0 mfspr r12,SPRN_SPRG_SCRATCH1 + stw r10,GPR10(r11) stw r12,GPR11(r11) mflr r10 stw r10,_LINK(r11) @@ -101,7 +92,6 @@ stw r10, _DSISR(r11) .endif lwz r9, SRR1(r12) - andi. r10, r9, MSR_PR lwz r12, SRR0(r12) #else mfspr r12,SPRN_SRR0 @@ -222,7 +212,10 @@ #endif mtspr SPRN_SRR1,r10 mtspr SPRN_SRR0,r11 - RFI /* jump to handler, enable MMU */ + rfi /* jump to handler, enable MMU */ +#ifdef CONFIG_40x + b . /* Prevent prefetch past rfi */ +#endif 99: b ret_from_kernel_syscall .endm diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 1510b2a56669..ece7f97bafff 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -41,6 +41,11 @@ #include <asm/ppc-opcode.h> #include <asm/export.h> #include <asm/feature-fixups.h> +#ifdef CONFIG_PPC_BOOK3S +#include <asm/exception-64s.h> +#else +#include <asm/exception-64e.h> +#endif /* The physical memory is laid out such that the secondary processor * spin code sits at 0x0000...0x00ff. On server, the vectors follow @@ -417,6 +422,10 @@ generic_secondary_common_init: /* From now on, r24 is expected to be logical cpuid */ mr r24,r5 + /* Create a temp kernel stack for use before relocation is on. */ + ld r1,PACAEMERGSP(r13) + subi r1,r1,STACK_FRAME_OVERHEAD + /* See if we need to call a cpu state restore handler */ LOAD_REG_ADDR(r23, cur_cpu_spec) ld r23,0(r23) @@ -445,10 +454,6 @@ generic_secondary_common_init: sync /* order paca.run and cur_cpu_spec */ isync /* In case code patching happened */ - /* Create a temp kernel stack for use before relocation is on. */ - ld r1,PACAEMERGSP(r13) - subi r1,r1,STACK_FRAME_OVERHEAD - b __secondary_start #endif /* SMP */ @@ -829,7 +834,7 @@ __secondary_start: mtspr SPRN_SRR0,r3 mtspr SPRN_SRR1,r4 - RFI + RFI_TO_KERNEL b . /* prevent speculative execution */ /* @@ -865,8 +870,7 @@ enable_64b_mode: oris r11,r11,0x8000 /* CM bit set, we'll set ICM later */ mtmsr r11 #else /* CONFIG_PPC_BOOK3E */ - li r12,(MSR_64BIT | MSR_ISF)@highest - sldi r12,r12,48 + LOAD_REG_IMMEDIATE(r12, MSR_64BIT) or r11,r11,r12 mtmsrd r11 isync @@ -966,7 +970,7 @@ start_here_multiplatform: ld r4,PACAKMSR(r13) mtspr SPRN_SRR0,r3 mtspr SPRN_SRR1,r4 - RFI + RFI_TO_KERNEL b . 
/* prevent speculative execution */ /* This is where all platforms converge execution */ @@ -990,7 +994,7 @@ start_here_common: bl start_kernel /* Not reached */ - trap +0: trap EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0 .previous diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index ee0bfebc375f..52702f3db6df 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -43,16 +43,6 @@ .endm /* - * We need an ITLB miss handler for kernel addresses if: - * - Either we have modules - * - Or we have not pinned the first 8M - */ -#if defined(CONFIG_MODULES) || !defined(CONFIG_PIN_TLB_TEXT) || \ - defined(CONFIG_DEBUG_PAGEALLOC) -#define ITLB_MISS_KERNEL 1 -#endif - -/* * Value for the bits that have fixed value in RPN entries. * Also used for tagging DAR for DTLBerror. */ @@ -190,32 +180,31 @@ SystemCall: */ #ifdef CONFIG_8xx_CPU15 -#define INVALIDATE_ADJACENT_PAGES_CPU15(addr) \ - addi addr, addr, PAGE_SIZE; \ - tlbie addr; \ - addi addr, addr, -(PAGE_SIZE << 1); \ - tlbie addr; \ - addi addr, addr, PAGE_SIZE +#define INVALIDATE_ADJACENT_PAGES_CPU15(addr, tmp) \ + addi tmp, addr, PAGE_SIZE; \ + tlbie tmp; \ + addi tmp, addr, -PAGE_SIZE; \ + tlbie tmp #else -#define INVALIDATE_ADJACENT_PAGES_CPU15(addr) +#define INVALIDATE_ADJACENT_PAGES_CPU15(addr, tmp) #endif InstructionTLBMiss: - mtspr SPRN_SPRG_SCRATCH0, r10 - mtspr SPRN_SPRG_SCRATCH1, r11 + mtspr SPRN_SPRG_SCRATCH2, r10 + mtspr SPRN_M_TW, r11 /* If we are faulting a kernel address, we have to use the * kernel page tables. */ mfspr r10, SPRN_SRR0 /* Get effective address of fault */ - INVALIDATE_ADJACENT_PAGES_CPU15(r10) + INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11) mtspr SPRN_MD_EPN, r10 -#ifdef ITLB_MISS_KERNEL +#ifdef CONFIG_MODULES mfcr r11 compare_to_kernel_boundary r10, r10 #endif mfspr r10, SPRN_M_TWB /* Get level 1 table */ -#ifdef ITLB_MISS_KERNEL +#ifdef CONFIG_MODULES blt+ 3f rlwinm r10, r10, 0, 20, 31 oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha @@ -241,8 +230,8 @@ InstructionTLBMiss: mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ /* Restore registers */ -0: mfspr r10, SPRN_SPRG_SCRATCH0 - mfspr r11, SPRN_SPRG_SCRATCH1 +0: mfspr r10, SPRN_SPRG_SCRATCH2 + mfspr r11, SPRN_M_TW rfi patch_site 0b, patch__itlbmiss_exit_1 @@ -251,14 +240,14 @@ InstructionTLBMiss: 0: lwz r10, (itlb_miss_counter - PAGE_OFFSET)@l(0) addi r10, r10, 1 stw r10, (itlb_miss_counter - PAGE_OFFSET)@l(0) - mfspr r10, SPRN_SPRG_SCRATCH0 - mfspr r11, SPRN_SPRG_SCRATCH1 + mfspr r10, SPRN_SPRG_SCRATCH2 + mfspr r11, SPRN_M_TW rfi #endif . 
= 0x1200 DataStoreTLBMiss: - mtspr SPRN_DAR, r10 + mtspr SPRN_SPRG_SCRATCH2, r10 mtspr SPRN_M_TW, r11 mfcr r11 @@ -297,11 +286,11 @@ DataStoreTLBMiss: li r11, RPN_PATTERN rlwimi r10, r11, 0, 24, 27 /* Set 24-27 */ mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ + mtspr SPRN_DAR, r11 /* Tag DAR */ /* Restore registers */ -0: mfspr r10, SPRN_DAR - mtspr SPRN_DAR, r11 /* Tag DAR */ +0: mfspr r10, SPRN_SPRG_SCRATCH2 mfspr r11, SPRN_M_TW rfi patch_site 0b, patch__dtlbmiss_exit_1 @@ -311,8 +300,7 @@ DataStoreTLBMiss: 0: lwz r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0) addi r10, r10, 1 stw r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0) - mfspr r10, SPRN_DAR - mtspr SPRN_DAR, r11 /* Tag DAR */ + mfspr r10, SPRN_SPRG_SCRATCH2 mfspr r11, SPRN_M_TW rfi #endif @@ -619,10 +607,6 @@ start_here: lis r0, (MD_TWAM | MD_RSV4I)@h mtspr SPRN_MD_CTR, r0 #endif -#ifndef CONFIG_PIN_TLB_TEXT - li r0, 0 - mtspr SPRN_MI_CTR, r0 -#endif #if !defined(CONFIG_PIN_TLB_DATA) && !defined(CONFIG_PIN_TLB_IMMR) lis r0, MD_TWAM@h mtspr SPRN_MD_CTR, r0 @@ -718,7 +702,6 @@ initial_mmu: mtspr SPRN_DER, r8 blr -#ifdef CONFIG_PIN_TLB _GLOBAL(mmu_pin_tlb) lis r9, (1f - PAGE_OFFSET)@h ori r9, r9, (1f - PAGE_OFFSET)@l @@ -740,7 +723,6 @@ _GLOBAL(mmu_pin_tlb) mtspr SPRN_MD_CTR, r6 tlbia -#ifdef CONFIG_PIN_TLB_TEXT LOAD_REG_IMMEDIATE(r5, 28 << 8) LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED) @@ -761,7 +743,7 @@ _GLOBAL(mmu_pin_tlb) bdnzt lt, 2b lis r0, MI_RSV4I@h mtspr SPRN_MI_CTR, r0 -#endif + LOAD_REG_IMMEDIATE(r5, 28 << 8 | MD_TWAM) #ifdef CONFIG_PIN_TLB_DATA LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) @@ -819,7 +801,6 @@ _GLOBAL(mmu_pin_tlb) mtspr SPRN_SRR1, r10 mtspr SPRN_SRR0, r11 rfi -#endif /* CONFIG_PIN_TLB */ /* * We put a few things here that have to be page-aligned. diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index a0dda2a1f2df..349bf3f0c3af 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -155,10 +155,8 @@ __after_mmu_off: bl initial_bats bl load_segment_registers -BEGIN_MMU_FTR_SECTION bl reloc_offset bl early_hash_table -END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) #if defined(CONFIG_BOOTX_TEXT) bl setup_disp_bat #endif @@ -207,7 +205,7 @@ turn_on_mmu: lis r0,start_here@h ori r0,r0,start_here@l mtspr SPRN_SRR0,r0 - RFI /* enables MMU */ + rfi /* enables MMU */ /* * We need __secondary_hold as a place to hold the other cpus on @@ -288,51 +286,35 @@ MachineCheck: DO_KVM 0x300 DataAccess: #ifdef CONFIG_VMAP_STACK - mtspr SPRN_SPRG_SCRATCH0,r10 - mfspr r10, SPRN_SPRG_THREAD BEGIN_MMU_FTR_SECTION + mtspr SPRN_SPRG_SCRATCH2,r10 + mfspr r10, SPRN_SPRG_THREAD stw r11, THR11(r10) mfspr r10, SPRN_DSISR mfcr r11 -#ifdef CONFIG_PPC_KUAP - andis. r10, r10, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h -#else andis. r10, r10, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h -#endif mfspr r10, SPRN_SPRG_THREAD beq hash_page_dsi .Lhash_page_dsi_cont: mtcr r11 lwz r11, THR11(r10) -END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) - mtspr SPRN_SPRG_SCRATCH1,r11 - mfspr r11, SPRN_DAR - stw r11, DAR(r10) - mfspr r11, SPRN_DSISR - stw r11, DSISR(r10) - mfspr r11, SPRN_SRR0 - stw r11, SRR0(r10) - mfspr r11, SPRN_SRR1 /* check whether user or kernel */ - stw r11, SRR1(r10) - mfcr r10 - andi. 
r11, r11, MSR_PR - + mfspr r10, SPRN_SPRG_SCRATCH2 +MMU_FTR_SECTION_ELSE + b 1f +ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) +1: EXCEPTION_PROLOG_0 handle_dar_dsisr=1 EXCEPTION_PROLOG_1 b handle_page_fault_tramp_1 #else /* CONFIG_VMAP_STACK */ EXCEPTION_PROLOG handle_dar_dsisr=1 get_and_save_dar_dsisr_on_stack r4, r5, r11 BEGIN_MMU_FTR_SECTION -#ifdef CONFIG_PPC_KUAP - andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h -#else andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h -#endif bne handle_page_fault_tramp_2 /* if not, try to put a PTE */ rlwinm r3, r5, 32 - 15, 21, 21 /* DSISR_STORE -> _PAGE_RW */ bl hash_page b handle_page_fault_tramp_1 -FTR_SECTION_ELSE +MMU_FTR_SECTION_ELSE b handle_page_fault_tramp_2 ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) #endif /* CONFIG_VMAP_STACK */ @@ -394,6 +376,7 @@ Alignment: . = 0x800 DO_KVM 0x800 FPUnavailable: +#ifdef CONFIG_PPC_FPU BEGIN_FTR_SECTION /* * Certain Freescale cores don't have a FPU and treat fp instructions @@ -407,6 +390,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) b fast_exception_return 1: addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_LITE(0x800, kernel_fp_unavailable_exception) +#else + b ProgramCheck +#endif /* Decrementer */ EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) @@ -453,13 +439,14 @@ InstructionTLBMiss: */ /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_IMISS -#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) +#ifdef CONFIG_MODULES lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 #endif - mfspr r2, SPRN_SPRG_PGDIR + mfspr r2, SPRN_SDR1 li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC -#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) + rlwinm r2, r2, 28, 0xfffff000 +#ifdef CONFIG_MODULES bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ @@ -519,8 +506,9 @@ DataLoadTLBMiss: mfspr r3,SPRN_DMISS lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 - mfspr r2, SPRN_SPRG_PGDIR + mfspr r2, SPRN_SDR1 li r1, _PAGE_PRESENT | _PAGE_ACCESSED + rlwinm r2, r2, 28, 0xfffff000 bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ @@ -595,8 +583,9 @@ DataStoreTLBMiss: mfspr r3,SPRN_DMISS lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 - mfspr r2, SPRN_SPRG_PGDIR + mfspr r2, SPRN_SDR1 li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED + rlwinm r2, r2, 28, 0xfffff000 bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ @@ -757,14 +746,14 @@ fast_hash_page_return: /* DSI */ mtcr r11 lwz r11, THR11(r10) - mfspr r10, SPRN_SPRG_SCRATCH0 - RFI + mfspr r10, SPRN_SPRG_SCRATCH2 + rfi 1: /* ISI */ mtcr r11 mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r10, SPRN_SPRG_SCRATCH0 - RFI + rfi stack_overflow: vmap_stack_overflow_exception @@ -889,9 +878,12 @@ __secondary_start: tophys(r4,r2) addi r4,r4,THREAD /* phys address of our thread_struct */ mtspr SPRN_SPRG_THREAD,r4 +BEGIN_MMU_FTR_SECTION lis r4, (swapper_pg_dir - PAGE_OFFSET)@h ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l - mtspr SPRN_SPRG_PGDIR, r4 + rlwinm r4, r4, 4, 0xffff01ff + mtspr SPRN_SDR1, r4 +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE) /* enable MMU and jump to start_secondary */ li r4,MSR_KERNEL @@ -899,7 +891,7 @@ __secondary_start: ori 
r3,r3,start_secondary@l mtspr SPRN_SRR0,r3 mtspr SPRN_SRR1,r4 - RFI + rfi #endif /* CONFIG_SMP */ #ifdef CONFIG_KVM_BOOK3S_HANDLER @@ -920,9 +912,6 @@ early_hash_table: lis r6, early_hash - PAGE_OFFSET@h ori r6, r6, 3 /* 256kB table */ mtspr SPRN_SDR1, r6 - lis r6, early_hash@h - addis r3, r3, Hash@ha - stw r6, Hash@l(r3) blr load_up_mmu: @@ -931,11 +920,13 @@ load_up_mmu: tlbia /* Clear all TLB entries */ sync /* wait for tlbia/tlbie to finish */ TLBSYNC /* ... on all CPUs */ +BEGIN_MMU_FTR_SECTION /* Load the SDR1 register (hash table base & size) */ lis r6,_SDR1@ha tophys(r6,r6) lwz r6,_SDR1@l(r6) mtspr SPRN_SDR1,r6 +END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) /* Load the BAT registers with the values set up by MMU_init. */ lis r3,BATS@ha @@ -991,9 +982,12 @@ start_here: tophys(r4,r2) addi r4,r4,THREAD /* init task's THREAD */ mtspr SPRN_SPRG_THREAD,r4 +BEGIN_MMU_FTR_SECTION lis r4, (swapper_pg_dir - PAGE_OFFSET)@h ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l - mtspr SPRN_SPRG_PGDIR, r4 + rlwinm r4, r4, 4, 0xffff01ff + mtspr SPRN_SDR1, r4 +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE) /* stack */ lis r1,init_thread_union@ha @@ -1027,7 +1021,7 @@ start_here: .align 4 mtspr SPRN_SRR0,r4 mtspr SPRN_SRR1,r3 - RFI + rfi /* Load up the kernel context */ 2: bl load_up_mmu @@ -1051,7 +1045,7 @@ start_here: ori r3,r3,start_kernel@l mtspr SPRN_SRR0,r3 mtspr SPRN_SRR1,r4 - RFI + rfi /* * void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next); @@ -1073,16 +1067,22 @@ _ENTRY(switch_mmu_context) li r0,NUM_USER_SEGMENTS mtctr r0 - lwz r4, MM_PGD(r4) #ifdef CONFIG_BDI_SWITCH /* Context switch the PTE pointer for the Abatron BDI2000. * The PGDIR is passed as second argument. */ + lwz r4, MM_PGD(r4) lis r5, abatron_pteptrs@ha stw r4, abatron_pteptrs@l + 0x4(r5) #endif +BEGIN_MMU_FTR_SECTION +#ifndef CONFIG_BDI_SWITCH + lwz r4, MM_PGD(r4) +#endif tophys(r4, r4) - mtspr SPRN_SPRG_PGDIR, r4 + rlwinm r4, r4, 4, 0xffff01ff + mtspr SPRN_SDR1, r4 +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE) li r4,0 isync 3: @@ -1166,7 +1166,7 @@ _ENTRY(update_bats) .align 4 mtspr SPRN_SRR0, r4 mtspr SPRN_SRR1, r3 - RFI + rfi 1: bl clear_bats lis r3, BATS@ha addi r3, r3, BATS@l @@ -1185,7 +1185,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) mtmsr r3 mtspr SPRN_SRR0, r7 mtspr SPRN_SRR1, r6 - RFI + rfi flush_tlbs: lis r10, 0x40 @@ -1206,7 +1206,7 @@ mmu_off: mtspr SPRN_SRR0,r4 mtspr SPRN_SRR1,r3 sync - RFI + rfi /* We use one BAT to map up to 256M of RAM at _PAGE_OFFSET */ initial_bats: diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 71c359d438b5..74e230c200fb 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -176,7 +176,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) #endif mtspr SPRN_SRR1,r10 mtspr SPRN_SRR0,r11 - RFI /* jump to handler, enable MMU */ + rfi /* jump to handler, enable MMU */ 99: b ret_from_kernel_syscall .endm @@ -185,7 +185,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) * * On 40x critical is the only additional level * On 44x/e500 we have critical and machine check - * On e200 we have critical and debug (machine check occurs via critical) * * Additionally we reserve a SPRG for each priority level so we can free up a * GPR to use as the base for indirect access to the exception stacks. 
This @@ -201,7 +200,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) #define MC_STACK_BASE mcheckirq_ctx #define CRIT_STACK_BASE critirq_ctx -/* only on e500mc/e200 */ +/* only on e500mc */ #define DBG_STACK_BASE dbgirq_ctx #define EXC_LVL_FRAME_OVERHEAD (THREAD_SIZE - INT_FRAME_SIZE - EXC_LVL_SIZE) diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 586a6ac501e9..fdd4d274c245 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -187,9 +187,6 @@ set_ivor: /* Setup the defaults for TLB entries */ li r2,(MAS4_TSIZED(BOOK3E_PAGESZ_4K))@l -#ifdef CONFIG_E200 - oris r2,r2,MAS4_TLBSELD(1)@h -#endif mtspr SPRN_MAS4, r2 #if !defined(CONFIG_BDI_SWITCH) @@ -362,13 +359,7 @@ interrupt_base: CRITICAL_EXCEPTION(0x0100, CRITICAL, CriticalInput, unknown_exception) /* Machine Check Interrupt */ -#ifdef CONFIG_E200 - /* no RFMCI, MCSRRs on E200 */ - CRITICAL_EXCEPTION(0x0200, MACHINE_CHECK, MachineCheck, \ - machine_check_exception) -#else MCHECK_EXCEPTION(0x0200, MachineCheck, machine_check_exception) -#endif /* Data Storage Interrupt */ START_EXCEPTION(DataStorage) @@ -400,15 +391,9 @@ interrupt_base: #ifdef CONFIG_PPC_FPU FP_UNAVAILABLE_EXCEPTION #else -#ifdef CONFIG_E200 - /* E200 treats 'normal' floating point instructions as FP Unavail exception */ - EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \ - program_check_exception, EXC_XFER_STD) -#else EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \ unknown_exception, EXC_XFER_STD) #endif -#endif /* System Call Interrupt */ START_EXCEPTION(SystemCall) @@ -625,7 +610,7 @@ END_BTB_FLUSH_SECTION mfspr r10, SPRN_SPRG_RSCRATCH0 b InstructionStorage -/* Define SPE handlers for e200 and e500v2 */ +/* Define SPE handlers for e500v2 */ #ifdef CONFIG_SPE /* SPE Unavailable */ START_EXCEPTION(SPEUnavailable) @@ -807,31 +792,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) #endif 3: mtspr SPRN_MAS2, r12 -#ifdef CONFIG_E200 - /* Round robin TLB1 entries assignment */ - mfspr r12, SPRN_MAS0 - - /* Extract TLB1CFG(NENTRY) */ - mfspr r11, SPRN_TLB1CFG - andi. r11, r11, 0xfff - - /* Extract MAS0(NV) */ - andi. r13, r12, 0xfff - addi r13, r13, 1 - cmpw 0, r13, r11 - addi r12, r12, 1 - - /* check if we need to wrap */ - blt 7f - - /* wrap back to first free tlbcam entry */ - lis r13, tlbcam_index@ha - lwz r13, tlbcam_index@l(r13) - rlwimi r12, r13, 0, 20, 31 -7: - mtspr SPRN_MAS0,r12 -#endif /* CONFIG_E200 */ - tlb_write_entry: tlbwe @@ -933,21 +893,6 @@ get_phys_addr: * Global functions */ -#ifdef CONFIG_E200 -/* Adjust or setup IVORs for e200 */ -_GLOBAL(__setup_e200_ivors) - li r3,DebugDebug@l - mtspr SPRN_IVOR15,r3 - li r3,SPEUnavailable@l - mtspr SPRN_IVOR32,r3 - li r3,SPEFloatingPointData@l - mtspr SPRN_IVOR33,r3 - li r3,SPEFloatingPointRound@l - mtspr SPRN_IVOR34,r3 - sync - blr -#endif - #ifdef CONFIG_E500 #ifndef CONFIG_PPC_E500MC /* Adjust or setup IVORs for e500v1/v2 */ diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index f4e8f21046f5..8fc7a14e4d71 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -499,6 +499,11 @@ static bool is_larx_stcx_instr(int type) return type == LARX || type == STCX; } +static bool is_octword_vsx_instr(int type, int size) +{ + return ((type == LOAD_VSX || type == STORE_VSX) && size == 32); +} + /* * We've failed in reliably handling the hw-breakpoint. Unregister * it and throw a warning message to let the user know about it. 
@@ -549,6 +554,58 @@ static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp, return true; } +static void handle_p10dd1_spurious_exception(struct arch_hw_breakpoint **info, + int *hit, unsigned long ea) +{ + int i; + unsigned long hw_end_addr; + + /* + * Handle spurious exception only when any bp_per_reg is set. + * Otherwise this might be created by xmon and not actually a + * spurious exception. + */ + for (i = 0; i < nr_wp_slots(); i++) { + if (!info[i]) + continue; + + hw_end_addr = ALIGN(info[i]->address + info[i]->len, HW_BREAKPOINT_SIZE); + + /* + * Ending address of DAWR range is less than starting + * address of op. + */ + if ((hw_end_addr - 1) >= ea) + continue; + + /* + * Those addresses need to be in the same or in two + * consecutive 512B blocks; + */ + if (((hw_end_addr - 1) >> 10) != (ea >> 10)) + continue; + + /* + * 'op address + 64B' generates an address that has a + * carry into bit 52 (crosses 2K boundary). + */ + if ((ea & 0x800) == ((ea + 64) & 0x800)) + continue; + + break; + } + + if (i == nr_wp_slots()) + return; + + for (i = 0; i < nr_wp_slots(); i++) { + if (info[i]) { + hit[i] = 1; + info[i]->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; + } + } +} + int hw_breakpoint_handler(struct die_args *args) { bool err = false; @@ -607,8 +664,14 @@ int hw_breakpoint_handler(struct die_args *args) goto reset; if (!nr_hit) { - rc = NOTIFY_DONE; - goto out; + /* Workaround for Power10 DD1 */ + if (!IS_ENABLED(CONFIG_PPC_8xx) && mfspr(SPRN_PVR) == 0x800100 && + is_octword_vsx_instr(type, size)) { + handle_p10dd1_spurious_exception(info, hit, ea); + } else { + rc = NOTIFY_DONE; + goto out; + } } /* diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c index 9fe4fb3b08aa..72862a4d3a5d 100644 --- a/arch/powerpc/kernel/iomap.c +++ b/arch/powerpc/kernel/iomap.c @@ -11,177 +11,11 @@ #include <asm/pci-bridge.h> #include <asm/isa-bridge.h> -/* - * Here comes the ppc64 implementation of the IOMAP - * interfaces. 
- */ -unsigned int ioread8(const void __iomem *addr) -{ - return readb(addr); -} -unsigned int ioread16(const void __iomem *addr) -{ - return readw(addr); -} -unsigned int ioread16be(const void __iomem *addr) -{ - return readw_be(addr); -} -unsigned int ioread32(const void __iomem *addr) -{ - return readl(addr); -} -unsigned int ioread32be(const void __iomem *addr) -{ - return readl_be(addr); -} -EXPORT_SYMBOL(ioread8); -EXPORT_SYMBOL(ioread16); -EXPORT_SYMBOL(ioread16be); -EXPORT_SYMBOL(ioread32); -EXPORT_SYMBOL(ioread32be); -#ifdef __powerpc64__ -u64 ioread64(const void __iomem *addr) -{ - return readq(addr); -} -u64 ioread64_lo_hi(const void __iomem *addr) -{ - return readq(addr); -} -u64 ioread64_hi_lo(const void __iomem *addr) -{ - return readq(addr); -} -u64 ioread64be(const void __iomem *addr) -{ - return readq_be(addr); -} -u64 ioread64be_lo_hi(const void __iomem *addr) -{ - return readq_be(addr); -} -u64 ioread64be_hi_lo(const void __iomem *addr) -{ - return readq_be(addr); -} -EXPORT_SYMBOL(ioread64); -EXPORT_SYMBOL(ioread64_lo_hi); -EXPORT_SYMBOL(ioread64_hi_lo); -EXPORT_SYMBOL(ioread64be); -EXPORT_SYMBOL(ioread64be_lo_hi); -EXPORT_SYMBOL(ioread64be_hi_lo); -#endif /* __powerpc64__ */ - -void iowrite8(u8 val, void __iomem *addr) -{ - writeb(val, addr); -} -void iowrite16(u16 val, void __iomem *addr) -{ - writew(val, addr); -} -void iowrite16be(u16 val, void __iomem *addr) -{ - writew_be(val, addr); -} -void iowrite32(u32 val, void __iomem *addr) -{ - writel(val, addr); -} -void iowrite32be(u32 val, void __iomem *addr) -{ - writel_be(val, addr); -} -EXPORT_SYMBOL(iowrite8); -EXPORT_SYMBOL(iowrite16); -EXPORT_SYMBOL(iowrite16be); -EXPORT_SYMBOL(iowrite32); -EXPORT_SYMBOL(iowrite32be); -#ifdef __powerpc64__ -void iowrite64(u64 val, void __iomem *addr) -{ - writeq(val, addr); -} -void iowrite64_lo_hi(u64 val, void __iomem *addr) -{ - writeq(val, addr); -} -void iowrite64_hi_lo(u64 val, void __iomem *addr) -{ - writeq(val, addr); -} -void iowrite64be(u64 val, void __iomem *addr) -{ - writeq_be(val, addr); -} -void iowrite64be_lo_hi(u64 val, void __iomem *addr) -{ - writeq_be(val, addr); -} -void iowrite64be_hi_lo(u64 val, void __iomem *addr) -{ - writeq_be(val, addr); -} -EXPORT_SYMBOL(iowrite64); -EXPORT_SYMBOL(iowrite64_lo_hi); -EXPORT_SYMBOL(iowrite64_hi_lo); -EXPORT_SYMBOL(iowrite64be); -EXPORT_SYMBOL(iowrite64be_lo_hi); -EXPORT_SYMBOL(iowrite64be_hi_lo); -#endif /* __powerpc64__ */ - -/* - * These are the "repeat read/write" functions. Note the - * non-CPU byte order. We do things in "IO byteorder" - * here. - * - * FIXME! We could make these do EEH handling if we really - * wanted. Not clear if we do. 
- */ -void ioread8_rep(const void __iomem *addr, void *dst, unsigned long count) -{ - readsb(addr, dst, count); -} -void ioread16_rep(const void __iomem *addr, void *dst, unsigned long count) -{ - readsw(addr, dst, count); -} -void ioread32_rep(const void __iomem *addr, void *dst, unsigned long count) -{ - readsl(addr, dst, count); -} -EXPORT_SYMBOL(ioread8_rep); -EXPORT_SYMBOL(ioread16_rep); -EXPORT_SYMBOL(ioread32_rep); - -void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count) -{ - writesb(addr, src, count); -} -void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count) -{ - writesw(addr, src, count); -} -void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count) -{ - writesl(addr, src, count); -} -EXPORT_SYMBOL(iowrite8_rep); -EXPORT_SYMBOL(iowrite16_rep); -EXPORT_SYMBOL(iowrite32_rep); - void __iomem *ioport_map(unsigned long port, unsigned int len) { return (void __iomem *) (port + _IO_BASE); } - -void ioport_unmap(void __iomem *addr) -{ - /* Nothing to do */ -} EXPORT_SYMBOL(ioport_map); -EXPORT_SYMBOL(ioport_unmap); #ifdef CONFIG_PCI void pci_iounmap(struct pci_dev *dev, void __iomem *addr) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 7d0f7682d01d..6b1eca53e36c 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -102,14 +102,6 @@ static inline notrace unsigned long get_irq_happened(void) return happened; } -static inline notrace int decrementer_check_overflow(void) -{ - u64 now = get_tb(); - u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); - - return now >= *next_tb; -} - #ifdef CONFIG_PPC_BOOK3E /* This is called whenever we are re-enabling interrupts @@ -142,35 +134,6 @@ notrace unsigned int __check_irq_replay(void) trace_hardirqs_on(); trace_hardirqs_off(); - /* - * We are always hard disabled here, but PACA_IRQ_HARD_DIS may - * not be set, which means interrupts have only just been hard - * disabled as part of the local_irq_restore or interrupt return - * code. In that case, skip the decrementr check becaus it's - * expensive to read the TB. - * - * HARD_DIS then gets cleared here, but it's reconciled later. - * Either local_irq_disable will replay the interrupt and that - * will reconcile state like other hard interrupts. Or interrupt - * retur will replay the interrupt and in that case it sets - * PACA_IRQ_HARD_DIS by hand (see comments in entry_64.S). - */ - if (happened & PACA_IRQ_HARD_DIS) { - local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; - - /* - * We may have missed a decrementer interrupt if hard disabled. - * Check the decrementer register in case we had a rollover - * while hard disabled. - */ - if (!(happened & PACA_IRQ_DEC)) { - if (decrementer_check_overflow()) { - local_paca->irq_happened |= PACA_IRQ_DEC; - happened |= PACA_IRQ_DEC; - } - } - } - if (happened & PACA_IRQ_DEC) { local_paca->irq_happened &= ~PACA_IRQ_DEC; return 0x900; @@ -186,6 +149,9 @@ notrace unsigned int __check_irq_replay(void) return 0x280; } + if (happened & PACA_IRQ_HARD_DIS) + local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; + /* There should be nothing left ! */ BUG_ON(local_paca->irq_happened != 0); @@ -229,18 +195,6 @@ again: if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) WARN_ON_ONCE(mfmsr() & MSR_EE); - if (happened & PACA_IRQ_HARD_DIS) { - /* - * We may have missed a decrementer interrupt if hard disabled. - * Check the decrementer register in case we had a rollover - * while hard disabled. 
- */ - if (!(happened & PACA_IRQ_DEC)) { - if (decrementer_check_overflow()) - happened |= PACA_IRQ_DEC; - } - } - /* * Force the delivery of pending soft-disabled interrupts on PS3. * Any HV call will have this side effect. @@ -345,6 +299,7 @@ notrace void arch_local_irq_restore(unsigned long mask) if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) WARN_ON_ONCE(!(mfmsr() & MSR_EE)); __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; } else { /* * We should already be hard disabled here. We had bugs diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 63702c0badb9..9f3e133b57b7 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -555,7 +555,7 @@ void machine_check_print_event_info(struct machine_check_event *evt, } printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n", - level, evt->cpu, sevstr, in_guest ? "Guest" : "Host", + level, evt->cpu, sevstr, in_guest ? "Guest" : "", err_type, subtype, dar_str, evt->disposition == MCE_DISPOSITION_RECOVERED ? "Recovered" : "Not recovered"); @@ -577,7 +577,7 @@ void machine_check_print_event_info(struct machine_check_event *evt, #ifdef CONFIG_PPC_BOOK3S_64 /* Display faulty slb contents for SLB errors. */ - if (evt->error_type == MCE_ERROR_TYPE_SLB) + if (evt->error_type == MCE_ERROR_TYPE_SLB && !in_guest) slb_dump_contents(local_paca->mce_faulty_slbs); #endif } diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index b7e173754a2e..667104d4c455 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -62,6 +62,20 @@ out: return pfn; } +static bool mce_in_guest(void) +{ +#ifdef CONFIG_KVM_BOOK3S_HANDLER + /* + * If machine check is hit when in guest context or low level KVM + * code, avoid looking up any translations or making any attempts + * to recover, just record the event and pass to KVM. + */ + if (get_paca()->kvm_hstate.in_guest) + return true; +#endif + return false; +} + /* flush SLBs and reload */ #ifdef CONFIG_PPC_BOOK3S_64 void flush_and_reload_slb(void) @@ -69,14 +83,6 @@ void flush_and_reload_slb(void) /* Invalidate all SLBs */ slb_flush_all_realmode(); -#ifdef CONFIG_KVM_BOOK3S_HANDLER - /* - * If machine check is hit when in guest or in transition, we will - * only flush the SLBs and continue. 
- */ - if (get_paca()->kvm_hstate.in_guest) - return; -#endif if (early_radix_enabled()) return; @@ -91,7 +97,7 @@ void flush_and_reload_slb(void) } #endif -static void flush_erat(void) +void flush_erat(void) { #ifdef CONFIG_PPC_BOOK3S_64 if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) { @@ -490,19 +496,21 @@ static int mce_handle_ierror(struct pt_regs *regs, if ((srr1 & table[i].srr1_mask) != table[i].srr1_value) continue; - /* attempt to correct the error */ - switch (table[i].error_type) { - case MCE_ERROR_TYPE_SLB: - if (local_paca->in_mce == 1) - slb_save_contents(local_paca->mce_faulty_slbs); - handled = mce_flush(MCE_FLUSH_SLB); - break; - case MCE_ERROR_TYPE_ERAT: - handled = mce_flush(MCE_FLUSH_ERAT); - break; - case MCE_ERROR_TYPE_TLB: - handled = mce_flush(MCE_FLUSH_TLB); - break; + if (!mce_in_guest()) { + /* attempt to correct the error */ + switch (table[i].error_type) { + case MCE_ERROR_TYPE_SLB: + if (local_paca->in_mce == 1) + slb_save_contents(local_paca->mce_faulty_slbs); + handled = mce_flush(MCE_FLUSH_SLB); + break; + case MCE_ERROR_TYPE_ERAT: + handled = mce_flush(MCE_FLUSH_ERAT); + break; + case MCE_ERROR_TYPE_TLB: + handled = mce_flush(MCE_FLUSH_TLB); + break; + } } /* now fill in mce_error_info */ @@ -534,7 +542,7 @@ static int mce_handle_ierror(struct pt_regs *regs, mce_err->sync_error = table[i].sync_error; mce_err->severity = table[i].severity; mce_err->initiator = table[i].initiator; - if (table[i].nip_valid) { + if (table[i].nip_valid && !mce_in_guest()) { *addr = regs->nip; if (mce_err->sync_error && table[i].error_type == MCE_ERROR_TYPE_UE) { @@ -577,22 +585,24 @@ static int mce_handle_derror(struct pt_regs *regs, if (!(dsisr & table[i].dsisr_value)) continue; - /* attempt to correct the error */ - switch (table[i].error_type) { - case MCE_ERROR_TYPE_SLB: - if (local_paca->in_mce == 1) - slb_save_contents(local_paca->mce_faulty_slbs); - if (mce_flush(MCE_FLUSH_SLB)) - handled = 1; - break; - case MCE_ERROR_TYPE_ERAT: - if (mce_flush(MCE_FLUSH_ERAT)) - handled = 1; - break; - case MCE_ERROR_TYPE_TLB: - if (mce_flush(MCE_FLUSH_TLB)) - handled = 1; - break; + if (!mce_in_guest()) { + /* attempt to correct the error */ + switch (table[i].error_type) { + case MCE_ERROR_TYPE_SLB: + if (local_paca->in_mce == 1) + slb_save_contents(local_paca->mce_faulty_slbs); + if (mce_flush(MCE_FLUSH_SLB)) + handled = 1; + break; + case MCE_ERROR_TYPE_ERAT: + if (mce_flush(MCE_FLUSH_ERAT)) + handled = 1; + break; + case MCE_ERROR_TYPE_TLB: + if (mce_flush(MCE_FLUSH_TLB)) + handled = 1; + break; + } } /* @@ -634,7 +644,7 @@ static int mce_handle_derror(struct pt_regs *regs, mce_err->initiator = table[i].initiator; if (table[i].dar_valid) *addr = regs->dar; - else if (mce_err->sync_error && + else if (mce_err->sync_error && !mce_in_guest() && table[i].error_type == MCE_ERROR_TYPE_UE) { /* * We do a maximum of 4 nested MCE calls, see @@ -662,7 +672,8 @@ static int mce_handle_derror(struct pt_regs *regs, static long mce_handle_ue_error(struct pt_regs *regs, struct mce_error_info *mce_err) { - long handled = 0; + if (mce_in_guest()) + return 0; mce_common_process_ue(regs, mce_err); if (mce_err->ignore_event) @@ -677,9 +688,10 @@ static long mce_handle_ue_error(struct pt_regs *regs, if (ppc_md.mce_check_early_recovery) { if (ppc_md.mce_check_early_recovery(regs)) - handled = 1; + return 1; } - return handled; + + return 0; } static long mce_handle_error(struct pt_regs *regs, diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 0ad15768d762..7f5aae3c387d 100644 --- 
a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -208,7 +208,7 @@ static struct rtas_args * __init new_rtas_args(int cpu, unsigned long limit) struct paca_struct **paca_ptrs __read_mostly; EXPORT_SYMBOL(paca_ptrs); -void __init __nostackprotector initialise_paca(struct paca_struct *new_paca, int cpu) +void __init initialise_paca(struct paca_struct *new_paca, int cpu) { #ifdef CONFIG_PPC_PSERIES new_paca->lppaca_ptr = NULL; @@ -241,7 +241,7 @@ void __init __nostackprotector initialise_paca(struct paca_struct *new_paca, int } /* Put the paca pointer into r13 and SPRG_PACA */ -void __nostackprotector setup_paca(struct paca_struct *new_paca) +void setup_paca(struct paca_struct *new_paca) { /* Setup r13 */ local_paca = new_paca; diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index be108616a721..2b555997b295 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -353,6 +353,55 @@ struct pci_controller *pci_find_controller_for_domain(int domain_nr) return NULL; } +struct pci_intx_virq { + int virq; + struct kref kref; + struct list_head list_node; +}; + +static LIST_HEAD(intx_list); +static DEFINE_MUTEX(intx_mutex); + +static void ppc_pci_intx_release(struct kref *kref) +{ + struct pci_intx_virq *vi = container_of(kref, struct pci_intx_virq, kref); + + list_del(&vi->list_node); + irq_dispose_mapping(vi->virq); + kfree(vi); +} + +static int ppc_pci_unmap_irq_line(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct pci_dev *pdev = to_pci_dev(data); + + if (action == BUS_NOTIFY_DEL_DEVICE) { + struct pci_intx_virq *vi; + + mutex_lock(&intx_mutex); + list_for_each_entry(vi, &intx_list, list_node) { + if (vi->virq == pdev->irq) { + kref_put(&vi->kref, ppc_pci_intx_release); + break; + } + } + mutex_unlock(&intx_mutex); + } + + return NOTIFY_DONE; +} + +static struct notifier_block ppc_pci_unmap_irq_notifier = { + .notifier_call = ppc_pci_unmap_irq_line, +}; + +static int ppc_pci_register_irq_notifier(void) +{ + return bus_register_notifier(&pci_bus_type, &ppc_pci_unmap_irq_notifier); +} +arch_initcall(ppc_pci_register_irq_notifier); + /* * Reads the interrupt pin to determine if interrupt is use by card. * If the interrupt is used, then gets the interrupt line from the @@ -361,6 +410,12 @@ struct pci_controller *pci_find_controller_for_domain(int domain_nr) static int pci_read_irq_line(struct pci_dev *pci_dev) { int virq; + struct pci_intx_virq *vi, *vitmp; + + /* Preallocate vi as rewind is complex if this fails after mapping */ + vi = kzalloc(sizeof(struct pci_intx_virq), GFP_KERNEL); + if (!vi) + return -1; pr_debug("PCI: Try to map irq for %s...\n", pci_name(pci_dev)); @@ -377,12 +432,12 @@ static int pci_read_irq_line(struct pci_dev *pci_dev) * function. */ if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &pin)) - return -1; + goto error_exit; if (pin == 0) - return -1; + goto error_exit; if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_LINE, &line) || line == 0xff || line == 0) { - return -1; + goto error_exit; } pr_debug(" No map ! 
Using line %d (pin %d) from PCI config\n", line, pin); @@ -394,14 +449,33 @@ static int pci_read_irq_line(struct pci_dev *pci_dev) if (!virq) { pr_debug(" Failed to map !\n"); - return -1; + goto error_exit; } pr_debug(" Mapped to linux irq %d\n", virq); pci_dev->irq = virq; + mutex_lock(&intx_mutex); + list_for_each_entry(vitmp, &intx_list, list_node) { + if (vitmp->virq == virq) { + kref_get(&vitmp->kref); + kfree(vi); + vi = NULL; + break; + } + } + if (vi) { + vi->virq = virq; + kref_init(&vi->kref); + list_add_tail(&vi->list_node, &intx_list); + } + mutex_unlock(&intx_mutex); + return 0; +error_exit: + kfree(vi); + return -1; } /* diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index d421a2c7f822..a66f435dabbf 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -589,7 +589,6 @@ static void save_all(struct task_struct *tsk) __giveup_spe(tsk); msr_check_and_clear(msr_all_available); - thread_pkey_regs_save(&tsk->thread); } void flush_all_to_thread(struct task_struct *tsk) @@ -807,29 +806,6 @@ static void switch_hw_breakpoint(struct task_struct *new) #endif /* !CONFIG_HAVE_HW_BREAKPOINT */ #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ -#ifdef CONFIG_PPC_ADV_DEBUG_REGS -static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) -{ - mtspr(SPRN_DAC1, dabr); - if (IS_ENABLED(CONFIG_PPC_47x)) - isync(); - return 0; -} -#elif defined(CONFIG_PPC_BOOK3S) -static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) -{ - mtspr(SPRN_DABR, dabr); - if (cpu_has_feature(CPU_FTR_DABRX)) - mtspr(SPRN_DABRX, dabrx); - return 0; -} -#else -static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) -{ - return -EINVAL; -} -#endif - static inline int set_dabr(struct arch_hw_breakpoint *brk) { unsigned long dabr, dabrx; @@ -840,7 +816,19 @@ static inline int set_dabr(struct arch_hw_breakpoint *brk) if (ppc_md.set_dabr) return ppc_md.set_dabr(dabr, dabrx); - return __set_dabr(dabr, dabrx); + if (IS_ENABLED(CONFIG_PPC_ADV_DEBUG_REGS)) { + mtspr(SPRN_DAC1, dabr); + if (IS_ENABLED(CONFIG_PPC_47x)) + isync(); + return 0; + } else if (IS_ENABLED(CONFIG_PPC_BOOK3S)) { + mtspr(SPRN_DABR, dabr); + if (cpu_has_feature(CPU_FTR_DABRX)) + mtspr(SPRN_DABRX, dabrx); + return 0; + } else { + return -EINVAL; + } } static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk) @@ -1160,8 +1148,6 @@ static inline void save_sprs(struct thread_struct *t) t->tar = mfspr(SPRN_TAR); } #endif - - thread_pkey_regs_save(t); } static inline void restore_sprs(struct thread_struct *old_thread, @@ -1202,7 +1188,6 @@ static inline void restore_sprs(struct thread_struct *old_thread, mtspr(SPRN_TIDR, new_thread->tidr); #endif - thread_pkey_regs_restore(new_thread, old_thread); } struct task_struct *__switch_to(struct task_struct *prev, @@ -1466,12 +1451,10 @@ static void print_msr_bits(unsigned long val) #define LAST_VOLATILE 12 #endif -void show_regs(struct pt_regs * regs) +static void __show_regs(struct pt_regs *regs) { int i, trap; - show_regs_print_info(KERN_DEFAULT); - printk("NIP: "REG" LR: "REG" CTR: "REG"\n", regs->nip, regs->link, regs->ctr); printk("REGS: %px TRAP: %04lx %s (%s)\n", @@ -1513,6 +1496,12 @@ void show_regs(struct pt_regs * regs) printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip); printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link); } +} + +void show_regs(struct pt_regs *regs) +{ + show_regs_print_info(KERN_DEFAULT); + __show_regs(regs); show_stack(current, (unsigned long *) regs->gpr[1], KERN_DEFAULT); if 
(!user_mode(regs)) show_instructions(regs); @@ -1527,14 +1516,27 @@ void flush_thread(void) #endif /* CONFIG_HAVE_HW_BREAKPOINT */ } -#ifdef CONFIG_PPC_BOOK3S_64 void arch_setup_new_exec(void) { - if (radix_enabled()) - return; - hash__setup_new_exec(); -} + +#ifdef CONFIG_PPC_BOOK3S_64 + if (!radix_enabled()) + hash__setup_new_exec(); #endif + /* + * If we exec out of a kernel thread then thread.regs will not be + * set. Do it now. + */ + if (!current->thread.regs) { + struct pt_regs *regs = task_stack_page(current) + THREAD_SIZE; + current->thread.regs = regs - 1; + } + +#ifdef CONFIG_PPC_MEM_KEYS + current->thread.regs->amr = default_amr; + current->thread.regs->iamr = default_iamr; +#endif +} #ifdef CONFIG_PPC64 /** @@ -1730,7 +1732,9 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, p->thread.ptrace_bps[i] = NULL; #endif +#ifdef CONFIG_PPC_FPU_REGS p->thread.fp_save_area = NULL; +#endif #ifdef CONFIG_ALTIVEC p->thread.vr_save_area = NULL; #endif @@ -1747,6 +1751,16 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, p->thread.tidr = 0; #endif + /* + * Run with the current AMR value of the kernel + */ +#ifdef CONFIG_PPC_PKEY + if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) + kregs->amr = AMR_KUAP_BLOCKED; + + if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) + kregs->iamr = AMR_KUEP_BLOCKED; +#endif kregs->nip = ppc_function_entry(f); return 0; } @@ -1765,15 +1779,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) preload_new_slb_context(start, sp); #endif - /* - * If we exec out of a kernel thread then thread.regs will not be - * set. Do it now. - */ - if (!current->thread.regs) { - struct pt_regs *regs = task_stack_page(current) + THREAD_SIZE; - current->thread.regs = regs - 1; - } - #ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* * Clear any transactional state, we're exec()ing. 
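
The set_dabr() rework above replaces three #ifdef'ed variants with one body guarded by IS_ENABLED(), so every branch is still parsed and type-checked even when its config option is off. A stripped-down user-space sketch of that preprocessor trick, covering only the defined-to-1 versus undefined cases (CONFIG_FOO and CONFIG_BAR are invented symbols, and this is not the kernel's full kconfig.h implementation):

#include <stdio.h>

#define CONFIG_FOO 1
/* CONFIG_BAR intentionally left undefined. */

#define __ARG_PLACEHOLDER_1 0,
#define __take_second_arg(ignored, val, ...) val
#define __is_defined(x) ___is_defined(x)
#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)
#define IS_ENABLED(option) __is_defined(option)

int main(void)
{
        /* Both branches are compiled; the dead one is optimized away. */
        if (IS_ENABLED(CONFIG_FOO))
                printf("CONFIG_FOO is enabled\n");
        if (IS_ENABLED(CONFIG_BAR))
                printf("CONFIG_BAR is enabled\n");
        return 0;
}
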
The cause is @@ -1855,8 +1860,10 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) #endif current->thread.load_slb = 0; current->thread.load_fp = 0; +#ifdef CONFIG_PPC_FPU_REGS memset(¤t->thread.fp_state, 0, sizeof(current->thread.fp_state)); current->thread.fp_save_area = NULL; +#endif #ifdef CONFIG_ALTIVEC memset(¤t->thread.vr_state, 0, sizeof(current->thread.vr_state)); current->thread.vr_state.vscr.u[3] = 0x00010000; /* Java mode disabled */ @@ -1878,7 +1885,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) current->thread.load_tm = 0; #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ - thread_pkey_regs_init(¤t->thread); } EXPORT_SYMBOL(start_thread); @@ -2174,10 +2180,14 @@ void show_stack(struct task_struct *tsk, unsigned long *stack, && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { struct pt_regs *regs = (struct pt_regs *) (sp + STACK_FRAME_OVERHEAD); + lr = regs->link; - printk("%s--- interrupt: %lx at %pS\n LR = %pS\n", - loglvl, regs->trap, - (void *)regs->nip, (void *)lr); + printk("%s--- interrupt: %lx at %pS\n", + loglvl, regs->trap, (void *)regs->nip); + __show_regs(regs); + printk("%s--- interrupt: %lx\n", + loglvl, regs->trap); + firstframe = 1; } diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index c1545f22c077..ae3c41730367 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -165,7 +165,6 @@ static struct ibm_pa_feature { #ifdef CONFIG_PPC_RADIX_MMU { .pabyte = 40, .pabit = 0, .mmu_features = MMU_FTR_TYPE_RADIX | MMU_FTR_GTSE }, #endif - { .pabyte = 1, .pabit = 1, .invert = 1, .cpu_features = CPU_FTR_NODSISRALIGN }, { .pabyte = 5, .pabit = 0, .cpu_features = CPU_FTR_REAL_LE, .cpu_user_ftrs = PPC_FEATURE_TRUE_LE }, /* diff --git a/arch/powerpc/kernel/ptrace/Makefile b/arch/powerpc/kernel/ptrace/Makefile index c2f2402ebc8c..8ebc11d1168d 100644 --- a/arch/powerpc/kernel/ptrace/Makefile +++ b/arch/powerpc/kernel/ptrace/Makefile @@ -6,10 +6,11 @@ CFLAGS_ptrace-view.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' obj-y += ptrace.o ptrace-view.o +obj-$(CONFIG_PPC_FPU_REGS) += ptrace-fpu.o obj-$(CONFIG_COMPAT) += ptrace32.o obj-$(CONFIG_VSX) += ptrace-vsx.o ifneq ($(CONFIG_VSX),y) -obj-y += ptrace-novsx.o +obj-$(CONFIG_PPC_FPU_REGS) += ptrace-novsx.o endif obj-$(CONFIG_ALTIVEC) += ptrace-altivec.o obj-$(CONFIG_SPE) += ptrace-spe.o diff --git a/arch/powerpc/kernel/ptrace/ptrace-decl.h b/arch/powerpc/kernel/ptrace/ptrace-decl.h index 67447a6197eb..3487f2c9735c 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-decl.h +++ b/arch/powerpc/kernel/ptrace/ptrace-decl.h @@ -159,8 +159,29 @@ int tm_cgpr32_set(struct task_struct *target, const struct user_regset *regset, /* ptrace-view */ +int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data); +int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data); + extern const struct user_regset_view user_ppc_native_view; +/* ptrace-fpu */ +#ifdef CONFIG_PPC_FPU_REGS +int ptrace_get_fpr(struct task_struct *child, int index, unsigned long *data); +int ptrace_put_fpr(struct task_struct *child, int index, unsigned long data); +#else +static inline int +ptrace_get_fpr(struct task_struct *child, int index, unsigned long *data) +{ + return -EIO; +} + +static inline int +ptrace_put_fpr(struct task_struct *child, int index, unsigned long data) +{ + return -EIO; +} +#endif + /* ptrace-(no)adv */ void ppc_gethwdinfo(struct ppc_debug_info *dbginfo); int ptrace_get_debugreg(struct task_struct *child, unsigned long addr, 
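
The ptrace-fpu.c file added below takes over the FPR/FPSCR handling that arch_ptrace() used to open-code for PTRACE_PEEKUSR/PTRACE_POKEUSR. From user space the same path is reached with PTRACE_PEEKUSER; a hedged sketch, assuming a powerpc system where <asm/ptrace.h> provides PT_FPR0 and PT_FPSCR, with error handling kept minimal:

#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <asm/ptrace.h>          /* PT_FPR0, PT_FPSCR (powerpc) */

int main(void)
{
        pid_t child = fork();

        if (child == 0) {
                /* Child: opt in to tracing, then stop so the parent can peek. */
                ptrace(PTRACE_TRACEME, 0, NULL, NULL);
                raise(SIGSTOP);
                _exit(0);
        }

        waitpid(child, NULL, 0);

        /* "addr" for PEEKUSER is the register index scaled by the word size. */
        errno = 0;
        long fpr0  = ptrace(PTRACE_PEEKUSER, child,
                            (void *)(PT_FPR0 * sizeof(long)), NULL);
        long fpscr = ptrace(PTRACE_PEEKUSER, child,
                            (void *)(PT_FPSCR * sizeof(long)), NULL);
        printf("FPR0=%#lx FPSCR=%#lx (errno=%d)\n", fpr0, fpscr, errno);

        ptrace(PTRACE_DETACH, child, NULL, NULL);
        return 0;
}
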
diff --git a/arch/powerpc/kernel/ptrace/ptrace-fpu.c b/arch/powerpc/kernel/ptrace/ptrace-fpu.c new file mode 100644 index 000000000000..8301cb52dd99 --- /dev/null +++ b/arch/powerpc/kernel/ptrace/ptrace-fpu.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <linux/regset.h> + +#include <asm/switch_to.h> + +#include "ptrace-decl.h" + +int ptrace_get_fpr(struct task_struct *child, int index, unsigned long *data) +{ + unsigned int fpidx = index - PT_FPR0; + + if (index > PT_FPSCR) + return -EIO; + + flush_fp_to_thread(child); + if (fpidx < (PT_FPSCR - PT_FPR0)) + memcpy(data, &child->thread.TS_FPR(fpidx), sizeof(long)); + else + *data = child->thread.fp_state.fpscr; + + return 0; +} + +int ptrace_put_fpr(struct task_struct *child, int index, unsigned long data) +{ + unsigned int fpidx = index - PT_FPR0; + + if (index > PT_FPSCR) + return -EIO; + + flush_fp_to_thread(child); + if (fpidx < (PT_FPSCR - PT_FPR0)) + memcpy(&child->thread.TS_FPR(fpidx), &data, sizeof(long)); + else + child->thread.fp_state.fpscr = data; + + return 0; +} + diff --git a/arch/powerpc/kernel/ptrace/ptrace-tm.c b/arch/powerpc/kernel/ptrace/ptrace-tm.c index 54f2d076206f..44045363a903 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-tm.c +++ b/arch/powerpc/kernel/ptrace/ptrace-tm.c @@ -86,6 +86,11 @@ int tm_cgpr_active(struct task_struct *target, const struct user_regset *regset) int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { + struct membuf to_msr = membuf_at(&to, offsetof(struct pt_regs, msr)); +#ifdef CONFIG_PPC64 + struct membuf to_softe = membuf_at(&to, offsetof(struct pt_regs, softe)); +#endif + if (!cpu_has_feature(CPU_FTR_TM)) return -ENODEV; @@ -96,16 +101,12 @@ int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset, flush_fp_to_thread(target); flush_altivec_to_thread(target); - membuf_write(&to, &target->thread.ckpt_regs, - offsetof(struct pt_regs, msr)); - membuf_store(&to, get_user_ckpt_msr(target)); - - BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != - offsetof(struct pt_regs, msr) + sizeof(long)); + membuf_write(&to, &target->thread.ckpt_regs, sizeof(struct user_pt_regs)); - membuf_write(&to, &target->thread.ckpt_regs.orig_gpr3, - sizeof(struct user_pt_regs) - - offsetof(struct pt_regs, orig_gpr3)); + membuf_store(&to_msr, get_user_ckpt_msr(target)); +#ifdef CONFIG_PPC64 + membuf_store(&to_softe, 0x1ul); +#endif return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) - sizeof(struct user_pt_regs)); } diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index 7e6478e7ed07..2bad8068f598 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -217,6 +217,10 @@ int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data) static int gpr_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { + struct membuf to_msr = membuf_at(&to, offsetof(struct pt_regs, msr)); +#ifdef CONFIG_PPC64 + struct membuf to_softe = membuf_at(&to, offsetof(struct pt_regs, softe)); +#endif int i; if (target->thread.regs == NULL) @@ -228,15 +232,12 @@ static int gpr_get(struct task_struct *target, const struct user_regset *regset, target->thread.regs->gpr[i] = NV_REG_POISON; } - membuf_write(&to, target->thread.regs, offsetof(struct pt_regs, msr)); - membuf_store(&to, get_user_msr(target)); - - BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != - offsetof(struct pt_regs, msr) + sizeof(long)); + 
membuf_write(&to, target->thread.regs, sizeof(struct user_pt_regs)); - membuf_write(&to, &target->thread.regs->orig_gpr3, - sizeof(struct user_pt_regs) - - offsetof(struct pt_regs, orig_gpr3)); + membuf_store(&to_msr, get_user_msr(target)); +#ifdef CONFIG_PPC64 + membuf_store(&to_softe, 0x1ul); +#endif return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) - sizeof(struct user_pt_regs)); } @@ -470,12 +471,12 @@ static int pkey_active(struct task_struct *target, const struct user_regset *reg static int pkey_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { - BUILD_BUG_ON(TSO(amr) + sizeof(unsigned long) != TSO(iamr)); if (!arch_pkeys_enabled()) return -ENODEV; - membuf_write(&to, &target->thread.amr, 2 * sizeof(unsigned long)); + membuf_store(&to, target->thread.regs->amr); + membuf_store(&to, target->thread.regs->iamr); return membuf_store(&to, default_uamor); } @@ -508,7 +509,8 @@ static int pkey_set(struct task_struct *target, const struct user_regset *regset * Pick the AMR values for the keys that kernel is using. This * will be indicated by the ~default_uamor bits. */ - target->thread.amr = (new_amr & default_uamor) | (target->thread.amr & ~default_uamor); + target->thread.regs->amr = (new_amr & default_uamor) | + (target->thread.regs->amr & ~default_uamor); return 0; } @@ -520,11 +522,13 @@ static const struct user_regset native_regsets[] = { .size = sizeof(long), .align = sizeof(long), .regset_get = gpr_get, .set = gpr_set }, +#ifdef CONFIG_PPC_FPU_REGS [REGSET_FPR] = { .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, .size = sizeof(double), .align = sizeof(double), .regset_get = fpr_get, .set = fpr_set }, +#endif #ifdef CONFIG_ALTIVEC [REGSET_VMX] = { .core_note_type = NT_PPC_VMX, .n = 34, diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index f6e51be47c6e..3d44b73adb83 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -55,31 +55,18 @@ long arch_ptrace(struct task_struct *child, long request, ret = -EIO; /* convert to index and check */ -#ifdef CONFIG_PPC32 - index = addr >> 2; - if ((addr & 3) || (index > PT_FPSCR) - || (child->thread.regs == NULL)) -#else - index = addr >> 3; - if ((addr & 7) || (index > PT_FPSCR)) -#endif + index = addr / sizeof(long); + if ((addr & (sizeof(long) - 1)) || !child->thread.regs) break; CHECK_FULL_REGS(child->thread.regs); - if (index < PT_FPR0) { + if (index < PT_FPR0) ret = ptrace_get_reg(child, (int) index, &tmp); - if (ret) - break; - } else { - unsigned int fpidx = index - PT_FPR0; - - flush_fp_to_thread(child); - if (fpidx < (PT_FPSCR - PT_FPR0)) - memcpy(&tmp, &child->thread.TS_FPR(fpidx), - sizeof(long)); - else - tmp = child->thread.fp_state.fpscr; - } + else + ret = ptrace_get_fpr(child, index, &tmp); + + if (ret) + break; ret = put_user(tmp, datalp); break; } @@ -90,30 +77,15 @@ long arch_ptrace(struct task_struct *child, long request, ret = -EIO; /* convert to index and check */ -#ifdef CONFIG_PPC32 - index = addr >> 2; - if ((addr & 3) || (index > PT_FPSCR) - || (child->thread.regs == NULL)) -#else - index = addr >> 3; - if ((addr & 7) || (index > PT_FPSCR)) -#endif + index = addr / sizeof(long); + if ((addr & (sizeof(long) - 1)) || !child->thread.regs) break; CHECK_FULL_REGS(child->thread.regs); - if (index < PT_FPR0) { + if (index < PT_FPR0) ret = ptrace_put_reg(child, index, data); - } else { - unsigned int fpidx = index - PT_FPR0; - - flush_fp_to_thread(child); - if (fpidx < (PT_FPSCR - PT_FPR0)) - 
memcpy(&child->thread.TS_FPR(fpidx), &data, - sizeof(long)); - else - child->thread.fp_state.fpscr = data; - ret = 0; - } + else + ret = ptrace_put_fpr(child, index, data); break; } diff --git a/arch/powerpc/kernel/ptrace/ptrace32.c b/arch/powerpc/kernel/ptrace/ptrace32.c index 7589a9665ffb..d30b9ad70edc 100644 --- a/arch/powerpc/kernel/ptrace/ptrace32.c +++ b/arch/powerpc/kernel/ptrace/ptrace32.c @@ -23,6 +23,8 @@ #include <asm/switch_to.h> +#include "ptrace-decl.h" + /* * does not yet catch signals sent when the child dies. * in exit.c or in signal.c. diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 954f41676f69..d126d71ea5bd 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -684,6 +684,63 @@ int rtas_set_indicator_fast(int indicator, int index, int new_value) return rc; } +/** + * rtas_ibm_suspend_me() - Call ibm,suspend-me to suspend the LPAR. + * + * @fw_status: RTAS call status will be placed here if not NULL. + * + * rtas_ibm_suspend_me() should be called only on a CPU which has + * received H_CONTINUE from the H_JOIN hcall. All other active CPUs + * should be waiting to return from H_JOIN. + * + * rtas_ibm_suspend_me() may suspend execution of the OS + * indefinitely. Callers should take appropriate measures upon return, such as + * resetting watchdog facilities. + * + * Callers may choose to retry this call if @fw_status is + * %RTAS_THREADS_ACTIVE. + * + * Return: + * 0 - The partition has resumed from suspend, possibly after + * migration to a different host. + * -ECANCELED - The operation was aborted. + * -EAGAIN - There were other CPUs not in H_JOIN at the time of the call. + * -EBUSY - Some other condition prevented the suspend from succeeding. + * -EIO - Hardware/platform error. + */ +int rtas_ibm_suspend_me(int *fw_status) +{ + int fwrc; + int ret; + + fwrc = rtas_call(rtas_token("ibm,suspend-me"), 0, 1, NULL); + + switch (fwrc) { + case 0: + ret = 0; + break; + case RTAS_SUSPEND_ABORTED: + ret = -ECANCELED; + break; + case RTAS_THREADS_ACTIVE: + ret = -EAGAIN; + break; + case RTAS_NOT_SUSPENDABLE: + case RTAS_OUTSTANDING_COPROC: + ret = -EBUSY; + break; + case -1: + default: + ret = -EIO; + break; + } + + if (fw_status) + *fw_status = fwrc; + + return ret; +} + void __noreturn rtas_restart(char *cmd) { if (rtas_flash_term_hook) @@ -741,163 +798,38 @@ void rtas_os_term(char *str) printk(KERN_EMERG "ibm,os-term call failed %d\n", status); } -static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE; -#ifdef CONFIG_PPC_PSERIES -static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_when_done) -{ - u16 slb_size = mmu_slb_size; - int rc = H_MULTI_THREADS_ACTIVE; - int cpu; - - slb_set_size(SLB_MIN_SIZE); - printk(KERN_DEBUG "calling ibm,suspend-me on cpu %i\n", smp_processor_id()); - - while (rc == H_MULTI_THREADS_ACTIVE && !atomic_read(&data->done) && - !atomic_read(&data->error)) - rc = rtas_call(data->token, 0, 1, NULL); - - if (rc || atomic_read(&data->error)) { - printk(KERN_DEBUG "ibm,suspend-me returned %d\n", rc); - slb_set_size(slb_size); - } - - if (atomic_read(&data->error)) - rc = atomic_read(&data->error); - - atomic_set(&data->error, rc); - pSeries_coalesce_init(); - - if (wake_when_done) { - atomic_set(&data->done, 1); - - for_each_online_cpu(cpu) - plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu)); - } - - if (atomic_dec_return(&data->working) == 0) - complete(data->complete); - - return rc; -} - -int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data) -{ - 
atomic_inc(&data->working); - return __rtas_suspend_last_cpu(data, 0); -} - -static int __rtas_suspend_cpu(struct rtas_suspend_me_data *data, int wake_when_done) -{ - long rc = H_SUCCESS; - unsigned long msr_save; - int cpu; - - atomic_inc(&data->working); - - /* really need to ensure MSR.EE is off for H_JOIN */ - msr_save = mfmsr(); - mtmsr(msr_save & ~(MSR_EE)); - - while (rc == H_SUCCESS && !atomic_read(&data->done) && !atomic_read(&data->error)) - rc = plpar_hcall_norets(H_JOIN); - - mtmsr(msr_save); - - if (rc == H_SUCCESS) { - /* This cpu was prodded and the suspend is complete. */ - goto out; - } else if (rc == H_CONTINUE) { - /* All other cpus are in H_JOIN, this cpu does - * the suspend. - */ - return __rtas_suspend_last_cpu(data, wake_when_done); - } else { - printk(KERN_ERR "H_JOIN on cpu %i failed with rc = %ld\n", - smp_processor_id(), rc); - atomic_set(&data->error, rc); - } - - if (wake_when_done) { - atomic_set(&data->done, 1); - - /* This cpu did the suspend or got an error; in either case, - * we need to prod all other other cpus out of join state. - * Extra prods are harmless. - */ - for_each_online_cpu(cpu) - plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu)); - } -out: - if (atomic_dec_return(&data->working) == 0) - complete(data->complete); - return rc; -} - -int rtas_suspend_cpu(struct rtas_suspend_me_data *data) -{ - return __rtas_suspend_cpu(data, 0); -} - -static void rtas_percpu_suspend_me(void *info) +/** + * rtas_activate_firmware() - Activate a new version of firmware. + * + * Activate a new version of partition firmware. The OS must call this + * after resuming from a partition hibernation or migration in order + * to maintain the ability to perform live firmware updates. It's not + * catastrophic for this method to be absent or to fail; just log the + * condition in that case. + * + * Context: This function may sleep. + */ +void rtas_activate_firmware(void) { - __rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1); -} + int token; + int fwrc; -int rtas_ibm_suspend_me(u64 handle) -{ - long state; - long rc; - unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; - struct rtas_suspend_me_data data; - DECLARE_COMPLETION_ONSTACK(done); - - if (!rtas_service_present("ibm,suspend-me")) - return -ENOSYS; - - /* Make sure the state is valid */ - rc = plpar_hcall(H_VASI_STATE, retbuf, handle); - - state = retbuf[0]; - - if (rc) { - printk(KERN_ERR "rtas_ibm_suspend_me: vasi_state returned %ld\n",rc); - return rc; - } else if (state == H_VASI_ENABLED) { - return -EAGAIN; - } else if (state != H_VASI_SUSPENDING) { - printk(KERN_ERR "rtas_ibm_suspend_me: vasi_state returned state %ld\n", - state); - return -EIO; + token = rtas_token("ibm,activate-firmware"); + if (token == RTAS_UNKNOWN_SERVICE) { + pr_notice("ibm,activate-firmware method unavailable\n"); + return; } - atomic_set(&data.working, 0); - atomic_set(&data.done, 0); - atomic_set(&data.error, 0); - data.token = rtas_token("ibm,suspend-me"); - data.complete = &done; - - lock_device_hotplug(); - - cpu_hotplug_disable(); - - /* Call function on all CPUs. 
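
The kernel-doc on the new rtas_ibm_suspend_me() above spells out a status-to-errno contract and notes that callers may retry while firmware still reports active threads. A user-space style sketch of that contract (suspend_me(), firmware_suspend() and the FW_* status values are all placeholders invented for the illustration, not the real RTAS codes):

#include <errno.h>
#include <stdio.h>

enum {                                   /* placeholder status codes only */
        FW_OK              = 0,
        FW_SUSPEND_ABORTED = 1,
        FW_THREADS_ACTIVE  = 2,
        FW_NOT_SUSPENDABLE = 3,
};

static int firmware_suspend(void)
{
        return FW_OK;                    /* stub: pretend the suspend completed */
}

static int suspend_me(int *fw_status)
{
        int fwrc = firmware_suspend();
        int ret;

        switch (fwrc) {
        case FW_OK:                ret = 0;          break;
        case FW_SUSPEND_ABORTED:   ret = -ECANCELED; break;
        case FW_THREADS_ACTIVE:    ret = -EAGAIN;    break;  /* caller may retry */
        case FW_NOT_SUSPENDABLE:   ret = -EBUSY;     break;
        default:                   ret = -EIO;       break;  /* hardware/platform error */
        }

        if (fw_status)
                *fw_status = fwrc;
        return ret;
}

int main(void)
{
        int status, rc;

        do {
                rc = suspend_me(&status);
        } while (rc == -EAGAIN);

        printf("suspend rc=%d fw_status=%d\n", rc, status);
        return 0;
}
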
One of us will make the - * rtas call - */ - on_each_cpu(rtas_percpu_suspend_me, &data, 0); - - wait_for_completion(&done); - - if (atomic_read(&data.error) != 0) - printk(KERN_ERR "Error doing global join\n"); - - - cpu_hotplug_enable(); - - unlock_device_hotplug(); + do { + fwrc = rtas_call(token, 0, 1, NULL); + } while (rtas_busy_delay(fwrc)); - return atomic_read(&data.error); + if (fwrc) + pr_err("ibm,activate-firmware failed (%i)\n", fwrc); } +static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE; +#ifdef CONFIG_PPC_PSERIES /** * rtas_call_reentrant() - Used for reentrant rtas calls * @token: Token for desired reentrant RTAS call @@ -948,12 +880,7 @@ int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...) return ret; } -#else /* CONFIG_PPC_PSERIES */ -int rtas_ibm_suspend_me(u64 handle) -{ - return -ENOSYS; -} -#endif +#endif /* CONFIG_PPC_PSERIES */ /** * Find a specific pseries error log in an RTAS extended event log. @@ -1030,7 +957,7 @@ static struct rtas_filter rtas_filters[] __ro_after_init = { { "ibm,display-message", -1, 0, -1, -1, -1 }, { "ibm,errinjct", -1, 2, -1, -1, -1, 1024 }, { "ibm,close-errinjct", -1, -1, -1, -1, -1 }, - { "ibm,open-errinct", -1, -1, -1, -1, -1 }, + { "ibm,open-errinjct", -1, -1, -1, -1, -1 }, { "ibm,get-config-addr-info2", -1, -1, -1, -1, -1 }, { "ibm,get-dynamic-sensor-state", -1, 1, -1, -1, -1 }, { "ibm,get-indices", -1, 2, 3, -1, -1 }, @@ -1050,9 +977,11 @@ static struct rtas_filter rtas_filters[] __ro_after_init = { { "set-time-for-power-on", -1, -1, -1, -1, -1 }, { "ibm,set-system-parameter", -1, 1, -1, -1, -1 }, { "set-time-of-day", -1, -1, -1, -1, -1 }, +#ifdef CONFIG_CPU_BIG_ENDIAN { "ibm,suspend-me", -1, -1, -1, -1, -1 }, { "ibm,update-nodes", -1, 0, -1, -1, -1, 4096 }, { "ibm,update-properties", -1, 0, -1, -1, -1, 4096 }, +#endif { "ibm,physical-attestation", -1, 0, 1, -1, -1 }, }; @@ -1183,7 +1112,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) int rc = 0; u64 handle = ((u64)be32_to_cpu(args.args[0]) << 32) | be32_to_cpu(args.args[1]); - rc = rtas_ibm_suspend_me(handle); + rc = rtas_syscall_dispatch_ibm_suspend_me(handle); if (rc == -EAGAIN) args.rets[0] = cpu_to_be32(RTAS_NOT_SUSPENDABLE); else if (rc == -EIO) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 808ec9fab605..71f38e9248be 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -90,8 +90,6 @@ EXPORT_SYMBOL_GPL(boot_cpuid); */ int dcache_bsize; int icache_bsize; -int ucache_bsize; - unsigned long klimit = (unsigned long) _end; @@ -802,8 +800,6 @@ static __init void print_system_info(void) pr_info("dcache_bsize = 0x%x\n", dcache_bsize); pr_info("icache_bsize = 0x%x\n", icache_bsize); - if (ucache_bsize != 0) - pr_info("ucache_bsize = 0x%x\n", ucache_bsize); pr_info("cpu_features = 0x%016lx\n", cur_cpu_spec->cpu_features); pr_info(" possible = 0x%016lx\n", @@ -919,8 +915,6 @@ void __init setup_arch(char **cmdline_p) /* On BookE, setup per-core TLB data structures. */ setup_tlb_core_data(); - - smp_release_cpus(); #endif /* Print various info about the machine that has been gathered so far. 
*/ @@ -944,6 +938,8 @@ void __init setup_arch(char **cmdline_p) exc_lvl_early_init(); emergency_stack_init(); + smp_release_cpus(); + initmem_init(); early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT); diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h index 2ec835574cc9..2dd0d9cb5a20 100644 --- a/arch/powerpc/kernel/setup.h +++ b/arch/powerpc/kernel/setup.h @@ -8,12 +8,6 @@ #ifndef __ARCH_POWERPC_KERNEL_SETUP_H #define __ARCH_POWERPC_KERNEL_SETUP_H -#ifdef CONFIG_CC_IS_CLANG -#define __nostackprotector -#else -#define __nostackprotector __attribute__((__optimize__("no-stack-protector"))) -#endif - void initialize_cache_info(void); void irqstack_early_init(void); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 057d6b8e9bb0..8ba49a6bf515 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -222,7 +222,4 @@ __init void initialize_cache_info(void) */ dcache_bsize = cur_cpu_spec->dcache_bsize; icache_bsize = cur_cpu_spec->icache_bsize; - ucache_bsize = 0; - if (IS_ENABLED(CONFIG_E200)) - ucache_bsize = icache_bsize = dcache_bsize; } diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 74fd47f46fa5..c28e949cc222 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -283,7 +283,7 @@ void __init record_spr_defaults(void) * device-tree is not accessible via normal means at this point. */ -void __init __nostackprotector early_setup(unsigned long dt_ptr) +void __init early_setup(unsigned long dt_ptr) { static __initdata struct paca_struct boot_paca; diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index a8bb0aca1d02..53782aa60ade 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -133,36 +133,6 @@ unsigned long copy_ckvsx_from_user(struct task_struct *task, return 0; } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ -#else -inline unsigned long copy_fpr_to_user(void __user *to, - struct task_struct *task) -{ - return __copy_to_user(to, task->thread.fp_state.fpr, - ELF_NFPREG * sizeof(double)); -} - -inline unsigned long copy_fpr_from_user(struct task_struct *task, - void __user *from) -{ - return __copy_from_user(task->thread.fp_state.fpr, from, - ELF_NFPREG * sizeof(double)); -} - -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -inline unsigned long copy_ckfpr_to_user(void __user *to, - struct task_struct *task) -{ - return __copy_to_user(to, task->thread.ckfp_state.fpr, - ELF_NFPREG * sizeof(double)); -} - -inline unsigned long copy_ckfpr_from_user(struct task_struct *task, - void __user *from) -{ - return __copy_from_user(task->thread.ckfp_state.fpr, from, - ELF_NFPREG * sizeof(double)); -} -#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ #endif /* Log an error when sending an unhandled signal to a process. 
Controlled @@ -174,20 +144,22 @@ int show_unhandled_signals = 1; /* * Allocate space for the signal frame */ -void __user *get_sigframe(struct ksignal *ksig, unsigned long sp, - size_t frame_size, int is_32) +static unsigned long get_tm_stackpointer(struct task_struct *tsk); + +void __user *get_sigframe(struct ksignal *ksig, struct task_struct *tsk, + size_t frame_size, int is_32) { unsigned long oldsp, newsp; + unsigned long sp = get_tm_stackpointer(tsk); /* Default to using normal stack */ - oldsp = get_clean_sp(sp, is_32); + if (is_32) + oldsp = sp & 0x0ffffffffUL; + else + oldsp = sp; oldsp = sigsp(oldsp, ksig); newsp = (oldsp - frame_size) & ~0xFUL; - /* Check access */ - if (!access_ok((void __user *)newsp, oldsp - newsp)) - return NULL; - return (void __user *)newsp; } @@ -331,7 +303,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) user_enter(); } -unsigned long get_tm_stackpointer(struct task_struct *tsk) +static unsigned long get_tm_stackpointer(struct task_struct *tsk) { /* When in an active transaction that takes a signal, we need to be * careful with the stack. It's possible that the stack has moved back @@ -379,3 +351,14 @@ unsigned long get_tm_stackpointer(struct task_struct *tsk) #endif return ret; } + +static const char fm32[] = KERN_INFO "%s[%d]: bad frame in %s: %p nip %08lx lr %08lx\n"; +static const char fm64[] = KERN_INFO "%s[%d]: bad frame in %s: %p nip %016lx lr %016lx\n"; + +void signal_fault(struct task_struct *tsk, struct pt_regs *regs, + const char *where, void __user *ptr) +{ + if (show_unhandled_signals) + printk_ratelimited(regs->msr & MSR_64BIT ? fm64 : fm32, tsk->comm, + task_pid_nr(tsk), where, ptr, regs->nip, regs->link); +} diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index d396efca4068..2559a681536e 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -10,8 +10,8 @@ #ifndef _POWERPC_ARCH_SIGNAL_H #define _POWERPC_ARCH_SIGNAL_H -extern void __user *get_sigframe(struct ksignal *ksig, unsigned long sp, - size_t frame_size, int is_32); +void __user *get_sigframe(struct ksignal *ksig, struct task_struct *tsk, + size_t frame_size, int is_32); extern int handle_signal32(struct ksignal *ksig, sigset_t *oldset, struct task_struct *tsk); @@ -19,16 +19,6 @@ extern int handle_signal32(struct ksignal *ksig, sigset_t *oldset, extern int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, struct task_struct *tsk); -extern unsigned long copy_fpr_to_user(void __user *to, - struct task_struct *task); -extern unsigned long copy_ckfpr_to_user(void __user *to, - struct task_struct *task); -extern unsigned long copy_fpr_from_user(struct task_struct *task, - void __user *from); -extern unsigned long copy_ckfpr_from_user(struct task_struct *task, - void __user *from); -extern unsigned long get_tm_stackpointer(struct task_struct *tsk); - #ifdef CONFIG_VSX extern unsigned long copy_vsx_to_user(void __user *to, struct task_struct *task); @@ -38,6 +28,104 @@ extern unsigned long copy_vsx_from_user(struct task_struct *task, void __user *from); extern unsigned long copy_ckvsx_from_user(struct task_struct *task, void __user *from); +unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task); +unsigned long copy_ckfpr_to_user(void __user *to, struct task_struct *task); +unsigned long copy_fpr_from_user(struct task_struct *task, void __user *from); +unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from); + +#define unsafe_copy_fpr_to_user(to, task, 
label) do { \ + struct task_struct *__t = task; \ + u64 __user *buf = (u64 __user *)to; \ + int i; \ + \ + for (i = 0; i < ELF_NFPREG - 1 ; i++) \ + unsafe_put_user(__t->thread.TS_FPR(i), &buf[i], label); \ + unsafe_put_user(__t->thread.fp_state.fpscr, &buf[i], label); \ +} while (0) + +#define unsafe_copy_vsx_to_user(to, task, label) do { \ + struct task_struct *__t = task; \ + u64 __user *buf = (u64 __user *)to; \ + int i; \ + \ + for (i = 0; i < ELF_NVSRHALFREG ; i++) \ + unsafe_put_user(__t->thread.fp_state.fpr[i][TS_VSRLOWOFFSET], \ + &buf[i], label);\ +} while (0) + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +#define unsafe_copy_ckfpr_to_user(to, task, label) do { \ + struct task_struct *__t = task; \ + u64 __user *buf = (u64 __user *)to; \ + int i; \ + \ + for (i = 0; i < ELF_NFPREG - 1 ; i++) \ + unsafe_put_user(__t->thread.TS_CKFPR(i), &buf[i], label);\ + unsafe_put_user(__t->thread.ckfp_state.fpscr, &buf[i], label); \ +} while (0) + +#define unsafe_copy_ckvsx_to_user(to, task, label) do { \ + struct task_struct *__t = task; \ + u64 __user *buf = (u64 __user *)to; \ + int i; \ + \ + for (i = 0; i < ELF_NVSRHALFREG ; i++) \ + unsafe_put_user(__t->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET], \ + &buf[i], label);\ +} while (0) +#endif +#elif defined(CONFIG_PPC_FPU_REGS) + +#define unsafe_copy_fpr_to_user(to, task, label) \ + unsafe_copy_to_user(to, (task)->thread.fp_state.fpr, \ + ELF_NFPREG * sizeof(double), label) + +static inline unsigned long +copy_fpr_to_user(void __user *to, struct task_struct *task) +{ + return __copy_to_user(to, task->thread.fp_state.fpr, + ELF_NFPREG * sizeof(double)); +} + +static inline unsigned long +copy_fpr_from_user(struct task_struct *task, void __user *from) +{ + return __copy_from_user(task->thread.fp_state.fpr, from, + ELF_NFPREG * sizeof(double)); +} + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +#define unsafe_copy_ckfpr_to_user(to, task, label) \ + unsafe_copy_to_user(to, (task)->thread.ckfp_state.fpr, \ + ELF_NFPREG * sizeof(double), label) + +inline unsigned long copy_ckfpr_to_user(void __user *to, struct task_struct *task) +{ + return __copy_to_user(to, task->thread.ckfp_state.fpr, + ELF_NFPREG * sizeof(double)); +} + +static inline unsigned long +copy_ckfpr_from_user(struct task_struct *task, void __user *from) +{ + return __copy_from_user(task->thread.ckfp_state.fpr, from, + ELF_NFPREG * sizeof(double)); +} +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ +#else +#define unsafe_copy_fpr_to_user(to, task, label) do { } while (0) + +static inline unsigned long +copy_fpr_to_user(void __user *to, struct task_struct *task) +{ + return 0; +} + +static inline unsigned long +copy_fpr_from_user(struct task_struct *task, void __user *from) +{ + return 0; +} #endif #ifdef CONFIG_PPC64 @@ -58,4 +146,7 @@ static inline int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, #endif /* !defined(CONFIG_PPC64) */ +void signal_fault(struct task_struct *tsk, struct pt_regs *regs, + const char *where, void __user *ptr); + #endif /* _POWERPC_ARCH_SIGNAL_H */ diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 96950f189b5a..934cbdf6dd10 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -58,8 +58,6 @@ #define mcontext mcontext32 #define ucontext ucontext32 -#define __save_altstack __compat_save_altstack - /* * Userspace code may pass a ucontext which doesn't include VSX added * at the end. We need to check for this case. 
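
The unsafe_copy_*_to_user() macros above, and the signal_32.c conversion that follows, share one shape: open a single user_write_access_begin() window, issue a run of unsafe_* stores that jump to a common failed: label on a fault, then close the window on both paths. A user-space analogue of just that control flow (put_word(), unsafe_put_word() and save_frame() are invented for the demo; no real uaccess is involved):

#include <stdio.h>

/* Invented stand-in for a store that can fail, e.g. a faulting user access. */
static int put_word(unsigned long val, unsigned long *slot)
{
        if (!slot)
                return -1;
        *slot = val;
        return 0;
}

/* Mirror of the unsafe_put_user(val, ptr, label) convention: nothing to
 * check at each call site, just a jump to the shared error label. */
#define unsafe_put_word(val, slot, label)               \
do {                                                    \
        if (put_word(val, slot))                        \
                goto label;                             \
} while (0)

static int save_frame(unsigned long *frame, int n)
{
        int i;

        /* user_write_access_begin() would go here in the kernel version */
        for (i = 0; i < n; i++)
                unsafe_put_word(i, frame ? &frame[i] : NULL, failed);
        /* user_write_access_end() on the success path */
        return 0;

failed:
        /* user_write_access_end() on the error path too */
        return 1;
}

int main(void)
{
        unsigned long frame[4];

        printf("ok   path: %d\n", save_frame(frame, 4));  /* 0 */
        printf("fail path: %d\n", save_frame(NULL, 4));   /* 1 */
        return 0;
}
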
@@ -84,10 +82,7 @@ * Functions for flipping sigsets (thanks to brain dead generic * implementation that makes things simple for little endian only) */ -static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set) -{ - return put_compat_sigset(uset, set, sizeof(*uset)); -} +#define unsafe_put_sigset_t unsafe_put_compat_sigset static inline int get_sigset_t(sigset_t *set, const compat_sigset_t __user *uset) @@ -98,8 +93,8 @@ static inline int get_sigset_t(sigset_t *set, #define to_user_ptr(p) ptr_to_compat(p) #define from_user_ptr(p) compat_ptr(p) -static inline int save_general_regs(struct pt_regs *regs, - struct mcontext __user *frame) +static __always_inline int +save_general_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame) { elf_greg_t64 *gregs = (elf_greg_t64 *)regs; int val, i; @@ -113,10 +108,12 @@ static inline int save_general_regs(struct pt_regs *regs, else val = gregs[i]; - if (__put_user(val, &frame->mc_gregs[i])) - return -EFAULT; + unsafe_put_user(val, &frame->mc_gregs[i], failed); } return 0; + +failed: + return 1; } static inline int restore_general_regs(struct pt_regs *regs, @@ -138,10 +135,12 @@ static inline int restore_general_regs(struct pt_regs *regs, #define GP_REGS_SIZE min(sizeof(elf_gregset_t), sizeof(struct pt_regs)) -static inline int put_sigset_t(sigset_t __user *uset, sigset_t *set) -{ - return copy_to_user(uset, set, sizeof(*uset)); -} +#define unsafe_put_sigset_t(uset, set, label) do { \ + sigset_t __user *__us = uset ; \ + const sigset_t *__s = set; \ + \ + unsafe_copy_to_user(__us, __s, sizeof(*__us), label); \ +} while (0) static inline int get_sigset_t(sigset_t *set, const sigset_t __user *uset) { @@ -151,11 +150,15 @@ static inline int get_sigset_t(sigset_t *set, const sigset_t __user *uset) #define to_user_ptr(p) ((unsigned long)(p)) #define from_user_ptr(p) ((void __user *)(p)) -static inline int save_general_regs(struct pt_regs *regs, - struct mcontext __user *frame) +static __always_inline int +save_general_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame) { WARN_ON(!FULL_REGS(regs)); - return __copy_to_user(&frame->mc_gregs, regs, GP_REGS_SIZE); + unsafe_copy_to_user(&frame->mc_gregs, regs, GP_REGS_SIZE, failed); + return 0; + +failed: + return 1; } static inline int restore_general_regs(struct pt_regs *regs, @@ -173,6 +176,11 @@ static inline int restore_general_regs(struct pt_regs *regs, } #endif +#define unsafe_save_general_regs(regs, frame, label) do { \ + if (save_general_regs_unsafe(regs, frame)) \ + goto label; \ +} while (0) + /* * When we have signals to deliver, we set up on the * user stack, going down from the original stack pointer: @@ -199,9 +207,6 @@ struct sigframe { int abigap[56]; }; -/* We use the mc_pad field for the signal return trampoline. */ -#define tramp mc_pad - /* * When we have rt signals to deliver, we set up on the * user stack, going down from the original stack pointer: @@ -235,26 +240,39 @@ struct rt_sigframe { * We only save the altivec/spe registers if the process has used * altivec/spe instructions at some point. 
*/ -static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, - struct mcontext __user *tm_frame, int sigret, - int ctx_has_vsx_region) +static void prepare_save_user_regs(int ctx_has_vsx_region) { - unsigned long msr = regs->msr; - /* Make sure floating point registers are stored in regs */ flush_fp_to_thread(current); +#ifdef CONFIG_ALTIVEC + if (current->thread.used_vr) + flush_altivec_to_thread(current); + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + current->thread.vrsave = mfspr(SPRN_VRSAVE); +#endif +#ifdef CONFIG_VSX + if (current->thread.used_vsr && ctx_has_vsx_region) + flush_vsx_to_thread(current); +#endif +#ifdef CONFIG_SPE + if (current->thread.used_spe) + flush_spe_to_thread(current); +#endif +} + +static int save_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame, + struct mcontext __user *tm_frame, int ctx_has_vsx_region) +{ + unsigned long msr = regs->msr; /* save general registers */ - if (save_general_regs(regs, frame)) - return 1; + unsafe_save_general_regs(regs, frame, failed); #ifdef CONFIG_ALTIVEC /* save altivec registers */ if (current->thread.used_vr) { - flush_altivec_to_thread(current); - if (__copy_to_user(&frame->mc_vregs, ¤t->thread.vr_state, - ELF_NVRREG * sizeof(vector128))) - return 1; + unsafe_copy_to_user(&frame->mc_vregs, ¤t->thread.vr_state, + ELF_NVRREG * sizeof(vector128), failed); /* set MSR_VEC in the saved MSR value to indicate that frame->mc_vregs contains valid data */ msr |= MSR_VEC; @@ -267,13 +285,10 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, * most significant bits of that same vector. --BenH * Note that the current VRSAVE value is in the SPR at this point. */ - if (cpu_has_feature(CPU_FTR_ALTIVEC)) - current->thread.vrsave = mfspr(SPRN_VRSAVE); - if (__put_user(current->thread.vrsave, (u32 __user *)&frame->mc_vregs[32])) - return 1; + unsafe_put_user(current->thread.vrsave, (u32 __user *)&frame->mc_vregs[32], + failed); #endif /* CONFIG_ALTIVEC */ - if (copy_fpr_to_user(&frame->mc_fregs, current)) - return 1; + unsafe_copy_fpr_to_user(&frame->mc_fregs, current, failed); /* * Clear the MSR VSX bit to indicate there is no valid state attached @@ -288,19 +303,15 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, * contains valid data */ if (current->thread.used_vsr && ctx_has_vsx_region) { - flush_vsx_to_thread(current); - if (copy_vsx_to_user(&frame->mc_vsregs, current)) - return 1; + unsafe_copy_vsx_to_user(&frame->mc_vsregs, current, failed); msr |= MSR_VSX; } #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE /* save spe registers */ if (current->thread.used_spe) { - flush_spe_to_thread(current); - if (__copy_to_user(&frame->mc_vregs, current->thread.evr, - ELF_NEVRREG * sizeof(u32))) - return 1; + unsafe_copy_to_user(&frame->mc_vregs, current->thread.evr, + ELF_NEVRREG * sizeof(u32), failed); /* set MSR_SPE in the saved MSR value to indicate that frame->mc_vregs contains valid data */ msr |= MSR_SPE; @@ -308,30 +319,29 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, /* else assert((regs->msr & MSR_SPE) == 0) */ /* We always copy to/from spefscr */ - if (__put_user(current->thread.spefscr, (u32 __user *)&frame->mc_vregs + ELF_NEVRREG)) - return 1; + unsafe_put_user(current->thread.spefscr, + (u32 __user *)&frame->mc_vregs + ELF_NEVRREG, failed); #endif /* CONFIG_SPE */ - if (__put_user(msr, &frame->mc_gregs[PT_MSR])) - return 1; + unsafe_put_user(msr, &frame->mc_gregs[PT_MSR], failed); + /* We need to write 0 the 
MSR top 32 bits in the tm frame so that we * can check it on the restore to see if TM is active */ - if (tm_frame && __put_user(0, &tm_frame->mc_gregs[PT_MSR])) - return 1; - - if (sigret) { - /* Set up the sigreturn trampoline: li 0,sigret; sc */ - if (__put_user(PPC_INST_ADDI + sigret, &frame->tramp[0]) - || __put_user(PPC_INST_SC, &frame->tramp[1])) - return 1; - flush_icache_range((unsigned long) &frame->tramp[0], - (unsigned long) &frame->tramp[2]); - } + if (tm_frame) + unsafe_put_user(0, &tm_frame->mc_gregs[PT_MSR], failed); return 0; + +failed: + return 1; } +#define unsafe_save_user_regs(regs, frame, tm_frame, has_vsx, label) do { \ + if (save_user_regs_unsafe(regs, frame, tm_frame, has_vsx)) \ + goto label; \ +} while (0) + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* * Save the current user registers on the user stack. @@ -340,19 +350,28 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, * We also save the transactional registers to a second ucontext in the * frame. * - * See save_user_regs() and signal_64.c:setup_tm_sigcontexts(). + * See save_user_regs_unsafe() and signal_64.c:setup_tm_sigcontexts(). */ -static int save_tm_user_regs(struct pt_regs *regs, - struct mcontext __user *frame, - struct mcontext __user *tm_frame, int sigret, - unsigned long msr) +static void prepare_save_tm_user_regs(void) { WARN_ON(tm_suspend_disabled); +#ifdef CONFIG_ALTIVEC + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + current->thread.ckvrsave = mfspr(SPRN_VRSAVE); +#endif +#ifdef CONFIG_SPE + if (current->thread.used_spe) + flush_spe_to_thread(current); +#endif +} + +static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame, + struct mcontext __user *tm_frame, unsigned long msr) +{ /* Save both sets of general registers */ - if (save_general_regs(¤t->thread.ckpt_regs, frame) - || save_general_regs(regs, tm_frame)) - return 1; + unsafe_save_general_regs(¤t->thread.ckpt_regs, frame, failed); + unsafe_save_general_regs(regs, tm_frame, failed); /* Stash the top half of the 64bit MSR into the 32bit MSR word * of the transactional mcontext. This way we have a backward-compatible @@ -360,26 +379,21 @@ static int save_tm_user_regs(struct pt_regs *regs, * also look at what type of transaction (T or S) was active at the * time of the signal. */ - if (__put_user((msr >> 32), &tm_frame->mc_gregs[PT_MSR])) - return 1; + unsafe_put_user((msr >> 32), &tm_frame->mc_gregs[PT_MSR], failed); #ifdef CONFIG_ALTIVEC /* save altivec registers */ if (current->thread.used_vr) { - if (__copy_to_user(&frame->mc_vregs, ¤t->thread.ckvr_state, - ELF_NVRREG * sizeof(vector128))) - return 1; - if (msr & MSR_VEC) { - if (__copy_to_user(&tm_frame->mc_vregs, - ¤t->thread.vr_state, - ELF_NVRREG * sizeof(vector128))) - return 1; - } else { - if (__copy_to_user(&tm_frame->mc_vregs, - ¤t->thread.ckvr_state, - ELF_NVRREG * sizeof(vector128))) - return 1; - } + unsafe_copy_to_user(&frame->mc_vregs, ¤t->thread.ckvr_state, + ELF_NVRREG * sizeof(vector128), failed); + if (msr & MSR_VEC) + unsafe_copy_to_user(&tm_frame->mc_vregs, + ¤t->thread.vr_state, + ELF_NVRREG * sizeof(vector128), failed); + else + unsafe_copy_to_user(&tm_frame->mc_vregs, + ¤t->thread.ckvr_state, + ELF_NVRREG * sizeof(vector128), failed); /* set MSR_VEC in the saved MSR value to indicate that * frame->mc_vregs contains valid data @@ -392,31 +406,21 @@ static int save_tm_user_regs(struct pt_regs *regs, * significant bits of a vector, we "cheat" and stuff VRSAVE in the * most significant bits of that same vector. 
--BenH */ - if (cpu_has_feature(CPU_FTR_ALTIVEC)) - current->thread.ckvrsave = mfspr(SPRN_VRSAVE); - if (__put_user(current->thread.ckvrsave, - (u32 __user *)&frame->mc_vregs[32])) - return 1; - if (msr & MSR_VEC) { - if (__put_user(current->thread.vrsave, - (u32 __user *)&tm_frame->mc_vregs[32])) - return 1; - } else { - if (__put_user(current->thread.ckvrsave, - (u32 __user *)&tm_frame->mc_vregs[32])) - return 1; - } + unsafe_put_user(current->thread.ckvrsave, + (u32 __user *)&frame->mc_vregs[32], failed); + if (msr & MSR_VEC) + unsafe_put_user(current->thread.vrsave, + (u32 __user *)&tm_frame->mc_vregs[32], failed); + else + unsafe_put_user(current->thread.ckvrsave, + (u32 __user *)&tm_frame->mc_vregs[32], failed); #endif /* CONFIG_ALTIVEC */ - if (copy_ckfpr_to_user(&frame->mc_fregs, current)) - return 1; - if (msr & MSR_FP) { - if (copy_fpr_to_user(&tm_frame->mc_fregs, current)) - return 1; - } else { - if (copy_ckfpr_to_user(&tm_frame->mc_fregs, current)) - return 1; - } + unsafe_copy_ckfpr_to_user(&frame->mc_fregs, current, failed); + if (msr & MSR_FP) + unsafe_copy_fpr_to_user(&tm_frame->mc_fregs, current, failed); + else + unsafe_copy_ckfpr_to_user(&tm_frame->mc_fregs, current, failed); #ifdef CONFIG_VSX /* @@ -426,54 +430,54 @@ static int save_tm_user_regs(struct pt_regs *regs, * contains valid data */ if (current->thread.used_vsr) { - if (copy_ckvsx_to_user(&frame->mc_vsregs, current)) - return 1; - if (msr & MSR_VSX) { - if (copy_vsx_to_user(&tm_frame->mc_vsregs, - current)) - return 1; - } else { - if (copy_ckvsx_to_user(&tm_frame->mc_vsregs, current)) - return 1; - } + unsafe_copy_ckvsx_to_user(&frame->mc_vsregs, current, failed); + if (msr & MSR_VSX) + unsafe_copy_vsx_to_user(&tm_frame->mc_vsregs, current, failed); + else + unsafe_copy_ckvsx_to_user(&tm_frame->mc_vsregs, current, failed); msr |= MSR_VSX; } #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE /* SPE regs are not checkpointed with TM, so this section is - * simply the same as in save_user_regs(). + * simply the same as in save_user_regs_unsafe(). 
*/ if (current->thread.used_spe) { - flush_spe_to_thread(current); - if (__copy_to_user(&frame->mc_vregs, current->thread.evr, - ELF_NEVRREG * sizeof(u32))) - return 1; + unsafe_copy_to_user(&frame->mc_vregs, current->thread.evr, + ELF_NEVRREG * sizeof(u32), failed); /* set MSR_SPE in the saved MSR value to indicate that * frame->mc_vregs contains valid data */ msr |= MSR_SPE; } /* We always copy to/from spefscr */ - if (__put_user(current->thread.spefscr, (u32 __user *)&frame->mc_vregs + ELF_NEVRREG)) - return 1; + unsafe_put_user(current->thread.spefscr, + (u32 __user *)&frame->mc_vregs + ELF_NEVRREG, failed); #endif /* CONFIG_SPE */ - if (__put_user(msr, &frame->mc_gregs[PT_MSR])) - return 1; - if (sigret) { - /* Set up the sigreturn trampoline: li 0,sigret; sc */ - if (__put_user(PPC_INST_ADDI + sigret, &frame->tramp[0]) - || __put_user(PPC_INST_SC, &frame->tramp[1])) - return 1; - flush_icache_range((unsigned long) &frame->tramp[0], - (unsigned long) &frame->tramp[2]); - } + unsafe_put_user(msr, &frame->mc_gregs[PT_MSR], failed); return 0; + +failed: + return 1; +} +#else +static void prepare_save_tm_user_regs(void) { } + +static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame, + struct mcontext __user *tm_frame, unsigned long msr) +{ + return 0; } #endif +#define unsafe_save_tm_user_regs(regs, frame, tm_frame, msr, label) do { \ + if (save_tm_user_regs_unsafe(regs, frame, tm_frame, msr)) \ + goto label; \ +} while (0) + /* * Restore the current user register values from the user stack, * (except for MSR). @@ -751,96 +755,189 @@ static long restore_tm_user_regs(struct pt_regs *regs, int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, struct task_struct *tsk) { - struct rt_sigframe __user *rt_sf; - struct mcontext __user *frame; - struct mcontext __user *tm_frame = NULL; - void __user *addr; + struct rt_sigframe __user *frame; + struct mcontext __user *mctx; + struct mcontext __user *tm_mctx = NULL; unsigned long newsp = 0; - int sigret; unsigned long tramp; struct pt_regs *regs = tsk->thread.regs; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* Save the thread's msr before get_tm_stackpointer() changes it */ unsigned long msr = regs->msr; -#endif - - BUG_ON(tsk != current); /* Set up Signal Frame */ - /* Put a Real Time Context onto stack */ - rt_sf = get_sigframe(ksig, get_tm_stackpointer(tsk), sizeof(*rt_sf), 1); - addr = rt_sf; - if (unlikely(rt_sf == NULL)) + frame = get_sigframe(ksig, tsk, sizeof(*frame), 1); + mctx = &frame->uc.uc_mcontext; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + tm_mctx = &frame->uc_transact.uc_mcontext; +#endif + if (MSR_TM_ACTIVE(msr)) + prepare_save_tm_user_regs(); + else + prepare_save_user_regs(1); + + if (!user_write_access_begin(frame, sizeof(*frame))) goto badframe; /* Put the siginfo & fill in most of the ucontext */ - if (copy_siginfo_to_user(&rt_sf->info, &ksig->info) - || __put_user(0, &rt_sf->uc.uc_flags) - || __save_altstack(&rt_sf->uc.uc_stack, regs->gpr[1]) - || __put_user(to_user_ptr(&rt_sf->uc.uc_mcontext), - &rt_sf->uc.uc_regs) - || put_sigset_t(&rt_sf->uc.uc_sigmask, oldset)) - goto badframe; + unsafe_put_user(0, &frame->uc.uc_flags, failed); +#ifdef CONFIG_PPC64 + unsafe_compat_save_altstack(&frame->uc.uc_stack, regs->gpr[1], failed); +#else + unsafe_save_altstack(&frame->uc.uc_stack, regs->gpr[1], failed); +#endif + unsafe_put_user(to_user_ptr(&frame->uc.uc_mcontext), &frame->uc.uc_regs, failed); - /* Save user registers on the stack */ - frame = &rt_sf->uc.uc_mcontext; - addr = frame; - if 
(vdso32_rt_sigtramp && tsk->mm->context.vdso_base) { - sigret = 0; - tramp = tsk->mm->context.vdso_base + vdso32_rt_sigtramp; + if (MSR_TM_ACTIVE(msr)) { +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + unsafe_put_user((unsigned long)&frame->uc_transact, + &frame->uc.uc_link, failed); + unsafe_put_user((unsigned long)tm_mctx, + &frame->uc_transact.uc_regs, failed); +#endif + unsafe_save_tm_user_regs(regs, mctx, tm_mctx, msr, failed); } else { - sigret = __NR_rt_sigreturn; - tramp = (unsigned long) frame->tramp; + unsafe_put_user(0, &frame->uc.uc_link, failed); + unsafe_save_user_regs(regs, mctx, tm_mctx, 1, failed); } -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - tm_frame = &rt_sf->uc_transact.uc_mcontext; - if (MSR_TM_ACTIVE(msr)) { - if (__put_user((unsigned long)&rt_sf->uc_transact, - &rt_sf->uc.uc_link) || - __put_user((unsigned long)tm_frame, - &rt_sf->uc_transact.uc_regs)) - goto badframe; - if (save_tm_user_regs(regs, frame, tm_frame, sigret, msr)) - goto badframe; - } - else -#endif - { - if (__put_user(0, &rt_sf->uc.uc_link)) - goto badframe; - if (save_user_regs(regs, frame, tm_frame, sigret, 1)) - goto badframe; + /* Save user registers on the stack */ + if (tsk->mm->context.vdso) { + tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp_rt32); + } else { + tramp = (unsigned long)mctx->mc_pad; + /* Set up the sigreturn trampoline: li r0,sigret; sc */ + unsafe_put_user(PPC_INST_ADDI + __NR_rt_sigreturn, &mctx->mc_pad[0], + failed); + unsafe_put_user(PPC_INST_SC, &mctx->mc_pad[1], failed); } + unsafe_put_sigset_t(&frame->uc.uc_sigmask, oldset, failed); + + user_write_access_end(); + + if (copy_siginfo_to_user(&frame->info, &ksig->info)) + goto badframe; + + if (tramp == (unsigned long)mctx->mc_pad) + flush_icache_range(tramp, tramp + 2 * sizeof(unsigned long)); + regs->link = tramp; +#ifdef CONFIG_PPC_FPU_REGS tsk->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */ +#endif /* create a stack frame for the caller of the handler */ - newsp = ((unsigned long)rt_sf) - (__SIGNAL_FRAMESIZE + 16); - addr = (void __user *)regs->gpr[1]; + newsp = ((unsigned long)frame) - (__SIGNAL_FRAMESIZE + 16); if (put_user(regs->gpr[1], (u32 __user *)newsp)) goto badframe; /* Fill registers for signal handler */ regs->gpr[1] = newsp; regs->gpr[3] = ksig->sig; - regs->gpr[4] = (unsigned long) &rt_sf->info; - regs->gpr[5] = (unsigned long) &rt_sf->uc; - regs->gpr[6] = (unsigned long) rt_sf; + regs->gpr[4] = (unsigned long)&frame->info; + regs->gpr[5] = (unsigned long)&frame->uc; + regs->gpr[6] = (unsigned long)frame; regs->nip = (unsigned long) ksig->ka.sa.sa_handler; /* enter the signal handler in native-endian mode */ regs->msr &= ~MSR_LE; regs->msr |= (MSR_KERNEL & MSR_LE); return 0; +failed: + user_write_access_end(); + +badframe: + signal_fault(tsk, regs, "handle_rt_signal32", frame); + + return 1; +} + +/* + * OK, we're invoking a handler + */ +int handle_signal32(struct ksignal *ksig, sigset_t *oldset, + struct task_struct *tsk) +{ + struct sigcontext __user *sc; + struct sigframe __user *frame; + struct mcontext __user *mctx; + struct mcontext __user *tm_mctx = NULL; + unsigned long newsp = 0; + unsigned long tramp; + struct pt_regs *regs = tsk->thread.regs; + /* Save the thread's msr before get_tm_stackpointer() changes it */ + unsigned long msr = regs->msr; + + /* Set up Signal Frame */ + frame = get_sigframe(ksig, tsk, sizeof(*frame), 1); + mctx = &frame->mctx; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + tm_mctx = &frame->mctx_transact; +#endif + if (MSR_TM_ACTIVE(msr)) + prepare_save_tm_user_regs(); + else 
+ prepare_save_user_regs(1); + + if (!user_write_access_begin(frame, sizeof(*frame))) + goto badframe; + sc = (struct sigcontext __user *) &frame->sctx; + +#if _NSIG != 64 +#error "Please adjust handle_signal()" +#endif + unsafe_put_user(to_user_ptr(ksig->ka.sa.sa_handler), &sc->handler, failed); + unsafe_put_user(oldset->sig[0], &sc->oldmask, failed); +#ifdef CONFIG_PPC64 + unsafe_put_user((oldset->sig[0] >> 32), &sc->_unused[3], failed); +#else + unsafe_put_user(oldset->sig[1], &sc->_unused[3], failed); +#endif + unsafe_put_user(to_user_ptr(mctx), &sc->regs, failed); + unsafe_put_user(ksig->sig, &sc->signal, failed); + + if (MSR_TM_ACTIVE(msr)) + unsafe_save_tm_user_regs(regs, mctx, tm_mctx, msr, failed); + else + unsafe_save_user_regs(regs, mctx, tm_mctx, 1, failed); + + if (tsk->mm->context.vdso) { + tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp32); + } else { + tramp = (unsigned long)mctx->mc_pad; + /* Set up the sigreturn trampoline: li r0,sigret; sc */ + unsafe_put_user(PPC_INST_ADDI + __NR_sigreturn, &mctx->mc_pad[0], failed); + unsafe_put_user(PPC_INST_SC, &mctx->mc_pad[1], failed); + } + user_write_access_end(); + + if (tramp == (unsigned long)mctx->mc_pad) + flush_icache_range(tramp, tramp + 2 * sizeof(unsigned long)); + + regs->link = tramp; + +#ifdef CONFIG_PPC_FPU_REGS + tsk->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */ +#endif + + /* create a stack frame for the caller of the handler */ + newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE; + if (put_user(regs->gpr[1], (u32 __user *)newsp)) + goto badframe; + + regs->gpr[1] = newsp; + regs->gpr[3] = ksig->sig; + regs->gpr[4] = (unsigned long) sc; + regs->nip = (unsigned long)ksig->ka.sa.sa_handler; + /* enter the signal handler in big-endian mode */ + regs->msr &= ~MSR_LE; + return 0; + +failed: + user_write_access_end(); + badframe: - if (show_unhandled_signals) - printk_ratelimited(KERN_INFO - "%s[%d]: bad frame in handle_rt_signal32: " - "%p nip %08lx lr %08lx\n", - tsk->comm, tsk->pid, - addr, regs->nip, regs->link); + signal_fault(tsk, regs, "handle_signal32", frame); return 1; } @@ -967,11 +1064,13 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, */ mctx = (struct mcontext __user *) ((unsigned long) &old_ctx->uc_mcontext & ~0xfUL); - if (!access_ok(old_ctx, ctx_size) - || save_user_regs(regs, mctx, NULL, 0, ctx_has_vsx_region) - || put_sigset_t(&old_ctx->uc_sigmask, ¤t->blocked) - || __put_user(to_user_ptr(mctx), &old_ctx->uc_regs)) + prepare_save_user_regs(ctx_has_vsx_region); + if (!user_write_access_begin(old_ctx, ctx_size)) return -EFAULT; + unsafe_save_user_regs(regs, mctx, NULL, ctx_has_vsx_region, failed); + unsafe_put_sigset_t(&old_ctx->uc_sigmask, ¤t->blocked, failed); + unsafe_put_user(to_user_ptr(mctx), &old_ctx->uc_regs, failed); + user_write_access_end(); } if (new_ctx == NULL) return 0; @@ -995,6 +1094,10 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, set_thread_flag(TIF_RESTOREALL); return 0; + +failed: + user_write_access_end(); + return -EFAULT; } #ifdef CONFIG_PPC64 @@ -1092,12 +1195,7 @@ SYSCALL_DEFINE0(rt_sigreturn) return 0; bad: - if (show_unhandled_signals) - printk_ratelimited(KERN_INFO - "%s[%d]: bad frame in sys_rt_sigreturn: " - "%p nip %08lx lr %08lx\n", - current->comm, current->pid, - rt_sf, regs->nip, regs->link); + signal_fault(current, regs, "sys_rt_sigreturn", rt_sf); force_sig(SIGSEGV); return 0; @@ -1181,12 +1279,7 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx, * We kill the task with a SIGSEGV in this 
situation. */ if (do_setcontext(ctx, regs, 1)) { - if (show_unhandled_signals) - printk_ratelimited(KERN_INFO "%s[%d]: bad frame in " - "sys_debug_setcontext: %p nip %08lx " - "lr %08lx\n", - current->comm, current->pid, - ctx, regs->nip, regs->link); + signal_fault(current, regs, "sys_debug_setcontext", ctx); force_sig(SIGSEGV); goto out; @@ -1208,96 +1301,6 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx, #endif /* - * OK, we're invoking a handler - */ -int handle_signal32(struct ksignal *ksig, sigset_t *oldset, - struct task_struct *tsk) -{ - struct sigcontext __user *sc; - struct sigframe __user *frame; - struct mcontext __user *tm_mctx = NULL; - unsigned long newsp = 0; - int sigret; - unsigned long tramp; - struct pt_regs *regs = tsk->thread.regs; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - /* Save the thread's msr before get_tm_stackpointer() changes it */ - unsigned long msr = regs->msr; -#endif - - BUG_ON(tsk != current); - - /* Set up Signal Frame */ - frame = get_sigframe(ksig, get_tm_stackpointer(tsk), sizeof(*frame), 1); - if (unlikely(frame == NULL)) - goto badframe; - sc = (struct sigcontext __user *) &frame->sctx; - -#if _NSIG != 64 -#error "Please adjust handle_signal()" -#endif - if (__put_user(to_user_ptr(ksig->ka.sa.sa_handler), &sc->handler) - || __put_user(oldset->sig[0], &sc->oldmask) -#ifdef CONFIG_PPC64 - || __put_user((oldset->sig[0] >> 32), &sc->_unused[3]) -#else - || __put_user(oldset->sig[1], &sc->_unused[3]) -#endif - || __put_user(to_user_ptr(&frame->mctx), &sc->regs) - || __put_user(ksig->sig, &sc->signal)) - goto badframe; - - if (vdso32_sigtramp && tsk->mm->context.vdso_base) { - sigret = 0; - tramp = tsk->mm->context.vdso_base + vdso32_sigtramp; - } else { - sigret = __NR_sigreturn; - tramp = (unsigned long) frame->mctx.tramp; - } - -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - tm_mctx = &frame->mctx_transact; - if (MSR_TM_ACTIVE(msr)) { - if (save_tm_user_regs(regs, &frame->mctx, &frame->mctx_transact, - sigret, msr)) - goto badframe; - } - else -#endif - { - if (save_user_regs(regs, &frame->mctx, tm_mctx, sigret, 1)) - goto badframe; - } - - regs->link = tramp; - - tsk->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */ - - /* create a stack frame for the caller of the handler */ - newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE; - if (put_user(regs->gpr[1], (u32 __user *)newsp)) - goto badframe; - - regs->gpr[1] = newsp; - regs->gpr[3] = ksig->sig; - regs->gpr[4] = (unsigned long) sc; - regs->nip = (unsigned long) (unsigned long)ksig->ka.sa.sa_handler; - /* enter the signal handler in big-endian mode */ - regs->msr &= ~MSR_LE; - return 0; - -badframe: - if (show_unhandled_signals) - printk_ratelimited(KERN_INFO - "%s[%d]: bad frame in handle_signal32: " - "%p nip %08lx lr %08lx\n", - tsk->comm, tsk->pid, - frame, regs->nip, regs->link); - - return 1; -} - -/* * Do a signal return; undo the signal stack. 
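The pattern used throughout the signal_32.c conversion above is worth spelling out: each old __put_user()/__copy_to_user() call did its own user-access bookkeeping, whereas the new code opens a single user_write_access_begin()/user_write_access_end() window around the whole frame and uses unsafe_* accessors that branch to a local label on a fault. A minimal sketch of that calling convention, with a made-up example_frame structure standing in for the real signal frame types:

#include <linux/uaccess.h>

/* Hypothetical user-visible structure, for illustration only. */
struct example_frame {
	u32 first;
	u32 second;
};

static int write_example_frame(struct example_frame __user *frame, u32 a, u32 b)
{
	/* Open one write window covering the whole frame. */
	if (!user_write_access_begin(frame, sizeof(*frame)))
		return -EFAULT;

	/*
	 * unsafe_* accessors do no access_ok() of their own and jump to
	 * the 'failed' label instead of returning an error code.
	 */
	unsafe_put_user(a, &frame->first, failed);
	unsafe_put_user(b, &frame->second, failed);

	user_write_access_end();
	return 0;

failed:
	user_write_access_end();
	return -EFAULT;
}

Batching the stores this way opens and closes user access once per frame rather than toggling it (KUAP) around every individual store, which is why the register flushing is hoisted into prepare_save_user_regs() before the window is opened.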
*/ #ifdef CONFIG_PPC64 @@ -1363,12 +1366,7 @@ SYSCALL_DEFINE0(sigreturn) return 0; badframe: - if (show_unhandled_signals) - printk_ratelimited(KERN_INFO - "%s[%d]: bad frame in sys_sigreturn: " - "%p nip %08lx lr %08lx\n", - current->comm, current->pid, - addr, regs->nip, regs->link); + signal_fault(current, regs, "sys_sigreturn", addr); force_sig(SIGSEGV); return 0; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index bfc939360bad..f9e4a1ac440f 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -66,11 +66,6 @@ struct rt_sigframe { char abigap[USER_REDZONE_SIZE]; } __attribute__ ((aligned (16))); -static const char fmt32[] = KERN_INFO \ - "%s[%d]: bad frame in %s: %08lx nip %08lx lr %08lx\n"; -static const char fmt64[] = KERN_INFO \ - "%s[%d]: bad frame in %s: %016lx nip %016lx lr %016lx\n"; - /* * This computes a quad word aligned pointer inside the vmx_reserve array * element. For historical reasons sigcontext might not be quad word aligned, @@ -801,10 +796,7 @@ SYSCALL_DEFINE0(rt_sigreturn) return 0; badframe: - if (show_unhandled_signals) - printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32, - current->comm, current->pid, "rt_sigreturn", - (long)uc, regs->nip, regs->link); + signal_fault(current, regs, "rt_sigreturn", uc); force_sig(SIGSEGV); return 0; @@ -822,10 +814,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, unsigned long msr = regs->msr; #endif - BUG_ON(tsk != current); - - frame = get_sigframe(ksig, get_tm_stackpointer(tsk), sizeof(*frame), 0); - if (unlikely(frame == NULL)) + frame = get_sigframe(ksig, tsk, sizeof(*frame), 0); + if (!access_ok(frame, sizeof(*frame))) goto badframe; err |= __put_user(&frame->info, &frame->pinfo); @@ -864,8 +854,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, tsk->thread.fp_state.fpscr = 0; /* Set up to return from userspace. */ - if (vdso64_rt_sigtramp && tsk->mm->context.vdso_base) { - regs->nip = tsk->mm->context.vdso_base + vdso64_rt_sigtramp; + if (tsk->mm->context.vdso) { + regs->nip = VDSO64_SYMBOL(tsk->mm->context.vdso, sigtramp_rt64); } else { err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]); if (err) @@ -913,10 +903,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, return 0; badframe: - if (show_unhandled_signals) - printk_ratelimited(regs->msr & MSR_64BIT ? 
fmt64 : fmt32, - tsk->comm, tsk->pid, "setup_rt_frame", - (long)frame, regs->nip, regs->link); + signal_fault(current, regs, "handle_rt_signal64", frame); return 1; } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 8c2857cbd960..2b9b1bb4c5f2 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -76,6 +76,7 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 }; struct task_struct *secondary_current; bool has_big_cores; bool coregroup_enabled; +bool thread_group_shares_l2; DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map); @@ -99,6 +100,7 @@ enum { #define MAX_THREAD_LIST_SIZE 8 #define THREAD_GROUP_SHARE_L1 1 +#define THREAD_GROUP_SHARE_L2 2 struct thread_groups { unsigned int property; unsigned int nr_groups; @@ -106,11 +108,27 @@ struct thread_groups { unsigned int thread_list[MAX_THREAD_LIST_SIZE]; }; +/* Maximum number of properties that groups of threads within a core can share */ +#define MAX_THREAD_GROUP_PROPERTIES 2 + +struct thread_groups_list { + unsigned int nr_properties; + struct thread_groups property_tgs[MAX_THREAD_GROUP_PROPERTIES]; +}; + +static struct thread_groups_list tgl[NR_CPUS] __initdata; /* - * On big-cores system, cpu_l1_cache_map for each CPU corresponds to + * On big-cores system, thread_group_l1_cache_map for each CPU corresponds to * the set its siblings that share the L1-cache. */ -DEFINE_PER_CPU(cpumask_var_t, cpu_l1_cache_map); +DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map); + +/* + * On some big-cores system, thread_group_l2_cache_map for each CPU + * corresponds to the set its siblings within the core that share the + * L2-cache. + */ +DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map); /* SMP operations for this machine */ struct smp_ops_t *smp_ops; @@ -695,81 +713,100 @@ static void or_cpumasks_related(int i, int j, struct cpumask *(*srcmask)(int), /* * parse_thread_groups: Parses the "ibm,thread-groups" device tree * property for the CPU device node @dn and stores - * the parsed output in the thread_groups - * structure @tg if the ibm,thread-groups[0] - * matches @property. + * the parsed output in the thread_groups_list + * structure @tglp. * * @dn: The device node of the CPU device. - * @tg: Pointer to a thread group structure into which the parsed + * @tglp: Pointer to a thread group list structure into which the parsed * output of "ibm,thread-groups" is stored. - * @property: The property of the thread-group that the caller is - * interested in. * * ibm,thread-groups[0..N-1] array defines which group of threads in * the CPU-device node can be grouped together based on the property. * - * ibm,thread-groups[0] tells us the property based on which the + * This array can represent thread groupings for multiple properties. + * + * ibm,thread-groups[i + 0] tells us the property based on which the * threads are being grouped together. If this value is 1, it implies - * that the threads in the same group share L1, translation cache. + * that the threads in the same group share L1, translation cache. If + * the value is 2, it implies that the threads in the same group share + * the same L2 cache. * - * ibm,thread-groups[1] tells us how many such thread groups exist. + * ibm,thread-groups[i+1] tells us how many such thread groups exist for the + * property ibm,thread-groups[i] * - * ibm,thread-groups[2] tells us the number of threads in each such + * ibm,thread-groups[i+2] tells us the number of threads in each such * group. 
+ * Suppose k = (ibm,thread-groups[i+1] * ibm,thread-groups[i+2]), then, * - * ibm,thread-groups[3..N-1] is the list of threads identified by + * ibm,thread-groups[i+3..i+k+2] (is the list of threads identified by * "ibm,ppc-interrupt-server#s" arranged as per their membership in * the grouping. * - * Example: If ibm,thread-groups = [1,2,4,5,6,7,8,9,10,11,12] it - * implies that there are 2 groups of 4 threads each, where each group - * of threads share L1, translation cache. + * Example: + * If "ibm,thread-groups" = [1,2,4,8,10,12,14,9,11,13,15,2,2,4,8,10,12,14,9,11,13,15] + * This can be decomposed up into two consecutive arrays: + * a) [1,2,4,8,10,12,14,9,11,13,15] + * b) [2,2,4,8,10,12,14,9,11,13,15] + * + * where in, + * + * a) provides information of Property "1" being shared by "2" groups, + * each with "4" threads each. The "ibm,ppc-interrupt-server#s" of + * the first group is {8,10,12,14} and the + * "ibm,ppc-interrupt-server#s" of the second group is + * {9,11,13,15}. Property "1" is indicative of the thread in the + * group sharing L1 cache, translation cache and Instruction Data + * flow. * - * The "ibm,ppc-interrupt-server#s" of the first group is {5,6,7,8} - * and the "ibm,ppc-interrupt-server#s" of the second group is {9, 10, - * 11, 12} structure + * b) provides information of Property "2" being shared by "2" groups, + * each group with "4" threads. The "ibm,ppc-interrupt-server#s" of + * the first group is {8,10,12,14} and the + * "ibm,ppc-interrupt-server#s" of the second group is + * {9,11,13,15}. Property "2" indicates that the threads in each + * group share the L2-cache. * * Returns 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. 
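Decoding the flattened property is a simple linear walk: read a (property, nr-groups, threads-per-group) header, consume nr-groups * threads-per-group interrupt-server numbers, then repeat until the array is exhausted. A self-contained userspace sketch (illustrative only, not kernel code) that decodes the example value quoted above:

#include <stdio.h>

static void decode_thread_groups(const unsigned int *p, int len)
{
	int i = 0;

	while (i + 3 <= len) {
		unsigned int property = p[i];
		unsigned int nr_groups = p[i + 1];
		unsigned int per_group = p[i + 2];
		unsigned int g, t;
		int idx = i + 3;

		printf("property %u: %u group(s) of %u thread(s)\n",
		       property, nr_groups, per_group);
		for (g = 0; g < nr_groups; g++) {
			printf("  group %u:", g);
			for (t = 0; t < per_group; t++)
				printf(" %u", p[idx++]);
			printf("\n");
		}
		i = idx;
	}
}

int main(void)
{
	/* The example value of "ibm,thread-groups" from the comment above. */
	const unsigned int tg[] = {
		1, 2, 4, 8, 10, 12, 14, 9, 11, 13, 15,
		2, 2, 4, 8, 10, 12, 14, 9, 11, 13, 15,
	};

	decode_thread_groups(tg, (int)(sizeof(tg) / sizeof(tg[0])));
	return 0;
}

Run against the example array it reports, for property 1 and property 2 alike, two groups of four threads, {8,10,12,14} and {9,11,13,15}, matching the decomposition described in the comment.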
*/ static int parse_thread_groups(struct device_node *dn, - struct thread_groups *tg, - unsigned int property) + struct thread_groups_list *tglp) { - int i; - u32 thread_group_array[3 + MAX_THREAD_LIST_SIZE]; - u32 *thread_list; + unsigned int property_idx = 0; + u32 *thread_group_array; size_t total_threads; - int ret; + int ret = 0, count; + u32 *thread_list; + int i = 0; + count = of_property_count_u32_elems(dn, "ibm,thread-groups"); + thread_group_array = kcalloc(count, sizeof(u32), GFP_KERNEL); ret = of_property_read_u32_array(dn, "ibm,thread-groups", - thread_group_array, 3); + thread_group_array, count); if (ret) - return ret; - - tg->property = thread_group_array[0]; - tg->nr_groups = thread_group_array[1]; - tg->threads_per_group = thread_group_array[2]; - if (tg->property != property || - tg->nr_groups < 1 || - tg->threads_per_group < 1) - return -ENODATA; + goto out_free; - total_threads = tg->nr_groups * tg->threads_per_group; + while (i < count && property_idx < MAX_THREAD_GROUP_PROPERTIES) { + int j; + struct thread_groups *tg = &tglp->property_tgs[property_idx++]; - ret = of_property_read_u32_array(dn, "ibm,thread-groups", - thread_group_array, - 3 + total_threads); - if (ret) - return ret; + tg->property = thread_group_array[i]; + tg->nr_groups = thread_group_array[i + 1]; + tg->threads_per_group = thread_group_array[i + 2]; + total_threads = tg->nr_groups * tg->threads_per_group; - thread_list = &thread_group_array[3]; + thread_list = &thread_group_array[i + 3]; - for (i = 0 ; i < total_threads; i++) - tg->thread_list[i] = thread_list[i]; + for (j = 0; j < total_threads; j++) + tg->thread_list[j] = thread_list[j]; + i = i + 3 + total_threads; + } - return 0; + tglp->nr_properties = property_idx; + +out_free: + kfree(thread_group_array); + return ret; } /* @@ -805,50 +842,84 @@ static int get_cpu_thread_group_start(int cpu, struct thread_groups *tg) return -1; } -static int init_cpu_l1_cache_map(int cpu) - +static struct thread_groups *__init get_thread_groups(int cpu, + int group_property, + int *err) { struct device_node *dn = of_get_cpu_node(cpu, NULL); - struct thread_groups tg = {.property = 0, - .nr_groups = 0, - .threads_per_group = 0}; + struct thread_groups_list *cpu_tgl = &tgl[cpu]; + struct thread_groups *tg = NULL; + int i; + *err = 0; + + if (!dn) { + *err = -ENODATA; + return NULL; + } + + if (!cpu_tgl->nr_properties) { + *err = parse_thread_groups(dn, cpu_tgl); + if (*err) + goto out; + } + + for (i = 0; i < cpu_tgl->nr_properties; i++) { + if (cpu_tgl->property_tgs[i].property == group_property) { + tg = &cpu_tgl->property_tgs[i]; + break; + } + } + + if (!tg) + *err = -EINVAL; +out: + of_node_put(dn); + return tg; +} + +static int __init init_thread_group_cache_map(int cpu, int cache_property) + +{ int first_thread = cpu_first_thread_sibling(cpu); int i, cpu_group_start = -1, err = 0; + struct thread_groups *tg = NULL; + cpumask_var_t *mask = NULL; - if (!dn) - return -ENODATA; + if (cache_property != THREAD_GROUP_SHARE_L1 && + cache_property != THREAD_GROUP_SHARE_L2) + return -EINVAL; - err = parse_thread_groups(dn, &tg, THREAD_GROUP_SHARE_L1); - if (err) - goto out; + tg = get_thread_groups(cpu, cache_property, &err); + if (!tg) + return err; - cpu_group_start = get_cpu_thread_group_start(cpu, &tg); + cpu_group_start = get_cpu_thread_group_start(cpu, tg); if (unlikely(cpu_group_start == -1)) { WARN_ON_ONCE(1); - err = -ENODATA; - goto out; + return -ENODATA; } - zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu), - GFP_KERNEL, cpu_to_node(cpu)); + if 
(cache_property == THREAD_GROUP_SHARE_L1) + mask = &per_cpu(thread_group_l1_cache_map, cpu); + else if (cache_property == THREAD_GROUP_SHARE_L2) + mask = &per_cpu(thread_group_l2_cache_map, cpu); + + zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu)); for (i = first_thread; i < first_thread + threads_per_core; i++) { - int i_group_start = get_cpu_thread_group_start(i, &tg); + int i_group_start = get_cpu_thread_group_start(i, tg); if (unlikely(i_group_start == -1)) { WARN_ON_ONCE(1); - err = -ENODATA; - goto out; + return -ENODATA; } if (i_group_start == cpu_group_start) - cpumask_set_cpu(i, per_cpu(cpu_l1_cache_map, cpu)); + cpumask_set_cpu(i, *mask); } -out: - of_node_put(dn); - return err; + return 0; } static bool shared_caches; @@ -924,7 +995,7 @@ static int init_big_cores(void) int cpu; for_each_possible_cpu(cpu) { - int err = init_cpu_l1_cache_map(cpu); + int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L1); if (err) return err; @@ -935,6 +1006,16 @@ static int init_big_cores(void) } has_big_cores = true; + + for_each_possible_cpu(cpu) { + int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2); + + if (err) + return err; + } + + thread_group_shares_l2 = true; + pr_debug("L2 cache only shared by the threads in the small core\n"); return 0; } @@ -1249,6 +1330,28 @@ static bool update_mask_by_l2(int cpu, cpumask_var_t *mask) if (has_big_cores) submask_fn = cpu_smallcore_mask; + /* + * If the threads in a thread-group share L2 cache, then the + * L2-mask can be obtained from thread_group_l2_cache_map. + */ + if (thread_group_shares_l2) { + cpumask_set_cpu(cpu, cpu_l2_cache_mask(cpu)); + + for_each_cpu(i, per_cpu(thread_group_l2_cache_map, cpu)) { + if (cpu_online(i)) + set_cpus_related(i, cpu, cpu_l2_cache_mask); + } + + /* Verify that L1-cache siblings are a subset of L2 cache-siblings */ + if (!cpumask_equal(submask_fn(cpu), cpu_l2_cache_mask(cpu)) && + !cpumask_subset(submask_fn(cpu), cpu_l2_cache_mask(cpu))) { + pr_warn_once("CPU %d : Inconsistent L1 and L2 cache siblings\n", + cpu); + } + + return true; + } + l2_cache = cpu_to_l2cache(cpu); if (!l2_cache || !*mask) { /* Assume only core siblings share cache with this CPU */ @@ -1320,7 +1423,7 @@ static inline void add_cpu_to_smallcore_masks(int cpu) cpumask_set_cpu(cpu, cpu_smallcore_mask(cpu)); - for_each_cpu(i, per_cpu(cpu_l1_cache_map, cpu)) { + for_each_cpu(i, per_cpu(thread_group_l1_cache_map, cpu)) { if (cpu_online(i)) set_cpus_related(i, cpu, cpu_smallcore_mask); } diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c index 310bcd768cd5..7c85ed04a164 100644 --- a/arch/powerpc/kernel/syscall_64.c +++ b/arch/powerpc/kernel/syscall_64.c @@ -35,7 +35,31 @@ notrace long system_call_exception(long r3, long r4, long r5, BUG_ON(!FULL_REGS(regs)); BUG_ON(regs->softe != IRQS_ENABLED); - kuap_check_amr(); +#ifdef CONFIG_PPC_PKEY + if (mmu_has_feature(MMU_FTR_PKEY)) { + unsigned long amr, iamr; + bool flush_needed = false; + /* + * When entering from userspace we mostly have the AMR/IAMR + * different from kernel default values. Hence don't compare. 
+ */ + amr = mfspr(SPRN_AMR); + iamr = mfspr(SPRN_IAMR); + regs->amr = amr; + regs->iamr = iamr; + if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { + mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); + flush_needed = true; + } + if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) { + mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED); + flush_needed = true; + } + if (flush_needed) + isync(); + } else +#endif + kuap_check_amr(); account_cpu_user_entry(); @@ -245,6 +269,12 @@ again: account_cpu_user_exit(); +#ifdef CONFIG_PPC_BOOK3S /* BOOK3E not yet using this */ + /* + * We do this at the end so that we do context switch with KERNEL AMR + */ + kuap_user_restore(regs); +#endif return ret; } @@ -330,6 +360,10 @@ again: account_cpu_user_exit(); + /* + * We do this at the end so that we do context switch with KERNEL AMR + */ + kuap_user_restore(regs); return ret; } @@ -400,7 +434,7 @@ again: * which would cause Read-After-Write stalls. Hence, we take the AMR * value from the check above. */ - kuap_restore_amr(regs, amr); + kuap_kernel_restore(regs, amr); return ret; } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index cf3f8db7e0e3..67feb3524460 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -82,6 +82,7 @@ static struct clocksource clocksource_timebase = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, .mask = CLOCKSOURCE_MASK(64), .read = timebase_read, + .vdso_clock_mode = VDSO_CLOCKMODE_ARCHTIMER, }; #define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF @@ -576,14 +577,11 @@ void timer_interrupt(struct pt_regs *regs) struct pt_regs *old_regs; u64 now; - /* Some implementations of hotplug will get timer interrupts while - * offline, just ignore these and we also need to set - * decrementers_next_tb as MAX to make sure __check_irq_replay - * don't replay timer interrupt when return, otherwise we'll trap - * here infinitely :( + /* + * Some implementations of hotplug will get timer interrupts while + * offline, just ignore these. */ if (unlikely(!cpu_online(smp_processor_id()))) { - *next_tb = ~(u64)0; set_dec(decrementer_max); return; } @@ -855,95 +853,6 @@ static notrace u64 timebase_read(struct clocksource *cs) return (u64)get_tb(); } - -void update_vsyscall(struct timekeeper *tk) -{ - struct timespec64 xt; - struct clocksource *clock = tk->tkr_mono.clock; - u32 mult = tk->tkr_mono.mult; - u32 shift = tk->tkr_mono.shift; - u64 cycle_last = tk->tkr_mono.cycle_last; - u64 new_tb_to_xs, new_stamp_xsec; - u64 frac_sec; - - if (clock != &clocksource_timebase) - return; - - xt.tv_sec = tk->xtime_sec; - xt.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); - - /* Make userspace gettimeofday spin until we're done. */ - ++vdso_data->tb_update_count; - smp_mb(); - - /* - * This computes ((2^20 / 1e9) * mult) >> shift as a - * 0.64 fixed-point fraction. - * The computation in the else clause below won't overflow - * (as long as the timebase frequency is >= 1.049 MHz) - * but loses precision because we lose the low bits of the constant - * in the shift. Note that 19342813113834067 ~= 2^(20+64) / 1e9. - * For a shift of 24 the error is about 0.5e-9, or about 0.5ns - * over a second. (Shift values are usually 22, 23 or 24.) - * For high frequency clocks such as the 512MHz timebase clock - * on POWER[6789], the mult value is small (e.g. 32768000) - * and so we can shift the constant by 16 initially - * (295147905179 ~= 2^(20+64-16) / 1e9) and then do the - * remaining shifts after the multiplication, which gives a - * more accurate result (e.g. 
with mult = 32768000, shift = 24, - * the error is only about 1.2e-12, or 0.7ns over 10 minutes). - */ - if (mult <= 62500000 && clock->shift >= 16) - new_tb_to_xs = ((u64) mult * 295147905179ULL) >> (clock->shift - 16); - else - new_tb_to_xs = (u64) mult * (19342813113834067ULL >> clock->shift); - - /* - * Compute the fractional second in units of 2^-32 seconds. - * The fractional second is tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift - * in nanoseconds, so multiplying that by 2^32 / 1e9 gives - * it in units of 2^-32 seconds. - * We assume shift <= 32 because clocks_calc_mult_shift() - * generates shift values in the range 0 - 32. - */ - frac_sec = tk->tkr_mono.xtime_nsec << (32 - shift); - do_div(frac_sec, NSEC_PER_SEC); - - /* - * Work out new stamp_xsec value for any legacy users of systemcfg. - * stamp_xsec is in units of 2^-20 seconds. - */ - new_stamp_xsec = frac_sec >> 12; - new_stamp_xsec += tk->xtime_sec * XSEC_PER_SEC; - - /* - * tb_update_count is used to allow the userspace gettimeofday code - * to assure itself that it sees a consistent view of the tb_to_xs and - * stamp_xsec variables. It reads the tb_update_count, then reads - * tb_to_xs and stamp_xsec and then reads tb_update_count again. If - * the two values of tb_update_count match and are even then the - * tb_to_xs and stamp_xsec values are consistent. If not, then it - * loops back and reads them again until this criteria is met. - */ - vdso_data->tb_orig_stamp = cycle_last; - vdso_data->stamp_xsec = new_stamp_xsec; - vdso_data->tb_to_xs = new_tb_to_xs; - vdso_data->wtom_clock_sec = tk->wall_to_monotonic.tv_sec; - vdso_data->wtom_clock_nsec = tk->wall_to_monotonic.tv_nsec; - vdso_data->stamp_xtime_sec = xt.tv_sec; - vdso_data->stamp_xtime_nsec = xt.tv_nsec; - vdso_data->stamp_sec_fraction = frac_sec; - vdso_data->hrtimer_res = hrtimer_resolution; - smp_wmb(); - ++(vdso_data->tb_update_count); -} - -void update_vsyscall_tz(void) -{ - vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; - vdso_data->tz_dsttime = sys_tz.tz_dsttime; -} - static void __init clocksource_init(void) { struct clocksource *clock = &clocksource_timebase; @@ -1103,7 +1012,6 @@ void __init time_init(void) sys_tz.tz_dsttime = 0; } - vdso_data->tb_update_count = 0; vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; /* initialise and enable the large decrementer (if we have one) */ diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 5006dcbe1d9f..3ec7b443fe6b 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -347,12 +347,6 @@ static bool exception_common(int signr, struct pt_regs *regs, int code, current->thread.trap_nr = code; - /* - * Save all the pkey registers AMR/IAMR/UAMOR. Eg: Core dumps need - * to capture the content, if the task gets killed. 
- */ - thread_pkey_regs_save(¤t->thread); - return true; } @@ -757,31 +751,6 @@ int machine_check_generic(struct pt_regs *regs) { return 0; } -#elif defined(CONFIG_E200) -int machine_check_e200(struct pt_regs *regs) -{ - unsigned long reason = mfspr(SPRN_MCSR); - - printk("Machine check in kernel mode.\n"); - printk("Caused by (from MCSR=%lx): ", reason); - - if (reason & MCSR_MCP) - pr_cont("Machine Check Signal\n"); - if (reason & MCSR_CP_PERR) - pr_cont("Cache Push Parity Error\n"); - if (reason & MCSR_CPERR) - pr_cont("Cache Parity Error\n"); - if (reason & MCSR_EXCP_ERR) - pr_cont("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n"); - if (reason & MCSR_BUS_IRERR) - pr_cont("Bus - Read Bus Error on instruction fetch\n"); - if (reason & MCSR_BUS_DRERR) - pr_cont("Bus - Read Bus Error on data load\n"); - if (reason & MCSR_BUS_WRERR) - pr_cont("Bus - Write Bus Error on buffered store or cache line push\n"); - - return 0; -} #elif defined(CONFIG_PPC32) int machine_check_generic(struct pt_regs *regs) { @@ -1190,7 +1159,9 @@ static void parse_fpe(struct pt_regs *regs) flush_fp_to_thread(current); +#ifdef CONFIG_PPC_FPU_REGS code = __parse_fpscr(current->thread.fp_state.fpscr); +#endif _exception(SIGFPE, regs, code, regs->nip); } diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 8dad44262e75..e839a906fdf2 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -17,7 +17,10 @@ #include <linux/elf.h> #include <linux/security.h> #include <linux/memblock.h> +#include <linux/syscalls.h> +#include <vdso/datapage.h> +#include <asm/syscall.h> #include <asm/processor.h> #include <asm/mmu.h> #include <asm/mmu_context.h> @@ -30,39 +33,11 @@ #include <asm/vdso_datapage.h> #include <asm/setup.h> -#undef DEBUG - -#ifdef DEBUG -#define DBG(fmt...) printk(fmt) -#else -#define DBG(fmt...) -#endif - -/* Max supported size for symbol names */ -#define MAX_SYMNAME 64 - /* The alignment of the vDSO */ #define VDSO_ALIGNMENT (1 << 16) -static unsigned int vdso32_pages; -static void *vdso32_kbase; -static struct page **vdso32_pagelist; -unsigned long vdso32_sigtramp; -unsigned long vdso32_rt_sigtramp; - -#ifdef CONFIG_VDSO32 extern char vdso32_start, vdso32_end; -#endif - -#ifdef CONFIG_PPC64 extern char vdso64_start, vdso64_end; -static void *vdso64_kbase = &vdso64_start; -static unsigned int vdso64_pages; -static struct page **vdso64_pagelist; -unsigned long vdso64_rt_sigtramp; -#endif /* CONFIG_PPC64 */ - -static int vdso_ready; /* * The vdso data page (aka. systemcfg for old ppc64 fans) is here. 
@@ -70,77 +45,63 @@ static int vdso_ready; * with it, it will become dynamically allocated */ static union { - struct vdso_data data; + struct vdso_arch_data data; u8 page[PAGE_SIZE]; } vdso_data_store __page_aligned_data; -struct vdso_data *vdso_data = &vdso_data_store.data; +struct vdso_arch_data *vdso_data = &vdso_data_store.data; -/* Format of the patch table */ -struct vdso_patch_def +static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma, + unsigned long text_size) { - unsigned long ftr_mask, ftr_value; - const char *gen_name; - const char *fix_name; -}; + unsigned long new_size = new_vma->vm_end - new_vma->vm_start; -/* Table of functions to patch based on the CPU type/revision - * - * Currently, we only change sync_dicache to do nothing on processors - * with a coherent icache - */ -static struct vdso_patch_def vdso_patches[] = { - { - CPU_FTR_COHERENT_ICACHE, CPU_FTR_COHERENT_ICACHE, - "__kernel_sync_dicache", "__kernel_sync_dicache_p5" - }, -}; + if (new_size != text_size + PAGE_SIZE) + return -EINVAL; -/* - * Some infos carried around for each of them during parsing at - * boot time. - */ -struct lib32_elfinfo + current->mm->context.vdso = (void __user *)new_vma->vm_start + PAGE_SIZE; + + return 0; +} + +static int vdso32_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) { - Elf32_Ehdr *hdr; /* ptr to ELF */ - Elf32_Sym *dynsym; /* ptr to .dynsym section */ - unsigned long dynsymsize; /* size of .dynsym section */ - char *dynstr; /* ptr to .dynstr section */ - unsigned long text; /* offset of .text section in .so */ -}; + return vdso_mremap(sm, new_vma, &vdso32_end - &vdso32_start); +} -struct lib64_elfinfo +static int vdso64_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) { - Elf64_Ehdr *hdr; - Elf64_Sym *dynsym; - unsigned long dynsymsize; - char *dynstr; - unsigned long text; + return vdso_mremap(sm, new_vma, &vdso64_end - &vdso64_start); +} + +static struct vm_special_mapping vdso32_spec __ro_after_init = { + .name = "[vdso]", + .mremap = vdso32_mremap, }; +static struct vm_special_mapping vdso64_spec __ro_after_init = { + .name = "[vdso]", + .mremap = vdso64_mremap, +}; /* * This is called from binfmt_elf, we create the special vma for the * vDSO and insert it into the mm struct tree */ -int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; - struct page **vdso_pagelist; - unsigned long vdso_pages; + struct vm_special_mapping *vdso_spec; + struct vm_area_struct *vma; + unsigned long vdso_size; unsigned long vdso_base; - int rc; - - if (!vdso_ready) - return 0; -#ifdef CONFIG_PPC64 if (is_32bit_task()) { - vdso_pagelist = vdso32_pagelist; - vdso_pages = vdso32_pages; + vdso_spec = &vdso32_spec; + vdso_size = &vdso32_end - &vdso32_start; vdso_base = VDSO32_MBASE; } else { - vdso_pagelist = vdso64_pagelist; - vdso_pages = vdso64_pages; + vdso_spec = &vdso64_spec; + vdso_size = &vdso64_end - &vdso64_start; /* * On 64bit we don't have a preferred map address. 
This * allows get_unmapped_area to find an area near other mmaps @@ -148,21 +109,9 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) */ vdso_base = 0; } -#else - vdso_pagelist = vdso32_pagelist; - vdso_pages = vdso32_pages; - vdso_base = VDSO32_MBASE; -#endif - current->mm->context.vdso_base = 0; - - /* vDSO has a problem and was disabled, just don't "enable" it for the - * process - */ - if (vdso_pages == 0) - return 0; /* Add a page to the vdso size for the data page */ - vdso_pages ++; + vdso_size += PAGE_SIZE; /* * pick a base address for the vDSO in process space. We try to put it @@ -170,16 +119,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) * and end up putting it elsewhere. * Add enough to the size so that the result can be aligned. */ - if (mmap_write_lock_killable(mm)) - return -EINTR; vdso_base = get_unmapped_area(NULL, vdso_base, - (vdso_pages << PAGE_SHIFT) + - ((VDSO_ALIGNMENT - 1) & PAGE_MASK), + vdso_size + ((VDSO_ALIGNMENT - 1) & PAGE_MASK), 0, 0); - if (IS_ERR_VALUE(vdso_base)) { - rc = vdso_base; - goto fail_mmapsem; - } + if (IS_ERR_VALUE(vdso_base)) + return vdso_base; /* Add required alignment. */ vdso_base = ALIGN(vdso_base, VDSO_ALIGNMENT); @@ -187,9 +131,9 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) /* * Put vDSO base into mm struct. We need to do this before calling * install_special_mapping or the perf counter mmap tracking code - * will fail to recognise it as a vDSO (since arch_vma_name fails). + * will fail to recognise it as a vDSO. */ - current->mm->context.vdso_base = vdso_base; + mm->context.vdso = (void __user *)vdso_base + PAGE_SIZE; /* * our vma flags don't have VM_WRITE so by default, the process isn't @@ -201,434 +145,54 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) * It's fine to use that for setting breakpoints in the vDSO code * pages though. 
*/ - rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, - VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - vdso_pagelist); - if (rc) { - current->mm->context.vdso_base = 0; - goto fail_mmapsem; - } - - mmap_write_unlock(mm); - return 0; - - fail_mmapsem: - mmap_write_unlock(mm); - return rc; -} - -const char *arch_vma_name(struct vm_area_struct *vma) -{ - if (vma->vm_mm && vma->vm_start == vma->vm_mm->context.vdso_base) - return "[vdso]"; - return NULL; -} - - - -#ifdef CONFIG_VDSO32 -static void * __init find_section32(Elf32_Ehdr *ehdr, const char *secname, - unsigned long *size) -{ - Elf32_Shdr *sechdrs; - unsigned int i; - char *secnames; - - /* Grab section headers and strings so we can tell who is who */ - sechdrs = (void *)ehdr + ehdr->e_shoff; - secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset; - - /* Find the section they want */ - for (i = 1; i < ehdr->e_shnum; i++) { - if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) { - if (size) - *size = sechdrs[i].sh_size; - return (void *)ehdr + sechdrs[i].sh_offset; - } - } - *size = 0; - return NULL; -} - -static Elf32_Sym * __init find_symbol32(struct lib32_elfinfo *lib, - const char *symname) -{ - unsigned int i; - char name[MAX_SYMNAME], *c; - - for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) { - if (lib->dynsym[i].st_name == 0) - continue; - strlcpy(name, lib->dynstr + lib->dynsym[i].st_name, - MAX_SYMNAME); - c = strchr(name, '@'); - if (c) - *c = 0; - if (strcmp(symname, name) == 0) - return &lib->dynsym[i]; - } - return NULL; -} - -/* Note that we assume the section is .text and the symbol is relative to - * the library base - */ -static unsigned long __init find_function32(struct lib32_elfinfo *lib, - const char *symname) -{ - Elf32_Sym *sym = find_symbol32(lib, symname); - - if (sym == NULL) { - printk(KERN_WARNING "vDSO32: function %s not found !\n", - symname); - return 0; - } - return sym->st_value - VDSO32_LBASE; -} - -static int __init vdso_do_func_patch32(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64, - const char *orig, const char *fix) -{ - Elf32_Sym *sym32_gen, *sym32_fix; - - sym32_gen = find_symbol32(v32, orig); - if (sym32_gen == NULL) { - printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", orig); - return -1; - } - if (fix == NULL) { - sym32_gen->st_name = 0; - return 0; - } - sym32_fix = find_symbol32(v32, fix); - if (sym32_fix == NULL) { - printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", fix); - return -1; - } - sym32_gen->st_value = sym32_fix->st_value; - sym32_gen->st_size = sym32_fix->st_size; - sym32_gen->st_info = sym32_fix->st_info; - sym32_gen->st_other = sym32_fix->st_other; - sym32_gen->st_shndx = sym32_fix->st_shndx; - - return 0; -} -#else /* !CONFIG_VDSO32 */ -static unsigned long __init find_function32(struct lib32_elfinfo *lib, - const char *symname) -{ - return 0; -} - -static int __init vdso_do_func_patch32(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64, - const char *orig, const char *fix) -{ - return 0; -} -#endif /* CONFIG_VDSO32 */ - - -#ifdef CONFIG_PPC64 - -static void * __init find_section64(Elf64_Ehdr *ehdr, const char *secname, - unsigned long *size) -{ - Elf64_Shdr *sechdrs; - unsigned int i; - char *secnames; - - /* Grab section headers and strings so we can tell who is who */ - sechdrs = (void *)ehdr + ehdr->e_shoff; - secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset; - - /* Find the section they want */ - for (i = 1; i < ehdr->e_shnum; i++) { - if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) 
{ - if (size) - *size = sechdrs[i].sh_size; - return (void *)ehdr + sechdrs[i].sh_offset; - } - } - if (size) - *size = 0; - return NULL; -} - -static Elf64_Sym * __init find_symbol64(struct lib64_elfinfo *lib, - const char *symname) -{ - unsigned int i; - char name[MAX_SYMNAME], *c; - - for (i = 0; i < (lib->dynsymsize / sizeof(Elf64_Sym)); i++) { - if (lib->dynsym[i].st_name == 0) - continue; - strlcpy(name, lib->dynstr + lib->dynsym[i].st_name, - MAX_SYMNAME); - c = strchr(name, '@'); - if (c) - *c = 0; - if (strcmp(symname, name) == 0) - return &lib->dynsym[i]; - } - return NULL; -} - -/* Note that we assume the section is .text and the symbol is relative to - * the library base - */ -static unsigned long __init find_function64(struct lib64_elfinfo *lib, - const char *symname) -{ - Elf64_Sym *sym = find_symbol64(lib, symname); - - if (sym == NULL) { - printk(KERN_WARNING "vDSO64: function %s not found !\n", - symname); - return 0; - } - return sym->st_value - VDSO64_LBASE; -} - -static int __init vdso_do_func_patch64(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64, - const char *orig, const char *fix) -{ - Elf64_Sym *sym64_gen, *sym64_fix; - - sym64_gen = find_symbol64(v64, orig); - if (sym64_gen == NULL) { - printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", orig); - return -1; - } - if (fix == NULL) { - sym64_gen->st_name = 0; - return 0; - } - sym64_fix = find_symbol64(v64, fix); - if (sym64_fix == NULL) { - printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", fix); - return -1; - } - sym64_gen->st_value = sym64_fix->st_value; - sym64_gen->st_size = sym64_fix->st_size; - sym64_gen->st_info = sym64_fix->st_info; - sym64_gen->st_other = sym64_fix->st_other; - sym64_gen->st_shndx = sym64_fix->st_shndx; - - return 0; + vma = _install_special_mapping(mm, vdso_base, vdso_size, + VM_READ | VM_EXEC | VM_MAYREAD | + VM_MAYWRITE | VM_MAYEXEC, vdso_spec); + return PTR_ERR_OR_ZERO(vma); } -#endif /* CONFIG_PPC64 */ - - -static __init int vdso_do_find_sections(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64) -{ - void *sect; - - /* - * Locate symbol tables & text section - */ - -#ifdef CONFIG_VDSO32 - v32->dynsym = find_section32(v32->hdr, ".dynsym", &v32->dynsymsize); - v32->dynstr = find_section32(v32->hdr, ".dynstr", NULL); - if (v32->dynsym == NULL || v32->dynstr == NULL) { - printk(KERN_ERR "vDSO32: required symbol section not found\n"); - return -1; - } - sect = find_section32(v32->hdr, ".text", NULL); - if (sect == NULL) { - printk(KERN_ERR "vDSO32: the .text section was not found\n"); - return -1; - } - v32->text = sect - vdso32_kbase; -#endif - -#ifdef CONFIG_PPC64 - v64->dynsym = find_section64(v64->hdr, ".dynsym", &v64->dynsymsize); - v64->dynstr = find_section64(v64->hdr, ".dynstr", NULL); - if (v64->dynsym == NULL || v64->dynstr == NULL) { - printk(KERN_ERR "vDSO64: required symbol section not found\n"); - return -1; - } - sect = find_section64(v64->hdr, ".text", NULL); - if (sect == NULL) { - printk(KERN_ERR "vDSO64: the .text section was not found\n"); - return -1; - } - v64->text = sect - vdso64_kbase; -#endif /* CONFIG_PPC64 */ - - return 0; -} - -static __init void vdso_setup_trampolines(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64) +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { - /* - * Find signal trampolines - */ - -#ifdef CONFIG_PPC64 - vdso64_rt_sigtramp = find_function64(v64, "__kernel_sigtramp_rt64"); -#endif - vdso32_sigtramp = find_function32(v32, "__kernel_sigtramp32"); - vdso32_rt_sigtramp = find_function32(v32, 
"__kernel_sigtramp_rt32"); -} + struct mm_struct *mm = current->mm; + int rc; -static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64) -{ -#ifdef CONFIG_VDSO32 - Elf32_Sym *sym32; -#endif -#ifdef CONFIG_PPC64 - Elf64_Sym *sym64; + mm->context.vdso = NULL; - sym64 = find_symbol64(v64, "__kernel_datapage_offset"); - if (sym64 == NULL) { - printk(KERN_ERR "vDSO64: Can't find symbol " - "__kernel_datapage_offset !\n"); - return -1; - } - *((int *)(vdso64_kbase + sym64->st_value - VDSO64_LBASE)) = - (vdso64_pages << PAGE_SHIFT) - - (sym64->st_value - VDSO64_LBASE); -#endif /* CONFIG_PPC64 */ + if (mmap_write_lock_killable(mm)) + return -EINTR; -#ifdef CONFIG_VDSO32 - sym32 = find_symbol32(v32, "__kernel_datapage_offset"); - if (sym32 == NULL) { - printk(KERN_ERR "vDSO32: Can't find symbol " - "__kernel_datapage_offset !\n"); - return -1; - } - *((int *)(vdso32_kbase + (sym32->st_value - VDSO32_LBASE))) = - (vdso32_pages << PAGE_SHIFT) - - (sym32->st_value - VDSO32_LBASE); -#endif + rc = __arch_setup_additional_pages(bprm, uses_interp); + if (rc) + mm->context.vdso = NULL; - return 0; + mmap_write_unlock(mm); + return rc; } +#define VDSO_DO_FIXUPS(type, value, bits, sec) do { \ + void *__start = (void *)VDSO##bits##_SYMBOL(&vdso##bits##_start, sec##_start); \ + void *__end = (void *)VDSO##bits##_SYMBOL(&vdso##bits##_start, sec##_end); \ + \ + do_##type##_fixups((value), __start, __end); \ +} while (0) -static __init int vdso_fixup_features(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64) +static void __init vdso_fixup_features(void) { - unsigned long size; - void *start; - #ifdef CONFIG_PPC64 - start = find_section64(v64->hdr, "__ftr_fixup", &size); - if (start) - do_feature_fixups(cur_cpu_spec->cpu_features, - start, start + size); - - start = find_section64(v64->hdr, "__mmu_ftr_fixup", &size); - if (start) - do_feature_fixups(cur_cpu_spec->mmu_features, - start, start + size); - - start = find_section64(v64->hdr, "__fw_ftr_fixup", &size); - if (start) - do_feature_fixups(powerpc_firmware_features, - start, start + size); - - start = find_section64(v64->hdr, "__lwsync_fixup", &size); - if (start) - do_lwsync_fixups(cur_cpu_spec->cpu_features, - start, start + size); + VDSO_DO_FIXUPS(feature, cur_cpu_spec->cpu_features, 64, ftr_fixup); + VDSO_DO_FIXUPS(feature, cur_cpu_spec->mmu_features, 64, mmu_ftr_fixup); + VDSO_DO_FIXUPS(feature, powerpc_firmware_features, 64, fw_ftr_fixup); + VDSO_DO_FIXUPS(lwsync, cur_cpu_spec->cpu_features, 64, lwsync_fixup); #endif /* CONFIG_PPC64 */ #ifdef CONFIG_VDSO32 - start = find_section32(v32->hdr, "__ftr_fixup", &size); - if (start) - do_feature_fixups(cur_cpu_spec->cpu_features, - start, start + size); - - start = find_section32(v32->hdr, "__mmu_ftr_fixup", &size); - if (start) - do_feature_fixups(cur_cpu_spec->mmu_features, - start, start + size); - + VDSO_DO_FIXUPS(feature, cur_cpu_spec->cpu_features, 32, ftr_fixup); + VDSO_DO_FIXUPS(feature, cur_cpu_spec->mmu_features, 32, mmu_ftr_fixup); #ifdef CONFIG_PPC64 - start = find_section32(v32->hdr, "__fw_ftr_fixup", &size); - if (start) - do_feature_fixups(powerpc_firmware_features, - start, start + size); + VDSO_DO_FIXUPS(feature, powerpc_firmware_features, 32, fw_ftr_fixup); #endif /* CONFIG_PPC64 */ - - start = find_section32(v32->hdr, "__lwsync_fixup", &size); - if (start) - do_lwsync_fixups(cur_cpu_spec->cpu_features, - start, start + size); + VDSO_DO_FIXUPS(lwsync, cur_cpu_spec->cpu_features, 32, lwsync_fixup); #endif - - return 0; -} - -static __init int 
vdso_fixup_alt_funcs(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(vdso_patches); i++) { - struct vdso_patch_def *patch = &vdso_patches[i]; - int match = (cur_cpu_spec->cpu_features & patch->ftr_mask) - == patch->ftr_value; - if (!match) - continue; - - DBG("replacing %s with %s...\n", patch->gen_name, - patch->fix_name ? "NONE" : patch->fix_name); - - /* - * Patch the 32 bits and 64 bits symbols. Note that we do not - * patch the "." symbol on 64 bits. - * It would be easy to do, but doesn't seem to be necessary, - * patching the OPD symbol is enough. - */ - vdso_do_func_patch32(v32, v64, patch->gen_name, - patch->fix_name); -#ifdef CONFIG_PPC64 - vdso_do_func_patch64(v32, v64, patch->gen_name, - patch->fix_name); -#endif /* CONFIG_PPC64 */ - } - - return 0; -} - - -static __init int vdso_setup(void) -{ - struct lib32_elfinfo v32; - struct lib64_elfinfo v64; - - v32.hdr = vdso32_kbase; -#ifdef CONFIG_PPC64 - v64.hdr = vdso64_kbase; -#endif - if (vdso_do_find_sections(&v32, &v64)) - return -1; - - if (vdso_fixup_datapage(&v32, &v64)) - return -1; - - if (vdso_fixup_features(&v32, &v64)) - return -1; - - if (vdso_fixup_alt_funcs(&v32, &v64)) - return -1; - - vdso_setup_trampolines(&v32, &v64); - - return 0; } /* @@ -638,27 +202,13 @@ static __init int vdso_setup(void) static void __init vdso_setup_syscall_map(void) { unsigned int i; - extern unsigned long *sys_call_table; -#ifdef CONFIG_PPC64 - extern unsigned long *compat_sys_call_table; -#endif - extern unsigned long sys_ni_syscall; - for (i = 0; i < NR_syscalls; i++) { -#ifdef CONFIG_PPC64 - if (sys_call_table[i] != sys_ni_syscall) - vdso_data->syscall_map_64[i >> 5] |= - 0x80000000UL >> (i & 0x1f); + if (sys_call_table[i] != (unsigned long)&sys_ni_syscall) + vdso_data->syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f); if (IS_ENABLED(CONFIG_COMPAT) && - compat_sys_call_table[i] != sys_ni_syscall) - vdso_data->syscall_map_32[i >> 5] |= - 0x80000000UL >> (i & 0x1f); -#else /* CONFIG_PPC64 */ - if (sys_call_table[i] != sys_ni_syscall) - vdso_data->syscall_map_32[i >> 5] |= - 0x80000000UL >> (i & 0x1f); -#endif /* CONFIG_PPC64 */ + compat_sys_call_table[i] != (unsigned long)&sys_ni_syscall) + vdso_data->compat_syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f); } } @@ -689,10 +239,26 @@ int vdso_getcpu_init(void) early_initcall(vdso_getcpu_init); #endif -static int __init vdso_init(void) +static struct page ** __init vdso_setup_pages(void *start, void *end) { int i; + struct page **pagelist; + int pages = (end - start) >> PAGE_SHIFT; + + pagelist = kcalloc(pages + 1, sizeof(struct page *), GFP_KERNEL); + if (!pagelist) + panic("%s: Cannot allocate page list for VDSO", __func__); + + pagelist[0] = virt_to_page(vdso_data); + + for (i = 0; i < pages; i++) + pagelist[i + 1] = virt_to_page(start + i * PAGE_SIZE); + + return pagelist; +} +static int __init vdso_init(void) +{ #ifdef CONFIG_PPC64 /* * Fill up the "systemcfg" stuff for backward compatibility @@ -717,75 +283,19 @@ static int __init vdso_init(void) vdso_data->icache_block_size = ppc64_caches.l1i.block_size; vdso_data->dcache_log_block_size = ppc64_caches.l1d.log_block_size; vdso_data->icache_log_block_size = ppc64_caches.l1i.log_block_size; - - /* - * Calculate the size of the 64 bits vDSO - */ - vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT; - DBG("vdso64_kbase: %p, 0x%x pages\n", vdso64_kbase, vdso64_pages); #endif /* CONFIG_PPC64 */ - -#ifdef CONFIG_VDSO32 - vdso32_kbase = &vdso32_start; - - /* - * Calculate the size 
of the 32 bits vDSO - */ - vdso32_pages = (&vdso32_end - &vdso32_start) >> PAGE_SHIFT; - DBG("vdso32_kbase: %p, 0x%x pages\n", vdso32_kbase, vdso32_pages); -#endif - - - /* - * Setup the syscall map in the vDOS - */ vdso_setup_syscall_map(); - /* - * Initialize the vDSO images in memory, that is do necessary - * fixups of vDSO symbols, locate trampolines, etc... - */ - if (vdso_setup()) { - printk(KERN_ERR "vDSO setup failure, not enabled !\n"); - vdso32_pages = 0; -#ifdef CONFIG_PPC64 - vdso64_pages = 0; -#endif - return 0; - } + vdso_fixup_features(); -#ifdef CONFIG_VDSO32 - /* Make sure pages are in the correct state */ - vdso32_pagelist = kcalloc(vdso32_pages + 2, sizeof(struct page *), - GFP_KERNEL); - BUG_ON(vdso32_pagelist == NULL); - for (i = 0; i < vdso32_pages; i++) { - struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE); - get_page(pg); - vdso32_pagelist[i] = pg; - } - vdso32_pagelist[i++] = virt_to_page(vdso_data); - vdso32_pagelist[i] = NULL; -#endif - -#ifdef CONFIG_PPC64 - vdso64_pagelist = kcalloc(vdso64_pages + 2, sizeof(struct page *), - GFP_KERNEL); - BUG_ON(vdso64_pagelist == NULL); - for (i = 0; i < vdso64_pages; i++) { - struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE); - get_page(pg); - vdso64_pagelist[i] = pg; - } - vdso64_pagelist[i++] = virt_to_page(vdso_data); - vdso64_pagelist[i] = NULL; -#endif /* CONFIG_PPC64 */ + if (IS_ENABLED(CONFIG_VDSO32)) + vdso32_spec.pages = vdso_setup_pages(&vdso32_start, &vdso32_end); - get_page(virt_to_page(vdso_data)); + if (IS_ENABLED(CONFIG_PPC64)) + vdso64_spec.pages = vdso_setup_pages(&vdso64_start, &vdso64_end); smp_wmb(); - vdso_ready = 1; return 0; } diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index 73eada6bc8cd..59aa2944ecae 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -2,8 +2,20 @@ # List of files in the vdso, has to be asm only for now +ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN +include $(srctree)/lib/vdso/Makefile + obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o +ifneq ($(c-gettimeofday-y),) + CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) + CFLAGS_vgettimeofday.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) + CFLAGS_vgettimeofday.o += $(call cc-option, -fno-stack-protector) + CFLAGS_vgettimeofday.o += -DDISABLE_BRANCH_PROFILING + CFLAGS_vgettimeofday.o += -ffreestanding -fasynchronous-unwind-tables + CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) +endif + # Build rules ifdef CROSS32_COMPILE @@ -15,14 +27,16 @@ endif CC32FLAGS := ifdef CONFIG_PPC64 CC32FLAGS += -m32 +KBUILD_CFLAGS := $(filter-out -mcmodel=medium,$(KBUILD_CFLAGS)) endif -targets := $(obj-vdso32) vdso32.so vdso32.so.dbg +targets := $(obj-vdso32) vdso32.so.dbg obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) GCOV_PROFILE := n KCOV_INSTRUMENT := n UBSAN_SANITIZE := n +KASAN_SANITIZE := n ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ -Wl,-soname=linux-vdso32.so.1 -Wl,--hash-style=both @@ -33,33 +47,30 @@ targets += vdso32.lds CPPFLAGS_vdso32.lds += -P -C -Upowerpc # Force dependency (incbin is bad) -$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so +$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so.dbg # link rule for the .so file, .lds has to be first -$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE - $(call if_changed,vdso32ld) - -# strip rule for the .so 
file -$(obj)/%.so: OBJCOPYFLAGS := -S -$(obj)/%.so: $(obj)/%.so.dbg FORCE - $(call if_changed,objcopy) +$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday.o FORCE + $(call if_changed,vdso32ld_and_check) # assembly rules for the .S files $(obj-vdso32): %.o: %.S FORCE $(call if_changed_dep,vdso32as) +$(obj)/vgettimeofday.o: %.o: %.c FORCE + $(call if_changed_dep,vdso32cc) + +# Generate VDSO offsets using helper script +gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh +quiet_cmd_vdsosym = VDSOSYM $@ + cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ + +include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE + $(call if_changed,vdsosym) # actual build commands -quiet_cmd_vdso32ld = VDSO32L $@ - cmd_vdso32ld = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) +quiet_cmd_vdso32ld_and_check = VDSO32L $@ + cmd_vdso32ld_and_check = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) ; $(cmd_vdso_check) quiet_cmd_vdso32as = VDSO32A $@ cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) -c -o $@ $< - -# install commands for the unstripped file -quiet_cmd_vdso_install = INSTALL $@ - cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ - -vdso32.so: $(obj)/vdso32.so.dbg - @mkdir -p $(MODLIB)/vdso - $(call cmd,vdso_install) - -vdso_install: vdso32.so +quiet_cmd_vdso32cc = VDSO32C $@ + cmd_vdso32cc = $(VDSOCC) $(c_flags) $(CC32FLAGS) -c -o $@ $< diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S index 3440ddf21c8b..f340e82d1981 100644 --- a/arch/powerpc/kernel/vdso32/cacheflush.S +++ b/arch/powerpc/kernel/vdso32/cacheflush.S @@ -24,11 +24,15 @@ */ V_FUNCTION_BEGIN(__kernel_sync_dicache) .cfi_startproc +BEGIN_FTR_SECTION + b 3f +END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) #ifdef CONFIG_PPC64 mflr r12 .cfi_register lr,r12 - get_datapage r10, r0 + get_datapage r10 mtlr r12 + .cfi_restore lr #endif #ifdef CONFIG_PPC64 @@ -84,20 +88,11 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) isync li r3,0 blr - .cfi_endproc -V_FUNCTION_END(__kernel_sync_dicache) - - -/* - * POWER5 version of __kernel_sync_dicache - */ -V_FUNCTION_BEGIN(__kernel_sync_dicache_p5) - .cfi_startproc +3: crclr cr0*4+so sync isync li r3,0 blr .cfi_endproc -V_FUNCTION_END(__kernel_sync_dicache_p5) - +V_FUNCTION_END(__kernel_sync_dicache) diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S index 1d23e2771dba..65244416ab94 100644 --- a/arch/powerpc/kernel/vdso32/datapage.S +++ b/arch/powerpc/kernel/vdso32/datapage.S @@ -13,9 +13,6 @@ #include <asm/vdso_datapage.h> .text - .global __kernel_datapage_offset; -__kernel_datapage_offset: - .long 0 /* * void *__kernel_get_syscall_map(unsigned int *syscall_count) ; @@ -31,7 +28,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) mflr r12 .cfi_register lr,r12 mr. 
r4,r3 - get_datapage r3, r0 + get_datapage r3 mtlr r12 addi r3,r3,CFG_SYSCALL_MAP32 beqlr @@ -51,7 +48,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq) .cfi_startproc mflr r12 .cfi_register lr,r12 - get_datapage r3, r0 + get_datapage r3 lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3) lwz r3,CFG_TB_TICKS_PER_SEC(r3) mtlr r12 diff --git a/arch/powerpc/kernel/vdso32/gen_vdso_offsets.sh b/arch/powerpc/kernel/vdso32/gen_vdso_offsets.sh new file mode 100755 index 000000000000..c7b54a5dcd3e --- /dev/null +++ b/arch/powerpc/kernel/vdso32/gen_vdso_offsets.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# +# Match symbols in the DSO that look like VDSO_*; produce a header file +# of constant offsets into the shared object. +# +# Doing this inside the Makefile will break the $(filter-out) function, +# causing Kbuild to rebuild the vdso-offsets header file every time. +# +# Author: Will Deacon <will.deacon@arm.com +# + +LC_ALL=C +sed -n -e 's/^00*/0/' -e \ +'s/^\([0-9a-fA-F]*\) . VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso32_offset_\2\t0x\1/p' diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index e7f8f9f1b3f4..a6e29f880e0e 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -12,13 +12,7 @@ #include <asm/vdso_datapage.h> #include <asm/asm-offsets.h> #include <asm/unistd.h> - -/* Offset for the low 32-bit part of a field of long type */ -#ifdef CONFIG_PPC64 -#define LOPART 4 -#else -#define LOPART 0 -#endif +#include <asm/vdso/gettimeofday.h> .text /* @@ -28,32 +22,7 @@ * */ V_FUNCTION_BEGIN(__kernel_gettimeofday) - .cfi_startproc - mflr r12 - .cfi_register lr,r12 - - mr. r10,r3 /* r10 saves tv */ - mr r11,r4 /* r11 saves tz */ - get_datapage r9, r0 - beq 3f - LOAD_REG_IMMEDIATE(r7, 1000000) /* load up USEC_PER_SEC */ - bl __do_get_tspec@local /* get sec/usec from tb & kernel */ - stw r3,TVAL32_TV_SEC(r10) - stw r4,TVAL32_TV_USEC(r10) - -3: cmplwi r11,0 /* check if tz is NULL */ - mtlr r12 - crclr cr0*4+so - li r3,0 - beqlr - - lwz r4,CFG_TZ_MINUTEWEST(r9)/* fill tz */ - lwz r5,CFG_TZ_DSTTIME(r9) - stw r4,TZONE_TZ_MINWEST(r11) - stw r5,TZONE_TZ_DSTTIME(r11) - - blr - .cfi_endproc + cvdso_call __c_kernel_gettimeofday V_FUNCTION_END(__kernel_gettimeofday) /* @@ -63,129 +32,18 @@ V_FUNCTION_END(__kernel_gettimeofday) * */ V_FUNCTION_BEGIN(__kernel_clock_gettime) - .cfi_startproc - /* Check for supported clock IDs */ - cmpli cr0,r3,CLOCK_REALTIME - cmpli cr1,r3,CLOCK_MONOTONIC - cror cr0*4+eq,cr0*4+eq,cr1*4+eq - - cmpli cr5,r3,CLOCK_REALTIME_COARSE - cmpli cr6,r3,CLOCK_MONOTONIC_COARSE - cror cr5*4+eq,cr5*4+eq,cr6*4+eq - - cror cr0*4+eq,cr0*4+eq,cr5*4+eq - bne cr0, .Lgettime_fallback - - mflr r12 /* r12 saves lr */ - .cfi_register lr,r12 - mr r11,r4 /* r11 saves tp */ - get_datapage r9, r0 - LOAD_REG_IMMEDIATE(r7, NSEC_PER_SEC) /* load up NSEC_PER_SEC */ - beq cr5, .Lcoarse_clocks -.Lprecise_clocks: - bl __do_get_tspec@local /* get sec/nsec from tb & kernel */ - bne cr1, .Lfinish /* not monotonic -> all done */ - - /* - * CLOCK_MONOTONIC - */ - - /* now we must fixup using wall to monotonic. We need to snapshot - * that value and do the counter trick again. Fortunately, we still - * have the counter value in r8 that was returned by __do_get_xsec. - * At this point, r3,r4 contain our sec/nsec values, r5 and r6 - * can be used, r7 contains NSEC_PER_SEC. - */ - - lwz r5,(WTOM_CLOCK_SEC+LOPART)(r9) - lwz r6,WTOM_CLOCK_NSEC(r9) - - /* We now have our offset in r5,r6. 
We create a fake dependency - * on that value and re-check the counter - */ - or r0,r6,r5 - xor r0,r0,r0 - add r9,r9,r0 - lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9) - cmpl cr0,r8,r0 /* check if updated */ - bne- .Lprecise_clocks - b .Lfinish_monotonic - - /* - * For coarse clocks we get data directly from the vdso data page, so - * we don't need to call __do_get_tspec, but we still need to do the - * counter trick. - */ -.Lcoarse_clocks: - lwz r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9) - andi. r0,r8,1 /* pending update ? loop */ - bne- .Lcoarse_clocks - add r9,r9,r0 /* r0 is already 0 */ - - /* - * CLOCK_REALTIME_COARSE, below values are needed for MONOTONIC_COARSE - * too - */ - lwz r3,STAMP_XTIME_SEC+LOPART(r9) - lwz r4,STAMP_XTIME_NSEC+LOPART(r9) - bne cr6,1f - - /* CLOCK_MONOTONIC_COARSE */ - lwz r5,(WTOM_CLOCK_SEC+LOPART)(r9) - lwz r6,WTOM_CLOCK_NSEC(r9) - - /* check if counter has updated */ - or r0,r6,r5 -1: or r0,r0,r3 - or r0,r0,r4 - xor r0,r0,r0 - add r3,r3,r0 - lwz r0,CFG_TB_UPDATE_COUNT+LOPART(r9) - cmpl cr0,r0,r8 /* check if updated */ - bne- .Lcoarse_clocks - - /* Counter has not updated, so continue calculating proper values for - * sec and nsec if monotonic coarse, or just return with the proper - * values for realtime. - */ - bne cr6, .Lfinish - - /* Calculate and store result. Note that this mimics the C code, - * which may cause funny results if nsec goes negative... is that - * possible at all ? - */ -.Lfinish_monotonic: - add r3,r3,r5 - add r4,r4,r6 - cmpw cr0,r4,r7 - cmpwi cr1,r4,0 - blt 1f - subf r4,r7,r4 - addi r3,r3,1 -1: bge cr1, .Lfinish - addi r3,r3,-1 - add r4,r4,r7 - -.Lfinish: - stw r3,TSPC32_TV_SEC(r11) - stw r4,TSPC32_TV_NSEC(r11) - - mtlr r12 - crclr cr0*4+so - li r3,0 - blr - - /* - * syscall fallback - */ -.Lgettime_fallback: - li r0,__NR_clock_gettime - .cfi_restore lr - sc - blr - .cfi_endproc + cvdso_call __c_kernel_clock_gettime V_FUNCTION_END(__kernel_clock_gettime) +/* + * Exact prototype of clock_gettime64() + * + * int __kernel_clock_gettime64(clockid_t clock_id, struct __timespec64 *ts); + * + */ +V_FUNCTION_BEGIN(__kernel_clock_gettime64) + cvdso_call __c_kernel_clock_gettime64 +V_FUNCTION_END(__kernel_clock_gettime64) /* * Exact prototype of clock_getres() @@ -194,37 +52,7 @@ V_FUNCTION_END(__kernel_clock_gettime) * */ V_FUNCTION_BEGIN(__kernel_clock_getres) - .cfi_startproc - /* Check for supported clock IDs */ - cmplwi cr0, r3, CLOCK_MAX - cmpwi cr1, r3, CLOCK_REALTIME_COARSE - cmpwi cr7, r3, CLOCK_MONOTONIC_COARSE - bgt cr0, 99f - LOAD_REG_IMMEDIATE(r5, KTIME_LOW_RES) - beq cr1, 1f - beq cr7, 1f - - mflr r12 - .cfi_register lr,r12 - get_datapage r3, r0 - lwz r5, CLOCK_HRTIMER_RES(r3) - mtlr r12 -1: li r3,0 - cmpli cr0,r4,0 - crclr cr0*4+so - beqlr - stw r3,TSPC32_TV_SEC(r4) - stw r5,TSPC32_TV_NSEC(r4) - blr - - /* - * syscall fallback - */ -99: - li r0,__NR_clock_getres - sc - blr - .cfi_endproc + cvdso_call __c_kernel_clock_getres V_FUNCTION_END(__kernel_clock_getres) @@ -235,105 +63,5 @@ V_FUNCTION_END(__kernel_clock_getres) * */ V_FUNCTION_BEGIN(__kernel_time) - .cfi_startproc - mflr r12 - .cfi_register lr,r12 - - mr r11,r3 /* r11 holds t */ - get_datapage r9, r0 - - lwz r3,STAMP_XTIME_SEC+LOPART(r9) - - cmplwi r11,0 /* check if t is NULL */ - mtlr r12 - crclr cr0*4+so - beqlr - stw r3,0(r11) /* store result at *t */ - blr - .cfi_endproc + cvdso_call_time __c_kernel_time V_FUNCTION_END(__kernel_time) - -/* - * This is the core of clock_gettime() and gettimeofday(), - * it returns the current time in r3 (seconds) and r4. 
- * On entry, r7 gives the resolution of r4, either USEC_PER_SEC - * or NSEC_PER_SEC, giving r4 in microseconds or nanoseconds. - * It expects the datapage ptr in r9 and doesn't clobber it. - * It clobbers r0, r5 and r6. - * On return, r8 contains the counter value that can be reused. - * This clobbers cr0 but not any other cr field. - */ -__do_get_tspec: - .cfi_startproc - /* Check for update count & load values. We use the low - * order 32 bits of the update count - */ -1: lwz r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9) - andi. r0,r8,1 /* pending update ? loop */ - bne- 1b - xor r0,r8,r8 /* create dependency */ - add r9,r9,r0 - - /* Load orig stamp (offset to TB) */ - lwz r5,CFG_TB_ORIG_STAMP(r9) - lwz r6,(CFG_TB_ORIG_STAMP+4)(r9) - - /* Get a stable TB value */ -2: MFTBU(r3) - MFTBL(r4) - MFTBU(r0) - cmplw cr0,r3,r0 - bne- 2b - - /* Subtract tb orig stamp and shift left 12 bits. - */ - subfc r4,r6,r4 - subfe r0,r5,r3 - slwi r0,r0,12 - rlwimi. r0,r4,12,20,31 - slwi r4,r4,12 - - /* - * Load scale factor & do multiplication. - * We only use the high 32 bits of the tb_to_xs value. - * Even with a 1GHz timebase clock, the high 32 bits of - * tb_to_xs will be at least 4 million, so the error from - * ignoring the low 32 bits will be no more than 0.25ppm. - * The error will just make the clock run very very slightly - * slow until the next time the kernel updates the VDSO data, - * at which point the clock will catch up to the kernel's value, - * so there is no long-term error accumulation. - */ - lwz r5,CFG_TB_TO_XS(r9) /* load values */ - mulhwu r4,r4,r5 - li r3,0 - - beq+ 4f /* skip high part computation if 0 */ - mulhwu r3,r0,r5 - mullw r5,r0,r5 - addc r4,r4,r5 - addze r3,r3 -4: - /* At this point, we have seconds since the xtime stamp - * as a 32.32 fixed-point number in r3 and r4. - * Load & add the xtime stamp. - */ - lwz r5,STAMP_XTIME_SEC+LOPART(r9) - lwz r6,STAMP_SEC_FRAC(r9) - addc r4,r4,r6 - adde r3,r3,r5 - - /* We create a fake dependency on the result in r3/r4 - * and re-check the counter - */ - or r6,r4,r3 - xor r0,r6,r6 - add r9,r9,r0 - lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9) - cmplw cr0,r8,r0 /* check if updated */ - bne- 1b - - mulhwu r4,r4,r7 /* convert to micro or nanoseconds */ - - blr - .cfi_endproc diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S index 7eadac74c7f9..a4b806b0d618 100644 --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -4,6 +4,8 @@ * library */ #include <asm/vdso.h> +#include <asm/page.h> +#include <asm-generic/vmlinux.lds.h> #ifdef __LITTLE_ENDIAN__ OUTPUT_FORMAT("elf32-powerpcle", "elf32-powerpcle", "elf32-powerpcle") @@ -15,7 +17,8 @@ ENTRY(_start) SECTIONS { - . = VDSO32_LBASE + SIZEOF_HEADERS; + PROVIDE(_vdso_datapage = . - PAGE_SIZE); + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text .gnu.hash : { *(.gnu.hash) } @@ -36,17 +39,25 @@ SECTIONS PROVIDE(etext = .); . = ALIGN(8); + VDSO_ftr_fixup_start = .; __ftr_fixup : { *(__ftr_fixup) } + VDSO_ftr_fixup_end = .; . = ALIGN(8); + VDSO_mmu_ftr_fixup_start = .; __mmu_ftr_fixup : { *(__mmu_ftr_fixup) } + VDSO_mmu_ftr_fixup_end = .; . = ALIGN(8); + VDSO_lwsync_fixup_start = .; __lwsync_fixup : { *(__lwsync_fixup) } + VDSO_lwsync_fixup_end = .; #ifdef CONFIG_PPC64 . = ALIGN(8); + VDSO_fw_ftr_fixup_start = .; __fw_ftr_fixup : { *(__fw_ftr_fixup) } + VDSO_fw_ftr_fixup_end = .; #endif /* @@ -68,49 +79,15 @@ SECTIONS __end = .; PROVIDE(end = .); - /* - * Stabs debugging sections are here too. 
- */ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } - - /* - * DWARF debug sections. - * Symbols in the DWARF debugging sections are relative to the beginning - * of the section so we begin them at 0. - */ - /* DWARF 1 */ - .debug 0 : { *(.debug) } - .line 0 : { *(.line) } - /* GNU DWARF 1 extensions */ - .debug_srcinfo 0 : { *(.debug_srcinfo) } - .debug_sfnames 0 : { *(.debug_sfnames) } - /* DWARF 1.1 and DWARF 2 */ - .debug_aranges 0 : { *(.debug_aranges) } - .debug_pubnames 0 : { *(.debug_pubnames) } - /* DWARF 2 */ - .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } - .debug_abbrev 0 : { *(.debug_abbrev) } - .debug_line 0 : { *(.debug_line) } - .debug_frame 0 : { *(.debug_frame) } - .debug_str 0 : { *(.debug_str) } - .debug_loc 0 : { *(.debug_loc) } - .debug_macinfo 0 : { *(.debug_macinfo) } - /* SGI/MIPS DWARF 2 extensions */ - .debug_weaknames 0 : { *(.debug_weaknames) } - .debug_funcnames 0 : { *(.debug_funcnames) } - .debug_typenames 0 : { *(.debug_typenames) } - .debug_varnames 0 : { *(.debug_varnames) } + STABS_DEBUG + DWARF_DEBUG + ELF_DETAILS /DISCARD/ : { *(.note.GNU-stack) *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) + *(.got1) } } @@ -138,19 +115,14 @@ VERSION { VDSO_VERSION_STRING { global: - /* - * Has to be there for the kernel to find - */ - __kernel_datapage_offset; - __kernel_get_syscall_map; __kernel_gettimeofday; __kernel_clock_gettime; + __kernel_clock_gettime64; __kernel_clock_getres; __kernel_time; __kernel_get_tbfreq; __kernel_sync_dicache; - __kernel_sync_dicache_p5; __kernel_sigtramp32; __kernel_sigtramp_rt32; #if defined(CONFIG_PPC64) || !defined(CONFIG_SMP) @@ -160,3 +132,9 @@ VERSION local: *; }; } + +/* + * Make the sigreturn code visible to the kernel. + */ +VDSO_sigtramp32 = __kernel_sigtramp32; +VDSO_sigtramp_rt32 = __kernel_sigtramp_rt32; diff --git a/arch/powerpc/kernel/vdso32/vgettimeofday.c b/arch/powerpc/kernel/vdso32/vgettimeofday.c new file mode 100644 index 000000000000..65fb03fb1731 --- /dev/null +++ b/arch/powerpc/kernel/vdso32/vgettimeofday.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Powerpc userspace implementations of gettimeofday() and similar. 
+ */ +#include <linux/types.h> + +int __c_kernel_clock_gettime(clockid_t clock, struct old_timespec32 *ts, + const struct vdso_data *vd) +{ + return __cvdso_clock_gettime32_data(vd, clock, ts); +} + +int __c_kernel_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts, + const struct vdso_data *vd) +{ + return __cvdso_clock_gettime_data(vd, clock, ts); +} + +int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz, + const struct vdso_data *vd) +{ + return __cvdso_gettimeofday_data(vd, tv, tz); +} + +int __c_kernel_clock_getres(clockid_t clock_id, struct old_timespec32 *res, + const struct vdso_data *vd) +{ + return __cvdso_clock_getres_time32_data(vd, clock_id, res); +} + +__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, const struct vdso_data *vd) +{ + return __cvdso_time_data(vd, time); +} diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index dfd34f68bfa1..d365810a689a 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -1,16 +1,29 @@ # SPDX-License-Identifier: GPL-2.0 # List of files in the vdso, has to be asm only for now +ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN +include $(srctree)/lib/vdso/Makefile + obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o +ifneq ($(c-gettimeofday-y),) + CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) + CFLAGS_vgettimeofday.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) + CFLAGS_vgettimeofday.o += $(call cc-option, -fno-stack-protector) + CFLAGS_vgettimeofday.o += -DDISABLE_BRANCH_PROFILING + CFLAGS_vgettimeofday.o += -ffreestanding -fasynchronous-unwind-tables + CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) +endif + # Build rules -targets := $(obj-vdso64) vdso64.so vdso64.so.dbg +targets := $(obj-vdso64) vdso64.so.dbg obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) GCOV_PROFILE := n KCOV_INSTRUMENT := n UBSAN_SANITIZE := n +KASAN_SANITIZE := n ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ -Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both @@ -20,28 +33,23 @@ obj-y += vdso64_wrapper.o targets += vdso64.lds CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) +$(obj)/vgettimeofday.o: %.o: %.c FORCE + # Force dependency (incbin is bad) -$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so +$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so.dbg # link rule for the .so file, .lds has to be first -$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE - $(call if_changed,vdso64ld) +$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday.o FORCE + $(call if_changed,vdso64ld_and_check) -# strip rule for the .so file -$(obj)/%.so: OBJCOPYFLAGS := -S -$(obj)/%.so: $(obj)/%.so.dbg FORCE - $(call if_changed,objcopy) +# Generate VDSO offsets using helper script +gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh +quiet_cmd_vdsosym = VDSOSYM $@ + cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ -# actual build commands -quiet_cmd_vdso64ld = VDSO64L $@ - cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) +include/generated/vdso64-offsets.h: $(obj)/vdso64.so.dbg FORCE + $(call if_changed,vdsosym) -# install commands for the unstripped file -quiet_cmd_vdso_install = INSTALL $@ - cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ - -vdso64.so: $(obj)/vdso64.so.dbg - @mkdir -p $(MODLIB)/vdso - $(call 
cmd,vdso_install) - -vdso_install: vdso64.so +# actual build commands +quiet_cmd_vdso64ld_and_check = VDSO64L $@ + cmd_vdso64ld_and_check = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^); $(cmd_vdso_check) diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S index cab14324242b..76c3c8cf8ece 100644 --- a/arch/powerpc/kernel/vdso64/cacheflush.S +++ b/arch/powerpc/kernel/vdso64/cacheflush.S @@ -23,10 +23,14 @@ */ V_FUNCTION_BEGIN(__kernel_sync_dicache) .cfi_startproc +BEGIN_FTR_SECTION + b 3f +END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) mflr r12 .cfi_register lr,r12 - get_datapage r10, r0 + get_datapage r10 mtlr r12 + .cfi_restore lr lwz r7,CFG_DCACHE_BLOCKSZ(r10) addi r5,r7,-1 @@ -61,19 +65,11 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) isync li r3,0 blr - .cfi_endproc -V_FUNCTION_END(__kernel_sync_dicache) - - -/* - * POWER5 version of __kernel_sync_dicache - */ -V_FUNCTION_BEGIN(__kernel_sync_dicache_p5) - .cfi_startproc +3: crclr cr0*4+so sync isync li r3,0 blr .cfi_endproc -V_FUNCTION_END(__kernel_sync_dicache_p5) +V_FUNCTION_END(__kernel_sync_dicache) diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S index 067247d3efb9..00760dc69d68 100644 --- a/arch/powerpc/kernel/vdso64/datapage.S +++ b/arch/powerpc/kernel/vdso64/datapage.S @@ -13,9 +13,6 @@ #include <asm/vdso_datapage.h> .text -.global __kernel_datapage_offset; -__kernel_datapage_offset: - .long 0 /* * void *__kernel_get_syscall_map(unsigned int *syscall_count) ; @@ -31,7 +28,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) mflr r12 .cfi_register lr,r12 mr r4,r3 - get_datapage r3, r0 + get_datapage r3 mtlr r12 addi r3,r3,CFG_SYSCALL_MAP64 cmpldi cr0,r4,0 @@ -53,7 +50,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq) .cfi_startproc mflr r12 .cfi_register lr,r12 - get_datapage r3, r0 + get_datapage r3 ld r3,CFG_TB_TICKS_PER_SEC(r3) mtlr r12 crclr cr0*4+so diff --git a/arch/powerpc/kernel/vdso64/gen_vdso_offsets.sh b/arch/powerpc/kernel/vdso64/gen_vdso_offsets.sh new file mode 100755 index 000000000000..4bf15ffd5933 --- /dev/null +++ b/arch/powerpc/kernel/vdso64/gen_vdso_offsets.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# +# Match symbols in the DSO that look like VDSO_*; produce a header file +# of constant offsets into the shared object. +# +# Doing this inside the Makefile will break the $(filter-out) function, +# causing Kbuild to rebuild the vdso-offsets header file every time. +# +# Author: Will Deacon <will.deacon@arm.com +# + +LC_ALL=C +sed -n -e 's/^00*/0/' -e \ +'s/^\([0-9a-fA-F]*\) . 
VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso64_offset_\2\t0x\1/p' diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S index 20f8be40c653..d7a7bfb51081 100644 --- a/arch/powerpc/kernel/vdso64/gettimeofday.S +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S @@ -12,6 +12,7 @@ #include <asm/vdso_datapage.h> #include <asm/asm-offsets.h> #include <asm/unistd.h> +#include <asm/vdso/gettimeofday.h> .text /* @@ -21,31 +22,7 @@ * */ V_FUNCTION_BEGIN(__kernel_gettimeofday) - .cfi_startproc - mflr r12 - .cfi_register lr,r12 - - mr r11,r3 /* r11 holds tv */ - mr r10,r4 /* r10 holds tz */ - get_datapage r3, r0 - cmpldi r11,0 /* check if tv is NULL */ - beq 2f - lis r7,1000000@ha /* load up USEC_PER_SEC */ - addi r7,r7,1000000@l - bl V_LOCAL_FUNC(__do_get_tspec) /* get sec/us from tb & kernel */ - std r4,TVAL64_TV_SEC(r11) /* store sec in tv */ - std r5,TVAL64_TV_USEC(r11) /* store usec in tv */ -2: cmpldi r10,0 /* check if tz is NULL */ - beq 1f - lwz r4,CFG_TZ_MINUTEWEST(r3)/* fill tz */ - lwz r5,CFG_TZ_DSTTIME(r3) - stw r4,TZONE_TZ_MINWEST(r10) - stw r5,TZONE_TZ_DSTTIME(r10) -1: mtlr r12 - crclr cr0*4+so - li r3,0 /* always success */ - blr - .cfi_endproc + cvdso_call __c_kernel_gettimeofday V_FUNCTION_END(__kernel_gettimeofday) @@ -56,120 +33,7 @@ V_FUNCTION_END(__kernel_gettimeofday) * */ V_FUNCTION_BEGIN(__kernel_clock_gettime) - .cfi_startproc - /* Check for supported clock IDs */ - cmpwi cr0,r3,CLOCK_REALTIME - cmpwi cr1,r3,CLOCK_MONOTONIC - cror cr0*4+eq,cr0*4+eq,cr1*4+eq - - cmpwi cr5,r3,CLOCK_REALTIME_COARSE - cmpwi cr6,r3,CLOCK_MONOTONIC_COARSE - cror cr5*4+eq,cr5*4+eq,cr6*4+eq - - cror cr0*4+eq,cr0*4+eq,cr5*4+eq - bne cr0,99f - - mflr r12 /* r12 saves lr */ - .cfi_register lr,r12 - mr r11,r4 /* r11 saves tp */ - get_datapage r3, r0 - lis r7,NSEC_PER_SEC@h /* want nanoseconds */ - ori r7,r7,NSEC_PER_SEC@l - beq cr5,70f -50: bl V_LOCAL_FUNC(__do_get_tspec) /* get time from tb & kernel */ - bne cr1,80f /* if not monotonic, all done */ - - /* - * CLOCK_MONOTONIC - */ - - /* now we must fixup using wall to monotonic. We need to snapshot - * that value and do the counter trick again. Fortunately, we still - * have the counter value in r8 that was returned by __do_get_tspec. - * At this point, r4,r5 contain our sec/nsec values. - */ - - ld r6,WTOM_CLOCK_SEC(r3) - lwa r9,WTOM_CLOCK_NSEC(r3) - - /* We now have our result in r6,r9. We create a fake dependency - * on that result and re-check the counter - */ - or r0,r6,r9 - xor r0,r0,r0 - add r3,r3,r0 - ld r0,CFG_TB_UPDATE_COUNT(r3) - cmpld cr0,r0,r8 /* check if updated */ - bne- 50b - b 78f - - /* - * For coarse clocks we get data directly from the vdso data page, so - * we don't need to call __do_get_tspec, but we still need to do the - * counter trick. - */ -70: ld r8,CFG_TB_UPDATE_COUNT(r3) - andi. r0,r8,1 /* pending update ? loop */ - bne- 70b - add r3,r3,r0 /* r0 is already 0 */ - - /* - * CLOCK_REALTIME_COARSE, below values are needed for MONOTONIC_COARSE - * too - */ - ld r4,STAMP_XTIME_SEC(r3) - ld r5,STAMP_XTIME_NSEC(r3) - bne cr6,75f - - /* CLOCK_MONOTONIC_COARSE */ - ld r6,WTOM_CLOCK_SEC(r3) - lwa r9,WTOM_CLOCK_NSEC(r3) - - /* check if counter has updated */ - or r0,r6,r9 -75: or r0,r0,r4 - or r0,r0,r5 - xor r0,r0,r0 - add r3,r3,r0 - ld r0,CFG_TB_UPDATE_COUNT(r3) - cmpld cr0,r0,r8 /* check if updated */ - bne- 70b - - /* Counter has not updated, so continue calculating proper values for - * sec and nsec if monotonic coarse, or just return with the proper - * values for realtime. 
- */ - bne cr6,80f - - /* Add wall->monotonic offset and check for overflow or underflow */ -78: add r4,r4,r6 - add r5,r5,r9 - cmpd cr0,r5,r7 - cmpdi cr1,r5,0 - blt 79f - subf r5,r7,r5 - addi r4,r4,1 -79: bge cr1,80f - addi r4,r4,-1 - add r5,r5,r7 - -80: std r4,TSPC64_TV_SEC(r11) - std r5,TSPC64_TV_NSEC(r11) - - mtlr r12 - crclr cr0*4+so - li r3,0 - blr - - /* - * syscall fallback - */ -99: - li r0,__NR_clock_gettime - .cfi_restore lr - sc - blr - .cfi_endproc + cvdso_call __c_kernel_clock_gettime V_FUNCTION_END(__kernel_clock_gettime) @@ -180,34 +44,7 @@ V_FUNCTION_END(__kernel_clock_gettime) * */ V_FUNCTION_BEGIN(__kernel_clock_getres) - .cfi_startproc - /* Check for supported clock IDs */ - cmpwi cr0,r3,CLOCK_REALTIME - cmpwi cr1,r3,CLOCK_MONOTONIC - cror cr0*4+eq,cr0*4+eq,cr1*4+eq - bne cr0,99f - - mflr r12 - .cfi_register lr,r12 - get_datapage r3, r0 - lwz r5, CLOCK_HRTIMER_RES(r3) - mtlr r12 - li r3,0 - cmpldi cr0,r4,0 - crclr cr0*4+so - beqlr - std r3,TSPC64_TV_SEC(r4) - std r5,TSPC64_TV_NSEC(r4) - blr - - /* - * syscall fallback - */ -99: - li r0,__NR_clock_getres - sc - blr - .cfi_endproc + cvdso_call __c_kernel_clock_getres V_FUNCTION_END(__kernel_clock_getres) /* @@ -217,74 +54,5 @@ V_FUNCTION_END(__kernel_clock_getres) * */ V_FUNCTION_BEGIN(__kernel_time) - .cfi_startproc - mflr r12 - .cfi_register lr,r12 - - mr r11,r3 /* r11 holds t */ - get_datapage r3, r0 - - ld r4,STAMP_XTIME_SEC(r3) - - cmpldi r11,0 /* check if t is NULL */ - beq 2f - std r4,0(r11) /* store result at *t */ -2: mtlr r12 - crclr cr0*4+so - mr r3,r4 - blr - .cfi_endproc + cvdso_call_time __c_kernel_time V_FUNCTION_END(__kernel_time) - - -/* - * This is the core of clock_gettime() and gettimeofday(), - * it returns the current time in r4 (seconds) and r5. - * On entry, r7 gives the resolution of r5, either USEC_PER_SEC - * or NSEC_PER_SEC, giving r5 in microseconds or nanoseconds. - * It expects the datapage ptr in r3 and doesn't clobber it. - * It clobbers r0, r6 and r9. - * On return, r8 contains the counter value that can be reused. - * This clobbers cr0 but not any other cr field. - */ -V_FUNCTION_BEGIN(__do_get_tspec) - .cfi_startproc - /* check for update count & load values */ -1: ld r8,CFG_TB_UPDATE_COUNT(r3) - andi. r0,r8,1 /* pending update ? loop */ - bne- 1b - xor r0,r8,r8 /* create dependency */ - add r3,r3,r0 - - /* Get TB & offset it. We use the MFTB macro which will generate - * workaround code for Cell. 
- */ - MFTB(r6) - ld r9,CFG_TB_ORIG_STAMP(r3) - subf r6,r9,r6 - - /* Scale result */ - ld r5,CFG_TB_TO_XS(r3) - sldi r6,r6,12 /* compute time since stamp_xtime */ - mulhdu r6,r6,r5 /* in units of 2^-32 seconds */ - - /* Add stamp since epoch */ - ld r4,STAMP_XTIME_SEC(r3) - lwz r5,STAMP_SEC_FRAC(r3) - or r0,r4,r5 - or r0,r0,r6 - xor r0,r0,r0 - add r3,r3,r0 - ld r0,CFG_TB_UPDATE_COUNT(r3) - cmpld r0,r8 /* check if updated */ - bne- 1b /* reload if so */ - - /* convert to seconds & nanoseconds and add to stamp */ - add r6,r6,r5 /* add on fractional seconds of xtime */ - mulhwu r5,r6,r7 /* compute micro or nanoseconds and */ - srdi r6,r6,32 /* seconds since stamp_xtime */ - clrldi r5,r5,32 - add r4,r4,r6 - blr - .cfi_endproc -V_FUNCTION_END(__do_get_tspec) diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S index 256fb9720298..6164d1a1ba11 100644 --- a/arch/powerpc/kernel/vdso64/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S @@ -4,6 +4,8 @@ * library */ #include <asm/vdso.h> +#include <asm/page.h> +#include <asm-generic/vmlinux.lds.h> #ifdef __LITTLE_ENDIAN__ OUTPUT_FORMAT("elf64-powerpcle", "elf64-powerpcle", "elf64-powerpcle") @@ -15,7 +17,8 @@ ENTRY(_start) SECTIONS { - . = VDSO64_LBASE + SIZEOF_HEADERS; + PROVIDE(_vdso_datapage = . - PAGE_SIZE); + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text .gnu.hash : { *(.gnu.hash) } @@ -37,16 +40,24 @@ SECTIONS PROVIDE(etext = .); . = ALIGN(8); + VDSO_ftr_fixup_start = .; __ftr_fixup : { *(__ftr_fixup) } + VDSO_ftr_fixup_end = .; . = ALIGN(8); + VDSO_mmu_ftr_fixup_start = .; __mmu_ftr_fixup : { *(__mmu_ftr_fixup) } + VDSO_mmu_ftr_fixup_end = .; . = ALIGN(8); + VDSO_lwsync_fixup_start = .; __lwsync_fixup : { *(__lwsync_fixup) } + VDSO_lwsync_fixup_end = .; . = ALIGN(8); + VDSO_fw_ftr_fixup_start = .; __fw_ftr_fixup : { *(__fw_ftr_fixup) } + VDSO_fw_ftr_fixup_end = .; /* * Other stuff is appended to the text segment: @@ -61,56 +72,21 @@ SECTIONS .gcc_except_table : { *(.gcc_except_table) } .rela.dyn ALIGN(8) : { *(.rela.dyn) } - .opd ALIGN(8) : { KEEP (*(.opd)) } .got ALIGN(8) : { *(.got .toc) } _end = .; PROVIDE(end = .); - /* - * Stabs debugging sections are here too. - */ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } - - /* - * DWARF debug sections. - * Symbols in the DWARF debugging sections are relative to the beginning - * of the section so we begin them at 0. 
- */ - /* DWARF 1 */ - .debug 0 : { *(.debug) } - .line 0 : { *(.line) } - /* GNU DWARF 1 extensions */ - .debug_srcinfo 0 : { *(.debug_srcinfo) } - .debug_sfnames 0 : { *(.debug_sfnames) } - /* DWARF 1.1 and DWARF 2 */ - .debug_aranges 0 : { *(.debug_aranges) } - .debug_pubnames 0 : { *(.debug_pubnames) } - /* DWARF 2 */ - .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } - .debug_abbrev 0 : { *(.debug_abbrev) } - .debug_line 0 : { *(.debug_line) } - .debug_frame 0 : { *(.debug_frame) } - .debug_str 0 : { *(.debug_str) } - .debug_loc 0 : { *(.debug_loc) } - .debug_macinfo 0 : { *(.debug_macinfo) } - /* SGI/MIPS DWARF 2 extensions */ - .debug_weaknames 0 : { *(.debug_weaknames) } - .debug_funcnames 0 : { *(.debug_funcnames) } - .debug_typenames 0 : { *(.debug_typenames) } - .debug_varnames 0 : { *(.debug_varnames) } + STABS_DEBUG + DWARF_DEBUG + ELF_DETAILS /DISCARD/ : { *(.note.GNU-stack) *(.branch_lt) *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) + *(.opd) } } @@ -138,18 +114,12 @@ VERSION { VDSO_VERSION_STRING { global: - /* - * Has to be there for the kernel to find - */ - __kernel_datapage_offset; - __kernel_get_syscall_map; __kernel_gettimeofday; __kernel_clock_gettime; __kernel_clock_getres; __kernel_get_tbfreq; __kernel_sync_dicache; - __kernel_sync_dicache_p5; __kernel_sigtramp_rt64; __kernel_getcpu; __kernel_time; @@ -157,3 +127,8 @@ VERSION local: *; }; } + +/* + * Make the sigreturn code visible to the kernel. + */ +VDSO_sigtramp_rt64 = __kernel_sigtramp_rt64; diff --git a/arch/powerpc/kernel/vdso64/vgettimeofday.c b/arch/powerpc/kernel/vdso64/vgettimeofday.c new file mode 100644 index 000000000000..5b5500058344 --- /dev/null +++ b/arch/powerpc/kernel/vdso64/vgettimeofday.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Powerpc userspace implementations of gettimeofday() and similar. + */ +#include <linux/time.h> +#include <linux/types.h> + +int __c_kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts, + const struct vdso_data *vd) +{ + return __cvdso_clock_gettime_data(vd, clock, ts); +} + +int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz, + const struct vdso_data *vd) +{ + return __cvdso_gettimeofday_data(vd, tv, tz); +} + +int __c_kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res, + const struct vdso_data *vd) +{ + return __cvdso_clock_getres_data(vd, clock_id, res); +} + +__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, const struct vdso_data *vd) +{ + return __cvdso_time_data(vd, time); +} diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index e184d17387f6..0318ba436f34 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -200,21 +200,7 @@ SECTIONS EXIT_TEXT } - .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { - INIT_DATA - } - - .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { - INIT_SETUP(16) - } - - .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { - INIT_CALLS - } - - .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { - CON_INITCALL - } + INIT_DATA_SECTION(16) . 
= ALIGN(8); __ftr_fixup : AT(ADDR(__ftr_fixup) - LOAD_OFFSET) { @@ -242,9 +228,6 @@ SECTIONS __stop___fw_ftr_fixup = .; } #endif - .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { - INIT_RAM_FS - } PERCPU_SECTION(L1_CACHE_BYTES) diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 0effd48c8f4d..b08cc15f31c7 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -840,6 +840,9 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) case SPRN_MMCR1: case SPRN_MMCR2: case SPRN_UMMCR2: + case SPRN_UAMOR: + case SPRN_IAMR: + case SPRN_AMR: #endif break; unprivileged: @@ -1004,6 +1007,9 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val case SPRN_MMCR2: case SPRN_UMMCR2: case SPRN_TIR: + case SPRN_UAMOR: + case SPRN_IAMR: + case SPRN_AMR: #endif *spr_val = 0; break; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index e3b1839fc251..6f612d240392 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1241,9 +1241,9 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu) switch (get_xop(inst)) { case OP_31_XOP_MSGSNDP: arg = kvmppc_get_gpr(vcpu, rb); - if (((arg >> 27) & 0xf) != PPC_DBELL_SERVER) + if (((arg >> 27) & 0x1f) != PPC_DBELL_SERVER) break; - arg &= 0x3f; + arg &= 0x7f; if (arg >= kvm->arch.emul_smt_mode) break; tvcpu = kvmppc_find_vcpu(kvm, vcpu->vcpu_id - thr + arg); @@ -1256,7 +1256,7 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu) break; case OP_31_XOP_MSGCLRP: arg = kvmppc_get_gpr(vcpu, rb); - if (((arg >> 27) & 0xf) != PPC_DBELL_SERVER) + if (((arg >> 27) & 0x1f) != PPC_DBELL_SERVER) break; vcpu->arch.vcore->dpdes = 0; vcpu->arch.doorbell_request = 0; @@ -1327,9 +1327,15 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, case BOOK3S_INTERRUPT_SYSTEM_RESET: r = RESUME_GUEST; break; - case BOOK3S_INTERRUPT_MACHINE_CHECK: - /* Print the MCE event to host console. */ - machine_check_print_event_info(&vcpu->arch.mce_evt, false, true); + case BOOK3S_INTERRUPT_MACHINE_CHECK: { + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + /* + * Print the MCE event to host console. Ratelimit so the guest + * can't flood the host log. + */ + if (__ratelimit(&rs)) + machine_check_print_event_info(&vcpu->arch.mce_evt,false, true); /* * If the guest can do FWNMI, exit to userspace so it can @@ -1357,6 +1363,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, r = RESUME_HOST; break; + } case BOOK3S_INTERRUPT_PROGRAM: { ulong flags; @@ -1516,11 +1523,16 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) r = RESUME_GUEST; break; case BOOK3S_INTERRUPT_MACHINE_CHECK: + { + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); /* Pass the machine check to the L1 guest */ r = RESUME_HOST; /* Print the MCE event to host console. */ - machine_check_print_event_info(&vcpu->arch.mce_evt, false, true); + if (__ratelimit(&rs)) + machine_check_print_event_info(&vcpu->arch.mce_evt, false, true); break; + } /* * We get these next two if the guest accesses a page which it thinks * it has mapped but which is not actually present, either because @@ -4949,7 +4961,12 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) * Work out how many sets the TLB has, for the use of * the TLB invalidation loop in book3s_hv_rmhandlers.S. 
*/ - if (radix_enabled()) + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + /* + * P10 will flush all the congruence class with a single tlbiel + */ + kvm->arch.tlb_sets = 1; + } else if (radix_enabled()) kvm->arch.tlb_sets = POWER9_TLB_SETS_RADIX; /* 128 */ else if (cpu_has_feature(CPU_FTR_ARCH_300)) kvm->arch.tlb_sets = POWER9_TLB_SETS_HASH; /* 256 */ diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 8f58dd20b362..8053efdf7ea7 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -694,6 +694,7 @@ static void wait_for_sync(struct kvm_split_mode *sip, int phase) void kvmhv_p9_set_lpcr(struct kvm_split_mode *sip) { + int num_sets; unsigned long rb, set; /* wait for every other thread to get to real mode */ @@ -704,11 +705,19 @@ void kvmhv_p9_set_lpcr(struct kvm_split_mode *sip) mtspr(SPRN_LPID, sip->lpidr_req); isync(); + /* + * P10 will flush all the congruence class with a single tlbiel + */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + num_sets = 1; + else + num_sets = POWER9_TLB_SETS_RADIX; + /* Invalidate the TLB on thread 0 */ if (local_paca->kvm_hstate.tid == 0) { sip->do_set = 0; asm volatile("ptesync" : : : "memory"); - for (set = 0; set < POWER9_TLB_SETS_RADIX; ++set) { + for (set = 0; set < num_sets; ++set) { rb = TLBIEL_INVAL_SET_LPID + (set << TLBIEL_INVAL_SET_SHIFT); asm volatile(PPC_TLBIEL(%0, %1, 0, 0, 0) : : diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index 6028628ea3ac..d4bca93b79f6 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -65,10 +65,9 @@ static void reload_slb(struct kvm_vcpu *vcpu) * On POWER7, see if we can handle a machine check that occurred inside * the guest in real mode, without switching to the host partition. */ -static void kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) +static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) { unsigned long srr1 = vcpu->arch.shregs.msr; - struct machine_check_event mce_evt; long handled = 1; if (srr1 & SRR1_MC_LDSTERR) { @@ -106,6 +105,21 @@ static void kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) handled = 0; } + return handled; +} + +void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu) +{ + struct machine_check_event mce_evt; + long handled; + + if (vcpu->kvm->arch.fwnmi_enabled) { + /* FWNMI guests handle their own recovery */ + handled = 0; + } else { + handled = kvmppc_realmode_mc_power7(vcpu); + } + /* * Now get the event and stash it in the vcpu struct so it can * be handled by the primary thread in virtual mode. We can't @@ -122,11 +136,6 @@ static void kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) vcpu->arch.mce_evt = mce_evt; } -void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu) -{ - kvmppc_realmode_mc_power7(vcpu); -} - /* Check if dynamic split is in force and return subcore size accordingly. 
*/ static inline int kvmppc_cur_subcore_size(void) { diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index b1fefa63e125..913944dc3620 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -239,7 +239,7 @@ static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) smsr |= (guest_msr & vcpu->arch.guest_owned_ext); /* 64-bit Process MSR values */ #ifdef CONFIG_PPC_BOOK3S_64 - smsr |= MSR_ISF | MSR_HV; + smsr |= MSR_HV; #endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 3dc129a254b5..b45b750fa77a 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -36,8 +36,8 @@ #define FUNC(name) name -#define RFI_TO_KERNEL RFI -#define RFI_TO_GUEST RFI +#define RFI_TO_KERNEL rfi +#define RFI_TO_GUEST rfi .macro INTERRUPT_TRAMPOLINE intno diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 5fee5a11550d..303e3cb096db 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -473,7 +473,7 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, arch_spin_unlock(&ics->lock); local_irq_restore(flags); new_irq = reject; - check_resend = 0; + check_resend = false; goto again; } } else { @@ -501,7 +501,7 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, state->resend = 0; arch_spin_unlock(&ics->lock); local_irq_restore(flags); - check_resend = 0; + check_resend = false; goto again; } } diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index a0ebc29f30b2..30dfeac731c6 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -219,7 +219,7 @@ int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, /* In single escalation mode, we grab the ESB MMIO of the * interrupt and mask it. Also populate the VCPU v/raddr * of the ESB page for use by asm entry/exit code. Finally - * set the XIVE_IRQ_NO_EOI flag which will prevent the + * set the XIVE_IRQ_FLAG_NO_EOI flag which will prevent the * core code from performing an EOI on the escalation * interrupt, thus leaving it effectively masked after * it fires once. @@ -231,7 +231,7 @@ int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01); vcpu->arch.xive_esc_raddr = xd->eoi_page; vcpu->arch.xive_esc_vaddr = (__force u64)xd->eoi_mmio; - xd->flags |= XIVE_IRQ_NO_EOI; + xd->flags |= XIVE_IRQ_FLAG_NO_EOI; } return 0; @@ -419,37 +419,16 @@ static u8 xive_lock_and_mask(struct kvmppc_xive *xive, /* Get the right irq */ kvmppc_xive_select_irq(state, &hw_num, &xd); + /* Set PQ to 10, return old P and old Q and remember them */ + val = xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_10); + state->old_p = !!(val & 2); + state->old_q = !!(val & 1); + /* - * If the interrupt is marked as needing masking via - * firmware, we do it here. Firmware masking however - * is "lossy", it won't return the old p and q bits - * and won't set the interrupt to a state where it will - * record queued ones. If this is an issue we should do - * lazy masking instead. - * - * For now, we work around this in unmask by forcing - * an interrupt whenever we unmask a non-LSI via FW - * (if ever). 
+ * Synchronize hardware to sensure the queues are updated when + * masking */ - if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) { - xive_native_configure_irq(hw_num, - kvmppc_xive_vp(xive, state->act_server), - MASKED, state->number); - /* set old_p so we can track if an H_EOI was done */ - state->old_p = true; - state->old_q = false; - } else { - /* Set PQ to 10, return old P and old Q and remember them */ - val = xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_10); - state->old_p = !!(val & 2); - state->old_q = !!(val & 1); - - /* - * Synchronize hardware to sensure the queues are updated - * when masking - */ - xive_native_sync_source(hw_num); - } + xive_native_sync_source(hw_num); return old_prio; } @@ -483,23 +462,6 @@ static void xive_finish_unmask(struct kvmppc_xive *xive, /* Get the right irq */ kvmppc_xive_select_irq(state, &hw_num, &xd); - /* - * See comment in xive_lock_and_mask() concerning masking - * via firmware. - */ - if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) { - xive_native_configure_irq(hw_num, - kvmppc_xive_vp(xive, state->act_server), - state->act_priority, state->number); - /* If an EOI is needed, do it here */ - if (!state->old_p) - xive_vm_source_eoi(hw_num, xd); - /* If this is not an LSI, force a trigger */ - if (!(xd->flags & OPAL_XIVE_IRQ_LSI)) - xive_irq_trigger(xd); - goto bail; - } - /* Old Q set, set PQ to 11 */ if (state->old_q) xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_11); @@ -2125,9 +2087,8 @@ int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu) if (!q->qpage && !xc->esc_virq[i]) continue; - seq_printf(m, " [q%d]: ", i); - if (q->qpage) { + seq_printf(m, " q[%d]: ", i); idx = q->idx; i0 = be32_to_cpup(q->qpage + idx); idx = (idx + 1) & q->msk; @@ -2141,16 +2102,54 @@ int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu) irq_data_get_irq_handler_data(d); u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET); - seq_printf(m, "E:%c%c I(%d:%llx:%llx)", - (pq & XIVE_ESB_VAL_P) ? 'P' : 'p', - (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q', - xc->esc_virq[i], pq, xd->eoi_page); + seq_printf(m, " ESC %d %c%c EOI @%llx", + xc->esc_virq[i], + (pq & XIVE_ESB_VAL_P) ? 'P' : '-', + (pq & XIVE_ESB_VAL_Q) ? 'Q' : '-', + xd->eoi_page); seq_puts(m, "\n"); } } return 0; } +void kvmppc_xive_debug_show_sources(struct seq_file *m, + struct kvmppc_xive_src_block *sb) +{ + int i; + + seq_puts(m, " LISN HW/CHIP TYPE PQ EISN CPU/PRIO\n"); + for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { + struct kvmppc_xive_irq_state *state = &sb->irq_state[i]; + struct xive_irq_data *xd; + u64 pq; + u32 hw_num; + + if (!state->valid) + continue; + + kvmppc_xive_select_irq(state, &hw_num, &xd); + + pq = xive_vm_esb_load(xd, XIVE_ESB_GET); + + seq_printf(m, "%08x %08x/%02x", state->number, hw_num, + xd->src_chip); + if (state->lsi) + seq_printf(m, " %cLSI", state->asserted ? '^' : ' '); + else + seq_puts(m, " MSI"); + + seq_printf(m, " %s %c%c %08x % 4d/%d", + state->ipi_number == hw_num ? "IPI" : " PT", + pq & XIVE_ESB_VAL_P ? 'P' : '-', + pq & XIVE_ESB_VAL_Q ? 
'Q' : '-', + state->eisn, state->act_server, + state->act_priority); + + seq_puts(m, "\n"); + } +} + static int xive_debug_show(struct seq_file *m, void *private) { struct kvmppc_xive *xive = m->private; @@ -2171,7 +2170,7 @@ static int xive_debug_show(struct seq_file *m, void *private) if (!kvm) return 0; - seq_printf(m, "=========\nVCPU state\n=========\n"); + seq_puts(m, "=========\nVCPU state\n=========\n"); kvm_for_each_vcpu(i, vcpu, kvm) { struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; @@ -2179,11 +2178,12 @@ static int xive_debug_show(struct seq_file *m, void *private) if (!xc) continue; - seq_printf(m, "cpu server %#x VP:%#x CPPR:%#x HWCPPR:%#x" - " MFRR:%#x PEND:%#x h_xirr: R=%lld V=%lld\n", - xc->server_num, xc->vp_id, xc->cppr, xc->hw_cppr, - xc->mfrr, xc->pending, - xc->stat_rm_h_xirr, xc->stat_vm_h_xirr); + seq_printf(m, "VCPU %d: VP:%#x/%02x\n" + " CPPR:%#x HWCPPR:%#x MFRR:%#x PEND:%#x h_xirr: R=%lld V=%lld\n", + xc->server_num, xc->vp_id, xc->vp_chip_id, + xc->cppr, xc->hw_cppr, + xc->mfrr, xc->pending, + xc->stat_rm_h_xirr, xc->stat_vm_h_xirr); kvmppc_xive_debug_show_queues(m, vcpu); @@ -2199,13 +2199,25 @@ static int xive_debug_show(struct seq_file *m, void *private) t_vm_h_ipi += xc->stat_vm_h_ipi; } - seq_printf(m, "Hcalls totals\n"); + seq_puts(m, "Hcalls totals\n"); seq_printf(m, " H_XIRR R=%10lld V=%10lld\n", t_rm_h_xirr, t_vm_h_xirr); seq_printf(m, " H_IPOLL R=%10lld V=%10lld\n", t_rm_h_ipoll, t_vm_h_ipoll); seq_printf(m, " H_CPPR R=%10lld V=%10lld\n", t_rm_h_cppr, t_vm_h_cppr); seq_printf(m, " H_EOI R=%10lld V=%10lld\n", t_rm_h_eoi, t_vm_h_eoi); seq_printf(m, " H_IPI R=%10lld V=%10lld\n", t_rm_h_ipi, t_vm_h_ipi); + seq_puts(m, "=========\nSources\n=========\n"); + + for (i = 0; i <= xive->max_sbid; i++) { + struct kvmppc_xive_src_block *sb = xive->src_blocks[i]; + + if (sb) { + arch_spin_lock(&sb->lock); + kvmppc_xive_debug_show_sources(m, sb); + arch_spin_unlock(&sb->lock); + } + } + return 0; } diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index 382e3a56e789..86c24a4ad809 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -218,6 +218,17 @@ static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmpp return xive->src_blocks[bid]; } +/* + * When the XIVE resources are allocated at the HW level, the VP + * structures describing the vCPUs of a guest are distributed among + * the chips to optimize the PowerBUS usage. For best performance, the + * guest vCPUs can be pinned to match the VP structure distribution. + * + * Currently, the VP identifiers are deduced from the vCPU id using + * the kvmppc_pack_vcpu_id() routine which is not incorrect but not + * optimal either. It VSMT is used, the result is not continuous and + * the constraints on HW resources described above can not be met. 
+ */ static inline u32 kvmppc_xive_vp(struct kvmppc_xive *xive, u32 server) { return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server); @@ -290,6 +301,8 @@ extern int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr); */ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu); int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu); +void kvmppc_xive_debug_show_sources(struct seq_file *m, + struct kvmppc_xive_src_block *sb); struct kvmppc_xive_src_block *kvmppc_xive_create_src_block( struct kvmppc_xive *xive, int irq); void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb); diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index a59a94f02733..76800c84f2a3 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -37,9 +37,6 @@ static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset) * ordering. */ - if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) - offset |= offset << 4; - val = in_be64(xd->eoi_mmio + offset); return (u8)val; } @@ -1219,18 +1216,31 @@ static int xive_native_debug_show(struct seq_file *m, void *private) if (!xc) continue; - seq_printf(m, "cpu server %#x VP=%#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n", - xc->server_num, xc->vp_id, + seq_printf(m, "VCPU %d: VP=%#x/%02x\n" + " NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n", + xc->server_num, xc->vp_id, xc->vp_chip_id, vcpu->arch.xive_saved_state.nsr, vcpu->arch.xive_saved_state.cppr, vcpu->arch.xive_saved_state.ipb, vcpu->arch.xive_saved_state.pipr, - vcpu->arch.xive_saved_state.w01, - (u32) vcpu->arch.xive_cam_word); + be64_to_cpu(vcpu->arch.xive_saved_state.w01), + be32_to_cpu(vcpu->arch.xive_cam_word)); kvmppc_xive_debug_show_queues(m, vcpu); } + seq_puts(m, "=========\nSources\n=========\n"); + + for (i = 0; i <= xive->max_sbid; i++) { + struct kvmppc_xive_src_block *sb = xive->src_blocks[i]; + + if (sb) { + arch_spin_lock(&sb->lock); + kvmppc_xive_debug_show_sources(m, sb); + arch_spin_unlock(&sb->lock); + } + } + return 0; } diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c index 4ad3c0279458..b0015e05d99a 100644 --- a/arch/powerpc/kvm/book3s_xive_template.c +++ b/arch/powerpc/kvm/book3s_xive_template.c @@ -61,9 +61,6 @@ static u8 GLUE(X_PFX,esb_load)(struct xive_irq_data *xd, u32 offset) if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI) offset |= XIVE_ESB_LD_ST_MO; - if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) - offset |= offset << 4; - val =__x_readq(__x_eoi_page(xd) + offset); #ifdef __LITTLE_ENDIAN__ val >>= 64-8; @@ -77,8 +74,6 @@ static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd) /* If the XIVE supports the new "store EOI facility, use it */ if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI) __x_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI); - else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) - opal_int_eoi(hw_irq); else if (xd->flags & XIVE_IRQ_FLAG_LSI) { /* * For LSIs the HW EOI cycle is used rather than PQ bits, diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index b1abcb816439..288a9820ec01 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -500,11 +500,11 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, vcpu->arch.regs.nip = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; - if (update_esr == true) + if (update_esr) kvmppc_set_esr(vcpu, vcpu->arch.queued_esr); - if (update_dear == true) + if (update_dear) 
kvmppc_set_dar(vcpu, vcpu->arch.queued_dear); - if (update_epr == true) { + if (update_epr) { if (vcpu->arch.epr_flags & KVMPPC_EPR_USER) kvm_make_request(KVM_REQ_EPR_EXIT, vcpu); else if (vcpu->arch.epr_flags & KVMPPC_EPR_KERNEL) { diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 13999123b735..cf52d26f49cd 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1087,7 +1087,7 @@ static inline u64 sp_to_dp(u32 fprs) preempt_disable(); enable_kernel_fp(); - asm ("lfs%U1%X1 0,%1; stfd%U0%X0 0,%0" : "=m" (fprd) : "m" (fprs) + asm ("lfs%U1%X1 0,%1; stfd%U0%X0 0,%0" : "=m"UPD_CONSTR (fprd) : "m"UPD_CONSTR (fprs) : "fr0"); preempt_enable(); return fprd; @@ -1099,7 +1099,7 @@ static inline u32 dp_to_sp(u64 fprd) preempt_disable(); enable_kernel_fp(); - asm ("lfd%U1%X1 0,%1; stfs%U0%X0 0,%0" : "=m" (fprs) : "m" (fprd) + asm ("lfd%U1%X1 0,%1; stfs%U0%X0 0,%0" : "=m"UPD_CONSTR (fprs) : "m"UPD_CONSTR (fprd) : "fr0"); preempt_enable(); return fprs; diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 321c12a9ef6b..47821055b94c 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -124,7 +124,7 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types) long *start, *end; int i; - start = PTRRELOC(&__start___stf_entry_barrier_fixup), + start = PTRRELOC(&__start___stf_entry_barrier_fixup); end = PTRRELOC(&__stop___stf_entry_barrier_fixup); instrs[0] = 0x60000000; /* nop */ @@ -176,7 +176,7 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types) long *start, *end; int i; - start = PTRRELOC(&__start___stf_exit_barrier_fixup), + start = PTRRELOC(&__start___stf_exit_barrier_fixup); end = PTRRELOC(&__stop___stf_exit_barrier_fixup); instrs[0] = 0x60000000; /* nop */ @@ -344,7 +344,7 @@ void do_rfi_flush_fixups(enum l1d_flush_type types) long *start, *end; int i; - start = PTRRELOC(&__start___rfi_flush_fixup), + start = PTRRELOC(&__start___rfi_flush_fixup); end = PTRRELOC(&__stop___rfi_flush_fixup); instrs[0] = 0x60000000; /* nop */ @@ -417,7 +417,7 @@ void do_barrier_nospec_fixups(bool enable) { void *start, *end; - start = PTRRELOC(&__start___barrier_nospec_fixup), + start = PTRRELOC(&__start___barrier_nospec_fixup); end = PTRRELOC(&__stop___barrier_nospec_fixup); do_barrier_nospec_fixups_range(enable, start, end); diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 855457ed09b5..bf7a7d62ae8b 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -32,6 +32,10 @@ extern char system_call_vectored_emulate[]; #define XER_OV32 0x00080000U #define XER_CA32 0x00040000U +#ifdef CONFIG_VSX +#define VSX_REGISTER_XTP(rd) ((((rd) & 1) << 5) | ((rd) & 0xfe)) +#endif + #ifdef CONFIG_PPC_FPU /* * Functions in ldstfp.S @@ -279,6 +283,19 @@ static nokprobe_inline void do_byte_reverse(void *ptr, int nb) up[1] = tmp; break; } + case 32: { + unsigned long *up = (unsigned long *)ptr; + unsigned long tmp; + + tmp = byterev_8(up[0]); + up[0] = byterev_8(up[3]); + up[3] = tmp; + tmp = byterev_8(up[2]); + up[2] = byterev_8(up[1]); + up[1] = tmp; + break; + } + #endif default: WARN_ON_ONCE(1); @@ -709,6 +726,8 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, reg->d[0] = reg->d[1] = 0; switch (op->element_size) { + case 32: + /* [p]lxvp[x] */ case 16: /* whole vector; lxv[x] or lxvl[l] */ if (size == 0) @@ -717,7 +736,7 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, if (IS_LE && (op->vsx_flags & VSX_LDLEFT)) 
rev = !rev; if (rev) - do_byte_reverse(reg, 16); + do_byte_reverse(reg, size); break; case 8: /* scalar loads, lxvd2x, lxvdsx */ @@ -793,6 +812,20 @@ void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, size = GETSIZE(op->type); switch (op->element_size) { + case 32: + /* [p]stxvp[x] */ + if (size == 0) + break; + if (rev) { + /* reverse 32 bytes */ + buf.d[0] = byterev_8(reg->d[3]); + buf.d[1] = byterev_8(reg->d[2]); + buf.d[2] = byterev_8(reg->d[1]); + buf.d[3] = byterev_8(reg->d[0]); + reg = &buf; + } + memcpy(mem, reg, size); + break; case 16: /* stxv, stxvx, stxvl, stxvll */ if (size == 0) @@ -861,28 +894,43 @@ static nokprobe_inline int do_vsx_load(struct instruction_op *op, bool cross_endian) { int reg = op->reg; - u8 mem[16]; - union vsx_reg buf; + int i, j, nr_vsx_regs; + u8 mem[32]; + union vsx_reg buf[2]; int size = GETSIZE(op->type); if (!address_ok(regs, ea, size) || copy_mem_in(mem, ea, size, regs)) return -EFAULT; - emulate_vsx_load(op, &buf, mem, cross_endian); + nr_vsx_regs = size / sizeof(__vector128); + emulate_vsx_load(op, buf, mem, cross_endian); preempt_disable(); if (reg < 32) { /* FP regs + extensions */ if (regs->msr & MSR_FP) { - load_vsrn(reg, &buf); + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + load_vsrn(reg + i, &buf[j].v); + } } else { - current->thread.fp_state.fpr[reg][0] = buf.d[0]; - current->thread.fp_state.fpr[reg][1] = buf.d[1]; + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + current->thread.fp_state.fpr[reg + i][0] = buf[j].d[0]; + current->thread.fp_state.fpr[reg + i][1] = buf[j].d[1]; + } } } else { - if (regs->msr & MSR_VEC) - load_vsrn(reg, &buf); - else - current->thread.vr_state.vr[reg - 32] = buf.v; + if (regs->msr & MSR_VEC) { + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + load_vsrn(reg + i, &buf[j].v); + } + } else { + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + current->thread.vr_state.vr[reg - 32 + i] = buf[j].v; + } + } } preempt_enable(); return 0; @@ -893,30 +941,45 @@ static nokprobe_inline int do_vsx_store(struct instruction_op *op, bool cross_endian) { int reg = op->reg; - u8 mem[16]; - union vsx_reg buf; + int i, j, nr_vsx_regs; + u8 mem[32]; + union vsx_reg buf[2]; int size = GETSIZE(op->type); if (!address_ok(regs, ea, size)) return -EFAULT; + nr_vsx_regs = size / sizeof(__vector128); preempt_disable(); if (reg < 32) { /* FP regs + extensions */ if (regs->msr & MSR_FP) { - store_vsrn(reg, &buf); + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + store_vsrn(reg + i, &buf[j].v); + } } else { - buf.d[0] = current->thread.fp_state.fpr[reg][0]; - buf.d[1] = current->thread.fp_state.fpr[reg][1]; + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + buf[j].d[0] = current->thread.fp_state.fpr[reg + i][0]; + buf[j].d[1] = current->thread.fp_state.fpr[reg + i][1]; + } } } else { - if (regs->msr & MSR_VEC) - store_vsrn(reg, &buf); - else - buf.v = current->thread.vr_state.vr[reg - 32]; + if (regs->msr & MSR_VEC) { + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + store_vsrn(reg + i, &buf[j].v); + } + } else { + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? 
nr_vsx_regs - i - 1 : i; + buf[j].v = current->thread.vr_state.vr[reg - 32 + i]; + } + } } preempt_enable(); - emulate_vsx_store(op, &buf, mem, cross_endian); + emulate_vsx_store(op, buf, mem, cross_endian); return copy_mem_out(mem, ea, size, regs); } #endif /* CONFIG_VSX */ @@ -1346,6 +1409,9 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, switch (opcode) { #ifdef __powerpc64__ case 1: + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return -1; + prefix_r = GET_PREFIX_R(word); ra = GET_PREFIX_RA(suffix); rd = (suffix >> 21) & 0x1f; @@ -2400,6 +2466,14 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->vsx_flags = VSX_SPLAT; break; + case 333: /* lxvpx */ + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return -1; + op->reg = VSX_REGISTER_XTP(rd); + op->type = MKOP(LOAD_VSX, 0, 32); + op->element_size = 32; + break; + case 364: /* lxvwsx */ op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 4); @@ -2428,6 +2502,13 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, VSX_CHECK_VEC; break; } + case 461: /* stxvpx */ + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return -1; + op->reg = VSX_REGISTER_XTP(rd); + op->type = MKOP(STORE_VSX, 0, 32); + op->element_size = 32; + break; case 524: /* lxsspx */ op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 4); @@ -2669,6 +2750,22 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #endif #ifdef CONFIG_VSX + case 6: + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return -1; + op->ea = dqform_ea(word, regs); + op->reg = VSX_REGISTER_XTP(rd); + op->element_size = 32; + switch (word & 0xf) { + case 0: /* lxvp */ + op->type = MKOP(LOAD_VSX, 0, 32); + break; + case 1: /* stxvp */ + op->type = MKOP(STORE_VSX, 0, 32); + break; + } + break; + case 61: /* stfdp, lxv, stxsd, stxssp, stxv */ switch (word & 7) { case 0: /* stfdp with LSB of DS field = 0 */ @@ -2733,6 +2830,9 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, } break; case 1: /* Prefixed instructions */ + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return -1; + prefix_r = GET_PREFIX_R(word); ra = GET_PREFIX_RA(suffix); op->update_reg = ra; @@ -2751,6 +2851,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 41: /* plwa */ op->type = MKOP(LOAD, PREFIXED | SIGNEXT, 4); break; +#ifdef CONFIG_VSX case 42: /* plxsd */ op->reg = rd + 32; op->type = MKOP(LOAD_VSX, PREFIXED, 8); @@ -2791,18 +2892,33 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->element_size = 16; op->vsx_flags = VSX_CHECK_VEC; break; +#endif /* CONFIG_VSX */ case 56: /* plq */ op->type = MKOP(LOAD, PREFIXED, 16); break; case 57: /* pld */ op->type = MKOP(LOAD, PREFIXED, 8); break; - case 60: /* stq */ +#ifdef CONFIG_VSX + case 58: /* plxvp */ + op->reg = VSX_REGISTER_XTP(rd); + op->type = MKOP(LOAD_VSX, PREFIXED, 32); + op->element_size = 32; + break; +#endif /* CONFIG_VSX */ + case 60: /* pstq */ op->type = MKOP(STORE, PREFIXED, 16); break; case 61: /* pstd */ op->type = MKOP(STORE, PREFIXED, 8); break; +#ifdef CONFIG_VSX + case 62: /* pstxvp */ + op->reg = VSX_REGISTER_XTP(rd); + op->type = MKOP(STORE_VSX, PREFIXED, 32); + op->element_size = 32; + break; +#endif /* CONFIG_VSX */ } break; case 1: /* Type 01 Eight-Byte Register-to-Register */ diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c index 0a201b771477..783d1b85ecfe 100644 --- a/arch/powerpc/lib/test_emulate_step.c +++ 
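A quick host-side illustration of the new 32-byte handling above: the case 32 arms added to do_byte_reverse() and emulate_vsx_store() treat the paired-register image as four 64-bit words, swapping the outer and inner word pairs while byte-reversing each one, which amounts to a full 32-byte reversal. The sketch below is plain C and not kernel code; __builtin_bswap64 stands in for the kernel's byterev_8().

#include <stdint.h>
#include <stdio.h>

static uint64_t byterev_8(uint64_t x)
{
	return __builtin_bswap64(x);	/* stand-in for the kernel helper */
}

/* Mirrors the new "case 32" in do_byte_reverse(): reverse all 32 bytes. */
static void byte_reverse_32(void *ptr)
{
	uint64_t *up = ptr;
	uint64_t tmp;

	tmp = byterev_8(up[0]);
	up[0] = byterev_8(up[3]);
	up[3] = tmp;
	tmp = byterev_8(up[2]);
	up[2] = byterev_8(up[1]);
	up[1] = tmp;
}

int main(void)
{
	union { uint8_t b[32]; uint64_t d[4]; } buf;
	int i;

	for (i = 0; i < 32; i++)
		buf.b[i] = i;
	byte_reverse_32(buf.d);
	for (i = 0; i < 32; i++)
		printf("%02x ", buf.b[i]);	/* prints 1f 1e 1d ... 01 00 */
	printf("\n");
	return 0;
}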
b/arch/powerpc/lib/test_emulate_step.c @@ -612,6 +612,273 @@ static void __init test_lxvd2x_stxvd2x(void) } #endif /* CONFIG_VSX */ +#ifdef CONFIG_VSX +static void __init test_lxvp_stxvp(void) +{ + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c[2]; + u32 cached_b[8]; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("lxvp", "SKIP (!CPU_FTR_ARCH_31)"); + show_result("stxvp", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(&regs); + + /*** lxvp ***/ + + cached_b[0] = c[0].b[0] = 18233; + cached_b[1] = c[0].b[1] = 34863571; + cached_b[2] = c[0].b[2] = 834; + cached_b[3] = c[0].b[3] = 6138911; + cached_b[4] = c[1].b[0] = 1234; + cached_b[5] = c[1].b[1] = 5678; + cached_b[6] = c[1].b[2] = 91011; + cached_b[7] = c[1].b[3] = 121314; + + regs.gpr[4] = (unsigned long)&c[0].a; + + /* + * lxvp XTp,DQ(RA) + * XTp = 32xTX + 2xTp + * let TX=1 Tp=1 RA=4 DQ=0 + */ + stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LXVP(34, 4, 0))); + + if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) { + show_result("lxvp", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("lxvp", "PASS (!CPU_FTR_VSX)"); + else + show_result("lxvp", "FAIL"); + } + + /*** stxvp ***/ + + c[0].b[0] = 21379463; + c[0].b[1] = 87; + c[0].b[2] = 374234; + c[0].b[3] = 4; + c[1].b[0] = 90; + c[1].b[1] = 122; + c[1].b[2] = 555; + c[1].b[3] = 32144; + + /* + * stxvp XSp,DQ(RA) + * XSp = 32xSX + 2xSp + * let SX=1 Sp=1 RA=4 DQ=0 + */ + stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STXVP(34, 4, 0))); + + if (stepped == 1 && cached_b[0] == c[0].b[0] && cached_b[1] == c[0].b[1] && + cached_b[2] == c[0].b[2] && cached_b[3] == c[0].b[3] && + cached_b[4] == c[1].b[0] && cached_b[5] == c[1].b[1] && + cached_b[6] == c[1].b[2] && cached_b[7] == c[1].b[3] && + cpu_has_feature(CPU_FTR_VSX)) { + show_result("stxvp", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("stxvp", "PASS (!CPU_FTR_VSX)"); + else + show_result("stxvp", "FAIL"); + } +} +#else +static void __init test_lxvp_stxvp(void) +{ + show_result("lxvp", "SKIP (CONFIG_VSX is not set)"); + show_result("stxvp", "SKIP (CONFIG_VSX is not set)"); +} +#endif /* CONFIG_VSX */ + +#ifdef CONFIG_VSX +static void __init test_lxvpx_stxvpx(void) +{ + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c[2]; + u32 cached_b[8]; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("lxvpx", "SKIP (!CPU_FTR_ARCH_31)"); + show_result("stxvpx", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(&regs); + + /*** lxvpx ***/ + + cached_b[0] = c[0].b[0] = 18233; + cached_b[1] = c[0].b[1] = 34863571; + cached_b[2] = c[0].b[2] = 834; + cached_b[3] = c[0].b[3] = 6138911; + cached_b[4] = c[1].b[0] = 1234; + cached_b[5] = c[1].b[1] = 5678; + cached_b[6] = c[1].b[2] = 91011; + cached_b[7] = c[1].b[3] = 121314; + + regs.gpr[3] = (unsigned long)&c[0].a; + regs.gpr[4] = 0; + + /* + * lxvpx XTp,RA,RB + * XTp = 32xTX + 2xTp + * let TX=1 Tp=1 RA=3 RB=4 + */ + stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LXVPX(34, 3, 4))); + + if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) { + show_result("lxvpx", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("lxvpx", "PASS (!CPU_FTR_VSX)"); + else + show_result("lxvpx", "FAIL"); + } + + /*** stxvpx ***/ + + c[0].b[0] = 21379463; + c[0].b[1] = 87; + c[0].b[2] = 374234; + c[0].b[3] = 4; + c[1].b[0] = 90; + c[1].b[1] = 122; + c[1].b[2] = 555; + c[1].b[3] = 32144; + + /* + * stxvpx XSp,RA,RB + * XSp = 32xSX + 2xSp + * let SX=1 Sp=1 RA=3 RB=4 + */ + 
stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STXVPX(34, 3, 4))); + + if (stepped == 1 && cached_b[0] == c[0].b[0] && cached_b[1] == c[0].b[1] && + cached_b[2] == c[0].b[2] && cached_b[3] == c[0].b[3] && + cached_b[4] == c[1].b[0] && cached_b[5] == c[1].b[1] && + cached_b[6] == c[1].b[2] && cached_b[7] == c[1].b[3] && + cpu_has_feature(CPU_FTR_VSX)) { + show_result("stxvpx", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("stxvpx", "PASS (!CPU_FTR_VSX)"); + else + show_result("stxvpx", "FAIL"); + } +} +#else +static void __init test_lxvpx_stxvpx(void) +{ + show_result("lxvpx", "SKIP (CONFIG_VSX is not set)"); + show_result("stxvpx", "SKIP (CONFIG_VSX is not set)"); +} +#endif /* CONFIG_VSX */ + +#ifdef CONFIG_VSX +static void __init test_plxvp_pstxvp(void) +{ + struct ppc_inst instr; + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c[2]; + u32 cached_b[8]; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("plxvp", "SKIP (!CPU_FTR_ARCH_31)"); + show_result("pstxvp", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + /*** plxvp ***/ + + cached_b[0] = c[0].b[0] = 18233; + cached_b[1] = c[0].b[1] = 34863571; + cached_b[2] = c[0].b[2] = 834; + cached_b[3] = c[0].b[3] = 6138911; + cached_b[4] = c[1].b[0] = 1234; + cached_b[5] = c[1].b[1] = 5678; + cached_b[6] = c[1].b[2] = 91011; + cached_b[7] = c[1].b[3] = 121314; + + init_pt_regs(&regs); + regs.gpr[3] = (unsigned long)&c[0].a; + + /* + * plxvp XTp,D(RA),R + * XTp = 32xTX + 2xTp + * let RA=3 R=0 D=d0||d1=0 R=0 Tp=1 TX=1 + */ + instr = ppc_inst_prefix(PPC_RAW_PLXVP(34, 0, 3, 0) >> 32, + PPC_RAW_PLXVP(34, 0, 3, 0) & 0xffffffff); + + stepped = emulate_step(&regs, instr); + if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) { + show_result("plxvp", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("plxvp", "PASS (!CPU_FTR_VSX)"); + else + show_result("plxvp", "FAIL"); + } + + /*** pstxvp ***/ + + c[0].b[0] = 21379463; + c[0].b[1] = 87; + c[0].b[2] = 374234; + c[0].b[3] = 4; + c[1].b[0] = 90; + c[1].b[1] = 122; + c[1].b[2] = 555; + c[1].b[3] = 32144; + + /* + * pstxvp XSp,D(RA),R + * XSp = 32xSX + 2xSp + * let RA=3 D=d0||d1=0 R=0 Sp=1 SX=1 + */ + instr = ppc_inst_prefix(PPC_RAW_PSTXVP(34, 0, 3, 0) >> 32, + PPC_RAW_PSTXVP(34, 0, 3, 0) & 0xffffffff); + + stepped = emulate_step(&regs, instr); + + if (stepped == 1 && cached_b[0] == c[0].b[0] && cached_b[1] == c[0].b[1] && + cached_b[2] == c[0].b[2] && cached_b[3] == c[0].b[3] && + cached_b[4] == c[1].b[0] && cached_b[5] == c[1].b[1] && + cached_b[6] == c[1].b[2] && cached_b[7] == c[1].b[3] && + cpu_has_feature(CPU_FTR_VSX)) { + show_result("pstxvp", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("pstxvp", "PASS (!CPU_FTR_VSX)"); + else + show_result("pstxvp", "FAIL"); + } +} +#else +static void __init test_plxvp_pstxvp(void) +{ + show_result("plxvp", "SKIP (CONFIG_VSX is not set)"); + show_result("pstxvp", "SKIP (CONFIG_VSX is not set)"); +} +#endif /* CONFIG_VSX */ + static void __init run_tests_load_store(void) { test_ld(); @@ -628,6 +895,9 @@ static void __init run_tests_load_store(void) test_plfd_pstfd(); test_lvx_stvx(); test_lxvd2x_stxvd2x(); + test_lxvp_stxvp(); + test_lxvpx_stxvpx(); + test_plxvp_pstxvp(); } struct compute_test { diff --git a/arch/powerpc/mm/book3s32/Makefile b/arch/powerpc/mm/book3s32/Makefile index 1732eaa740a9..3f972db17761 100644 --- a/arch/powerpc/mm/book3s32/Makefile +++ b/arch/powerpc/mm/book3s32/Makefile @@ -6,4 +6,4 @@ ifdef CONFIG_KASAN CFLAGS_mmu.o += 
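For reference, the register numbering the new tests rely on: their comments give XTp = 32xTX + 2xTp, so TX=1, Tp=1 selects VSR 34, and sstep.c recovers that number from the 5-bit instruction field with VSX_REGISTER_XTP(). The small sketch below is a host-side illustration only; the assumption that the field packs Tp above TX (so TX=1, Tp=1 encodes as 3) is read off the macro, not quoted from the ISA. It also shows the little-endian half swap that do_vsx_load()/do_vsx_store() apply to the two 16-byte halves of a 32-byte access.

#include <stdio.h>

/* Copied from the sstep.c hunk above. */
#define VSX_REGISTER_XTP(rd)	((((rd) & 1) << 5) | ((rd) & 0xfe))

int main(void)
{
	int size = 32;			/* GETSIZE(op->type) for lxvp/stxvp */
	int nr_vsx_regs = size / 16;	/* sizeof(__vector128) is 16 bytes */
	int is_le = 1;			/* stand-in for the kernel's IS_LE */
	int i;

	/* Field value 3 = Tp:1, TX:1 (assumed packing) -> VSR 32*1 + 2*1 = 34. */
	printf("VSX_REGISTER_XTP(3) = %d\n", VSX_REGISTER_XTP(3));

	/* On LE the buffer halves are applied to the register pair in reverse. */
	for (i = 0; i < nr_vsx_regs; i++) {
		int j = is_le ? nr_vsx_regs - i - 1 : i;
		printf("VSR (reg + %d) <- buffer half %d\n", i, j);
	}
	return 0;
}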
-DDISABLE_BRANCH_PROFILING endif -obj-y += mmu.o hash_low.o mmu_context.o tlb.o +obj-y += mmu.o hash_low.o mmu_context.o tlb.o nohash_low.o diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index b2c912e517b9..0e6dc830c38b 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -26,12 +26,11 @@ #include <asm/feature-fixups.h> #include <asm/code-patching-asm.h> -#ifdef CONFIG_SMP - .section .bss - .align 2 -mmu_hash_lock: - .space 4 -#endif /* CONFIG_SMP */ +#ifdef CONFIG_PTE_64BIT +#define PTE_FLAGS_OFFSET 4 /* offset of PTE flags, in bytes */ +#else +#define PTE_FLAGS_OFFSET 0 +#endif /* * Load a PTE into the hash table, if possible. @@ -65,13 +64,14 @@ _GLOBAL(hash_page) /* Get PTE (linux-style) and check access */ lis r0, TASK_SIZE@h /* check if kernel address */ cmplw 0,r4,r0 + mfspr r8,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */ - mfspr r5, SPRN_SPRG_PGDIR /* phys page-table root */ + lwz r5,PGDIR(r8) /* virt page-table root */ blt+ 112f /* assume user more likely */ - lis r5, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ - addi r5 ,r5 ,(swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ + lis r5,swapper_pg_dir@ha /* if kernel address, use */ + addi r5,r5,swapper_pg_dir@l /* kernel page table */ rlwimi r3,r9,32-12,29,29 /* MSR_PR -> _PAGE_USER */ -112: +112: tophys(r5, r5) #ifndef CONFIG_PTE_64BIT rlwimi r5,r4,12,20,29 /* insert top 10 bits of address */ lwz r8,0(r5) /* get pmd entry */ @@ -94,25 +94,33 @@ _GLOBAL(hash_page) rlwimi r8,r4,22,20,29 /* insert next 10 bits of address */ #else rlwimi r8,r4,23,20,28 /* compute pte address */ + /* + * If PTE_64BIT is set, the low word is the flags word; use that + * word for locking since it contains all the interesting bits. + */ + addi r8,r8,PTE_FLAGS_OFFSET #endif - rlwinm r0,r3,32-3,24,24 /* _PAGE_RW access -> _PAGE_DIRTY */ - ori r0,r0,_PAGE_ACCESSED|_PAGE_HASHPTE /* * Update the linux PTE atomically. We do the lwarx up-front * because almost always, there won't be a permission violation * and there won't already be an HPTE, and thus we will have * to update the PTE to set _PAGE_HASHPTE. -- paulus. - * - * If PTE_64BIT is set, the low word is the flags word; use that - * word for locking since it contains all the interesting bits. */ -#if (PTE_FLAGS_OFFSET != 0) - addi r8,r8,PTE_FLAGS_OFFSET -#endif .Lretry: lwarx r6,0,r8 /* get linux-style pte, flag word */ +#ifdef CONFIG_PPC_KUAP + mfsrin r5,r4 + rlwinm r0,r9,28,_PAGE_RW /* MSR[PR] => _PAGE_RW */ + rlwinm r5,r5,12,_PAGE_RW /* Ks => _PAGE_RW */ + andc r5,r5,r0 /* Ks & ~MSR[PR] */ + andc r5,r6,r5 /* Clear _PAGE_RW when Ks = 1 && MSR[PR] = 0 */ + andc. r5,r3,r5 /* check access & ~permission */ +#else andc. r5,r3,r6 /* check access & ~permission */ +#endif + rlwinm r0,r3,32-3,24,24 /* _PAGE_RW access -> _PAGE_DIRTY */ + ori r0,r0,_PAGE_ACCESSED|_PAGE_HASHPTE #ifdef CONFIG_SMP bne- .Lhash_page_out /* return if access not permitted */ #else @@ -179,12 +187,6 @@ _GLOBAL(add_hash_page) mflr r0 stw r0,4(r1) - /* Convert context and va to VSID */ - mulli r3,r3,897*16 /* multiply context by context skew */ - rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */ - mulli r0,r0,0x111 /* multiply by ESID skew */ - add r3,r3,r0 /* note create_hpte trims to 24 bits */ - #ifdef CONFIG_SMP lwz r8,TASK_CPU(r2) /* to go in mmu_hash_lock */ oris r8,r8,12 @@ -248,6 +250,12 @@ _GLOBAL(add_hash_page) stwcx. 
r5,0,r8 bne- 1b + /* Convert context and va to VSID */ + mulli r3,r3,897*16 /* multiply context by context skew */ + rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */ + mulli r0,r0,0x111 /* multiply by ESID skew */ + add r3,r3,r0 /* note create_hpte trims to 24 bits */ + bl create_hpte 9: @@ -350,11 +358,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) beq+ 10f /* no PTE: go look for an empty slot */ tlbie r4 - lis r4, (htab_hash_searches - PAGE_OFFSET)@ha - lwz r6, (htab_hash_searches - PAGE_OFFSET)@l(r4) - addi r6,r6,1 /* count how many searches we do */ - stw r6, (htab_hash_searches - PAGE_OFFSET)@l(r4) - /* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */ mtctr r0 addi r4,r3,-HPTE_SIZE @@ -384,12 +387,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ beq+ .Lfound_empty - /* update counter of times that the primary PTEG is full */ - lis r4, (primary_pteg_full - PAGE_OFFSET)@ha - lwz r6, (primary_pteg_full - PAGE_OFFSET)@l(r4) - addi r6,r6,1 - stw r6, (primary_pteg_full - PAGE_OFFSET)@l(r4) - patch_site 0f, patch__hash_page_C /* Search the secondary PTEG for an empty slot */ ori r5,r5,PTE_H /* set H (secondary hash) bit */ @@ -411,30 +408,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) * and we know there is a definite (although small) speed * advantage to putting the PTE in the primary PTEG, we always * put the PTE in the primary PTEG. - * - * In addition, we skip any slot that is mapping kernel text in - * order to avoid a deadlock when not using BAT mappings if - * trying to hash in the kernel hash code itself after it has - * already taken the hash table lock. This works in conjunction - * with pre-faulting of the kernel text. - * - * If the hash table bucket is full of kernel text entries, we'll - * lockup here but that shouldn't happen */ -1: lis r4, (next_slot - PAGE_OFFSET)@ha /* get next evict slot */ + lis r4, (next_slot - PAGE_OFFSET)@ha /* get next evict slot */ lwz r6, (next_slot - PAGE_OFFSET)@l(r4) addi r6,r6,HPTE_SIZE /* search for candidate */ andi. r6,r6,7*HPTE_SIZE stw r6,next_slot@l(r4) add r4,r3,r6 - LDPTE r0,HPTE_SIZE/2(r4) /* get PTE second word */ - clrrwi r0,r0,12 - lis r6,etext@h - ori r6,r6,etext@l /* get etext */ - tophys(r6,r6) - cmpl cr0,r0,r6 /* compare and try again */ - blt 1b #ifndef CONFIG_SMP /* Store PTE in PTEG */ @@ -482,10 +463,6 @@ _ASM_NOKPROBE_SYMBOL(create_hpte) .align 2 next_slot: .space 4 -primary_pteg_full: - .space 4 -htab_hash_searches: - .space 4 .previous /* @@ -517,8 +494,9 @@ _GLOBAL(flush_hash_pages) rlwimi r5,r4,22,20,29 #else rlwimi r5,r4,23,20,28 + addi r5,r5,PTE_FLAGS_OFFSET #endif -1: lwz r0,PTE_FLAGS_OFFSET(r5) +1: lwz r0,0(r5) cmpwi cr1,r6,1 andi. r0,r0,_PAGE_HASHPTE bne 2f @@ -562,9 +540,6 @@ _GLOBAL(flush_hash_pages) * already clear, we're done (for this pte). If not, * clear it (atomically) and proceed. -- paulus. */ -#if (PTE_FLAGS_OFFSET != 0) - addi r5,r5,PTE_FLAGS_OFFSET -#endif 33: lwarx r8,0,r5 /* fetch the pte flags word */ andi. 
r0,r8,_PAGE_HASHPTE beq 8f /* done if HASHPTE is already clear */ @@ -633,77 +608,3 @@ _GLOBAL(flush_hash_pages) .previous EXPORT_SYMBOL(flush_hash_pages) _ASM_NOKPROBE_SYMBOL(flush_hash_pages) - -/* - * Flush an entry from the TLB - */ -_GLOBAL(_tlbie) -#ifdef CONFIG_SMP - lwz r8,TASK_CPU(r2) - oris r8,r8,11 - mfmsr r10 - rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ - rlwinm r0,r0,0,28,26 /* clear DR */ - mtmsr r0 - isync - lis r9,mmu_hash_lock@h - ori r9,r9,mmu_hash_lock@l - tophys(r9,r9) -10: lwarx r7,0,r9 - cmpwi 0,r7,0 - bne- 10b - stwcx. r8,0,r9 - bne- 10b - eieio - tlbie r3 - sync - TLBSYNC - li r0,0 - stw r0,0(r9) /* clear mmu_hash_lock */ - mtmsr r10 - isync -#else /* CONFIG_SMP */ - tlbie r3 - sync -#endif /* CONFIG_SMP */ - blr -_ASM_NOKPROBE_SYMBOL(_tlbie) - -/* - * Flush the entire TLB. 603/603e only - */ -_GLOBAL(_tlbia) -#if defined(CONFIG_SMP) - lwz r8,TASK_CPU(r2) - oris r8,r8,10 - mfmsr r10 - rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ - rlwinm r0,r0,0,28,26 /* clear DR */ - mtmsr r0 - isync - lis r9,mmu_hash_lock@h - ori r9,r9,mmu_hash_lock@l - tophys(r9,r9) -10: lwarx r7,0,r9 - cmpwi 0,r7,0 - bne- 10b - stwcx. r8,0,r9 - bne- 10b -#endif /* CONFIG_SMP */ - li r5, 32 - lis r4, KERNELBASE@h - mtctr r5 - sync -0: tlbie r4 - addi r4, r4, 0x1000 - bdnz 0b - sync -#ifdef CONFIG_SMP - TLBSYNC - li r0,0 - stw r0,0(r9) /* clear mmu_hash_lock */ - mtmsr r10 - isync -#endif /* CONFIG_SMP */ - blr -_ASM_NOKPROBE_SYMBOL(_tlbia) diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index a59e7ec98180..859e5bd603ac 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -33,19 +33,23 @@ u8 __initdata early_hash[SZ_256K] __aligned(SZ_256K) = {0}; -struct hash_pte *Hash; -static unsigned long Hash_size, Hash_mask; -unsigned long _SDR1; -static unsigned int hash_mb, hash_mb2; +static struct hash_pte __initdata *Hash = (struct hash_pte *)early_hash; +static unsigned long __initdata Hash_size, Hash_mask; +static unsigned int __initdata hash_mb, hash_mb2; +unsigned long __initdata _SDR1; struct ppc_bat BATS[8][2]; /* 8 pairs of IBAT, DBAT */ -struct batrange { /* stores address ranges mapped by BATs */ +static struct batrange { /* stores address ranges mapped by BATs */ unsigned long start; unsigned long limit; phys_addr_t phys; } bat_addrs[8]; +#ifdef CONFIG_SMP +unsigned long mmu_hash_lock; +#endif + /* * Return PA for this VA if it is mapped by a BAT, or 0 */ @@ -157,11 +161,9 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) unsigned long done; unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; - if (__map_without_bats) { - pr_debug("RAM mapped without BATs\n"); - return base; - } - if (debug_pagealloc_enabled()) { + + if (debug_pagealloc_enabled() || __map_without_bats) { + pr_debug_once("Read-Write memory mapped without BATs\n"); if (base >= border) return base; if (top >= border) @@ -304,11 +306,11 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys, /* * Preload a translation in the hash table */ -void hash_preload(struct mm_struct *mm, unsigned long ea) +static void hash_preload(struct mm_struct *mm, unsigned long ea) { pmd_t *pmd; - if (!Hash) + if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) return; pmd = pmd_off(mm, ea); if (!pmd_none(*pmd)) @@ -469,3 +471,7 @@ void __init setup_kuap(bool disabled) pr_warn("KUAP cannot be disabled yet on 6xx when compiled in\n"); } #endif + +void __init early_init_mmu(void) +{ +} diff --git a/arch/powerpc/mm/book3s32/nohash_low.S 
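The hash_page() change above (the CONFIG_PPC_KUAP block) folds Kernel Userspace Access Prevention into the permission test: when the segment's Ks key bit is set and MSR[PR] is 0, _PAGE_RW is cleared from the PTE before the usual "access & ~permission" check, so an unintended kernel write to a user page takes the fault path. Below is a C rendering of that boolean logic, as a sketch only; the _PAGE_RW value is illustrative and the helper name is made up.

#include <stdbool.h>
#include <stdio.h>

#define _PAGE_RW	0x400	/* illustrative bit value, not the real define */

/* access/pte are PTE-style flag words, seg_ks is the segment's Ks bit. */
static bool access_denied(unsigned int access, unsigned int pte,
			  bool seg_ks, bool msr_pr)
{
	unsigned int perm = pte;

	if (seg_ks && !msr_pr)		/* Ks & ~MSR[PR] */
		perm &= ~_PAGE_RW;	/* clear _PAGE_RW, as in the asm */

	return (access & ~perm) != 0;	/* "check access & ~permission" */
}

int main(void)
{
	/* Kernel-mode (PR=0) write to a user page while Ks=1: denied. */
	printf("%d\n", access_denied(_PAGE_RW, _PAGE_RW, true, false));
	/* Same write with the Ks restriction lifted: allowed. */
	printf("%d\n", access_denied(_PAGE_RW, _PAGE_RW, false, false));
	return 0;
}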
b/arch/powerpc/mm/book3s32/nohash_low.S new file mode 100644 index 000000000000..19f418b0ed2d --- /dev/null +++ b/arch/powerpc/mm/book3s32/nohash_low.S @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * This file contains low-level assembler routines for managing + * the PowerPC 603 tlb invalidation. + */ + +#include <asm/page.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> + +/* + * Flush an entry from the TLB + */ +#ifdef CONFIG_SMP +_GLOBAL(_tlbie) + lwz r8,TASK_CPU(r2) + oris r8,r8,11 + mfmsr r10 + rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ + rlwinm r0,r0,0,28,26 /* clear DR */ + mtmsr r0 + isync + lis r9,mmu_hash_lock@h + ori r9,r9,mmu_hash_lock@l + tophys(r9,r9) +10: lwarx r7,0,r9 + cmpwi 0,r7,0 + bne- 10b + stwcx. r8,0,r9 + bne- 10b + eieio + tlbie r3 + sync + TLBSYNC + li r0,0 + stw r0,0(r9) /* clear mmu_hash_lock */ + mtmsr r10 + isync + blr +_ASM_NOKPROBE_SYMBOL(_tlbie) +#endif /* CONFIG_SMP */ + +/* + * Flush the entire TLB. 603/603e only + */ +_GLOBAL(_tlbia) +#if defined(CONFIG_SMP) + lwz r8,TASK_CPU(r2) + oris r8,r8,10 + mfmsr r10 + rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ + rlwinm r0,r0,0,28,26 /* clear DR */ + mtmsr r0 + isync + lis r9,mmu_hash_lock@h + ori r9,r9,mmu_hash_lock@l + tophys(r9,r9) +10: lwarx r7,0,r9 + cmpwi 0,r7,0 + bne- 10b + stwcx. r8,0,r9 + bne- 10b +#endif /* CONFIG_SMP */ + li r5, 32 + lis r4, KERNELBASE@h + mtctr r5 + sync +0: tlbie r4 + addi r4, r4, 0x1000 + bdnz 0b + sync +#ifdef CONFIG_SMP + TLBSYNC + li r0,0 + stw r0,0(r9) /* clear mmu_hash_lock */ + mtmsr r10 + isync +#endif /* CONFIG_SMP */ + blr +_ASM_NOKPROBE_SYMBOL(_tlbia) diff --git a/arch/powerpc/mm/book3s32/tlb.c b/arch/powerpc/mm/book3s32/tlb.c index b6c7427daa6f..19f0ef950d77 100644 --- a/arch/powerpc/mm/book3s32/tlb.c +++ b/arch/powerpc/mm/book3s32/tlb.c @@ -30,35 +30,6 @@ #include <mm/mmu_decl.h> /* - * Called when unmapping pages to flush entries from the TLB/hash table. - */ -void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr) -{ - unsigned long ptephys; - - if (Hash) { - ptephys = __pa(ptep) & PAGE_MASK; - flush_hash_pages(mm->context.id, addr, ptephys, 1); - } -} -EXPORT_SYMBOL(flush_hash_entry); - -/* - * Called at the end of a mmu_gather operation to make sure the - * TLB flush is completely done. - */ -void tlb_flush(struct mmu_gather *tlb) -{ - if (!Hash) { - /* - * 603 needs to flush the whole TLB here since - * it doesn't use a hash table. - */ - _tlbia(); - } -} - -/* * TLB flushing: * * - flush_tlb_mm(mm) flushes the specified mm context TLB's @@ -71,8 +42,12 @@ void tlb_flush(struct mmu_gather *tlb) * -- Cort */ -static void flush_range(struct mm_struct *mm, unsigned long start, - unsigned long end) +/* + * For each address in the range, find the pte for the address + * and check _PAGE_HASHPTE bit; if it is set, find and destroy + * the corresponding HPTE. 
+ */ +void hash__flush_range(struct mm_struct *mm, unsigned long start, unsigned long end) { pmd_t *pmd; unsigned long pmd_end; @@ -80,13 +55,6 @@ static void flush_range(struct mm_struct *mm, unsigned long start, unsigned int ctx = mm->context.id; start &= PAGE_MASK; - if (!Hash) { - if (end - start <= PAGE_SIZE) - _tlbie(start); - else - _tlbia(); - return; - } if (start >= end) return; end = (end - 1) | ~PAGE_MASK; @@ -105,28 +73,15 @@ static void flush_range(struct mm_struct *mm, unsigned long start, ++pmd; } } - -/* - * Flush kernel TLB entries in the given range - */ -void flush_tlb_kernel_range(unsigned long start, unsigned long end) -{ - flush_range(&init_mm, start, end); -} -EXPORT_SYMBOL(flush_tlb_kernel_range); +EXPORT_SYMBOL(hash__flush_range); /* * Flush all the (user) entries for the address space described by mm. */ -void flush_tlb_mm(struct mm_struct *mm) +void hash__flush_tlb_mm(struct mm_struct *mm) { struct vm_area_struct *mp; - if (!Hash) { - _tlbia(); - return; - } - /* * It is safe to go down the mm's list of vmas when called * from dup_mmap, holding mmap_lock. It would also be safe from @@ -134,38 +89,18 @@ void flush_tlb_mm(struct mm_struct *mm) * but it seems dup_mmap is the only SMP case which gets here. */ for (mp = mm->mmap; mp != NULL; mp = mp->vm_next) - flush_range(mp->vm_mm, mp->vm_start, mp->vm_end); + hash__flush_range(mp->vm_mm, mp->vm_start, mp->vm_end); } -EXPORT_SYMBOL(flush_tlb_mm); +EXPORT_SYMBOL(hash__flush_tlb_mm); -void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) +void hash__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) { struct mm_struct *mm; pmd_t *pmd; - if (!Hash) { - _tlbie(vmaddr); - return; - } mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm; pmd = pmd_off(mm, vmaddr); if (!pmd_none(*pmd)) flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1); } -EXPORT_SYMBOL(flush_tlb_page); - -/* - * For each address in the range, find the pte for the address - * and check _PAGE_HASHPTE bit; if it is set, find and destroy - * the corresponding HPTE. - */ -void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) -{ - flush_range(vma->vm_mm, start, end); -} -EXPORT_SYMBOL(flush_tlb_range); - -void __init early_init_mmu(void) -{ -} +EXPORT_SYMBOL(hash__flush_tlb_page); diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile index fd393b8be14f..1b56d3af47d4 100644 --- a/arch/powerpc/mm/book3s64/Makefile +++ b/arch/powerpc/mm/book3s64/Makefile @@ -17,7 +17,7 @@ endif obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hash_hugepage.o obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage_prot.o obj-$(CONFIG_SPAPR_TCE_IOMMU) += iommu_api.o -obj-$(CONFIG_PPC_MEM_KEYS) += pkeys.o +obj-$(CONFIG_PPC_PKEY) += pkeys.o # Instrumenting the SLB fault path can lead to duplicate SLB entries KCOV_INSTRUMENT_slb.o := n diff --git a/arch/powerpc/mm/book3s64/hash_4k.c b/arch/powerpc/mm/book3s64/hash_4k.c index 22e787123cdf..7de1a8a0c62a 100644 --- a/arch/powerpc/mm/book3s64/hash_4k.c +++ b/arch/powerpc/mm/book3s64/hash_4k.c @@ -54,7 +54,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, * PP bits. 
_PAGE_USER is already PP bit 0x2, so we only * need to add in 0x1 if it's a read-only user page */ - rflags = htab_convert_pte_flags(new_pte); + rflags = htab_convert_pte_flags(new_pte, flags); rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE); if (cpu_has_feature(CPU_FTR_NOEXECUTE) && diff --git a/arch/powerpc/mm/book3s64/hash_64k.c b/arch/powerpc/mm/book3s64/hash_64k.c index 7084ce2951e6..998c6817ed47 100644 --- a/arch/powerpc/mm/book3s64/hash_64k.c +++ b/arch/powerpc/mm/book3s64/hash_64k.c @@ -72,7 +72,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, * Handle the subpage protection bits */ subpg_pte = new_pte & ~subpg_prot; - rflags = htab_convert_pte_flags(subpg_pte); + rflags = htab_convert_pte_flags(subpg_pte, flags); if (cpu_has_feature(CPU_FTR_NOEXECUTE) && !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { @@ -260,7 +260,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access, new_pte |= _PAGE_DIRTY; } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); - rflags = htab_convert_pte_flags(new_pte); + rflags = htab_convert_pte_flags(new_pte, flags); rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE); if (cpu_has_feature(CPU_FTR_NOEXECUTE) && diff --git a/arch/powerpc/mm/book3s64/hash_hugepage.c b/arch/powerpc/mm/book3s64/hash_hugepage.c index 440823797de7..c0fabe6c5a12 100644 --- a/arch/powerpc/mm/book3s64/hash_hugepage.c +++ b/arch/powerpc/mm/book3s64/hash_hugepage.c @@ -57,7 +57,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, if (!(old_pmd & (H_PAGE_THP_HUGE | _PAGE_DEVMAP))) return 0; - rflags = htab_convert_pte_flags(new_pmd); + rflags = htab_convert_pte_flags(new_pmd, flags); #if 0 if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { diff --git a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c index 964467b3a776..b5e9fff8c217 100644 --- a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c +++ b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c @@ -70,7 +70,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, if (old_pte & (H_PAGE_THP_HUGE | _PAGE_DEVMAP)) return 0; - rflags = htab_convert_pte_flags(new_pte); + rflags = htab_convert_pte_flags(new_pte, flags); if (unlikely(mmu_psize == MMU_PAGE_16G)) offset = PTRS_PER_PUD; else diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index fd9c7f91b092..567e0c6b3978 100644 --- a/arch/powerpc/mm/book3s64/hash_pgtable.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -443,7 +443,7 @@ void hash__mark_initmem_nx(void) start = (unsigned long)__init_begin; end = (unsigned long)__init_end; - pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL)); + pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL), HPTE_USE_KERNEL_KEY); WARN_ON(!hash__change_memory_range(start, end, pp)); } diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 24702c0a92e0..73b06adb6eeb 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -112,6 +112,7 @@ int mmu_linear_psize = MMU_PAGE_4K; EXPORT_SYMBOL_GPL(mmu_linear_psize); int mmu_virtual_psize = MMU_PAGE_4K; int mmu_vmalloc_psize = MMU_PAGE_4K; +EXPORT_SYMBOL_GPL(mmu_vmalloc_psize); #ifdef CONFIG_SPARSEMEM_VMEMMAP int mmu_vmemmap_psize = MMU_PAGE_4K; #endif @@ -186,7 +187,7 @@ static struct mmu_psize_def mmu_psize_defaults_gp[] = { * - We make sure R is always set and never lost * - C is _PAGE_DIRTY, and *should* always be set for a writeable mapping 
*/ -unsigned long htab_convert_pte_flags(unsigned long pteflags) +unsigned long htab_convert_pte_flags(unsigned long pteflags, unsigned long flags) { unsigned long rflags = 0; @@ -240,7 +241,7 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) */ rflags |= HPTE_R_M; - rflags |= pte_to_hpte_pkey_bits(pteflags); + rflags |= pte_to_hpte_pkey_bits(pteflags, flags); return rflags; } @@ -255,7 +256,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, shift = mmu_psize_defs[psize].shift; step = 1 << shift; - prot = htab_convert_pte_flags(prot); + prot = htab_convert_pte_flags(prot, HPTE_USE_KERNEL_KEY); DBG("htab_bolt_mapping(%lx..%lx -> %lx (%lx,%d,%d)\n", vstart, vend, pstart, prot, psize, ssize); @@ -845,7 +846,6 @@ int hash__remove_section_mapping(unsigned long start, unsigned long end) { int rc = htab_remove_mapping(start, end, mmu_linear_psize, mmu_kernel_ssize); - WARN_ON(rc < 0); if (resize_hpt_for_hotplug(memblock_phys_mem_size()) == -ENOSPC) pr_warn("Hash collision while resizing HPT\n"); @@ -1317,12 +1317,14 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, vsid = get_kernel_vsid(ea, mmu_kernel_ssize); psize = mmu_vmalloc_psize; ssize = mmu_kernel_ssize; + flags |= HPTE_USE_KERNEL_KEY; break; case IO_REGION_ID: vsid = get_kernel_vsid(ea, mmu_kernel_ssize); psize = mmu_io_psize; ssize = mmu_kernel_ssize; + flags |= HPTE_USE_KERNEL_KEY; break; default: /* @@ -1901,7 +1903,7 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi) unsigned long hash; unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize); unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize); - unsigned long mode = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL)); + unsigned long mode = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL), HPTE_USE_KERNEL_KEY); long ret; hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize); diff --git a/arch/powerpc/mm/book3s64/iommu_api.c b/arch/powerpc/mm/book3s64/iommu_api.c index 563faa10bb66..685d7bb3d26f 100644 --- a/arch/powerpc/mm/book3s64/iommu_api.c +++ b/arch/powerpc/mm/book3s64/iommu_api.c @@ -263,7 +263,7 @@ long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem) goto unlock_exit; /* Are there still mappings? 
*/ - if (atomic_cmpxchg(&mem->mapped, 1, 0) != 1) { + if (atomic64_cmpxchg(&mem->mapped, 1, 0) != 1) { ++mem->used; ret = -EBUSY; goto unlock_exit; diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index e18ae50a275c..5b3a3bae21aa 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -136,12 +136,18 @@ static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot) return __pmd(pmd_val(pmd) | pgprot_val(pgprot)); } +/* + * At some point we should be able to get rid of + * pmd_mkhuge() and mk_huge_pmd() when we update all the + * other archs to mark the pmd huge in pfn_pmd() + */ pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot) { unsigned long pmdv; pmdv = (pfn << PAGE_SHIFT) & PTE_RPN_MASK; - return pmd_set_protbits(__pmd(pmdv), pgprot); + + return __pmd_mkhuge(pmd_set_protbits(__pmd(pmdv), pgprot)); } pmd_t mk_pmd(struct page *page, pgprot_t pgprot) diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index b1d091a97611..f1c6f264ed91 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -9,9 +9,12 @@ #include <asm/mmu_context.h> #include <asm/mmu.h> #include <asm/setup.h> +#include <asm/smp.h> + #include <linux/pkeys.h> #include <linux/of_fdt.h> + int num_pkey; /* Max number of pkeys supported */ /* * Keys marked in the reservation list cannot be allocated by userspace @@ -25,8 +28,8 @@ static u32 initial_allocation_mask __ro_after_init; * Even if we allocate keys with sys_pkey_alloc(), we need to make sure * other thread still find the access denied using the same keys. */ -static u64 default_amr = ~0x0UL; -static u64 default_iamr = 0x5555555555555555UL; +u64 default_amr __ro_after_init = ~0x0UL; +u64 default_iamr __ro_after_init = 0x5555555555555555UL; u64 default_uamor __ro_after_init; /* * Key used to implement PROT_EXEC mmap. Denies READ/WRITE @@ -89,12 +92,14 @@ static int scan_pkey_feature(void) } } +#ifdef CONFIG_PPC_MEM_KEYS /* * Adjust the upper limit, based on the number of bits supported by * arch-neutral code. */ pkeys_total = min_t(int, pkeys_total, ((ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) + 1)); +#endif return pkeys_total; } @@ -102,6 +107,7 @@ void __init pkey_early_init_devtree(void) { int pkeys_total, i; +#ifdef CONFIG_PPC_MEM_KEYS /* * We define PKEY_DISABLE_EXECUTE in addition to the arch-neutral * generic defines for PKEY_DISABLE_ACCESS and PKEY_DISABLE_WRITE. @@ -117,7 +123,7 @@ void __init pkey_early_init_devtree(void) BUILD_BUG_ON(__builtin_clzl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) + __builtin_popcountl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) != (sizeof(u64) * BITS_PER_BYTE)); - +#endif /* * Only P7 and above supports SPRN_AMR update with MSR[PR] = 1 */ @@ -179,6 +185,27 @@ void __init pkey_early_init_devtree(void) default_uamor &= ~(0x3ul << pkeyshift(execute_only_key)); } + if (unlikely(num_pkey <= 3)) { + /* + * Insufficient number of keys to support + * KUAP/KUEP feature. + */ + disable_kuep = true; + disable_kuap = true; + WARN(1, "Disabling kernel user protection due to low (%d) max supported keys\n", num_pkey); + } else { + /* handle key which is used by kernel for KAUP */ + reserved_allocation_mask |= (0x1 << 3); + /* + * Mark access for kup_key in default amr so that + * we continue to operate with that AMR in + * copy_to/from_user(). + */ + default_amr &= ~(0x3ul << pkeyshift(3)); + default_iamr &= ~(0x1ul << pkeyshift(3)); + default_uamor &= ~(0x3ul << pkeyshift(3)); + } + /* * Allow access for only key 0. 
And prevent any other modification. */ @@ -223,54 +250,92 @@ out: return; } -void pkey_mm_init(struct mm_struct *mm) +#ifdef CONFIG_PPC_KUEP +void setup_kuep(bool disabled) { - if (!mmu_has_feature(MMU_FTR_PKEY)) + if (disabled) return; - mm_pkey_allocation_map(mm) = initial_allocation_mask; - mm->context.execute_only_pkey = execute_only_key; + /* + * On hash if PKEY feature is not enabled, disable KUAP too. + */ + if (!early_radix_enabled() && !early_mmu_has_feature(MMU_FTR_PKEY)) + return; + + if (smp_processor_id() == boot_cpuid) { + pr_info("Activating Kernel Userspace Execution Prevention\n"); + cur_cpu_spec->mmu_features |= MMU_FTR_BOOK3S_KUEP; + } + + /* + * Radix always uses key0 of the IAMR to determine if an access is + * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction + * fetch. + */ + mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED); + isync(); } +#endif -static inline u64 read_amr(void) +#ifdef CONFIG_PPC_KUAP +void setup_kuap(bool disabled) { - return mfspr(SPRN_AMR); + if (disabled) + return; + /* + * On hash if PKEY feature is not enabled, disable KUAP too. + */ + if (!early_radix_enabled() && !early_mmu_has_feature(MMU_FTR_PKEY)) + return; + + if (smp_processor_id() == boot_cpuid) { + pr_info("Activating Kernel Userspace Access Prevention\n"); + cur_cpu_spec->mmu_features |= MMU_FTR_BOOK3S_KUAP; + } + + /* + * Set the default kernel AMR values on all cpus. + */ + mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); + isync(); } +#endif -static inline void write_amr(u64 value) +static inline void update_current_thread_amr(u64 value) { - mtspr(SPRN_AMR, value); + current->thread.regs->amr = value; } -static inline u64 read_iamr(void) +static inline void update_current_thread_iamr(u64 value) { if (!likely(pkey_execute_disable_supported)) - return 0x0UL; + return; - return mfspr(SPRN_IAMR); + current->thread.regs->iamr = value; } -static inline void write_iamr(u64 value) +#ifdef CONFIG_PPC_MEM_KEYS +void pkey_mm_init(struct mm_struct *mm) { - if (!likely(pkey_execute_disable_supported)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return; - - mtspr(SPRN_IAMR, value); + mm_pkey_allocation_map(mm) = initial_allocation_mask; + mm->context.execute_only_pkey = execute_only_key; } static inline void init_amr(int pkey, u8 init_bits) { u64 new_amr_bits = (((u64)init_bits & 0x3UL) << pkeyshift(pkey)); - u64 old_amr = read_amr() & ~((u64)(0x3ul) << pkeyshift(pkey)); + u64 old_amr = current_thread_amr() & ~((u64)(0x3ul) << pkeyshift(pkey)); - write_amr(old_amr | new_amr_bits); + update_current_thread_amr(old_amr | new_amr_bits); } static inline void init_iamr(int pkey, u8 init_bits) { u64 new_iamr_bits = (((u64)init_bits & 0x1UL) << pkeyshift(pkey)); - u64 old_iamr = read_iamr() & ~((u64)(0x1ul) << pkeyshift(pkey)); + u64 old_iamr = current_thread_iamr() & ~((u64)(0x1ul) << pkeyshift(pkey)); - write_iamr(old_iamr | new_iamr_bits); + update_current_thread_iamr(old_iamr | new_iamr_bits); } /* @@ -313,42 +378,6 @@ int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, return 0; } -void thread_pkey_regs_save(struct thread_struct *thread) -{ - if (!mmu_has_feature(MMU_FTR_PKEY)) - return; - - /* - * TODO: Skip saving registers if @thread hasn't used any keys yet. 
- */ - thread->amr = read_amr(); - thread->iamr = read_iamr(); -} - -void thread_pkey_regs_restore(struct thread_struct *new_thread, - struct thread_struct *old_thread) -{ - if (!mmu_has_feature(MMU_FTR_PKEY)) - return; - - if (old_thread->amr != new_thread->amr) - write_amr(new_thread->amr); - if (old_thread->iamr != new_thread->iamr) - write_iamr(new_thread->iamr); -} - -void thread_pkey_regs_init(struct thread_struct *thread) -{ - if (!mmu_has_feature(MMU_FTR_PKEY)) - return; - - thread->amr = default_amr; - thread->iamr = default_iamr; - - write_amr(default_amr); - write_iamr(default_iamr); -} - int execute_only_pkey(struct mm_struct *mm) { return mm->context.execute_only_pkey; @@ -397,9 +426,9 @@ static bool pkey_access_permitted(int pkey, bool write, bool execute) pkey_shift = pkeyshift(pkey); if (execute) - return !(read_iamr() & (IAMR_EX_BIT << pkey_shift)); + return !(current_thread_iamr() & (IAMR_EX_BIT << pkey_shift)); - amr = read_amr(); + amr = current_thread_amr(); if (write) return !(amr & (AMR_WR_BIT << pkey_shift)); @@ -445,3 +474,5 @@ void arch_dup_pkeys(struct mm_struct *oldmm, struct mm_struct *mm) mm_pkey_allocation_map(mm) = mm_pkey_allocation_map(oldmm); mm->context.execute_only_pkey = oldmm->context.execute_only_pkey; } + +#endif /* CONFIG_PPC_MEM_KEYS */ diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 3adcf730f478..98f0b243c1ab 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -589,48 +589,6 @@ static void radix_init_amor(void) mtspr(SPRN_AMOR, (3ul << 62)); } -#ifdef CONFIG_PPC_KUEP -void setup_kuep(bool disabled) -{ - if (disabled || !early_radix_enabled()) - return; - - if (smp_processor_id() == boot_cpuid) { - pr_info("Activating Kernel Userspace Execution Prevention\n"); - cur_cpu_spec->mmu_features |= MMU_FTR_KUEP; - } - - /* - * Radix always uses key0 of the IAMR to determine if an access is - * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction - * fetch. - */ - mtspr(SPRN_IAMR, (1ul << 62)); -} -#endif - -#ifdef CONFIG_PPC_KUAP -void setup_kuap(bool disabled) -{ - if (disabled || !early_radix_enabled()) - return; - - if (smp_processor_id() == boot_cpuid) { - pr_info("Activating Kernel Userspace Access Prevention\n"); - cur_cpu_spec->mmu_features |= MMU_FTR_RADIX_KUAP; - } - - /* Make sure userspace can't change the AMR */ - mtspr(SPRN_UAMOR, 0); - - /* - * Set the default kernel AMR values on all cpus. - */ - mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); - isync(); -} -#endif - void __init radix__early_init_mmu(void) { unsigned long lpcr; @@ -721,6 +679,9 @@ void radix__early_init_mmu_secondary(void) radix__switch_mmu_context(NULL, &init_mm); tlbiel_all(); + + /* Make sure userspace can't change the AMR */ + mtspr(SPRN_UAMOR, 0); } void radix__mmu_cleanup_all(void) diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index b487b489d4b6..fb66d154b26c 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -56,14 +56,21 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) if (early_cpu_has_feature(CPU_FTR_HVMODE)) { /* MSR[HV] should flush partition scope translations first. 
*/ tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0); - for (set = 1; set < num_sets; set++) - tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0); + + if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) { + for (set = 1; set < num_sets; set++) + tlbiel_radix_set_isa300(set, is, 0, + RIC_FLUSH_TLB, 0); + } } /* Flush process scoped entries. */ tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1); - for (set = 1; set < num_sets; set++) - tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1); + + if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) { + for (set = 1; set < num_sets; set++) + tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1); + } ppc_after_tlbiel_barrier(); } @@ -300,9 +307,11 @@ static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric) return; } - /* For the remaining sets, just flush the TLB */ - for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++) - __tlbiel_pid(pid, set, RIC_FLUSH_TLB); + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + /* For the remaining sets, just flush the TLB */ + for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++) + __tlbiel_pid(pid, set, RIC_FLUSH_TLB); + } ppc_after_tlbiel_barrier(); asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory"); diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index c30fcbfa0e32..584567970c11 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -28,35 +28,8 @@ #include "internal.h" -enum slb_index { - LINEAR_INDEX = 0, /* Kernel linear map (0xc000000000000000) */ - KSTACK_INDEX = 1, /* Kernel stack map */ -}; - static long slb_allocate_user(struct mm_struct *mm, unsigned long ea); -#define slb_esid_mask(ssize) \ - (((ssize) == MMU_SEGSIZE_256M)? ESID_MASK: ESID_MASK_1T) - -static inline unsigned long mk_esid_data(unsigned long ea, int ssize, - enum slb_index index) -{ - return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | index; -} - -static inline unsigned long __mk_vsid_data(unsigned long vsid, int ssize, - unsigned long flags) -{ - return (vsid << slb_vsid_shift(ssize)) | flags | - ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT); -} - -static inline unsigned long mk_vsid_data(unsigned long ea, int ssize, - unsigned long flags) -{ - return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags); -} - bool stress_slb_enabled __initdata; static int __init parse_stress_slb(char *p) @@ -255,7 +228,6 @@ void slb_dump_contents(struct slb_entry *slb_ptr) return; pr_err("SLB contents of cpu 0x%x\n", smp_processor_id()); - pr_err("Last SLB entry inserted at slot %d\n", get_paca()->stab_rr); for (i = 0; i < mmu_slb_size; i++) { e = slb_ptr->esid; @@ -265,34 +237,38 @@ void slb_dump_contents(struct slb_entry *slb_ptr) if (!e && !v) continue; - pr_err("%02d %016lx %016lx\n", i, e, v); + pr_err("%02d %016lx %016lx %s\n", i, e, v, + (e & SLB_ESID_V) ? "VALID" : "NOT VALID"); - if (!(e & SLB_ESID_V)) { - pr_err("\n"); + if (!(e & SLB_ESID_V)) continue; - } + llp = v & SLB_VSID_LLP; if (v & SLB_VSID_B_1T) { - pr_err(" 1T ESID=%9lx VSID=%13lx LLP:%3lx\n", + pr_err(" 1T ESID=%9lx VSID=%13lx LLP:%3lx\n", GET_ESID_1T(e), (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T, llp); } else { - pr_err(" 256M ESID=%9lx VSID=%13lx LLP:%3lx\n", + pr_err(" 256M ESID=%9lx VSID=%13lx LLP:%3lx\n", GET_ESID(e), (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT, llp); } } - pr_err("----------------------------------\n"); - - /* Dump slb cache entires as well. 
*/ - pr_err("SLB cache ptr value = %d\n", get_paca()->slb_save_cache_ptr); - pr_err("Valid SLB cache entries:\n"); - n = min_t(int, get_paca()->slb_save_cache_ptr, SLB_CACHE_ENTRIES); - for (i = 0; i < n; i++) - pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]); - pr_err("Rest of SLB cache entries:\n"); - for (i = n; i < SLB_CACHE_ENTRIES; i++) - pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]); + + if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) { + /* RR is not so useful as it's often not used for allocation */ + pr_err("SLB RR allocator index %d\n", get_paca()->stab_rr); + + /* Dump slb cache entires as well. */ + pr_err("SLB cache ptr value = %d\n", get_paca()->slb_save_cache_ptr); + pr_err("Valid SLB cache entries:\n"); + n = min_t(int, get_paca()->slb_save_cache_ptr, SLB_CACHE_ENTRIES); + for (i = 0; i < n; i++) + pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]); + pr_err("Rest of SLB cache entries:\n"); + for (i = n; i < SLB_CACHE_ENTRIES; i++) + pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]); + } } void slb_vmalloc_update(void) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 0add963a849b..8961b44f350c 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -210,28 +210,26 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, return true; } - if (!is_exec && address < TASK_SIZE && (error_code & DSISR_PROTFAULT) && - !search_exception_tables(regs->nip)) { - pr_crit_ratelimited("Kernel attempted to access user page (%lx) - exploit attempt? (uid: %d)\n", - address, - from_kuid(&init_user_ns, current_uid())); - } - // Kernel fault on kernel address is bad if (address >= TASK_SIZE) return true; - // Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad - if (!search_exception_tables(regs->nip)) - return true; + // Read/write fault blocked by KUAP is bad, it can never succeed. + if (bad_kuap_fault(regs, address, is_write)) { + pr_crit_ratelimited("Kernel attempted to %s user page (%lx) - exploit attempt? (uid: %d)\n", + is_write ? "write" : "read", address, + from_kuid(&init_user_ns, current_uid())); - // Read/write fault in a valid region (the exception table search passed - // above), but blocked by KUAP is bad, it can never succeed. - if (bad_kuap_fault(regs, address, is_write)) - return true; + // Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad + if (!search_exception_tables(regs->nip)) + return true; + + // Read/write fault in a valid region (the exception table search passed + // above), but blocked by KUAP is bad, it can never succeed. + return WARN(true, "Bug: %s fault blocked by KUAP!", is_write ? "Write" : "Read"); + } - // What's left? Kernel fault on user in well defined regions (extable - // matched), and allowed by KUAP in the faulting context. + // What's left? Kernel fault on user and allowed by KUAP in the faulting context. return false; } @@ -303,7 +301,6 @@ static inline void cmo_account_page_fault(void) static inline void cmo_account_page_fault(void) { } #endif /* CONFIG_PPC_SMLPAR */ -#ifdef CONFIG_PPC_BOOK3S static void sanity_check_fault(bool is_write, bool is_user, unsigned long error_code, unsigned long address) { @@ -320,6 +317,9 @@ static void sanity_check_fault(bool is_write, bool is_user, return; } + if (!IS_ENABLED(CONFIG_PPC_BOOK3S)) + return; + /* * For hash translation mode, we should never get a * PROTFAULT. 
Any update to pte to reduce access will result in us @@ -354,10 +354,6 @@ static void sanity_check_fault(bool is_write, bool is_user, WARN_ON_ONCE(error_code & DSISR_PROTFAULT); } -#else -static void sanity_check_fault(bool is_write, bool is_user, - unsigned long error_code, unsigned long address) { } -#endif /* CONFIG_PPC_BOOK3S */ /* * Define the correct "is_write" bit in error_code based @@ -365,17 +361,19 @@ static void sanity_check_fault(bool is_write, bool is_user, */ #if (defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) #define page_fault_is_write(__err) ((__err) & ESR_DST) -#define page_fault_is_bad(__err) (0) #else #define page_fault_is_write(__err) ((__err) & DSISR_ISSTORE) -#if defined(CONFIG_PPC_8xx) +#endif + +#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) +#define page_fault_is_bad(__err) (0) +#elif defined(CONFIG_PPC_8xx) #define page_fault_is_bad(__err) ((__err) & DSISR_NOEXEC_OR_G) #elif defined(CONFIG_PPC64) #define page_fault_is_bad(__err) ((__err) & DSISR_BAD_FAULT_64S) #else #define page_fault_is_bad(__err) ((__err) & DSISR_BAD_FAULT_32S) #endif -#endif /* * For 600- and 800-family processors, the error_code parameter is DSISR @@ -547,10 +545,20 @@ NOKPROBE_SYMBOL(__do_page_fault); int do_page_fault(struct pt_regs *regs, unsigned long address, unsigned long error_code) { + const struct exception_table_entry *entry; enum ctx_state prev_state = exception_enter(); int rc = __do_page_fault(regs, address, error_code); exception_exit(prev_state); - return rc; + if (likely(!rc)) + return 0; + + entry = search_exception_tables(regs->nip); + if (unlikely(!entry)) + return rc; + + instruction_pointer_set(regs, extable_fixup(entry)); + + return 0; } NOKPROBE_SYMBOL(do_page_fault); @@ -559,17 +567,10 @@ NOKPROBE_SYMBOL(do_page_fault); * It is called from the DSI and ISI handlers in head.S and from some * of the procedures in traps.c. */ -void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) +void __bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) { - const struct exception_table_entry *entry; int is_write = page_fault_is_write(regs->dsisr); - /* Are we prepared to handle this fault? */ - if ((entry = search_exception_tables(regs->nip)) != NULL) { - regs->nip = extable_fixup(entry); - return; - } - /* kernel has accessed a bad area */ switch (TRAP(regs)) { @@ -603,3 +604,15 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) die("Kernel access of bad area", regs, sig); } + +void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) +{ + const struct exception_table_entry *entry; + + /* Are we prepared to handle this fault? 
*/ + entry = search_exception_tables(instruction_pointer(regs)); + if (entry) + instruction_pointer_set(regs, extable_fixup(entry)); + else + __bad_page_fault(regs, address, sig); +} diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 36c3800769fb..8b3cc4d688e8 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -294,6 +294,21 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte) static inline void hugepd_free(struct mmu_gather *tlb, void *hugepte) {} #endif +/* Return true when the entry to be freed maps more than the area being freed */ +static bool range_is_outside_limits(unsigned long start, unsigned long end, + unsigned long floor, unsigned long ceiling, + unsigned long mask) +{ + if ((start & mask) < floor) + return true; + if (ceiling) { + ceiling &= mask; + if (!ceiling) + return true; + } + return end - 1 > ceiling - 1; +} + static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift, unsigned long start, unsigned long end, unsigned long floor, unsigned long ceiling) @@ -309,15 +324,7 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif if (shift > pdshift) num_hugepd = 1 << (shift - pdshift); - start &= pdmask; - if (start < floor) - return; - if (ceiling) { - ceiling &= pdmask; - if (! ceiling) - return; - } - if (end - 1 > ceiling - 1) + if (range_is_outside_limits(start, end, floor, ceiling, pdmask)) return; for (i = 0; i < num_hugepd; i++, hpdp++) @@ -334,18 +341,9 @@ static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { - unsigned long start = addr; pgtable_t token = pmd_pgtable(*pmd); - start &= PMD_MASK; - if (start < floor) - return; - if (ceiling) { - ceiling &= PMD_MASK; - if (!ceiling) - return; - } - if (end - 1 > ceiling - 1) + if (range_is_outside_limits(addr, end, floor, ceiling, PMD_MASK)) return; pmd_clear(pmd); @@ -395,20 +393,12 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, addr, next, floor, ceiling); } while (addr = next, addr != end); - start &= PUD_MASK; - if (start < floor) - return; - if (ceiling) { - ceiling &= PUD_MASK; - if (!ceiling) - return; - } - if (end - 1 > ceiling - 1) + if (range_is_outside_limits(start, end, floor, ceiling, PUD_MASK)) return; - pmd = pmd_offset(pud, start); + pmd = pmd_offset(pud, start & PUD_MASK); pud_clear(pud); - pmd_free_tlb(tlb, pmd, start); + pmd_free_tlb(tlb, pmd, start & PUD_MASK); mm_dec_nr_pmds(tlb->mm); } @@ -446,20 +436,12 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d, } } while (addr = next, addr != end); - start &= PGDIR_MASK; - if (start < floor) - return; - if (ceiling) { - ceiling &= PGDIR_MASK; - if (!ceiling) - return; - } - if (end - 1 > ceiling - 1) + if (range_is_outside_limits(start, end, floor, ceiling, PGDIR_MASK)) return; - pud = pud_offset(p4d, start); + pud = pud_offset(p4d, start & PGDIR_MASK); p4d_clear(p4d); - pud_free_tlb(tlb, pud, start); + pud_free_tlb(tlb, pud, start & PGDIR_MASK); mm_dec_nr_puds(tlb->mm); } diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c index 8e0d792ac296..3a82f89827a5 100644 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c @@ -28,8 +28,8 @@ EXPORT_SYMBOL_GPL(kernstart_addr); unsigned long kernstart_virt_addr __ro_after_init = KERNELBASE; EXPORT_SYMBOL_GPL(kernstart_virt_addr); -static bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP); -static bool 
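The hugetlbpage.c change above replaces four copies of the same floor/ceiling test with range_is_outside_limits(), which returns true when the page-table entry about to be freed maps more than the area being torn down and must therefore be kept. A standalone sketch with made-up numbers follows; the mask stands in for PMD_MASK/PUD_MASK/PGDIR_MASK and is not a real kernel value.

#include <stdbool.h>
#include <stdio.h>

/* Same logic as the helper added to hugetlbpage.c. */
static bool range_is_outside_limits(unsigned long start, unsigned long end,
				    unsigned long floor, unsigned long ceiling,
				    unsigned long mask)
{
	if ((start & mask) < floor)
		return true;
	if (ceiling) {
		ceiling &= mask;
		if (!ceiling)
			return true;
	}
	return end - 1 > ceiling - 1;
}

int main(void)
{
	unsigned long mask = ~0xfffffUL;	/* pretend each entry covers 1MB */

	/* Range covers the whole aligned entry within the limits: may free it. */
	printf("%d\n", range_is_outside_limits(0x100000, 0x200000,
					       0x100000, 0x400000, mask));
	/* Aligned start falls below the floor: entry is shared, keep it. */
	printf("%d\n", range_is_outside_limits(0x180000, 0x200000,
					       0x190000, 0x400000, mask));
	return 0;
}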
disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP); +bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP); +bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP); static int __init parse_nosmep(char *p) { @@ -47,12 +47,6 @@ static int __init parse_nosmap(char *p) } early_param("nosmap", parse_nosmap); -void __ref setup_kup(void) -{ - setup_kuep(disable_kuep); - setup_kuap(disable_kuap); -} - #define CTOR(shift) static void ctor_##shift(void *addr) \ { \ memset(addr, 0, sizeof(void *) << (shift)); \ diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 25284fdb300c..afab328d0887 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -54,11 +54,7 @@ #include <mm/mmu_decl.h> -#ifndef CPU_FTR_COHERENT_ICACHE -#define CPU_FTR_COHERENT_ICACHE 0 /* XXX for now */ -#define CPU_FTR_NOEXECUTE 0 -#endif - +static DEFINE_MUTEX(linear_mapping_mutex); unsigned long long memory_limit; bool init_mem_is_free; @@ -116,46 +112,70 @@ static void flush_dcache_range_chunked(unsigned long start, unsigned long stop, } } -int __ref arch_add_memory(int nid, u64 start, u64 size, - struct mhp_params *params) +int __ref arch_create_linear_mapping(int nid, u64 start, u64 size, + struct mhp_params *params) { - unsigned long start_pfn = start >> PAGE_SHIFT; - unsigned long nr_pages = size >> PAGE_SHIFT; int rc; start = (unsigned long)__va(start); + mutex_lock(&linear_mapping_mutex); rc = create_section_mapping(start, start + size, nid, params->pgprot); + mutex_unlock(&linear_mapping_mutex); if (rc) { - pr_warn("Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n", + pr_warn("Unable to create linear mapping for 0x%llx..0x%llx: %d\n", start, start + size, rc); return -EFAULT; } - - return __add_pages(nid, start_pfn, nr_pages, params); + return 0; } -void __ref arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap) +void __ref arch_remove_linear_mapping(u64 start, u64 size) { - unsigned long start_pfn = start >> PAGE_SHIFT; - unsigned long nr_pages = size >> PAGE_SHIFT; int ret; - __remove_pages(start_pfn, nr_pages, altmap); - /* Remove htab bolted mappings for this section of memory */ start = (unsigned long)__va(start); flush_dcache_range_chunked(start, start + size, FLUSH_CHUNK_SIZE); + mutex_lock(&linear_mapping_mutex); ret = remove_section_mapping(start, start + size); - WARN_ON_ONCE(ret); + mutex_unlock(&linear_mapping_mutex); + if (ret) + pr_warn("Unable to remove linear mapping for 0x%llx..0x%llx: %d\n", + start, start + size, ret); /* Ensure all vmalloc mappings are flushed in case they also * hit that section of memory */ vm_unmap_aliases(); } + +int __ref arch_add_memory(int nid, u64 start, u64 size, + struct mhp_params *params) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + int rc; + + rc = arch_create_linear_mapping(nid, start, size, params); + if (rc) + return rc; + rc = __add_pages(nid, start_pfn, nr_pages, params); + if (rc) + arch_remove_linear_mapping(start, size); + return rc; +} + +void __ref arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + + __remove_pages(start_pfn, nr_pages, altmap); + arch_remove_linear_mapping(start, size); +} #endif #ifndef CONFIG_NEED_MULTIPLE_NODES @@ -525,7 +545,7 @@ void __flush_dcache_icache(void *p) * space occurs, before returning to user space. 
*/ - if (cpu_has_feature(MMU_FTR_TYPE_44x)) + if (mmu_has_feature(MMU_FTR_TYPE_44x)) return; invalidate_icache_range(addr, addr + PAGE_SIZE); diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 1b6d39e9baed..998810e68562 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -82,17 +82,12 @@ static inline void print_system_hash_info(void) {} #else /* CONFIG_PPC_MMU_NOHASH */ -extern void _tlbie(unsigned long address); -extern void _tlbia(void); - void print_system_hash_info(void); #endif /* CONFIG_PPC_MMU_NOHASH */ #ifdef CONFIG_PPC32 -void hash_preload(struct mm_struct *mm, unsigned long ea); - extern void mapin_ram(void); extern void setbat(int index, unsigned long virt, phys_addr_t phys, unsigned int size, pgprot_t prot); @@ -101,7 +96,6 @@ extern int __map_without_bats; extern unsigned int rtas_data, rtas_size; struct hash_pte; -extern struct hash_pte *Hash; extern u8 early_hash[]; #endif /* CONFIG_PPC32 */ diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 231ca95f9ffb..19a3eec1d8c5 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -186,8 +186,7 @@ void mmu_mark_initmem_nx(void) mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, false); mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false); - if (IS_ENABLED(CONFIG_PIN_TLB_TEXT)) - mmu_pin_tlb(block_mapped_ram, false); + mmu_pin_tlb(block_mapped_ram, false); } #ifdef CONFIG_STRICT_KERNEL_RWX diff --git a/arch/powerpc/mm/nohash/fsl_booke.c b/arch/powerpc/mm/nohash/fsl_booke.c index 36bda962d3b3..03dacbe940e5 100644 --- a/arch/powerpc/mm/nohash/fsl_booke.c +++ b/arch/powerpc/mm/nohash/fsl_booke.c @@ -223,15 +223,9 @@ void flush_instruction_cache(void) { unsigned long tmp; - if (IS_ENABLED(CONFIG_E200)) { - tmp = mfspr(SPRN_L1CSR0); - tmp |= L1CSR0_CFI | L1CSR0_CLFC; - mtspr(SPRN_L1CSR0, tmp); - } else { - tmp = mfspr(SPRN_L1CSR1); - tmp |= L1CSR1_ICFI | L1CSR1_ICLFR; - mtspr(SPRN_L1CSR1, tmp); - } + tmp = mfspr(SPRN_L1CSR1); + tmp |= L1CSR1_ICFI | L1CSR1_ICLFR; + mtspr(SPRN_L1CSR1, tmp); isync(); } diff --git a/arch/powerpc/mm/nohash/tlb_low.S b/arch/powerpc/mm/nohash/tlb_low.S index eaeee402f96e..68797e072f55 100644 --- a/arch/powerpc/mm/nohash/tlb_low.S +++ b/arch/powerpc/mm/nohash/tlb_low.S @@ -92,36 +92,25 @@ _GLOBAL(__tlbil_va) tlbsx. r6,0,r3 bne 10f sync -BEGIN_MMU_FTR_SECTION - b 2f -END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) +#ifndef CONFIG_PPC_47x /* On 440 There are only 64 TLB entries, so r3 < 64, which means bit * 22, is clear. Since 22 is the V bit in the TLB_PAGEID, loading this * value will invalidate the TLB entry. */ tlbwe r6,r6,PPC44x_TLB_PAGEID - isync -10: wrtee r10 - blr -2: -#ifdef CONFIG_PPC_47x +#else oris r7,r6,0x8000 /* specify way explicitly */ clrrwi r4,r3,12 /* get an EPN for the hashing with V = 0 */ ori r4,r4,PPC47x_TLBE_SIZE tlbwe r4,r7,0 /* write it */ +#endif /* !CONFIG_PPC_47x */ isync - wrtee r10 +10: wrtee r10 blr -#else /* CONFIG_PPC_47x */ -1: trap - EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0; -#endif /* !CONFIG_PPC_47x */ _GLOBAL(_tlbil_all) _GLOBAL(_tlbil_pid) -BEGIN_MMU_FTR_SECTION - b 2f -END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) +#ifndef CONFIG_PPC_47x li r3,0 sync @@ -136,8 +125,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) isync blr -2: -#ifdef CONFIG_PPC_47x +#else /* 476 variant. 
There's not simple way to do this, hopefully we'll * try to limit the amount of such full invalidates */ @@ -179,11 +167,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) b 1b /* Then loop */ 1: isync /* Sync shadows */ wrtee r11 -#else /* CONFIG_PPC_47x */ -1: trap - EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0; -#endif /* !CONFIG_PPC_47x */ blr +#endif /* !CONFIG_PPC_47x */ #ifdef CONFIG_PPC_47x diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 079159e97bca..e0ec67a16887 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -84,7 +84,7 @@ int __ref map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot) pg = pte_alloc_kernel(pd, va); else pg = early_pte_alloc_kernel(pd, va); - if (pg != 0) { + if (pg) { err = 0; /* The PTE should never be already set nor present in the * hash table @@ -112,10 +112,6 @@ static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top) ktext = ((char *)v >= _stext && (char *)v < etext) || ((char *)v >= _sinittext && (char *)v < _einittext); map_kernel_page(v, p, ktext ? PAGE_KERNEL_TEXT : PAGE_KERNEL); -#ifdef CONFIG_PPC_BOOK3S_32 - if (ktext) - hash_preload(&init_mm, v); -#endif v += PAGE_SIZE; p += PAGE_SIZE; } diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c index e53c3c161257..f970d1510d3d 100644 --- a/arch/powerpc/perf/8xx-pmu.c +++ b/arch/powerpc/perf/8xx-pmu.c @@ -153,6 +153,8 @@ static void mpc8xx_pmu_read(struct perf_event *event) static void mpc8xx_pmu_del(struct perf_event *event, int flags) { + struct ppc_inst insn = ppc_inst(PPC_RAW_MFSPR(10, SPRN_SPRG_SCRATCH2)); + mpc8xx_pmu_read(event); /* If it was the last user, stop counting to avoid useles overhead */ @@ -164,22 +166,12 @@ static void mpc8xx_pmu_del(struct perf_event *event, int flags) mtspr(SPRN_ICTRL, 7); break; case PERF_8xx_ID_ITLB_LOAD_MISS: - if (atomic_dec_return(&itlb_miss_ref) == 0) { - /* mfspr r10, SPRN_SPRG_SCRATCH0 */ - struct ppc_inst insn = ppc_inst(PPC_INST_MFSPR | __PPC_RS(R10) | - __PPC_SPR(SPRN_SPRG_SCRATCH0)); - + if (atomic_dec_return(&itlb_miss_ref) == 0) patch_instruction_site(&patch__itlbmiss_exit_1, insn); - } break; case PERF_8xx_ID_DTLB_LOAD_MISS: - if (atomic_dec_return(&dtlb_miss_ref) == 0) { - /* mfspr r10, SPRN_DAR */ - struct ppc_inst insn = ppc_inst(PPC_INST_MFSPR | __PPC_RS(R10) | - __PPC_SPR(SPRN_DAR)); - + if (atomic_dec_return(&dtlb_miss_ref) == 0) patch_instruction_site(&patch__dtlbmiss_exit_1, insn); - } break; } } diff --git a/arch/powerpc/perf/callchain.h b/arch/powerpc/perf/callchain.h index ae24d4a00da6..d6fa6e25234f 100644 --- a/arch/powerpc/perf/callchain.h +++ b/arch/powerpc/perf/callchain.h @@ -33,7 +33,7 @@ static inline int __read_user_stack(const void __user *ptr, void *ret, rc = copy_from_user_nofault(ret, ptr, size); - if (IS_ENABLED(CONFIG_PPC64) && rc) + if (IS_ENABLED(CONFIG_PPC64) && !radix_enabled() && rc) return read_user_stack_slow(ptr, ret, size); return rc; diff --git a/arch/powerpc/perf/callchain_32.c b/arch/powerpc/perf/callchain_32.c index 64e4013d8060..b83c47b7947f 100644 --- a/arch/powerpc/perf/callchain_32.c +++ b/arch/powerpc/perf/callchain_32.c @@ -59,8 +59,8 @@ static int is_sigreturn_32_address(unsigned int nip, unsigned int fp) { if (nip == fp + offsetof(struct signal_frame_32, mctx.mc_pad)) return 1; - if (vdso32_sigtramp && current->mm->context.vdso_base && - nip == current->mm->context.vdso_base + vdso32_sigtramp) + if (current->mm->context.vdso && + nip == VDSO32_SYMBOL(current->mm->context.vdso, sigtramp32)) return 1; 
return 0; } @@ -70,8 +70,8 @@ static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp) if (nip == fp + offsetof(struct rt_signal_frame_32, uc.uc_mcontext.mc_pad)) return 1; - if (vdso32_rt_sigtramp && current->mm->context.vdso_base && - nip == current->mm->context.vdso_base + vdso32_rt_sigtramp) + if (current->mm->context.vdso && + nip == VDSO32_SYMBOL(current->mm->context.vdso, sigtramp_rt32)) return 1; return 0; } diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c index fed90e827f3a..8d0df4226328 100644 --- a/arch/powerpc/perf/callchain_64.c +++ b/arch/powerpc/perf/callchain_64.c @@ -21,7 +21,8 @@ /* * On 64-bit we don't want to invoke hash_page on user addresses from * interrupt context, so if the access faults, we read the page tables - * to find which page (if any) is mapped and access it directly. + * to find which page (if any) is mapped and access it directly. Radix + * has no need for this so it doesn't use read_user_stack_slow. */ int read_user_stack_slow(const void __user *ptr, void *buf, int nb) { @@ -67,8 +68,8 @@ static int is_sigreturn_64_address(unsigned long nip, unsigned long fp) { if (nip == fp + offsetof(struct signal_frame_64, tramp)) return 1; - if (vdso64_rt_sigtramp && current->mm->context.vdso_base && - nip == current->mm->context.vdso_base + vdso64_rt_sigtramp) + if (current->mm->context.vdso && + nip == VDSO64_SYMBOL(current->mm->context.vdso, sigtramp_rt64)) return 1; return 0; } diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 6586f7e71cfb..28206b1fe172 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -95,6 +95,7 @@ static unsigned int freeze_events_kernel = MMCR0_FCS; #define SPRN_SIER3 0 #define MMCRA_SAMPLE_ENABLE 0 #define MMCRA_BHRB_DISABLE 0 +#define MMCR0_PMCCEXT 0 static inline unsigned long perf_ip_adjust(struct pt_regs *regs) { @@ -137,6 +138,9 @@ static void pmao_restore_workaround(bool ebb) { } bool is_sier_available(void) { + if (!ppmu) + return false; + if (ppmu->flags & PPMU_HAS_SIER) return true; @@ -250,11 +254,32 @@ static inline u32 perf_flags_from_msr(struct pt_regs *regs) static inline u32 perf_get_misc_flags(struct pt_regs *regs) { bool use_siar = regs_use_siar(regs); + unsigned long mmcra = regs->dsisr; + int marked = mmcra & MMCRA_SAMPLE_ENABLE; if (!use_siar) return perf_flags_from_msr(regs); /* + * Check the address in SIAR to identify the + * privilege levels since the SIER[MSR_HV, MSR_PR] + * bits are not set for marked events in power10 + * DD1. + */ + if (marked && (ppmu->flags & PPMU_P10_DD1)) { + unsigned long siar = mfspr(SPRN_SIAR); + if (siar) { + if (is_kernel_addr(siar)) + return PERF_RECORD_MISC_KERNEL; + return PERF_RECORD_MISC_USER; + } else { + if (is_kernel_addr(regs->nip)) + return PERF_RECORD_MISC_KERNEL; + return PERF_RECORD_MISC_USER; + } + } + + /* * If we don't have flags in MMCRA, rather than using * the MSR, we intuit the flags from the address in * SIAR which should give slightly more reliable @@ -350,7 +375,14 @@ static inline int siar_valid(struct pt_regs *regs) int marked = mmcra & MMCRA_SAMPLE_ENABLE; if (marked) { - if (ppmu->flags & PPMU_HAS_SIER) + /* + * SIER[SIAR_VALID] is not set for some + * marked events on power10 DD1, so drop + * the check for SIER[SIAR_VALID] and return true. 
+ */ + if (ppmu->flags & PPMU_P10_DD1) + return 0x1; + else if (ppmu->flags & PPMU_HAS_SIER) return regs->dar & SIER_SIAR_VALID; if (ppmu->flags & PPMU_SIAR_VALID) @@ -1242,6 +1274,9 @@ static void power_pmu_disable(struct pmu *pmu) val |= MMCR0_FC; val &= ~(MMCR0_EBE | MMCR0_BHRBA | MMCR0_PMCC | MMCR0_PMAO | MMCR0_FC56); + /* Set mmcr0 PMCCEXT for p10 */ + if (ppmu->flags & PPMU_ARCH_31) + val |= MMCR0_PMCCEXT; /* * The barrier is to make sure the mtspr has been @@ -1881,7 +1916,7 @@ static bool is_event_blacklisted(u64 ev) static int power_pmu_event_init(struct perf_event *event) { u64 ev; - unsigned long flags; + unsigned long flags, irq_flags; struct perf_event *ctrs[MAX_HWEVENTS]; u64 events[MAX_HWEVENTS]; unsigned int cflags[MAX_HWEVENTS]; @@ -1989,7 +2024,9 @@ static int power_pmu_event_init(struct perf_event *event) if (check_excludes(ctrs, cflags, n, 1)) return -EINVAL; - cpuhw = &get_cpu_var(cpu_hw_events); + local_irq_save(irq_flags); + cpuhw = this_cpu_ptr(&cpu_hw_events); + err = power_check_constraints(cpuhw, events, cflags, n + 1); if (has_branch_stack(event)) { @@ -2000,13 +2037,13 @@ static int power_pmu_event_init(struct perf_event *event) event->attr.branch_sample_type); if (bhrb_filter == -1) { - put_cpu_var(cpu_hw_events); + local_irq_restore(irq_flags); return -EOPNOTSUPP; } cpuhw->bhrb_filter = bhrb_filter; } - put_cpu_var(cpu_hw_events); + local_irq_restore(irq_flags); if (err) return -EINVAL; @@ -2125,6 +2162,16 @@ static void record_and_restart(struct perf_event *event, unsigned long val, perf_event_update_userpage(event); /* + * Due to hardware limitation, sometimes SIAR could sample a kernel + * address even when freeze on supervisor state (kernel) is set in + * MMCR2. Check attr.exclude_kernel and address to drop the sample in + * these cases. + */ + if (event->attr.exclude_kernel && record) + if (is_kernel_addr(mfspr(SPRN_SIAR))) + record = 0; + + /* * Finally record data if requested. 
*/ if (record) { @@ -2180,8 +2227,14 @@ unsigned long perf_misc_flags(struct pt_regs *regs) unsigned long perf_instruction_pointer(struct pt_regs *regs) { bool use_siar = regs_use_siar(regs); + unsigned long siar = mfspr(SPRN_SIAR); - if (use_siar && siar_valid(regs)) + if (ppmu->flags & PPMU_P10_DD1) { + if (siar) + return siar; + else + return regs->nip; + } else if (use_siar && siar_valid(regs)) return mfspr(SPRN_SIAR) + perf_ip_adjust(regs); else if (use_siar) return 0; // no valid instruction pointer diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 2848904df638..6ab5b272090a 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -247,6 +247,9 @@ void isa207_get_mem_weight(u64 *weight) u64 sier = mfspr(SPRN_SIER); u64 val = (sier & ISA207_SIER_TYPE_MASK) >> ISA207_SIER_TYPE_SHIFT; + if (cpu_has_feature(CPU_FTR_ARCH_31)) + mantissa = P10_MMCRA_THR_CTR_MANT(mmcra); + if (val == 0 || val == 7) *weight = 0; else @@ -311,9 +314,11 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) } if (unit >= 6 && unit <= 9) { - if (cpu_has_feature(CPU_FTR_ARCH_31) && (unit == 6)) { - mask |= CNST_L2L3_GROUP_MASK; - value |= CNST_L2L3_GROUP_VAL(event >> p10_L2L3_EVENT_SHIFT); + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + if (unit == 6) { + mask |= CNST_L2L3_GROUP_MASK; + value |= CNST_L2L3_GROUP_VAL(event >> p10_L2L3_EVENT_SHIFT); + } } else if (cpu_has_feature(CPU_FTR_ARCH_300)) { mask |= CNST_CACHE_GROUP_MASK; value |= CNST_CACHE_GROUP_VAL(event & 0xff); @@ -339,12 +344,22 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) value |= CNST_L1_QUAL_VAL(cache); } + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + mask |= CNST_RADIX_SCOPE_GROUP_MASK; + value |= CNST_RADIX_SCOPE_GROUP_VAL(event >> p10_EVENT_RADIX_SCOPE_QUAL_SHIFT); + } + if (is_event_marked(event)) { mask |= CNST_SAMPLE_MASK; value |= CNST_SAMPLE_VAL(event >> EVENT_SAMPLE_SHIFT); } - if (cpu_has_feature(CPU_FTR_ARCH_300)) { + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + if (event_is_threshold(event)) { + mask |= CNST_THRESH_CTL_SEL_MASK; + value |= CNST_THRESH_CTL_SEL_VAL(event >> EVENT_THRESH_SHIFT); + } + } else if (cpu_has_feature(CPU_FTR_ARCH_300)) { if (event_is_threshold(event) && is_thresh_cmp_valid(event)) { mask |= CNST_THRESH_MASK; value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT); @@ -456,6 +471,13 @@ int isa207_compute_mmcr(u64 event[], int n_ev, } } + /* Set RADIX_SCOPE_QUAL bit */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + val = (event[i] >> p10_EVENT_RADIX_SCOPE_QUAL_SHIFT) & + p10_EVENT_RADIX_SCOPE_QUAL_MASK; + mmcr1 |= val << p10_MMCR1_RADIX_SCOPE_QUAL_SHIFT; + } + if (is_event_marked(event[i])) { mmcra |= MMCRA_SAMPLE_ENABLE; @@ -539,6 +561,14 @@ int isa207_compute_mmcr(u64 event[], int n_ev, if (!(pmc_inuse & 0x60)) mmcr->mmcr0 |= MMCR0_FC56; + /* + * Set mmcr0 (PMCCEXT) for p10 which + * will restrict access to group B registers + * when MMCR0 PMCC=0b00. 
+ */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + mmcr->mmcr0 |= MMCR0_PMCCEXT; + mmcr->mmcr1 = mmcr1; mmcr->mmcra = mmcra; mmcr->mmcr2 = mmcr2; diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index 7025de5e60e7..454b32c31440 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -101,6 +101,9 @@ #define p10_EVENT_CACHE_SEL_MASK 0x3ull #define p10_EVENT_MMCR3_MASK 0x7fffull #define p10_EVENT_MMCR3_SHIFT 45 +#define p10_EVENT_RADIX_SCOPE_QUAL_SHIFT 9 +#define p10_EVENT_RADIX_SCOPE_QUAL_MASK 0x1 +#define p10_MMCR1_RADIX_SCOPE_QUAL_SHIFT 45 #define p10_EVENT_VALID_MASK \ ((p10_SDAR_MODE_MASK << p10_SDAR_MODE_SHIFT | \ @@ -112,6 +115,7 @@ (p9_EVENT_COMBINE_MASK << p9_EVENT_COMBINE_SHIFT) | \ (p10_EVENT_MMCR3_MASK << p10_EVENT_MMCR3_SHIFT) | \ (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) | \ + (p10_EVENT_RADIX_SCOPE_QUAL_MASK << p10_EVENT_RADIX_SCOPE_QUAL_SHIFT) | \ EVENT_LINUX_MASK | \ EVENT_PSEL_MASK)) /* @@ -125,9 +129,9 @@ * * 28 24 20 16 12 8 4 0 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - * [ ] | [ ] [ sample ] [ ] [6] [5] [4] [3] [2] [1] - * | | | | - * BHRB IFM -* | | | Count of events for each PMC. + * [ ] | [ ] | [ sample ] [ ] [6] [5] [4] [3] [2] [1] + * | | | | | + * BHRB IFM -* | | |*radix_scope | Count of events for each PMC. * EBB -* | | p1, p2, p3, p4, p5, p6. * L1 I/D qualifier -* | * nc - number of counters -* @@ -145,6 +149,9 @@ #define CNST_THRESH_VAL(v) (((v) & EVENT_THRESH_MASK) << 32) #define CNST_THRESH_MASK CNST_THRESH_VAL(EVENT_THRESH_MASK) +#define CNST_THRESH_CTL_SEL_VAL(v) (((v) & 0x7ffull) << 32) +#define CNST_THRESH_CTL_SEL_MASK CNST_THRESH_CTL_SEL_VAL(0x7ff) + #define CNST_EBB_VAL(v) (((v) & EVENT_EBB_MASK) << 24) #define CNST_EBB_MASK CNST_EBB_VAL(EVENT_EBB_MASK) @@ -165,6 +172,9 @@ #define CNST_L2L3_GROUP_VAL(v) (((v) & 0x1full) << 55) #define CNST_L2L3_GROUP_MASK CNST_L2L3_GROUP_VAL(0x1f) +#define CNST_RADIX_SCOPE_GROUP_VAL(v) (((v) & 0x1ull) << 21) +#define CNST_RADIX_SCOPE_GROUP_MASK CNST_RADIX_SCOPE_GROUP_VAL(1) + /* * For NC we are counting up to 4 events. This requires three bits, and we need * the fifth event to overflow and set the 4th bit. 
To achieve that we bias the @@ -221,6 +231,10 @@ #define MMCRA_THR_CTR_EXP(v) (((v) >> MMCRA_THR_CTR_EXP_SHIFT) &\ MMCRA_THR_CTR_EXP_MASK) +#define P10_MMCRA_THR_CTR_MANT_MASK 0xFFul +#define P10_MMCRA_THR_CTR_MANT(v) (((v) >> MMCRA_THR_CTR_MANT_SHIFT) &\ + P10_MMCRA_THR_CTR_MANT_MASK) + /* MMCRA Threshold Compare bit constant for power9 */ #define p9_MMCRA_THR_CMP_SHIFT 45 diff --git a/arch/powerpc/perf/power10-events-list.h b/arch/powerpc/perf/power10-events-list.h index 60c1b8111082..e45dafe818ed 100644 --- a/arch/powerpc/perf/power10-events-list.h +++ b/arch/powerpc/perf/power10-events-list.h @@ -15,6 +15,9 @@ EVENT(PM_EXEC_STALL, 0x30008); EVENT(PM_RUN_INST_CMPL, 0x500fa); EVENT(PM_BR_CMPL, 0x4d05e); EVENT(PM_BR_MPRED_CMPL, 0x400f6); +EVENT(PM_BR_FIN, 0x2f04a); +EVENT(PM_MPRED_BR_FIN, 0x3e098); +EVENT(PM_LD_DEMAND_MISS_L1_FIN, 0x400f0); /* All L1 D cache load references counted at finish, gated by reject */ EVENT(PM_LD_REF_L1, 0x100fc); @@ -36,6 +39,12 @@ EVENT(PM_IC_PREF_REQ, 0x040a0); EVENT(PM_DATA_FROM_L3, 0x01340000001c040); /* Demand LD - L3 Miss (not L2 hit and not L3 hit) */ EVENT(PM_DATA_FROM_L3MISS, 0x300fe); +/* All successful D-side store dispatches for this thread */ +EVENT(PM_L2_ST, 0x010000046080); +/* All successful D-side store dispatches for this thread that were L2 Miss */ +EVENT(PM_L2_ST_MISS, 0x26880); +/* Total HW L3 prefetches(Load+store) */ +EVENT(PM_L3_PF_MISS_L3, 0x100000016080); /* Data PTEG reload */ EVENT(PM_DTLB_MISS, 0x300fc); /* ITLB Reloaded */ diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c index 9dbe8f9b89b4..79e0206ca454 100644 --- a/arch/powerpc/perf/power10-pmu.c +++ b/arch/powerpc/perf/power10-pmu.c @@ -23,10 +23,10 @@ * * 28 24 20 16 12 8 4 0 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - * [ ] [ sample ] [ ] [ ] [ pmc ] [unit ] [ ] m [ pmcxsel ] - * | | | | | | - * | | | | | *- mark - * | | | *- L1/L2/L3 cache_sel | + * [ ] [ sample ] [ ] [ ] [ pmc ] [unit ] [ ] | m [ pmcxsel ] + * | | | | | | | + * | | | | | | *- mark + * | | | *- L1/L2/L3 cache_sel | |*-radix_scope_qual * | | sdar_mode | * | *- sampling mode for marked events *- combine * | @@ -59,6 +59,7 @@ * * MMCR1[16] = cache_sel[0] * MMCR1[17] = cache_sel[1] + * MMCR1[18] = radix_scope_qual * * if mark: * MMCRA[63] = 1 (SAMPLE_ENABLE) @@ -113,6 +114,9 @@ GENERIC_EVENT_ATTR(cache-references, PM_LD_REF_L1); GENERIC_EVENT_ATTR(cache-misses, PM_LD_MISS_L1); GENERIC_EVENT_ATTR(mem-loads, MEM_LOADS); GENERIC_EVENT_ATTR(mem-stores, MEM_STORES); +GENERIC_EVENT_ATTR(branch-instructions, PM_BR_FIN); +GENERIC_EVENT_ATTR(branch-misses, PM_MPRED_BR_FIN); +GENERIC_EVENT_ATTR(cache-misses, PM_LD_DEMAND_MISS_L1_FIN); CACHE_EVENT_ATTR(L1-dcache-load-misses, PM_LD_MISS_L1); CACHE_EVENT_ATTR(L1-dcache-loads, PM_LD_REF_L1); @@ -123,12 +127,15 @@ CACHE_EVENT_ATTR(L1-icache-loads, PM_INST_FROM_L1); CACHE_EVENT_ATTR(L1-icache-prefetches, PM_IC_PREF_REQ); CACHE_EVENT_ATTR(LLC-load-misses, PM_DATA_FROM_L3MISS); CACHE_EVENT_ATTR(LLC-loads, PM_DATA_FROM_L3); +CACHE_EVENT_ATTR(LLC-prefetches, PM_L3_PF_MISS_L3); +CACHE_EVENT_ATTR(LLC-store-misses, PM_L2_ST_MISS); +CACHE_EVENT_ATTR(LLC-stores, PM_L2_ST); CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL); CACHE_EVENT_ATTR(branch-loads, PM_BR_CMPL); CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS); CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS); -static struct attribute *power10_events_attr[] = { +static struct attribute *power10_events_attr_dd1[] = { GENERIC_EVENT_PTR(PM_RUN_CYC), 
GENERIC_EVENT_PTR(PM_RUN_INST_CMPL), GENERIC_EVENT_PTR(PM_BR_CMPL), @@ -153,6 +160,39 @@ static struct attribute *power10_events_attr[] = { NULL }; +static struct attribute *power10_events_attr[] = { + GENERIC_EVENT_PTR(PM_RUN_CYC), + GENERIC_EVENT_PTR(PM_RUN_INST_CMPL), + GENERIC_EVENT_PTR(PM_BR_FIN), + GENERIC_EVENT_PTR(PM_MPRED_BR_FIN), + GENERIC_EVENT_PTR(PM_LD_REF_L1), + GENERIC_EVENT_PTR(PM_LD_DEMAND_MISS_L1_FIN), + GENERIC_EVENT_PTR(MEM_LOADS), + GENERIC_EVENT_PTR(MEM_STORES), + CACHE_EVENT_PTR(PM_LD_MISS_L1), + CACHE_EVENT_PTR(PM_LD_REF_L1), + CACHE_EVENT_PTR(PM_LD_PREFETCH_CACHE_LINE_MISS), + CACHE_EVENT_PTR(PM_ST_MISS_L1), + CACHE_EVENT_PTR(PM_L1_ICACHE_MISS), + CACHE_EVENT_PTR(PM_INST_FROM_L1), + CACHE_EVENT_PTR(PM_IC_PREF_REQ), + CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS), + CACHE_EVENT_PTR(PM_DATA_FROM_L3), + CACHE_EVENT_PTR(PM_L3_PF_MISS_L3), + CACHE_EVENT_PTR(PM_L2_ST_MISS), + CACHE_EVENT_PTR(PM_L2_ST), + CACHE_EVENT_PTR(PM_BR_MPRED_CMPL), + CACHE_EVENT_PTR(PM_BR_CMPL), + CACHE_EVENT_PTR(PM_DTLB_MISS), + CACHE_EVENT_PTR(PM_ITLB_MISS), + NULL +}; + +static struct attribute_group power10_pmu_events_group_dd1 = { + .name = "events", + .attrs = power10_events_attr_dd1, +}; + static struct attribute_group power10_pmu_events_group = { .name = "events", .attrs = power10_events_attr, @@ -175,6 +215,7 @@ PMU_FORMAT_ATTR(src_sel, "config:45-46"); PMU_FORMAT_ATTR(invert_bit, "config:47"); PMU_FORMAT_ATTR(src_mask, "config:48-53"); PMU_FORMAT_ATTR(src_match, "config:54-59"); +PMU_FORMAT_ATTR(radix_scope, "config:9"); static struct attribute *power10_pmu_format_attr[] = { &format_attr_event.attr, @@ -194,6 +235,7 @@ static struct attribute *power10_pmu_format_attr[] = { &format_attr_invert_bit.attr, &format_attr_src_mask.attr, &format_attr_src_match.attr, + &format_attr_radix_scope.attr, NULL, }; @@ -202,13 +244,19 @@ static struct attribute_group power10_pmu_format_group = { .attrs = power10_pmu_format_attr, }; +static const struct attribute_group *power10_pmu_attr_groups_dd1[] = { + &power10_pmu_format_group, + &power10_pmu_events_group_dd1, + NULL, +}; + static const struct attribute_group *power10_pmu_attr_groups[] = { &power10_pmu_format_group, &power10_pmu_events_group, NULL, }; -static int power10_generic_events[] = { +static int power10_generic_events_dd1[] = { [PERF_COUNT_HW_CPU_CYCLES] = PM_RUN_CYC, [PERF_COUNT_HW_INSTRUCTIONS] = PM_RUN_INST_CMPL, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_CMPL, @@ -217,6 +265,15 @@ static int power10_generic_events[] = { [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1, }; +static int power10_generic_events[] = { + [PERF_COUNT_HW_CPU_CYCLES] = PM_RUN_CYC, + [PERF_COUNT_HW_INSTRUCTIONS] = PM_RUN_INST_CMPL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_FIN, + [PERF_COUNT_HW_BRANCH_MISSES] = PM_MPRED_BR_FIN, + [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1, + [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_DEMAND_MISS_L1_FIN, +}; + static u64 power10_bhrb_filter_map(u64 branch_sample_type) { u64 pmu_bhrb_filter = 0; @@ -273,7 +330,7 @@ static void power10_config_bhrb(u64 pmu_bhrb_filter) * 0 means not supported, -1 means nonsensical, other values * are event codes. 
*/ -static u64 power10_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 power10_cache_events_dd1[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { [C(OP_READ)] = { [C(RESULT_ACCESS)] = PM_LD_REF_L1, @@ -374,6 +431,107 @@ static u64 power10_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { }, }; +static u64 power10_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PM_LD_REF_L1, + [C(RESULT_MISS)] = PM_LD_MISS_L1, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = PM_ST_MISS_L1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = PM_LD_PREFETCH_CACHE_LINE_MISS, + [C(RESULT_MISS)] = 0, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PM_INST_FROM_L1, + [C(RESULT_MISS)] = PM_L1_ICACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = PM_INST_FROM_L1MISS, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = PM_IC_PREF_REQ, + [C(RESULT_MISS)] = 0, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PM_DATA_FROM_L3, + [C(RESULT_MISS)] = PM_DATA_FROM_L3MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = PM_L2_ST, + [C(RESULT_MISS)] = PM_L2_ST_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = PM_L3_PF_MISS_L3, + [C(RESULT_MISS)] = 0, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = PM_DTLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = PM_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PM_BR_CMPL, + [C(RESULT_MISS)] = PM_BR_MPRED_CMPL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, + [C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, +}; + #undef C static struct power_pmu power10_pmu = { @@ -403,6 +561,7 @@ static struct power_pmu power10_pmu = { int init_power10_pmu(void) { + unsigned int pvr; int rc; /* Comes from cpu_specs[] */ @@ -410,9 +569,20 @@ int init_power10_pmu(void) strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power10")) return -ENODEV; + pvr = mfspr(SPRN_PVR); + /* Add the ppmu flag for power10 DD1 */ + if ((PVR_CFG(pvr) == 1)) + power10_pmu.flags |= PPMU_P10_DD1; + /* Set the PERF_REG_EXTENDED_MASK here */ PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_31; + if ((PVR_CFG(pvr) == 1)) { + power10_pmu.generic_events = power10_generic_events_dd1; + power10_pmu.attr_groups = power10_pmu_attr_groups_dd1; + power10_pmu.cache_events = &power10_cache_events_dd1; + } + rc = register_power_pmu(&power10_pmu); if (rc) return rc; diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index 6aa8defb5857..8d6029099848 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -106,6 +106,7 @@ int __init corenet_gen_publish_devices(void) { return of_platform_bus_probe(NULL, of_device_ids, NULL); 
} +machine_arch_initcall(corenet_generic, corenet_gen_publish_devices); static const char * const boards[] __initconst = { "fsl,P2041RDB", @@ -206,5 +207,3 @@ define_machine(corenet_generic) { .power_save = e500_idle, #endif }; - -machine_arch_initcall(corenet_generic, corenet_gen_publish_devices); diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig index abb2b45b2789..60cc5b537a98 100644 --- a/arch/powerpc/platforms/8xx/Kconfig +++ b/arch/powerpc/platforms/8xx/Kconfig @@ -194,13 +194,6 @@ config PIN_TLB_IMMR CONFIG_PIN_TLB_DATA is also selected, it will reduce CONFIG_PIN_TLB_DATA to 24 Mbytes. -config PIN_TLB_TEXT - bool "Pinned TLB for TEXT" - depends on PIN_TLB - default y - help - This pins kernel text with 8M pages. - endmenu endmenu diff --git a/arch/powerpc/platforms/8xx/micropatch.c b/arch/powerpc/platforms/8xx/micropatch.c index aed4bc75f352..aef179fcbd4f 100644 --- a/arch/powerpc/platforms/8xx/micropatch.c +++ b/arch/powerpc/platforms/8xx/micropatch.c @@ -360,6 +360,17 @@ void __init cpm_load_patch(cpm8xx_t *cp) if (IS_ENABLED(CONFIG_SMC_UCODE_PATCH)) { smc_uart_t *smp; + if (IS_ENABLED(CONFIG_PPC_EARLY_DEBUG_CPM)) { + int i; + + for (i = 0; i < sizeof(*smp); i += 4) { + u32 __iomem *src = (u32 __iomem *)&cp->cp_dparam[PROFF_SMC1 + i]; + u32 __iomem *dst = (u32 __iomem *)&cp->cp_dparam[PROFF_DSP1 + i]; + + out_be32(dst, in_be32(src)); + } + } + smp = (smc_uart_t *)&cp->cp_dparam[PROFF_SMC1]; out_be16(&smp->smc_rpbase, 0x1ec0); smp = (smc_uart_t *)&cp->cp_dparam[PROFF_SMC2]; diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index c194c4ae8bc7..3ce907523b1e 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -11,9 +11,6 @@ config PPC64 This option selects whether a 32-bit or a 64-bit kernel will be built. -config PPC_BOOK3S_32 - bool - menu "Processor support" choice prompt "Processor Type" @@ -23,20 +20,19 @@ choice The most common ones are the desktop and server CPUs (603, 604, 740, 750, 74xx) CPUs from Freescale and IBM, with their embedded 512x/52xx/82xx/83xx/86xx counterparts. - The other embedded parts, namely 4xx, 8xx, e200 (55xx) and e500 + The other embedded parts, namely 4xx, 8xx and e500 (85xx) each form a family of their own that is not compatible with the others. If unsure, select 52xx/6xx/7xx/74xx/82xx/83xx/86xx. -config PPC_BOOK3S_6xx +config PPC_BOOK3S_32 bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx" - select PPC_BOOK3S_32 - select PPC_FPU + imply PPC_FPU select PPC_HAVE_PMU_SUPPORT select PPC_HAVE_KUEP select PPC_HAVE_KUAP - select HAVE_ARCH_VMAP_STACK if !ADB_PMU + select HAVE_ARCH_VMAP_STACK config PPC_85xx bool "Freescale 85xx" @@ -66,11 +62,24 @@ config 44x select HAVE_PCI select PHYS_64BIT -config E200 - bool "Freescale e200" - endchoice +config PPC_BOOK3S_603 + bool "Support for 603 SW loaded TLB" + depends on PPC_BOOK3S_32 + default y + help + Provide support for processors based on the 603 cores. Those + processors don't have a HASH MMU and provide SW TLB loading. + +config PPC_BOOK3S_604 + bool "Support for 604+ HASH MMU" if PPC_BOOK3S_603 + depends on PPC_BOOK3S_32 + default y + help + Provide support for processors not based on the 603 cores. + Those processors have a HASH MMU. + choice prompt "Processor Type" depends on PPC64 @@ -218,9 +227,20 @@ config PPC_E500MC such as e5500/e6500), and must be disabled for running on e500v1 or e500v2. 
-config PPC_FPU +config PPC_FPU_REGS bool + +config PPC_FPU + bool "Support for Floating Point Unit (FPU)" if PPC_MPC832x default y if PPC64 + select PPC_FPU_REGS + help + This must be enabled to support the Floating Point Unit + Most 6xx have an FPU but e300c2 core (mpc832x) don't have + an FPU, so when building an embedded kernel for that target + you can disable FPU support. + + If unsure say Y. config FSL_EMB_PERFMON bool "Freescale Embedded Perfmon" @@ -247,12 +267,12 @@ config 4xx config BOOKE bool - depends on E200 || E500 || 44x || PPC_BOOK3E + depends on E500 || 44x || PPC_BOOK3E default y config FSL_BOOKE bool - depends on (E200 || E500) && PPC32 + depends on E500 && PPC32 default y # this is for common code between PPC32 & PPC64 FSL BOOKE @@ -317,7 +337,7 @@ config VSX config SPE_POSSIBLE def_bool y - depends on E200 || (E500 && !PPC_E500MC) + depends on E500 && !PPC_E500MC config SPE bool "SPE Support" @@ -395,6 +415,11 @@ config PPC_KUAP_DEBUG Add extra debugging for Kernel Userspace Access Protection (KUAP) If you're unsure, say N. +config PPC_PKEY + def_bool y + depends on PPC_BOOK3S_64 + depends on PPC_MEM_KEYS || PPC_KUAP || PPC_KUEP + config ARCH_ENABLE_HUGEPAGE_MIGRATION def_bool y depends on PPC_BOOK3S_64 && HUGETLB_PAGE && MIGRATION @@ -464,7 +489,7 @@ config NR_CPUS config NOT_COHERENT_CACHE bool - depends on 4xx || PPC_8xx || E200 || PPC_MPC512x || \ + depends on 4xx || PPC_8xx || PPC_MPC512x || \ GAMECUBE_COMMON || AMIGAONE select ARCH_HAS_DMA_PREP_COHERENT select ARCH_HAS_SYNC_DMA_FOR_DEVICE diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S index 7e0f8ba6e54a..d497a60003d2 100644 --- a/arch/powerpc/platforms/powermac/sleep.S +++ b/arch/powerpc/platforms/powermac/sleep.S @@ -44,7 +44,8 @@ #define SL_TB 0xa0 #define SL_R2 0xa8 #define SL_CR 0xac -#define SL_R12 0xb0 /* r12 to r31 */ +#define SL_LR 0xb0 +#define SL_R12 0xb4 /* r12 to r31 */ #define SL_SIZE (SL_R12 + 80) .section .text @@ -63,105 +64,107 @@ _GLOBAL(low_sleep_handler) blr #else mflr r0 - stw r0,4(r1) - stwu r1,-SL_SIZE(r1) + lis r11,sleep_storage@ha + addi r11,r11,sleep_storage@l + stw r0,SL_LR(r11) mfcr r0 - stw r0,SL_CR(r1) - stw r2,SL_R2(r1) - stmw r12,SL_R12(r1) + stw r0,SL_CR(r11) + stw r1,SL_SP(r11) + stw r2,SL_R2(r11) + stmw r12,SL_R12(r11) /* Save MSR & SDR1 */ mfmsr r4 - stw r4,SL_MSR(r1) + stw r4,SL_MSR(r11) mfsdr1 r4 - stw r4,SL_SDR1(r1) + stw r4,SL_SDR1(r11) /* Get a stable timebase and save it */ 1: mftbu r4 - stw r4,SL_TB(r1) + stw r4,SL_TB(r11) mftb r5 - stw r5,SL_TB+4(r1) + stw r5,SL_TB+4(r11) mftbu r3 cmpw r3,r4 bne 1b /* Save SPRGs */ mfsprg r4,0 - stw r4,SL_SPRG0(r1) + stw r4,SL_SPRG0(r11) mfsprg r4,1 - stw r4,SL_SPRG0+4(r1) + stw r4,SL_SPRG0+4(r11) mfsprg r4,2 - stw r4,SL_SPRG0+8(r1) + stw r4,SL_SPRG0+8(r11) mfsprg r4,3 - stw r4,SL_SPRG0+12(r1) + stw r4,SL_SPRG0+12(r11) /* Save BATs */ mfdbatu r4,0 - stw r4,SL_DBAT0(r1) + stw r4,SL_DBAT0(r11) mfdbatl r4,0 - stw r4,SL_DBAT0+4(r1) + stw r4,SL_DBAT0+4(r11) mfdbatu r4,1 - stw r4,SL_DBAT1(r1) + stw r4,SL_DBAT1(r11) mfdbatl r4,1 - stw r4,SL_DBAT1+4(r1) + stw r4,SL_DBAT1+4(r11) mfdbatu r4,2 - stw r4,SL_DBAT2(r1) + stw r4,SL_DBAT2(r11) mfdbatl r4,2 - stw r4,SL_DBAT2+4(r1) + stw r4,SL_DBAT2+4(r11) mfdbatu r4,3 - stw r4,SL_DBAT3(r1) + stw r4,SL_DBAT3(r11) mfdbatl r4,3 - stw r4,SL_DBAT3+4(r1) + stw r4,SL_DBAT3+4(r11) mfibatu r4,0 - stw r4,SL_IBAT0(r1) + stw r4,SL_IBAT0(r11) mfibatl r4,0 - stw r4,SL_IBAT0+4(r1) + stw r4,SL_IBAT0+4(r11) mfibatu r4,1 - stw r4,SL_IBAT1(r1) + stw r4,SL_IBAT1(r11) mfibatl 
r4,1 - stw r4,SL_IBAT1+4(r1) + stw r4,SL_IBAT1+4(r11) mfibatu r4,2 - stw r4,SL_IBAT2(r1) + stw r4,SL_IBAT2(r11) mfibatl r4,2 - stw r4,SL_IBAT2+4(r1) + stw r4,SL_IBAT2+4(r11) mfibatu r4,3 - stw r4,SL_IBAT3(r1) + stw r4,SL_IBAT3(r11) mfibatl r4,3 - stw r4,SL_IBAT3+4(r1) + stw r4,SL_IBAT3+4(r11) BEGIN_MMU_FTR_SECTION mfspr r4,SPRN_DBAT4U - stw r4,SL_DBAT4(r1) + stw r4,SL_DBAT4(r11) mfspr r4,SPRN_DBAT4L - stw r4,SL_DBAT4+4(r1) + stw r4,SL_DBAT4+4(r11) mfspr r4,SPRN_DBAT5U - stw r4,SL_DBAT5(r1) + stw r4,SL_DBAT5(r11) mfspr r4,SPRN_DBAT5L - stw r4,SL_DBAT5+4(r1) + stw r4,SL_DBAT5+4(r11) mfspr r4,SPRN_DBAT6U - stw r4,SL_DBAT6(r1) + stw r4,SL_DBAT6(r11) mfspr r4,SPRN_DBAT6L - stw r4,SL_DBAT6+4(r1) + stw r4,SL_DBAT6+4(r11) mfspr r4,SPRN_DBAT7U - stw r4,SL_DBAT7(r1) + stw r4,SL_DBAT7(r11) mfspr r4,SPRN_DBAT7L - stw r4,SL_DBAT7+4(r1) + stw r4,SL_DBAT7+4(r11) mfspr r4,SPRN_IBAT4U - stw r4,SL_IBAT4(r1) + stw r4,SL_IBAT4(r11) mfspr r4,SPRN_IBAT4L - stw r4,SL_IBAT4+4(r1) + stw r4,SL_IBAT4+4(r11) mfspr r4,SPRN_IBAT5U - stw r4,SL_IBAT5(r1) + stw r4,SL_IBAT5(r11) mfspr r4,SPRN_IBAT5L - stw r4,SL_IBAT5+4(r1) + stw r4,SL_IBAT5+4(r11) mfspr r4,SPRN_IBAT6U - stw r4,SL_IBAT6(r1) + stw r4,SL_IBAT6(r11) mfspr r4,SPRN_IBAT6L - stw r4,SL_IBAT6+4(r1) + stw r4,SL_IBAT6+4(r11) mfspr r4,SPRN_IBAT7U - stw r4,SL_IBAT7(r1) + stw r4,SL_IBAT7(r11) mfspr r4,SPRN_IBAT7L - stw r4,SL_IBAT7+4(r1) + stw r4,SL_IBAT7+4(r11) END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) /* Backup various CPU config stuffs */ @@ -180,9 +183,9 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) lis r5,grackle_wake_up@ha addi r5,r5,grackle_wake_up@l tophys(r5,r5) - stw r5,SL_PC(r1) + stw r5,SL_PC(r11) lis r4,KERNELBASE@h - tophys(r5,r1) + tophys(r5,r11) addi r5,r5,SL_PC lis r6,MAGIC@ha addi r6,r6,MAGIC@l @@ -194,12 +197,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) tophys(r3,r3) stw r3,0x80(r4) stw r5,0x84(r4) - /* Store a pointer to our backup storage into - * a kernel global - */ - lis r3,sleep_storage@ha - addi r3,r3,sleep_storage@l - stw r5,0(r3) .globl low_cpu_offline_self low_cpu_offline_self: @@ -279,7 +276,7 @@ _GLOBAL(core99_wake_up) lis r3,sleep_storage@ha addi r3,r3,sleep_storage@l tophys(r3,r3) - lwz r1,0(r3) + addi r1,r3,SL_PC /* Pass thru to older resume code ... 
*/ _ASM_NOKPROBE_SYMBOL(core99_wake_up) @@ -399,13 +396,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) blt 1b sync - /* restore the MSR and turn on the MMU */ - lwz r3,SL_MSR(r1) - bl turn_on_mmu - - /* get back the stack pointer */ - tovirt(r1,r1) - /* Restore TB */ li r3,0 mttbl r3 @@ -419,28 +409,24 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) mtcr r0 lwz r2,SL_R2(r1) lmw r12,SL_R12(r1) - addi r1,r1,SL_SIZE - lwz r0,4(r1) - mtlr r0 - blr -_ASM_NOKPROBE_SYMBOL(grackle_wake_up) -turn_on_mmu: - mflr r4 - tovirt(r4,r4) + /* restore the MSR and SP and turn on the MMU and return */ + lwz r3,SL_MSR(r1) + lwz r4,SL_LR(r1) + lwz r1,SL_SP(r1) mtsrr0 r4 mtsrr1 r3 sync isync rfi -_ASM_NOKPROBE_SYMBOL(turn_on_mmu) +_ASM_NOKPROBE_SYMBOL(grackle_wake_up) #endif /* defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ) */ - .section .data + .section .bss .balign L1_CACHE_BYTES sleep_storage: - .long 0 + .space SL_SIZE .balign L1_CACHE_BYTES, 0 #endif /* CONFIG_PPC_BOOK3S_32 */ diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig index 938803eab0ad..619b093a0657 100644 --- a/arch/powerpc/platforms/powernv/Kconfig +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -27,11 +27,11 @@ config OPAL_PRD recovery diagnostics on OpenPower machines config PPC_MEMTRACE - bool "Enable removal of RAM from kernel mappings for tracing" - depends on PPC_POWERNV && MEMORY_HOTREMOVE + bool "Enable runtime allocation of RAM for tracing" + depends on PPC_POWERNV && MEMORY_HOTPLUG && CONTIG_ALLOC help - Enabling this option allows for the removal of memory (RAM) - from the kernel mappings to be used for hardware tracing. + Enabling this option allows for runtime allocation of memory (RAM) + for hardware tracing. config PPC_VAS bool "IBM Virtual Accelerator Switchboard (VAS)" diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 1ed7c5286487..e6f461812856 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -589,6 +589,7 @@ struct p9_sprs { u64 spurr; u64 dscr; u64 wort; + u64 ciabr; u64 mmcra; u32 mmcr0; @@ -668,6 +669,7 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) sprs.spurr = mfspr(SPRN_SPURR); sprs.dscr = mfspr(SPRN_DSCR); sprs.wort = mfspr(SPRN_WORT); + sprs.ciabr = mfspr(SPRN_CIABR); sprs.mmcra = mfspr(SPRN_MMCRA); sprs.mmcr0 = mfspr(SPRN_MMCR0); @@ -785,6 +787,7 @@ core_woken: mtspr(SPRN_SPURR, sprs.spurr); mtspr(SPRN_DSCR, sprs.dscr); mtspr(SPRN_WORT, sprs.wort); + mtspr(SPRN_CIABR, sprs.ciabr); mtspr(SPRN_MMCRA, sprs.mmcra); mtspr(SPRN_MMCR0, sprs.mmcr0); diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index 6828108486f8..5fc9408bb0b3 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -30,6 +30,7 @@ struct memtrace_entry { char name[16]; }; +static DEFINE_MUTEX(memtrace_mutex); static u64 memtrace_size; static struct memtrace_entry *memtrace_array; @@ -50,84 +51,52 @@ static const struct file_operations memtrace_fops = { .open = simple_open, }; -static int check_memblock_online(struct memory_block *mem, void *arg) +static void memtrace_clear_range(unsigned long start_pfn, + unsigned long nr_pages) { - if (mem->state != MEM_ONLINE) - return -1; + unsigned long pfn; - return 0; -} - -static int change_memblock_state(struct memory_block *mem, void *arg) -{ - unsigned long state = (unsigned long)arg; - - mem->state = state; - - return 0; -} - -/* called with 
device_hotplug_lock held */ -static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages) -{ - const unsigned long start = PFN_PHYS(start_pfn); - const unsigned long size = PFN_PHYS(nr_pages); - - if (walk_memory_blocks(start, size, NULL, check_memblock_online)) - return false; - - walk_memory_blocks(start, size, (void *)MEM_GOING_OFFLINE, - change_memblock_state); - - if (offline_pages(start_pfn, nr_pages)) { - walk_memory_blocks(start, size, (void *)MEM_ONLINE, - change_memblock_state); - return false; + /* As HIGHMEM does not apply, use clear_page() directly. */ + for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) { + if (IS_ALIGNED(pfn, PAGES_PER_SECTION)) + cond_resched(); + clear_page(__va(PFN_PHYS(pfn))); } - - walk_memory_blocks(start, size, (void *)MEM_OFFLINE, - change_memblock_state); - - - return true; } static u64 memtrace_alloc_node(u32 nid, u64 size) { - u64 start_pfn, end_pfn, nr_pages, pfn; - u64 base_pfn; - u64 bytes = memory_block_size_bytes(); + const unsigned long nr_pages = PHYS_PFN(size); + unsigned long pfn, start_pfn; + struct page *page; - if (!node_spanned_pages(nid)) + /* + * Trace memory needs to be aligned to the size, which is guaranteed + * by alloc_contig_pages(). + */ + page = alloc_contig_pages(nr_pages, GFP_KERNEL | __GFP_THISNODE | + __GFP_NOWARN, nid, NULL); + if (!page) return 0; + start_pfn = page_to_pfn(page); - start_pfn = node_start_pfn(nid); - end_pfn = node_end_pfn(nid); - nr_pages = size >> PAGE_SHIFT; - - /* Trace memory needs to be aligned to the size */ - end_pfn = round_down(end_pfn - nr_pages, nr_pages); - - lock_device_hotplug(); - for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) { - if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) { - /* - * Remove memory in memory block size chunks so that - * iomem resources are always split to the same size and - * we never try to remove memory that spans two iomem - * resources. - */ - end_pfn = base_pfn + nr_pages; - for (pfn = base_pfn; pfn < end_pfn; pfn += bytes>> PAGE_SHIFT) { - __remove_memory(nid, pfn << PAGE_SHIFT, bytes); - } - unlock_device_hotplug(); - return base_pfn << PAGE_SHIFT; - } - } - unlock_device_hotplug(); + /* + * Clear the range while we still have a linear mapping. + * + * TODO: use __GFP_ZERO with alloc_contig_pages() once supported. + */ + memtrace_clear_range(start_pfn, nr_pages); - return 0; + /* + * Set pages PageOffline(), to indicate that nobody (e.g., hibernation, + * dumping, ...) should be touching these pages. 
+ */ + for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) + __SetPageOffline(pfn_to_page(pfn)); + + arch_remove_linear_mapping(PFN_PHYS(start_pfn), size); + + return PFN_PHYS(start_pfn); } static int memtrace_init_regions_runtime(u64 size) @@ -197,16 +166,30 @@ static int memtrace_init_debugfs(void) return ret; } -static int online_mem_block(struct memory_block *mem, void *arg) +static int memtrace_free(int nid, u64 start, u64 size) { - return device_online(&mem->dev); + struct mhp_params params = { .pgprot = PAGE_KERNEL }; + const unsigned long nr_pages = PHYS_PFN(size); + const unsigned long start_pfn = PHYS_PFN(start); + unsigned long pfn; + int ret; + + ret = arch_create_linear_mapping(nid, start, size, &params); + if (ret) + return ret; + + for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) + __ClearPageOffline(pfn_to_page(pfn)); + + free_contig_range(start_pfn, nr_pages); + return 0; } /* - * Iterate through the chunks of memory we have removed from the kernel - * and attempt to add them back to the kernel. + * Iterate through the chunks of memory we allocated and attempt to expose + * them back to the kernel. */ -static int memtrace_online(void) +static int memtrace_free_regions(void) { int i, ret = 0; struct memtrace_entry *ent; @@ -214,7 +197,7 @@ static int memtrace_online(void) for (i = memtrace_array_nr - 1; i >= 0; i--) { ent = &memtrace_array[i]; - /* We have onlined this chunk previously */ + /* We have freed this chunk previously */ if (ent->nid == NUMA_NO_NODE) continue; @@ -224,30 +207,25 @@ static int memtrace_online(void) ent->mem = 0; } - if (add_memory(ent->nid, ent->start, ent->size, MHP_NONE)) { - pr_err("Failed to add trace memory to node %d\n", + if (memtrace_free(ent->nid, ent->start, ent->size)) { + pr_err("Failed to free trace memory on node %d\n", ent->nid); ret += 1; continue; } - lock_device_hotplug(); - walk_memory_blocks(ent->start, ent->size, NULL, - online_mem_block); - unlock_device_hotplug(); - /* - * Memory was added successfully so clean up references to it - * so on reentry we can tell that this chunk was added. + * Memory was freed successfully so clean up references to it + * so on reentry we can tell that this chunk was freed. */ debugfs_remove_recursive(ent->dir); - pr_info("Added trace memory back to node %d\n", ent->nid); + pr_info("Freed trace memory back on node %d\n", ent->nid); ent->size = ent->start = ent->nid = NUMA_NO_NODE; } if (ret) return ret; - /* If all chunks of memory were added successfully, reset globals */ + /* If all chunks of memory were freed successfully, reset globals */ kfree(memtrace_array); memtrace_array = NULL; memtrace_size = 0; @@ -257,6 +235,7 @@ static int memtrace_online(void) static int memtrace_enable_set(void *data, u64 val) { + int rc = -EAGAIN; u64 bytes; /* @@ -269,25 +248,29 @@ static int memtrace_enable_set(void *data, u64 val) return -EINVAL; } - /* Re-add/online previously removed/offlined memory */ - if (memtrace_size) { - if (memtrace_online()) - return -EAGAIN; - } + mutex_lock(&memtrace_mutex); - if (!val) - return 0; + /* Free all previously allocated memory. */ + if (memtrace_size && memtrace_free_regions()) + goto out_unlock; + + if (!val) { + rc = 0; + goto out_unlock; + } - /* Offline and remove memory */ + /* Allocate memory. 
*/ if (memtrace_init_regions_runtime(val)) - return -EINVAL; + goto out_unlock; if (memtrace_init_debugfs()) - return -EINVAL; + goto out_unlock; memtrace_size = val; - - return 0; + rc = 0; +out_unlock: + mutex_unlock(&memtrace_mutex); + return rc; } static int memtrace_enable_get(void *data, u64 *val) diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index abeaa533b976..b711dc3262a3 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -385,7 +385,8 @@ static void pnv_npu_peers_take_ownership(struct iommu_table_group *table_group) for (i = 0; i < npucomp->pe_num; ++i) { struct pnv_ioda_pe *pe = npucomp->pe[i]; - if (!pe->table_group.ops->take_ownership) + if (!pe->table_group.ops || + !pe->table_group.ops->take_ownership) continue; pe->table_group.ops->take_ownership(&pe->table_group); } @@ -401,7 +402,8 @@ static void pnv_npu_peers_release_ownership( for (i = 0; i < npucomp->pe_num; ++i) { struct pnv_ioda_pe *pe = npucomp->pe[i]; - if (!pe->table_group.ops->release_ownership) + if (!pe->table_group.ops || + !pe->table_group.ops->release_ownership) continue; pe->table_group.ops->release_ownership(&pe->table_group); } @@ -623,6 +625,11 @@ int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid, return -ENODEV; hose = pci_bus_to_host(npdev->bus); + if (hose->npu == NULL) { + dev_info_once(&npdev->dev, "Nvlink1 does not support contexts"); + return 0; + } + nphb = hose->private_data; dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=%u\n", @@ -670,6 +677,11 @@ int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev) return -ENODEV; hose = pci_bus_to_host(npdev->bus); + if (hose->npu == NULL) { + dev_info_once(&npdev->dev, "Nvlink1 does not support contexts"); + return 0; + } + nphb = hose->private_data; dev_dbg(&gpdev->dev, "destroy context opalid=%llu\n", diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c index ecdad219d704..9105efcf242a 100644 --- a/arch/powerpc/platforms/powernv/ocxl.c +++ b/arch/powerpc/platforms/powernv/ocxl.c @@ -483,3 +483,117 @@ int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle) return rc; } EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache); + +int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid, + uint64_t lpcr, void __iomem **arva) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + u64 mmio_atsd; + int rc; + + /* ATSD physical address. + * ATSD LAUNCH register: write access initiates a shoot down to + * initiate the TLB Invalidate command. + */ + rc = of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", + 0, &mmio_atsd); + if (rc) { + dev_info(&dev->dev, "No available ATSD found\n"); + return rc; + } + + /* Assign a register set to a Logical Partition and MMIO ATSD + * LPARID register to the required value. 
+ */ + rc = opal_npu_map_lpar(phb->opal_id, pci_dev_id(dev), + lparid, lpcr); + if (rc) { + dev_err(&dev->dev, "Error mapping device to LPAR: %d\n", rc); + return rc; + } + + *arva = ioremap(mmio_atsd, 24); + if (!(*arva)) { + dev_warn(&dev->dev, "ioremap failed - mmio_atsd: %#llx\n", mmio_atsd); + rc = -ENOMEM; + } + + return rc; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_map_lpar); + +void pnv_ocxl_unmap_lpar(void __iomem *arva) +{ + iounmap(arva); +} +EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_lpar); + +void pnv_ocxl_tlb_invalidate(void __iomem *arva, + unsigned long pid, + unsigned long addr, + unsigned long page_size) +{ + unsigned long timeout = jiffies + (HZ * PNV_OCXL_ATSD_TIMEOUT); + u64 val = 0ull; + int pend; + u8 size; + + if (!(arva)) + return; + + if (addr) { + /* load Abbreviated Virtual Address register with + * the necessary value + */ + val |= FIELD_PREP(PNV_OCXL_ATSD_AVA_AVA, addr >> (63-51)); + out_be64(arva + PNV_OCXL_ATSD_AVA, val); + } + + /* Write access initiates a shoot down to initiate the + * TLB Invalidate command + */ + val = PNV_OCXL_ATSD_LNCH_R; + val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_RIC, 0b10); + if (addr) + val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b00); + else { + val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b01); + val |= PNV_OCXL_ATSD_LNCH_OCAPI_SINGLETON; + } + val |= PNV_OCXL_ATSD_LNCH_PRS; + /* Actual Page Size to be invalidated + * 000 4KB + * 101 64KB + * 001 2MB + * 010 1GB + */ + size = 0b101; + if (page_size == 0x1000) + size = 0b000; + if (page_size == 0x200000) + size = 0b001; + if (page_size == 0x40000000) + size = 0b010; + val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_AP, size); + val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_PID, pid); + out_be64(arva + PNV_OCXL_ATSD_LNCH, val); + + /* Poll the ATSD status register to determine when the + * TLB Invalidate has been completed. 
+ */ + val = in_be64(arva + PNV_OCXL_ATSD_STAT); + pend = val >> 63; + + while (pend) { + if (time_after_eq(jiffies, timeout)) { + pr_err("%s - Timeout while reading XTS MMIO ATSD status register (val=%#llx, pidr=0x%lx)\n", + __func__, val, pid); + return; + } + cpu_relax(); + val = in_be64(arva + PNV_OCXL_ATSD_STAT); + pend = val >> 63; + } +} +EXPORT_SYMBOL_GPL(pnv_ocxl_tlb_invalidate); diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 37b380eef41a..5821b0fa8614 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -171,8 +171,8 @@ static ssize_t raw_attr_read(struct file *filep, struct kobject *kobj, opal_rc = opal_read_elog(__pa(elog->buffer), elog->size, elog->id); if (opal_rc != OPAL_SUCCESS) { - pr_err("ELOG: log read failed for log-id=%llx\n", - elog->id); + pr_err_ratelimited("ELOG: log read failed for log-id=%llx\n", + elog->id); kfree(elog->buffer); elog->buffer = NULL; return -EIO; diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c index 3e1f064a18db..f0c1830deb51 100644 --- a/arch/powerpc/platforms/powernv/opal-hmi.c +++ b/arch/powerpc/platforms/powernv/opal-hmi.c @@ -213,6 +213,8 @@ static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt) "A hypervisor resource error occurred", "CAPP recovery process is in progress", }; + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); /* Print things out */ if (hmi_evt->version < OpalHMIEvt_V1) { @@ -240,19 +242,22 @@ static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt) break; } - printk("%s%s Hypervisor Maintenance interrupt [%s]\n", - level, sevstr, - hmi_evt->disposition == OpalHMI_DISPOSITION_RECOVERED ? - "Recovered" : "Not recovered"); - error_info = hmi_evt->type < ARRAY_SIZE(hmi_error_types) ? - hmi_error_types[hmi_evt->type] - : "Unknown"; - printk("%s Error detail: %s\n", level, error_info); - printk("%s HMER: %016llx\n", level, be64_to_cpu(hmi_evt->hmer)); - if ((hmi_evt->type == OpalHMI_ERROR_TFAC) || - (hmi_evt->type == OpalHMI_ERROR_TFMR_PARITY)) - printk("%s TFMR: %016llx\n", level, + if (hmi_evt->severity != OpalHMI_SEV_NO_ERROR || __ratelimit(&rs)) { + printk("%s%s Hypervisor Maintenance interrupt [%s]\n", + level, sevstr, + hmi_evt->disposition == OpalHMI_DISPOSITION_RECOVERED ? + "Recovered" : "Not recovered"); + error_info = hmi_evt->type < ARRAY_SIZE(hmi_error_types) ? + hmi_error_types[hmi_evt->type] + : "Unknown"; + printk("%s Error detail: %s\n", level, error_info); + printk("%s HMER: %016llx\n", level, + be64_to_cpu(hmi_evt->hmer)); + if ((hmi_evt->type == OpalHMI_ERROR_TFAC) || + (hmi_evt->type == OpalHMI_ERROR_TFMR_PARITY)) + printk("%s TFMR: %016llx\n", level, be64_to_cpu(hmi_evt->tfmr)); + } if (hmi_evt->version < OpalHMIEvt_V2) return; diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index d95954ad4c0a..c61c3b62c8c6 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -731,7 +731,7 @@ int opal_hmi_exception_early2(struct pt_regs *regs) return 1; } -/* HMI exception handler called in virtual mode during check_irq_replay. */ +/* HMI exception handler called in virtual mode when irqs are next enabled. 
*/ int opal_handle_hmi_exception(struct pt_regs *regs) { /* diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 2b4ceb5e6ce4..c4f72cdc9b51 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2613,8 +2613,10 @@ static bool pnv_pci_enable_device_hook(struct pci_dev *dev) return true; pdn = pci_get_pdn(dev); - if (!pdn || pdn->pe_number == IODA_INVALID_PE) + if (!pdn || pdn->pe_number == IODA_INVALID_PE) { + pci_err(dev, "pci_enable_device() blocked, no PE assigned.\n"); return false; + } return true; } diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c index c4434f20f42f..28aac933a439 100644 --- a/arch/powerpc/platforms/powernv/pci-sriov.c +++ b/arch/powerpc/platforms/powernv/pci-sriov.c @@ -422,7 +422,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) { struct pnv_iov_data *iov; struct pnv_phb *phb; - unsigned int win; + int win; struct resource *res; int i, j; int64_t rc; diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index c62aaa29a9d5..b431f41c6cb5 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -382,7 +382,6 @@ static int ps3_system_bus_probe(struct device *_dev) static int ps3_system_bus_remove(struct device *_dev) { - int result = 0; struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); struct ps3_system_bus_driver *drv; @@ -393,13 +392,13 @@ static int ps3_system_bus_remove(struct device *_dev) BUG_ON(!drv); if (drv->remove) - result = drv->remove(dev); + drv->remove(dev); else dev_dbg(&dev->core, "%s:%d %s: no remove method\n", __func__, __LINE__, drv->core.name); pr_debug(" <- %s:%d: %s\n", __func__, __LINE__, dev_name(&dev->core)); - return result; + return 0; } static void ps3_system_bus_shutdown(struct device *_dev) diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index a02012f1b04a..12cbffd3c2e3 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -746,6 +746,7 @@ static int dlpar_cpu_add_by_count(u32 cpus_to_add) parent = of_find_node_by_path("/cpus"); if (!parent) { pr_warn("Could not find CPU root node in device tree\n"); + kfree(cpu_drcs); return -1; } diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 7efe6ec5d14a..8377f1f7c78e 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -479,7 +479,7 @@ static int dlpar_memory_remove_by_index(u32 drc_index) int lmb_found; int rc; - pr_info("Attempting to hot-remove LMB, drc index %x\n", drc_index); + pr_debug("Attempting to hot-remove LMB, drc index %x\n", drc_index); lmb_found = 0; for_each_drmem_lmb(lmb) { @@ -497,10 +497,10 @@ static int dlpar_memory_remove_by_index(u32 drc_index) rc = -EINVAL; if (rc) - pr_info("Failed to hot-remove memory at %llx\n", - lmb->base_addr); + pr_debug("Failed to hot-remove memory at %llx\n", + lmb->base_addr); else - pr_info("Memory at %llx was hot-removed\n", lmb->base_addr); + pr_debug("Memory at %llx was hot-removed\n", lmb->base_addr); return rc; } @@ -717,8 +717,8 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add) if (!drmem_lmb_reserved(lmb)) continue; - pr_info("Memory at %llx (drc index %x) was hot-added\n", - lmb->base_addr, lmb->drc_index); + 
pr_debug("Memory at %llx (drc index %x) was hot-added\n", + lmb->base_addr, lmb->drc_index); drmem_remove_lmb_reservation(lmb); } rc = 0; diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 2f73cb5bf12d..ea4d6a660e0d 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -12,9 +12,11 @@ #include <linux/cpu.h> #include <linux/kernel.h> #include <linux/kobject.h> +#include <linux/nmi.h> #include <linux/sched.h> #include <linux/smp.h> #include <linux/stat.h> +#include <linux/stop_machine.h> #include <linux/completion.h> #include <linux/device.h> #include <linux/delay.h> @@ -59,18 +61,10 @@ static int mobility_rtas_call(int token, char *buf, s32 scope) return rc; } -static int delete_dt_node(__be32 phandle) +static int delete_dt_node(struct device_node *dn) { - struct device_node *dn; - - dn = of_find_node_by_phandle(be32_to_cpu(phandle)); - if (!dn) - return -ENOENT; - pr_debug("removing node %pOFfp\n", dn); - dlpar_detach_node(dn); - of_node_put(dn); return 0; } @@ -135,10 +129,9 @@ static int update_dt_property(struct device_node *dn, struct property **prop, return 0; } -static int update_dt_node(__be32 phandle, s32 scope) +static int update_dt_node(struct device_node *dn, s32 scope) { struct update_props_workarea *upwa; - struct device_node *dn; struct property *prop = NULL; int i, rc, rtas_rc; char *prop_data; @@ -155,14 +148,8 @@ static int update_dt_node(__be32 phandle, s32 scope) if (!rtas_buf) return -ENOMEM; - dn = of_find_node_by_phandle(be32_to_cpu(phandle)); - if (!dn) { - kfree(rtas_buf); - return -ENOENT; - } - upwa = (struct update_props_workarea *)&rtas_buf[0]; - upwa->phandle = phandle; + upwa->phandle = cpu_to_be32(dn->phandle); do { rtas_rc = mobility_rtas_call(update_properties_token, rtas_buf, @@ -208,11 +195,12 @@ static int update_dt_node(__be32 phandle, s32 scope) rc = update_dt_property(dn, &prop, prop_name, vd, prop_data); if (rc) { - printk(KERN_ERR "Could not update %s" - " property\n", prop_name); + pr_err("updating %s property failed: %d\n", + prop_name, rc); } prop_data += vd; + break; } cond_resched(); @@ -221,26 +209,18 @@ static int update_dt_node(__be32 phandle, s32 scope) cond_resched(); } while (rtas_rc == 1); - of_node_put(dn); kfree(rtas_buf); return 0; } -static int add_dt_node(__be32 parent_phandle, __be32 drc_index) +static int add_dt_node(struct device_node *parent_dn, __be32 drc_index) { struct device_node *dn; - struct device_node *parent_dn; int rc; - parent_dn = of_find_node_by_phandle(be32_to_cpu(parent_phandle)); - if (!parent_dn) - return -ENOENT; - dn = dlpar_configure_connector(drc_index, parent_dn); - if (!dn) { - of_node_put(parent_dn); + if (!dn) return -ENOENT; - } rc = dlpar_attach_node(dn, parent_dn); if (rc) @@ -248,7 +228,6 @@ static int add_dt_node(__be32 parent_phandle, __be32 drc_index) pr_debug("added node %pOFfp\n", dn); - of_node_put(parent_dn); return rc; } @@ -261,7 +240,7 @@ int pseries_devicetree_update(s32 scope) update_nodes_token = rtas_token("ibm,update-nodes"); if (update_nodes_token == RTAS_UNKNOWN_SERVICE) - return -EINVAL; + return 0; rtas_buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); if (!rtas_buf) @@ -281,22 +260,31 @@ int pseries_devicetree_update(s32 scope) data++; for (i = 0; i < node_count; i++) { + struct device_node *np; __be32 phandle = *data++; __be32 drc_index; + np = of_find_node_by_phandle(be32_to_cpu(phandle)); + if (!np) { + pr_warn("Failed lookup: phandle 0x%x for action 0x%x\n", + 
be32_to_cpu(phandle), action); + continue; + } + switch (action) { case DELETE_DT_NODE: - delete_dt_node(phandle); + delete_dt_node(np); break; case UPDATE_DT_NODE: - update_dt_node(phandle, scope); + update_dt_node(np, scope); break; case ADD_DT_NODE: drc_index = *data++; - add_dt_node(phandle, drc_index); + add_dt_node(np, drc_index); break; } + of_node_put(np); cond_resched(); } } @@ -311,21 +299,8 @@ int pseries_devicetree_update(s32 scope) void post_mobility_fixup(void) { int rc; - int activate_fw_token; - - activate_fw_token = rtas_token("ibm,activate-firmware"); - if (activate_fw_token == RTAS_UNKNOWN_SERVICE) { - printk(KERN_ERR "Could not make post-mobility " - "activate-fw call.\n"); - return; - } - - do { - rc = rtas_call(activate_fw_token, 0, 1, NULL); - } while (rtas_busy_delay(rc)); - if (rc) - printk(KERN_ERR "Post-mobility activate-fw failed: %d\n", rc); + rtas_activate_firmware(); /* * We don't want CPUs to go online/offline while the device @@ -342,8 +317,7 @@ void post_mobility_fixup(void) rc = pseries_devicetree_update(MIGRATION_SCOPE); if (rc) - printk(KERN_ERR "Post-mobility device tree update " - "failed: %d\n", rc); + pr_err("device tree update failed: %d\n", rc); cacheinfo_rebuild(); @@ -358,6 +332,279 @@ void post_mobility_fixup(void) return; } +static int poll_vasi_state(u64 handle, unsigned long *res) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long hvrc; + int ret; + + hvrc = plpar_hcall(H_VASI_STATE, retbuf, handle); + switch (hvrc) { + case H_SUCCESS: + ret = 0; + *res = retbuf[0]; + break; + case H_PARAMETER: + ret = -EINVAL; + break; + case H_FUNCTION: + ret = -EOPNOTSUPP; + break; + case H_HARDWARE: + default: + pr_err("unexpected H_VASI_STATE result %ld\n", hvrc); + ret = -EIO; + break; + } + return ret; +} + +static int wait_for_vasi_session_suspending(u64 handle) +{ + unsigned long state; + int ret; + + /* + * Wait for transition from H_VASI_ENABLED to + * H_VASI_SUSPENDING. Treat anything else as an error. + */ + while (true) { + ret = poll_vasi_state(handle, &state); + + if (ret != 0 || state == H_VASI_SUSPENDING) { + break; + } else if (state == H_VASI_ENABLED) { + ssleep(1); + } else { + pr_err("unexpected H_VASI_STATE result %lu\n", state); + ret = -EIO; + break; + } + } + + /* + * Proceed even if H_VASI_STATE is unavailable. If H_JOIN or + * ibm,suspend-me are also unimplemented, we'll recover then. + */ + if (ret == -EOPNOTSUPP) + ret = 0; + + return ret; +} + +static void prod_single(unsigned int target_cpu) +{ + long hvrc; + int hwid; + + hwid = get_hard_smp_processor_id(target_cpu); + hvrc = plpar_hcall_norets(H_PROD, hwid); + if (hvrc == H_SUCCESS) + return; + pr_err_ratelimited("H_PROD of CPU %u (hwid %d) error: %ld\n", + target_cpu, hwid, hvrc); +} + +static void prod_others(void) +{ + unsigned int cpu; + + for_each_online_cpu(cpu) { + if (cpu != smp_processor_id()) + prod_single(cpu); + } +} + +static u16 clamp_slb_size(void) +{ + u16 prev = mmu_slb_size; + + slb_set_size(SLB_MIN_SIZE); + + return prev; +} + +static int do_suspend(void) +{ + u16 saved_slb_size; + int status; + int ret; + + pr_info("calling ibm,suspend-me on CPU %i\n", smp_processor_id()); + + /* + * The destination processor model may have fewer SLB entries + * than the source. We reduce mmu_slb_size to a safe minimum + * before suspending in order to minimize the possibility of + * programming non-existent entries on the destination. If + * suspend fails, we restore it before returning. 
On success + * the OF reconfig path will update it from the new device + * tree after resuming on the destination. + */ + saved_slb_size = clamp_slb_size(); + + ret = rtas_ibm_suspend_me(&status); + if (ret != 0) { + pr_err("ibm,suspend-me error: %d\n", status); + slb_set_size(saved_slb_size); + } + + return ret; +} + +static int do_join(void *arg) +{ + atomic_t *counter = arg; + long hvrc; + int ret; + + /* Must ensure MSR.EE off for H_JOIN. */ + hard_irq_disable(); + hvrc = plpar_hcall_norets(H_JOIN); + + switch (hvrc) { + case H_CONTINUE: + /* + * All other CPUs are offline or in H_JOIN. This CPU + * attempts the suspend. + */ + ret = do_suspend(); + break; + case H_SUCCESS: + /* + * The suspend is complete and this cpu has received a + * prod. + */ + ret = 0; + break; + case H_BAD_MODE: + case H_HARDWARE: + default: + ret = -EIO; + pr_err_ratelimited("H_JOIN error %ld on CPU %i\n", + hvrc, smp_processor_id()); + break; + } + + if (atomic_inc_return(counter) == 1) { + pr_info("CPU %u waking all threads\n", smp_processor_id()); + prod_others(); + } + /* + * Execution may have been suspended for several seconds, so + * reset the watchdog. + */ + touch_nmi_watchdog(); + return ret; +} + +/* + * Abort reason code byte 0. We use only the 'Migrating partition' value. + */ +enum vasi_aborting_entity { + ORCHESTRATOR = 1, + VSP_SOURCE = 2, + PARTITION_FIRMWARE = 3, + PLATFORM_FIRMWARE = 4, + VSP_TARGET = 5, + MIGRATING_PARTITION = 6, +}; + +static void pseries_cancel_migration(u64 handle, int err) +{ + u32 reason_code; + u32 detail; + u8 entity; + long hvrc; + + entity = MIGRATING_PARTITION; + detail = abs(err) & 0xffffff; + reason_code = (entity << 24) | detail; + + hvrc = plpar_hcall_norets(H_VASI_SIGNAL, handle, + H_VASI_SIGNAL_CANCEL, reason_code); + if (hvrc) + pr_err("H_VASI_SIGNAL error: %ld\n", hvrc); +} + +static int pseries_suspend(u64 handle) +{ + const unsigned int max_attempts = 5; + unsigned int retry_interval_ms = 1; + unsigned int attempt = 1; + int ret; + + while (true) { + atomic_t counter = ATOMIC_INIT(0); + unsigned long vasi_state; + int vasi_err; + + ret = stop_machine(do_join, &counter, cpu_online_mask); + if (ret == 0) + break; + /* + * Encountered an error. If the VASI stream is still + * in Suspending state, it's likely a transient + * condition related to some device in the partition + * and we can retry in the hope that the cause has + * cleared after some delay. + * + * A better design would allow drivers etc to prepare + * for the suspend and avoid conditions which prevent + * the suspend from succeeding. For now, we have this + * mitigation. 
+ */ + pr_notice("Partition suspend attempt %u of %u error: %d\n", + attempt, max_attempts, ret); + + if (attempt == max_attempts) + break; + + vasi_err = poll_vasi_state(handle, &vasi_state); + if (vasi_err == 0) { + if (vasi_state != H_VASI_SUSPENDING) { + pr_notice("VASI state %lu after failed suspend\n", + vasi_state); + break; + } + } else if (vasi_err != -EOPNOTSUPP) { + pr_err("VASI state poll error: %d", vasi_err); + break; + } + + pr_notice("Will retry partition suspend after %u ms\n", + retry_interval_ms); + + msleep(retry_interval_ms); + retry_interval_ms *= 10; + attempt++; + } + + return ret; +} + +static int pseries_migrate_partition(u64 handle) +{ + int ret; + + ret = wait_for_vasi_session_suspending(handle); + if (ret) + return ret; + + ret = pseries_suspend(handle); + if (ret == 0) + post_mobility_fixup(); + else + pseries_cancel_migration(handle, ret); + + return ret; +} + +int rtas_syscall_dispatch_ibm_suspend_me(u64 handle) +{ + return pseries_migrate_partition(handle); +} + static ssize_t migration_store(struct class *class, struct class_attribute *attr, const char *buf, size_t count) @@ -369,17 +616,10 @@ static ssize_t migration_store(struct class *class, if (rc) return rc; - do { - rc = rtas_ibm_suspend_me(streamid); - if (rc == -EAGAIN) - ssleep(1); - } while (rc == -EAGAIN); - + rc = pseries_migrate_partition(streamid); if (rc) return rc; - post_mobility_fixup(); - return count; } diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index 911534b89c85..72a4d4167849 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -290,6 +290,25 @@ static void fixup_winbond_82c105(struct pci_dev* dev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105, fixup_winbond_82c105); +static enum pci_bus_speed prop_to_pci_speed(u32 prop) +{ + switch (prop) { + case 0x01: + return PCIE_SPEED_2_5GT; + case 0x02: + return PCIE_SPEED_5_0GT; + case 0x04: + return PCIE_SPEED_8_0GT; + case 0x08: + return PCIE_SPEED_16_0GT; + case 0x10: + return PCIE_SPEED_32_0GT; + default: + pr_debug("Unexpected PCI link speed property value\n"); + return PCI_SPEED_UNKNOWN; + } +} + int pseries_root_bridge_prepare(struct pci_host_bridge *bridge) { struct device_node *dn, *pdn; @@ -322,35 +341,7 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge) return 0; } - switch (pcie_link_speed_stats[0]) { - case 0x01: - bus->max_bus_speed = PCIE_SPEED_2_5GT; - break; - case 0x02: - bus->max_bus_speed = PCIE_SPEED_5_0GT; - break; - case 0x04: - bus->max_bus_speed = PCIE_SPEED_8_0GT; - break; - default: - bus->max_bus_speed = PCI_SPEED_UNKNOWN; - break; - } - - switch (pcie_link_speed_stats[1]) { - case 0x01: - bus->cur_bus_speed = PCIE_SPEED_2_5GT; - break; - case 0x02: - bus->cur_bus_speed = PCIE_SPEED_5_0GT; - break; - case 0x04: - bus->cur_bus_speed = PCIE_SPEED_8_0GT; - break; - default: - bus->cur_bus_speed = PCI_SPEED_UNKNOWN; - break; - } - + bus->max_bus_speed = prop_to_pci_speed(pcie_link_speed_stats[0]); + bus->cur_bus_speed = prop_to_pci_speed(pcie_link_speed_stats[1]); return 0; } diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index b2b245b25edb..149cec2212e6 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -526,8 +526,11 @@ static int mce_handle_err_realmode(int disposition, u8 error_type) #ifdef CONFIG_PPC_BOOK3S_64 if (disposition == RTAS_DISP_NOT_RECOVERED) { switch (error_type) { - case 
MC_ERROR_TYPE_SLB: case MC_ERROR_TYPE_ERAT: + flush_erat(); + disposition = RTAS_DISP_FULLY_RECOVERED; + break; + case MC_ERROR_TYPE_SLB: /* * Store the old slb content in paca before flushing. * Print this when we go to virtual mode. diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index 92922491a81c..c70b4be9f0a5 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -42,6 +42,7 @@ #include <asm/plpar_wrappers.h> #include <asm/code-patching.h> #include <asm/svm.h> +#include <asm/kvm_guest.h> #include "pseries.h" @@ -210,7 +211,7 @@ static __init void pSeries_smp_probe(void) if (!cpu_has_feature(CPU_FTR_SMT)) return; - if (is_kvm_guest()) { + if (check_kvm_guest()) { /* * KVM emulates doorbells by disabling FSCR[MSGP] so msgsndp * faults to the hypervisor which then reads the instruction diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 81e0ac58d620..1b902cbf85c5 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -13,13 +13,8 @@ #include <asm/mmu.h> #include <asm/rtas.h> #include <asm/topology.h> -#include "../../kernel/cacheinfo.h" -static u64 stream_id; static struct device suspend_dev; -static DECLARE_COMPLETION(suspend_work); -static struct rtas_suspend_me_data suspend_data; -static atomic_t suspending; /** * pseries_suspend_begin - First phase of hibernation @@ -29,7 +24,7 @@ static atomic_t suspending; * Return value: * 0 on success / other on failure **/ -static int pseries_suspend_begin(suspend_state_t state) +static int pseries_suspend_begin(u64 stream_id) { long vasi_state, rc; unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; @@ -49,41 +44,10 @@ static int pseries_suspend_begin(suspend_state_t state) vasi_state); return -EIO; } - - return 0; -} - -/** - * pseries_suspend_cpu - Suspend a single CPU - * - * Makes the H_JOIN call to suspend the CPU - * - **/ -static int pseries_suspend_cpu(void) -{ - if (atomic_read(&suspending)) - return rtas_suspend_cpu(&suspend_data); return 0; } /** - * pseries_suspend_enable_irqs - * - * Post suspend configuration updates - * - **/ -static void pseries_suspend_enable_irqs(void) -{ - /* - * Update configuration which can be modified based on device tree - * changes during resume. 
- */ - cacheinfo_cpu_offline(smp_processor_id()); - post_mobility_fixup(); - cacheinfo_cpu_online(smp_processor_id()); -} - -/** * pseries_suspend_enter - Final phase of hibernation * * Return value: @@ -91,28 +55,7 @@ static void pseries_suspend_enable_irqs(void) **/ static int pseries_suspend_enter(suspend_state_t state) { - int rc = rtas_suspend_last_cpu(&suspend_data); - - atomic_set(&suspending, 0); - atomic_set(&suspend_data.done, 1); - return rc; -} - -/** - * pseries_prepare_late - Prepare to suspend all other CPUs - * - * Return value: - * 0 on success / other on failure - **/ -static int pseries_prepare_late(void) -{ - atomic_set(&suspending, 1); - atomic_set(&suspend_data.working, 0); - atomic_set(&suspend_data.done, 0); - atomic_set(&suspend_data.error, 0); - suspend_data.complete = &suspend_work; - reinit_completion(&suspend_work); - return 0; + return rtas_ibm_suspend_me(NULL); } /** @@ -132,6 +75,7 @@ static ssize_t store_hibernate(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { + u64 stream_id; int rc; if (!capable(CAP_SYS_ADMIN)) @@ -140,7 +84,7 @@ static ssize_t store_hibernate(struct device *dev, stream_id = simple_strtoul(buf, NULL, 16); do { - rc = pseries_suspend_begin(PM_SUSPEND_MEM); + rc = pseries_suspend_begin(stream_id); if (rc == -EAGAIN) ssleep(1); } while (rc == -EAGAIN); @@ -148,10 +92,11 @@ static ssize_t store_hibernate(struct device *dev, if (!rc) rc = pm_suspend(PM_SUSPEND_MEM); - stream_id = 0; - - if (!rc) + if (!rc) { rc = count; + post_mobility_fixup(); + } + return rc; } @@ -187,8 +132,6 @@ static struct bus_type suspend_subsys = { static const struct platform_suspend_ops pseries_suspend_ops = { .valid = suspend_valid_only_mem, - .begin = pseries_suspend_begin, - .prepare_late = pseries_prepare_late, .enter = pseries_suspend_enter, }; @@ -231,15 +174,9 @@ static int __init pseries_suspend_init(void) if (!firmware_has_feature(FW_FEATURE_LPAR)) return 0; - suspend_data.token = rtas_token("ibm,suspend-me"); - if (suspend_data.token == RTAS_UNKNOWN_SERVICE) - return 0; - if ((rc = pseries_suspend_sysfs_register(&suspend_dev))) return rc; - ppc_md.suspend_disable_cpu = pseries_suspend_cpu; - ppc_md.suspend_enable_irqs = pseries_suspend_enable_irqs; suspend_set_ops(&pseries_suspend_ops); return 0; } diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c index f6b253e2be40..36ec0bdd8b63 100644 --- a/arch/powerpc/sysdev/mpic_msgr.c +++ b/arch/powerpc/sysdev/mpic_msgr.c @@ -191,7 +191,7 @@ static int mpic_msgr_probe(struct platform_device *dev) /* IO map the message register block. 
*/ of_address_to_resource(np, 0, &rsrc); - msgr_block_addr = ioremap(rsrc.start, resource_size(&rsrc)); + msgr_block_addr = devm_ioremap(&dev->dev, rsrc.start, resource_size(&rsrc)); if (!msgr_block_addr) { dev_err(&dev->dev, "Failed to iomap MPIC message registers"); return -EFAULT; diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index a80440af491a..595310e056f4 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -200,10 +200,6 @@ static notrace u8 xive_esb_read(struct xive_irq_data *xd, u32 offset) if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI) offset |= XIVE_ESB_LD_ST_MO; - /* Handle HW errata */ - if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) - offset |= offset << 4; - if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw) val = xive_ops->esb_rw(xd->hw_irq, offset, 0, 0); else @@ -214,10 +210,6 @@ static notrace u8 xive_esb_read(struct xive_irq_data *xd, u32 offset) static void xive_esb_write(struct xive_irq_data *xd, u32 offset, u64 data) { - /* Handle HW errata */ - if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) - offset |= offset << 4; - if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw) xive_ops->esb_rw(xd->hw_irq, offset, data, 1); else @@ -356,50 +348,40 @@ static void xive_do_queue_eoi(struct xive_cpu *xc) * EOI an interrupt at the source. There are several methods * to do this depending on the HW version and source type */ -static void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd) +static void xive_do_source_eoi(struct xive_irq_data *xd) { + u8 eoi_val; + xd->stale_p = false; + /* If the XIVE supports the new "store EOI facility, use it */ - if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI) + if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI) { xive_esb_write(xd, XIVE_ESB_STORE_EOI, 0); - else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) { - /* - * The FW told us to call it. This happens for some - * interrupt sources that need additional HW whacking - * beyond the ESB manipulation. For example LPC interrupts - * on P9 DD1.0 needed a latch to be clared in the LPC bridge - * itself. The Firmware will take care of it. - */ - if (WARN_ON_ONCE(!xive_ops->eoi)) - return; - xive_ops->eoi(hw_irq); - } else { - u8 eoi_val; + return; + } - /* - * Otherwise for EOI, we use the special MMIO that does - * a clear of both P and Q and returns the old Q, - * except for LSIs where we use the "EOI cycle" special - * load. - * - * This allows us to then do a re-trigger if Q was set - * rather than synthesizing an interrupt in software - * - * For LSIs the HW EOI cycle is used rather than PQ bits, - * as they are automatically re-triggred in HW when still - * pending. - */ - if (xd->flags & XIVE_IRQ_FLAG_LSI) - xive_esb_read(xd, XIVE_ESB_LOAD_EOI); - else { - eoi_val = xive_esb_read(xd, XIVE_ESB_SET_PQ_00); - DBG_VERBOSE("eoi_val=%x\n", eoi_val); - - /* Re-trigger if needed */ - if ((eoi_val & XIVE_ESB_VAL_Q) && xd->trig_mmio) - out_be64(xd->trig_mmio, 0); - } + /* + * For LSIs, we use the "EOI cycle" special load rather than + * PQ bits, as they are automatically re-triggered in HW when + * still pending. + */ + if (xd->flags & XIVE_IRQ_FLAG_LSI) { + xive_esb_read(xd, XIVE_ESB_LOAD_EOI); + return; } + + /* + * Otherwise, we use the special MMIO that does a clear of + * both P and Q and returns the old Q. 
This allows us to then + * do a re-trigger if Q was set rather than synthesizing an + * interrupt in software + */ + eoi_val = xive_esb_read(xd, XIVE_ESB_SET_PQ_00); + DBG_VERBOSE("eoi_val=%x\n", eoi_val); + + /* Re-trigger if needed */ + if ((eoi_val & XIVE_ESB_VAL_Q) && xd->trig_mmio) + out_be64(xd->trig_mmio, 0); } /* irq_chip eoi callback, called with irq descriptor lock held */ @@ -416,8 +398,8 @@ static void xive_irq_eoi(struct irq_data *d) * been passed-through to a KVM guest */ if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) && - !(xd->flags & XIVE_IRQ_NO_EOI)) - xive_do_source_eoi(irqd_to_hwirq(d), xd); + !(xd->flags & XIVE_IRQ_FLAG_NO_EOI)) + xive_do_source_eoi(xd); else xd->stale_p = true; @@ -432,9 +414,7 @@ static void xive_irq_eoi(struct irq_data *d) } /* - * Helper used to mask and unmask an interrupt source. This - * is only called for normal interrupts that do not require - * masking/unmasking via firmware. + * Helper used to mask and unmask an interrupt source. */ static void xive_do_source_set_mask(struct xive_irq_data *xd, bool mask) @@ -681,20 +661,6 @@ static void xive_irq_unmask(struct irq_data *d) pr_devel("xive_irq_unmask: irq %d data @%p\n", d->irq, xd); - /* - * This is a workaround for PCI LSI problems on P9, for - * these, we call FW to set the mask. The problems might - * be fixed by P9 DD2.0, if that is the case, firmware - * will no longer set that flag. - */ - if (xd->flags & XIVE_IRQ_FLAG_MASK_FW) { - unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); - xive_ops->configure_irq(hw_irq, - get_hard_smp_processor_id(xd->target), - xive_irq_priority, d->irq); - return; - } - xive_do_source_set_mask(xd, false); } @@ -704,20 +670,6 @@ static void xive_irq_mask(struct irq_data *d) pr_devel("xive_irq_mask: irq %d data @%p\n", d->irq, xd); - /* - * This is a workaround for PCI LSI problems on P9, for - * these, we call OPAL to set the mask. The problems might - * be fixed by P9 DD2.0, if that is the case, firmware - * will no longer set that flag. - */ - if (xd->flags & XIVE_IRQ_FLAG_MASK_FW) { - unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); - xive_ops->configure_irq(hw_irq, - get_hard_smp_processor_id(xd->target), - 0xff, d->irq); - return; - } - xive_do_source_set_mask(xd, true); } @@ -837,14 +789,7 @@ static int xive_irq_retrigger(struct irq_data *d) * 11, then perform an EOI. */ xive_esb_read(xd, XIVE_ESB_SET_PQ_11); - - /* - * Note: We pass "0" to the hw_irq argument in order to - * avoid calling into the backend EOI code which we don't - * want to do in the case of a re-trigger. Backends typically - * only do EOI for LSIs anyway. - */ - xive_do_source_eoi(0, xd); + xive_do_source_eoi(xd); return 1; } @@ -861,13 +806,6 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state) u8 pq; /* - * We only support this on interrupts that do not require - * firmware calls for masking and unmasking - */ - if (xd->flags & XIVE_IRQ_FLAG_MASK_FW) - return -EIO; - - /* * This is called by KVM with state non-NULL for enabling * pass-through or NULL for disabling it */ @@ -966,7 +904,7 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state) * while masked, the generic code will re-mask it anyway. 
*/ if (!xd->saved_p) - xive_do_source_eoi(hw_irq, xd); + xive_do_source_eoi(xd); } return 0; @@ -1110,7 +1048,7 @@ static void xive_ipi_eoi(struct irq_data *d) DBG_VERBOSE("IPI eoi: irq=%d [0x%lx] (HW IRQ 0x%x) pending=%02x\n", d->irq, irqd_to_hwirq(d), xc->hw_ipi, xc->pending_prio); - xive_do_source_eoi(xc->hw_ipi, &xc->ipi_data); + xive_do_source_eoi(&xc->ipi_data); xive_do_queue_eoi(xc); } @@ -1142,7 +1080,7 @@ static void __init xive_request_ipi(void) return; /* Initialize it */ - virq = irq_create_mapping(xive_irq_domain, 0); + virq = irq_create_mapping(xive_irq_domain, XIVE_IPI_HW_IRQ); xive_ipi_irq = virq; WARN_ON(request_irq(virq, xive_muxed_ipi_action, @@ -1242,7 +1180,7 @@ static int xive_irq_domain_map(struct irq_domain *h, unsigned int virq, #ifdef CONFIG_SMP /* IPIs are special and come up with HW number 0 */ - if (hw == 0) { + if (hw == XIVE_IPI_HW_IRQ) { /* * IPIs are marked per-cpu. We use separate HW interrupts under * the hood but associated with the same "linux" interrupt @@ -1271,7 +1209,7 @@ static void xive_irq_domain_unmap(struct irq_domain *d, unsigned int virq) if (!data) return; hw_irq = (unsigned int)irqd_to_hwirq(data); - if (hw_irq) + if (hw_irq != XIVE_IPI_HW_IRQ) xive_irq_free_data(virq); } @@ -1303,16 +1241,71 @@ static int xive_irq_domain_match(struct irq_domain *h, struct device_node *node, return xive_ops->match(node); } +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS +static const char * const esb_names[] = { "RESET", "OFF", "PENDING", "QUEUED" }; + +static const struct { + u64 mask; + char *name; +} xive_irq_flags[] = { + { XIVE_IRQ_FLAG_STORE_EOI, "STORE_EOI" }, + { XIVE_IRQ_FLAG_LSI, "LSI" }, + { XIVE_IRQ_FLAG_H_INT_ESB, "H_INT_ESB" }, + { XIVE_IRQ_FLAG_NO_EOI, "NO_EOI" }, +}; + +static void xive_irq_domain_debug_show(struct seq_file *m, struct irq_domain *d, + struct irq_data *irqd, int ind) +{ + struct xive_irq_data *xd; + u64 val; + int i; + + /* No IRQ domain level information. To be done */ + if (!irqd) + return; + + if (!is_xive_irq(irq_data_get_irq_chip(irqd))) + return; + + seq_printf(m, "%*sXIVE:\n", ind, ""); + ind++; + + xd = irq_data_get_irq_handler_data(irqd); + if (!xd) { + seq_printf(m, "%*snot assigned\n", ind, ""); + return; + } + + val = xive_esb_read(xd, XIVE_ESB_GET); + seq_printf(m, "%*sESB: %s\n", ind, "", esb_names[val & 0x3]); + seq_printf(m, "%*sPstate: %s %s\n", ind, "", xd->stale_p ? "stale" : "", + xd->saved_p ? 
"saved" : ""); + seq_printf(m, "%*sTarget: %d\n", ind, "", xd->target); + seq_printf(m, "%*sChip: %d\n", ind, "", xd->src_chip); + seq_printf(m, "%*sTrigger: 0x%016llx\n", ind, "", xd->trig_page); + seq_printf(m, "%*sEOI: 0x%016llx\n", ind, "", xd->eoi_page); + seq_printf(m, "%*sFlags: 0x%llx\n", ind, "", xd->flags); + for (i = 0; i < ARRAY_SIZE(xive_irq_flags); i++) { + if (xd->flags & xive_irq_flags[i].mask) + seq_printf(m, "%*s%s\n", ind + 12, "", xive_irq_flags[i].name); + } +} +#endif + static const struct irq_domain_ops xive_irq_domain_ops = { .match = xive_irq_domain_match, .map = xive_irq_domain_map, .unmap = xive_irq_domain_unmap, .xlate = xive_irq_domain_xlate, +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS + .debug_show = xive_irq_domain_debug_show, +#endif }; -static void __init xive_init_host(void) +static void __init xive_init_host(struct device_node *np) { - xive_irq_domain = irq_domain_add_nomap(NULL, XIVE_MAX_IRQ, + xive_irq_domain = irq_domain_add_nomap(np, XIVE_MAX_IRQ, &xive_irq_domain_ops, NULL); if (WARN_ON(xive_irq_domain == NULL)) return; @@ -1421,7 +1414,7 @@ static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc) * Ignore anything that isn't a XIVE irq and ignore * IPIs, so can just be dropped. */ - if (d->domain != xive_irq_domain || hw_irq == 0) + if (d->domain != xive_irq_domain || hw_irq == XIVE_IPI_HW_IRQ) continue; /* @@ -1446,7 +1439,7 @@ static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc) * still asserted. Otherwise do an MSI retrigger. */ if (xd->flags & XIVE_IRQ_FLAG_LSI) - xive_do_source_eoi(irqd_to_hwirq(d), xd); + xive_do_source_eoi(xd); else xive_irq_retrigger(d); @@ -1513,8 +1506,8 @@ void xive_shutdown(void) xive_ops->shutdown(); } -bool __init xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 offset, - u8 max_prio) +bool __init xive_core_init(struct device_node *np, const struct xive_ops *ops, + void __iomem *area, u32 offset, u8 max_prio) { xive_tima = area; xive_tima_offset = offset; @@ -1525,7 +1518,7 @@ bool __init xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 o __xive_enabled = true; pr_devel("Initializing host..\n"); - xive_init_host(); + xive_init_host(np); pr_devel("Initializing boot CPU..\n"); @@ -1655,7 +1648,7 @@ static int xive_core_debug_show(struct seq_file *m, void *private) hw_irq = (unsigned int)irqd_to_hwirq(d); /* IPIs are special (HW number 0) */ - if (hw_irq) + if (hw_irq != XIVE_IPI_HW_IRQ) xive_debug_show_irq(m, hw_irq, d); } return 0; diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index cb58ec7ce77a..05a800a3104e 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -64,12 +64,6 @@ int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) data->flags |= XIVE_IRQ_FLAG_STORE_EOI; if (opal_flags & OPAL_XIVE_IRQ_LSI) data->flags |= XIVE_IRQ_FLAG_LSI; - if (opal_flags & OPAL_XIVE_IRQ_SHIFT_BUG) - data->flags |= XIVE_IRQ_FLAG_SHIFT_BUG; - if (opal_flags & OPAL_XIVE_IRQ_MASK_VIA_FW) - data->flags |= XIVE_IRQ_FLAG_MASK_FW; - if (opal_flags & OPAL_XIVE_IRQ_EOI_VIA_FW) - data->flags |= XIVE_IRQ_FLAG_EOI_FW; data->eoi_page = be64_to_cpu(eoi_page); data->trig_page = be64_to_cpu(trig_page); data->esb_shift = be32_to_cpu(esb_shift); @@ -128,6 +122,8 @@ static int xive_native_get_irq_config(u32 hw_irq, u32 *target, u8 *prio, return rc == 0 ? 0 : -ENXIO; } +#define vp_err(vp, fmt, ...) 
pr_err("VP[0x%x]: " fmt, vp, ##__VA_ARGS__) + /* This can be called multiple time to change a queue configuration */ int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio, __be32 *qpage, u32 order, bool can_escalate) @@ -155,7 +151,7 @@ int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio, &esc_irq_be, NULL); if (rc) { - pr_err("Error %lld getting queue info prio %d\n", rc, prio); + vp_err(vp_id, "Failed to get queue %d info : %lld\n", prio, rc); rc = -EIO; goto fail; } @@ -178,7 +174,7 @@ int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio, msleep(OPAL_BUSY_DELAY_MS); } if (rc) { - pr_err("Error %lld setting queue for prio %d\n", rc, prio); + vp_err(vp_id, "Failed to set queue %d info: %lld\n", prio, rc); rc = -EIO; } else { /* @@ -205,7 +201,7 @@ static void __xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio) msleep(OPAL_BUSY_DELAY_MS); } if (rc) - pr_err("Error %lld disabling queue for prio %d\n", rc, prio); + vp_err(vp_id, "Failed to disable queue %d : %lld\n", prio, rc); } void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio) @@ -384,15 +380,6 @@ static void xive_native_update_pending(struct xive_cpu *xc) } } -static void xive_native_eoi(u32 hw_irq) -{ - /* - * Not normally used except if specific interrupts need - * a workaround on EOI. - */ - opal_int_eoi(hw_irq); -} - static void xive_native_setup_cpu(unsigned int cpu, struct xive_cpu *xc) { s64 rc; @@ -475,7 +462,6 @@ static const struct xive_ops xive_native_ops = { .match = xive_native_match, .shutdown = xive_native_shutdown, .update_pending = xive_native_update_pending, - .eoi = xive_native_eoi, .setup_cpu = xive_native_setup_cpu, .teardown_cpu = xive_native_teardown_cpu, .sync_source = xive_native_sync_source, @@ -622,7 +608,7 @@ bool __init xive_native_init(void) xive_native_setup_pools(); /* Initialize XIVE core with our backend */ - if (!xive_core_init(&xive_native_ops, tima, TM_QW3_HV_PHYS, + if (!xive_core_init(np, &xive_native_ops, tima, TM_QW3_HV_PHYS, max_prio)) { opal_xive_reset(OPAL_XIVE_MODE_EMU); return false; @@ -714,6 +700,8 @@ int xive_native_enable_vp(u32 vp_id, bool single_escalation) break; msleep(OPAL_BUSY_DELAY_MS); } + if (rc) + vp_err(vp_id, "Failed to enable VP : %lld\n", rc); return rc ? -EIO : 0; } EXPORT_SYMBOL_GPL(xive_native_enable_vp); @@ -728,6 +716,8 @@ int xive_native_disable_vp(u32 vp_id) break; msleep(OPAL_BUSY_DELAY_MS); } + if (rc) + vp_err(vp_id, "Failed to disable VP : %lld\n", rc); return rc ? 
-EIO : 0; } EXPORT_SYMBOL_GPL(xive_native_disable_vp); @@ -739,8 +729,10 @@ int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id) s64 rc; rc = opal_xive_get_vp_info(vp_id, NULL, &vp_cam_be, NULL, &vp_chip_id_be); - if (rc) + if (rc) { + vp_err(vp_id, "Failed to get VP info : %lld\n", rc); return -EIO; + } *out_cam_id = be64_to_cpu(vp_cam_be) & 0xffffffffu; *out_chip_id = be32_to_cpu(vp_chip_id_be); @@ -771,8 +763,7 @@ int xive_native_get_queue_info(u32 vp_id, u32 prio, rc = opal_xive_get_queue_info(vp_id, prio, &qpage, &qsize, &qeoi_page, &escalate_irq, &qflags); if (rc) { - pr_err("OPAL failed to get queue info for VCPU %d/%d : %lld\n", - vp_id, prio, rc); + vp_err(vp_id, "failed to get queue %d info : %lld\n", prio, rc); return -EIO; } @@ -800,8 +791,7 @@ int xive_native_get_queue_state(u32 vp_id, u32 prio, u32 *qtoggle, u32 *qindex) rc = opal_xive_get_queue_state(vp_id, prio, &opal_qtoggle, &opal_qindex); if (rc) { - pr_err("OPAL failed to get queue state for VCPU %d/%d : %lld\n", - vp_id, prio, rc); + vp_err(vp_id, "failed to get queue %d state : %lld\n", prio, rc); return -EIO; } @@ -820,8 +810,7 @@ int xive_native_set_queue_state(u32 vp_id, u32 prio, u32 qtoggle, u32 qindex) rc = opal_xive_set_queue_state(vp_id, prio, qtoggle, qindex); if (rc) { - pr_err("OPAL failed to set queue state for VCPU %d/%d : %lld\n", - vp_id, prio, rc); + vp_err(vp_id, "failed to set queue %d state : %lld\n", prio, rc); return -EIO; } @@ -843,8 +832,7 @@ int xive_native_get_vp_state(u32 vp_id, u64 *out_state) rc = opal_xive_get_vp_state(vp_id, &state); if (rc) { - pr_err("OPAL failed to get vp state for VCPU %d : %lld\n", - vp_id, rc); + vp_err(vp_id, "failed to get vp state : %lld\n", rc); return -EIO; } diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index 1e3674d7ea7b..01ccc0786ada 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -628,11 +628,6 @@ static void xive_spapr_update_pending(struct xive_cpu *xc) } } -static void xive_spapr_eoi(u32 hw_irq) -{ - /* Not used */; -} - static void xive_spapr_setup_cpu(unsigned int cpu, struct xive_cpu *xc) { /* Only some debug on the TIMA settings */ @@ -677,7 +672,6 @@ static const struct xive_ops xive_spapr_ops = { .match = xive_spapr_match, .shutdown = xive_spapr_shutdown, .update_pending = xive_spapr_update_pending, - .eoi = xive_spapr_eoi, .setup_cpu = xive_spapr_setup_cpu, .teardown_cpu = xive_spapr_teardown_cpu, .sync_source = xive_spapr_sync_source, @@ -857,7 +851,7 @@ bool __init xive_spapr_init(void) } /* Initialize XIVE core with our backend */ - if (!xive_core_init(&xive_spapr_ops, tima, TM_QW1_OS, max_prio)) + if (!xive_core_init(np, &xive_spapr_ops, tima, TM_QW1_OS, max_prio)) return false; pr_info("Using %dkB queues\n", 1 << (xive_queue_shift - 10)); diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h index b7b901da2168..9cf57c722faa 100644 --- a/arch/powerpc/sysdev/xive/xive-internal.h +++ b/arch/powerpc/sysdev/xive/xive-internal.h @@ -5,6 +5,8 @@ #ifndef __XIVE_INTERNAL_H #define __XIVE_INTERNAL_H +#define XIVE_IPI_HW_IRQ 0 /* interrupt source # for IPIs */ + /* * A "disabled" interrupt should never fire, to catch problems * we set its logical number to this @@ -50,7 +52,6 @@ struct xive_ops { void (*shutdown)(void); void (*update_pending)(struct xive_cpu *xc); - void (*eoi)(u32 hw_irq); void (*sync_source)(u32 hw_irq); u64 (*esb_rw)(u32 hw_irq, u32 offset, u64 data, bool write); #ifdef CONFIG_SMP @@ -61,8 +62,8 
@@ struct xive_ops { const char *name; }; -bool xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 offset, - u8 max_prio); +bool xive_core_init(struct device_node *np, const struct xive_ops *ops, + void __iomem *area, u32 offset, u8 max_prio); __be32 *xive_queue_page_alloc(unsigned int cpu, u32 queue_shift); int xive_core_debug_init(void); diff --git a/arch/powerpc/xmon/nonstdio.c b/arch/powerpc/xmon/nonstdio.c index 5c1a50912229..9b0d85bff021 100644 --- a/arch/powerpc/xmon/nonstdio.c +++ b/arch/powerpc/xmon/nonstdio.c @@ -178,7 +178,7 @@ void xmon_printf(const char *format, ...) if (n && rc == 0) { /* No udbg hooks, fallback to printk() - dangerous */ - printk("%s", xmon_outbuf); + pr_cont("%s", xmon_outbuf); } } diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 55c43a6c9111..dcd817ca2edf 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1383,6 +1383,7 @@ static long check_bp_loc(unsigned long addr) return 1; } +#ifndef CONFIG_PPC_8xx static int find_free_data_bpt(void) { int i; @@ -1394,6 +1395,7 @@ static int find_free_data_bpt(void) printf("Couldn't find free breakpoint register\n"); return -1; } +#endif static void print_data_bpts(void) { @@ -1745,9 +1747,9 @@ static void print_bug_trap(struct pt_regs *regs) #ifdef CONFIG_DEBUG_BUGVERBOSE printf("kernel BUG at %s:%u!\n", - bug->file, bug->line); + (char *)bug + bug->file_disp, bug->line); #else - printf("kernel BUG at %px!\n", (void *)bug->bug_addr); + printf("kernel BUG at %px!\n", (void *)bug + bug->bug_addr_disp); #endif #endif /* CONFIG_BUG */ }