summaryrefslogtreecommitdiffstats
path: root/arch/powerpc
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/Kconfig14
-rw-r--r--arch/powerpc/Kconfig.debug5
-rw-r--r--arch/powerpc/Makefile10
-rw-r--r--arch/powerpc/boot/Makefile6
-rw-r--r--arch/powerpc/boot/decompress.c4
-rw-r--r--arch/powerpc/boot/devtree.c59
-rw-r--r--arch/powerpc/boot/dts/microwatt.dts138
-rw-r--r--arch/powerpc/boot/microwatt.c24
-rw-r--r--arch/powerpc/boot/ns16550.c9
-rwxr-xr-xarch/powerpc/boot/wrapper5
-rw-r--r--arch/powerpc/boot/zImage.ps3.lds.S2
-rw-r--r--arch/powerpc/configs/32-bit.config1
-rw-r--r--arch/powerpc/configs/64-bit.config1
-rw-r--r--arch/powerpc/configs/microwatt_defconfig98
-rw-r--r--arch/powerpc/configs/mpc885_ads_defconfig25
-rw-r--r--arch/powerpc/configs/powernv_defconfig1
-rw-r--r--arch/powerpc/configs/ppc64_defconfig2
-rw-r--r--arch/powerpc/configs/pseries_defconfig2
-rw-r--r--arch/powerpc/include/asm/asm-prototypes.h12
-rw-r--r--arch/powerpc/include/asm/atomic.h140
-rw-r--r--arch/powerpc/include/asm/barrier.h2
-rw-r--r--arch/powerpc/include/asm/book3s/32/hash.h45
-rw-r--r--arch/powerpc/include/asm/book3s/32/kup.h195
-rw-r--r--arch/powerpc/include/asm/book3s/32/mmu-hash.h41
-rw-r--r--arch/powerpc/include/asm/book3s/32/pgtable.h38
-rw-r--r--arch/powerpc/include/asm/book3s/64/kup.h3
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu.h1
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h3
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush-radix.h4
-rw-r--r--arch/powerpc/include/asm/book3s/pgtable.h1
-rw-r--r--arch/powerpc/include/asm/checksum.h2
-rw-r--r--arch/powerpc/include/asm/cmpxchg.h30
-rw-r--r--arch/powerpc/include/asm/code-patching.h34
-rw-r--r--arch/powerpc/include/asm/cputhreads.h30
-rw-r--r--arch/powerpc/include/asm/exception-64s.h13
-rw-r--r--arch/powerpc/include/asm/head-64.h2
-rw-r--r--arch/powerpc/include/asm/hvcall.h14
-rw-r--r--arch/powerpc/include/asm/hw_irq.h23
-rw-r--r--arch/powerpc/include/asm/inst.h94
-rw-r--r--arch/powerpc/include/asm/interrupt.h67
-rw-r--r--arch/powerpc/include/asm/irq.h5
-rw-r--r--arch/powerpc/include/asm/jump_label.h2
-rw-r--r--arch/powerpc/include/asm/kup.h50
-rw-r--r--arch/powerpc/include/asm/kvm_asm.h1
-rw-r--r--arch/powerpc/include/asm/kvm_book3s.h3
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_64.h8
-rw-r--r--arch/powerpc/include/asm/kvm_guest.h4
-rw-r--r--arch/powerpc/include/asm/kvm_host.h21
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h18
-rw-r--r--arch/powerpc/include/asm/livepatch.h2
-rw-r--r--arch/powerpc/include/asm/mmu.h19
-rw-r--r--arch/powerpc/include/asm/mmu_context.h19
-rw-r--r--arch/powerpc/include/asm/mmzone.h4
-rw-r--r--arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h5
-rw-r--r--arch/powerpc/include/asm/nohash/32/kup-8xx.h46
-rw-r--r--arch/powerpc/include/asm/nohash/32/mmu-44x.h1
-rw-r--r--arch/powerpc/include/asm/nohash/32/mmu-8xx.h43
-rw-r--r--arch/powerpc/include/asm/nohash/32/pgtable.h1
-rw-r--r--arch/powerpc/include/asm/nohash/64/pgtable.h2
-rw-r--r--arch/powerpc/include/asm/paca.h9
-rw-r--r--arch/powerpc/include/asm/pgalloc.h5
-rw-r--r--arch/powerpc/include/asm/pgtable.h11
-rw-r--r--arch/powerpc/include/asm/ppc-opcode.h84
-rw-r--r--arch/powerpc/include/asm/ppc_asm.h15
-rw-r--r--arch/powerpc/include/asm/probes.h4
-rw-r--r--arch/powerpc/include/asm/processor.h21
-rw-r--r--arch/powerpc/include/asm/ps3.h4
-rw-r--r--arch/powerpc/include/asm/ptrace.h50
-rw-r--r--arch/powerpc/include/asm/qspinlock.h2
-rw-r--r--arch/powerpc/include/asm/reg.h13
-rw-r--r--arch/powerpc/include/asm/security_features.h4
-rw-r--r--arch/powerpc/include/asm/set_memory.h34
-rw-r--r--arch/powerpc/include/asm/setup.h1
-rw-r--r--arch/powerpc/include/asm/sstep.h7
-rw-r--r--arch/powerpc/include/asm/time.h12
-rw-r--r--arch/powerpc/include/asm/unaligned.h22
-rw-r--r--arch/powerpc/include/asm/uprobes.h4
-rw-r--r--arch/powerpc/include/asm/vas.h109
-rw-r--r--arch/powerpc/include/asm/xics.h4
-rw-r--r--arch/powerpc/include/uapi/asm/papr_pdsm.h6
-rw-r--r--arch/powerpc/include/uapi/asm/vas-api.h6
-rw-r--r--arch/powerpc/kernel/asm-offsets.c74
-rw-r--r--arch/powerpc/kernel/crash_dump.c6
-rw-r--r--arch/powerpc/kernel/entry_32.S54
-rw-r--r--arch/powerpc/kernel/entry_64.S516
-rw-r--r--arch/powerpc/kernel/epapr_paravirt.c4
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S52
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S501
-rw-r--r--arch/powerpc/kernel/firmware.c10
-rw-r--r--arch/powerpc/kernel/fpu.S4
-rw-r--r--arch/powerpc/kernel/head_32.h41
-rw-r--r--arch/powerpc/kernel/head_40x.S36
-rw-r--r--arch/powerpc/kernel/head_44x.S50
-rw-r--r--arch/powerpc/kernel/head_64.S25
-rw-r--r--arch/powerpc/kernel/head_8xx.S25
-rw-r--r--arch/powerpc/kernel/head_book3s_32.S139
-rw-r--r--arch/powerpc/kernel/head_booke.h31
-rw-r--r--arch/powerpc/kernel/head_fsl_booke.S37
-rw-r--r--arch/powerpc/kernel/hw_breakpoint.c4
-rw-r--r--arch/powerpc/kernel/interrupt.c484
-rw-r--r--arch/powerpc/kernel/interrupt_64.S770
-rw-r--r--arch/powerpc/kernel/irq.c96
-rw-r--r--arch/powerpc/kernel/jump_label.c4
-rw-r--r--arch/powerpc/kernel/kgdb.c19
-rw-r--r--arch/powerpc/kernel/kprobes-ftrace.c4
-rw-r--r--arch/powerpc/kernel/kprobes.c78
-rw-r--r--arch/powerpc/kernel/mce.c3
-rw-r--r--arch/powerpc/kernel/mce_power.c50
-rw-r--r--arch/powerpc/kernel/misc_32.S6
-rw-r--r--arch/powerpc/kernel/module.c4
-rw-r--r--arch/powerpc/kernel/module_32.c19
-rw-r--r--arch/powerpc/kernel/module_64.c55
-rw-r--r--arch/powerpc/kernel/optprobes.c155
-rw-r--r--arch/powerpc/kernel/paca.c2
-rw-r--r--arch/powerpc/kernel/process.c111
-rw-r--r--arch/powerpc/kernel/prom.c2
-rw-r--r--arch/powerpc/kernel/prom_init.c121
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-adv.c20
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-noadv.c14
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-view.c5
-rw-r--r--arch/powerpc/kernel/rtas-rtc.c2
-rw-r--r--arch/powerpc/kernel/rtas.c14
-rw-r--r--arch/powerpc/kernel/security.c24
-rw-r--r--arch/powerpc/kernel/setup-common.c5
-rw-r--r--arch/powerpc/kernel/setup_32.c4
-rw-r--r--arch/powerpc/kernel/setup_64.c15
-rw-r--r--arch/powerpc/kernel/signal.c12
-rw-r--r--arch/powerpc/kernel/signal_32.c106
-rw-r--r--arch/powerpc/kernel/signal_64.c58
-rw-r--r--arch/powerpc/kernel/smp.c18
-rw-r--r--arch/powerpc/kernel/stacktrace.c34
-rw-r--r--arch/powerpc/kernel/syscalls.c3
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/powerpc/kernel/sysfs.c12
-rw-r--r--arch/powerpc/kernel/tau_6xx.c2
-rw-r--r--arch/powerpc/kernel/time.c21
-rw-r--r--arch/powerpc/kernel/trace/ftrace.c51
-rw-r--r--arch/powerpc/kernel/traps.c49
-rw-r--r--arch/powerpc/kernel/udbg_16550.c39
-rw-r--r--arch/powerpc/kernel/uprobes.c8
-rw-r--r--arch/powerpc/kernel/vector.S8
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S19
-rw-r--r--arch/powerpc/kernel/watchdog.c1
-rw-r--r--arch/powerpc/kexec/core.c4
-rw-r--r--arch/powerpc/kexec/crash.c4
-rw-r--r--arch/powerpc/kvm/Makefile4
-rw-r--r--arch/powerpc/kvm/book3s.c108
-rw-r--r--arch/powerpc/kvm/book3s_32_mmu_host.c3
-rw-r--r--arch/powerpc/kvm/book3s_64_entry.S416
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_radix.c27
-rw-r--r--arch/powerpc/kvm/book3s_64_vio_hv.c12
-rw-r--r--arch/powerpc/kvm/book3s_hv.c817
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c137
-rw-r--r--arch/powerpc/kvm/book3s_hv_interrupts.S9
-rw-r--r--arch/powerpc/kvm/book3s_hv_nested.c122
-rw-r--r--arch/powerpc/kvm/book3s_hv_p9_entry.c508
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c14
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_xics.c15
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S684
-rw-r--r--arch/powerpc/kvm/book3s_hv_uvmem.c3
-rw-r--r--arch/powerpc/kvm/book3s_pr.c4
-rw-r--r--arch/powerpc/kvm/book3s_pr_papr.c2
-rw-r--r--arch/powerpc/kvm/book3s_segment.S3
-rw-r--r--arch/powerpc/kvm/book3s_xive.c114
-rw-r--r--arch/powerpc/kvm/book3s_xive.h7
-rw-r--r--arch/powerpc/kvm/book3s_xive_native.c11
-rw-r--r--arch/powerpc/kvm/booke.c76
-rw-r--r--arch/powerpc/kvm/powerpc.c3
-rw-r--r--arch/powerpc/lib/Makefile2
-rw-r--r--arch/powerpc/lib/code-patching.c178
-rw-r--r--arch/powerpc/lib/error-inject.c2
-rw-r--r--arch/powerpc/lib/feature-fixups.c266
-rw-r--r--arch/powerpc/lib/restart_table.c56
-rw-r--r--arch/powerpc/lib/sstep.c39
-rw-r--r--arch/powerpc/lib/test_emulate_step.c38
-rw-r--r--arch/powerpc/math-emu/math.c2
-rw-r--r--arch/powerpc/math-emu/math_efp.c2
-rw-r--r--arch/powerpc/mm/Makefile4
-rw-r--r--arch/powerpc/mm/book3s32/Makefile1
-rw-r--r--arch/powerpc/mm/book3s32/hash_low.S6
-rw-r--r--arch/powerpc/mm/book3s32/kuap.c33
-rw-r--r--arch/powerpc/mm/book3s32/kuep.c42
-rw-r--r--arch/powerpc/mm/book3s32/mmu.c20
-rw-r--r--arch/powerpc/mm/book3s32/mmu_context.c48
-rw-r--r--arch/powerpc/mm/book3s64/hash_utils.c24
-rw-r--r--arch/powerpc/mm/book3s64/radix_pgtable.c33
-rw-r--r--arch/powerpc/mm/book3s64/radix_tlb.c228
-rw-r--r--arch/powerpc/mm/ioremap_32.c4
-rw-r--r--arch/powerpc/mm/ioremap_64.c2
-rw-r--r--arch/powerpc/mm/maccess.c4
-rw-r--r--arch/powerpc/mm/mem.c12
-rw-r--r--arch/powerpc/mm/mmu_context.c4
-rw-r--r--arch/powerpc/mm/nohash/44x.c17
-rw-r--r--arch/powerpc/mm/nohash/8xx.c42
-rw-r--r--arch/powerpc/mm/nohash/mmu_context.c173
-rw-r--r--arch/powerpc/mm/nohash/tlb_low.S13
-rw-r--r--arch/powerpc/mm/pageattr.c134
-rw-r--r--arch/powerpc/mm/pgtable.c8
-rw-r--r--arch/powerpc/mm/pgtable_32.c60
-rw-r--r--arch/powerpc/mm/ptdump/ptdump.c22
-rw-r--r--arch/powerpc/net/bpf_jit_comp.c13
-rw-r--r--arch/powerpc/net/bpf_jit_comp32.c220
-rw-r--r--arch/powerpc/net/bpf_jit_comp64.c14
-rw-r--r--arch/powerpc/perf/Makefile6
-rw-r--r--arch/powerpc/perf/callchain.c2
-rw-r--r--arch/powerpc/perf/core-book3s.c6
-rw-r--r--arch/powerpc/perf/generic-compat-pmu.c170
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_gpt.c1
-rw-r--r--arch/powerpc/platforms/86xx/mpc86xx_smp.c4
-rw-r--r--arch/powerpc/platforms/Kconfig3
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype16
-rw-r--r--arch/powerpc/platforms/Makefile2
-rw-r--r--arch/powerpc/platforms/book3s/Kconfig15
-rw-r--r--arch/powerpc/platforms/book3s/Makefile2
-rw-r--r--arch/powerpc/platforms/book3s/vas-api.c493
-rw-r--r--arch/powerpc/platforms/cell/Kconfig1
-rw-r--r--arch/powerpc/platforms/cell/pmu.c1
-rw-r--r--arch/powerpc/platforms/cell/spider-pci.c3
-rw-r--r--arch/powerpc/platforms/cell/spufs/switch.c6
-rw-r--r--arch/powerpc/platforms/embedded6xx/flipper-pic.c1
-rw-r--r--arch/powerpc/platforms/embedded6xx/holly.c4
-rw-r--r--arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c4
-rw-r--r--arch/powerpc/platforms/microwatt/Kconfig13
-rw-r--r--arch/powerpc/platforms/microwatt/Makefile1
-rw-r--r--arch/powerpc/platforms/microwatt/rng.c48
-rw-r--r--arch/powerpc/platforms/microwatt/setup.c41
-rw-r--r--arch/powerpc/platforms/pasemi/idle.c4
-rw-r--r--arch/powerpc/platforms/powermac/Kconfig1
-rw-r--r--arch/powerpc/platforms/powermac/bootx_init.c2
-rw-r--r--arch/powerpc/platforms/powermac/smp.c4
-rw-r--r--arch/powerpc/platforms/powernv/Kconfig14
-rw-r--r--arch/powerpc/platforms/powernv/Makefile2
-rw-r--r--arch/powerpc/platforms/powernv/idle.c52
-rw-r--r--arch/powerpc/platforms/powernv/opal-call.c4
-rw-r--r--arch/powerpc/platforms/powernv/opal.c2
-rw-r--r--arch/powerpc/platforms/powernv/pci.c2
-rw-r--r--arch/powerpc/platforms/powernv/subcore.c10
-rw-r--r--arch/powerpc/platforms/powernv/vas-api.c278
-rw-r--r--arch/powerpc/platforms/powernv/vas-debug.c27
-rw-r--r--arch/powerpc/platforms/powernv/vas-fault.c173
-rw-r--r--arch/powerpc/platforms/powernv/vas-trace.h4
-rw-r--r--arch/powerpc/platforms/powernv/vas-window.c264
-rw-r--r--arch/powerpc/platforms/powernv/vas.h50
-rw-r--r--arch/powerpc/platforms/ps3/Kconfig10
-rw-r--r--arch/powerpc/platforms/ps3/interrupt.c5
-rw-r--r--arch/powerpc/platforms/ps3/mm.c12
-rw-r--r--arch/powerpc/platforms/ps3/setup.c43
-rw-r--r--arch/powerpc/platforms/ps3/system-bus.c9
-rw-r--r--arch/powerpc/platforms/pseries/Makefile1
-rw-r--r--arch/powerpc/platforms/pseries/dlpar.c9
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-memory.c92
-rw-r--r--arch/powerpc/platforms/pseries/hvCall.S29
-rw-r--r--arch/powerpc/platforms/pseries/ibmebus.c1
-rw-r--r--arch/powerpc/platforms/pseries/papr_scm.c101
-rw-r--r--arch/powerpc/platforms/pseries/ras.c6
-rw-r--r--arch/powerpc/platforms/pseries/setup.c9
-rw-r--r--arch/powerpc/platforms/pseries/smp.c4
-rw-r--r--arch/powerpc/platforms/pseries/vas.c595
-rw-r--r--arch/powerpc/platforms/pseries/vas.h125
-rw-r--r--arch/powerpc/sysdev/ehv_pic.c1
-rw-r--r--arch/powerpc/sysdev/fsl_mpic_err.c1
-rw-r--r--arch/powerpc/sysdev/fsl_pci.c2
-rw-r--r--arch/powerpc/sysdev/fsl_rio.c4
-rw-r--r--arch/powerpc/sysdev/i8259.c3
-rw-r--r--arch/powerpc/sysdev/mpic.c2
-rw-r--r--arch/powerpc/sysdev/tsi108_pci.c3
-rw-r--r--arch/powerpc/sysdev/xics/Kconfig3
-rw-r--r--arch/powerpc/sysdev/xics/Makefile1
-rw-r--r--arch/powerpc/sysdev/xics/icp-hv.c1
-rw-r--r--arch/powerpc/sysdev/xics/icp-opal.c1
-rw-r--r--arch/powerpc/sysdev/xics/ics-native.c257
-rw-r--r--arch/powerpc/sysdev/xics/xics-common.c4
-rw-r--r--arch/powerpc/sysdev/xive/Kconfig1
-rw-r--r--arch/powerpc/xmon/xmon.c209
274 files changed, 8786 insertions, 5386 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 088dd2afcfe4..d01e3401581d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -140,7 +140,9 @@ config PPC
select ARCH_HAS_PTE_DEVMAP if PPC_BOOK3S_64
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64
+ select ARCH_HAS_SET_MEMORY
select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !HIBERNATION)
+ select ARCH_HAS_STRICT_MODULE_RWX if ARCH_HAS_STRICT_KERNEL_RWX && !PPC_BOOK3S_32
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UACCESS_FLUSHCACHE
select ARCH_HAS_UBSAN_SANITIZE_ALL
@@ -187,7 +189,7 @@ config PPC
select GENERIC_VDSO_TIME_NS
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP
- select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU
+ select HAVE_ARCH_HUGE_VMAP if PPC_RADIX_MMU || PPC_8xx
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_JUMP_LABEL_RELATIVE
select HAVE_ARCH_KASAN if PPC32 && PPC_PAGE_SHIFT <= 14
@@ -266,6 +268,7 @@ config PPC
select PPC_DAWR if PPC64
select RTC_LIB
select SPARSE_IRQ
+ select STRICT_KERNEL_RWX if STRICT_MODULE_RWX
select SYSCTL_EXCEPTION_TRACE
select THREAD_INFO_IN_TASK
select VIRT_TO_BUS if !PPC64
@@ -289,6 +292,7 @@ config PANIC_TIMEOUT
config COMPAT
bool "Enable support for 32bit binaries"
depends on PPC64
+ depends on !CC_IS_CLANG || CLANG_VERSION >= 120000
default y if !CPU_LITTLE_ENDIAN
select ARCH_WANT_OLD_COMPAT_IPC
select COMPAT_OLD_SIGACTION
@@ -403,10 +407,6 @@ config PPC_ADV_DEBUG_DAC_RANGE
config PPC_DAWR
bool
-config ZONE_DMA
- bool
- default y if PPC_BOOK3E_64
-
config PGTABLE_LEVELS
int
default 2 if !PPC64
@@ -426,7 +426,7 @@ source "kernel/Kconfig.hz"
config MATH_EMULATION
bool "Math emulation"
- depends on 4xx || PPC_8xx || PPC_MPC832x || BOOKE
+ depends on 4xx || PPC_8xx || PPC_MPC832x || BOOKE || PPC_MICROWATT
select PPC_FPU_REGS
help
Some PowerPC chips designed for embedded applications do not have
@@ -671,7 +671,7 @@ config NODES_SHIFT
int
default "8" if PPC64
default "4"
- depends on NEED_MULTIPLE_NODES
+ depends on NUMA
config USE_PERCPU_NUMA_NODE_ID
def_bool y
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 6342f9da4545..205cd77f321f 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -84,6 +84,11 @@ config MSI_BITMAP_SELFTEST
config PPC_IRQ_SOFT_MASK_DEBUG
bool "Include extra checks for powerpc irq soft masking"
+ depends on PPC64
+
+config PPC_RFI_SRR_DEBUG
+ bool "Include extra checks for RFI SRR register validity"
+ depends on PPC_BOOK3S_64
config XMON
bool "Include xmon kernel debugger"
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 3212d076ac6a..712c5e8768ce 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -376,6 +376,16 @@ ppc64_book3e_allmodconfig:
$(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/85xx-64bit.config \
-f $(srctree)/Makefile allmodconfig
+PHONY += ppc32_randconfig
+ppc32_randconfig:
+ $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/32-bit.config \
+ -f $(srctree)/Makefile randconfig
+
+PHONY += ppc64_randconfig
+ppc64_randconfig:
+ $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/64-bit.config \
+ -f $(srctree)/Makefile randconfig
+
define archhelp
@echo '* zImage - Build default images selected by kernel config'
@echo ' zImage.* - Compressed kernel image (arch/$(ARCH)/boot/zImage.*)'
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 2b8da923ceca..e312ea802aa6 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -163,6 +163,8 @@ src-plat-$(CONFIG_PPC_POWERNV) += pseries-head.S
src-plat-$(CONFIG_PPC_IBM_CELL_BLADE) += pseries-head.S
src-plat-$(CONFIG_MVME7100) += motload-head.S mvme7100.c
+src-plat-$(CONFIG_PPC_MICROWATT) += fixed-head.S microwatt.c
+
src-wlib := $(sort $(src-wlib-y))
src-plat := $(sort $(src-plat-y))
src-boot := $(src-wlib) $(src-plat) empty.c
@@ -227,7 +229,7 @@ $(obj)/wrapper.a: $(obj-wlib) FORCE
hostprogs := addnote hack-coff mktree
-targets += $(patsubst $(obj)/%,%,$(obj-boot) wrapper.a)
+targets += $(patsubst $(obj)/%,%,$(obj-boot) wrapper.a) zImage.lds
extra-y := $(obj)/wrapper.a $(obj-plat) $(obj)/empty.o \
$(obj)/zImage.lds $(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds
@@ -355,6 +357,8 @@ image-$(CONFIG_MVME5100) += dtbImage.mvme5100
# Board port in arch/powerpc/platform/amigaone/Kconfig
image-$(CONFIG_AMIGAONE) += cuImage.amigaone
+image-$(CONFIG_PPC_MICROWATT) += dtbImage.microwatt
+
# For 32-bit powermacs, build the COFF and miboot images
# as well as the ELF images.
ifdef CONFIG_PPC32
diff --git a/arch/powerpc/boot/decompress.c b/arch/powerpc/boot/decompress.c
index 6098b879ac97..977eb15a6d17 100644
--- a/arch/powerpc/boot/decompress.c
+++ b/arch/powerpc/boot/decompress.c
@@ -99,8 +99,8 @@ static void print_err(char *s)
* partial_decompress - decompresses part or all of a compressed buffer
* @inbuf: input buffer
* @input_size: length of the input buffer
- * @outbuf: input buffer
- * @output_size: length of the input buffer
+ * @outbuf: output buffer
+ * @output_size: length of the output buffer
* @skip number of output bytes to ignore
*
* This function takes compressed data from inbuf, decompresses and write it to
diff --git a/arch/powerpc/boot/devtree.c b/arch/powerpc/boot/devtree.c
index 5d91036ad626..58fbcfcc98c9 100644
--- a/arch/powerpc/boot/devtree.c
+++ b/arch/powerpc/boot/devtree.c
@@ -13,6 +13,7 @@
#include "string.h"
#include "stdio.h"
#include "ops.h"
+#include "of.h"
void dt_fixup_memory(u64 start, u64 size)
{
@@ -23,21 +24,25 @@ void dt_fixup_memory(u64 start, u64 size)
root = finddevice("/");
if (getprop(root, "#address-cells", &naddr, sizeof(naddr)) < 0)
naddr = 2;
+ else
+ naddr = be32_to_cpu(naddr);
if (naddr < 1 || naddr > 2)
fatal("Can't cope with #address-cells == %d in /\n\r", naddr);
if (getprop(root, "#size-cells", &nsize, sizeof(nsize)) < 0)
nsize = 1;
+ else
+ nsize = be32_to_cpu(nsize);
if (nsize < 1 || nsize > 2)
fatal("Can't cope with #size-cells == %d in /\n\r", nsize);
i = 0;
if (naddr == 2)
- memreg[i++] = start >> 32;
- memreg[i++] = start & 0xffffffff;
+ memreg[i++] = cpu_to_be32(start >> 32);
+ memreg[i++] = cpu_to_be32(start & 0xffffffff);
if (nsize == 2)
- memreg[i++] = size >> 32;
- memreg[i++] = size & 0xffffffff;
+ memreg[i++] = cpu_to_be32(size >> 32);
+ memreg[i++] = cpu_to_be32(size & 0xffffffff);
memory = finddevice("/memory");
if (! memory) {
@@ -45,9 +50,9 @@ void dt_fixup_memory(u64 start, u64 size)
setprop_str(memory, "device_type", "memory");
}
- printf("Memory <- <0x%x", memreg[0]);
+ printf("Memory <- <0x%x", be32_to_cpu(memreg[0]));
for (i = 1; i < (naddr + nsize); i++)
- printf(" 0x%x", memreg[i]);
+ printf(" 0x%x", be32_to_cpu(memreg[i]));
printf("> (%ldMB)\n\r", (unsigned long)(size >> 20));
setprop(memory, "reg", memreg, (naddr + nsize)*sizeof(u32));
@@ -65,10 +70,10 @@ void dt_fixup_cpu_clocks(u32 cpu, u32 tb, u32 bus)
printf("CPU bus-frequency <- 0x%x (%dMHz)\n\r", bus, MHZ(bus));
while ((devp = find_node_by_devtype(devp, "cpu"))) {
- setprop_val(devp, "clock-frequency", cpu);
- setprop_val(devp, "timebase-frequency", tb);
+ setprop_val(devp, "clock-frequency", cpu_to_be32(cpu));
+ setprop_val(devp, "timebase-frequency", cpu_to_be32(tb));
if (bus > 0)
- setprop_val(devp, "bus-frequency", bus);
+ setprop_val(devp, "bus-frequency", cpu_to_be32(bus));
}
timebase_period_ns = 1000000000 / tb;
@@ -80,7 +85,7 @@ void dt_fixup_clock(const char *path, u32 freq)
if (devp) {
printf("%s: clock-frequency <- %x (%dMHz)\n\r", path, freq, MHZ(freq));
- setprop_val(devp, "clock-frequency", freq);
+ setprop_val(devp, "clock-frequency", cpu_to_be32(freq));
}
}
@@ -133,8 +138,12 @@ void dt_get_reg_format(void *node, u32 *naddr, u32 *nsize)
{
if (getprop(node, "#address-cells", naddr, 4) != 4)
*naddr = 2;
+ else
+ *naddr = be32_to_cpu(*naddr);
if (getprop(node, "#size-cells", nsize, 4) != 4)
*nsize = 1;
+ else
+ *nsize = be32_to_cpu(*nsize);
}
static void copy_val(u32 *dest, u32 *src, int naddr)
@@ -163,9 +172,9 @@ static int add_reg(u32 *reg, u32 *add, int naddr)
int i, carry = 0;
for (i = MAX_ADDR_CELLS - 1; i >= MAX_ADDR_CELLS - naddr; i--) {
- u64 tmp = (u64)reg[i] + add[i] + carry;
+ u64 tmp = (u64)be32_to_cpu(reg[i]) + be32_to_cpu(add[i]) + carry;
carry = tmp >> 32;
- reg[i] = (u32)tmp;
+ reg[i] = cpu_to_be32((u32)tmp);
}
return !carry;
@@ -180,18 +189,18 @@ static int compare_reg(u32 *reg, u32 *range, u32 *rangesize)
u32 end;
for (i = 0; i < MAX_ADDR_CELLS; i++) {
- if (reg[i] < range[i])
+ if (be32_to_cpu(reg[i]) < be32_to_cpu(range[i]))
return 0;
- if (reg[i] > range[i])
+ if (be32_to_cpu(reg[i]) > be32_to_cpu(range[i]))
break;
}
for (i = 0; i < MAX_ADDR_CELLS; i++) {
- end = range[i] + rangesize[i];
+ end = be32_to_cpu(range[i]) + be32_to_cpu(rangesize[i]);
- if (reg[i] < end)
+ if (be32_to_cpu(reg[i]) < end)
break;
- if (reg[i] > end)
+ if (be32_to_cpu(reg[i]) > end)
return 0;
}
@@ -240,7 +249,6 @@ static int dt_xlate(void *node, int res, int reglen, unsigned long *addr,
return 0;
dt_get_reg_format(parent, &naddr, &nsize);
-
if (nsize > 2)
return 0;
@@ -252,10 +260,10 @@ static int dt_xlate(void *node, int res, int reglen, unsigned long *addr,
copy_val(last_addr, prop_buf + offset, naddr);
- ret_size = prop_buf[offset + naddr];
+ ret_size = be32_to_cpu(prop_buf[offset + naddr]);
if (nsize == 2) {
ret_size <<= 32;
- ret_size |= prop_buf[offset + naddr + 1];
+ ret_size |= be32_to_cpu(prop_buf[offset + naddr + 1]);
}
for (;;) {
@@ -278,7 +286,6 @@ static int dt_xlate(void *node, int res, int reglen, unsigned long *addr,
offset = find_range(last_addr, prop_buf, prev_naddr,
naddr, prev_nsize, buflen / 4);
-
if (offset < 0)
return 0;
@@ -296,8 +303,7 @@ static int dt_xlate(void *node, int res, int reglen, unsigned long *addr,
if (naddr > 2)
return 0;
- ret_addr = ((u64)last_addr[2] << 32) | last_addr[3];
-
+ ret_addr = ((u64)be32_to_cpu(last_addr[2]) << 32) | be32_to_cpu(last_addr[3]);
if (sizeof(void *) == 4 &&
(ret_addr >= 0x100000000ULL || ret_size > 0x100000000ULL ||
ret_addr + ret_size > 0x100000000ULL))
@@ -350,11 +356,14 @@ int dt_is_compatible(void *node, const char *compat)
int dt_get_virtual_reg(void *node, void **addr, int nres)
{
unsigned long xaddr;
- int n;
+ int n, i;
n = getprop(node, "virtual-reg", addr, nres * 4);
- if (n > 0)
+ if (n > 0) {
+ for (i = 0; i < n/4; i ++)
+ ((u32 *)addr)[i] = be32_to_cpu(((u32 *)addr)[i]);
return n / 4;
+ }
for (n = 0; n < nres; n++) {
if (!dt_xlate_reg(node, n, &xaddr, NULL))
diff --git a/arch/powerpc/boot/dts/microwatt.dts b/arch/powerpc/boot/dts/microwatt.dts
new file mode 100644
index 000000000000..974abbdda249
--- /dev/null
+++ b/arch/powerpc/boot/dts/microwatt.dts
@@ -0,0 +1,138 @@
+/dts-v1/;
+
+/ {
+ #size-cells = <0x02>;
+ #address-cells = <0x02>;
+ model-name = "microwatt";
+ compatible = "microwatt-soc";
+
+ aliases {
+ serial0 = &UART0;
+ };
+
+ reserved-memory {
+ #size-cells = <0x02>;
+ #address-cells = <0x02>;
+ ranges;
+ };
+
+ memory@0 {
+ device_type = "memory";
+ reg = <0x00000000 0x00000000 0x00000000 0x10000000>;
+ };
+
+ cpus {
+ #size-cells = <0x00>;
+ #address-cells = <0x01>;
+
+ ibm,powerpc-cpu-features {
+ display-name = "Microwatt";
+ isa = <3000>;
+ device_type = "cpu-features";
+ compatible = "ibm,powerpc-cpu-features";
+
+ mmu-radix {
+ isa = <3000>;
+ usable-privilege = <2>;
+ };
+
+ little-endian {
+ isa = <2050>;
+ usable-privilege = <3>;
+ hwcap-bit-nr = <1>;
+ };
+
+ cache-inhibited-large-page {
+ isa = <2040>;
+ usable-privilege = <2>;
+ };
+
+ fixed-point-v3 {
+ isa = <3000>;
+ usable-privilege = <3>;
+ };
+
+ no-execute {
+ isa = <2010>;
+ usable-privilege = <2>;
+ };
+
+ floating-point {
+ hwcap-bit-nr = <27>;
+ isa = <0>;
+ usable-privilege = <3>;
+ };
+ };
+
+ PowerPC,Microwatt@0 {
+ i-cache-sets = <2>;
+ ibm,dec-bits = <64>;
+ reservation-granule-size = <64>;
+ clock-frequency = <100000000>;
+ timebase-frequency = <100000000>;
+ i-tlb-sets = <1>;
+ ibm,ppc-interrupt-server#s = <0>;
+ i-cache-block-size = <64>;
+ d-cache-block-size = <64>;
+ d-cache-sets = <2>;
+ i-tlb-size = <64>;
+ cpu-version = <0x990000>;
+ status = "okay";
+ i-cache-size = <0x1000>;
+ ibm,processor-radix-AP-encodings = <0x0c 0xa0000010 0x20000015 0x4000001e>;
+ tlb-size = <0>;
+ tlb-sets = <0>;
+ device_type = "cpu";
+ d-tlb-size = <128>;
+ d-tlb-sets = <2>;
+ reg = <0>;
+ general-purpose;
+ 64-bit;
+ d-cache-size = <0x1000>;
+ ibm,chip-id = <0>;
+ };
+ };
+
+ soc@c0000000 {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ interrupt-parent = <&ICS>;
+
+ ranges = <0 0 0xc0000000 0x40000000>;
+
+ interrupt-controller@4000 {
+ compatible = "openpower,xics-presentation", "ibm,ppc-xicp";
+ ibm,interrupt-server-ranges = <0x0 0x1>;
+ reg = <0x4000 0x100>;
+ };
+
+ ICS: interrupt-controller@5000 {
+ compatible = "openpower,xics-sources";
+ interrupt-controller;
+ interrupt-ranges = <0x10 0x10>;
+ reg = <0x5000 0x100>;
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+ };
+
+ UART0: serial@2000 {
+ device_type = "serial";
+ compatible = "ns16550";
+ reg = <0x2000 0x8>;
+ clock-frequency = <100000000>;
+ current-speed = <115200>;
+ reg-shift = <2>;
+ fifo-size = <16>;
+ interrupts = <0x10 0x1>;
+ };
+ };
+
+ chosen {
+ bootargs = "";
+ ibm,architecture-vec-5 = [19 00 10 00 00 00 00 00 00 00 00 00 00 00 00 00
+ 00 00 00 00 00 00 00 00 40 00 40];
+ stdout-path = &UART0;
+ };
+};
diff --git a/arch/powerpc/boot/microwatt.c b/arch/powerpc/boot/microwatt.c
new file mode 100644
index 000000000000..ca9d83617fc1
--- /dev/null
+++ b/arch/powerpc/boot/microwatt.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <stddef.h>
+#include "stdio.h"
+#include "types.h"
+#include "io.h"
+#include "ops.h"
+
+BSS_STACK(8192);
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5)
+{
+ unsigned long heapsize = 16*1024*1024 - (unsigned long)_end;
+
+ /*
+ * Disable interrupts and turn off MSR_RI, since we'll
+ * shortly be overwriting the interrupt vectors.
+ */
+ __asm__ volatile("mtmsrd %0,1" : : "r" (0));
+
+ simple_alloc_init(_end, heapsize, 32, 64);
+ fdt_init(_dtb_start);
+ serial_console_init();
+}
diff --git a/arch/powerpc/boot/ns16550.c b/arch/powerpc/boot/ns16550.c
index b0da4466d419..f16d2be1d0f3 100644
--- a/arch/powerpc/boot/ns16550.c
+++ b/arch/powerpc/boot/ns16550.c
@@ -15,6 +15,7 @@
#include "stdio.h"
#include "io.h"
#include "ops.h"
+#include "of.h"
#define UART_DLL 0 /* Out: Divisor Latch Low */
#define UART_DLM 1 /* Out: Divisor Latch High */
@@ -58,16 +59,20 @@ int ns16550_console_init(void *devp, struct serial_console_data *scdp)
int n;
u32 reg_offset;
- if (dt_get_virtual_reg(devp, (void **)&reg_base, 1) < 1)
+ if (dt_get_virtual_reg(devp, (void **)&reg_base, 1) < 1) {
+ printf("virt reg parse fail...\r\n");
return -1;
+ }
n = getprop(devp, "reg-offset", &reg_offset, sizeof(reg_offset));
if (n == sizeof(reg_offset))
- reg_base += reg_offset;
+ reg_base += be32_to_cpu(reg_offset);
n = getprop(devp, "reg-shift", &reg_shift, sizeof(reg_shift));
if (n != sizeof(reg_shift))
reg_shift = 0;
+ else
+ reg_shift = be32_to_cpu(reg_shift);
scdp->open = ns16550_open;
scdp->putc = ns16550_putc;
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index cdb796b76e2e..1f4676bab786 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -342,6 +342,11 @@ gamecube|wii)
link_address='0x600000'
platformo="$object/$platform-head.o $object/$platform.o"
;;
+microwatt)
+ link_address='0x500000'
+ platformo="$object/fixed-head.o $object/$platform.o"
+ binary=y
+ ;;
treeboot-currituck)
link_address='0x1000000'
;;
diff --git a/arch/powerpc/boot/zImage.ps3.lds.S b/arch/powerpc/boot/zImage.ps3.lds.S
index 7b2ff2eaa73a..d0ffb493614d 100644
--- a/arch/powerpc/boot/zImage.ps3.lds.S
+++ b/arch/powerpc/boot/zImage.ps3.lds.S
@@ -8,7 +8,7 @@ SECTIONS
.kernel:vmlinux.bin : { *(.kernel:vmlinux.bin) }
_vmlinux_end = .;
- . = ALIGN(4096);
+ . = ALIGN(8);
_dtb_start = .;
.kernel:dtb : { *(.kernel:dtb) }
_dtb_end = .;
diff --git a/arch/powerpc/configs/32-bit.config b/arch/powerpc/configs/32-bit.config
new file mode 100644
index 000000000000..ad6546850c68
--- /dev/null
+++ b/arch/powerpc/configs/32-bit.config
@@ -0,0 +1 @@
+# CONFIG_PPC64 is not set
diff --git a/arch/powerpc/configs/64-bit.config b/arch/powerpc/configs/64-bit.config
new file mode 100644
index 000000000000..0fe6406929e2
--- /dev/null
+++ b/arch/powerpc/configs/64-bit.config
@@ -0,0 +1 @@
+CONFIG_PPC64=y
diff --git a/arch/powerpc/configs/microwatt_defconfig b/arch/powerpc/configs/microwatt_defconfig
new file mode 100644
index 000000000000..a08b739123da
--- /dev/null
+++ b/arch/powerpc/configs/microwatt_defconfig
@@ -0,0 +1,98 @@
+# CONFIG_SWAP is not set
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_PREEMPT_VOLUNTARY=y
+CONFIG_TICK_CPU_ACCOUNTING=y
+CONFIG_LOG_BUF_SHIFT=16
+CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=12
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_EMBEDDED=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_COMPAT_BRK is not set
+# CONFIG_SLAB_MERGE_DEFAULT is not set
+CONFIG_PPC64=y
+# CONFIG_PPC_KUEP is not set
+# CONFIG_PPC_KUAP is not set
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_NR_IRQS=64
+CONFIG_PANIC_TIMEOUT=10
+# CONFIG_PPC_POWERNV is not set
+# CONFIG_PPC_PSERIES is not set
+CONFIG_PPC_MICROWATT=y
+# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
+CONFIG_CPU_FREQ=y
+CONFIG_HZ_100=y
+# CONFIG_PPC_MEM_KEYS is not set
+# CONFIG_SECCOMP is not set
+# CONFIG_MQ_IOSCHED_KYBER is not set
+# CONFIG_COREDUMP is not set
+# CONFIG_COMPACTION is not set
+# CONFIG_MIGRATION is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_PACKET_DIAG=y
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=y
+CONFIG_INET=y
+CONFIG_INET_UDP_DIAG=y
+CONFIG_INET_RAW_DIAG=y
+# CONFIG_WIRELESS is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FW_LOADER is not set
+# CONFIG_ALLOW_DEV_COREDUMP is not set
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_PARTITIONED_MASTER=y
+CONFIG_MTD_SPI_NOR=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_NETDEVICES=y
+# CONFIG_WLAN is not set
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_SERIAL_NONSTANDARD=y
+# CONFIG_NVRAM is not set
+CONFIG_RANDOM_TRUST_CPU=y
+CONFIG_SPI=y
+CONFIG_SPI_DEBUG=y
+CONFIG_SPI_BITBANG=y
+CONFIG_SPI_SPIDEV=y
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_VIRTIO_MENU is not set
+# CONFIG_IOMMU_SUPPORT is not set
+# CONFIG_NVMEM is not set
+CONFIG_EXT4_FS=y
+# CONFIG_FILE_LOCKING is not set
+# CONFIG_DNOTIFY is not set
+# CONFIG_INOTIFY_USER is not set
+# CONFIG_MISC_FILESYSTEMS is not set
+# CONFIG_CRYPTO_HW is not set
+# CONFIG_XZ_DEC_X86 is not set
+# CONFIG_XZ_DEC_IA64 is not set
+# CONFIG_XZ_DEC_ARM is not set
+# CONFIG_XZ_DEC_ARMTHUMB is not set
+# CONFIG_XZ_DEC_SPARC is not set
+CONFIG_PRINTK_TIME=y
+# CONFIG_SYMBOLIC_ERRNAME is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+# CONFIG_DEBUG_MISC is not set
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_FTRACE is not set
+# CONFIG_STRICT_DEVMEM is not set
+CONFIG_PPC_DISABLE_WERROR=y
+CONFIG_XMON=y
+CONFIG_XMON_DEFAULT=y
+# CONFIG_XMON_DEFAULT_RO_MODE is not set
+# CONFIG_RUNTIME_TESTING_MENU is not set
diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig
index 949ff9ccda5e..d21f266cea9a 100644
--- a/arch/powerpc/configs/mpc885_ads_defconfig
+++ b/arch/powerpc/configs/mpc885_ads_defconfig
@@ -57,3 +57,28 @@ CONFIG_CRC32_SLICEBY4=y
CONFIG_DEBUG_INFO=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DETECT_HUNG_TASK=y
+CONFIG_PPC_16K_PAGES=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_FS=y
+CONFIG_PPC_PTDUMP=y
+CONFIG_MODULES=y
+CONFIG_SPI=y
+CONFIG_SPI_FSL_SPI=y
+CONFIG_CRYPTO=y
+CONFIG_CRYPTO_DEV_TALITOS=y
+CONFIG_8xx_GPIO=y
+CONFIG_WATCHDOG=y
+CONFIG_8xxx_WDT=y
+CONFIG_SMC_UCODE_PATCH=y
+CONFIG_ADVANCED_OPTIONS=y
+CONFIG_PIN_TLB=y
+CONFIG_PERF_EVENTS=y
+CONFIG_MATH_EMULATION=y
+CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y
+CONFIG_STRICT_KERNEL_RWX=y
+CONFIG_IPV6=y
+CONFIG_BPF_JIT=y
+CONFIG_DEBUG_VM_PGTABLE=y
+CONFIG_BDI_SWITCH=y
+CONFIG_PPC_EARLY_DEBUG=y
+CONFIG_PPC_EARLY_DEBUG_CPM_ADDR=0xff002008
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index 2c87e856d839..8bfeea6c7de7 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -309,6 +309,7 @@ CONFIG_SOFTLOCKUP_DETECTOR=y
CONFIG_HARDLOCKUP_DETECTOR=y
CONFIG_FUNCTION_TRACER=y
CONFIG_SCHED_TRACER=y
+CONFIG_STACK_TRACER=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_PPC_EMULATED_STATS=y
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index 701811c91a6f..0ad2291337a7 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -368,7 +368,9 @@ CONFIG_SOFTLOCKUP_DETECTOR=y
CONFIG_HARDLOCKUP_DETECTOR=y
CONFIG_DEBUG_MUTEXES=y
CONFIG_FUNCTION_TRACER=y
+CONFIG_FTRACE_SYSCALLS=y
CONFIG_SCHED_TRACER=y
+CONFIG_STACK_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_CODE_PATCHING_SELFTEST=y
CONFIG_FTR_FIXUP_SELFTEST=y
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index 50168dde4ea5..b183629f1bcf 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -289,7 +289,9 @@ CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_SOFTLOCKUP_DETECTOR=y
CONFIG_HARDLOCKUP_DETECTOR=y
CONFIG_FUNCTION_TRACER=y
+CONFIG_FTRACE_SYSCALLS=y
CONFIG_SCHED_TRACER=y
+CONFIG_STACK_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_CODE_PATCHING_SELFTEST=y
CONFIG_FTR_FIXUP_SELFTEST=y
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 1c7b75834e04..222823861a67 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -71,8 +71,13 @@ void __init machine_init(u64 dt_ptr);
#endif
long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, unsigned long r0, struct pt_regs *regs);
notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs, long scv);
-notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr);
-notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr);
+notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs);
+notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs);
+#ifdef CONFIG_PPC64
+unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs);
+unsigned long interrupt_exit_user_restart(struct pt_regs *regs);
+unsigned long interrupt_exit_kernel_restart(struct pt_regs *regs);
+#endif
long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
u32 len_high, u32 len_low);
@@ -120,6 +125,7 @@ extern s32 patch__call_flush_branch_caches3;
extern s32 patch__flush_count_cache_return;
extern s32 patch__flush_link_stack_return;
extern s32 patch__call_kvm_flush_link_stack;
+extern s32 patch__call_kvm_flush_link_stack_p9;
extern s32 patch__memset_nocache, patch__memcpy_nocache;
extern long flush_branch_caches;
@@ -140,7 +146,7 @@ void kvmhv_load_host_pmu(void);
void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
-int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu);
+void kvmppc_p9_enter_guest(struct kvm_vcpu *vcpu);
long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
long kvmppc_h_set_xdabr(struct kvm_vcpu *vcpu, unsigned long dabr,
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 61c6e8b200e8..a1732a79e92a 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -23,7 +23,7 @@
#define __atomic_release_fence() \
__asm__ __volatile__(PPC_RELEASE_BARRIER "" : : : "memory")
-static __inline__ int atomic_read(const atomic_t *v)
+static __inline__ int arch_atomic_read(const atomic_t *v)
{
int t;
@@ -32,13 +32,13 @@ static __inline__ int atomic_read(const atomic_t *v)
return t;
}
-static __inline__ void atomic_set(atomic_t *v, int i)
+static __inline__ void arch_atomic_set(atomic_t *v, int i)
{
__asm__ __volatile__("stw%U0%X0 %1,%0" : "=m"UPD_CONSTR(v->counter) : "r"(i));
}
#define ATOMIC_OP(op, asm_op) \
-static __inline__ void atomic_##op(int a, atomic_t *v) \
+static __inline__ void arch_atomic_##op(int a, atomic_t *v) \
{ \
int t; \
\
@@ -53,7 +53,7 @@ static __inline__ void atomic_##op(int a, atomic_t *v) \
} \
#define ATOMIC_OP_RETURN_RELAXED(op, asm_op) \
-static inline int atomic_##op##_return_relaxed(int a, atomic_t *v) \
+static inline int arch_atomic_##op##_return_relaxed(int a, atomic_t *v) \
{ \
int t; \
\
@@ -70,7 +70,7 @@ static inline int atomic_##op##_return_relaxed(int a, atomic_t *v) \
}
#define ATOMIC_FETCH_OP_RELAXED(op, asm_op) \
-static inline int atomic_fetch_##op##_relaxed(int a, atomic_t *v) \
+static inline int arch_atomic_fetch_##op##_relaxed(int a, atomic_t *v) \
{ \
int res, t; \
\
@@ -94,11 +94,11 @@ static inline int atomic_fetch_##op##_relaxed(int a, atomic_t *v) \
ATOMIC_OPS(add, add)
ATOMIC_OPS(sub, subf)
-#define atomic_add_return_relaxed atomic_add_return_relaxed
-#define atomic_sub_return_relaxed atomic_sub_return_relaxed
+#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
+#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
-#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed
-#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed
+#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed
+#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
#undef ATOMIC_OPS
#define ATOMIC_OPS(op, asm_op) \
@@ -109,16 +109,16 @@ ATOMIC_OPS(and, and)
ATOMIC_OPS(or, or)
ATOMIC_OPS(xor, xor)
-#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed
-#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed
-#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed
+#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
+#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed
+#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
#undef ATOMIC_OPS
#undef ATOMIC_FETCH_OP_RELAXED
#undef ATOMIC_OP_RETURN_RELAXED
#undef ATOMIC_OP
-static __inline__ void atomic_inc(atomic_t *v)
+static __inline__ void arch_atomic_inc(atomic_t *v)
{
int t;
@@ -131,9 +131,9 @@ static __inline__ void atomic_inc(atomic_t *v)
: "r" (&v->counter)
: "cc", "xer");
}
-#define atomic_inc atomic_inc
+#define arch_atomic_inc arch_atomic_inc
-static __inline__ int atomic_inc_return_relaxed(atomic_t *v)
+static __inline__ int arch_atomic_inc_return_relaxed(atomic_t *v)
{
int t;
@@ -149,7 +149,7 @@ static __inline__ int atomic_inc_return_relaxed(atomic_t *v)
return t;
}
-static __inline__ void atomic_dec(atomic_t *v)
+static __inline__ void arch_atomic_dec(atomic_t *v)
{
int t;
@@ -162,9 +162,9 @@ static __inline__ void atomic_dec(atomic_t *v)
: "r" (&v->counter)
: "cc", "xer");
}
-#define atomic_dec atomic_dec
+#define arch_atomic_dec arch_atomic_dec
-static __inline__ int atomic_dec_return_relaxed(atomic_t *v)
+static __inline__ int arch_atomic_dec_return_relaxed(atomic_t *v)
{
int t;
@@ -180,17 +180,20 @@ static __inline__ int atomic_dec_return_relaxed(atomic_t *v)
return t;
}
-#define atomic_inc_return_relaxed atomic_inc_return_relaxed
-#define atomic_dec_return_relaxed atomic_dec_return_relaxed
+#define arch_atomic_inc_return_relaxed arch_atomic_inc_return_relaxed
+#define arch_atomic_dec_return_relaxed arch_atomic_dec_return_relaxed
-#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
-#define atomic_cmpxchg_relaxed(v, o, n) \
- cmpxchg_relaxed(&((v)->counter), (o), (n))
-#define atomic_cmpxchg_acquire(v, o, n) \
- cmpxchg_acquire(&((v)->counter), (o), (n))
+#define arch_atomic_cmpxchg(v, o, n) \
+ (arch_cmpxchg(&((v)->counter), (o), (n)))
+#define arch_atomic_cmpxchg_relaxed(v, o, n) \
+ arch_cmpxchg_relaxed(&((v)->counter), (o), (n))
+#define arch_atomic_cmpxchg_acquire(v, o, n) \
+ arch_cmpxchg_acquire(&((v)->counter), (o), (n))
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
-#define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new))
+#define arch_atomic_xchg(v, new) \
+ (arch_xchg(&((v)->counter), new))
+#define arch_atomic_xchg_relaxed(v, new) \
+ arch_xchg_relaxed(&((v)->counter), (new))
/*
* Don't want to override the generic atomic_try_cmpxchg_acquire, because
@@ -199,7 +202,7 @@ static __inline__ int atomic_dec_return_relaxed(atomic_t *v)
* would be a surprise).
*/
static __always_inline bool
-atomic_try_cmpxchg_lock(atomic_t *v, int *old, int new)
+arch_atomic_try_cmpxchg_lock(atomic_t *v, int *old, int new)
{
int r, o = *old;
@@ -229,7 +232,7 @@ atomic_try_cmpxchg_lock(atomic_t *v, int *old, int new)
* Atomically adds @a to @v, so long as it was not @u.
* Returns the old value of @v.
*/
-static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
+static __inline__ int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
{
int t;
@@ -250,7 +253,7 @@ static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
return t;
}
-#define atomic_fetch_add_unless atomic_fetch_add_unless
+#define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless
/**
* atomic_inc_not_zero - increment unless the number is zero
@@ -259,7 +262,7 @@ static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
* Atomically increments @v by 1, so long as @v is non-zero.
* Returns non-zero if @v was non-zero, and zero otherwise.
*/
-static __inline__ int atomic_inc_not_zero(atomic_t *v)
+static __inline__ int arch_atomic_inc_not_zero(atomic_t *v)
{
int t1, t2;
@@ -280,14 +283,14 @@ static __inline__ int atomic_inc_not_zero(atomic_t *v)
return t1;
}
-#define atomic_inc_not_zero(v) atomic_inc_not_zero((v))
+#define arch_atomic_inc_not_zero(v) arch_atomic_inc_not_zero((v))
/*
* Atomically test *v and decrement if it is greater than 0.
* The function returns the old value of *v minus 1, even if
* the atomic variable, v, was not decremented.
*/
-static __inline__ int atomic_dec_if_positive(atomic_t *v)
+static __inline__ int arch_atomic_dec_if_positive(atomic_t *v)
{
int t;
@@ -307,13 +310,13 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v)
return t;
}
-#define atomic_dec_if_positive atomic_dec_if_positive
+#define arch_atomic_dec_if_positive arch_atomic_dec_if_positive
#ifdef __powerpc64__
#define ATOMIC64_INIT(i) { (i) }
-static __inline__ s64 atomic64_read(const atomic64_t *v)
+static __inline__ s64 arch_atomic64_read(const atomic64_t *v)
{
s64 t;
@@ -322,13 +325,13 @@ static __inline__ s64 atomic64_read(const atomic64_t *v)
return t;
}
-static __inline__ void atomic64_set(atomic64_t *v, s64 i)
+static __inline__ void arch_atomic64_set(atomic64_t *v, s64 i)
{
__asm__ __volatile__("std%U0%X0 %1,%0" : "=m"UPD_CONSTR(v->counter) : "r"(i));
}
#define ATOMIC64_OP(op, asm_op) \
-static __inline__ void atomic64_##op(s64 a, atomic64_t *v) \
+static __inline__ void arch_atomic64_##op(s64 a, atomic64_t *v) \
{ \
s64 t; \
\
@@ -344,7 +347,7 @@ static __inline__ void atomic64_##op(s64 a, atomic64_t *v) \
#define ATOMIC64_OP_RETURN_RELAXED(op, asm_op) \
static inline s64 \
-atomic64_##op##_return_relaxed(s64 a, atomic64_t *v) \
+arch_atomic64_##op##_return_relaxed(s64 a, atomic64_t *v) \
{ \
s64 t; \
\
@@ -362,7 +365,7 @@ atomic64_##op##_return_relaxed(s64 a, atomic64_t *v) \
#define ATOMIC64_FETCH_OP_RELAXED(op, asm_op) \
static inline s64 \
-atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v) \
+arch_atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v) \
{ \
s64 res, t; \
\
@@ -386,11 +389,11 @@ atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v) \
ATOMIC64_OPS(add, add)
ATOMIC64_OPS(sub, subf)
-#define atomic64_add_return_relaxed atomic64_add_return_relaxed
-#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
+#define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed
+#define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed
-#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed
-#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed
+#define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed
+#define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed
#undef ATOMIC64_OPS
#define ATOMIC64_OPS(op, asm_op) \
@@ -401,16 +404,16 @@ ATOMIC64_OPS(and, and)
ATOMIC64_OPS(or, or)
ATOMIC64_OPS(xor, xor)
-#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed
-#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed
-#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed
+#define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed
+#define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed
+#define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed
#undef ATOPIC64_OPS
#undef ATOMIC64_FETCH_OP_RELAXED
#undef ATOMIC64_OP_RETURN_RELAXED
#undef ATOMIC64_OP
-static __inline__ void atomic64_inc(atomic64_t *v)
+static __inline__ void arch_atomic64_inc(atomic64_t *v)
{
s64 t;
@@ -423,9 +426,9 @@ static __inline__ void atomic64_inc(atomic64_t *v)
: "r" (&v->counter)
: "cc", "xer");
}
-#define atomic64_inc atomic64_inc
+#define arch_atomic64_inc arch_atomic64_inc
-static __inline__ s64 atomic64_inc_return_relaxed(atomic64_t *v)
+static __inline__ s64 arch_atomic64_inc_return_relaxed(atomic64_t *v)
{
s64 t;
@@ -441,7 +444,7 @@ static __inline__ s64 atomic64_inc_return_relaxed(atomic64_t *v)
return t;
}
-static __inline__ void atomic64_dec(atomic64_t *v)
+static __inline__ void arch_atomic64_dec(atomic64_t *v)
{
s64 t;
@@ -454,9 +457,9 @@ static __inline__ void atomic64_dec(atomic64_t *v)
: "r" (&v->counter)
: "cc", "xer");
}
-#define atomic64_dec atomic64_dec
+#define arch_atomic64_dec arch_atomic64_dec
-static __inline__ s64 atomic64_dec_return_relaxed(atomic64_t *v)
+static __inline__ s64 arch_atomic64_dec_return_relaxed(atomic64_t *v)
{
s64 t;
@@ -472,14 +475,14 @@ static __inline__ s64 atomic64_dec_return_relaxed(atomic64_t *v)
return t;
}
-#define atomic64_inc_return_relaxed atomic64_inc_return_relaxed
-#define atomic64_dec_return_relaxed atomic64_dec_return_relaxed
+#define arch_atomic64_inc_return_relaxed arch_atomic64_inc_return_relaxed
+#define arch_atomic64_dec_return_relaxed arch_atomic64_dec_return_relaxed
/*
* Atomically test *v and decrement if it is greater than 0.
* The function returns the old value of *v minus 1.
*/
-static __inline__ s64 atomic64_dec_if_positive(atomic64_t *v)
+static __inline__ s64 arch_atomic64_dec_if_positive(atomic64_t *v)
{
s64 t;
@@ -498,16 +501,19 @@ static __inline__ s64 atomic64_dec_if_positive(atomic64_t *v)
return t;
}
-#define atomic64_dec_if_positive atomic64_dec_if_positive
+#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
-#define atomic64_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
-#define atomic64_cmpxchg_relaxed(v, o, n) \
- cmpxchg_relaxed(&((v)->counter), (o), (n))
-#define atomic64_cmpxchg_acquire(v, o, n) \
- cmpxchg_acquire(&((v)->counter), (o), (n))
+#define arch_atomic64_cmpxchg(v, o, n) \
+ (arch_cmpxchg(&((v)->counter), (o), (n)))
+#define arch_atomic64_cmpxchg_relaxed(v, o, n) \
+ arch_cmpxchg_relaxed(&((v)->counter), (o), (n))
+#define arch_atomic64_cmpxchg_acquire(v, o, n) \
+ arch_cmpxchg_acquire(&((v)->counter), (o), (n))
-#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
-#define atomic64_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new))
+#define arch_atomic64_xchg(v, new) \
+ (arch_xchg(&((v)->counter), new))
+#define arch_atomic64_xchg_relaxed(v, new) \
+ arch_xchg_relaxed(&((v)->counter), (new))
/**
* atomic64_fetch_add_unless - add unless the number is a given value
@@ -518,7 +524,7 @@ static __inline__ s64 atomic64_dec_if_positive(atomic64_t *v)
* Atomically adds @a to @v, so long as it was not @u.
* Returns the old value of @v.
*/
-static __inline__ s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
+static __inline__ s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
{
s64 t;
@@ -539,7 +545,7 @@ static __inline__ s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
return t;
}
-#define atomic64_fetch_add_unless atomic64_fetch_add_unless
+#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
/**
* atomic_inc64_not_zero - increment unless the number is zero
@@ -548,7 +554,7 @@ static __inline__ s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
* Atomically increments @v by 1, so long as @v is non-zero.
* Returns non-zero if @v was non-zero, and zero otherwise.
*/
-static __inline__ int atomic64_inc_not_zero(atomic64_t *v)
+static __inline__ int arch_atomic64_inc_not_zero(atomic64_t *v)
{
s64 t1, t2;
@@ -569,7 +575,7 @@ static __inline__ int atomic64_inc_not_zero(atomic64_t *v)
return t1 != 0;
}
-#define atomic64_inc_not_zero(v) atomic64_inc_not_zero((v))
+#define arch_atomic64_inc_not_zero(v) arch_atomic64_inc_not_zero((v))
#endif /* __powerpc64__ */
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index 7ae29cfb06c0..f0e687236484 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -46,6 +46,8 @@
# define SMPWMB eieio
#endif
+/* clang defines this macro for a builtin, which will not work with runtime patching */
+#undef __lwsync
#define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory")
#define dma_rmb() __lwsync()
#define dma_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory")
diff --git a/arch/powerpc/include/asm/book3s/32/hash.h b/arch/powerpc/include/asm/book3s/32/hash.h
deleted file mode 100644
index 2a0a467d2985..000000000000
--- a/arch/powerpc/include/asm/book3s/32/hash.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_POWERPC_BOOK3S_32_HASH_H
-#define _ASM_POWERPC_BOOK3S_32_HASH_H
-#ifdef __KERNEL__
-
-/*
- * The "classic" 32-bit implementation of the PowerPC MMU uses a hash
- * table containing PTEs, together with a set of 16 segment registers,
- * to define the virtual to physical address mapping.
- *
- * We use the hash table as an extended TLB, i.e. a cache of currently
- * active mappings. We maintain a two-level page table tree, much
- * like that used by the i386, for the sake of the Linux memory
- * management code. Low-level assembler code in hash_low_32.S
- * (procedure hash_page) is responsible for extracting ptes from the
- * tree and putting them into the hash table when necessary, and
- * updating the accessed and modified bits in the page table tree.
- */
-
-#define _PAGE_PRESENT 0x001 /* software: pte contains a translation */
-#define _PAGE_HASHPTE 0x002 /* hash_page has made an HPTE for this pte */
-#define _PAGE_USER 0x004 /* usermode access allowed */
-#define _PAGE_GUARDED 0x008 /* G: prohibit speculative access */
-#define _PAGE_COHERENT 0x010 /* M: enforce memory coherence (SMP systems) */
-#define _PAGE_NO_CACHE 0x020 /* I: cache inhibit */
-#define _PAGE_WRITETHRU 0x040 /* W: cache write-through */
-#define _PAGE_DIRTY 0x080 /* C: page changed */
-#define _PAGE_ACCESSED 0x100 /* R: page referenced */
-#define _PAGE_EXEC 0x200 /* software: exec allowed */
-#define _PAGE_RW 0x400 /* software: user write access allowed */
-#define _PAGE_SPECIAL 0x800 /* software: Special page */
-
-#ifdef CONFIG_PTE_64BIT
-/* We never clear the high word of the pte */
-#define _PTE_NONE_MASK (0xffffffff00000000ULL | _PAGE_HASHPTE)
-#else
-#define _PTE_NONE_MASK _PAGE_HASHPTE
-#endif
-
-#define _PMD_PRESENT 0
-#define _PMD_PRESENT_MASK (PAGE_MASK)
-#define _PMD_BAD (~PAGE_MASK)
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_POWERPC_BOOK3S_32_HASH_H */
diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index 1670dfe9d4f1..64201125a287 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -7,35 +7,104 @@
#ifndef __ASSEMBLY__
+#include <linux/jump_label.h>
+
+extern struct static_key_false disable_kuap_key;
+extern struct static_key_false disable_kuep_key;
+
+static __always_inline bool kuap_is_disabled(void)
+{
+ return !IS_ENABLED(CONFIG_PPC_KUAP) || static_branch_unlikely(&disable_kuap_key);
+}
+
+static __always_inline bool kuep_is_disabled(void)
+{
+ return !IS_ENABLED(CONFIG_PPC_KUEP) || static_branch_unlikely(&disable_kuep_key);
+}
+
+static inline void kuep_lock(void)
+{
+ if (kuep_is_disabled())
+ return;
+
+ update_user_segments(mfsr(0) | SR_NX);
+}
+
+static inline void kuep_unlock(void)
+{
+ if (kuep_is_disabled())
+ return;
+
+ update_user_segments(mfsr(0) & ~SR_NX);
+}
+
#ifdef CONFIG_PPC_KUAP
#include <linux/sched.h>
-static inline void kuap_update_sr(u32 sr, u32 addr, u32 end)
+#define KUAP_NONE (~0UL)
+#define KUAP_ALL (~1UL)
+
+static inline void kuap_lock_one(unsigned long addr)
{
- addr &= 0xf0000000; /* align addr to start of segment */
- barrier(); /* make sure thread.kuap is updated before playing with SRs */
- while (addr < end) {
- mtsr(sr, addr);
- sr += 0x111; /* next VSID */
- sr &= 0xf0ffffff; /* clear VSID overflow */
- addr += 0x10000000; /* address of next segment */
- }
+ mtsr(mfsr(addr) | SR_KS, addr);
+ isync(); /* Context sync required after mtsr() */
+}
+
+static inline void kuap_unlock_one(unsigned long addr)
+{
+ mtsr(mfsr(addr) & ~SR_KS, addr);
+ isync(); /* Context sync required after mtsr() */
+}
+
+static inline void kuap_lock_all(void)
+{
+ update_user_segments(mfsr(0) | SR_KS);
+ isync(); /* Context sync required after mtsr() */
+}
+
+static inline void kuap_unlock_all(void)
+{
+ update_user_segments(mfsr(0) & ~SR_KS);
isync(); /* Context sync required after mtsr() */
}
+void kuap_lock_all_ool(void);
+void kuap_unlock_all_ool(void);
+
+static inline void kuap_lock(unsigned long addr, bool ool)
+{
+ if (likely(addr != KUAP_ALL))
+ kuap_lock_one(addr);
+ else if (!ool)
+ kuap_lock_all();
+ else
+ kuap_lock_all_ool();
+}
+
+static inline void kuap_unlock(unsigned long addr, bool ool)
+{
+ if (likely(addr != KUAP_ALL))
+ kuap_unlock_one(addr);
+ else if (!ool)
+ kuap_unlock_all();
+ else
+ kuap_unlock_all_ool();
+}
+
static inline void kuap_save_and_lock(struct pt_regs *regs)
{
unsigned long kuap = current->thread.kuap;
- u32 addr = kuap & 0xf0000000;
- u32 end = kuap << 28;
+
+ if (kuap_is_disabled())
+ return;
regs->kuap = kuap;
- if (unlikely(!kuap))
+ if (unlikely(kuap == KUAP_NONE))
return;
- current->thread.kuap = 0;
- kuap_update_sr(mfsr(addr) | SR_KS, addr, end); /* Set Ks */
+ current->thread.kuap = KUAP_NONE;
+ kuap_lock(kuap, false);
}
static inline void kuap_user_restore(struct pt_regs *regs)
@@ -44,22 +113,22 @@ static inline void kuap_user_restore(struct pt_regs *regs)
static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
{
- u32 addr = regs->kuap & 0xf0000000;
- u32 end = regs->kuap << 28;
+ if (kuap_is_disabled())
+ return;
current->thread.kuap = regs->kuap;
- if (unlikely(regs->kuap == kuap))
- return;
-
- kuap_update_sr(mfsr(addr) & ~SR_KS, addr, end); /* Clear Ks */
+ kuap_unlock(regs->kuap, false);
}
static inline unsigned long kuap_get_and_assert_locked(void)
{
unsigned long kuap = current->thread.kuap;
- WARN_ON_ONCE(IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && kuap != 0);
+ if (kuap_is_disabled())
+ return KUAP_NONE;
+
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && kuap != KUAP_NONE);
return kuap;
}
@@ -72,84 +141,78 @@ static inline void kuap_assert_locked(void)
static __always_inline void allow_user_access(void __user *to, const void __user *from,
u32 size, unsigned long dir)
{
- u32 addr, end;
+ if (kuap_is_disabled())
+ return;
BUILD_BUG_ON(!__builtin_constant_p(dir));
- BUILD_BUG_ON(dir & ~KUAP_READ_WRITE);
if (!(dir & KUAP_WRITE))
return;
- addr = (__force u32)to;
-
- if (unlikely(addr >= TASK_SIZE || !size))
- return;
-
- end = min(addr + size, TASK_SIZE);
-
- current->thread.kuap = (addr & 0xf0000000) | ((((end - 1) >> 28) + 1) & 0xf);
- kuap_update_sr(mfsr(addr) & ~SR_KS, addr, end); /* Clear Ks */
+ current->thread.kuap = (__force u32)to;
+ kuap_unlock_one((__force u32)to);
}
-static __always_inline void prevent_user_access(void __user *to, const void __user *from,
- u32 size, unsigned long dir)
+static __always_inline void prevent_user_access(unsigned long dir)
{
- u32 addr, end;
-
- BUILD_BUG_ON(!__builtin_constant_p(dir));
+ u32 kuap = current->thread.kuap;
- if (dir & KUAP_CURRENT_WRITE) {
- u32 kuap = current->thread.kuap;
-
- if (unlikely(!kuap))
- return;
+ if (kuap_is_disabled())
+ return;
- addr = kuap & 0xf0000000;
- end = kuap << 28;
- } else if (dir & KUAP_WRITE) {
- addr = (__force u32)to;
- end = min(addr + size, TASK_SIZE);
+ BUILD_BUG_ON(!__builtin_constant_p(dir));
- if (unlikely(addr >= TASK_SIZE || !size))
- return;
- } else {
+ if (!(dir & KUAP_WRITE))
return;
- }
- current->thread.kuap = 0;
- kuap_update_sr(mfsr(addr) | SR_KS, addr, end); /* set Ks */
+ current->thread.kuap = KUAP_NONE;
+ kuap_lock(kuap, true);
}
static inline unsigned long prevent_user_access_return(void)
{
unsigned long flags = current->thread.kuap;
- unsigned long addr = flags & 0xf0000000;
- unsigned long end = flags << 28;
- void __user *to = (__force void __user *)addr;
- if (flags)
- prevent_user_access(to, to, end - addr, KUAP_READ_WRITE);
+ if (kuap_is_disabled())
+ return KUAP_NONE;
+
+ if (flags != KUAP_NONE) {
+ current->thread.kuap = KUAP_NONE;
+ kuap_lock(flags, true);
+ }
return flags;
}
static inline void restore_user_access(unsigned long flags)
{
- unsigned long addr = flags & 0xf0000000;
- unsigned long end = flags << 28;
- void __user *to = (__force void __user *)addr;
+ if (kuap_is_disabled())
+ return;
- if (flags)
- allow_user_access(to, to, end - addr, KUAP_READ_WRITE);
+ if (flags != KUAP_NONE) {
+ current->thread.kuap = flags;
+ kuap_unlock(flags, true);
+ }
}
static inline bool
bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
{
- unsigned long begin = regs->kuap & 0xf0000000;
- unsigned long end = regs->kuap << 28;
+ unsigned long kuap = regs->kuap;
+
+ if (kuap_is_disabled())
+ return false;
+
+ if (!is_write || kuap == KUAP_ALL)
+ return false;
+ if (kuap == KUAP_NONE)
+ return true;
+
+ /* If faulting address doesn't match unlocked segment, unlock all */
+ if ((kuap ^ address) & 0xf0000000)
+ regs->kuap = KUAP_ALL;
- return is_write && (address < begin || address >= end);
+ return false;
}
#endif /* CONFIG_PPC_KUAP */
diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
index b85f8e114a9c..f5be185cbdf8 100644
--- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
@@ -67,6 +67,16 @@ struct ppc_bat {
#ifndef __ASSEMBLY__
/*
+ * This macro defines the mapping from contexts to VSIDs (virtual
+ * segment IDs). We use a skew on both the context and the high 4 bits
+ * of the 32-bit virtual address (the "effective segment ID") in order
+ * to spread out the entries in the MMU hash table. Note, if this
+ * function is changed then hash functions will have to be
+ * changed to correspond.
+ */
+#define CTX_TO_VSID(c, id) ((((c) * (897 * 16)) + (id * 0x111)) & 0xffffff)
+
+/*
* Hardware Page Table Entry
* Note that the xpn and x bitfields are used only by processors that
* support extended addressing; otherwise, those bits are reserved.
@@ -102,6 +112,37 @@ extern s32 patch__hash_page_B, patch__hash_page_C;
extern s32 patch__flush_hash_A0, patch__flush_hash_A1, patch__flush_hash_A2;
extern s32 patch__flush_hash_B;
+#include <asm/reg.h>
+#include <asm/task_size_32.h>
+
+static __always_inline void update_user_segment(u32 n, u32 val)
+{
+ if (n << 28 < TASK_SIZE)
+ mtsr(val + n * 0x111, n << 28);
+}
+
+static __always_inline void update_user_segments(u32 val)
+{
+ val &= 0xf0ffffff;
+
+ update_user_segment(0, val);
+ update_user_segment(1, val);
+ update_user_segment(2, val);
+ update_user_segment(3, val);
+ update_user_segment(4, val);
+ update_user_segment(5, val);
+ update_user_segment(6, val);
+ update_user_segment(7, val);
+ update_user_segment(8, val);
+ update_user_segment(9, val);
+ update_user_segment(10, val);
+ update_user_segment(11, val);
+ update_user_segment(12, val);
+ update_user_segment(13, val);
+ update_user_segment(14, val);
+ update_user_segment(15, val);
+}
+
#endif /* !__ASSEMBLY__ */
/* We happily ignore the smaller BATs on 601, we don't actually use
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 83c65845a1a9..609c80f67194 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -4,7 +4,43 @@
#include <asm-generic/pgtable-nopmd.h>
-#include <asm/book3s/32/hash.h>
+/*
+ * The "classic" 32-bit implementation of the PowerPC MMU uses a hash
+ * table containing PTEs, together with a set of 16 segment registers,
+ * to define the virtual to physical address mapping.
+ *
+ * We use the hash table as an extended TLB, i.e. a cache of currently
+ * active mappings. We maintain a two-level page table tree, much
+ * like that used by the i386, for the sake of the Linux memory
+ * management code. Low-level assembler code in hash_low_32.S
+ * (procedure hash_page) is responsible for extracting ptes from the
+ * tree and putting them into the hash table when necessary, and
+ * updating the accessed and modified bits in the page table tree.
+ */
+
+#define _PAGE_PRESENT 0x001 /* software: pte contains a translation */
+#define _PAGE_HASHPTE 0x002 /* hash_page has made an HPTE for this pte */
+#define _PAGE_USER 0x004 /* usermode access allowed */
+#define _PAGE_GUARDED 0x008 /* G: prohibit speculative access */
+#define _PAGE_COHERENT 0x010 /* M: enforce memory coherence (SMP systems) */
+#define _PAGE_NO_CACHE 0x020 /* I: cache inhibit */
+#define _PAGE_WRITETHRU 0x040 /* W: cache write-through */
+#define _PAGE_DIRTY 0x080 /* C: page changed */
+#define _PAGE_ACCESSED 0x100 /* R: page referenced */
+#define _PAGE_EXEC 0x200 /* software: exec allowed */
+#define _PAGE_RW 0x400 /* software: user write access allowed */
+#define _PAGE_SPECIAL 0x800 /* software: Special page */
+
+#ifdef CONFIG_PTE_64BIT
+/* We never clear the high word of the pte */
+#define _PTE_NONE_MASK (0xffffffff00000000ULL | _PAGE_HASHPTE)
+#else
+#define _PTE_NONE_MASK _PAGE_HASHPTE
+#endif
+
+#define _PMD_PRESENT 0
+#define _PMD_PRESENT_MASK (PAGE_MASK)
+#define _PMD_BAD (~PAGE_MASK)
/* And here we include common definitions */
diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h
index 9700da3a4093..a1cc73a88710 100644
--- a/arch/powerpc/include/asm/book3s/64/kup.h
+++ b/arch/powerpc/include/asm/book3s/64/kup.h
@@ -398,8 +398,7 @@ static __always_inline void allow_user_access(void __user *to, const void __user
#endif /* !CONFIG_PPC_KUAP */
-static inline void prevent_user_access(void __user *to, const void __user *from,
- unsigned long size, unsigned long dir)
+static inline void prevent_user_access(unsigned long dir)
{
set_kuap(AMR_KUAP_BLOCKED);
if (static_branch_unlikely(&uaccess_flush_key))
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index eace8c3f7b0a..c02f42d1031e 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -19,6 +19,7 @@ struct mmu_psize_def {
int penc[MMU_PAGE_COUNT]; /* HPTE encoding */
unsigned int tlbiel; /* tlbiel supported for that page size */
unsigned long avpnm; /* bits to mask out in AVPN in the HPTE */
+ unsigned long h_rpt_pgsize; /* H_RPT_INVALIDATE page size encoding */
union {
unsigned long sllp; /* SLB L||LP (exact mask to use in slbmte) */
unsigned long ap; /* Ap encoding used by PowerISA 3.0 */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index a666d561b44d..4d9941b2fe51 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -232,6 +232,9 @@ extern unsigned long __pmd_frag_size_shift;
#define PTRS_PER_PUD (1 << PUD_INDEX_SIZE)
#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE)
+#define MAX_PTRS_PER_PGD (1 << (H_PGD_INDEX_SIZE > RADIX_PGD_INDEX_SIZE ? \
+ H_PGD_INDEX_SIZE : RADIX_PGD_INDEX_SIZE))
+
/* PMD_SHIFT determines what a second-level page table entry can map */
#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE)
#define PMD_SIZE (1UL << PMD_SHIFT)
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index 8b33601cdb9d..a46fd37ad552 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -4,6 +4,10 @@
#include <asm/hvcall.h>
+#define RIC_FLUSH_TLB 0
+#define RIC_FLUSH_PWC 1
+#define RIC_FLUSH_ALL 2
+
struct vm_area_struct;
struct mm_struct;
struct mmu_gather;
diff --git a/arch/powerpc/include/asm/book3s/pgtable.h b/arch/powerpc/include/asm/book3s/pgtable.h
index 0e1263455d73..ad130e15a126 100644
--- a/arch/powerpc/include/asm/book3s/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/pgtable.h
@@ -8,7 +8,6 @@
#include <asm/book3s/32/pgtable.h>
#endif
-#define FIRST_USER_ADDRESS 0UL
#ifndef __ASSEMBLY__
/* Insert a PTE, top-level function is out of line. It uses an inline
* low level function in the respective pgtable-* files
diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h
index d5da7ddbf0fc..350de8f90250 100644
--- a/arch/powerpc/include/asm/checksum.h
+++ b/arch/powerpc/include/asm/checksum.h
@@ -91,7 +91,7 @@ static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
}
#define HAVE_ARCH_CSUM_ADD
-static inline __wsum csum_add(__wsum csum, __wsum addend)
+static __always_inline __wsum csum_add(__wsum csum, __wsum addend)
{
#ifdef __powerpc64__
u64 res = (__force u64)csum;
diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
index cf091c4c22e5..05f246c0e36e 100644
--- a/arch/powerpc/include/asm/cmpxchg.h
+++ b/arch/powerpc/include/asm/cmpxchg.h
@@ -185,14 +185,14 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size)
BUILD_BUG_ON_MSG(1, "Unsupported size for __xchg_local");
return x;
}
-#define xchg_local(ptr,x) \
+#define arch_xchg_local(ptr,x) \
({ \
__typeof__(*(ptr)) _x_ = (x); \
(__typeof__(*(ptr))) __xchg_local((ptr), \
(unsigned long)_x_, sizeof(*(ptr))); \
})
-#define xchg_relaxed(ptr, x) \
+#define arch_xchg_relaxed(ptr, x) \
({ \
__typeof__(*(ptr)) _x_ = (x); \
(__typeof__(*(ptr))) __xchg_relaxed((ptr), \
@@ -467,7 +467,7 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg_acquire");
return old;
}
-#define cmpxchg(ptr, o, n) \
+#define arch_cmpxchg(ptr, o, n) \
({ \
__typeof__(*(ptr)) _o_ = (o); \
__typeof__(*(ptr)) _n_ = (n); \
@@ -476,7 +476,7 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
})
-#define cmpxchg_local(ptr, o, n) \
+#define arch_cmpxchg_local(ptr, o, n) \
({ \
__typeof__(*(ptr)) _o_ = (o); \
__typeof__(*(ptr)) _n_ = (n); \
@@ -484,7 +484,7 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
(unsigned long)_n_, sizeof(*(ptr))); \
})
-#define cmpxchg_relaxed(ptr, o, n) \
+#define arch_cmpxchg_relaxed(ptr, o, n) \
({ \
__typeof__(*(ptr)) _o_ = (o); \
__typeof__(*(ptr)) _n_ = (n); \
@@ -493,7 +493,7 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
sizeof(*(ptr))); \
})
-#define cmpxchg_acquire(ptr, o, n) \
+#define arch_cmpxchg_acquire(ptr, o, n) \
({ \
__typeof__(*(ptr)) _o_ = (o); \
__typeof__(*(ptr)) _n_ = (n); \
@@ -502,29 +502,29 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
sizeof(*(ptr))); \
})
#ifdef CONFIG_PPC64
-#define cmpxchg64(ptr, o, n) \
+#define arch_cmpxchg64(ptr, o, n) \
({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
- cmpxchg((ptr), (o), (n)); \
+ arch_cmpxchg((ptr), (o), (n)); \
})
-#define cmpxchg64_local(ptr, o, n) \
+#define arch_cmpxchg64_local(ptr, o, n) \
({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
- cmpxchg_local((ptr), (o), (n)); \
+ arch_cmpxchg_local((ptr), (o), (n)); \
})
-#define cmpxchg64_relaxed(ptr, o, n) \
+#define arch_cmpxchg64_relaxed(ptr, o, n) \
({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
- cmpxchg_relaxed((ptr), (o), (n)); \
+ arch_cmpxchg_relaxed((ptr), (o), (n)); \
})
-#define cmpxchg64_acquire(ptr, o, n) \
+#define arch_cmpxchg64_acquire(ptr, o, n) \
({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
- cmpxchg_acquire((ptr), (o), (n)); \
+ arch_cmpxchg_acquire((ptr), (o), (n)); \
})
#else
#include <asm-generic/cmpxchg-local.h>
-#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
+#define arch_cmpxchg64_local(ptr, o, n) __generic_cmpxchg64_local((ptr), (o), (n))
#endif
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index f1d029bf906e..a95f63788c6b 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -23,13 +23,13 @@
#define BRANCH_ABSOLUTE 0x2
bool is_offset_in_branch_range(long offset);
-int create_branch(struct ppc_inst *instr, const struct ppc_inst *addr,
+int create_branch(struct ppc_inst *instr, const u32 *addr,
unsigned long target, int flags);
-int create_cond_branch(struct ppc_inst *instr, const struct ppc_inst *addr,
+int create_cond_branch(struct ppc_inst *instr, const u32 *addr,
unsigned long target, int flags);
-int patch_branch(struct ppc_inst *addr, unsigned long target, int flags);
-int patch_instruction(struct ppc_inst *addr, struct ppc_inst instr);
-int raw_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr);
+int patch_branch(u32 *addr, unsigned long target, int flags);
+int patch_instruction(u32 *addr, struct ppc_inst instr);
+int raw_patch_instruction(u32 *addr, struct ppc_inst instr);
static inline unsigned long patch_site_addr(s32 *site)
{
@@ -38,18 +38,18 @@ static inline unsigned long patch_site_addr(s32 *site)
static inline int patch_instruction_site(s32 *site, struct ppc_inst instr)
{
- return patch_instruction((struct ppc_inst *)patch_site_addr(site), instr);
+ return patch_instruction((u32 *)patch_site_addr(site), instr);
}
static inline int patch_branch_site(s32 *site, unsigned long target, int flags)
{
- return patch_branch((struct ppc_inst *)patch_site_addr(site), target, flags);
+ return patch_branch((u32 *)patch_site_addr(site), target, flags);
}
static inline int modify_instruction(unsigned int *addr, unsigned int clr,
unsigned int set)
{
- return patch_instruction((struct ppc_inst *)addr, ppc_inst((*addr & ~clr) | set));
+ return patch_instruction(addr, ppc_inst((*addr & ~clr) | set));
}
static inline int modify_instruction_site(s32 *site, unsigned int clr, unsigned int set)
@@ -59,10 +59,8 @@ static inline int modify_instruction_site(s32 *site, unsigned int clr, unsigned
int instr_is_relative_branch(struct ppc_inst instr);
int instr_is_relative_link_branch(struct ppc_inst instr);
-int instr_is_branch_to_addr(const struct ppc_inst *instr, unsigned long addr);
-unsigned long branch_target(const struct ppc_inst *instr);
-int translate_branch(struct ppc_inst *instr, const struct ppc_inst *dest,
- const struct ppc_inst *src);
+unsigned long branch_target(const u32 *instr);
+int translate_branch(struct ppc_inst *instr, const u32 *dest, const u32 *src);
extern bool is_conditional_branch(struct ppc_inst instr);
#ifdef CONFIG_PPC_BOOK3E_64
void __patch_exception(int exc, unsigned long addr);
@@ -73,9 +71,9 @@ void __patch_exception(int exc, unsigned long addr);
#endif
#define OP_RT_RA_MASK 0xffff0000UL
-#define LIS_R2 (PPC_INST_ADDIS | __PPC_RT(R2))
-#define ADDIS_R2_R12 (PPC_INST_ADDIS | __PPC_RT(R2) | __PPC_RA(R12))
-#define ADDI_R2_R2 (PPC_INST_ADDI | __PPC_RT(R2) | __PPC_RA(R2))
+#define LIS_R2 (PPC_RAW_LIS(_R2, 0))
+#define ADDIS_R2_R12 (PPC_RAW_ADDIS(_R2, _R12, 0))
+#define ADDI_R2_R2 (PPC_RAW_ADDI(_R2, _R2, 0))
static inline unsigned long ppc_function_entry(void *func)
@@ -180,12 +178,10 @@ static inline unsigned long ppc_kallsyms_lookup_name(const char *name)
#define R2_STACK_OFFSET 40
#endif
-#define PPC_INST_LD_TOC (PPC_INST_LD | ___PPC_RT(__REG_R2) | \
- ___PPC_RA(__REG_R1) | R2_STACK_OFFSET)
+#define PPC_INST_LD_TOC PPC_RAW_LD(_R2, _R1, R2_STACK_OFFSET)
/* usually preceded by a mflr r0 */
-#define PPC_INST_STD_LR (PPC_INST_STD | ___PPC_RS(__REG_R0) | \
- ___PPC_RA(__REG_R1) | PPC_LR_STKOFF)
+#define PPC_INST_STD_LR PPC_RAW_STD(_R0, _R1, PPC_LR_STKOFF)
#endif /* CONFIG_PPC64 */
#endif /* _ASM_POWERPC_CODE_PATCHING_H */
diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h
index 98c8bd155bf9..b167186aaee4 100644
--- a/arch/powerpc/include/asm/cputhreads.h
+++ b/arch/powerpc/include/asm/cputhreads.h
@@ -98,6 +98,36 @@ static inline int cpu_last_thread_sibling(int cpu)
return cpu | (threads_per_core - 1);
}
+/*
+ * tlb_thread_siblings are siblings which share a TLB. This is not
+ * architected, is not something a hypervisor could emulate and a future
+ * CPU may change behaviour even in compat mode, so this should only be
+ * used on PowerNV, and only with care.
+ */
+static inline int cpu_first_tlb_thread_sibling(int cpu)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8))
+ return cpu & ~0x6; /* Big Core */
+ else
+ return cpu_first_thread_sibling(cpu);
+}
+
+static inline int cpu_last_tlb_thread_sibling(int cpu)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8))
+ return cpu | 0x6; /* Big Core */
+ else
+ return cpu_last_thread_sibling(cpu);
+}
+
+static inline int cpu_tlb_thread_sibling_step(void)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8))
+ return 2; /* Big Core */
+ else
+ return 1;
+}
+
static inline u32 get_tensr(void)
{
#ifdef CONFIG_BOOKE
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index c1a8aac01cf9..bb6f78fcf981 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -35,6 +35,19 @@
/* PACA save area size in u64 units (exgen, exmc, etc) */
#define EX_SIZE 10
+/* PACA save area offsets */
+#define EX_R9 0
+#define EX_R10 8
+#define EX_R11 16
+#define EX_R12 24
+#define EX_R13 32
+#define EX_DAR 40
+#define EX_DSISR 48
+#define EX_CCR 52
+#define EX_CFAR 56
+#define EX_PPR 64
+#define EX_CTR 72
+
/*
* maximum recursive depth of MCE exceptions
*/
diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h
index 4cb9efa2eb21..242204e12993 100644
--- a/arch/powerpc/include/asm/head-64.h
+++ b/arch/powerpc/include/asm/head-64.h
@@ -16,7 +16,7 @@
.section ".head.data.\name\()","a",@progbits
.endm
.macro use_ftsec name
- .section ".head.text.\name\()"
+ .section ".head.text.\name\()","ax",@progbits
.endm
/*
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index e3b29eda8074..9bcf345cb208 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -294,6 +294,13 @@
#define H_RESIZE_HPT_COMMIT 0x370
#define H_REGISTER_PROC_TBL 0x37C
#define H_SIGNAL_SYS_RESET 0x380
+#define H_ALLOCATE_VAS_WINDOW 0x388
+#define H_MODIFY_VAS_WINDOW 0x38C
+#define H_DEALLOCATE_VAS_WINDOW 0x390
+#define H_QUERY_VAS_WINDOW 0x394
+#define H_QUERY_VAS_CAPABILITIES 0x398
+#define H_QUERY_NX_CAPABILITIES 0x39C
+#define H_GET_NX_FAULT 0x3A0
#define H_INT_GET_SOURCE_INFO 0x3A8
#define H_INT_SET_SOURCE_CONFIG 0x3AC
#define H_INT_GET_SOURCE_CONFIG 0x3B0
@@ -393,6 +400,9 @@
#define H_CPU_BEHAV_FAVOUR_SECURITY_H (1ull << 60) // IBM bit 3
#define H_CPU_BEHAV_FLUSH_COUNT_CACHE (1ull << 58) // IBM bit 5
#define H_CPU_BEHAV_FLUSH_LINK_STACK (1ull << 57) // IBM bit 6
+#define H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY (1ull << 56) // IBM bit 7
+#define H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS (1ull << 55) // IBM bit 8
+#define H_CPU_BEHAV_NO_STF_BARRIER (1ull << 54) // IBM bit 9
/* Flag values used in H_REGISTER_PROC_TBL hcall */
#define PROC_TABLE_OP_MASK 0x18
@@ -413,9 +423,9 @@
#define H_RPTI_TYPE_NESTED 0x0001 /* Invalidate nested guest partition-scope */
#define H_RPTI_TYPE_TLB 0x0002 /* Invalidate TLB */
#define H_RPTI_TYPE_PWC 0x0004 /* Invalidate Page Walk Cache */
-/* Invalidate Process Table Entries if H_RPTI_TYPE_NESTED is clear */
+/* Invalidate caching of Process Table Entries if H_RPTI_TYPE_NESTED is clear */
#define H_RPTI_TYPE_PRT 0x0008
-/* Invalidate Partition Table Entries if H_RPTI_TYPE_NESTED is set */
+/* Invalidate caching of Partition Table Entries if H_RPTI_TYPE_NESTED is set */
#define H_RPTI_TYPE_PAT 0x0008
#define H_RPTI_TYPE_ALL (H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | \
H_RPTI_TYPE_PRT)
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 56a98936a6a9..21cc571ea9c2 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -18,8 +18,17 @@
* PACA flags in paca->irq_happened.
*
* This bits are set when interrupts occur while soft-disabled
- * and allow a proper replay. Additionally, PACA_IRQ_HARD_DIS
- * is set whenever we manually hard disable.
+ * and allow a proper replay.
+ *
+ * The PACA_IRQ_HARD_DIS is set whenever we hard disable. It is almost
+ * always in synch with the MSR[EE] state, except:
+ * - A window in interrupt entry, where hardware disables MSR[EE] and that
+ * must be "reconciled" with the soft mask state.
+ * - NMI interrupts that hit in awkward places, until they fix the state.
+ * - When local irqs are being enabled and state is being fixed up.
+ * - When returning from an interrupt there are some windows where this
+ * can become out of synch, but gets fixed before the RFI or before
+ * executing the next user instruction (see arch/powerpc/kernel/interrupt.c).
*/
#define PACA_IRQ_HARD_DIS 0x01
#define PACA_IRQ_DBELL 0x02
@@ -389,7 +398,15 @@ static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
return !(regs->msr & MSR_EE);
}
-static inline void may_hard_irq_enable(void) { }
+static inline bool may_hard_irq_enable(void)
+{
+ return false;
+}
+
+static inline void do_hard_irq_enable(void)
+{
+ BUILD_BUG();
+}
static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val)
{
diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
index 268d3bd073c8..b11c0e2f9639 100644
--- a/arch/powerpc/include/asm/inst.h
+++ b/arch/powerpc/include/asm/inst.h
@@ -8,17 +8,17 @@
#define ___get_user_instr(gu_op, dest, ptr) \
({ \
- long __gui_ret = 0; \
- unsigned long __gui_ptr = (unsigned long)ptr; \
+ long __gui_ret; \
+ u32 __user *__gui_ptr = (u32 __user *)ptr; \
struct ppc_inst __gui_inst; \
unsigned int __prefix, __suffix; \
- __gui_ret = gu_op(__prefix, (unsigned int __user *)__gui_ptr); \
+ \
+ __chk_user_ptr(ptr); \
+ __gui_ret = gu_op(__prefix, __gui_ptr); \
if (__gui_ret == 0) { \
if ((__prefix >> 26) == OP_PREFIX) { \
- __gui_ret = gu_op(__suffix, \
- (unsigned int __user *)__gui_ptr + 1); \
- __gui_inst = ppc_inst_prefix(__prefix, \
- __suffix); \
+ __gui_ret = gu_op(__suffix, __gui_ptr + 1); \
+ __gui_inst = ppc_inst_prefix(__prefix, __suffix); \
} else { \
__gui_inst = ppc_inst(__prefix); \
} \
@@ -29,14 +29,15 @@
})
#else /* !CONFIG_PPC64 */
#define ___get_user_instr(gu_op, dest, ptr) \
- gu_op((dest).val, (u32 __user *)(ptr))
+({ \
+ __chk_user_ptr(ptr); \
+ gu_op((dest).val, (u32 __user *)(ptr)); \
+})
#endif /* CONFIG_PPC64 */
-#define get_user_instr(x, ptr) \
- ___get_user_instr(get_user, x, ptr)
+#define get_user_instr(x, ptr) ___get_user_instr(get_user, x, ptr)
-#define __get_user_instr(x, ptr) \
- ___get_user_instr(__get_user, x, ptr)
+#define __get_user_instr(x, ptr) ___get_user_instr(__get_user, x, ptr)
/*
* Instruction data type for POWER
@@ -59,9 +60,9 @@ static inline int ppc_inst_primary_opcode(struct ppc_inst x)
return ppc_inst_val(x) >> 26;
}
-#ifdef CONFIG_PPC64
-#define ppc_inst(x) ((struct ppc_inst){ .val = (x), .suffix = 0xff })
+#define ppc_inst(x) ((struct ppc_inst){ .val = (x) })
+#ifdef CONFIG_PPC64
#define ppc_inst_prefix(x, y) ((struct ppc_inst){ .val = (x), .suffix = (y) })
static inline u32 ppc_inst_suffix(struct ppc_inst x)
@@ -69,68 +70,43 @@ static inline u32 ppc_inst_suffix(struct ppc_inst x)
return x.suffix;
}
-static inline bool ppc_inst_prefixed(struct ppc_inst x)
-{
- return (ppc_inst_primary_opcode(x) == 1) && ppc_inst_suffix(x) != 0xff;
-}
+#else
+#define ppc_inst_prefix(x, y) ppc_inst(x)
-static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x)
+static inline u32 ppc_inst_suffix(struct ppc_inst x)
{
- return ppc_inst_prefix(swab32(ppc_inst_val(x)),
- swab32(ppc_inst_suffix(x)));
+ return 0;
}
-static inline struct ppc_inst ppc_inst_read(const struct ppc_inst *ptr)
-{
- u32 val, suffix;
-
- val = *(u32 *)ptr;
- if ((val >> 26) == OP_PREFIX) {
- suffix = *((u32 *)ptr + 1);
- return ppc_inst_prefix(val, suffix);
- } else {
- return ppc_inst(val);
- }
-}
+#endif /* CONFIG_PPC64 */
-static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y)
+static inline struct ppc_inst ppc_inst_read(const u32 *ptr)
{
- return *(u64 *)&x == *(u64 *)&y;
+ if (IS_ENABLED(CONFIG_PPC64) && (*ptr >> 26) == OP_PREFIX)
+ return ppc_inst_prefix(*ptr, *(ptr + 1));
+ else
+ return ppc_inst(*ptr);
}
-#else
-
-#define ppc_inst(x) ((struct ppc_inst){ .val = x })
-
-#define ppc_inst_prefix(x, y) ppc_inst(x)
-
static inline bool ppc_inst_prefixed(struct ppc_inst x)
{
- return false;
-}
-
-static inline u32 ppc_inst_suffix(struct ppc_inst x)
-{
- return 0;
+ return IS_ENABLED(CONFIG_PPC64) && ppc_inst_primary_opcode(x) == OP_PREFIX;
}
static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x)
{
- return ppc_inst(swab32(ppc_inst_val(x)));
-}
-
-static inline struct ppc_inst ppc_inst_read(const struct ppc_inst *ptr)
-{
- return *ptr;
+ return ppc_inst_prefix(swab32(ppc_inst_val(x)), swab32(ppc_inst_suffix(x)));
}
static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y)
{
- return ppc_inst_val(x) == ppc_inst_val(y);
+ if (ppc_inst_val(x) != ppc_inst_val(y))
+ return false;
+ if (!ppc_inst_prefixed(x))
+ return true;
+ return ppc_inst_suffix(x) == ppc_inst_suffix(y);
}
-#endif /* CONFIG_PPC64 */
-
static inline int ppc_inst_len(struct ppc_inst x)
{
return ppc_inst_prefixed(x) ? 8 : 4;
@@ -140,13 +116,13 @@ static inline int ppc_inst_len(struct ppc_inst x)
* Return the address of the next instruction, if the instruction @value was
* located at @location.
*/
-static inline struct ppc_inst *ppc_inst_next(void *location, struct ppc_inst *value)
+static inline u32 *ppc_inst_next(u32 *location, u32 *value)
{
struct ppc_inst tmp;
tmp = ppc_inst_read(value);
- return location + ppc_inst_len(tmp);
+ return (void *)location + ppc_inst_len(tmp);
}
static inline unsigned long ppc_inst_as_ulong(struct ppc_inst x)
@@ -178,6 +154,6 @@ static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], struct ppc_ins
__str; \
})
-int copy_inst_from_kernel_nofault(struct ppc_inst *inst, struct ppc_inst *src);
+int copy_inst_from_kernel_nofault(struct ppc_inst *inst, u32 *src);
#endif /* _ASM_POWERPC_INST_H */
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 59f704408d65..d4bdf7d274ac 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -73,13 +73,47 @@
#include <asm/kprobes.h>
#include <asm/runlatch.h>
+#ifdef CONFIG_PPC_BOOK3S_64
+extern char __end_soft_masked[];
+bool search_kernel_soft_mask_table(unsigned long addr);
+unsigned long search_kernel_restart_table(unsigned long addr);
+
+DECLARE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant);
+
+static inline bool is_implicit_soft_masked(struct pt_regs *regs)
+{
+ if (regs->msr & MSR_PR)
+ return false;
+
+ if (regs->nip >= (unsigned long)__end_soft_masked)
+ return false;
+
+ return search_kernel_soft_mask_table(regs->nip);
+}
+
+static inline void srr_regs_clobbered(void)
+{
+ local_paca->srr_valid = 0;
+ local_paca->hsrr_valid = 0;
+}
+#else
+static inline bool is_implicit_soft_masked(struct pt_regs *regs)
+{
+ return false;
+}
+
+static inline void srr_regs_clobbered(void)
+{
+}
+#endif
+
static inline void nap_adjust_return(struct pt_regs *regs)
{
#ifdef CONFIG_PPC_970_NAP
if (unlikely(test_thread_local_flags(_TLF_NAPPING))) {
/* Can avoid a test-and-clear because NMIs do not call this */
clear_thread_local_flags(_TLF_NAPPING);
- regs->nip = (unsigned long)power4_idle_nap_return;
+ regs_set_return_ip(regs, (unsigned long)power4_idle_nap_return);
}
#endif
}
@@ -129,9 +163,18 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
* CT_WARN_ON comes here via program_check_exception,
* so avoid recursion.
*/
- if (TRAP(regs) != INTERRUPT_PROGRAM)
+ if (TRAP(regs) != INTERRUPT_PROGRAM) {
CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
+ BUG_ON(is_implicit_soft_masked(regs));
+ }
+#ifdef CONFIG_PPC_BOOK3S
+ /* Move this under a debugging check */
+ if (arch_irq_disabled_regs(regs))
+ BUG_ON(search_kernel_restart_table(regs->nip));
+#endif
}
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+ BUG_ON(!arch_irq_disabled_regs(regs) && !(regs->msr & MSR_EE));
#endif
booke_restore_dbcr0();
@@ -186,6 +229,7 @@ struct interrupt_nmi_state {
u8 irq_soft_mask;
u8 irq_happened;
u8 ftrace_enabled;
+ u64 softe;
#endif
};
@@ -211,6 +255,7 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte
#ifdef CONFIG_PPC64
state->irq_soft_mask = local_paca->irq_soft_mask;
state->irq_happened = local_paca->irq_happened;
+ state->softe = regs->softe;
/*
* Set IRQS_ALL_DISABLED unconditionally so irqs_disabled() does
@@ -220,12 +265,13 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte
local_paca->irq_soft_mask = IRQS_ALL_DISABLED;
local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
- if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !(regs->msr & MSR_PR) &&
- regs->nip < (unsigned long)__end_interrupts) {
- // Kernel code running below __end_interrupts is
- // implicitly soft-masked.
+ if (is_implicit_soft_masked(regs)) {
+ // Adjust regs->softe soft implicit soft-mask, so
+ // arch_irq_disabled_regs(regs) behaves as expected.
regs->softe = IRQS_ALL_DISABLED;
}
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+ BUG_ON(!arch_irq_disabled_regs(regs) && !(regs->msr & MSR_EE));
/* Don't do any per-CPU operations until interrupt state is fixed */
@@ -258,11 +304,20 @@ static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct inter
*/
#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3S
+ if (arch_irq_disabled_regs(regs)) {
+ unsigned long rst = search_kernel_restart_table(regs->nip);
+ if (rst)
+ regs_set_return_ip(regs, rst);
+ }
+#endif
+
if (nmi_disables_ftrace(regs))
this_cpu_set_ftrace_enabled(state->ftrace_enabled);
/* Check we didn't change the pending interrupt mask. */
WARN_ON_ONCE((state->irq_happened | PACA_IRQ_HARD_DIS) != local_paca->irq_happened);
+ regs->softe = state->softe;
local_paca->irq_happened = state->irq_happened;
local_paca->irq_soft_mask = state->irq_soft_mask;
#endif
diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index b2bd58830430..4982f3711fc3 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -6,7 +6,6 @@
/*
*/
-#include <linux/irqdomain.h>
#include <linux/threads.h>
#include <linux/list.h>
#include <linux/radix-tree.h>
@@ -23,8 +22,8 @@ extern atomic_t ppc_n_lost_interrupts;
/* Total number of virq in the platform */
#define NR_IRQS CONFIG_NR_IRQS
-/* Same thing, used by the generic IRQ code */
-#define NR_IRQS_LEGACY NUM_ISA_INTERRUPTS
+/* Number of irqs reserved for a legacy isa controller */
+#define NR_IRQS_LEGACY 16
extern irq_hw_number_t virq_to_hw(unsigned int virq);
diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h
index 2d5c6bec2b4f..93ce3ec25387 100644
--- a/arch/powerpc/include/asm/jump_label.h
+++ b/arch/powerpc/include/asm/jump_label.h
@@ -50,7 +50,7 @@ l_yes:
1098: nop; \
.pushsection __jump_table, "aw"; \
.long 1098b - ., LABEL - .; \
- FTR_ENTRY_LONG KEY; \
+ FTR_ENTRY_LONG KEY - .; \
.popsection
#endif
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index ec96232529ac..1df763002726 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -5,14 +5,6 @@
#define KUAP_READ 1
#define KUAP_WRITE 2
#define KUAP_READ_WRITE (KUAP_READ | KUAP_WRITE)
-/*
- * For prevent_user_access() only.
- * Use the current saved situation instead of the to/from/size params.
- * Used on book3s/32
- */
-#define KUAP_CURRENT_READ 4
-#define KUAP_CURRENT_WRITE 8
-#define KUAP_CURRENT (KUAP_CURRENT_READ | KUAP_CURRENT_WRITE)
#ifdef CONFIG_PPC_BOOK3S_64
#include <asm/book3s/64/kup.h>
@@ -46,10 +38,7 @@ void setup_kuep(bool disabled);
static inline void setup_kuep(bool disabled) { }
#endif /* CONFIG_PPC_KUEP */
-#if defined(CONFIG_PPC_KUEP) && defined(CONFIG_PPC_BOOK3S_32)
-void kuep_lock(void);
-void kuep_unlock(void);
-#else
+#ifndef CONFIG_PPC_BOOK3S_32
static inline void kuep_lock(void) { }
static inline void kuep_unlock(void) { }
#endif
@@ -83,8 +72,7 @@ static inline unsigned long kuap_get_and_assert_locked(void)
#ifndef CONFIG_PPC_BOOK3S_64
static inline void allow_user_access(void __user *to, const void __user *from,
unsigned long size, unsigned long dir) { }
-static inline void prevent_user_access(void __user *to, const void __user *from,
- unsigned long size, unsigned long dir) { }
+static inline void prevent_user_access(unsigned long dir) { }
static inline unsigned long prevent_user_access_return(void) { return 0UL; }
static inline void restore_user_access(unsigned long flags) { }
#endif /* CONFIG_PPC_BOOK3S_64 */
@@ -96,53 +84,53 @@ static __always_inline void setup_kup(void)
setup_kuap(disable_kuap);
}
-static inline void allow_read_from_user(const void __user *from, unsigned long size)
+static __always_inline void allow_read_from_user(const void __user *from, unsigned long size)
{
barrier_nospec();
allow_user_access(NULL, from, size, KUAP_READ);
}
-static inline void allow_write_to_user(void __user *to, unsigned long size)
+static __always_inline void allow_write_to_user(void __user *to, unsigned long size)
{
allow_user_access(to, NULL, size, KUAP_WRITE);
}
-static inline void allow_read_write_user(void __user *to, const void __user *from,
- unsigned long size)
+static __always_inline void allow_read_write_user(void __user *to, const void __user *from,
+ unsigned long size)
{
barrier_nospec();
allow_user_access(to, from, size, KUAP_READ_WRITE);
}
-static inline void prevent_read_from_user(const void __user *from, unsigned long size)
+static __always_inline void prevent_read_from_user(const void __user *from, unsigned long size)
{
- prevent_user_access(NULL, from, size, KUAP_READ);
+ prevent_user_access(KUAP_READ);
}
-static inline void prevent_write_to_user(void __user *to, unsigned long size)
+static __always_inline void prevent_write_to_user(void __user *to, unsigned long size)
{
- prevent_user_access(to, NULL, size, KUAP_WRITE);
+ prevent_user_access(KUAP_WRITE);
}
-static inline void prevent_read_write_user(void __user *to, const void __user *from,
- unsigned long size)
+static __always_inline void prevent_read_write_user(void __user *to, const void __user *from,
+ unsigned long size)
{
- prevent_user_access(to, from, size, KUAP_READ_WRITE);
+ prevent_user_access(KUAP_READ_WRITE);
}
-static inline void prevent_current_access_user(void)
+static __always_inline void prevent_current_access_user(void)
{
- prevent_user_access(NULL, NULL, ~0UL, KUAP_CURRENT);
+ prevent_user_access(KUAP_READ_WRITE);
}
-static inline void prevent_current_read_from_user(void)
+static __always_inline void prevent_current_read_from_user(void)
{
- prevent_user_access(NULL, NULL, ~0UL, KUAP_CURRENT_READ);
+ prevent_user_access(KUAP_READ);
}
-static inline void prevent_current_write_to_user(void)
+static __always_inline void prevent_current_write_to_user(void)
{
- prevent_user_access(NULL, NULL, ~0UL, KUAP_CURRENT_WRITE);
+ prevent_user_access(KUAP_WRITE);
}
#endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index a3633560493b..fbbf3cec92e9 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -147,6 +147,7 @@
#define KVM_GUEST_MODE_SKIP 2
#define KVM_GUEST_MODE_GUEST_HV 3
#define KVM_GUEST_MODE_HOST_HV 4
+#define KVM_GUEST_MODE_HV_P9 5 /* ISA >= v3.0 path */
#define KVM_INST_FETCH_FAILED -1
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index e6b53c6e21e3..caaa0f592d8e 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -307,6 +307,9 @@ void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1);
void kvmhv_release_all_nested(struct kvm *kvm);
long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu);
+long do_h_rpt_invalidate_pat(struct kvm_vcpu *vcpu, unsigned long lpid,
+ unsigned long type, unsigned long pg_sizes,
+ unsigned long start, unsigned long end);
int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu,
u64 time_limit, unsigned long lpcr);
void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr);
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 9bb9bb370b53..eaf3a562bf1e 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -153,10 +153,18 @@ static inline bool kvmhv_vcpu_is_radix(struct kvm_vcpu *vcpu)
return radix;
}
+int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr);
+
#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
#endif
/*
+ * Invalid HDSISR value which is used to indicate when HW has not set the reg.
+ * Used to work around an errata.
+ */
+#define HDSISR_CANARY 0x7fff
+
+/*
* We use a lock bit in HPTE dword 0 to synchronize updates and
* accesses to each HPTE, and another bit to indicate non-present
* HPTEs.
diff --git a/arch/powerpc/include/asm/kvm_guest.h b/arch/powerpc/include/asm/kvm_guest.h
index 2fca299f7e19..c63105d2c9e7 100644
--- a/arch/powerpc/include/asm/kvm_guest.h
+++ b/arch/powerpc/include/asm/kvm_guest.h
@@ -16,10 +16,10 @@ static inline bool is_kvm_guest(void)
return static_branch_unlikely(&kvm_guest);
}
-bool check_kvm_guest(void);
+int check_kvm_guest(void);
#else
static inline bool is_kvm_guest(void) { return false; }
-static inline bool check_kvm_guest(void) { return false; }
+static inline int check_kvm_guest(void) { return 0; }
#endif
#endif /* _ASM_POWERPC_KVM_GUEST_H_ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 7f2e90db2050..9f52f282b1aa 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -81,12 +81,13 @@ struct kvmppc_book3s_shadow_vcpu;
struct kvm_nested_guest;
struct kvm_vm_stat {
- ulong remote_tlb_flush;
- ulong num_2M_pages;
- ulong num_1G_pages;
+ struct kvm_vm_stat_generic generic;
+ u64 num_2M_pages;
+ u64 num_1G_pages;
};
struct kvm_vcpu_stat {
+ struct kvm_vcpu_stat_generic generic;
u64 sum_exits;
u64 mmio_exits;
u64 signal_exits;
@@ -102,14 +103,8 @@ struct kvm_vcpu_stat {
u64 emulated_inst_exits;
u64 dec_exits;
u64 ext_intr_exits;
- u64 halt_poll_success_ns;
- u64 halt_poll_fail_ns;
u64 halt_wait_ns;
- u64 halt_successful_poll;
- u64 halt_attempted_poll;
u64 halt_successful_wait;
- u64 halt_poll_invalid;
- u64 halt_wakeup;
u64 dbell_exits;
u64 gdbell_exits;
u64 ld;
@@ -298,7 +293,6 @@ struct kvm_arch {
u8 fwnmi_enabled;
u8 secure_guest;
u8 svm_enabled;
- bool threads_indep;
bool nested_enable;
bool dawr1_enabled;
pgd_t *pgtable;
@@ -684,7 +678,12 @@ struct kvm_vcpu_arch {
ulong fault_dar;
u32 fault_dsisr;
unsigned long intr_msr;
- ulong fault_gpa; /* guest real address of page fault (POWER9) */
+ /*
+ * POWER9 and later: fault_gpa contains the guest real address of page
+ * fault for a radix guest, or segment descriptor (equivalent to result
+ * from slbmfev of SLB entry that translated the EA) for hash guests.
+ */
+ ulong fault_gpa;
#endif
#ifdef CONFIG_BOOKE
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 5bf8ae9bb2cc..2d88944f9f34 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -129,6 +129,7 @@ extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
extern int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu);
extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags);
+extern void kvmppc_core_queue_syscall(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags);
extern void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu);
@@ -606,6 +607,7 @@ extern void kvmppc_free_pimap(struct kvm *kvm);
extern int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall);
extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd);
+extern int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req);
extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
@@ -638,6 +640,8 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
{ return 0; }
+static inline int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
+ { return 0; }
#endif
#ifdef CONFIG_KVM_XIVE
@@ -655,8 +659,6 @@ extern int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
u32 *priority);
extern int kvmppc_xive_int_on(struct kvm *kvm, u32 irq);
extern int kvmppc_xive_int_off(struct kvm *kvm, u32 irq);
-extern void kvmppc_xive_init_module(void);
-extern void kvmppc_xive_exit_module(void);
extern int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu);
@@ -671,6 +673,8 @@ extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
int level, bool line_status);
extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);
+extern void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu);
+extern void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu);
static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
{
@@ -680,8 +684,6 @@ static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
extern int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu);
extern void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu);
-extern void kvmppc_xive_native_init_module(void);
-extern void kvmppc_xive_native_exit_module(void);
extern int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu,
union kvmppc_one_reg *val);
extern int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu,
@@ -695,8 +697,6 @@ static inline int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
u32 *priority) { return -1; }
static inline int kvmppc_xive_int_on(struct kvm *kvm, u32 irq) { return -1; }
static inline int kvmppc_xive_int_off(struct kvm *kvm, u32 irq) { return -1; }
-static inline void kvmppc_xive_init_module(void) { }
-static inline void kvmppc_xive_exit_module(void) { }
static inline int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; }
@@ -711,14 +711,14 @@ static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { retur
static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
int level, bool line_status) { return -ENODEV; }
static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
+static inline void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu) { }
+static inline void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu) { }
static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
{ return 0; }
static inline int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; }
static inline void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) { }
-static inline void kvmppc_xive_native_init_module(void) { }
-static inline void kvmppc_xive_native_exit_module(void) { }
static inline int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu,
union kvmppc_one_reg *val)
{ return 0; }
@@ -754,7 +754,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
unsigned long tce_value, unsigned long npages);
long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
unsigned int yield_count);
-long kvmppc_h_random(struct kvm_vcpu *vcpu);
+long kvmppc_rm_h_random(struct kvm_vcpu *vcpu);
void kvmhv_commence_exit(int trap);
void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu);
void kvmppc_subcore_enter_guest(void);
diff --git a/arch/powerpc/include/asm/livepatch.h b/arch/powerpc/include/asm/livepatch.h
index ae25e6e72997..4fe018cc207b 100644
--- a/arch/powerpc/include/asm/livepatch.h
+++ b/arch/powerpc/include/asm/livepatch.h
@@ -16,7 +16,7 @@ static inline void klp_arch_set_pc(struct ftrace_regs *fregs, unsigned long ip)
{
struct pt_regs *regs = ftrace_get_regs(fregs);
- regs->nip = ip;
+ regs_set_return_ip(regs, ip);
}
#define klp_get_ftrace_location klp_get_ftrace_location
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 607168b1aef4..27016b98ecb2 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -220,7 +220,7 @@ enum {
#elif defined(CONFIG_44x)
#define MMU_FTRS_ALWAYS MMU_FTR_TYPE_44x
#endif
-#if defined(CONFIG_E200) || defined(CONFIG_E500)
+#ifdef CONFIG_E500
#define MMU_FTRS_ALWAYS MMU_FTR_TYPE_FSL_E
#endif
@@ -324,7 +324,6 @@ static inline void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
}
#endif /* !CONFIG_DEBUG_VM */
-#ifdef CONFIG_PPC_RADIX_MMU
static inline bool radix_enabled(void)
{
return mmu_has_feature(MMU_FTR_TYPE_RADIX);
@@ -334,17 +333,6 @@ static inline bool early_radix_enabled(void)
{
return early_mmu_has_feature(MMU_FTR_TYPE_RADIX);
}
-#else
-static inline bool radix_enabled(void)
-{
- return false;
-}
-
-static inline bool early_radix_enabled(void)
-{
- return false;
-}
-#endif
#ifdef CONFIG_STRICT_KERNEL_RWX
static inline bool strict_kernel_rwx_enabled(void)
@@ -357,6 +345,11 @@ static inline bool strict_kernel_rwx_enabled(void)
return false;
}
#endif
+
+static inline bool strict_module_rwx_enabled(void)
+{
+ return IS_ENABLED(CONFIG_STRICT_MODULE_RWX) && strict_kernel_rwx_enabled();
+}
#endif /* !__ASSEMBLY__ */
/* The kernel use the constants below to index in the page sizes array.
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 4bc45d3ed8b0..9ba6b585337f 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -57,7 +57,6 @@ static inline bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
static inline void mm_iommu_init(struct mm_struct *mm) { }
#endif
extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm);
-extern void set_context(unsigned long id, pgd_t *pgd);
#ifdef CONFIG_PPC_BOOK3S_64
extern void radix__switch_mmu_context(struct mm_struct *prev,
@@ -122,12 +121,6 @@ static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
}
#endif
-#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
-extern void radix_kvm_prefetch_workaround(struct mm_struct *mm);
-#else
-static inline void radix_kvm_prefetch_workaround(struct mm_struct *mm) { }
-#endif
-
extern void switch_cop(struct mm_struct *next);
extern int use_cop(unsigned long acop, struct mm_struct *mm);
extern void drop_cop(unsigned long acop, struct mm_struct *mm);
@@ -222,6 +215,18 @@ static inline void mm_context_add_copro(struct mm_struct *mm) { }
static inline void mm_context_remove_copro(struct mm_struct *mm) { }
#endif
+#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
+void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid,
+ unsigned long type, unsigned long pg_sizes,
+ unsigned long start, unsigned long end);
+#else
+static inline void do_h_rpt_invalidate_prt(unsigned long pid,
+ unsigned long lpid,
+ unsigned long type,
+ unsigned long pg_sizes,
+ unsigned long start,
+ unsigned long end) { }
+#endif
extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk);
diff --git a/arch/powerpc/include/asm/mmzone.h b/arch/powerpc/include/asm/mmzone.h
index 6cda76b57c5d..4c6c6dbd182f 100644
--- a/arch/powerpc/include/asm/mmzone.h
+++ b/arch/powerpc/include/asm/mmzone.h
@@ -18,7 +18,7 @@
* flags field of the struct page
*/
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
extern struct pglist_data *node_data[];
/*
@@ -41,7 +41,7 @@ u64 memory_hotplug_max(void);
#else
#define memory_hotplug_max() memblock_end_of_DRAM()
-#endif /* CONFIG_NEED_MULTIPLE_NODES */
+#endif /* CONFIG_NUMA */
#ifdef CONFIG_FA_DUMP
#define __HAVE_ARCH_RESERVED_KERNEL_PAGES
#endif
diff --git a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
index 39be9aea86db..64b6c608eca4 100644
--- a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
@@ -66,10 +66,9 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
}
#ifdef CONFIG_PPC_4K_PAGES
-static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
- struct page *page, int writable)
+static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
{
- size_t size = huge_page_size(hstate_vma(vma));
+ size_t size = 1UL << shift;
if (size == SZ_16K)
return __pte(pte_val(entry) & ~_PAGE_HUGE);
diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
index 295ef5639609..882a0bc7887a 100644
--- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -9,10 +9,22 @@
#ifndef __ASSEMBLY__
+#include <linux/jump_label.h>
+
#include <asm/reg.h>
+extern struct static_key_false disable_kuap_key;
+
+static __always_inline bool kuap_is_disabled(void)
+{
+ return static_branch_unlikely(&disable_kuap_key);
+}
+
static inline void kuap_save_and_lock(struct pt_regs *regs)
{
+ if (kuap_is_disabled())
+ return;
+
regs->kuap = mfspr(SPRN_MD_AP);
mtspr(SPRN_MD_AP, MD_APG_KUAP);
}
@@ -23,12 +35,20 @@ static inline void kuap_user_restore(struct pt_regs *regs)
static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
{
+ if (kuap_is_disabled())
+ return;
+
mtspr(SPRN_MD_AP, regs->kuap);
}
static inline unsigned long kuap_get_and_assert_locked(void)
{
- unsigned long kuap = mfspr(SPRN_MD_AP);
+ unsigned long kuap;
+
+ if (kuap_is_disabled())
+ return MD_APG_INIT;
+
+ kuap = mfspr(SPRN_MD_AP);
if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
WARN_ON_ONCE(kuap >> 16 != MD_APG_KUAP >> 16);
@@ -38,25 +58,35 @@ static inline unsigned long kuap_get_and_assert_locked(void)
static inline void kuap_assert_locked(void)
{
- if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
+ if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && !kuap_is_disabled())
kuap_get_and_assert_locked();
}
static inline void allow_user_access(void __user *to, const void __user *from,
unsigned long size, unsigned long dir)
{
+ if (kuap_is_disabled())
+ return;
+
mtspr(SPRN_MD_AP, MD_APG_INIT);
}
-static inline void prevent_user_access(void __user *to, const void __user *from,
- unsigned long size, unsigned long dir)
+static inline void prevent_user_access(unsigned long dir)
{
+ if (kuap_is_disabled())
+ return;
+
mtspr(SPRN_MD_AP, MD_APG_KUAP);
}
static inline unsigned long prevent_user_access_return(void)
{
- unsigned long flags = mfspr(SPRN_MD_AP);
+ unsigned long flags;
+
+ if (kuap_is_disabled())
+ return MD_APG_INIT;
+
+ flags = mfspr(SPRN_MD_AP);
mtspr(SPRN_MD_AP, MD_APG_KUAP);
@@ -65,12 +95,18 @@ static inline unsigned long prevent_user_access_return(void)
static inline void restore_user_access(unsigned long flags)
{
+ if (kuap_is_disabled())
+ return;
+
mtspr(SPRN_MD_AP, flags);
}
static inline bool
bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
{
+ if (kuap_is_disabled())
+ return false;
+
return !((regs->kuap ^ MD_APG_KUAP) & 0xff000000);
}
diff --git a/arch/powerpc/include/asm/nohash/32/mmu-44x.h b/arch/powerpc/include/asm/nohash/32/mmu-44x.h
index 2d92a39d8f2e..43ceca128531 100644
--- a/arch/powerpc/include/asm/nohash/32/mmu-44x.h
+++ b/arch/powerpc/include/asm/nohash/32/mmu-44x.h
@@ -113,6 +113,7 @@ typedef struct {
/* patch sites */
extern s32 patch__tlb_44x_hwater_D, patch__tlb_44x_hwater_I;
+extern s32 patch__tlb_44x_kuep, patch__tlb_47x_kuep;
#endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
index 6e4faa0a9b35..997cec973406 100644
--- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
@@ -178,6 +178,7 @@
#ifndef __ASSEMBLY__
#include <linux/mmdebug.h>
+#include <linux/sizes.h>
void mmu_pin_tlb(unsigned long top, bool readonly);
@@ -225,6 +226,48 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
BUG();
}
+static inline bool arch_vmap_try_size(unsigned long addr, unsigned long end, u64 pfn,
+ unsigned int max_page_shift, unsigned long size)
+{
+ if (end - addr < size)
+ return false;
+
+ if ((1UL << max_page_shift) < size)
+ return false;
+
+ if (!IS_ALIGNED(addr, size))
+ return false;
+
+ if (!IS_ALIGNED(PFN_PHYS(pfn), size))
+ return false;
+
+ return true;
+}
+
+static inline unsigned long arch_vmap_pte_range_map_size(unsigned long addr, unsigned long end,
+ u64 pfn, unsigned int max_page_shift)
+{
+ if (arch_vmap_try_size(addr, end, pfn, max_page_shift, SZ_512K))
+ return SZ_512K;
+ if (PAGE_SIZE == SZ_16K)
+ return SZ_16K;
+ if (arch_vmap_try_size(addr, end, pfn, max_page_shift, SZ_16K))
+ return SZ_16K;
+ return PAGE_SIZE;
+}
+#define arch_vmap_pte_range_map_size arch_vmap_pte_range_map_size
+
+static inline int arch_vmap_pte_supported_shift(unsigned long size)
+{
+ if (size >= SZ_512K)
+ return 19;
+ else if (size >= SZ_16K)
+ return 14;
+ else
+ return PAGE_SHIFT;
+}
+#define arch_vmap_pte_supported_shift arch_vmap_pte_supported_shift
+
/* patch sites */
extern s32 patch__itlbmiss_exit_1, patch__dtlbmiss_exit_1;
extern s32 patch__itlbmiss_perf, patch__dtlbmiss_perf;
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 96522f7f0618..f06ae00f2a65 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -54,7 +54,6 @@ extern int icache_44x_need_flush;
#define PGD_MASKED_BITS 0
#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)
-#define FIRST_USER_ADDRESS 0UL
#define pte_ERROR(e) \
pr_err("%s:%d: bad pte %llx.\n", __FILE__, __LINE__, \
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 57cd3892bfe0..53fbfdfac93d 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -12,8 +12,6 @@
#include <asm/barrier.h>
#include <asm/asm-const.h>
-#define FIRST_USER_ADDRESS 0UL
-
/*
* Size of EA range mapped by our pagetables.
*/
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index ec18ac818e3a..dc05a862e72a 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -149,11 +149,9 @@ struct paca_struct {
#endif /* CONFIG_PPC_BOOK3E */
#ifdef CONFIG_PPC_BOOK3S
- mm_context_id_t mm_ctx_id;
#ifdef CONFIG_PPC_MM_SLICES
unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE];
- unsigned long mm_ctx_slb_addr_limit;
#else
u16 mm_ctx_user_psize;
u16 mm_ctx_sllp;
@@ -167,9 +165,16 @@ struct paca_struct {
u64 kstack; /* Saved Kernel stack addr */
u64 saved_r1; /* r1 save for RTAS calls or PM or EE=0 */
u64 saved_msr; /* MSR saved here by enter_rtas */
+#ifdef CONFIG_PPC64
+ u64 exit_save_r1; /* Syscall/interrupt R1 save */
+#endif
#ifdef CONFIG_PPC_BOOK3E
u16 trap_save; /* Used when bad stack is encountered */
#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ u8 hsrr_valid; /* HSRRs set for HRFID */
+ u8 srr_valid; /* SRRs set for RFID */
+#endif
u8 irq_soft_mask; /* mask for irq soft masking */
u8 irq_happened; /* irq happened while soft-disabled */
u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */
diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h
index 6dd78a2dc03a..3360cad78ace 100644
--- a/arch/powerpc/include/asm/pgalloc.h
+++ b/arch/powerpc/include/asm/pgalloc.h
@@ -70,9 +70,4 @@ extern struct kmem_cache *pgtable_cache[];
#include <asm/nohash/pgalloc.h>
#endif
-static inline pgtable_t pmd_pgtable(pmd_t pmd)
-{
- return (pgtable_t)pmd_page_vaddr(pmd);
-}
-
#endif /* _ASM_POWERPC_PGALLOC_H */
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index c6a676714f04..d564d0ecd4cd 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -41,6 +41,10 @@ struct mm_struct;
#ifndef __ASSEMBLY__
+#ifndef MAX_PTRS_PER_PGD
+#define MAX_PTRS_PER_PGD PTRS_PER_PGD
+#endif
+
/* Keep these as a macros to avoid include dependency mess */
#define pte_page(x) pfn_to_page(pte_pfn(x))
#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
@@ -72,6 +76,7 @@ extern unsigned long empty_zero_page[];
extern pgd_t swapper_pg_dir[];
extern void paging_init(void);
+void poking_init(void);
extern unsigned long ioremap_bot;
@@ -152,6 +157,12 @@ static inline bool p4d_is_leaf(p4d_t p4d)
}
#endif
+#define pmd_pgtable pmd_pgtable
+static inline pgtable_t pmd_pgtable(pmd_t pmd)
+{
+ return (pgtable_t)pmd_page_vaddr(pmd);
+}
+
#ifdef CONFIG_PPC64
#define is_ioremap_addr is_ioremap_addr
static inline bool is_ioremap_addr(const void *x)
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index ac41776661e9..bede76dd3db7 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -76,6 +76,40 @@
#define __REGA0_R30 30
#define __REGA0_R31 31
+/* For use with PPC_RAW_() macros */
+#define _R0 0
+#define _R1 1
+#define _R2 2
+#define _R3 3
+#define _R4 4
+#define _R5 5
+#define _R6 6
+#define _R7 7
+#define _R8 8
+#define _R9 9
+#define _R10 10
+#define _R11 11
+#define _R12 12
+#define _R13 13
+#define _R14 14
+#define _R15 15
+#define _R16 16
+#define _R17 17
+#define _R18 18
+#define _R19 19
+#define _R20 20
+#define _R21 21
+#define _R22 22
+#define _R23 23
+#define _R24 24
+#define _R25 25
+#define _R26 26
+#define _R27 27
+#define _R28 28
+#define _R29 29
+#define _R30 30
+#define _R31 31
+
#define IMM_L(i) ((uintptr_t)(i) & 0xffff)
#define IMM_DS(i) ((uintptr_t)(i) & 0xfffc)
#define IMM_DQ(i) ((uintptr_t)(i) & 0xfff0)
@@ -222,13 +256,11 @@
#define PPC_INST_LWSYNC 0x7c2004ac
#define PPC_INST_SYNC 0x7c0004ac
#define PPC_INST_SYNC_MASK 0xfc0007fe
-#define PPC_INST_ISYNC 0x4c00012c
#define PPC_INST_MCRXR 0x7c000400
#define PPC_INST_MCRXR_MASK 0xfc0007fe
#define PPC_INST_MFSPR_PVR 0x7c1f42a6
#define PPC_INST_MFSPR_PVR_MASK 0xfc1ffffe
#define PPC_INST_MTMSRD 0x7c000164
-#define PPC_INST_NOP 0x60000000
#define PPC_INST_POPCNTB 0x7c0000f4
#define PPC_INST_POPCNTB_MASK 0xfc0007fe
#define PPC_INST_RFEBB 0x4c000124
@@ -241,10 +273,10 @@
#define PPC_INST_MFSPR_DSCR_USER_MASK 0xfc1ffffe
#define PPC_INST_MTSPR_DSCR_USER 0x7c0303a6
#define PPC_INST_MTSPR_DSCR_USER_MASK 0xfc1ffffe
-#define PPC_INST_SC 0x44000002
#define PPC_INST_STRING 0x7c00042a
#define PPC_INST_STRING_MASK 0xfc0007fe
#define PPC_INST_STRING_GEN_MASK 0xfc00067e
+#define PPC_INST_SETB 0x7c000100
#define PPC_INST_STSWI 0x7c0005aa
#define PPC_INST_STSWX 0x7c00052a
#define PPC_INST_TRECHKPT 0x7c0007dd
@@ -252,18 +284,9 @@
#define PPC_INST_TSR 0x7c0005dd
#define PPC_INST_LD 0xe8000000
#define PPC_INST_STD 0xf8000000
-#define PPC_INST_MFLR 0x7c0802a6
-#define PPC_INST_MTCTR 0x7c0903a6
-#define PPC_INST_ADDI 0x38000000
#define PPC_INST_ADDIS 0x3c000000
#define PPC_INST_ADD 0x7c000214
-#define PPC_INST_BLR 0x4e800020
-#define PPC_INST_BCTR 0x4e800420
-#define PPC_INST_BCTRL 0x4e800421
#define PPC_INST_DIVD 0x7c0003d2
-#define PPC_INST_RLDICR 0x78000004
-#define PPC_INST_ORI 0x60000000
-#define PPC_INST_ORIS 0x64000000
#define PPC_INST_BRANCH 0x48000000
#define PPC_INST_BL 0x48000001
#define PPC_INST_BRANCH_COND 0x40800000
@@ -323,6 +346,8 @@
#define PPC_LO(v) ((v) & 0xffff)
#define PPC_HI(v) (((v) >> 16) & 0xffff)
#define PPC_HA(v) PPC_HI((v) + 0x8000)
+#define PPC_HIGHER(v) (((v) >> 32) & 0xffff)
+#define PPC_HIGHEST(v) (((v) >> 48) & 0xffff)
/*
* Only use the larx hint bit on 64bit CPUs. e500v1/v2 based CPUs will treat a
@@ -383,6 +408,10 @@
#define PPC_RAW_STBCIX(s, a, b) (0x7c0007aa | __PPC_RS(s) | __PPC_RA(a) | __PPC_RB(b))
#define PPC_RAW_DCBFPS(a, b) (0x7c0000ac | ___PPC_RA(a) | ___PPC_RB(b) | (4 << 21))
#define PPC_RAW_DCBSTPS(a, b) (0x7c0000ac | ___PPC_RA(a) | ___PPC_RB(b) | (6 << 21))
+#define PPC_RAW_SC() (0x44000002)
+#define PPC_RAW_SYNC() (0x7c0004ac)
+#define PPC_RAW_ISYNC() (0x4c00012c)
+
/*
* Define what the VSX XX1 form instructions will look like, then add
* the 128 bit load store instructions based on that.
@@ -404,10 +433,10 @@
#define PPC_RAW_STXVP(xsp, a, i) (0x18000001 | __PPC_XSP(xsp) | ___PPC_RA(a) | IMM_DQ(i))
#define PPC_RAW_LXVPX(xtp, a, b) (0x7c00029a | __PPC_XTP(xtp) | ___PPC_RA(a) | ___PPC_RB(b))
#define PPC_RAW_STXVPX(xsp, a, b) (0x7c00039a | __PPC_XSP(xsp) | ___PPC_RA(a) | ___PPC_RB(b))
-#define PPC_RAW_PLXVP(xtp, i, a, pr) \
- ((PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_D0(i)) << 32 | (0xe8000000 | __PPC_XTP(xtp) | ___PPC_RA(a) | IMM_D1(i)))
-#define PPC_RAW_PSTXVP(xsp, i, a, pr) \
- ((PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_D0(i)) << 32 | (0xf8000000 | __PPC_XSP(xsp) | ___PPC_RA(a) | IMM_D1(i)))
+#define PPC_RAW_PLXVP_P(xtp, i, a, pr) (PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_D0(i))
+#define PPC_RAW_PLXVP_S(xtp, i, a, pr) (0xe8000000 | __PPC_XTP(xtp) | ___PPC_RA(a) | IMM_D1(i))
+#define PPC_RAW_PSTXVP_P(xsp, i, a, pr) (PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_D0(i))
+#define PPC_RAW_PSTXVP_S(xsp, i, a, pr) (0xf8000000 | __PPC_XSP(xsp) | ___PPC_RA(a) | IMM_D1(i))
#define PPC_RAW_NAP (0x4c000364)
#define PPC_RAW_SLEEP (0x4c0003a4)
#define PPC_RAW_WINKLE (0x4c0003e4)
@@ -445,16 +474,17 @@
#define PPC_RAW_ADD_DOT(t, a, b) (PPC_INST_ADD | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1)
#define PPC_RAW_ADDC(t, a, b) (0x7c000014 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
#define PPC_RAW_ADDC_DOT(t, a, b) (0x7c000014 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1)
-#define PPC_RAW_NOP() (PPC_INST_NOP)
-#define PPC_RAW_BLR() (PPC_INST_BLR)
+#define PPC_RAW_NOP() PPC_RAW_ORI(0, 0, 0)
+#define PPC_RAW_BLR() (0x4e800020)
#define PPC_RAW_BLRL() (0x4e800021)
#define PPC_RAW_MTLR(r) (0x7c0803a6 | ___PPC_RT(r))
-#define PPC_RAW_MFLR(t) (PPC_INST_MFLR | ___PPC_RT(t))
-#define PPC_RAW_BCTR() (PPC_INST_BCTR)
-#define PPC_RAW_MTCTR(r) (PPC_INST_MTCTR | ___PPC_RT(r))
-#define PPC_RAW_ADDI(d, a, i) (PPC_INST_ADDI | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_MFLR(t) (0x7c0802a6 | ___PPC_RT(t))
+#define PPC_RAW_BCTR() (0x4e800420)
+#define PPC_RAW_BCTRL() (0x4e800421)
+#define PPC_RAW_MTCTR(r) (0x7c0903a6 | ___PPC_RT(r))
+#define PPC_RAW_ADDI(d, a, i) (0x38000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
#define PPC_RAW_LI(r, i) PPC_RAW_ADDI(r, 0, i)
-#define PPC_RAW_ADDIS(d, a, i) (PPC_INST_ADDIS | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_ADDIS(d, a, i) (0x3c000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
#define PPC_RAW_ADDIC(d, a, i) (0x30000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
#define PPC_RAW_ADDIC_DOT(d, a, i) (0x34000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
#define PPC_RAW_LIS(r, i) PPC_RAW_ADDIS(r, 0, i)
@@ -499,8 +529,8 @@
#define PPC_RAW_AND_DOT(d, a, b) (0x7c000039 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
#define PPC_RAW_OR(d, a, b) (0x7c000378 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
#define PPC_RAW_MR(d, a) PPC_RAW_OR(d, a, a)
-#define PPC_RAW_ORI(d, a, i) (PPC_INST_ORI | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
-#define PPC_RAW_ORIS(d, a, i) (PPC_INST_ORIS | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
+#define PPC_RAW_ORI(d, a, i) (0x60000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
+#define PPC_RAW_ORIS(d, a, i) (0x64000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
#define PPC_RAW_NOR(d, a, b) (0x7c0000f8 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
#define PPC_RAW_XOR(d, a, b) (0x7c000278 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
#define PPC_RAW_XORI(d, a, i) (0x68000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
@@ -519,7 +549,7 @@
(0x54000001 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i) | __PPC_MB(mb) | __PPC_ME(me))
#define PPC_RAW_RLWIMI(d, a, i, mb, me) (0x50000000 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i) | __PPC_MB(mb) | __PPC_ME(me))
#define PPC_RAW_RLDICL(d, a, i, mb) (0x78000000 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i) | __PPC_MB64(mb))
-#define PPC_RAW_RLDICR(d, a, i, me) (PPC_INST_RLDICR | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i) | __PPC_ME64(me))
+#define PPC_RAW_RLDICR(d, a, i, me) (0x78000004 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i) | __PPC_ME64(me))
/* slwi = rlwinm Rx, Ry, n, 0, 31-n */
#define PPC_RAW_SLWI(d, a, i) PPC_RAW_RLWINM(d, a, i, 0, 31-(i))
@@ -533,6 +563,8 @@
#define PPC_RAW_NEG(d, a) (0x7c0000d0 | ___PPC_RT(d) | ___PPC_RA(a))
#define PPC_RAW_MFSPR(d, spr) (0x7c0002a6 | ___PPC_RT(d) | __PPC_SPR(spr))
+#define PPC_RAW_MTSPR(spr, d) (0x7c0003a6 | ___PPC_RS(d) | __PPC_SPR(spr))
+#define PPC_RAW_EIEIO() (0x7c0006ac)
/* Deal with instructions that older assemblers aren't aware of */
#define PPC_BCCTR_FLUSH stringify_in_c(.long PPC_INST_BCCTR_FLUSH)
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index d6739d700f0a..116c1519728a 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -762,6 +762,21 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96)
stringify_in_c(.long (_target) - . ;) \
stringify_in_c(.previous)
+#define SOFT_MASK_TABLE(_start, _end) \
+ stringify_in_c(.section __soft_mask_table,"a";)\
+ stringify_in_c(.balign 8;) \
+ stringify_in_c(.llong (_start);) \
+ stringify_in_c(.llong (_end);) \
+ stringify_in_c(.previous)
+
+#define RESTART_TABLE(_start, _end, _target) \
+ stringify_in_c(.section __restart_table,"a";)\
+ stringify_in_c(.balign 8;) \
+ stringify_in_c(.llong (_start);) \
+ stringify_in_c(.llong (_end);) \
+ stringify_in_c(.llong (_target);) \
+ stringify_in_c(.previous)
+
#ifdef CONFIG_PPC_FSL_BOOK3E
#define BTB_FLUSH(reg) \
lis reg,BUCSR_INIT@h; \
diff --git a/arch/powerpc/include/asm/probes.h b/arch/powerpc/include/asm/probes.h
index 84dd1addd434..c5d984700d24 100644
--- a/arch/powerpc/include/asm/probes.h
+++ b/arch/powerpc/include/asm/probes.h
@@ -34,14 +34,14 @@ typedef u32 ppc_opcode_t;
/* Enable single stepping for the current task */
static inline void enable_single_step(struct pt_regs *regs)
{
- regs->msr |= MSR_SINGLESTEP;
+ regs_set_return_msr(regs, regs->msr | MSR_SINGLESTEP);
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
/*
* We turn off Critical Input Exception(CE) to ensure that the single
* step will be for the instruction we have the probe on; if we don't,
* it is possible we'd get the single step reported for CE.
*/
- regs->msr &= ~MSR_CE;
+ regs_set_return_msr(regs, regs->msr & ~MSR_CE);
mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
#ifdef CONFIG_PPC_47x
isync();
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 7bf8a15af224..f348e564f7dd 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -276,7 +276,15 @@ struct thread_struct {
#define SPEFSCR_INIT
#endif
-#ifdef CONFIG_PPC32
+#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
+#define INIT_THREAD { \
+ .ksp = INIT_SP, \
+ .pgdir = swapper_pg_dir, \
+ .kuap = ~0UL, /* KUAP_NONE */ \
+ .fpexc_mode = MSR_FE0 | MSR_FE1, \
+ SPEFSCR_INIT \
+}
+#elif defined(CONFIG_PPC32)
#define INIT_THREAD { \
.ksp = INIT_SP, \
.pgdir = swapper_pg_dir, \
@@ -339,17 +347,6 @@ static inline unsigned long __pack_fe01(unsigned int fpmode)
#define spin_end() HMT_medium()
-#define spin_until_cond(cond) \
-do { \
- if (unlikely(!(cond))) { \
- spin_begin(); \
- do { \
- spin_cpu_relax(); \
- } while (!(cond)); \
- spin_end(); \
- } \
-} while (0)
-
#endif
/* Check that a certain kernel stack pointer is valid in task_struct p */
diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h
index e646c7f218bc..8a0d8fb35328 100644
--- a/arch/powerpc/include/asm/ps3.h
+++ b/arch/powerpc/include/asm/ps3.h
@@ -71,6 +71,7 @@ struct ps3_dma_region_ops;
* @bus_addr: The 'translated' bus address of the region.
* @len: The length in bytes of the region.
* @offset: The offset from the start of memory of the region.
+ * @dma_mask: Device dma_mask.
* @ioid: The IOID of the device who owns this region
* @chunk_list: Opaque variable used by the ioc page manager.
* @region_ops: struct ps3_dma_region_ops - dma region operations
@@ -85,6 +86,7 @@ struct ps3_dma_region {
enum ps3_dma_region_type region_type;
unsigned long len;
unsigned long offset;
+ u64 dma_mask;
/* driver variables (set by ps3_dma_region_create) */
unsigned long bus_addr;
@@ -232,7 +234,7 @@ enum lv1_result {
static inline const char* ps3_result(int result)
{
-#if defined(DEBUG) || defined(PS3_VERBOSE_RESULT)
+#if defined(DEBUG) || defined(PS3_VERBOSE_RESULT) || defined(CONFIG_PS3_VERBOSE_RESULT)
switch (result) {
case LV1_SUCCESS:
return "LV1_SUCCESS (0)";
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index b476a685f066..3e5d470a6155 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -48,11 +48,12 @@ struct pt_regs
unsigned long result;
};
};
-
+#if defined(CONFIG_PPC64) || defined(CONFIG_PPC_KUAP)
union {
struct {
#ifdef CONFIG_PPC64
unsigned long ppr;
+ unsigned long exit_result;
#endif
union {
#ifdef CONFIG_PPC_KUAP
@@ -68,6 +69,7 @@ struct pt_regs
};
unsigned long __pad[4]; /* Maintain 16 byte interrupt stack alignment */
};
+#endif
};
#endif
@@ -122,6 +124,41 @@ struct pt_regs
#endif /* __powerpc64__ */
#ifndef __ASSEMBLY__
+#include <asm/paca.h>
+
+#ifdef CONFIG_SMP
+extern unsigned long profile_pc(struct pt_regs *regs);
+#else
+#define profile_pc(regs) instruction_pointer(regs)
+#endif
+
+long do_syscall_trace_enter(struct pt_regs *regs);
+void do_syscall_trace_leave(struct pt_regs *regs);
+
+static inline void set_return_regs_changed(void)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+ local_paca->hsrr_valid = 0;
+ local_paca->srr_valid = 0;
+#endif
+}
+
+static inline void regs_set_return_ip(struct pt_regs *regs, unsigned long ip)
+{
+ regs->nip = ip;
+ set_return_regs_changed();
+}
+
+static inline void regs_set_return_msr(struct pt_regs *regs, unsigned long msr)
+{
+ regs->msr = msr;
+ set_return_regs_changed();
+}
+
+static inline void regs_add_return_ip(struct pt_regs *regs, long offset)
+{
+ regs_set_return_ip(regs, regs->nip + offset);
+}
static inline unsigned long instruction_pointer(struct pt_regs *regs)
{
@@ -131,7 +168,7 @@ static inline unsigned long instruction_pointer(struct pt_regs *regs)
static inline void instruction_pointer_set(struct pt_regs *regs,
unsigned long val)
{
- regs->nip = val;
+ regs_set_return_ip(regs, val);
}
static inline unsigned long user_stack_pointer(struct pt_regs *regs)
@@ -144,15 +181,6 @@ static inline unsigned long frame_pointer(struct pt_regs *regs)
return 0;
}
-#ifdef CONFIG_SMP
-extern unsigned long profile_pc(struct pt_regs *regs);
-#else
-#define profile_pc(regs) instruction_pointer(regs)
-#endif
-
-long do_syscall_trace_enter(struct pt_regs *regs);
-void do_syscall_trace_leave(struct pt_regs *regs);
-
#ifdef __powerpc64__
#define user_mode(regs) ((((regs)->msr) >> MSR_PR_LG) & 0x1)
#else
diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
index 07318bc63e3d..b676c4fb90fd 100644
--- a/arch/powerpc/include/asm/qspinlock.h
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -37,7 +37,7 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock)
{
u32 val = 0;
- if (likely(atomic_try_cmpxchg_lock(&lock->val, &val, _Q_LOCKED_VAL)))
+ if (likely(arch_atomic_try_cmpxchg_lock(&lock->val, &val, _Q_LOCKED_VAL)))
return;
queued_spin_lock_slowpath(lock, val);
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 7c81d3e563b2..be85cf156a1f 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -393,6 +393,7 @@
#define SPRN_PMMAR 0x356 /* Power Management Memory Activity Register */
#define SPRN_PSSCR 0x357 /* Processor Stop Status and Control Register (ISA 3.0) */
#define SPRN_PSSCR_PR 0x337 /* PSSCR ISA 3.0, privileged mode access */
+#define SPRN_TRIG2 0x372
#define SPRN_PMCR 0x374 /* Power Management Control Register */
#define SPRN_RWMR 0x375 /* Region-Weighting Mode Register */
@@ -1435,8 +1436,6 @@ static inline void mtsr(u32 val, u32 idx)
}
#endif
-#define proc_trap() asm volatile("trap")
-
extern unsigned long current_stack_frame(void);
register unsigned long current_stack_pointer asm("r1");
@@ -1447,16 +1446,6 @@ extern void scom970_write(unsigned int address, unsigned long value);
struct pt_regs;
extern void ppc_save_regs(struct pt_regs *regs);
-
-static inline void update_power8_hid0(unsigned long hid0)
-{
- /*
- * The HID0 update on Power8 should at the very least be
- * preceded by a SYNC instruction followed by an ISYNC
- * instruction
- */
- asm volatile("sync; mtspr %0,%1; isync":: "i"(SPRN_HID0), "r"(hid0));
-}
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_REG_H */
diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h
index b774a4477d5f..792eefaf230b 100644
--- a/arch/powerpc/include/asm/security_features.h
+++ b/arch/powerpc/include/asm/security_features.h
@@ -92,6 +92,9 @@ static inline bool security_ftr_enabled(u64 feature)
// The L1-D cache should be flushed after user accesses from the kernel
#define SEC_FTR_L1D_FLUSH_UACCESS 0x0000000000008000ull
+// The STF flush should be executed on privilege state switch
+#define SEC_FTR_STF_BARRIER 0x0000000000010000ull
+
// Features enabled by default
#define SEC_FTR_DEFAULT \
(SEC_FTR_L1D_FLUSH_HV | \
@@ -99,6 +102,7 @@ static inline bool security_ftr_enabled(u64 feature)
SEC_FTR_BNDS_CHK_SPEC_BAR | \
SEC_FTR_L1D_FLUSH_ENTRY | \
SEC_FTR_L1D_FLUSH_UACCESS | \
+ SEC_FTR_STF_BARRIER | \
SEC_FTR_FAVOUR_SECURITY)
#endif /* _ASM_POWERPC_SECURITY_FEATURES_H */
diff --git a/arch/powerpc/include/asm/set_memory.h b/arch/powerpc/include/asm/set_memory.h
new file mode 100644
index 000000000000..b040094f7920
--- /dev/null
+++ b/arch/powerpc/include/asm/set_memory.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_SET_MEMORY_H
+#define _ASM_POWERPC_SET_MEMORY_H
+
+#define SET_MEMORY_RO 0
+#define SET_MEMORY_RW 1
+#define SET_MEMORY_NX 2
+#define SET_MEMORY_X 3
+
+int change_memory_attr(unsigned long addr, int numpages, long action);
+
+static inline int set_memory_ro(unsigned long addr, int numpages)
+{
+ return change_memory_attr(addr, numpages, SET_MEMORY_RO);
+}
+
+static inline int set_memory_rw(unsigned long addr, int numpages)
+{
+ return change_memory_attr(addr, numpages, SET_MEMORY_RW);
+}
+
+static inline int set_memory_nx(unsigned long addr, int numpages)
+{
+ return change_memory_attr(addr, numpages, SET_MEMORY_NX);
+}
+
+static inline int set_memory_x(unsigned long addr, int numpages)
+{
+ return change_memory_attr(addr, numpages, SET_MEMORY_X);
+}
+
+int set_memory_attr(unsigned long addr, int numpages, pgprot_t prot);
+
+#endif
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index e89bfebd4e00..6c1a7d217d1a 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -10,7 +10,6 @@ extern void ppc_printk_progress(char *s, unsigned short hex);
extern unsigned int rtas_data;
extern unsigned long long memory_limit;
extern bool init_mem_is_free;
-extern unsigned long klimit;
extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
struct device_node;
diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h
index 972ed0df154d..1df867c2e054 100644
--- a/arch/powerpc/include/asm/sstep.h
+++ b/arch/powerpc/include/asm/sstep.h
@@ -13,12 +13,11 @@ struct pt_regs;
* we don't allow putting a breakpoint on an mtmsrd instruction.
* Similarly we don't allow breakpoints on rfid instructions.
* These macros tell us if an instruction is a mtmsrd or rfid.
- * Note that IS_MTMSRD returns true for both an mtmsr (32-bit)
- * and an mtmsrd (64-bit).
+ * Note that these return true for both mtmsr/rfi (32-bit)
+ * and mtmsrd/rfid (64-bit).
*/
#define IS_MTMSRD(instr) ((ppc_inst_val(instr) & 0xfc0007be) == 0x7c000124)
-#define IS_RFID(instr) ((ppc_inst_val(instr) & 0xfc0007fe) == 0x4c000024)
-#define IS_RFI(instr) ((ppc_inst_val(instr) & 0xfc0007fe) == 0x4c000064)
+#define IS_RFID(instr) ((ppc_inst_val(instr) & 0xfc0007be) == 0x4c000024)
enum instruction_type {
COMPUTE, /* arith/logical/CR op, etc. */
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 8dd3cdb25338..8c2c3dd4ddba 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -97,6 +97,18 @@ extern void div128_by_32(u64 dividend_high, u64 dividend_low,
extern void secondary_cpu_time_init(void);
extern void __init time_init(void);
+#ifdef CONFIG_PPC64
+static inline unsigned long test_irq_work_pending(void)
+{
+ unsigned long x;
+
+ asm volatile("lbz %0,%1(13)"
+ : "=r" (x)
+ : "i" (offsetof(struct paca_struct, irq_work_pending)));
+ return x;
+}
+#endif
+
DECLARE_PER_CPU(u64, decrementers_next_tb);
/* Convert timebase ticks to nanoseconds */
diff --git a/arch/powerpc/include/asm/unaligned.h b/arch/powerpc/include/asm/unaligned.h
deleted file mode 100644
index ce69c5eff95e..000000000000
--- a/arch/powerpc/include/asm/unaligned.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_POWERPC_UNALIGNED_H
-#define _ASM_POWERPC_UNALIGNED_H
-
-#ifdef __KERNEL__
-
-/*
- * The PowerPC can do unaligned accesses itself based on its endian mode.
- */
-#include <linux/unaligned/access_ok.h>
-#include <linux/unaligned/generic.h>
-
-#ifdef __LITTLE_ENDIAN__
-#define get_unaligned __get_unaligned_le
-#define put_unaligned __put_unaligned_le
-#else
-#define get_unaligned __get_unaligned_be
-#define put_unaligned __put_unaligned_be
-#endif
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_POWERPC_UNALIGNED_H */
diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h
index 5bf65f5d44a9..fe683371336f 100644
--- a/arch/powerpc/include/asm/uprobes.h
+++ b/arch/powerpc/include/asm/uprobes.h
@@ -24,8 +24,8 @@ typedef ppc_opcode_t uprobe_opcode_t;
struct arch_uprobe {
union {
- struct ppc_inst insn;
- struct ppc_inst ixol;
+ u32 insn[2];
+ u32 ixol[2];
};
};
diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h
index e33f80b0ea81..57573d9c1e09 100644
--- a/arch/powerpc/include/asm/vas.h
+++ b/arch/powerpc/include/asm/vas.h
@@ -5,8 +5,10 @@
#ifndef _ASM_POWERPC_VAS_H
#define _ASM_POWERPC_VAS_H
-
-struct vas_window;
+#include <linux/sched/mm.h>
+#include <linux/mmu_context.h>
+#include <asm/icswx.h>
+#include <uapi/asm/vas-api.h>
/*
* Min and max FIFO sizes are based on Version 1.05 Section 3.1.4.25
@@ -49,6 +51,64 @@ enum vas_cop_type {
};
/*
+ * User space VAS windows are opened by tasks and take references
+ * to pid and mm until windows are closed.
+ * Stores pid, mm, and tgid for each window.
+ */
+struct vas_user_win_ref {
+ struct pid *pid; /* PID of owner */
+ struct pid *tgid; /* Thread group ID of owner */
+ struct mm_struct *mm; /* Linux process mm_struct */
+};
+
+/*
+ * Common VAS window struct on PowerNV and PowerVM
+ */
+struct vas_window {
+ u32 winid;
+ u32 wcreds_max; /* Window credits */
+ enum vas_cop_type cop;
+ struct vas_user_win_ref task_ref;
+ char *dbgname;
+ struct dentry *dbgdir;
+};
+
+/*
+ * User space window operations used for powernv and powerVM
+ */
+struct vas_user_win_ops {
+ struct vas_window * (*open_win)(int vas_id, u64 flags,
+ enum vas_cop_type);
+ u64 (*paste_addr)(struct vas_window *);
+ int (*close_win)(struct vas_window *);
+};
+
+static inline void put_vas_user_win_ref(struct vas_user_win_ref *ref)
+{
+ /* Drop references to pid, tgid, and mm */
+ put_pid(ref->pid);
+ put_pid(ref->tgid);
+ if (ref->mm)
+ mmdrop(ref->mm);
+}
+
+static inline void vas_user_win_add_mm_context(struct vas_user_win_ref *ref)
+{
+ mm_context_add_vas_window(ref->mm);
+ /*
+ * Even a process that has no foreign real address mapping can
+ * use an unpaired COPY instruction (to no real effect). Issue
+ * CP_ABORT to clear any pending COPY and prevent a covert
+ * channel.
+ *
+ * __switch_to() will issue CP_ABORT on future context switches
+ * if process / thread has any open VAS window (Use
+ * current->mm->context.vas_windows).
+ */
+ asm volatile(PPC_CP_ABORT);
+}
+
+/*
* Receive window attributes specified by the (in-kernel) owner of window.
*/
struct vas_rx_win_attr {
@@ -100,6 +160,7 @@ struct vas_tx_win_attr {
bool rx_win_ord_mode;
};
+#ifdef CONFIG_PPC_POWERNV
/*
* Helper to map a chip id to VAS id.
* For POWER9, this is a 1:1 mapping. In the future this maybe a 1:N
@@ -162,6 +223,43 @@ int vas_copy_crb(void *crb, int offset);
*/
int vas_paste_crb(struct vas_window *win, int offset, bool re);
+int vas_register_api_powernv(struct module *mod, enum vas_cop_type cop_type,
+ const char *name);
+void vas_unregister_api_powernv(void);
+#endif
+
+#ifdef CONFIG_PPC_PSERIES
+
+/* VAS Capabilities */
+#define VAS_GZIP_QOS_FEAT 0x1
+#define VAS_GZIP_DEF_FEAT 0x2
+#define VAS_GZIP_QOS_FEAT_BIT PPC_BIT(VAS_GZIP_QOS_FEAT) /* Bit 1 */
+#define VAS_GZIP_DEF_FEAT_BIT PPC_BIT(VAS_GZIP_DEF_FEAT) /* Bit 2 */
+
+/* NX Capabilities */
+#define VAS_NX_GZIP_FEAT 0x1
+#define VAS_NX_GZIP_FEAT_BIT PPC_BIT(VAS_NX_GZIP_FEAT) /* Bit 1 */
+
+/*
+ * These structs are used to retrieve overall VAS capabilities that
+ * the hypervisor provides.
+ */
+struct hv_vas_all_caps {
+ __be64 descriptor;
+ __be64 feat_type;
+} __packed __aligned(0x1000);
+
+struct vas_all_caps {
+ u64 descriptor;
+ u64 feat_type;
+};
+
+int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result);
+int vas_register_api_pseries(struct module *mod,
+ enum vas_cop_type cop_type, const char *name);
+void vas_unregister_api_pseries(void);
+#endif
+
/*
* Register / unregister coprocessor type to VAS API which will be exported
* to user space. Applications can use this API to open / close window
@@ -171,7 +269,12 @@ int vas_paste_crb(struct vas_window *win, int offset, bool re);
* used for others in future.
*/
int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type,
- const char *name);
+ const char *name,
+ const struct vas_user_win_ops *vops);
void vas_unregister_coproc_api(void);
+int get_vas_user_win_ref(struct vas_user_win_ref *task_ref);
+void vas_update_csb(struct coprocessor_request_block *crb,
+ struct vas_user_win_ref *task_ref);
+void vas_dump_crb(struct coprocessor_request_block *crb);
#endif /* __ASM_POWERPC_VAS_H */
diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h
index 8e903b3f9c24..d9cf192368ad 100644
--- a/arch/powerpc/include/asm/xics.h
+++ b/arch/powerpc/include/asm/xics.h
@@ -65,8 +65,12 @@ struct icp_ops {
extern const struct icp_ops *icp_ops;
+#ifdef CONFIG_PPC_ICS_NATIVE
/* Native ICS */
extern int ics_native_init(void);
+#else
+static inline int ics_native_init(void) { return -ENODEV; }
+#endif
/* RTAS ICS */
#ifdef CONFIG_PPC_ICS_RTAS
diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h
index 50ef95e2f5b1..82488b1e7276 100644
--- a/arch/powerpc/include/uapi/asm/papr_pdsm.h
+++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h
@@ -77,6 +77,9 @@
/* Indicate that the 'dimm_fuel_gauge' field is valid */
#define PDSM_DIMM_HEALTH_RUN_GAUGE_VALID 1
+/* Indicate that the 'dimm_dsc' field is valid */
+#define PDSM_DIMM_DSC_VALID 2
+
/*
* Struct exchanged between kernel & ndctl in for PAPR_PDSM_HEALTH
* Various flags indicate the health status of the dimm.
@@ -105,6 +108,9 @@ struct nd_papr_pdsm_health {
/* Extension flag PDSM_DIMM_HEALTH_RUN_GAUGE_VALID */
__u16 dimm_fuel_gauge;
+
+ /* Extension flag PDSM_DIMM_DSC_VALID */
+ __u64 dimm_dsc;
};
__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
};
diff --git a/arch/powerpc/include/uapi/asm/vas-api.h b/arch/powerpc/include/uapi/asm/vas-api.h
index ebd4b2424785..7c81301ecdba 100644
--- a/arch/powerpc/include/uapi/asm/vas-api.h
+++ b/arch/powerpc/include/uapi/asm/vas-api.h
@@ -13,11 +13,15 @@
#define VAS_MAGIC 'v'
#define VAS_TX_WIN_OPEN _IOW(VAS_MAGIC, 0x20, struct vas_tx_win_open_attr)
+/* Flags to VAS TX open window ioctl */
+/* To allocate a window with QoS credit, otherwise use default credit */
+#define VAS_TX_WIN_FLAG_QOS_CREDIT 0x0000000000000001
+
struct vas_tx_win_open_attr {
__u32 version;
__s16 vas_id; /* specific instance of vas or -1 for default */
__u16 reserved1;
- __u64 flags; /* Future use */
+ __u64 flags;
__u64 reserved2[6];
};
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 28af4efb4587..a47eefa09bcb 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -86,11 +86,7 @@ int main(void)
OFFSET(PACA_CANARY, paca_struct, canary);
#endif
#endif
- OFFSET(MMCONTEXTID, mm_struct, context.id);
-#ifdef CONFIG_PPC64
- DEFINE(SIGSEGV, SIGSEGV);
- DEFINE(NMI_MASK, NMI_MASK);
-#else
+#ifdef CONFIG_PPC32
#ifdef CONFIG_PPC_RTAS
OFFSET(RTAS_SP, thread_struct, rtas_sp);
#endif
@@ -119,7 +115,6 @@ int main(void)
#ifdef CONFIG_ALTIVEC
OFFSET(THREAD_VRSTATE, thread_struct, vr_state.vr);
OFFSET(THREAD_VRSAVEAREA, thread_struct, vr_save_area);
- OFFSET(THREAD_VRSAVE, thread_struct, vrsave);
OFFSET(THREAD_USED_VR, thread_struct, used_vr);
OFFSET(VRSTATE_VSCR, thread_vr_state, vscr);
OFFSET(THREAD_LOAD_VEC, thread_struct, load_vec);
@@ -150,22 +145,15 @@ int main(void)
#ifdef CONFIG_SPE
OFFSET(THREAD_EVR0, thread_struct, evr[0]);
OFFSET(THREAD_ACC, thread_struct, acc);
- OFFSET(THREAD_SPEFSCR, thread_struct, spefscr);
OFFSET(THREAD_USED_SPE, thread_struct, used_spe);
#endif /* CONFIG_SPE */
#endif /* CONFIG_PPC64 */
-#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
- OFFSET(THREAD_DBCR0, thread_struct, debug.dbcr0);
-#endif
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
OFFSET(THREAD_KVM_SVCPU, thread_struct, kvm_shadow_vcpu);
#endif
#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
OFFSET(THREAD_KVM_VCPU, thread_struct, kvm_vcpu);
#endif
-#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
- OFFSET(KUAP, thread_struct, kuap);
-#endif
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
OFFSET(PACATMSCRATCH, paca_struct, tm_scratch);
@@ -185,19 +173,12 @@ int main(void)
sizeof(struct pt_regs) + 16);
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
- OFFSET(TI_FLAGS, thread_info, flags);
OFFSET(TI_LOCAL_FLAGS, thread_info, local_flags);
- OFFSET(TI_PREEMPT, thread_info, preempt_count);
#ifdef CONFIG_PPC64
OFFSET(DCACHEL1BLOCKSIZE, ppc64_caches, l1d.block_size);
OFFSET(DCACHEL1LOGBLOCKSIZE, ppc64_caches, l1d.log_block_size);
- OFFSET(DCACHEL1BLOCKSPERPAGE, ppc64_caches, l1d.blocks_per_page);
- OFFSET(ICACHEL1BLOCKSIZE, ppc64_caches, l1i.block_size);
- OFFSET(ICACHEL1LOGBLOCKSIZE, ppc64_caches, l1i.log_block_size);
- OFFSET(ICACHEL1BLOCKSPERPAGE, ppc64_caches, l1i.blocks_per_page);
/* paca */
- DEFINE(PACA_SIZE, sizeof(struct paca_struct));
OFFSET(PACAPACAINDEX, paca_struct, paca_index);
OFFSET(PACAPROCSTART, paca_struct, cpu_start);
OFFSET(PACAKSAVE, paca_struct, kstack);
@@ -209,18 +190,13 @@ int main(void)
OFFSET(PACATOC, paca_struct, kernel_toc);
OFFSET(PACAKBASE, paca_struct, kernelbase);
OFFSET(PACAKMSR, paca_struct, kernel_msr);
+#ifdef CONFIG_PPC_BOOK3S_64
+ OFFSET(PACAHSRR_VALID, paca_struct, hsrr_valid);
+ OFFSET(PACASRR_VALID, paca_struct, srr_valid);
+#endif
OFFSET(PACAIRQSOFTMASK, paca_struct, irq_soft_mask);
OFFSET(PACAIRQHAPPENED, paca_struct, irq_happened);
OFFSET(PACA_FTRACE_ENABLED, paca_struct, ftrace_enabled);
-#ifdef CONFIG_PPC_BOOK3S
- OFFSET(PACACONTEXTID, paca_struct, mm_ctx_id);
-#ifdef CONFIG_PPC_MM_SLICES
- OFFSET(PACALOWSLICESPSIZE, paca_struct, mm_ctx_low_slices_psize);
- OFFSET(PACAHIGHSLICEPSIZE, paca_struct, mm_ctx_high_slices_psize);
- OFFSET(PACA_SLB_ADDR_LIMIT, paca_struct, mm_ctx_slb_addr_limit);
- DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def));
-#endif /* CONFIG_PPC_MM_SLICES */
-#endif
#ifdef CONFIG_PPC_BOOK3E
OFFSET(PACAPGD, paca_struct, pgd);
@@ -241,21 +217,9 @@ int main(void)
#endif /* CONFIG_PPC_BOOK3E */
#ifdef CONFIG_PPC_BOOK3S_64
- OFFSET(PACASLBCACHE, paca_struct, slb_cache);
- OFFSET(PACASLBCACHEPTR, paca_struct, slb_cache_ptr);
- OFFSET(PACASTABRR, paca_struct, stab_rr);
- OFFSET(PACAVMALLOCSLLP, paca_struct, vmalloc_sllp);
-#ifdef CONFIG_PPC_MM_SLICES
- OFFSET(MMUPSIZESLLP, mmu_psize_def, sllp);
-#else
- OFFSET(PACACONTEXTSLLP, paca_struct, mm_ctx_sllp);
-#endif /* CONFIG_PPC_MM_SLICES */
OFFSET(PACA_EXGEN, paca_struct, exgen);
OFFSET(PACA_EXMC, paca_struct, exmc);
OFFSET(PACA_EXNMI, paca_struct, exnmi);
-#ifdef CONFIG_PPC_PSERIES
- OFFSET(PACALPPACAPTR, paca_struct, lppaca_ptr);
-#endif
OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr);
OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid);
OFFSET(SLBSHADOW_STACKESID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid);
@@ -264,9 +228,7 @@ int main(void)
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
OFFSET(PACA_PMCINUSE, paca_struct, pmcregs_in_use);
#endif
- OFFSET(LPPACA_DTLIDX, lppaca, dtl_idx);
OFFSET(LPPACA_YIELDCOUNT, lppaca, yield_count);
- OFFSET(PACA_DTL_RIDX, paca_struct, dtl_ridx);
#endif /* CONFIG_PPC_BOOK3S_64 */
OFFSET(PACAEMERGSP, paca_struct, emergency_sp);
#ifdef CONFIG_PPC_BOOK3S_64
@@ -282,6 +244,9 @@ int main(void)
OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
OFFSET(PACAKEXECSTATE, paca_struct, kexec_state);
OFFSET(PACA_DSCR_DEFAULT, paca_struct, dscr_default);
+#ifdef CONFIG_PPC64
+ OFFSET(PACA_EXIT_SAVE_R1, paca_struct, exit_save_r1);
+#endif
#ifdef CONFIG_PPC_BOOK3E
OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save);
#endif
@@ -343,10 +308,6 @@ int main(void)
STACK_PT_REGS_OFFSET(STACK_REGS_AMR, amr);
STACK_PT_REGS_OFFSET(STACK_REGS_IAMR, iamr);
#endif
-#ifdef CONFIG_PPC_KUAP
- STACK_PT_REGS_OFFSET(STACK_REGS_KUAP, kuap);
-#endif
-
#if defined(CONFIG_PPC32)
#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
@@ -368,10 +329,6 @@ int main(void)
#endif
#endif
-#ifndef CONFIG_PPC64
- OFFSET(MM_PGD, mm_struct, pgd);
-#endif /* ! CONFIG_PPC64 */
-
/* About the CPU features table */
OFFSET(CPU_SPEC_FEATURES, cpu_spec, cpu_features);
OFFSET(CPU_SPEC_SETUP, cpu_spec, cpu_setup);
@@ -404,13 +361,6 @@ int main(void)
DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
#endif
-#ifdef CONFIG_PPC_BOOK3S_64
- DEFINE(PGD_TABLE_SIZE, (sizeof(pgd_t) << max(RADIX_PGD_INDEX_SIZE, H_PGD_INDEX_SIZE)));
-#else
- DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE);
-#endif
- DEFINE(PTE_SIZE, sizeof(pte_t));
-
#ifdef CONFIG_KVM
OFFSET(VCPU_HOST_STACK, kvm_vcpu, arch.host_stack);
OFFSET(VCPU_HOST_PID, kvm_vcpu, arch.host_pid);
@@ -482,11 +432,9 @@ int main(void)
OFFSET(KVM_HOST_LPID, kvm, arch.host_lpid);
OFFSET(KVM_HOST_LPCR, kvm, arch.host_lpcr);
OFFSET(KVM_HOST_SDR1, kvm, arch.host_sdr1);
- OFFSET(KVM_NEED_FLUSH, kvm, arch.need_tlb_flush.bits);
OFFSET(KVM_ENABLED_HCALLS, kvm, arch.enabled_hcalls);
OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v);
OFFSET(KVM_RADIX, kvm, arch.radix);
- OFFSET(KVM_FWNMI, kvm, arch.fwnmi_enabled);
OFFSET(KVM_SECURE_GUEST, kvm, arch.secure_guest);
OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr);
OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar);
@@ -514,7 +462,6 @@ int main(void)
OFFSET(VCPU_DAWRX1, kvm_vcpu, arch.dawrx1);
OFFSET(VCPU_CIABR, kvm_vcpu, arch.ciabr);
OFFSET(VCPU_HFLAGS, kvm_vcpu, arch.hflags);
- OFFSET(VCPU_DEC, kvm_vcpu, arch.dec);
OFFSET(VCPU_DEC_EXPIRES, kvm_vcpu, arch.dec_expires);
OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions);
OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded);
@@ -525,7 +472,6 @@ int main(void)
OFFSET(VCPU_MMCRA, kvm_vcpu, arch.mmcra);
OFFSET(VCPU_MMCRS, kvm_vcpu, arch.mmcrs);
OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc);
- OFFSET(VCPU_SPMC, kvm_vcpu, arch.spmc);
OFFSET(VCPU_SIAR, kvm_vcpu, arch.siar);
OFFSET(VCPU_SDAR, kvm_vcpu, arch.sdar);
OFFSET(VCPU_SIER, kvm_vcpu, arch.sier);
@@ -534,7 +480,6 @@ int main(void)
OFFSET(VCPU_SLB_NR, kvm_vcpu, arch.slb_nr);
OFFSET(VCPU_FAULT_DSISR, kvm_vcpu, arch.fault_dsisr);
OFFSET(VCPU_FAULT_DAR, kvm_vcpu, arch.fault_dar);
- OFFSET(VCPU_FAULT_GPA, kvm_vcpu, arch.fault_gpa);
OFFSET(VCPU_INTR_MSR, kvm_vcpu, arch.intr_msr);
OFFSET(VCPU_LAST_INST, kvm_vcpu, arch.last_inst);
OFFSET(VCPU_TRAP, kvm_vcpu, arch.trap);
@@ -646,10 +591,8 @@ int main(void)
HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state);
HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore);
- HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
HSTATE_FIELD(HSTATE_XIVE_TIMA_PHYS, xive_tima_phys);
HSTATE_FIELD(HSTATE_XIVE_TIMA_VIRT, xive_tima_virt);
- HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
HSTATE_FIELD(HSTATE_PTID, ptid);
HSTATE_FIELD(HSTATE_FAKE_SUSPEND, fake_suspend);
@@ -757,7 +700,6 @@ int main(void)
#endif
DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
- DEFINE(PPC_DBELL_MSGTYPE, PPC_DBELL_MSGTYPE);
#ifdef CONFIG_PPC_8xx
DEFINE(VIRT_IMMR_BASE, (u64)__fix_to_virt(FIX_IMMR_BASE));
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index 735e89337398..5693e1c67c2b 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -35,7 +35,7 @@ void __init reserve_kdump_trampoline(void)
static void __init create_trampoline(unsigned long addr)
{
- struct ppc_inst *p = (struct ppc_inst *)addr;
+ u32 *p = (u32 *)addr;
/* The maximum range of a single instruction branch, is the current
* instruction's address + (32 MB - 4) bytes. For the trampoline we
@@ -45,8 +45,8 @@ static void __init create_trampoline(unsigned long addr)
* branch to "addr" we jump to ("addr" + 32 MB). Although it requires
* two instructions it doesn't require any registers.
*/
- patch_instruction(p, ppc_inst(PPC_INST_NOP));
- patch_branch((void *)p + 4, addr + PHYSICAL_START, 0);
+ patch_instruction(p, ppc_inst(PPC_RAW_NOP()));
+ patch_branch(p + 1, addr + PHYSICAL_START, 0);
}
void __init setup_kdump_trampoline(void)
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 9160285cb2f4..0273a1349006 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -32,6 +32,7 @@
#include <asm/barrier.h>
#include <asm/kup.h>
#include <asm/bug.h>
+#include <asm/interrupt.h>
#include "head_32.h"
@@ -74,6 +75,24 @@ _ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler)
.globl transfer_to_syscall
transfer_to_syscall:
+ stw r11, GPR1(r1)
+ stw r11, 0(r1)
+ mflr r12
+ stw r12, _LINK(r1)
+#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+ rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
+#endif
+ lis r12,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
+ SAVE_GPR(2, r1)
+ addi r12,r12,STACK_FRAME_REGS_MARKER@l
+ stw r9,_MSR(r1)
+ li r2, INTERRUPT_SYSCALL
+ stw r12,8(r1)
+ stw r2,_TRAP(r1)
+ SAVE_GPR(0, r1)
+ SAVE_4GPRS(3, r1)
+ SAVE_2GPRS(7, r1)
+ addi r2,r10,-THREAD
SAVE_NVGPRS(r1)
/* Calling convention has r9 = orig r0, r10 = regs */
@@ -176,28 +195,6 @@ _GLOBAL(_switch)
/* r3-r12 are caller saved -- Cort */
SAVE_NVGPRS(r1)
stw r0,_NIP(r1) /* Return to switch caller */
- mfmsr r11
- li r0,MSR_FP /* Disable floating-point */
-#ifdef CONFIG_ALTIVEC
-BEGIN_FTR_SECTION
- oris r0,r0,MSR_VEC@h /* Disable altivec */
- mfspr r12,SPRN_VRSAVE /* save vrsave register value */
- stw r12,THREAD+THREAD_VRSAVE(r2)
-END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
-#endif /* CONFIG_ALTIVEC */
-#ifdef CONFIG_SPE
-BEGIN_FTR_SECTION
- oris r0,r0,MSR_SPE@h /* Disable SPE */
- mfspr r12,SPRN_SPEFSCR /* save spefscr register value */
- stw r12,THREAD+THREAD_SPEFSCR(r2)
-END_FTR_SECTION_IFSET(CPU_FTR_SPE)
-#endif /* CONFIG_SPE */
- and. r0,r0,r11 /* FP or altivec or SPE enabled? */
- beq+ 1f
- andc r11,r11,r0
- mtmsr r11
- isync
-1: stw r11,_MSR(r1)
mfcr r10
stw r10,_CCR(r1)
stw r1,KSP(r3) /* Set old stack pointer */
@@ -218,19 +215,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE)
mr r3,r2
addi r2,r4,-THREAD /* Update current */
-#ifdef CONFIG_ALTIVEC
-BEGIN_FTR_SECTION
- lwz r0,THREAD+THREAD_VRSAVE(r2)
- mtspr SPRN_VRSAVE,r0 /* if G4, restore VRSAVE reg */
-END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
-#endif /* CONFIG_ALTIVEC */
-#ifdef CONFIG_SPE
-BEGIN_FTR_SECTION
- lwz r0,THREAD+THREAD_SPEFSCR(r2)
- mtspr SPRN_SPEFSCR,r0 /* restore SPEFSCR reg */
-END_FTR_SECTION_IFSET(CPU_FTR_SPE)
-#endif /* CONFIG_SPE */
-
lwz r0,_CCR(r1)
mtcrf 0xFF,r0
/* r3-r12 are destroyed -- Cort */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 03727308d8cc..15720f8661a1 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -32,7 +32,6 @@
#include <asm/irqflags.h>
#include <asm/hw_irq.h>
#include <asm/context_tracking.h>
-#include <asm/tm.h>
#include <asm/ppc-opcode.h>
#include <asm/barrier.h>
#include <asm/export.h>
@@ -48,372 +47,7 @@
/*
* System calls.
*/
- .section ".toc","aw"
-SYS_CALL_TABLE:
- .tc sys_call_table[TC],sys_call_table
-
-#ifdef CONFIG_COMPAT
-COMPAT_SYS_CALL_TABLE:
- .tc compat_sys_call_table[TC],compat_sys_call_table
-#endif
-
-/* This value is used to mark exception frames on the stack. */
-exception_marker:
- .tc ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER
-
.section ".text"
- .align 7
-
-#ifdef CONFIG_PPC_BOOK3S
-.macro system_call_vectored name trapnr
- .globl system_call_vectored_\name
-system_call_vectored_\name:
-_ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-BEGIN_FTR_SECTION
- extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */
- bne .Ltabort_syscall
-END_FTR_SECTION_IFSET(CPU_FTR_TM)
-#endif
- SCV_INTERRUPT_TO_KERNEL
- mr r10,r1
- ld r1,PACAKSAVE(r13)
- std r10,0(r1)
- std r11,_NIP(r1)
- std r12,_MSR(r1)
- std r0,GPR0(r1)
- std r10,GPR1(r1)
- std r2,GPR2(r1)
- ld r2,PACATOC(r13)
- mfcr r12
- li r11,0
- /* Can we avoid saving r3-r8 in common case? */
- std r3,GPR3(r1)
- std r4,GPR4(r1)
- std r5,GPR5(r1)
- std r6,GPR6(r1)
- std r7,GPR7(r1)
- std r8,GPR8(r1)
- /* Zero r9-r12, this should only be required when restoring all GPRs */
- std r11,GPR9(r1)
- std r11,GPR10(r1)
- std r11,GPR11(r1)
- std r11,GPR12(r1)
- std r9,GPR13(r1)
- SAVE_NVGPRS(r1)
- std r11,_XER(r1)
- std r11,_LINK(r1)
- std r11,_CTR(r1)
-
- li r11,\trapnr
- std r11,_TRAP(r1)
- std r12,_CCR(r1)
- addi r10,r1,STACK_FRAME_OVERHEAD
- ld r11,exception_marker@toc(r2)
- std r11,-16(r10) /* "regshere" marker */
-
-BEGIN_FTR_SECTION
- HMT_MEDIUM
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-
- /*
- * scv enters with MSR[EE]=1 and is immediately considered soft-masked.
- * The entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED,
- * and interrupts may be masked and pending already.
- * system_call_exception() will call trace_hardirqs_off() which means
- * interrupts could already have been blocked before trace_hardirqs_off,
- * but this is the best we can do.
- */
-
- /* Calling convention has r9 = orig r0, r10 = regs */
- mr r9,r0
- bl system_call_exception
-
-.Lsyscall_vectored_\name\()_exit:
- addi r4,r1,STACK_FRAME_OVERHEAD
- li r5,1 /* scv */
- bl syscall_exit_prepare
-
- ld r2,_CCR(r1)
- ld r4,_NIP(r1)
- ld r5,_MSR(r1)
-
-BEGIN_FTR_SECTION
- stdcx. r0,0,r1 /* to clear the reservation */
-END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
-
-BEGIN_FTR_SECTION
- HMT_MEDIUM_LOW
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-
- cmpdi r3,0
- bne .Lsyscall_vectored_\name\()_restore_regs
-
- /* rfscv returns with LR->NIA and CTR->MSR */
- mtlr r4
- mtctr r5
-
- /* Could zero these as per ABI, but we may consider a stricter ABI
- * which preserves these if libc implementations can benefit, so
- * restore them for now until further measurement is done. */
- ld r0,GPR0(r1)
- ld r4,GPR4(r1)
- ld r5,GPR5(r1)
- ld r6,GPR6(r1)
- ld r7,GPR7(r1)
- ld r8,GPR8(r1)
- /* Zero volatile regs that may contain sensitive kernel data */
- li r9,0
- li r10,0
- li r11,0
- li r12,0
- mtspr SPRN_XER,r0
-
- /*
- * We don't need to restore AMR on the way back to userspace for KUAP.
- * The value of AMR only matters while we're in the kernel.
- */
- mtcr r2
- ld r2,GPR2(r1)
- ld r3,GPR3(r1)
- ld r13,GPR13(r1)
- ld r1,GPR1(r1)
- RFSCV_TO_USER
- b . /* prevent speculative execution */
-
-.Lsyscall_vectored_\name\()_restore_regs:
- li r3,0
- mtmsrd r3,1
- mtspr SPRN_SRR0,r4
- mtspr SPRN_SRR1,r5
-
- ld r3,_CTR(r1)
- ld r4,_LINK(r1)
- ld r5,_XER(r1)
-
- REST_NVGPRS(r1)
- ld r0,GPR0(r1)
- mtcr r2
- mtctr r3
- mtlr r4
- mtspr SPRN_XER,r5
- REST_10GPRS(2, r1)
- REST_2GPRS(12, r1)
- ld r1,GPR1(r1)
- RFI_TO_USER
-.endm
-
-system_call_vectored common 0x3000
-/*
- * We instantiate another entry copy for the SIGILL variant, with TRAP=0x7ff0
- * which is tested by system_call_exception when r0 is -1 (as set by vector
- * entry code).
- */
-system_call_vectored sigill 0x7ff0
-
-
-/*
- * Entered via kernel return set up by kernel/sstep.c, must match entry regs
- */
- .globl system_call_vectored_emulate
-system_call_vectored_emulate:
-_ASM_NOKPROBE_SYMBOL(system_call_vectored_emulate)
- li r10,IRQS_ALL_DISABLED
- stb r10,PACAIRQSOFTMASK(r13)
- b system_call_vectored_common
-#endif
-
- .balign IFETCH_ALIGN_BYTES
- .globl system_call_common_real
-system_call_common_real:
- ld r10,PACAKMSR(r13) /* get MSR value for kernel */
- mtmsrd r10
-
- .balign IFETCH_ALIGN_BYTES
- .globl system_call_common
-system_call_common:
-_ASM_NOKPROBE_SYMBOL(system_call_common)
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-BEGIN_FTR_SECTION
- extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */
- bne .Ltabort_syscall
-END_FTR_SECTION_IFSET(CPU_FTR_TM)
-#endif
- mr r10,r1
- ld r1,PACAKSAVE(r13)
- std r10,0(r1)
- std r11,_NIP(r1)
- std r12,_MSR(r1)
- std r0,GPR0(r1)
- std r10,GPR1(r1)
- std r2,GPR2(r1)
-#ifdef CONFIG_PPC_FSL_BOOK3E
-START_BTB_FLUSH_SECTION
- BTB_FLUSH(r10)
-END_BTB_FLUSH_SECTION
-#endif
- ld r2,PACATOC(r13)
- mfcr r12
- li r11,0
- /* Can we avoid saving r3-r8 in common case? */
- std r3,GPR3(r1)
- std r4,GPR4(r1)
- std r5,GPR5(r1)
- std r6,GPR6(r1)
- std r7,GPR7(r1)
- std r8,GPR8(r1)
- /* Zero r9-r12, this should only be required when restoring all GPRs */
- std r11,GPR9(r1)
- std r11,GPR10(r1)
- std r11,GPR11(r1)
- std r11,GPR12(r1)
- std r9,GPR13(r1)
- SAVE_NVGPRS(r1)
- std r11,_XER(r1)
- std r11,_CTR(r1)
- mflr r10
-
- /*
- * This clears CR0.SO (bit 28), which is the error indication on
- * return from this system call.
- */
- rldimi r12,r11,28,(63-28)
- li r11,0xc00
- std r10,_LINK(r1)
- std r11,_TRAP(r1)
- std r12,_CCR(r1)
- addi r10,r1,STACK_FRAME_OVERHEAD
- ld r11,exception_marker@toc(r2)
- std r11,-16(r10) /* "regshere" marker */
-
- /*
- * We always enter kernel from userspace with irq soft-mask enabled and
- * nothing pending. system_call_exception() will call
- * trace_hardirqs_off().
- */
- li r11,IRQS_ALL_DISABLED
- li r12,PACA_IRQ_HARD_DIS
- stb r11,PACAIRQSOFTMASK(r13)
- stb r12,PACAIRQHAPPENED(r13)
-
- /* Calling convention has r9 = orig r0, r10 = regs */
- mr r9,r0
- bl system_call_exception
-
-.Lsyscall_exit:
- addi r4,r1,STACK_FRAME_OVERHEAD
- li r5,0 /* !scv */
- bl syscall_exit_prepare
-
- ld r2,_CCR(r1)
- ld r4,_NIP(r1)
- ld r5,_MSR(r1)
- ld r6,_LINK(r1)
-
-BEGIN_FTR_SECTION
- stdcx. r0,0,r1 /* to clear the reservation */
-END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
-
- mtspr SPRN_SRR0,r4
- mtspr SPRN_SRR1,r5
- mtlr r6
-
- cmpdi r3,0
- bne .Lsyscall_restore_regs
- /* Zero volatile regs that may contain sensitive kernel data */
- li r0,0
- li r4,0
- li r5,0
- li r6,0
- li r7,0
- li r8,0
- li r9,0
- li r10,0
- li r11,0
- li r12,0
- mtctr r0
- mtspr SPRN_XER,r0
-.Lsyscall_restore_regs_cont:
-
-BEGIN_FTR_SECTION
- HMT_MEDIUM_LOW
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-
- /*
- * We don't need to restore AMR on the way back to userspace for KUAP.
- * The value of AMR only matters while we're in the kernel.
- */
- mtcr r2
- ld r2,GPR2(r1)
- ld r3,GPR3(r1)
- ld r13,GPR13(r1)
- ld r1,GPR1(r1)
- RFI_TO_USER
- b . /* prevent speculative execution */
-
-.Lsyscall_restore_regs:
- ld r3,_CTR(r1)
- ld r4,_XER(r1)
- REST_NVGPRS(r1)
- mtctr r3
- mtspr SPRN_XER,r4
- ld r0,GPR0(r1)
- REST_8GPRS(4, r1)
- ld r12,GPR12(r1)
- b .Lsyscall_restore_regs_cont
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-.Ltabort_syscall:
- /* Firstly we need to enable TM in the kernel */
- mfmsr r10
- li r9, 1
- rldimi r10, r9, MSR_TM_LG, 63-MSR_TM_LG
- mtmsrd r10, 0
-
- /* tabort, this dooms the transaction, nothing else */
- li r9, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)
- TABORT(R9)
-
- /*
- * Return directly to userspace. We have corrupted user register state,
- * but userspace will never see that register state. Execution will
- * resume after the tbegin of the aborted transaction with the
- * checkpointed register state.
- */
- li r9, MSR_RI
- andc r10, r10, r9
- mtmsrd r10, 1
- mtspr SPRN_SRR0, r11
- mtspr SPRN_SRR1, r12
- RFI_TO_USER
- b . /* prevent speculative execution */
-#endif
-
-#ifdef CONFIG_PPC_BOOK3S
-_GLOBAL(ret_from_fork_scv)
- bl schedule_tail
- REST_NVGPRS(r1)
- li r3,0 /* fork() return value */
- b .Lsyscall_vectored_common_exit
-#endif
-
-_GLOBAL(ret_from_fork)
- bl schedule_tail
- REST_NVGPRS(r1)
- li r3,0 /* fork() return value */
- b .Lsyscall_exit
-
-_GLOBAL(ret_from_kernel_thread)
- bl schedule_tail
- REST_NVGPRS(r1)
- mtctr r14
- mr r3,r15
-#ifdef PPC64_ELF_ABI_v2
- mr r12,r14
-#endif
- bctrl
- li r3,0
- b .Lsyscall_exit
#ifdef CONFIG_PPC_BOOK3S_64
@@ -630,156 +264,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
addi r1,r1,SWITCH_FRAME_SIZE
blr
- /*
- * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not
- * touched, no exit work created, then this can be used.
- */
- .balign IFETCH_ALIGN_BYTES
- .globl fast_interrupt_return
-fast_interrupt_return:
-_ASM_NOKPROBE_SYMBOL(fast_interrupt_return)
- kuap_check_amr r3, r4
- ld r5,_MSR(r1)
- andi. r0,r5,MSR_PR
-#ifdef CONFIG_PPC_BOOK3S
- bne .Lfast_user_interrupt_return_amr
- kuap_kernel_restore r3, r4
- andi. r0,r5,MSR_RI
- li r3,0 /* 0 return value, no EMULATE_STACK_STORE */
- bne+ .Lfast_kernel_interrupt_return
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl unrecoverable_exception
- b . /* should not get here */
-#else
- bne .Lfast_user_interrupt_return
- b .Lfast_kernel_interrupt_return
-#endif
-
- .balign IFETCH_ALIGN_BYTES
- .globl interrupt_return
-interrupt_return:
-_ASM_NOKPROBE_SYMBOL(interrupt_return)
- ld r4,_MSR(r1)
- andi. r0,r4,MSR_PR
- beq .Lkernel_interrupt_return
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl interrupt_exit_user_prepare
- cmpdi r3,0
- bne- .Lrestore_nvgprs
-
-#ifdef CONFIG_PPC_BOOK3S
-.Lfast_user_interrupt_return_amr:
- kuap_user_restore r3, r4
-#endif
-.Lfast_user_interrupt_return:
- ld r11,_NIP(r1)
- ld r12,_MSR(r1)
-BEGIN_FTR_SECTION
- ld r10,_PPR(r1)
- mtspr SPRN_PPR,r10
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
- mtspr SPRN_SRR0,r11
- mtspr SPRN_SRR1,r12
-
-BEGIN_FTR_SECTION
- stdcx. r0,0,r1 /* to clear the reservation */
-FTR_SECTION_ELSE
- ldarx r0,0,r1
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
-
- ld r3,_CCR(r1)
- ld r4,_LINK(r1)
- ld r5,_CTR(r1)
- ld r6,_XER(r1)
- li r0,0
-
- REST_4GPRS(7, r1)
- REST_2GPRS(11, r1)
- REST_GPR(13, r1)
-
- mtcr r3
- mtlr r4
- mtctr r5
- mtspr SPRN_XER,r6
-
- REST_4GPRS(2, r1)
- REST_GPR(6, r1)
- REST_GPR(0, r1)
- REST_GPR(1, r1)
- RFI_TO_USER
- b . /* prevent speculative execution */
-
-.Lrestore_nvgprs:
- REST_NVGPRS(r1)
- b .Lfast_user_interrupt_return
-
- .balign IFETCH_ALIGN_BYTES
-.Lkernel_interrupt_return:
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl interrupt_exit_kernel_prepare
-
-.Lfast_kernel_interrupt_return:
- cmpdi cr1,r3,0
- ld r11,_NIP(r1)
- ld r12,_MSR(r1)
- mtspr SPRN_SRR0,r11
- mtspr SPRN_SRR1,r12
-
-BEGIN_FTR_SECTION
- stdcx. r0,0,r1 /* to clear the reservation */
-FTR_SECTION_ELSE
- ldarx r0,0,r1
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
-
- ld r3,_LINK(r1)
- ld r4,_CTR(r1)
- ld r5,_XER(r1)
- ld r6,_CCR(r1)
- li r0,0
-
- REST_4GPRS(7, r1)
- REST_2GPRS(11, r1)
-
- mtlr r3
- mtctr r4
- mtspr SPRN_XER,r5
-
- /*
- * Leaving a stale exception_marker on the stack can confuse
- * the reliable stack unwinder later on. Clear it.
- */
- std r0,STACK_FRAME_OVERHEAD-16(r1)
-
- REST_4GPRS(2, r1)
-
- bne- cr1,1f /* emulate stack store */
- mtcr r6
- REST_GPR(6, r1)
- REST_GPR(0, r1)
- REST_GPR(1, r1)
- RFI_TO_KERNEL
- b . /* prevent speculative execution */
-
-1: /*
- * Emulate stack store with update. New r1 value was already calculated
- * and updated in our interrupt regs by emulate_loadstore, but we can't
- * store the previous value of r1 to the stack before re-loading our
- * registers from it, otherwise they could be clobbered. Use
- * PACA_EXGEN as temporary storage to hold the store data, as
- * interrupts are disabled here so it won't be clobbered.
- */
- mtcr r6
- std r9,PACA_EXGEN+0(r13)
- addi r9,r1,INT_FRAME_SIZE /* get original r1 */
- REST_GPR(6, r1)
- REST_GPR(0, r1)
- REST_GPR(1, r1)
- std r9,0(r1) /* perform store component of stdu */
- ld r9,PACA_EXGEN+0(r13)
-
- RFI_TO_KERNEL
- b . /* prevent speculative execution */
-
#ifdef CONFIG_PPC_RTAS
/*
* On CHRP, the Run-Time Abstraction Services (RTAS) have to be
diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c
index 2ed14d4a47f5..93b0f3ec8fb0 100644
--- a/arch/powerpc/kernel/epapr_paravirt.c
+++ b/arch/powerpc/kernel/epapr_paravirt.c
@@ -38,9 +38,9 @@ static int __init early_init_dt_scan_epapr(unsigned long node,
for (i = 0; i < (len / 4); i++) {
struct ppc_inst inst = ppc_inst(be32_to_cpu(insts[i]));
- patch_instruction((struct ppc_inst *)(epapr_hypercall_start + i), inst);
+ patch_instruction(epapr_hypercall_start + i, inst);
#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
- patch_instruction((struct ppc_inst *)(epapr_ev_idle_start + i), inst);
+ patch_instruction(epapr_ev_idle_start + i, inst);
#endif
}
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index f1ae710274bc..1401787b0b93 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -26,6 +26,10 @@
#include <asm/feature-fixups.h>
#include <asm/context_tracking.h>
+/* 64e interrupt returns always use SRR registers */
+#define fast_interrupt_return fast_interrupt_return_srr
+#define interrupt_return interrupt_return_srr
+
/* XXX This will ultimately add space for a special exception save
* structure used to save things like SRR0/SRR1, SPRGs, MAS, etc...
* when taking special interrupts. For now we don't support that,
@@ -897,6 +901,34 @@ kernel_dbg_exc:
bl unknown_exception
b interrupt_return
+.macro SEARCH_RESTART_TABLE
+#ifdef CONFIG_RELOCATABLE
+ ld r11,PACATOC(r13)
+ ld r14,__start___restart_table@got(r11)
+ ld r15,__stop___restart_table@got(r11)
+#else
+ LOAD_REG_IMMEDIATE_SYM(r14, r11, __start___restart_table)
+ LOAD_REG_IMMEDIATE_SYM(r15, r11, __stop___restart_table)
+#endif
+300:
+ cmpd r14,r15
+ beq 302f
+ ld r11,0(r14)
+ cmpld r10,r11
+ blt 301f
+ ld r11,8(r14)
+ cmpld r10,r11
+ bge 301f
+ ld r11,16(r14)
+ b 303f
+301:
+ addi r14,r14,24
+ b 300b
+302:
+ li r11,0
+303:
+.endm
+
/*
* An interrupt came in while soft-disabled; We mark paca->irq_happened
* accordingly and if the interrupt is level sensitive, we hard disable
@@ -905,6 +937,9 @@ kernel_dbg_exc:
*/
.macro masked_interrupt_book3e paca_irq full_mask
+ std r14,PACA_EXGEN+EX_R14(r13)
+ std r15,PACA_EXGEN+EX_R15(r13)
+
lbz r10,PACAIRQHAPPENED(r13)
.if \full_mask == 1
ori r10,r10,\paca_irq | PACA_IRQ_HARD_DIS
@@ -914,15 +949,23 @@ kernel_dbg_exc:
stb r10,PACAIRQHAPPENED(r13)
.if \full_mask == 1
- rldicl r10,r11,48,1 /* clear MSR_EE */
- rotldi r11,r10,16
+ xori r11,r11,MSR_EE /* clear MSR_EE */
mtspr SPRN_SRR1,r11
.endif
+ mfspr r10,SPRN_SRR0
+ SEARCH_RESTART_TABLE
+ cmpdi r11,0
+ beq 1f
+ mtspr SPRN_SRR0,r11 /* return to restart address */
+1:
+
lwz r11,PACA_EXGEN+EX_CR(r13)
mtcr r11
ld r10,PACA_EXGEN+EX_R10(r13)
ld r11,PACA_EXGEN+EX_R11(r13)
+ ld r14,PACA_EXGEN+EX_R14(r13)
+ ld r15,PACA_EXGEN+EX_R15(r13)
mfspr r13,SPRN_SPRG_GEN_SCRATCH
rfi
b .
@@ -1282,7 +1325,12 @@ a2_tlbinit_code_start:
a2_tlbinit_after_linear_map:
/* Now we branch the new virtual address mapped by this entry */
+#ifdef CONFIG_RELOCATABLE
+ ld r5,PACATOC(r13)
+ ld r3,1f@got(r5)
+#else
LOAD_REG_IMMEDIATE_SYM(r3, r5, 1f)
+#endif
mtctr r3
bctr
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index fa8e52a0239e..4aec59a77d4c 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -21,22 +21,6 @@
#include <asm/feature-fixups.h>
#include <asm/kup.h>
-/* PACA save area offsets (exgen, exmc, etc) */
-#define EX_R9 0
-#define EX_R10 8
-#define EX_R11 16
-#define EX_R12 24
-#define EX_R13 32
-#define EX_DAR 40
-#define EX_DSISR 48
-#define EX_CCR 52
-#define EX_CFAR 56
-#define EX_PPR 64
-#define EX_CTR 72
-.if EX_SIZE != 10
- .error "EX_SIZE is wrong"
-.endif
-
/*
* Following are fixed section helper macros.
*
@@ -133,7 +117,6 @@ name:
#define IBRANCH_TO_COMMON .L_IBRANCH_TO_COMMON_\name\() /* ENTRY branch to common */
#define IREALMODE_COMMON .L_IREALMODE_COMMON_\name\() /* Common runs in realmode */
#define IMASK .L_IMASK_\name\() /* IRQ soft-mask bit */
-#define IKVM_SKIP .L_IKVM_SKIP_\name\() /* Generate KVM skip handler */
#define IKVM_REAL .L_IKVM_REAL_\name\() /* Real entry tests KVM */
#define __IKVM_REAL(name) .L_IKVM_REAL_ ## name
#define IKVM_VIRT .L_IKVM_VIRT_\name\() /* Virt entry tests KVM */
@@ -190,9 +173,6 @@ do_define_int n
.ifndef IMASK
IMASK=0
.endif
- .ifndef IKVM_SKIP
- IKVM_SKIP=0
- .endif
.ifndef IKVM_REAL
IKVM_REAL=0
.endif
@@ -207,8 +187,6 @@ do_define_int n
.endif
.endm
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
* All interrupts which set HSRR registers, as well as SRESET and MCE and
* syscall when invoked with "sc 1" switch to MSR[HV]=1 (HVMODE) to be taken,
@@ -238,88 +216,28 @@ do_define_int n
/*
* If an interrupt is taken while a guest is running, it is immediately routed
- * to KVM to handle. If both HV and PR KVM arepossible, KVM interrupts go first
- * to kvmppc_interrupt_hv, which handles the PR guest case.
+ * to KVM to handle.
*/
-#define kvmppc_interrupt kvmppc_interrupt_hv
-#else
-#define kvmppc_interrupt kvmppc_interrupt_pr
-#endif
-.macro KVMTEST name
+.macro KVMTEST name handler
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
lbz r10,HSTATE_IN_GUEST(r13)
cmpwi r10,0
- bne \name\()_kvm
-.endm
-
-.macro GEN_KVM name
- .balign IFETCH_ALIGN_BYTES
-\name\()_kvm:
-
- .if IKVM_SKIP
- cmpwi r10,KVM_GUEST_MODE_SKIP
- beq 89f
- .else
-BEGIN_FTR_SECTION
- ld r10,IAREA+EX_CFAR(r13)
- std r10,HSTATE_CFAR(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
- .endif
-
- ld r10,IAREA+EX_CTR(r13)
- mtctr r10
-BEGIN_FTR_SECTION
- ld r10,IAREA+EX_PPR(r13)
- std r10,HSTATE_PPR(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
- ld r11,IAREA+EX_R11(r13)
- ld r12,IAREA+EX_R12(r13)
- std r12,HSTATE_SCRATCH0(r13)
- sldi r12,r9,32
- ld r9,IAREA+EX_R9(r13)
- ld r10,IAREA+EX_R10(r13)
/* HSRR variants have the 0x2 bit added to their trap number */
.if IHSRR_IF_HVMODE
BEGIN_FTR_SECTION
- ori r12,r12,(IVEC + 0x2)
+ li r10,(IVEC + 0x2)
FTR_SECTION_ELSE
- ori r12,r12,(IVEC)
+ li r10,(IVEC)
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
.elseif IHSRR
- ori r12,r12,(IVEC+ 0x2)
+ li r10,(IVEC + 0x2)
.else
- ori r12,r12,(IVEC)
+ li r10,(IVEC)
.endif
- b kvmppc_interrupt
-
- .if IKVM_SKIP
-89: mtocrf 0x80,r9
- ld r10,IAREA+EX_CTR(r13)
- mtctr r10
- ld r9,IAREA+EX_R9(r13)
- ld r10,IAREA+EX_R10(r13)
- ld r11,IAREA+EX_R11(r13)
- ld r12,IAREA+EX_R12(r13)
- .if IHSRR_IF_HVMODE
- BEGIN_FTR_SECTION
- b kvmppc_skip_Hinterrupt
- FTR_SECTION_ELSE
- b kvmppc_skip_interrupt
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif IHSRR
- b kvmppc_skip_Hinterrupt
- .else
- b kvmppc_skip_interrupt
- .endif
- .endif
-.endm
-
-#else
-.macro KVMTEST name
-.endm
-.macro GEN_KVM name
-.endm
+ bne \handler
#endif
+.endm
/*
* This is the BOOK3S interrupt entry code macro.
@@ -461,7 +379,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
DEFINE_FIXED_SYMBOL(\name\()_common_real)
\name\()_common_real:
.if IKVM_REAL
- KVMTEST \name
+ KVMTEST \name kvm_interrupt
.endif
ld r10,PACAKMSR(r13) /* get MSR value for kernel */
@@ -484,7 +402,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real)
DEFINE_FIXED_SYMBOL(\name\()_common_virt)
\name\()_common_virt:
.if IKVM_VIRT
- KVMTEST \name
+ KVMTEST \name kvm_interrupt
1:
.endif
.endif /* IVIRT */
@@ -498,7 +416,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_virt)
DEFINE_FIXED_SYMBOL(\name\()_common_real)
\name\()_common_real:
.if IKVM_REAL
- KVMTEST \name
+ KVMTEST \name kvm_interrupt
.endif
.endm
@@ -510,18 +428,31 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real)
/* If coming from user, skip soft-mask tests. */
andi. r10,r12,MSR_PR
- bne 2f
-
- /* Kernel code running below __end_interrupts is implicitly
- * soft-masked */
- LOAD_HANDLER(r10, __end_interrupts)
+ bne 3f
+
+ /*
+ * Kernel code running below __end_soft_masked may be
+ * implicitly soft-masked if it is within the regions
+ * in the soft mask table.
+ */
+ LOAD_HANDLER(r10, __end_soft_masked)
cmpld r11,r10
+ bge+ 1f
+
+ /* SEARCH_SOFT_MASK_TABLE clobbers r9,r10,r12 */
+ mtctr r12
+ stw r9,PACA_EXGEN+EX_CCR(r13)
+ SEARCH_SOFT_MASK_TABLE
+ cmpdi r12,0
+ mfctr r12 /* Restore r12 to SRR1 */
+ lwz r9,PACA_EXGEN+EX_CCR(r13)
+ beq 1f /* Not in soft-mask table */
li r10,IMASK
- blt- 1f
+ b 2f /* In soft-mask table, always mask */
/* Test the soft mask state against our interrupt's bit */
- lbz r10,PACAIRQSOFTMASK(r13)
-1: andi. r10,r10,IMASK
+1: lbz r10,PACAIRQSOFTMASK(r13)
+2: andi. r10,r10,IMASK
/* Associate vector numbers with bits in paca->irq_happened */
.if IVEC == 0x500 || IVEC == 0xea0
li r10,PACA_IRQ_EE
@@ -552,7 +483,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real)
.if ISTACK
andi. r10,r12,MSR_PR /* See if coming from user */
-2: mr r10,r1 /* Save r1 */
+3: mr r10,r1 /* Save r1 */
subi r1,r1,INT_FRAME_SIZE /* alloc frame on kernel stack */
beq- 100f
ld r1,PACAKSAVE(r13) /* kernel stack to use */
@@ -567,6 +498,20 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real)
std r0,GPR0(r1) /* save r0 in stackframe */
std r10,GPR1(r1) /* save r1 in stackframe */
+ /* Mark our [H]SRRs valid for return */
+ li r10,1
+ .if IHSRR_IF_HVMODE
+ BEGIN_FTR_SECTION
+ stb r10,PACAHSRR_VALID(r13)
+ FTR_SECTION_ELSE
+ stb r10,PACASRR_VALID(r13)
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif IHSRR
+ stb r10,PACAHSRR_VALID(r13)
+ .else
+ stb r10,PACASRR_VALID(r13)
+ .endif
+
.if ISET_RI
li r10,MSR_RI
mtmsrd r10,1 /* Set MSR_RI */
@@ -659,6 +604,66 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
__GEN_COMMON_BODY \name
.endm
+.macro SEARCH_RESTART_TABLE
+#ifdef CONFIG_RELOCATABLE
+ mr r12,r2
+ ld r2,PACATOC(r13)
+ LOAD_REG_ADDR(r9, __start___restart_table)
+ LOAD_REG_ADDR(r10, __stop___restart_table)
+ mr r2,r12
+#else
+ LOAD_REG_IMMEDIATE_SYM(r9, r12, __start___restart_table)
+ LOAD_REG_IMMEDIATE_SYM(r10, r12, __stop___restart_table)
+#endif
+300:
+ cmpd r9,r10
+ beq 302f
+ ld r12,0(r9)
+ cmpld r11,r12
+ blt 301f
+ ld r12,8(r9)
+ cmpld r11,r12
+ bge 301f
+ ld r12,16(r9)
+ b 303f
+301:
+ addi r9,r9,24
+ b 300b
+302:
+ li r12,0
+303:
+.endm
+
+.macro SEARCH_SOFT_MASK_TABLE
+#ifdef CONFIG_RELOCATABLE
+ mr r12,r2
+ ld r2,PACATOC(r13)
+ LOAD_REG_ADDR(r9, __start___soft_mask_table)
+ LOAD_REG_ADDR(r10, __stop___soft_mask_table)
+ mr r2,r12
+#else
+ LOAD_REG_IMMEDIATE_SYM(r9, r12, __start___soft_mask_table)
+ LOAD_REG_IMMEDIATE_SYM(r10, r12, __stop___soft_mask_table)
+#endif
+300:
+ cmpd r9,r10
+ beq 302f
+ ld r12,0(r9)
+ cmpld r11,r12
+ blt 301f
+ ld r12,8(r9)
+ cmpld r11,r12
+ bge 301f
+ li r12,1
+ b 303f
+301:
+ addi r9,r9,16
+ b 300b
+302:
+ li r12,0
+303:
+.endm
+
/*
* Restore all registers including H/SRR0/1 saved in a stack frame of a
* standard exception.
@@ -666,10 +671,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
.macro EXCEPTION_RESTORE_REGS hsrr=0
/* Move original SRR0 and SRR1 into the respective regs */
ld r9,_MSR(r1)
+ li r10,0
.if \hsrr
mtspr SPRN_HSRR1,r9
+ stb r10,PACAHSRR_VALID(r13)
.else
mtspr SPRN_SRR1,r9
+ stb r10,PACASRR_VALID(r13)
.endif
ld r9,_NIP(r1)
.if \hsrr
@@ -786,17 +794,17 @@ __start_interrupts:
* scv instructions enter the kernel without changing EE, RI, ME, or HV.
* In particular, this means we can take a maskable interrupt at any point
* in the scv handler, which is unlike any other interrupt. This is solved
- * by treating the instruction addresses below __end_interrupts as being
- * soft-masked.
+ * by treating the instruction addresses in the handler as being soft-masked,
+ * by adding a SOFT_MASK_TABLE entry for them.
*
* AIL-0 mode scv exceptions go to 0x17000-0x17fff, but we set AIL-3 and
* ensure scv is never executed with relocation off, which means AIL-0
* should never happen.
*
- * Before leaving the below __end_interrupts text, at least of the following
- * must be true:
+ * Before leaving the following inside-__end_soft_masked text, at least of the
+ * following must be true:
* - MSR[PR]=1 (i.e., return to userspace)
- * - MSR_EE|MSR_RI is set (no reentrant exceptions)
+ * - MSR_EE|MSR_RI is clear (no reentrant exceptions)
* - Standard kernel environment is set up (stack, paca, etc)
*
* Call convention:
@@ -804,6 +812,7 @@ __start_interrupts:
* syscall register convention is in Documentation/powerpc/syscall64-abi.rst
*/
EXC_VIRT_BEGIN(system_call_vectored, 0x3000, 0x1000)
+1:
/* SCV 0 */
mr r9,r13
GET_PACA(r13)
@@ -833,8 +842,11 @@ EXC_VIRT_BEGIN(system_call_vectored, 0x3000, 0x1000)
b system_call_vectored_sigill
#endif
.endr
+2:
EXC_VIRT_END(system_call_vectored, 0x3000, 0x1000)
+SOFT_MASK_TABLE(1b, 2b) // Treat scv vectors as soft-masked, see comment above.
+
#ifdef CONFIG_RELOCATABLE
TRAMP_VIRT_BEGIN(system_call_vectored_tramp)
__LOAD_HANDLER(r10, system_call_vectored_common)
@@ -1000,8 +1012,6 @@ EXC_COMMON_BEGIN(system_reset_common)
EXCEPTION_RESTORE_REGS
RFI_TO_USER_OR_KERNEL
- GEN_KVM system_reset
-
/**
* Interrupt 0x200 - Machine Check Interrupt (MCE).
@@ -1070,7 +1080,6 @@ INT_DEFINE_BEGIN(machine_check)
ISET_RI=0
IDAR=1
IDSISR=1
- IKVM_SKIP=1
IKVM_REAL=1
INT_DEFINE_END(machine_check)
@@ -1166,7 +1175,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
/*
* Check if we are coming from guest. If yes, then run the normal
* exception handler which will take the
- * machine_check_kvm->kvmppc_interrupt branch to deliver the MC event
+ * machine_check_kvm->kvm_interrupt branch to deliver the MC event
* to guest.
*/
lbz r11,HSTATE_IN_GUEST(r13)
@@ -1234,9 +1243,7 @@ EXC_COMMON_BEGIN(machine_check_common)
mtmsrd r10,1
addi r3,r1,STACK_FRAME_OVERHEAD
bl machine_check_exception
- b interrupt_return
-
- GEN_KVM machine_check
+ b interrupt_return_srr
#ifdef CONFIG_PPC_P7_NAP
@@ -1342,7 +1349,6 @@ INT_DEFINE_BEGIN(data_access)
IVEC=0x300
IDAR=1
IDSISR=1
- IKVM_SKIP=1
IKVM_REAL=1
INT_DEFINE_END(data_access)
@@ -1363,7 +1369,7 @@ BEGIN_MMU_FTR_SECTION
MMU_FTR_SECTION_ELSE
bl do_page_fault
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
- b interrupt_return
+ b interrupt_return_srr
1: bl do_break
/*
@@ -1371,9 +1377,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
* If so, we need to restore them with their updated values.
*/
REST_NVGPRS(r1)
- b interrupt_return
-
- GEN_KVM data_access
+ b interrupt_return_srr
/**
@@ -1396,7 +1400,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
INT_DEFINE_BEGIN(data_access_slb)
IVEC=0x380
IDAR=1
- IKVM_SKIP=1
IKVM_REAL=1
INT_DEFINE_END(data_access_slb)
@@ -1414,7 +1417,7 @@ BEGIN_MMU_FTR_SECTION
bl do_slb_fault
cmpdi r3,0
bne- 1f
- b fast_interrupt_return
+ b fast_interrupt_return_srr
1: /* Error case */
MMU_FTR_SECTION_ELSE
/* Radix case, access is outside page table range */
@@ -1423,9 +1426,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
std r3,RESULT(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_bad_slb_fault
- b interrupt_return
-
- GEN_KVM data_access_slb
+ b interrupt_return_srr
/**
@@ -1461,9 +1462,7 @@ BEGIN_MMU_FTR_SECTION
MMU_FTR_SECTION_ELSE
bl do_page_fault
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
- b interrupt_return
-
- GEN_KVM instruction_access
+ b interrupt_return_srr
/**
@@ -1498,7 +1497,7 @@ BEGIN_MMU_FTR_SECTION
bl do_slb_fault
cmpdi r3,0
bne- 1f
- b fast_interrupt_return
+ b fast_interrupt_return_srr
1: /* Error case */
MMU_FTR_SECTION_ELSE
/* Radix case, access is outside page table range */
@@ -1507,9 +1506,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
std r3,RESULT(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_bad_slb_fault
- b interrupt_return
-
- GEN_KVM instruction_access_slb
+ b interrupt_return_srr
/**
@@ -1553,9 +1550,11 @@ EXC_COMMON_BEGIN(hardware_interrupt_common)
GEN_COMMON hardware_interrupt
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_IRQ
- b interrupt_return
-
- GEN_KVM hardware_interrupt
+ BEGIN_FTR_SECTION
+ b interrupt_return_hsrr
+ FTR_SECTION_ELSE
+ b interrupt_return_srr
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
/**
@@ -1582,9 +1581,7 @@ EXC_COMMON_BEGIN(alignment_common)
addi r3,r1,STACK_FRAME_OVERHEAD
bl alignment_exception
REST_NVGPRS(r1) /* instruction emulation may change GPRs */
- b interrupt_return
-
- GEN_KVM alignment
+ b interrupt_return_srr
/**
@@ -1691,9 +1688,7 @@ EXC_COMMON_BEGIN(program_check_common)
addi r3,r1,STACK_FRAME_OVERHEAD
bl program_check_exception
REST_NVGPRS(r1) /* instruction emulation may change GPRs */
- b interrupt_return
-
- GEN_KVM program_check
+ b interrupt_return_srr
/*
@@ -1736,16 +1731,14 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
bl load_up_fpu
- b fast_interrupt_return
+ b fast_interrupt_return_srr
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
addi r3,r1,STACK_FRAME_OVERHEAD
bl fp_unavailable_tm
- b interrupt_return
+ b interrupt_return_srr
#endif
- GEN_KVM fp_unavailable
-
/**
* Interrupt 0x900 - Decrementer Interrupt.
@@ -1782,9 +1775,7 @@ EXC_COMMON_BEGIN(decrementer_common)
GEN_COMMON decrementer
addi r3,r1,STACK_FRAME_OVERHEAD
bl timer_interrupt
- b interrupt_return
-
- GEN_KVM decrementer
+ b interrupt_return_srr
/**
@@ -1821,6 +1812,8 @@ EXC_COMMON_BEGIN(hdecrementer_common)
*
* Be careful to avoid touching the kernel stack.
*/
+ li r10,0
+ stb r10,PACAHSRR_VALID(r13)
ld r10,PACA_EXGEN+EX_CTR(r13)
mtctr r10
mtcrf 0x80,r9
@@ -1831,8 +1824,6 @@ EXC_COMMON_BEGIN(hdecrementer_common)
ld r13,PACA_EXGEN+EX_R13(r13)
HRFI_TO_KERNEL
- GEN_KVM hdecrementer
-
/**
* Interrupt 0xa00 - Directed Privileged Doorbell Interrupt.
@@ -1870,9 +1861,7 @@ EXC_COMMON_BEGIN(doorbell_super_common)
#else
bl unknown_async_exception
#endif
- b interrupt_return
-
- GEN_KVM doorbell_super
+ b interrupt_return_srr
EXC_REAL_NONE(0xb00, 0x100)
@@ -1923,7 +1912,7 @@ INT_DEFINE_END(system_call)
GET_PACA(r13)
std r10,PACA_EXGEN+EX_R10(r13)
INTERRUPT_TO_KERNEL
- KVMTEST system_call /* uses r10, branch to system_call_kvm */
+ KVMTEST system_call kvm_hcall /* uses r10, branch to kvm_hcall */
mfctr r9
#else
mr r9,r13
@@ -1949,8 +1938,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
mtctr r10
bctr
.else
- li r10,MSR_RI
- mtmsrd r10,1 /* Set RI (EE=0) */
#ifdef CONFIG_RELOCATABLE
__LOAD_HANDLER(r10, system_call_common)
mtctr r10
@@ -1979,14 +1966,16 @@ EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100)
EXC_VIRT_END(system_call, 0x4c00, 0x100)
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-TRAMP_REAL_BEGIN(system_call_kvm)
- /*
- * This is a hcall, so register convention is as above, with these
- * differences:
- * r13 = PACA
- * ctr = orig r13
- * orig r10 saved in PACA
- */
+TRAMP_REAL_BEGIN(kvm_hcall)
+ std r9,PACA_EXGEN+EX_R9(r13)
+ std r11,PACA_EXGEN+EX_R11(r13)
+ std r12,PACA_EXGEN+EX_R12(r13)
+ mfcr r9
+ mfctr r10
+ std r10,PACA_EXGEN+EX_R13(r13)
+ li r10,0
+ std r10,PACA_EXGEN+EX_CFAR(r13)
+ std r10,PACA_EXGEN+EX_CTR(r13)
/*
* Save the PPR (on systems that support it) before changing to
* HMT_MEDIUM. That allows the KVM code to save that value into the
@@ -1994,31 +1983,24 @@ TRAMP_REAL_BEGIN(system_call_kvm)
*/
BEGIN_FTR_SECTION
mfspr r10,SPRN_PPR
- std r10,HSTATE_PPR(r13)
+ std r10,PACA_EXGEN+EX_PPR(r13)
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
HMT_MEDIUM
- mfctr r10
- SET_SCRATCH0(r10)
- mfcr r10
- std r12,HSTATE_SCRATCH0(r13)
- sldi r12,r10,32
- ori r12,r12,0xc00
+
#ifdef CONFIG_RELOCATABLE
/*
- * Requires __LOAD_FAR_HANDLER beause kvmppc_interrupt lives
+ * Requires __LOAD_FAR_HANDLER beause kvmppc_hcall lives
* outside the head section.
*/
- __LOAD_FAR_HANDLER(r10, kvmppc_interrupt)
+ __LOAD_FAR_HANDLER(r10, kvmppc_hcall)
mtctr r10
- ld r10,PACA_EXGEN+EX_R10(r13)
bctr
#else
- ld r10,PACA_EXGEN+EX_R10(r13)
- b kvmppc_interrupt
+ b kvmppc_hcall
#endif
#endif
-
/**
* Interrupt 0xd00 - Trace Interrupt.
* This is a synchronous interrupt in response to instruction step or
@@ -2041,9 +2023,7 @@ EXC_COMMON_BEGIN(single_step_common)
GEN_COMMON single_step
addi r3,r1,STACK_FRAME_OVERHEAD
bl single_step_exception
- b interrupt_return
-
- GEN_KVM single_step
+ b interrupt_return_srr
/**
@@ -2063,7 +2043,6 @@ INT_DEFINE_BEGIN(h_data_storage)
IHSRR=1
IDAR=1
IDSISR=1
- IKVM_SKIP=1
IKVM_REAL=1
IKVM_VIRT=1
INT_DEFINE_END(h_data_storage)
@@ -2082,9 +2061,7 @@ BEGIN_MMU_FTR_SECTION
MMU_FTR_SECTION_ELSE
bl unknown_exception
ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX)
- b interrupt_return
-
- GEN_KVM h_data_storage
+ b interrupt_return_hsrr
/**
@@ -2109,9 +2086,7 @@ EXC_COMMON_BEGIN(h_instr_storage_common)
GEN_COMMON h_instr_storage
addi r3,r1,STACK_FRAME_OVERHEAD
bl unknown_exception
- b interrupt_return
-
- GEN_KVM h_instr_storage
+ b interrupt_return_hsrr
/**
@@ -2135,9 +2110,7 @@ EXC_COMMON_BEGIN(emulation_assist_common)
addi r3,r1,STACK_FRAME_OVERHEAD
bl emulation_assist_interrupt
REST_NVGPRS(r1) /* instruction emulation may change GPRs */
- b interrupt_return
-
- GEN_KVM emulation_assist
+ b interrupt_return_hsrr
/**
@@ -2210,15 +2183,11 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
EXCEPTION_RESTORE_REGS hsrr=1
GEN_INT_ENTRY hmi_exception, virt=0
- GEN_KVM hmi_exception_early
-
EXC_COMMON_BEGIN(hmi_exception_common)
GEN_COMMON hmi_exception
addi r3,r1,STACK_FRAME_OVERHEAD
bl handle_hmi_exception
- b interrupt_return
-
- GEN_KVM hmi_exception
+ b interrupt_return_hsrr
/**
@@ -2248,9 +2217,7 @@ EXC_COMMON_BEGIN(h_doorbell_common)
#else
bl unknown_async_exception
#endif
- b interrupt_return
-
- GEN_KVM h_doorbell
+ b interrupt_return_hsrr
/**
@@ -2276,9 +2243,7 @@ EXC_COMMON_BEGIN(h_virt_irq_common)
GEN_COMMON h_virt_irq
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_IRQ
- b interrupt_return
-
- GEN_KVM h_virt_irq
+ b interrupt_return_hsrr
EXC_REAL_NONE(0xec0, 0x20)
@@ -2321,9 +2286,7 @@ EXC_COMMON_BEGIN(performance_monitor_common)
GEN_COMMON performance_monitor
addi r3,r1,STACK_FRAME_OVERHEAD
bl performance_monitor_exception
- b interrupt_return
-
- GEN_KVM performance_monitor
+ b interrupt_return_srr
/**
@@ -2360,21 +2323,19 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69)
#endif
bl load_up_altivec
- b fast_interrupt_return
+ b fast_interrupt_return_srr
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
addi r3,r1,STACK_FRAME_OVERHEAD
bl altivec_unavailable_tm
- b interrupt_return
+ b interrupt_return_srr
#endif
1:
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
addi r3,r1,STACK_FRAME_OVERHEAD
bl altivec_unavailable_exception
- b interrupt_return
-
- GEN_KVM altivec_unavailable
+ b interrupt_return_srr
/**
@@ -2415,16 +2376,14 @@ BEGIN_FTR_SECTION
2: /* User process was in a transaction */
addi r3,r1,STACK_FRAME_OVERHEAD
bl vsx_unavailable_tm
- b interrupt_return
+ b interrupt_return_srr
#endif
1:
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
addi r3,r1,STACK_FRAME_OVERHEAD
bl vsx_unavailable_exception
- b interrupt_return
-
- GEN_KVM vsx_unavailable
+ b interrupt_return_srr
/**
@@ -2452,9 +2411,7 @@ EXC_COMMON_BEGIN(facility_unavailable_common)
addi r3,r1,STACK_FRAME_OVERHEAD
bl facility_unavailable_exception
REST_NVGPRS(r1) /* instruction emulation may change GPRs */
- b interrupt_return
-
- GEN_KVM facility_unavailable
+ b interrupt_return_srr
/**
@@ -2482,9 +2439,7 @@ EXC_COMMON_BEGIN(h_facility_unavailable_common)
addi r3,r1,STACK_FRAME_OVERHEAD
bl facility_unavailable_exception
REST_NVGPRS(r1) /* XXX Shouldn't be necessary in practice */
- b interrupt_return
-
- GEN_KVM h_facility_unavailable
+ b interrupt_return_hsrr
EXC_REAL_NONE(0xfa0, 0x20)
@@ -2513,9 +2468,7 @@ EXC_COMMON_BEGIN(cbe_system_error_common)
GEN_COMMON cbe_system_error
addi r3,r1,STACK_FRAME_OVERHEAD
bl cbe_system_error_exception
- b interrupt_return
-
- GEN_KVM cbe_system_error
+ b interrupt_return_hsrr
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1200, 0x100)
@@ -2546,9 +2499,7 @@ EXC_COMMON_BEGIN(instruction_breakpoint_common)
GEN_COMMON instruction_breakpoint
addi r3,r1,STACK_FRAME_OVERHEAD
bl instruction_breakpoint_exception
- b interrupt_return
-
- GEN_KVM instruction_breakpoint
+ b interrupt_return_srr
EXC_REAL_NONE(0x1400, 0x100)
@@ -2656,6 +2607,8 @@ BEGIN_FTR_SECTION
ld r10,PACA_EXGEN+EX_CFAR(r13)
mtspr SPRN_CFAR,r10
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ li r10,0
+ stb r10,PACAHSRR_VALID(r13)
ld r10,PACA_EXGEN+EX_R10(r13)
ld r11,PACA_EXGEN+EX_R11(r13)
ld r12,PACA_EXGEN+EX_R12(r13)
@@ -2668,9 +2621,7 @@ EXC_COMMON_BEGIN(denorm_exception_common)
GEN_COMMON denorm_exception
addi r3,r1,STACK_FRAME_OVERHEAD
bl unknown_exception
- b interrupt_return
-
- GEN_KVM denorm_exception
+ b interrupt_return_hsrr
#ifdef CONFIG_CBE_RAS
@@ -2687,9 +2638,7 @@ EXC_COMMON_BEGIN(cbe_maintenance_common)
GEN_COMMON cbe_maintenance
addi r3,r1,STACK_FRAME_OVERHEAD
bl cbe_maintenance_exception
- b interrupt_return
-
- GEN_KVM cbe_maintenance
+ b interrupt_return_hsrr
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1600, 0x100)
@@ -2719,9 +2668,7 @@ EXC_COMMON_BEGIN(altivec_assist_common)
#else
bl unknown_exception
#endif
- b interrupt_return
-
- GEN_KVM altivec_assist
+ b interrupt_return_srr
#ifdef CONFIG_CBE_RAS
@@ -2738,9 +2685,7 @@ EXC_COMMON_BEGIN(cbe_thermal_common)
GEN_COMMON cbe_thermal
addi r3,r1,STACK_FRAME_OVERHEAD
bl cbe_thermal_exception
- b interrupt_return
-
- GEN_KVM cbe_thermal
+ b interrupt_return_hsrr
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1800, 0x100)
@@ -2765,7 +2710,6 @@ INT_DEFINE_END(soft_nmi)
* and run it entirely with interrupts hard disabled.
*/
EXC_COMMON_BEGIN(soft_nmi_common)
- mfspr r11,SPRN_SRR0
mr r10,r1
ld r1,PACAEMERGSP(r13)
subi r1,r1,INT_FRAME_SIZE
@@ -2779,6 +2723,7 @@ EXC_COMMON_BEGIN(soft_nmi_common)
mtmsrd r9,1
kuap_kernel_restore r9, r10
+
EXCEPTION_RESTORE_REGS hsrr=0
RFI_TO_KERNEL
@@ -2800,19 +2745,24 @@ masked_Hinterrupt:
.else
masked_interrupt:
.endif
- lbz r11,PACAIRQHAPPENED(r13)
- or r11,r11,r10
- stb r11,PACAIRQHAPPENED(r13)
+ stw r9,PACA_EXGEN+EX_CCR(r13)
+ lbz r9,PACAIRQHAPPENED(r13)
+ or r9,r9,r10
+ stb r9,PACAIRQHAPPENED(r13)
+
+ .if ! \hsrr
cmpwi r10,PACA_IRQ_DEC
bne 1f
- lis r10,0x7fff
- ori r10,r10,0xffff
- mtspr SPRN_DEC,r10
+ LOAD_REG_IMMEDIATE(r9, 0x7fffffff)
+ mtspr SPRN_DEC,r9
#ifdef CONFIG_PPC_WATCHDOG
+ lwz r9,PACA_EXGEN+EX_CCR(r13)
b soft_nmi_common
#else
b 2f
#endif
+ .endif
+
1: andi. r10,r10,PACA_IRQ_MUST_HARD_MASK
beq 2f
xori r12,r12,MSR_EE /* clear MSR_EE */
@@ -2821,11 +2771,29 @@ masked_interrupt:
.else
mtspr SPRN_SRR1,r12
.endif
- ori r11,r11,PACA_IRQ_HARD_DIS
- stb r11,PACAIRQHAPPENED(r13)
+ ori r9,r9,PACA_IRQ_HARD_DIS
+ stb r9,PACAIRQHAPPENED(r13)
2: /* done */
- ld r10,PACA_EXGEN+EX_CTR(r13)
- mtctr r10
+ li r9,0
+ .if \hsrr
+ stb r9,PACAHSRR_VALID(r13)
+ .else
+ stb r9,PACASRR_VALID(r13)
+ .endif
+
+ SEARCH_RESTART_TABLE
+ cmpdi r12,0
+ beq 3f
+ .if \hsrr
+ mtspr SPRN_HSRR0,r12
+ .else
+ mtspr SPRN_SRR0,r12
+ .endif
+3:
+
+ ld r9,PACA_EXGEN+EX_CTR(r13)
+ mtctr r9
+ lwz r9,PACA_EXGEN+EX_CCR(r13)
mtcrf 0x80,r9
std r1,PACAR1(r13)
ld r9,PACA_EXGEN+EX_R9(r13)
@@ -2994,6 +2962,15 @@ TRAMP_REAL_BEGIN(rfscv_flush_fallback)
USE_TEXT_SECTION()
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+kvm_interrupt:
+ /*
+ * The conditional branch in KVMTEST can't reach all the way,
+ * make a stub.
+ */
+ b kvmppc_interrupt
+#endif
+
_GLOBAL(do_uaccess_flush)
UACCESS_FLUSH_FIXUP_SECTION
nop
@@ -3009,32 +2986,6 @@ EXPORT_SYMBOL(do_uaccess_flush)
MASKED_INTERRUPT
MASKED_INTERRUPT hsrr=1
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-kvmppc_skip_interrupt:
- /*
- * Here all GPRs are unchanged from when the interrupt happened
- * except for r13, which is saved in SPRG_SCRATCH0.
- */
- mfspr r13, SPRN_SRR0
- addi r13, r13, 4
- mtspr SPRN_SRR0, r13
- GET_SCRATCH0(r13)
- RFI_TO_KERNEL
- b .
-
-kvmppc_skip_Hinterrupt:
- /*
- * Here all GPRs are unchanged from when the interrupt happened
- * except for r13, which is saved in SPRG_SCRATCH0.
- */
- mfspr r13, SPRN_HSRR0
- addi r13, r13, 4
- mtspr SPRN_HSRR0, r13
- GET_SCRATCH0(r13)
- HRFI_TO_KERNEL
- b .
-#endif
-
/*
* Relocation-on interrupts: A subset of the interrupts can be delivered
* with IR=1/DR=1, if AIL==2 and MSR.HV won't be changed by delivering
@@ -3053,7 +3004,7 @@ kvmppc_skip_Hinterrupt:
USE_FIXED_SECTION(virt_trampolines)
/*
- * All code below __end_interrupts is treated as soft-masked. If
+ * All code below __end_soft_masked is treated as soft-masked. If
* any code runs here with MSR[EE]=1, it must then cope with pending
* soft interrupt being raised (i.e., by ensuring it is replayed).
*
diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c
index c9e2819b095a..c7022c41cc31 100644
--- a/arch/powerpc/kernel/firmware.c
+++ b/arch/powerpc/kernel/firmware.c
@@ -23,18 +23,20 @@ EXPORT_SYMBOL_GPL(powerpc_firmware_features);
#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST)
DEFINE_STATIC_KEY_FALSE(kvm_guest);
-bool check_kvm_guest(void)
+int __init check_kvm_guest(void)
{
struct device_node *hyper_node;
hyper_node = of_find_node_by_path("/hypervisor");
if (!hyper_node)
- return false;
+ return 0;
if (!of_device_is_compatible(hyper_node, "linux,kvm"))
- return false;
+ return 0;
static_branch_enable(&kvm_guest);
- return true;
+
+ return 0;
}
+core_initcall(check_kvm_guest); // before kvm_guest_init()
#endif
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 2c57ece6671c..6010adcee16e 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -103,6 +103,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
ori r12,r12,MSR_FP
or r12,r12,r4
std r12,_MSR(r1)
+#ifdef CONFIG_PPC_BOOK3S_64
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+#endif
#endif
li r4,1
stb r4,THREAD_LOAD_FP(r5)
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index a8221ddcbd66..6b1ec9e3541b 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -142,42 +142,23 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
.macro SYSCALL_ENTRY trapno
mfspr r9, SPRN_SRR1
- mfspr r10, SPRN_SRR0
+ mfspr r12, SPRN_SRR0
LOAD_REG_IMMEDIATE(r11, MSR_KERNEL) /* can take exceptions */
- lis r12, 1f@h
- ori r12, r12, 1f@l
+ lis r10, 1f@h
+ ori r10, r10, 1f@l
mtspr SPRN_SRR1, r11
- mtspr SPRN_SRR0, r12
- mfspr r12,SPRN_SPRG_THREAD
+ mtspr SPRN_SRR0, r10
+ mfspr r10,SPRN_SPRG_THREAD
mr r11, r1
- lwz r1,TASK_STACK-THREAD(r12)
- tovirt(r12, r12)
+ lwz r1,TASK_STACK-THREAD(r10)
+ tovirt(r10, r10)
addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE
rfi
1:
- stw r11,GPR1(r1)
- stw r11,0(r1)
- mr r11, r1
- stw r10,_NIP(r11)
- mflr r10
- stw r10, _LINK(r11)
- mfcr r10
- rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */
- stw r10,_CCR(r11) /* save registers */
-#ifdef CONFIG_40x
- rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
-#endif
- lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
- stw r2,GPR2(r11)
- addi r10,r10,STACK_FRAME_REGS_MARKER@l
- stw r9,_MSR(r11)
- li r2, \trapno
- stw r10,8(r11)
- stw r2,_TRAP(r11)
- SAVE_GPR(0, r11)
- SAVE_4GPRS(3, r11)
- SAVE_2GPRS(7, r11)
- addi r2,r12,-THREAD
+ stw r12,_NIP(r1)
+ mfcr r12
+ rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */
+ stw r12,_CCR(r1)
b transfer_to_syscall /* jump to handler */
.endm
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index e1360b88b6cb..7d72ee5ab387 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -701,39 +701,3 @@ _GLOBAL(abort)
mfspr r13,SPRN_DBCR0
oris r13,r13,DBCR0_RST_SYSTEM@h
mtspr SPRN_DBCR0,r13
-
-_GLOBAL(set_context)
-
-#ifdef CONFIG_BDI_SWITCH
- /* Context switch the PTE pointer for the Abatron BDI2000.
- * The PGDIR is the second parameter.
- */
- lis r5, abatron_pteptrs@ha
- stw r4, abatron_pteptrs@l + 0x4(r5)
-#endif
- sync
- mtspr SPRN_PID,r3
- isync /* Need an isync to flush shadow */
- /* TLBs after changing PID */
- blr
-
-/* We put a few things here that have to be page-aligned. This stuff
- * goes at the beginning of the data segment, which is page-aligned.
- */
- .data
- .align 12
- .globl sdata
-sdata:
- .globl empty_zero_page
-empty_zero_page:
- .space 4096
-EXPORT_SYMBOL(empty_zero_page)
- .globl swapper_pg_dir
-swapper_pg_dir:
- .space PGD_TABLE_SIZE
-
-/* Room for two PTE pointers, usually the kernel and current user pointers
- * to their respective root page table.
- */
-abatron_pteptrs:
- .space 8
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index 5c106ac36626..ddc978a2d381 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -532,6 +532,10 @@ finish_tlb_load_44x:
andi. r10,r12,_PAGE_USER /* User page ? */
beq 1f /* nope, leave U bits empty */
rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */
+#ifdef CONFIG_PPC_KUEP
+0: rlwinm r11,r11,0,~PPC44x_TLB_SX /* Clear SX if User page */
+ patch_site 0b, patch__tlb_44x_kuep
+#endif
1: tlbwe r11,r13,PPC44x_TLB_ATTRIB /* Write ATTRIB */
/* Done...restore registers and get out of here.
@@ -743,6 +747,10 @@ finish_tlb_load_47x:
andi. r10,r12,_PAGE_USER /* User page ? */
beq 1f /* nope, leave U bits empty */
rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */
+#ifdef CONFIG_PPC_KUEP
+0: rlwinm r11,r11,0,~PPC47x_TLB2_SX /* Clear SX if User page */
+ patch_site 0b, patch__tlb_47x_kuep
+#endif
1: tlbwe r11,r13,2
/* Done...restore registers and get out of here.
@@ -780,20 +788,6 @@ _GLOBAL(__fixup_440A_mcheck)
sync
blr
-_GLOBAL(set_context)
-
-#ifdef CONFIG_BDI_SWITCH
- /* Context switch the PTE pointer for the Abatron BDI2000.
- * The PGDIR is the second parameter.
- */
- lis r5, abatron_pteptrs@h
- ori r5, r5, abatron_pteptrs@l
- stw r4, 0x4(r5)
-#endif
- mtspr SPRN_PID,r3
- isync /* Force context change */
- blr
-
/*
* Init CPU state. This is called at boot time or for secondary CPUs
* to setup initial TLB entries, setup IVORs, etc...
@@ -1239,34 +1233,8 @@ head_start_common:
isync
blr
-/*
- * We put a few things here that have to be page-aligned. This stuff
- * goes at the beginning of the data segment, which is page-aligned.
- */
- .data
- .align PAGE_SHIFT
- .globl sdata
-sdata:
- .globl empty_zero_page
-empty_zero_page:
- .space PAGE_SIZE
-EXPORT_SYMBOL(empty_zero_page)
-
-/*
- * To support >32-bit physical addresses, we use an 8KB pgdir.
- */
- .globl swapper_pg_dir
-swapper_pg_dir:
- .space PGD_TABLE_SIZE
-
-/*
- * Room for two PTE pointers, usually the kernel and current user pointers
- * to their respective root page table.
- */
-abatron_pteptrs:
- .space 8
-
#ifdef CONFIG_SMP
+ .data
.align 12
temp_boot_stack:
.space 1024
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index ece7f97bafff..79930b0bc781 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -194,8 +194,9 @@ CLOSE_FIXED_SECTION(first_256B)
/* This value is used to mark exception frames on the stack. */
.section ".toc","aw"
+/* This value is used to mark exception frames on the stack. */
exception_marker:
- .tc ID_72656773_68657265[TC],0x7265677368657265
+ .tc ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER
.previous
/*
@@ -211,6 +212,8 @@ OPEN_TEXT_SECTION(0x100)
USE_TEXT_SECTION()
+#include "interrupt_64.S"
+
#ifdef CONFIG_PPC_BOOK3E
/*
* The booting_thread_hwid holds the thread id we want to boot in cpu
@@ -997,23 +1000,3 @@ start_here_common:
0: trap
EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
.previous
-
-/*
- * We put a few things here that have to be page-aligned.
- * This stuff goes at the beginning of the bss, which is page-aligned.
- */
- .section ".bss"
-/*
- * pgd dir should be aligned to PGD_TABLE_SIZE which is 64K.
- * We will need to find a better way to fix this
- */
- .align 16
-
- .globl swapper_pg_dir
-swapper_pg_dir:
- .space PGD_TABLE_SIZE
-
- .globl empty_zero_page
-empty_zero_page:
- .space PAGE_SIZE
-EXPORT_SYMBOL(empty_zero_page)
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 7d445e4342c0..9bdb95f5694f 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -786,28 +786,3 @@ _GLOBAL(mmu_pin_tlb)
mtspr SPRN_SRR1, r10
mtspr SPRN_SRR0, r11
rfi
-
-/*
- * We put a few things here that have to be page-aligned.
- * This stuff goes at the beginning of the data segment,
- * which is page-aligned.
- */
- .data
- .globl sdata
-sdata:
- .globl empty_zero_page
- .align PAGE_SHIFT
-empty_zero_page:
- .space PAGE_SIZE
-EXPORT_SYMBOL(empty_zero_page)
-
- .globl swapper_pg_dir
-swapper_pg_dir:
- .space PGD_TABLE_SIZE
-
-/* Room for two PTE table pointers, usually the kernel and current user
- * pointer to their respective root page table (pgdir).
- */
- .globl abatron_pteptrs
-abatron_pteptrs:
- .space 8
diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S
index 065178f19a3d..764edd860ed4 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -518,8 +518,6 @@ BEGIN_FTR_SECTION
rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
mtspr SPRN_RPA,r1
- mfspr r2,SPRN_SRR1 /* Need to restore CR0 */
- mtcrf 0x80,r2
BEGIN_MMU_FTR_SECTION
li r0,1
mfspr r1,SPRN_SPRG_603_LRU
@@ -531,9 +529,15 @@ BEGIN_MMU_FTR_SECTION
mfspr r2,SPRN_SRR1
rlwimi r2,r0,31-14,14,14
mtspr SPRN_SRR1,r2
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
+ mtcrf 0x80,r2
+ tlbld r3
+ rfi
+MMU_FTR_SECTION_ELSE
+ mfspr r2,SPRN_SRR1 /* Need to restore CR0 */
+ mtcrf 0x80,r2
tlbld r3
rfi
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
DataAddressInvalid:
mfspr r3,SPRN_SRR1
rlwinm r1,r3,9,6,6 /* Get load/store bit */
@@ -607,9 +611,15 @@ BEGIN_MMU_FTR_SECTION
mfspr r2,SPRN_SRR1
rlwimi r2,r0,31-14,14,14
mtspr SPRN_SRR1,r2
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
+ mtcrf 0x80,r2
+ tlbld r3
+ rfi
+MMU_FTR_SECTION_ELSE
+ mfspr r2,SPRN_SRR1 /* Need to restore CR0 */
+ mtcrf 0x80,r2
tlbld r3
rfi
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
#ifndef CONFIG_ALTIVEC
#define altivec_assist_exception unknown_exception
@@ -756,9 +766,6 @@ PerformanceMonitor:
* the kernel image to physical address PHYSICAL_START.
*/
relocate_kernel:
- addis r9,r26,klimit@ha /* fetch klimit */
- lwz r25,klimit@l(r9)
- addis r25,r25,-KERNELBASE@h
lis r3,PHYSICAL_START@h /* Destination base address */
li r6,0 /* Destination offset */
li r5,0x4000 /* # bytes of memory to copy */
@@ -766,7 +773,8 @@ relocate_kernel:
addi r0,r3,4f@l /* jump to the address of 4f */
mtctr r0 /* in copy and do the rest. */
bctr /* jump to the copy */
-4: mr r5,r25
+4: lis r5,_end-KERNELBASE@h
+ ori r5,r5,_end-KERNELBASE@l
bl copy_and_flush /* copy the rest */
b turn_on_mmu
@@ -924,12 +932,6 @@ _GLOBAL(load_segment_registers)
li r0, NUM_USER_SEGMENTS /* load up user segment register values */
mtctr r0 /* for context 0 */
li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */
-#ifdef CONFIG_PPC_KUEP
- oris r3, r3, SR_NX@h /* Set Nx */
-#endif
-#ifdef CONFIG_PPC_KUAP
- oris r3, r3, SR_KS@h /* Set Ks */
-#endif
li r4, 0
3: mtsrin r3, r4
addi r3, r3, 0x111 /* increment VSID */
@@ -1024,58 +1026,6 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE)
rfi
/*
- * void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next);
- *
- * Set up the segment registers for a new context.
- */
-_ENTRY(switch_mmu_context)
- lwz r3,MMCONTEXTID(r4)
- cmpwi cr0,r3,0
- blt- 4f
- mulli r3,r3,897 /* multiply context by skew factor */
- rlwinm r3,r3,4,8,27 /* VSID = (context & 0xfffff) << 4 */
-#ifdef CONFIG_PPC_KUEP
- oris r3, r3, SR_NX@h /* Set Nx */
-#endif
-#ifdef CONFIG_PPC_KUAP
- oris r3, r3, SR_KS@h /* Set Ks */
-#endif
- li r0,NUM_USER_SEGMENTS
- mtctr r0
-
-#ifdef CONFIG_BDI_SWITCH
- /* Context switch the PTE pointer for the Abatron BDI2000.
- * The PGDIR is passed as second argument.
- */
- lwz r4, MM_PGD(r4)
- lis r5, abatron_pteptrs@ha
- stw r4, abatron_pteptrs@l + 0x4(r5)
-#endif
-BEGIN_MMU_FTR_SECTION
-#ifndef CONFIG_BDI_SWITCH
- lwz r4, MM_PGD(r4)
-#endif
- tophys(r4, r4)
- rlwinm r4, r4, 4, 0xffff01ff
- mtspr SPRN_SDR1, r4
-END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE)
- li r4,0
- isync
-3:
- mtsrin r3,r4
- addi r3,r3,0x111 /* next VSID */
- rlwinm r3,r3,0,8,3 /* clear out any overflow from VSID field */
- addis r4,r4,0x1000 /* address of next segment */
- bdnz 3b
- sync
- isync
- blr
-4: trap
- EMIT_BUG_ENTRY 4b,__FILE__,__LINE__,0
- blr
-EXPORT_SYMBOL(switch_mmu_context)
-
-/*
* An undocumented "feature" of 604e requires that the v bit
* be cleared before changing BAT values.
*
@@ -1256,61 +1206,4 @@ setup_usbgecko_bat:
blr
#endif
-#ifdef CONFIG_8260
-/* Jump into the system reset for the rom.
- * We first disable the MMU, and then jump to the ROM reset address.
- *
- * r3 is the board info structure, r4 is the location for starting.
- * I use this for building a small kernel that can load other kernels,
- * rather than trying to write or rely on a rom monitor that can tftp load.
- */
- .globl m8260_gorom
-m8260_gorom:
- mfmsr r0
- rlwinm r0,r0,0,17,15 /* clear MSR_EE in r0 */
- sync
- mtmsr r0
- sync
- mfspr r11, SPRN_HID0
- lis r10, 0
- ori r10,r10,HID0_ICE|HID0_DCE
- andc r11, r11, r10
- mtspr SPRN_HID0, r11
- isync
- li r5, MSR_ME|MSR_RI
- lis r6,2f@h
- addis r6,r6,-KERNELBASE@h
- ori r6,r6,2f@l
- mtspr SPRN_SRR0,r6
- mtspr SPRN_SRR1,r5
- isync
- sync
- rfi
-2:
- mtlr r4
- blr
-#endif
-
-
-/*
- * We put a few things here that have to be page-aligned.
- * This stuff goes at the beginning of the data segment,
- * which is page-aligned.
- */
.data
- .globl sdata
-sdata:
- .globl empty_zero_page
-empty_zero_page:
- .space 4096
-EXPORT_SYMBOL(empty_zero_page)
-
- .globl swapper_pg_dir
-swapper_pg_dir:
- .space PGD_TABLE_SIZE
-
-/* Room for two PTE pointers, usually the kernel and current user pointers
- * to their respective root page table.
- */
-abatron_pteptrs:
- .space 8
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index f82470091697..87b806e8eded 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -128,37 +128,20 @@ BEGIN_FTR_SECTION
mr r12, r13
lwz r13, THREAD_NORMSAVE(2)(r10)
FTR_SECTION_ELSE
-#endif
mfcr r12
-#ifdef CONFIG_KVM_BOOKE_HV
ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
+#else
+ mfcr r12
#endif
mfspr r9, SPRN_SRR1
BOOKE_CLEAR_BTB(r11)
- lwz r11, TASK_STACK - THREAD(r10)
+ mr r11, r1
+ lwz r1, TASK_STACK - THREAD(r10)
rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */
- ALLOC_STACK_FRAME(r11, THREAD_SIZE - INT_FRAME_SIZE)
- stw r12, _CCR(r11) /* save various registers */
- mflr r12
- stw r12,_LINK(r11)
+ ALLOC_STACK_FRAME(r1, THREAD_SIZE - INT_FRAME_SIZE)
+ stw r12, _CCR(r1)
mfspr r12,SPRN_SRR0
- stw r1, GPR1(r11)
- stw r1, 0(r11)
- mr r1, r11
- stw r12,_NIP(r11)
- rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
- lis r12, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
- stw r2,GPR2(r11)
- addi r12, r12, STACK_FRAME_REGS_MARKER@l
- stw r9,_MSR(r11)
- li r2, \trapno
- stw r12, 8(r11)
- stw r2,_TRAP(r11)
- SAVE_GPR(0, r11)
- SAVE_4GPRS(3, r11)
- SAVE_2GPRS(7, r11)
-
- addi r2,r10,-THREAD
+ stw r12,_NIP(r1)
b transfer_to_syscall /* jump to handler */
.endm
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index a1a5c3f10dc4..0f9642f36b49 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -985,20 +985,6 @@ _GLOBAL(abort)
mtspr SPRN_DBCR0,r13
isync
-_GLOBAL(set_context)
-
-#ifdef CONFIG_BDI_SWITCH
- /* Context switch the PTE pointer for the Abatron BDI2000.
- * The PGDIR is the second parameter.
- */
- lis r5, abatron_pteptrs@h
- ori r5, r5, abatron_pteptrs@l
- stw r4, 0x4(r5)
-#endif
- mtspr SPRN_PID,r3
- isync /* Force context change */
- blr
-
#ifdef CONFIG_SMP
/* When we get here, r24 needs to hold the CPU # */
.globl __secondary_start
@@ -1226,26 +1212,3 @@ _GLOBAL(restore_to_as0)
*/
3: mr r3,r5
bl _start
-
-/*
- * We put a few things here that have to be page-aligned. This stuff
- * goes at the beginning of the data segment, which is page-aligned.
- */
- .data
- .align 12
- .globl sdata
-sdata:
- .globl empty_zero_page
-empty_zero_page:
- .space 4096
-EXPORT_SYMBOL(empty_zero_page)
- .globl swapper_pg_dir
-swapper_pg_dir:
- .space PGD_TABLE_SIZE
-
-/*
- * Room for two PTE pointers, usually the kernel and current user pointers
- * to their respective root page table.
- */
-abatron_pteptrs:
- .space 8
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 8fc7a14e4d71..21a638aff72f 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -486,7 +486,7 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs)
return;
reset:
- regs->msr &= ~MSR_SE;
+ regs_set_return_msr(regs, regs->msr & ~MSR_SE);
for (i = 0; i < nr_wp_slots(); i++) {
info = counter_arch_bp(__this_cpu_read(bp_per_reg[i]));
__set_breakpoint(i, info);
@@ -537,7 +537,7 @@ static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp,
current->thread.last_hit_ubp[i] = bp[i];
info[i] = NULL;
}
- regs->msr |= MSR_SE;
+ regs_set_return_msr(regs, regs->msr | MSR_SE);
return false;
}
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index e0938ba298f2..21bbd615ca41 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -3,6 +3,7 @@
#include <linux/context_tracking.h>
#include <linux/err.h>
#include <linux/compat.h>
+#include <linux/sched/debug.h> /* for show_regs */
#include <asm/asm-prototypes.h>
#include <asm/kup.h>
@@ -26,6 +27,53 @@ unsigned long global_dbcr0[NR_CPUS];
typedef long (*syscall_fn)(long, long, long, long, long, long);
+#ifdef CONFIG_PPC_BOOK3S_64
+DEFINE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant);
+static inline bool exit_must_hard_disable(void)
+{
+ return static_branch_unlikely(&interrupt_exit_not_reentrant);
+}
+#else
+static inline bool exit_must_hard_disable(void)
+{
+ return true;
+}
+#endif
+
+/*
+ * local irqs must be disabled. Returns false if the caller must re-enable
+ * them, check for new work, and try again.
+ *
+ * This should be called with local irqs disabled, but if they were previously
+ * enabled when the interrupt handler returns (indicating a process-context /
+ * synchronous interrupt) then irqs_enabled should be true.
+ *
+ * restartable is true then EE/RI can be left on because interrupts are handled
+ * with a restart sequence.
+ */
+static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable)
+{
+ /* This must be done with RI=1 because tracing may touch vmaps */
+ trace_hardirqs_on();
+
+ if (exit_must_hard_disable() || !restartable)
+ __hard_EE_RI_disable();
+
+#ifdef CONFIG_PPC64
+ /* This pattern matches prep_irq_for_idle */
+ if (unlikely(lazy_irq_pending_nocheck())) {
+ if (exit_must_hard_disable() || !restartable) {
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ __hard_RI_enable();
+ }
+ trace_hardirqs_off();
+
+ return false;
+ }
+#endif
+ return true;
+}
+
/* Has to run notrace because it is entered not completely "reconciled" */
notrace long system_call_exception(long r3, long r4, long r5,
long r6, long r7, long r8,
@@ -144,71 +192,6 @@ notrace long system_call_exception(long r3, long r4, long r5,
return f(r3, r4, r5, r6, r7, r8);
}
-/*
- * local irqs must be disabled. Returns false if the caller must re-enable
- * them, check for new work, and try again.
- *
- * This should be called with local irqs disabled, but if they were previously
- * enabled when the interrupt handler returns (indicating a process-context /
- * synchronous interrupt) then irqs_enabled should be true.
- */
-static notrace __always_inline bool __prep_irq_for_enabled_exit(bool clear_ri)
-{
- /* This must be done with RI=1 because tracing may touch vmaps */
- trace_hardirqs_on();
-
- /* This pattern matches prep_irq_for_idle */
- if (clear_ri)
- __hard_EE_RI_disable();
- else
- __hard_irq_disable();
-#ifdef CONFIG_PPC64
- if (unlikely(lazy_irq_pending_nocheck())) {
- /* Took an interrupt, may have more exit work to do. */
- if (clear_ri)
- __hard_RI_enable();
- trace_hardirqs_off();
- local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
-
- return false;
- }
- local_paca->irq_happened = 0;
- irq_soft_mask_set(IRQS_ENABLED);
-#endif
- return true;
-}
-
-static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri, bool irqs_enabled)
-{
- if (__prep_irq_for_enabled_exit(clear_ri))
- return true;
-
- /*
- * Must replay pending soft-masked interrupts now. Don't just
- * local_irq_enabe(); local_irq_disable(); because if we are
- * returning from an asynchronous interrupt here, another one
- * might hit after irqs are enabled, and it would exit via this
- * same path allowing another to fire, and so on unbounded.
- *
- * If interrupts were enabled when this interrupt exited,
- * indicating a process context (synchronous) interrupt,
- * local_irq_enable/disable can be used, which will enable
- * interrupts rather than keeping them masked (unclear how
- * much benefit this is over just replaying for all cases,
- * because we immediately disable again, so all we're really
- * doing is allowing hard interrupts to execute directly for
- * a very small time, rather than being masked and replayed).
- */
- if (irqs_enabled) {
- local_irq_enable();
- local_irq_disable();
- } else {
- replay_soft_interrupts();
- }
-
- return false;
-}
-
static notrace void booke_load_dbcr0(void)
{
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
@@ -231,57 +214,92 @@ static notrace void booke_load_dbcr0(void)
#endif
}
-/*
- * This should be called after a syscall returns, with r3 the return value
- * from the syscall. If this function returns non-zero, the system call
- * exit assembly should additionally load all GPR registers and CTR and XER
- * from the interrupt frame.
- *
- * The function graph tracer can not trace the return side of this function,
- * because RI=0 and soft mask state is "unreconciled", so it is marked notrace.
- */
-notrace unsigned long syscall_exit_prepare(unsigned long r3,
- struct pt_regs *regs,
- long scv)
+static void check_return_regs_valid(struct pt_regs *regs)
{
- unsigned long ti_flags;
- unsigned long ret = 0;
- bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv;
+#ifdef CONFIG_PPC_BOOK3S_64
+ unsigned long trap, srr0, srr1;
+ static bool warned;
+ u8 *validp;
+ char *h;
- CT_WARN_ON(ct_state() == CONTEXT_USER);
+ if (trap_is_scv(regs))
+ return;
- kuap_assert_locked();
+ trap = regs->trap;
+ // EE in HV mode sets HSRRs like 0xea0
+ if (cpu_has_feature(CPU_FTR_HVMODE) && trap == INTERRUPT_EXTERNAL)
+ trap = 0xea0;
+
+ switch (trap) {
+ case 0x980:
+ case INTERRUPT_H_DATA_STORAGE:
+ case 0xe20:
+ case 0xe40:
+ case INTERRUPT_HMI:
+ case 0xe80:
+ case 0xea0:
+ case INTERRUPT_H_FAC_UNAVAIL:
+ case 0x1200:
+ case 0x1500:
+ case 0x1600:
+ case 0x1800:
+ validp = &local_paca->hsrr_valid;
+ if (!*validp)
+ return;
+
+ srr0 = mfspr(SPRN_HSRR0);
+ srr1 = mfspr(SPRN_HSRR1);
+ h = "H";
+
+ break;
+ default:
+ validp = &local_paca->srr_valid;
+ if (!*validp)
+ return;
+
+ srr0 = mfspr(SPRN_SRR0);
+ srr1 = mfspr(SPRN_SRR1);
+ h = "";
+ break;
+ }
- regs->result = r3;
+ if (srr0 == regs->nip && srr1 == regs->msr)
+ return;
- /* Check whether the syscall is issued inside a restartable sequence */
- rseq_syscall(regs);
+ /*
+ * A NMI / soft-NMI interrupt may have come in after we found
+ * srr_valid and before the SRRs are loaded. The interrupt then
+ * comes in and clobbers SRRs and clears srr_valid. Then we load
+ * the SRRs here and test them above and find they don't match.
+ *
+ * Test validity again after that, to catch such false positives.
+ *
+ * This test in general will have some window for false negatives
+ * and may not catch and fix all such cases if an NMI comes in
+ * later and clobbers SRRs without clearing srr_valid, but hopefully
+ * such things will get caught most of the time, statistically
+ * enough to be able to get a warning out.
+ */
+ barrier();
- ti_flags = current_thread_info()->flags;
+ if (!*validp)
+ return;
- if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) {
- if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
- r3 = -r3;
- regs->ccr |= 0x10000000; /* Set SO bit in CR */
- }
+ if (!warned) {
+ warned = true;
+ printk("%sSRR0 was: %lx should be: %lx\n", h, srr0, regs->nip);
+ printk("%sSRR1 was: %lx should be: %lx\n", h, srr1, regs->msr);
+ show_regs(regs);
}
- if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) {
- if (ti_flags & _TIF_RESTOREALL)
- ret = _TIF_RESTOREALL;
- else
- regs->gpr[3] = r3;
- clear_bits(_TIF_PERSYSCALL_MASK, &current_thread_info()->flags);
- } else {
- regs->gpr[3] = r3;
- }
-
- if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
- do_syscall_trace_leave(regs);
- ret |= _TIF_RESTOREALL;
- }
+ *validp = 0; /* fixup */
+#endif
+}
- local_irq_disable();
+static notrace unsigned long
+interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs)
+{
+ unsigned long ti_flags;
again:
ti_flags = READ_ONCE(current_thread_info()->flags);
@@ -303,7 +321,7 @@ again:
ti_flags = READ_ONCE(current_thread_info()->flags);
}
- if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) {
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) {
if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
unlikely((ti_flags & _TIF_RESTORE_TM))) {
restore_tm_state(regs);
@@ -327,10 +345,10 @@ again:
}
}
- user_enter_irqoff();
+ check_return_regs_valid(regs);
- /* scv need not set RI=0 because SRRs are not used */
- if (unlikely(!__prep_irq_for_enabled_exit(is_not_scv))) {
+ user_enter_irqoff();
+ if (!prep_irq_for_enabled_exit(true)) {
user_exit_irqoff();
local_irq_enable();
local_irq_disable();
@@ -352,90 +370,131 @@ again:
return ret;
}
-notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr)
+/*
+ * This should be called after a syscall returns, with r3 the return value
+ * from the syscall. If this function returns non-zero, the system call
+ * exit assembly should additionally load all GPR registers and CTR and XER
+ * from the interrupt frame.
+ *
+ * The function graph tracer can not trace the return side of this function,
+ * because RI=0 and soft mask state is "unreconciled", so it is marked notrace.
+ */
+notrace unsigned long syscall_exit_prepare(unsigned long r3,
+ struct pt_regs *regs,
+ long scv)
{
unsigned long ti_flags;
- unsigned long flags;
unsigned long ret = 0;
+ bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv;
- if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
- BUG_ON(!(regs->msr & MSR_RI));
- BUG_ON(!(regs->msr & MSR_PR));
- BUG_ON(arch_irq_disabled_regs(regs));
CT_WARN_ON(ct_state() == CONTEXT_USER);
- /*
- * We don't need to restore AMR on the way back to userspace for KUAP.
- * AMR can only have been unlocked if we interrupted the kernel.
- */
kuap_assert_locked();
- local_irq_save(flags);
-
-again:
- ti_flags = READ_ONCE(current_thread_info()->flags);
- while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
- local_irq_enable(); /* returning to user: may enable */
- if (ti_flags & _TIF_NEED_RESCHED) {
- schedule();
- } else {
- if (ti_flags & _TIF_SIGPENDING)
- ret |= _TIF_RESTOREALL;
- do_notify_resume(regs, ti_flags);
- }
- local_irq_disable();
- ti_flags = READ_ONCE(current_thread_info()->flags);
- }
+ regs->result = r3;
- if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) {
- if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
- unlikely((ti_flags & _TIF_RESTORE_TM))) {
- restore_tm_state(regs);
- } else {
- unsigned long mathflags = MSR_FP;
+ /* Check whether the syscall is issued inside a restartable sequence */
+ rseq_syscall(regs);
- if (cpu_has_feature(CPU_FTR_VSX))
- mathflags |= MSR_VEC | MSR_VSX;
- else if (cpu_has_feature(CPU_FTR_ALTIVEC))
- mathflags |= MSR_VEC;
+ ti_flags = current_thread_info()->flags;
- /* See above restore_math comment */
- if ((regs->msr & mathflags) != mathflags)
- restore_math(regs);
+ if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) {
+ if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
+ r3 = -r3;
+ regs->ccr |= 0x10000000; /* Set SO bit in CR */
}
}
- user_enter_irqoff();
+ if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) {
+ if (ti_flags & _TIF_RESTOREALL)
+ ret = _TIF_RESTOREALL;
+ else
+ regs->gpr[3] = r3;
+ clear_bits(_TIF_PERSYSCALL_MASK, &current_thread_info()->flags);
+ } else {
+ regs->gpr[3] = r3;
+ }
- if (unlikely(!__prep_irq_for_enabled_exit(true))) {
- user_exit_irqoff();
- local_irq_enable();
- local_irq_disable();
- goto again;
+ if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
+ do_syscall_trace_leave(regs);
+ ret |= _TIF_RESTOREALL;
}
- booke_load_dbcr0();
+ local_irq_disable();
+ ret = interrupt_exit_user_prepare_main(ret, regs);
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- local_paca->tm_scratch = regs->msr;
+#ifdef CONFIG_PPC64
+ regs->exit_result = ret;
#endif
- account_cpu_user_exit();
+ return ret;
+}
- /* Restore user access locks last */
- kuap_user_restore(regs);
- kuep_unlock();
+#ifdef CONFIG_PPC64
+notrace unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs)
+{
+ /*
+ * This is called when detecting a soft-pending interrupt as well as
+ * an alternate-return interrupt. So we can't just have the alternate
+ * return path clear SRR1[MSR] and set PACA_IRQ_HARD_DIS (unless
+ * the soft-pending case were to fix things up as well). RI might be
+ * disabled, in which case it gets re-enabled by __hard_irq_disable().
+ */
+ __hard_irq_disable();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ set_kuap(AMR_KUAP_BLOCKED);
+#endif
+
+ trace_hardirqs_off();
+ user_exit_irqoff();
+ account_cpu_user_entry();
+
+ BUG_ON(!user_mode(regs));
+
+ regs->exit_result = interrupt_exit_user_prepare_main(regs->exit_result, regs);
+
+ return regs->exit_result;
+}
+#endif
+
+notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs)
+{
+ unsigned long ret;
+
+ if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
+ BUG_ON(!(regs->msr & MSR_RI));
+ BUG_ON(!(regs->msr & MSR_PR));
+ BUG_ON(arch_irq_disabled_regs(regs));
+ CT_WARN_ON(ct_state() == CONTEXT_USER);
+
+ /*
+ * We don't need to restore AMR on the way back to userspace for KUAP.
+ * AMR can only have been unlocked if we interrupted the kernel.
+ */
+ kuap_assert_locked();
+
+ local_irq_disable();
+
+ ret = interrupt_exit_user_prepare_main(0, regs);
+
+#ifdef CONFIG_PPC64
+ regs->exit_result = ret;
+#endif
return ret;
}
void preempt_schedule_irq(void);
-notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr)
+notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
{
unsigned long flags;
unsigned long ret = 0;
unsigned long kuap;
+ bool stack_store = current_thread_info()->flags &
+ _TIF_EMULATE_STACK_STORE;
if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) &&
unlikely(!(regs->msr & MSR_RI)))
@@ -450,11 +509,6 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign
kuap = kuap_get_and_assert_locked();
- if (unlikely(current_thread_info()->flags & _TIF_EMULATE_STACK_STORE)) {
- clear_bits(_TIF_EMULATE_STACK_STORE, &current_thread_info()->flags);
- ret = 1;
- }
-
local_irq_save(flags);
if (!arch_irq_disabled_regs(regs)) {
@@ -469,17 +523,58 @@ again:
}
}
- if (unlikely(!prep_irq_for_enabled_exit(true, !irqs_disabled_flags(flags))))
+ check_return_regs_valid(regs);
+
+ /*
+ * Stack store exit can't be restarted because the interrupt
+ * stack frame might have been clobbered.
+ */
+ if (!prep_irq_for_enabled_exit(unlikely(stack_store))) {
+ /*
+ * Replay pending soft-masked interrupts now. Don't
+ * just local_irq_enabe(); local_irq_disable(); because
+ * if we are returning from an asynchronous interrupt
+ * here, another one might hit after irqs are enabled,
+ * and it would exit via this same path allowing
+ * another to fire, and so on unbounded.
+ */
+ hard_irq_disable();
+ replay_soft_interrupts();
+ /* Took an interrupt, may have more exit work to do. */
goto again;
- } else {
- /* Returning to a kernel context with local irqs disabled. */
- __hard_EE_RI_disable();
+ }
#ifdef CONFIG_PPC64
+ /*
+ * An interrupt may clear MSR[EE] and set this concurrently,
+ * but it will be marked pending and the exit will be retried.
+ * This leaves a racy window where MSR[EE]=0 and HARD_DIS is
+ * clear, until interrupt_exit_kernel_restart() calls
+ * hard_irq_disable(), which will set HARD_DIS again.
+ */
+ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+
+ } else {
+ check_return_regs_valid(regs);
+
+ if (unlikely(stack_store))
+ __hard_EE_RI_disable();
+ /*
+ * Returning to a kernel context with local irqs disabled.
+ * Here, if EE was enabled in the interrupted context, enable
+ * it on return as well. A problem exists here where a soft
+ * masked interrupt may have cleared MSR[EE] and set HARD_DIS
+ * here, and it will still exist on return to the caller. This
+ * will be resolved by the masked interrupt firing again.
+ */
if (regs->msr & MSR_EE)
local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
-#endif
+#endif /* CONFIG_PPC64 */
}
+ if (unlikely(stack_store)) {
+ clear_bits(_TIF_EMULATE_STACK_STORE, &current_thread_info()->flags);
+ ret = 1;
+ }
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
local_paca->tm_scratch = regs->msr;
@@ -494,3 +589,46 @@ again:
return ret;
}
+
+#ifdef CONFIG_PPC64
+notrace unsigned long interrupt_exit_user_restart(struct pt_regs *regs)
+{
+ __hard_irq_disable();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ set_kuap(AMR_KUAP_BLOCKED);
+#endif
+
+ trace_hardirqs_off();
+ user_exit_irqoff();
+ account_cpu_user_entry();
+
+ BUG_ON(!user_mode(regs));
+
+ regs->exit_result |= interrupt_exit_user_prepare(regs);
+
+ return regs->exit_result;
+}
+
+/*
+ * No real need to return a value here because the stack store case does not
+ * get restarted.
+ */
+notrace unsigned long interrupt_exit_kernel_restart(struct pt_regs *regs)
+{
+ __hard_irq_disable();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ set_kuap(AMR_KUAP_BLOCKED);
+#endif
+
+ if (regs->softe == IRQS_ENABLED)
+ trace_hardirqs_off();
+
+ BUG_ON(user_mode(regs));
+
+ return interrupt_exit_kernel_prepare(regs);
+}
+#endif
diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
new file mode 100644
index 000000000000..4063e8a3f704
--- /dev/null
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -0,0 +1,770 @@
+#include <asm/asm-offsets.h>
+#include <asm/bug.h>
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/exception-64s.h>
+#else
+#include <asm/exception-64e.h>
+#endif
+#include <asm/feature-fixups.h>
+#include <asm/head-64.h>
+#include <asm/hw_irq.h>
+#include <asm/kup.h>
+#include <asm/mmu.h>
+#include <asm/ppc_asm.h>
+#include <asm/ptrace.h>
+#include <asm/tm.h>
+
+ .section ".toc","aw"
+SYS_CALL_TABLE:
+ .tc sys_call_table[TC],sys_call_table
+
+#ifdef CONFIG_COMPAT
+COMPAT_SYS_CALL_TABLE:
+ .tc compat_sys_call_table[TC],compat_sys_call_table
+#endif
+ .previous
+
+ .align 7
+
+.macro DEBUG_SRR_VALID srr
+#ifdef CONFIG_PPC_RFI_SRR_DEBUG
+ .ifc \srr,srr
+ mfspr r11,SPRN_SRR0
+ ld r12,_NIP(r1)
+100: tdne r11,r12
+ EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+ mfspr r11,SPRN_SRR1
+ ld r12,_MSR(r1)
+100: tdne r11,r12
+ EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+ .else
+ mfspr r11,SPRN_HSRR0
+ ld r12,_NIP(r1)
+100: tdne r11,r12
+ EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+ mfspr r11,SPRN_HSRR1
+ ld r12,_MSR(r1)
+100: tdne r11,r12
+ EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+ .endif
+#endif
+.endm
+
+#ifdef CONFIG_PPC_BOOK3S
+.macro system_call_vectored name trapnr
+ .globl system_call_vectored_\name
+system_call_vectored_\name:
+_ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+BEGIN_FTR_SECTION
+ extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */
+ bne tabort_syscall
+END_FTR_SECTION_IFSET(CPU_FTR_TM)
+#endif
+ SCV_INTERRUPT_TO_KERNEL
+ mr r10,r1
+ ld r1,PACAKSAVE(r13)
+ std r10,0(r1)
+ std r11,_NIP(r1)
+ std r12,_MSR(r1)
+ std r0,GPR0(r1)
+ std r10,GPR1(r1)
+ std r2,GPR2(r1)
+ ld r2,PACATOC(r13)
+ mfcr r12
+ li r11,0
+ /* Can we avoid saving r3-r8 in common case? */
+ std r3,GPR3(r1)
+ std r4,GPR4(r1)
+ std r5,GPR5(r1)
+ std r6,GPR6(r1)
+ std r7,GPR7(r1)
+ std r8,GPR8(r1)
+ /* Zero r9-r12, this should only be required when restoring all GPRs */
+ std r11,GPR9(r1)
+ std r11,GPR10(r1)
+ std r11,GPR11(r1)
+ std r11,GPR12(r1)
+ std r9,GPR13(r1)
+ SAVE_NVGPRS(r1)
+ std r11,_XER(r1)
+ std r11,_LINK(r1)
+ std r11,_CTR(r1)
+
+ li r11,\trapnr
+ std r11,_TRAP(r1)
+ std r12,_CCR(r1)
+ addi r10,r1,STACK_FRAME_OVERHEAD
+ ld r11,exception_marker@toc(r2)
+ std r11,-16(r10) /* "regshere" marker */
+
+BEGIN_FTR_SECTION
+ HMT_MEDIUM
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+ /*
+ * scv enters with MSR[EE]=1 and is immediately considered soft-masked.
+ * The entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED,
+ * and interrupts may be masked and pending already.
+ * system_call_exception() will call trace_hardirqs_off() which means
+ * interrupts could already have been blocked before trace_hardirqs_off,
+ * but this is the best we can do.
+ */
+
+ /* Calling convention has r9 = orig r0, r10 = regs */
+ mr r9,r0
+ bl system_call_exception
+
+.Lsyscall_vectored_\name\()_exit:
+ addi r4,r1,STACK_FRAME_OVERHEAD
+ li r5,1 /* scv */
+ bl syscall_exit_prepare
+ std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+.Lsyscall_vectored_\name\()_rst_start:
+ lbz r11,PACAIRQHAPPENED(r13)
+ andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l
+ bne- syscall_vectored_\name\()_restart
+ li r11,IRQS_ENABLED
+ stb r11,PACAIRQSOFTMASK(r13)
+ li r11,0
+ stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS
+
+ ld r2,_CCR(r1)
+ ld r4,_NIP(r1)
+ ld r5,_MSR(r1)
+
+BEGIN_FTR_SECTION
+ stdcx. r0,0,r1 /* to clear the reservation */
+END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+BEGIN_FTR_SECTION
+ HMT_MEDIUM_LOW
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+ cmpdi r3,0
+ bne .Lsyscall_vectored_\name\()_restore_regs
+
+ /* rfscv returns with LR->NIA and CTR->MSR */
+ mtlr r4
+ mtctr r5
+
+ /* Could zero these as per ABI, but we may consider a stricter ABI
+ * which preserves these if libc implementations can benefit, so
+ * restore them for now until further measurement is done. */
+ ld r0,GPR0(r1)
+ ld r4,GPR4(r1)
+ ld r5,GPR5(r1)
+ ld r6,GPR6(r1)
+ ld r7,GPR7(r1)
+ ld r8,GPR8(r1)
+ /* Zero volatile regs that may contain sensitive kernel data */
+ li r9,0
+ li r10,0
+ li r11,0
+ li r12,0
+ mtspr SPRN_XER,r0
+
+ /*
+ * We don't need to restore AMR on the way back to userspace for KUAP.
+ * The value of AMR only matters while we're in the kernel.
+ */
+ mtcr r2
+ ld r2,GPR2(r1)
+ ld r3,GPR3(r1)
+ ld r13,GPR13(r1)
+ ld r1,GPR1(r1)
+ RFSCV_TO_USER
+ b . /* prevent speculative execution */
+
+.Lsyscall_vectored_\name\()_restore_regs:
+ mtspr SPRN_SRR0,r4
+ mtspr SPRN_SRR1,r5
+
+ ld r3,_CTR(r1)
+ ld r4,_LINK(r1)
+ ld r5,_XER(r1)
+
+ REST_NVGPRS(r1)
+ ld r0,GPR0(r1)
+ mtcr r2
+ mtctr r3
+ mtlr r4
+ mtspr SPRN_XER,r5
+ REST_10GPRS(2, r1)
+ REST_2GPRS(12, r1)
+ ld r1,GPR1(r1)
+ RFI_TO_USER
+.Lsyscall_vectored_\name\()_rst_end:
+
+syscall_vectored_\name\()_restart:
+_ASM_NOKPROBE_SYMBOL(syscall_vectored_\name\()_restart)
+ GET_PACA(r13)
+ ld r1,PACA_EXIT_SAVE_R1(r13)
+ ld r2,PACATOC(r13)
+ ld r3,RESULT(r1)
+ addi r4,r1,STACK_FRAME_OVERHEAD
+ li r11,IRQS_ALL_DISABLED
+ stb r11,PACAIRQSOFTMASK(r13)
+ bl syscall_exit_restart
+ std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+ b .Lsyscall_vectored_\name\()_rst_start
+1:
+
+SOFT_MASK_TABLE(.Lsyscall_vectored_\name\()_rst_start, 1b)
+RESTART_TABLE(.Lsyscall_vectored_\name\()_rst_start, .Lsyscall_vectored_\name\()_rst_end, syscall_vectored_\name\()_restart)
+
+.endm
+
+system_call_vectored common 0x3000
+
+/*
+ * We instantiate another entry copy for the SIGILL variant, with TRAP=0x7ff0
+ * which is tested by system_call_exception when r0 is -1 (as set by vector
+ * entry code).
+ */
+system_call_vectored sigill 0x7ff0
+
+
+/*
+ * Entered via kernel return set up by kernel/sstep.c, must match entry regs
+ */
+ .globl system_call_vectored_emulate
+system_call_vectored_emulate:
+_ASM_NOKPROBE_SYMBOL(system_call_vectored_emulate)
+ li r10,IRQS_ALL_DISABLED
+ stb r10,PACAIRQSOFTMASK(r13)
+ b system_call_vectored_common
+#endif /* CONFIG_PPC_BOOK3S */
+
+ .balign IFETCH_ALIGN_BYTES
+ .globl system_call_common_real
+system_call_common_real:
+_ASM_NOKPROBE_SYMBOL(system_call_common_real)
+ ld r10,PACAKMSR(r13) /* get MSR value for kernel */
+ mtmsrd r10
+
+ .balign IFETCH_ALIGN_BYTES
+ .globl system_call_common
+system_call_common:
+_ASM_NOKPROBE_SYMBOL(system_call_common)
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+BEGIN_FTR_SECTION
+ extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */
+ bne tabort_syscall
+END_FTR_SECTION_IFSET(CPU_FTR_TM)
+#endif
+ mr r10,r1
+ ld r1,PACAKSAVE(r13)
+ std r10,0(r1)
+ std r11,_NIP(r1)
+ std r12,_MSR(r1)
+ std r0,GPR0(r1)
+ std r10,GPR1(r1)
+ std r2,GPR2(r1)
+#ifdef CONFIG_PPC_FSL_BOOK3E
+START_BTB_FLUSH_SECTION
+ BTB_FLUSH(r10)
+END_BTB_FLUSH_SECTION
+#endif
+ ld r2,PACATOC(r13)
+ mfcr r12
+ li r11,0
+ /* Can we avoid saving r3-r8 in common case? */
+ std r3,GPR3(r1)
+ std r4,GPR4(r1)
+ std r5,GPR5(r1)
+ std r6,GPR6(r1)
+ std r7,GPR7(r1)
+ std r8,GPR8(r1)
+ /* Zero r9-r12, this should only be required when restoring all GPRs */
+ std r11,GPR9(r1)
+ std r11,GPR10(r1)
+ std r11,GPR11(r1)
+ std r11,GPR12(r1)
+ std r9,GPR13(r1)
+ SAVE_NVGPRS(r1)
+ std r11,_XER(r1)
+ std r11,_CTR(r1)
+ mflr r10
+
+ /*
+ * This clears CR0.SO (bit 28), which is the error indication on
+ * return from this system call.
+ */
+ rldimi r12,r11,28,(63-28)
+ li r11,0xc00
+ std r10,_LINK(r1)
+ std r11,_TRAP(r1)
+ std r12,_CCR(r1)
+ addi r10,r1,STACK_FRAME_OVERHEAD
+ ld r11,exception_marker@toc(r2)
+ std r11,-16(r10) /* "regshere" marker */
+
+#ifdef CONFIG_PPC_BOOK3S
+ li r11,1
+ stb r11,PACASRR_VALID(r13)
+#endif
+
+ /*
+ * We always enter kernel from userspace with irq soft-mask enabled and
+ * nothing pending. system_call_exception() will call
+ * trace_hardirqs_off().
+ */
+ li r11,IRQS_ALL_DISABLED
+ li r12,-1 /* Set MSR_EE and MSR_RI */
+ stb r11,PACAIRQSOFTMASK(r13)
+ mtmsrd r12,1
+
+ /* Calling convention has r9 = orig r0, r10 = regs */
+ mr r9,r0
+ bl system_call_exception
+
+.Lsyscall_exit:
+ addi r4,r1,STACK_FRAME_OVERHEAD
+ li r5,0 /* !scv */
+ bl syscall_exit_prepare
+ std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+#ifdef CONFIG_PPC_BOOK3S
+.Lsyscall_rst_start:
+ lbz r11,PACAIRQHAPPENED(r13)
+ andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l
+ bne- syscall_restart
+#endif
+ li r11,IRQS_ENABLED
+ stb r11,PACAIRQSOFTMASK(r13)
+ li r11,0
+ stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS
+
+ ld r2,_CCR(r1)
+ ld r6,_LINK(r1)
+ mtlr r6
+
+#ifdef CONFIG_PPC_BOOK3S
+ lbz r4,PACASRR_VALID(r13)
+ cmpdi r4,0
+ bne 1f
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+#endif
+ ld r4,_NIP(r1)
+ ld r5,_MSR(r1)
+ mtspr SPRN_SRR0,r4
+ mtspr SPRN_SRR1,r5
+1:
+ DEBUG_SRR_VALID srr
+
+BEGIN_FTR_SECTION
+ stdcx. r0,0,r1 /* to clear the reservation */
+END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+ cmpdi r3,0
+ bne .Lsyscall_restore_regs
+ /* Zero volatile regs that may contain sensitive kernel data */
+ li r0,0
+ li r4,0
+ li r5,0
+ li r6,0
+ li r7,0
+ li r8,0
+ li r9,0
+ li r10,0
+ li r11,0
+ li r12,0
+ mtctr r0
+ mtspr SPRN_XER,r0
+.Lsyscall_restore_regs_cont:
+
+BEGIN_FTR_SECTION
+ HMT_MEDIUM_LOW
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+ /*
+ * We don't need to restore AMR on the way back to userspace for KUAP.
+ * The value of AMR only matters while we're in the kernel.
+ */
+ mtcr r2
+ ld r2,GPR2(r1)
+ ld r3,GPR3(r1)
+ ld r13,GPR13(r1)
+ ld r1,GPR1(r1)
+ RFI_TO_USER
+ b . /* prevent speculative execution */
+
+.Lsyscall_restore_regs:
+ ld r3,_CTR(r1)
+ ld r4,_XER(r1)
+ REST_NVGPRS(r1)
+ mtctr r3
+ mtspr SPRN_XER,r4
+ ld r0,GPR0(r1)
+ REST_8GPRS(4, r1)
+ ld r12,GPR12(r1)
+ b .Lsyscall_restore_regs_cont
+.Lsyscall_rst_end:
+
+#ifdef CONFIG_PPC_BOOK3S
+syscall_restart:
+_ASM_NOKPROBE_SYMBOL(syscall_restart)
+ GET_PACA(r13)
+ ld r1,PACA_EXIT_SAVE_R1(r13)
+ ld r2,PACATOC(r13)
+ ld r3,RESULT(r1)
+ addi r4,r1,STACK_FRAME_OVERHEAD
+ li r11,IRQS_ALL_DISABLED
+ stb r11,PACAIRQSOFTMASK(r13)
+ bl syscall_exit_restart
+ std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+ b .Lsyscall_rst_start
+1:
+
+SOFT_MASK_TABLE(.Lsyscall_rst_start, 1b)
+RESTART_TABLE(.Lsyscall_rst_start, .Lsyscall_rst_end, syscall_restart)
+#endif
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+tabort_syscall:
+_ASM_NOKPROBE_SYMBOL(tabort_syscall)
+ /* Firstly we need to enable TM in the kernel */
+ mfmsr r10
+ li r9, 1
+ rldimi r10, r9, MSR_TM_LG, 63-MSR_TM_LG
+ mtmsrd r10, 0
+
+ /* tabort, this dooms the transaction, nothing else */
+ li r9, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)
+ TABORT(R9)
+
+ /*
+ * Return directly to userspace. We have corrupted user register state,
+ * but userspace will never see that register state. Execution will
+ * resume after the tbegin of the aborted transaction with the
+ * checkpointed register state.
+ */
+ li r9, MSR_RI
+ andc r10, r10, r9
+ mtmsrd r10, 1
+ mtspr SPRN_SRR0, r11
+ mtspr SPRN_SRR1, r12
+ RFI_TO_USER
+ b . /* prevent speculative execution */
+#endif
+
+ /*
+ * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not
+ * touched, no exit work created, then this can be used.
+ */
+ .balign IFETCH_ALIGN_BYTES
+ .globl fast_interrupt_return_srr
+fast_interrupt_return_srr:
+_ASM_NOKPROBE_SYMBOL(fast_interrupt_return_srr)
+ kuap_check_amr r3, r4
+ ld r5,_MSR(r1)
+ andi. r0,r5,MSR_PR
+#ifdef CONFIG_PPC_BOOK3S
+ beq 1f
+ kuap_user_restore r3, r4
+ b .Lfast_user_interrupt_return_srr
+1: kuap_kernel_restore r3, r4
+ andi. r0,r5,MSR_RI
+ li r3,0 /* 0 return value, no EMULATE_STACK_STORE */
+ bne+ .Lfast_kernel_interrupt_return_srr
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl unrecoverable_exception
+ b . /* should not get here */
+#else
+ bne .Lfast_user_interrupt_return_srr
+ b .Lfast_kernel_interrupt_return_srr
+#endif
+
+.macro interrupt_return_macro srr
+ .balign IFETCH_ALIGN_BYTES
+ .globl interrupt_return_\srr
+interrupt_return_\srr\():
+_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\())
+ ld r4,_MSR(r1)
+ andi. r0,r4,MSR_PR
+ beq interrupt_return_\srr\()_kernel
+interrupt_return_\srr\()_user: /* make backtraces match the _kernel variant */
+_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl interrupt_exit_user_prepare
+ cmpdi r3,0
+ bne- .Lrestore_nvgprs_\srr
+.Lrestore_nvgprs_\srr\()_cont:
+ std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+#ifdef CONFIG_PPC_BOOK3S
+.Linterrupt_return_\srr\()_user_rst_start:
+ lbz r11,PACAIRQHAPPENED(r13)
+ andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l
+ bne- interrupt_return_\srr\()_user_restart
+#endif
+ li r11,IRQS_ENABLED
+ stb r11,PACAIRQSOFTMASK(r13)
+ li r11,0
+ stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS
+
+.Lfast_user_interrupt_return_\srr\():
+#ifdef CONFIG_PPC_BOOK3S
+ .ifc \srr,srr
+ lbz r4,PACASRR_VALID(r13)
+ .else
+ lbz r4,PACAHSRR_VALID(r13)
+ .endif
+ cmpdi r4,0
+ li r4,0
+ bne 1f
+#endif
+ ld r11,_NIP(r1)
+ ld r12,_MSR(r1)
+ .ifc \srr,srr
+ mtspr SPRN_SRR0,r11
+ mtspr SPRN_SRR1,r12
+1:
+#ifdef CONFIG_PPC_BOOK3S
+ stb r4,PACASRR_VALID(r13)
+#endif
+ .else
+ mtspr SPRN_HSRR0,r11
+ mtspr SPRN_HSRR1,r12
+1:
+#ifdef CONFIG_PPC_BOOK3S
+ stb r4,PACAHSRR_VALID(r13)
+#endif
+ .endif
+ DEBUG_SRR_VALID \srr
+
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+ lbz r4,PACAIRQSOFTMASK(r13)
+ tdnei r4,IRQS_ENABLED
+#endif
+
+BEGIN_FTR_SECTION
+ ld r10,_PPR(r1)
+ mtspr SPRN_PPR,r10
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+BEGIN_FTR_SECTION
+ stdcx. r0,0,r1 /* to clear the reservation */
+FTR_SECTION_ELSE
+ ldarx r0,0,r1
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+ ld r3,_CCR(r1)
+ ld r4,_LINK(r1)
+ ld r5,_CTR(r1)
+ ld r6,_XER(r1)
+ li r0,0
+
+ REST_4GPRS(7, r1)
+ REST_2GPRS(11, r1)
+ REST_GPR(13, r1)
+
+ mtcr r3
+ mtlr r4
+ mtctr r5
+ mtspr SPRN_XER,r6
+
+ REST_4GPRS(2, r1)
+ REST_GPR(6, r1)
+ REST_GPR(0, r1)
+ REST_GPR(1, r1)
+ .ifc \srr,srr
+ RFI_TO_USER
+ .else
+ HRFI_TO_USER
+ .endif
+ b . /* prevent speculative execution */
+.Linterrupt_return_\srr\()_user_rst_end:
+
+.Lrestore_nvgprs_\srr\():
+ REST_NVGPRS(r1)
+ b .Lrestore_nvgprs_\srr\()_cont
+
+#ifdef CONFIG_PPC_BOOK3S
+interrupt_return_\srr\()_user_restart:
+_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user_restart)
+ GET_PACA(r13)
+ ld r1,PACA_EXIT_SAVE_R1(r13)
+ ld r2,PACATOC(r13)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ li r11,IRQS_ALL_DISABLED
+ stb r11,PACAIRQSOFTMASK(r13)
+ bl interrupt_exit_user_restart
+ std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+ b .Linterrupt_return_\srr\()_user_rst_start
+1:
+
+SOFT_MASK_TABLE(.Linterrupt_return_\srr\()_user_rst_start, 1b)
+RESTART_TABLE(.Linterrupt_return_\srr\()_user_rst_start, .Linterrupt_return_\srr\()_user_rst_end, interrupt_return_\srr\()_user_restart)
+#endif
+
+ .balign IFETCH_ALIGN_BYTES
+interrupt_return_\srr\()_kernel:
+_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl interrupt_exit_kernel_prepare
+
+ std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+.Linterrupt_return_\srr\()_kernel_rst_start:
+ ld r11,SOFTE(r1)
+ cmpwi r11,IRQS_ENABLED
+ stb r11,PACAIRQSOFTMASK(r13)
+ bne 1f
+#ifdef CONFIG_PPC_BOOK3S
+ lbz r11,PACAIRQHAPPENED(r13)
+ andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l
+ bne- interrupt_return_\srr\()_kernel_restart
+#endif
+ li r11,0
+ stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS
+1:
+
+.Lfast_kernel_interrupt_return_\srr\():
+ cmpdi cr1,r3,0
+#ifdef CONFIG_PPC_BOOK3S
+ .ifc \srr,srr
+ lbz r4,PACASRR_VALID(r13)
+ .else
+ lbz r4,PACAHSRR_VALID(r13)
+ .endif
+ cmpdi r4,0
+ li r4,0
+ bne 1f
+#endif
+ ld r11,_NIP(r1)
+ ld r12,_MSR(r1)
+ .ifc \srr,srr
+ mtspr SPRN_SRR0,r11
+ mtspr SPRN_SRR1,r12
+1:
+#ifdef CONFIG_PPC_BOOK3S
+ stb r4,PACASRR_VALID(r13)
+#endif
+ .else
+ mtspr SPRN_HSRR0,r11
+ mtspr SPRN_HSRR1,r12
+1:
+#ifdef CONFIG_PPC_BOOK3S
+ stb r4,PACAHSRR_VALID(r13)
+#endif
+ .endif
+ DEBUG_SRR_VALID \srr
+
+BEGIN_FTR_SECTION
+ stdcx. r0,0,r1 /* to clear the reservation */
+FTR_SECTION_ELSE
+ ldarx r0,0,r1
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+ ld r3,_LINK(r1)
+ ld r4,_CTR(r1)
+ ld r5,_XER(r1)
+ ld r6,_CCR(r1)
+ li r0,0
+
+ REST_4GPRS(7, r1)
+ REST_2GPRS(11, r1)
+
+ mtlr r3
+ mtctr r4
+ mtspr SPRN_XER,r5
+
+ /*
+ * Leaving a stale exception_marker on the stack can confuse
+ * the reliable stack unwinder later on. Clear it.
+ */
+ std r0,STACK_FRAME_OVERHEAD-16(r1)
+
+ REST_4GPRS(2, r1)
+
+ bne- cr1,1f /* emulate stack store */
+ mtcr r6
+ REST_GPR(6, r1)
+ REST_GPR(0, r1)
+ REST_GPR(1, r1)
+ .ifc \srr,srr
+ RFI_TO_KERNEL
+ .else
+ HRFI_TO_KERNEL
+ .endif
+ b . /* prevent speculative execution */
+
+1: /*
+ * Emulate stack store with update. New r1 value was already calculated
+ * and updated in our interrupt regs by emulate_loadstore, but we can't
+ * store the previous value of r1 to the stack before re-loading our
+ * registers from it, otherwise they could be clobbered. Use
+ * PACA_EXGEN as temporary storage to hold the store data, as
+ * interrupts are disabled here so it won't be clobbered.
+ */
+ mtcr r6
+ std r9,PACA_EXGEN+0(r13)
+ addi r9,r1,INT_FRAME_SIZE /* get original r1 */
+ REST_GPR(6, r1)
+ REST_GPR(0, r1)
+ REST_GPR(1, r1)
+ std r9,0(r1) /* perform store component of stdu */
+ ld r9,PACA_EXGEN+0(r13)
+
+ .ifc \srr,srr
+ RFI_TO_KERNEL
+ .else
+ HRFI_TO_KERNEL
+ .endif
+ b . /* prevent speculative execution */
+.Linterrupt_return_\srr\()_kernel_rst_end:
+
+#ifdef CONFIG_PPC_BOOK3S
+interrupt_return_\srr\()_kernel_restart:
+_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel_restart)
+ GET_PACA(r13)
+ ld r1,PACA_EXIT_SAVE_R1(r13)
+ ld r2,PACATOC(r13)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ li r11,IRQS_ALL_DISABLED
+ stb r11,PACAIRQSOFTMASK(r13)
+ bl interrupt_exit_kernel_restart
+ std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+ b .Linterrupt_return_\srr\()_kernel_rst_start
+1:
+
+SOFT_MASK_TABLE(.Linterrupt_return_\srr\()_kernel_rst_start, 1b)
+RESTART_TABLE(.Linterrupt_return_\srr\()_kernel_rst_start, .Linterrupt_return_\srr\()_kernel_rst_end, interrupt_return_\srr\()_kernel_restart)
+#endif
+
+.endm
+
+interrupt_return_macro srr
+#ifdef CONFIG_PPC_BOOK3S
+interrupt_return_macro hsrr
+
+ .globl __end_soft_masked
+__end_soft_masked:
+DEFINE_FIXED_SYMBOL(__end_soft_masked)
+#endif /* CONFIG_PPC_BOOK3S */
+
+#ifdef CONFIG_PPC_BOOK3S
+_GLOBAL(ret_from_fork_scv)
+ bl schedule_tail
+ REST_NVGPRS(r1)
+ li r3,0 /* fork() return value */
+ b .Lsyscall_vectored_common_exit
+#endif
+
+_GLOBAL(ret_from_fork)
+ bl schedule_tail
+ REST_NVGPRS(r1)
+ li r3,0 /* fork() return value */
+ b .Lsyscall_exit
+
+_GLOBAL(ret_from_kernel_thread)
+ bl schedule_tail
+ REST_NVGPRS(r1)
+ mtctr r14
+ mr r3,r15
+#ifdef PPC64_ELF_ABI_v2
+ mr r12,r14
+#endif
+ bctrl
+ li r3,0
+ b .Lsyscall_exit
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 72cb45393ef2..91e63eac4e8f 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -121,6 +121,7 @@ void replay_soft_interrupts(void)
ppc_save_regs(&regs);
regs.softe = IRQS_ENABLED;
+ regs.msr |= MSR_EE;
again:
if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
@@ -217,6 +218,100 @@ static inline void replay_soft_interrupts_irqrestore(void)
#define replay_soft_interrupts_irqrestore() replay_soft_interrupts()
#endif
+#ifdef CONFIG_CC_HAS_ASM_GOTO
+notrace void arch_local_irq_restore(unsigned long mask)
+{
+ unsigned char irq_happened;
+
+ /* Write the new soft-enabled value if it is a disable */
+ if (mask) {
+ irq_soft_mask_set(mask);
+ return;
+ }
+
+ /*
+ * After the stb, interrupts are unmasked and there are no interrupts
+ * pending replay. The restart sequence makes this atomic with
+ * respect to soft-masked interrupts. If this was just a simple code
+ * sequence, a soft-masked interrupt could become pending right after
+ * the comparison and before the stb.
+ *
+ * This allows interrupts to be unmasked without hard disabling, and
+ * also without new hard interrupts coming in ahead of pending ones.
+ */
+ asm_volatile_goto(
+"1: \n"
+" lbz 9,%0(13) \n"
+" cmpwi 9,0 \n"
+" bne %l[happened] \n"
+" stb 9,%1(13) \n"
+"2: \n"
+ RESTART_TABLE(1b, 2b, 1b)
+ : : "i" (offsetof(struct paca_struct, irq_happened)),
+ "i" (offsetof(struct paca_struct, irq_soft_mask))
+ : "cr0", "r9"
+ : happened);
+
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+ WARN_ON_ONCE(!(mfmsr() & MSR_EE));
+
+ return;
+
+happened:
+ irq_happened = get_irq_happened();
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+ WARN_ON_ONCE(!irq_happened);
+
+ if (irq_happened == PACA_IRQ_HARD_DIS) {
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+ WARN_ON_ONCE(mfmsr() & MSR_EE);
+ irq_soft_mask_set(IRQS_ENABLED);
+ local_paca->irq_happened = 0;
+ __hard_irq_enable();
+ return;
+ }
+
+ /* Have interrupts to replay, need to hard disable first */
+ if (!(irq_happened & PACA_IRQ_HARD_DIS)) {
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) {
+ if (!(mfmsr() & MSR_EE)) {
+ /*
+ * An interrupt could have come in and cleared
+ * MSR[EE] and set IRQ_HARD_DIS, so check
+ * IRQ_HARD_DIS again and warn if it is still
+ * clear.
+ */
+ irq_happened = get_irq_happened();
+ WARN_ON_ONCE(!(irq_happened & PACA_IRQ_HARD_DIS));
+ }
+ }
+ __hard_irq_disable();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ } else {
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) {
+ if (WARN_ON_ONCE(mfmsr() & MSR_EE))
+ __hard_irq_disable();
+ }
+ }
+
+ /*
+ * Disable preempt here, so that the below preempt_enable will
+ * perform resched if required (a replayed interrupt may set
+ * need_resched).
+ */
+ preempt_disable();
+ irq_soft_mask_set(IRQS_ALL_DISABLED);
+ trace_hardirqs_off();
+
+ replay_soft_interrupts_irqrestore();
+ local_paca->irq_happened = 0;
+
+ trace_hardirqs_on();
+ irq_soft_mask_set(IRQS_ENABLED);
+ __hard_irq_enable();
+ preempt_enable();
+}
+#else
notrace void arch_local_irq_restore(unsigned long mask)
{
unsigned char irq_happened;
@@ -288,6 +383,7 @@ notrace void arch_local_irq_restore(unsigned long mask)
__hard_irq_enable();
preempt_enable();
}
+#endif
EXPORT_SYMBOL(arch_local_irq_restore);
/*
diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c
index ce87dc5ea23c..5277cf582c16 100644
--- a/arch/powerpc/kernel/jump_label.c
+++ b/arch/powerpc/kernel/jump_label.c
@@ -11,10 +11,10 @@
void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type)
{
- struct ppc_inst *addr = (struct ppc_inst *)jump_entry_code(entry);
+ u32 *addr = (u32 *)jump_entry_code(entry);
if (type == JUMP_LABEL_JMP)
patch_branch(addr, jump_entry_target(entry), 0);
else
- patch_instruction(addr, ppc_inst(PPC_INST_NOP));
+ patch_instruction(addr, ppc_inst(PPC_RAW_NOP()));
}
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index 7dd2ad3603ad..bdee7262c080 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -147,7 +147,7 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs)
return 0;
if (*(u32 *)regs->nip == BREAK_INSTR)
- regs->nip += BREAK_INSTR_SIZE;
+ regs_add_return_ip(regs, BREAK_INSTR_SIZE);
return 1;
}
@@ -372,7 +372,7 @@ int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
{
- regs->nip = pc;
+ regs_set_return_ip(regs, pc);
}
/*
@@ -394,7 +394,7 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code,
case 'c':
/* handle the optional parameter */
if (kgdb_hex2long(&ptr, &addr))
- linux_regs->nip = addr;
+ regs_set_return_ip(linux_regs, addr);
atomic_set(&kgdb_cpu_doing_single_step, -1);
/* set the trace bit if we're stepping */
@@ -402,9 +402,9 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code,
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
mtspr(SPRN_DBCR0,
mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
- linux_regs->msr |= MSR_DE;
+ regs_set_return_msr(linux_regs, linux_regs->msr | MSR_DE);
#else
- linux_regs->msr |= MSR_SE;
+ regs_set_return_msr(linux_regs, linux_regs->msr | MSR_SE);
#endif
atomic_set(&kgdb_cpu_doing_single_step,
raw_smp_processor_id());
@@ -417,11 +417,10 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code,
int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
{
+ u32 instr, *addr = (u32 *)bpt->bpt_addr;
int err;
- unsigned int instr;
- struct ppc_inst *addr = (struct ppc_inst *)bpt->bpt_addr;
- err = get_kernel_nofault(instr, (unsigned *) addr);
+ err = get_kernel_nofault(instr, addr);
if (err)
return err;
@@ -429,7 +428,7 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
if (err)
return -EFAULT;
- *(unsigned int *)bpt->saved_instr = instr;
+ *(u32 *)bpt->saved_instr = instr;
return 0;
}
@@ -438,7 +437,7 @@ int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
{
int err;
unsigned int instr = *(unsigned int *)bpt->saved_instr;
- struct ppc_inst *addr = (struct ppc_inst *)bpt->bpt_addr;
+ u32 *addr = (u32 *)bpt->bpt_addr;
err = patch_instruction(addr, ppc_inst(instr));
if (err)
diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c
index 660138f6c4b2..7154d58338cc 100644
--- a/arch/powerpc/kernel/kprobes-ftrace.c
+++ b/arch/powerpc/kernel/kprobes-ftrace.c
@@ -39,7 +39,7 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
* On powerpc, NIP is *before* this instruction for the
* pre handler
*/
- regs->nip -= MCOUNT_INSN_SIZE;
+ regs_add_return_ip(regs, -MCOUNT_INSN_SIZE);
__this_cpu_write(current_kprobe, p);
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
@@ -48,7 +48,7 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
* Emulate singlestep (and also recover regs->nip)
* as if there is a nop
*/
- regs->nip += MCOUNT_INSN_SIZE;
+ regs_add_return_ip(regs, MCOUNT_INSN_SIZE);
if (unlikely(p->post_handler)) {
kcb->kprobe_status = KPROBE_HIT_SSDONE;
p->post_handler(p, regs, 0);
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index e8c2a6373157..cbc28d1a2e1b 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -19,11 +19,13 @@
#include <linux/extable.h>
#include <linux/kdebug.h>
#include <linux/slab.h>
+#include <linux/moduleloader.h>
#include <asm/code-patching.h>
#include <asm/cacheflush.h>
#include <asm/sstep.h>
#include <asm/sections.h>
#include <asm/inst.h>
+#include <asm/set_memory.h>
#include <linux/uaccess.h>
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
@@ -103,28 +105,42 @@ kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset)
return addr;
}
+void *alloc_insn_page(void)
+{
+ void *page;
+
+ page = module_alloc(PAGE_SIZE);
+ if (!page)
+ return NULL;
+
+ if (strict_module_rwx_enabled()) {
+ set_memory_ro((unsigned long)page, 1);
+ set_memory_x((unsigned long)page, 1);
+ }
+ return page;
+}
+
int arch_prepare_kprobe(struct kprobe *p)
{
int ret = 0;
struct kprobe *prev;
- struct ppc_inst insn = ppc_inst_read((struct ppc_inst *)p->addr);
+ struct ppc_inst insn = ppc_inst_read(p->addr);
if ((unsigned long)p->addr & 0x03) {
printk("Attempt to register kprobe at an unaligned address\n");
ret = -EINVAL;
- } else if (IS_MTMSRD(insn) || IS_RFID(insn) || IS_RFI(insn)) {
- printk("Cannot register a kprobe on rfi/rfid or mtmsr[d]\n");
+ } else if (IS_MTMSRD(insn) || IS_RFID(insn)) {
+ printk("Cannot register a kprobe on mtmsr[d]/rfi[d]\n");
ret = -EINVAL;
} else if ((unsigned long)p->addr & ~PAGE_MASK &&
- ppc_inst_prefixed(ppc_inst_read((struct ppc_inst *)(p->addr - 1)))) {
+ ppc_inst_prefixed(ppc_inst_read(p->addr - 1))) {
printk("Cannot register a kprobe on the second word of prefixed instruction\n");
ret = -EINVAL;
}
preempt_disable();
prev = get_kprobe(p->addr - 1);
preempt_enable_no_resched();
- if (prev &&
- ppc_inst_prefixed(ppc_inst_read((struct ppc_inst *)prev->ainsn.insn))) {
+ if (prev && ppc_inst_prefixed(ppc_inst_read(prev->ainsn.insn))) {
printk("Cannot register a kprobe on the second word of prefixed instruction\n");
ret = -EINVAL;
}
@@ -138,7 +154,7 @@ int arch_prepare_kprobe(struct kprobe *p)
}
if (!ret) {
- patch_instruction((struct ppc_inst *)p->ainsn.insn, insn);
+ patch_instruction(p->ainsn.insn, insn);
p->opcode = ppc_inst_val(insn);
}
@@ -149,13 +165,13 @@ NOKPROBE_SYMBOL(arch_prepare_kprobe);
void arch_arm_kprobe(struct kprobe *p)
{
- patch_instruction((struct ppc_inst *)p->addr, ppc_inst(BREAKPOINT_INSTRUCTION));
+ WARN_ON_ONCE(patch_instruction(p->addr, ppc_inst(BREAKPOINT_INSTRUCTION)));
}
NOKPROBE_SYMBOL(arch_arm_kprobe);
void arch_disarm_kprobe(struct kprobe *p)
{
- patch_instruction((struct ppc_inst *)p->addr, ppc_inst(p->opcode));
+ WARN_ON_ONCE(patch_instruction(p->addr, ppc_inst(p->opcode)));
}
NOKPROBE_SYMBOL(arch_disarm_kprobe);
@@ -178,7 +194,7 @@ static nokprobe_inline void prepare_singlestep(struct kprobe *p, struct pt_regs
* variant as values in regs could play a part in
* if the trap is taken or not
*/
- regs->nip = (unsigned long)p->ainsn.insn;
+ regs_set_return_ip(regs, (unsigned long)p->ainsn.insn);
}
static nokprobe_inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
@@ -228,7 +244,7 @@ NOKPROBE_SYMBOL(arch_prepare_kretprobe);
static int try_to_emulate(struct kprobe *p, struct pt_regs *regs)
{
int ret;
- struct ppc_inst insn = ppc_inst_read((struct ppc_inst *)p->ainsn.insn);
+ struct ppc_inst insn = ppc_inst_read(p->ainsn.insn);
/* regs->nip is also adjusted if emulate_step returns 1 */
ret = emulate_step(regs, insn);
@@ -319,8 +335,9 @@ int kprobe_handler(struct pt_regs *regs)
kprobe_opcode_t insn = *p->ainsn.insn;
if (kcb->kprobe_status == KPROBE_HIT_SS && is_trap(insn)) {
/* Turn off 'trace' bits */
- regs->msr &= ~MSR_SINGLESTEP;
- regs->msr |= kcb->kprobe_saved_msr;
+ regs_set_return_msr(regs,
+ (regs->msr & ~MSR_SINGLESTEP) |
+ kcb->kprobe_saved_msr);
goto no_kprobe;
}
@@ -415,7 +432,7 @@ static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
* we end up emulating it in kprobe_handler(), which increments the nip
* again.
*/
- regs->nip = orig_ret_address - 4;
+ regs_set_return_ip(regs, orig_ret_address - 4);
regs->link = orig_ret_address;
return 0;
@@ -439,7 +456,7 @@ int kprobe_post_handler(struct pt_regs *regs)
if (!cur || user_mode(regs))
return 0;
- len = ppc_inst_len(ppc_inst_read((struct ppc_inst *)cur->ainsn.insn));
+ len = ppc_inst_len(ppc_inst_read(cur->ainsn.insn));
/* make sure we got here for instruction we have a kprobe on */
if (((unsigned long)cur->ainsn.insn + len) != regs->nip)
return 0;
@@ -450,8 +467,8 @@ int kprobe_post_handler(struct pt_regs *regs)
}
/* Adjust nip to after the single-stepped instruction */
- regs->nip = (unsigned long)cur->addr + len;
- regs->msr |= kcb->kprobe_saved_msr;
+ regs_set_return_ip(regs, (unsigned long)cur->addr + len);
+ regs_set_return_msr(regs, regs->msr | kcb->kprobe_saved_msr);
/*Restore back the original saved kprobes variables and continue. */
if (kcb->kprobe_status == KPROBE_REENTER) {
@@ -490,9 +507,11 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
* and allow the page fault handler to continue as a
* normal page fault.
*/
- regs->nip = (unsigned long)cur->addr;
- regs->msr &= ~MSR_SINGLESTEP; /* Turn off 'trace' bits */
- regs->msr |= kcb->kprobe_saved_msr;
+ regs_set_return_ip(regs, (unsigned long)cur->addr);
+ /* Turn off 'trace' bits */
+ regs_set_return_msr(regs,
+ (regs->msr & ~MSR_SINGLESTEP) |
+ kcb->kprobe_saved_msr);
if (kcb->kprobe_status == KPROBE_REENTER)
restore_previous_kprobe(kcb);
else
@@ -502,28 +521,11 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
case KPROBE_HIT_ACTIVE:
case KPROBE_HIT_SSDONE:
/*
- * We increment the nmissed count for accounting,
- * we can also use npre/npostfault count for accounting
- * these specific fault cases.
- */
- kprobes_inc_nmissed_count(cur);
-
- /*
- * We come here because instructions in the pre/post
- * handler caused the page_fault, this could happen
- * if handler tries to access user space by
- * copy_from_user(), get_user() etc. Let the
- * user-specified handler try to fix it first.
- */
- if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
- return 1;
-
- /*
* In case the user-specified fault handler returned
* zero, try to fix up.
*/
if ((entry = search_exception_tables(regs->nip)) != NULL) {
- regs->nip = extable_fixup(entry);
+ regs_set_return_ip(regs, extable_fixup(entry));
return 1;
}
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 9a3c2a84a2ac..47a683cd00d2 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -18,6 +18,7 @@
#include <linux/extable.h>
#include <linux/ftrace.h>
#include <linux/memblock.h>
+#include <linux/of.h>
#include <asm/interrupt.h>
#include <asm/machdep.h>
@@ -273,7 +274,7 @@ void mce_common_process_ue(struct pt_regs *regs,
entry = search_kernel_exception_table(regs->nip);
if (entry) {
mce_err->ignore_event = true;
- regs->nip = extable_fixup(entry);
+ regs_set_return_ip(regs, extable_fixup(entry));
}
}
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 667104d4c455..c2f55fe7092d 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -463,7 +463,7 @@ static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr,
pfn = addr_to_pfn(regs, regs->nip);
if (pfn != ULONG_MAX) {
instr_addr = (pfn << PAGE_SHIFT) + (regs->nip & ~PAGE_MASK);
- instr = ppc_inst_read((struct ppc_inst *)instr_addr);
+ instr = ppc_inst_read((u32 *)instr_addr);
if (!analyse_instr(&op, &tmp, instr)) {
pfn = addr_to_pfn(regs, op.ea);
*addr = op.ea;
@@ -481,12 +481,11 @@ static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr,
return -1;
}
-static int mce_handle_ierror(struct pt_regs *regs,
+static int mce_handle_ierror(struct pt_regs *regs, unsigned long srr1,
const struct mce_ierror_table table[],
struct mce_error_info *mce_err, uint64_t *addr,
uint64_t *phys_addr)
{
- uint64_t srr1 = regs->msr;
int handled = 0;
int i;
@@ -695,19 +694,19 @@ static long mce_handle_ue_error(struct pt_regs *regs,
}
static long mce_handle_error(struct pt_regs *regs,
+ unsigned long srr1,
const struct mce_derror_table dtable[],
const struct mce_ierror_table itable[])
{
struct mce_error_info mce_err = { 0 };
uint64_t addr, phys_addr = ULONG_MAX;
- uint64_t srr1 = regs->msr;
long handled;
if (SRR1_MC_LOADSTORE(srr1))
handled = mce_handle_derror(regs, dtable, &mce_err, &addr,
&phys_addr);
else
- handled = mce_handle_ierror(regs, itable, &mce_err, &addr,
+ handled = mce_handle_ierror(regs, srr1, itable, &mce_err, &addr,
&phys_addr);
if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE)
@@ -723,16 +722,20 @@ long __machine_check_early_realmode_p7(struct pt_regs *regs)
/* P7 DD1 leaves top bits of DSISR undefined */
regs->dsisr &= 0x0000ffff;
- return mce_handle_error(regs, mce_p7_derror_table, mce_p7_ierror_table);
+ return mce_handle_error(regs, regs->msr,
+ mce_p7_derror_table, mce_p7_ierror_table);
}
long __machine_check_early_realmode_p8(struct pt_regs *regs)
{
- return mce_handle_error(regs, mce_p8_derror_table, mce_p8_ierror_table);
+ return mce_handle_error(regs, regs->msr,
+ mce_p8_derror_table, mce_p8_ierror_table);
}
long __machine_check_early_realmode_p9(struct pt_regs *regs)
{
+ unsigned long srr1 = regs->msr;
+
/*
* On POWER9 DD2.1 and below, it's possible to get a machine check
* caused by a paste instruction where only DSISR bit 25 is set. This
@@ -746,10 +749,39 @@ long __machine_check_early_realmode_p9(struct pt_regs *regs)
if (SRR1_MC_LOADSTORE(regs->msr) && regs->dsisr == 0x02000000)
return 1;
- return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table);
+ /*
+ * Async machine check due to bad real address from store or foreign
+ * link time out comes with the load/store bit (PPC bit 42) set in
+ * SRR1, but the cause comes in SRR1 not DSISR. Clear bit 42 so we're
+ * directed to the ierror table so it will find the cause (which
+ * describes it correctly as a store error).
+ */
+ if (SRR1_MC_LOADSTORE(srr1) &&
+ ((srr1 & 0x081c0000) == 0x08140000 ||
+ (srr1 & 0x081c0000) == 0x08180000)) {
+ srr1 &= ~PPC_BIT(42);
+ }
+
+ return mce_handle_error(regs, srr1,
+ mce_p9_derror_table, mce_p9_ierror_table);
}
long __machine_check_early_realmode_p10(struct pt_regs *regs)
{
- return mce_handle_error(regs, mce_p10_derror_table, mce_p10_ierror_table);
+ unsigned long srr1 = regs->msr;
+
+ /*
+ * Async machine check due to bad real address from store comes with
+ * the load/store bit (PPC bit 42) set in SRR1, but the cause comes in
+ * SRR1 not DSISR. Clear bit 42 so we're directed to the ierror table
+ * so it will find the cause (which describes it correctly as a store
+ * error).
+ */
+ if (SRR1_MC_LOADSTORE(srr1) &&
+ (srr1 & 0x081c0000) == 0x08140000) {
+ srr1 &= ~PPC_BIT(42);
+ }
+
+ return mce_handle_error(regs, srr1,
+ mce_p10_derror_table, mce_p10_ierror_table);
}
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 6a076bef2932..39ab15419592 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -388,9 +388,3 @@ _GLOBAL(start_secondary_resume)
bl start_secondary
b .
#endif /* CONFIG_SMP */
-
-/*
- * This routine is just here to keep GCC happy - sigh...
- */
-_GLOBAL(__main)
- blr
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index 3f35c8d20be7..ed04a3ba66fe 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -92,12 +92,14 @@ int module_finalize(const Elf_Ehdr *hdr,
static __always_inline void *
__module_alloc(unsigned long size, unsigned long start, unsigned long end)
{
+ pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : PAGE_KERNEL_EXEC;
+
/*
* Don't do huge page allocations for modules yet until more testing
* is done. STRICT_MODULE_RWX may require extra work to support this
* too.
*/
- return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, PAGE_KERNEL_EXEC,
+ return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, prot,
VM_FLUSH_RESET_PERMS | VM_NO_HUGE_VMAP,
NUMA_NO_NODE, __builtin_return_address(0));
}
diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c
index c27b8687b82a..f417afc08d33 100644
--- a/arch/powerpc/kernel/module_32.c
+++ b/arch/powerpc/kernel/module_32.c
@@ -145,10 +145,9 @@ int module_frob_arch_sections(Elf32_Ehdr *hdr,
static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val)
{
- if (entry->jump[0] != (PPC_INST_ADDIS | __PPC_RT(R12) | PPC_HA(val)))
+ if (entry->jump[0] != PPC_RAW_LIS(_R12, PPC_HA(val)))
return 0;
- if (entry->jump[1] != (PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12) |
- PPC_LO(val)))
+ if (entry->jump[1] != PPC_RAW_ADDI(_R12, _R12, PPC_LO(val)))
return 0;
return 1;
}
@@ -175,16 +174,10 @@ static uint32_t do_plt_call(void *location,
entry++;
}
- /*
- * lis r12, sym@ha
- * addi r12, r12, sym@l
- * mtctr r12
- * bctr
- */
- entry->jump[0] = PPC_INST_ADDIS | __PPC_RT(R12) | PPC_HA(val);
- entry->jump[1] = PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12) | PPC_LO(val);
- entry->jump[2] = PPC_INST_MTCTR | __PPC_RS(R12);
- entry->jump[3] = PPC_INST_BCTR;
+ entry->jump[0] = PPC_RAW_LIS(_R12, PPC_HA(val));
+ entry->jump[1] = PPC_RAW_ADDI(_R12, _R12, PPC_LO(val));
+ entry->jump[2] = PPC_RAW_MTCTR(_R12);
+ entry->jump[3] = PPC_RAW_BCTR();
pr_debug("Initialized plt for 0x%x at %p\n", val, entry);
return (uint32_t)entry;
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index ae2b188365b1..6baa676e7cb6 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -122,27 +122,19 @@ struct ppc64_stub_entry
* the stub, but it's significantly shorter to put these values at the
* end of the stub code, and patch the stub address (32-bits relative
* to the TOC ptr, r2) into the stub.
- *
- * addis r11,r2, <high>
- * addi r11,r11, <low>
- * std r2,R2_STACK_OFFSET(r1)
- * ld r12,32(r11)
- * ld r2,40(r11)
- * mtctr r12
- * bctr
*/
static u32 ppc64_stub_insns[] = {
- PPC_INST_ADDIS | __PPC_RT(R11) | __PPC_RA(R2),
- PPC_INST_ADDI | __PPC_RT(R11) | __PPC_RA(R11),
+ PPC_RAW_ADDIS(_R11, _R2, 0),
+ PPC_RAW_ADDI(_R11, _R11, 0),
/* Save current r2 value in magic place on the stack. */
- PPC_INST_STD | __PPC_RS(R2) | __PPC_RA(R1) | R2_STACK_OFFSET,
- PPC_INST_LD | __PPC_RT(R12) | __PPC_RA(R11) | 32,
+ PPC_RAW_STD(_R2, _R1, R2_STACK_OFFSET),
+ PPC_RAW_LD(_R12, _R11, 32),
#ifdef PPC64_ELF_ABI_v1
/* Set up new r2 from function descriptor */
- PPC_INST_LD | __PPC_RT(R2) | __PPC_RA(R11) | 40,
+ PPC_RAW_LD(_R2, _R11, 40),
#endif
- PPC_INST_MTCTR | __PPC_RS(R12),
- PPC_INST_BCTR,
+ PPC_RAW_MTCTR(_R12),
+ PPC_RAW_BCTR(),
};
/* Count how many different 24-bit relocations (different symbol,
@@ -336,21 +328,12 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
#ifdef CONFIG_MPROFILE_KERNEL
-#define PACATOC offsetof(struct paca_struct, kernel_toc)
-
-/*
- * ld r12,PACATOC(r13)
- * addis r12,r12,<high>
- * addi r12,r12,<low>
- * mtctr r12
- * bctr
- */
static u32 stub_insns[] = {
- PPC_INST_LD | __PPC_RT(R12) | __PPC_RA(R13) | PACATOC,
- PPC_INST_ADDIS | __PPC_RT(R12) | __PPC_RA(R12),
- PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12),
- PPC_INST_MTCTR | __PPC_RS(R12),
- PPC_INST_BCTR,
+ PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernel_toc)),
+ PPC_RAW_ADDIS(_R12, _R12, 0),
+ PPC_RAW_ADDI(_R12, _R12, 0),
+ PPC_RAW_MTCTR(_R12),
+ PPC_RAW_BCTR(),
};
/*
@@ -507,7 +490,7 @@ static int restore_r2(const char *name, u32 *instruction, struct module *me)
if (!instr_is_relative_link_branch(ppc_inst(*prev_insn)))
return 1;
- if (*instruction != PPC_INST_NOP) {
+ if (*instruction != PPC_RAW_NOP()) {
pr_err("%s: Expected nop after call, got %08x at %pS\n",
me->name, *instruction, instruction);
return 0;
@@ -696,21 +679,17 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
* ld r2, ...(r12)
* add r2, r2, r12
*/
- if ((((uint32_t *)location)[0] & ~0xfffc) !=
- (PPC_INST_LD | __PPC_RT(R2) | __PPC_RA(R12)))
+ if ((((uint32_t *)location)[0] & ~0xfffc) != PPC_RAW_LD(_R2, _R12, 0))
break;
- if (((uint32_t *)location)[1] !=
- (PPC_INST_ADD | __PPC_RT(R2) | __PPC_RA(R2) | __PPC_RB(R12)))
+ if (((uint32_t *)location)[1] != PPC_RAW_ADD(_R2, _R2, _R12))
break;
/*
* If found, replace it with:
* addis r2, r12, (.TOC.-func)@ha
* addi r2, r2, (.TOC.-func)@l
*/
- ((uint32_t *)location)[0] = PPC_INST_ADDIS | __PPC_RT(R2) |
- __PPC_RA(R12) | PPC_HA(value);
- ((uint32_t *)location)[1] = PPC_INST_ADDI | __PPC_RT(R2) |
- __PPC_RA(R2) | PPC_LO(value);
+ ((uint32_t *)location)[0] = PPC_RAW_ADDIS(_R2, _R12, PPC_HA(value));
+ ((uint32_t *)location)[1] = PPC_RAW_ADDI(_R2, _R2, PPC_LO(value));
break;
case R_PPC64_REL16_HA:
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index cdf87086fa33..c79899abcec8 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -18,24 +18,16 @@
#include <asm/ppc-opcode.h>
#include <asm/inst.h>
-#define TMPL_CALL_HDLR_IDX \
- (optprobe_template_call_handler - optprobe_template_entry)
-#define TMPL_EMULATE_IDX \
- (optprobe_template_call_emulate - optprobe_template_entry)
-#define TMPL_RET_IDX \
- (optprobe_template_ret - optprobe_template_entry)
-#define TMPL_OP_IDX \
- (optprobe_template_op_address - optprobe_template_entry)
-#define TMPL_INSN_IDX \
- (optprobe_template_insn - optprobe_template_entry)
-#define TMPL_END_IDX \
- (optprobe_template_end - optprobe_template_entry)
-
-DEFINE_INSN_CACHE_OPS(ppc_optinsn);
+#define TMPL_CALL_HDLR_IDX (optprobe_template_call_handler - optprobe_template_entry)
+#define TMPL_EMULATE_IDX (optprobe_template_call_emulate - optprobe_template_entry)
+#define TMPL_RET_IDX (optprobe_template_ret - optprobe_template_entry)
+#define TMPL_OP_IDX (optprobe_template_op_address - optprobe_template_entry)
+#define TMPL_INSN_IDX (optprobe_template_insn - optprobe_template_entry)
+#define TMPL_END_IDX (optprobe_template_end - optprobe_template_entry)
static bool insn_page_in_use;
-static void *__ppc_alloc_insn_page(void)
+void *alloc_optinsn_page(void)
{
if (insn_page_in_use)
return NULL;
@@ -43,20 +35,11 @@ static void *__ppc_alloc_insn_page(void)
return &optinsn_slot;
}
-static void __ppc_free_insn_page(void *page __maybe_unused)
+void free_optinsn_page(void *page)
{
insn_page_in_use = false;
}
-struct kprobe_insn_cache kprobe_ppc_optinsn_slots = {
- .mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex),
- .pages = LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages),
- /* insn_size initialized later */
- .alloc = __ppc_alloc_insn_page,
- .free = __ppc_free_insn_page,
- .nr_garbage = 0,
-};
-
/*
* Check if we can optimize this probe. Returns NIP post-emulation if this can
* be optimized and 0 otherwise.
@@ -66,6 +49,7 @@ static unsigned long can_optimize(struct kprobe *p)
struct pt_regs regs;
struct instruction_op op;
unsigned long nip = 0;
+ unsigned long addr = (unsigned long)p->addr;
/*
* kprobe placed for kretprobe during boot time
@@ -73,7 +57,7 @@ static unsigned long can_optimize(struct kprobe *p)
* So further checks can be skipped.
*/
if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
- return (unsigned long)p->addr + sizeof(kprobe_opcode_t);
+ return addr + sizeof(kprobe_opcode_t);
/*
* We only support optimizing kernel addresses, but not
@@ -81,11 +65,11 @@ static unsigned long can_optimize(struct kprobe *p)
*
* FIXME: Optimize kprobes placed in module addresses.
*/
- if (!is_kernel_addr((unsigned long)p->addr))
+ if (!is_kernel_addr(addr))
return 0;
memset(&regs, 0, sizeof(struct pt_regs));
- regs.nip = (unsigned long)p->addr;
+ regs.nip = addr;
regs.trap = 0x0;
regs.msr = MSR_KERNEL;
@@ -100,9 +84,8 @@ static unsigned long can_optimize(struct kprobe *p)
* Ensure that the instruction is not a conditional branch,
* and that can be emulated.
*/
- if (!is_conditional_branch(ppc_inst_read((struct ppc_inst *)p->ainsn.insn)) &&
- analyse_instr(&op, &regs,
- ppc_inst_read((struct ppc_inst *)p->ainsn.insn)) == 1) {
+ if (!is_conditional_branch(ppc_inst_read(p->ainsn.insn)) &&
+ analyse_instr(&op, &regs, ppc_inst_read(p->ainsn.insn)) == 1) {
emulate_update_regs(&regs, &op);
nip = regs.nip;
}
@@ -123,7 +106,7 @@ static void optimized_callback(struct optimized_kprobe *op,
kprobes_inc_nmissed_count(&op->kp);
} else {
__this_cpu_write(current_kprobe, &op->kp);
- regs->nip = (unsigned long)op->kp.addr;
+ regs_set_return_ip(regs, (unsigned long)op->kp.addr);
get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
opt_pre_handler(&op->kp, regs);
__this_cpu_write(current_kprobe, NULL);
@@ -136,19 +119,15 @@ NOKPROBE_SYMBOL(optimized_callback);
void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
{
if (op->optinsn.insn) {
- free_ppc_optinsn_slot(op->optinsn.insn, 1);
+ free_optinsn_slot(op->optinsn.insn, 1);
op->optinsn.insn = NULL;
}
}
static void patch_imm32_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr)
{
- patch_instruction((struct ppc_inst *)addr,
- ppc_inst(PPC_RAW_LIS(reg, IMM_H(val))));
- addr++;
-
- patch_instruction((struct ppc_inst *)addr,
- ppc_inst(PPC_RAW_ORI(reg, reg, IMM_L(val))));
+ patch_instruction(addr++, ppc_inst(PPC_RAW_LIS(reg, PPC_HI(val))));
+ patch_instruction(addr, ppc_inst(PPC_RAW_ORI(reg, reg, PPC_LO(val))));
}
/*
@@ -157,34 +136,11 @@ static void patch_imm32_load_insns(unsigned long val, int reg, kprobe_opcode_t *
*/
static void patch_imm64_load_insns(unsigned long long val, int reg, kprobe_opcode_t *addr)
{
- /* lis reg,(op)@highest */
- patch_instruction((struct ppc_inst *)addr,
- ppc_inst(PPC_INST_ADDIS | ___PPC_RT(reg) |
- ((val >> 48) & 0xffff)));
- addr++;
-
- /* ori reg,reg,(op)@higher */
- patch_instruction((struct ppc_inst *)addr,
- ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) |
- ___PPC_RS(reg) | ((val >> 32) & 0xffff)));
- addr++;
-
- /* rldicr reg,reg,32,31 */
- patch_instruction((struct ppc_inst *)addr,
- ppc_inst(PPC_INST_RLDICR | ___PPC_RA(reg) |
- ___PPC_RS(reg) | __PPC_SH64(32) | __PPC_ME64(31)));
- addr++;
-
- /* oris reg,reg,(op)@h */
- patch_instruction((struct ppc_inst *)addr,
- ppc_inst(PPC_INST_ORIS | ___PPC_RA(reg) |
- ___PPC_RS(reg) | ((val >> 16) & 0xffff)));
- addr++;
-
- /* ori reg,reg,(op)@l */
- patch_instruction((struct ppc_inst *)addr,
- ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) |
- ___PPC_RS(reg) | (val & 0xffff)));
+ patch_instruction(addr++, ppc_inst(PPC_RAW_LIS(reg, PPC_HIGHEST(val))));
+ patch_instruction(addr++, ppc_inst(PPC_RAW_ORI(reg, reg, PPC_HIGHER(val))));
+ patch_instruction(addr++, ppc_inst(PPC_RAW_SLDI(reg, reg, 32)));
+ patch_instruction(addr++, ppc_inst(PPC_RAW_ORIS(reg, reg, PPC_HI(val))));
+ patch_instruction(addr, ppc_inst(PPC_RAW_ORI(reg, reg, PPC_LO(val))));
}
static void patch_imm_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr)
@@ -198,19 +154,18 @@ static void patch_imm_load_insns(unsigned long val, int reg, kprobe_opcode_t *ad
int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
{
struct ppc_inst branch_op_callback, branch_emulate_step, temp;
- kprobe_opcode_t *op_callback_addr, *emulate_step_addr, *buff;
+ unsigned long op_callback_addr, emulate_step_addr;
+ kprobe_opcode_t *buff;
long b_offset;
unsigned long nip, size;
int rc, i;
- kprobe_ppc_optinsn_slots.insn_size = MAX_OPTINSN_SIZE;
-
nip = can_optimize(p);
if (!nip)
return -EILSEQ;
/* Allocate instruction slot for detour buffer */
- buff = get_ppc_optinsn_slot();
+ buff = get_optinsn_slot();
if (!buff)
return -ENOMEM;
@@ -228,8 +183,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
goto error;
/* Check if the return address is also within 32MB range */
- b_offset = (unsigned long)(buff + TMPL_RET_IDX) -
- (unsigned long)nip;
+ b_offset = (unsigned long)(buff + TMPL_RET_IDX) - nip;
if (!is_offset_in_branch_range(b_offset))
goto error;
@@ -238,8 +192,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
size = (TMPL_END_IDX * sizeof(kprobe_opcode_t)) / sizeof(int);
pr_devel("Copying template to %p, size %lu\n", buff, size);
for (i = 0; i < size; i++) {
- rc = patch_instruction((struct ppc_inst *)(buff + i),
- ppc_inst(*(optprobe_template_entry + i)));
+ rc = patch_instruction(buff + i, ppc_inst(*(optprobe_template_entry + i)));
if (rc < 0)
goto error;
}
@@ -253,51 +206,48 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
/*
* 2. branch to optimized_callback() and emulate_step()
*/
- op_callback_addr = (kprobe_opcode_t *)ppc_kallsyms_lookup_name("optimized_callback");
- emulate_step_addr = (kprobe_opcode_t *)ppc_kallsyms_lookup_name("emulate_step");
+ op_callback_addr = ppc_kallsyms_lookup_name("optimized_callback");
+ emulate_step_addr = ppc_kallsyms_lookup_name("emulate_step");
if (!op_callback_addr || !emulate_step_addr) {
WARN(1, "Unable to lookup optimized_callback()/emulate_step()\n");
goto error;
}
- rc = create_branch(&branch_op_callback,
- (struct ppc_inst *)(buff + TMPL_CALL_HDLR_IDX),
- (unsigned long)op_callback_addr,
- BRANCH_SET_LINK);
+ rc = create_branch(&branch_op_callback, buff + TMPL_CALL_HDLR_IDX,
+ op_callback_addr, BRANCH_SET_LINK);
- rc |= create_branch(&branch_emulate_step,
- (struct ppc_inst *)(buff + TMPL_EMULATE_IDX),
- (unsigned long)emulate_step_addr,
- BRANCH_SET_LINK);
+ rc |= create_branch(&branch_emulate_step, buff + TMPL_EMULATE_IDX,
+ emulate_step_addr, BRANCH_SET_LINK);
if (rc)
goto error;
- patch_instruction((struct ppc_inst *)(buff + TMPL_CALL_HDLR_IDX),
- branch_op_callback);
- patch_instruction((struct ppc_inst *)(buff + TMPL_EMULATE_IDX),
- branch_emulate_step);
+ patch_instruction(buff + TMPL_CALL_HDLR_IDX, branch_op_callback);
+ patch_instruction(buff + TMPL_EMULATE_IDX, branch_emulate_step);
/*
* 3. load instruction to be emulated into relevant register, and
*/
- temp = ppc_inst_read((struct ppc_inst *)p->ainsn.insn);
- patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX);
+ if (IS_ENABLED(CONFIG_PPC64)) {
+ temp = ppc_inst_read(p->ainsn.insn);
+ patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX);
+ } else {
+ patch_imm_load_insns((unsigned long)p->ainsn.insn, 4, buff + TMPL_INSN_IDX);
+ }
/*
* 4. branch back from trampoline
*/
- patch_branch((struct ppc_inst *)(buff + TMPL_RET_IDX), (unsigned long)nip, 0);
+ patch_branch(buff + TMPL_RET_IDX, nip, 0);
- flush_icache_range((unsigned long)buff,
- (unsigned long)(&buff[TMPL_END_IDX]));
+ flush_icache_range((unsigned long)buff, (unsigned long)(&buff[TMPL_END_IDX]));
op->optinsn.insn = buff;
return 0;
error:
- free_ppc_optinsn_slot(buff, 0);
+ free_optinsn_slot(buff, 0);
return -ERANGE;
}
@@ -328,12 +278,9 @@ void arch_optimize_kprobes(struct list_head *oplist)
* Backup instructions which will be replaced
* by jump address
*/
- memcpy(op->optinsn.copied_insn, op->kp.addr,
- RELATIVEJUMP_SIZE);
- create_branch(&instr,
- (struct ppc_inst *)op->kp.addr,
- (unsigned long)op->optinsn.insn, 0);
- patch_instruction((struct ppc_inst *)op->kp.addr, instr);
+ memcpy(op->optinsn.copied_insn, op->kp.addr, RELATIVEJUMP_SIZE);
+ create_branch(&instr, op->kp.addr, (unsigned long)op->optinsn.insn, 0);
+ patch_instruction(op->kp.addr, instr);
list_del_init(&op->list);
}
}
@@ -343,8 +290,7 @@ void arch_unoptimize_kprobe(struct optimized_kprobe *op)
arch_arm_kprobe(&op->kp);
}
-void arch_unoptimize_kprobes(struct list_head *oplist,
- struct list_head *done_list)
+void arch_unoptimize_kprobes(struct list_head *oplist, struct list_head *done_list)
{
struct optimized_kprobe *op;
struct optimized_kprobe *tmp;
@@ -355,8 +301,7 @@ void arch_unoptimize_kprobes(struct list_head *oplist,
}
}
-int arch_within_optimized_kprobe(struct optimized_kprobe *op,
- unsigned long addr)
+int arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr)
{
return ((unsigned long)op->kp.addr <= addr &&
(unsigned long)op->kp.addr + RELATIVEJUMP_SIZE > addr);
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 7f5aae3c387d..9bd30cac852b 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -346,10 +346,8 @@ void copy_mm_to_paca(struct mm_struct *mm)
#ifdef CONFIG_PPC_BOOK3S
mm_context_t *context = &mm->context;
- get_paca()->mm_ctx_id = context->id;
#ifdef CONFIG_PPC_MM_SLICES
VM_BUG_ON(!mm_ctx_slb_addr_limit(context));
- get_paca()->mm_ctx_slb_addr_limit = mm_ctx_slb_addr_limit(context);
memcpy(&get_paca()->mm_ctx_low_slices_psize, mm_ctx_low_slices(context),
LOW_SLICE_ARRAY_SZ);
memcpy(&get_paca()->mm_ctx_high_slices_psize, mm_ctx_high_slices(context),
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 89e34aa273e2..185beb290580 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -96,7 +96,8 @@ static void check_if_tm_restore_required(struct task_struct *tsk)
if (tsk == current && tsk->thread.regs &&
MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
!test_thread_flag(TIF_RESTORE_TM)) {
- tsk->thread.ckpt_regs.msr = tsk->thread.regs->msr;
+ regs_set_return_msr(&tsk->thread.ckpt_regs,
+ tsk->thread.regs->msr);
set_thread_flag(TIF_RESTORE_TM);
}
}
@@ -161,7 +162,7 @@ static void __giveup_fpu(struct task_struct *tsk)
msr &= ~(MSR_FP|MSR_FE0|MSR_FE1);
if (cpu_has_feature(CPU_FTR_VSX))
msr &= ~MSR_VSX;
- tsk->thread.regs->msr = msr;
+ regs_set_return_msr(tsk->thread.regs, msr);
}
void giveup_fpu(struct task_struct *tsk)
@@ -244,7 +245,7 @@ static void __giveup_altivec(struct task_struct *tsk)
msr &= ~MSR_VEC;
if (cpu_has_feature(CPU_FTR_VSX))
msr &= ~MSR_VSX;
- tsk->thread.regs->msr = msr;
+ regs_set_return_msr(tsk->thread.regs, msr);
}
void giveup_altivec(struct task_struct *tsk)
@@ -559,7 +560,7 @@ void notrace restore_math(struct pt_regs *regs)
msr_check_and_clear(new_msr);
- regs->msr |= new_msr | fpexc_mode;
+ regs_set_return_msr(regs, regs->msr | new_msr | fpexc_mode);
}
}
#endif /* CONFIG_PPC_BOOK3S_64 */
@@ -1114,7 +1115,7 @@ void restore_tm_state(struct pt_regs *regs)
#endif
restore_math(regs);
- regs->msr |= msr_diff;
+ regs_set_return_msr(regs, regs->msr | msr_diff);
}
#else /* !CONFIG_PPC_TRANSACTIONAL_MEM */
@@ -1129,6 +1130,10 @@ static inline void save_sprs(struct thread_struct *t)
if (cpu_has_feature(CPU_FTR_ALTIVEC))
t->vrsave = mfspr(SPRN_VRSAVE);
#endif
+#ifdef CONFIG_SPE
+ if (cpu_has_feature(CPU_FTR_SPE))
+ t->spefscr = mfspr(SPRN_SPEFSCR);
+#endif
#ifdef CONFIG_PPC_BOOK3S_64
if (cpu_has_feature(CPU_FTR_DSCR))
t->dscr = mfspr(SPRN_DSCR);
@@ -1159,6 +1164,11 @@ static inline void restore_sprs(struct thread_struct *old_thread,
old_thread->vrsave != new_thread->vrsave)
mtspr(SPRN_VRSAVE, new_thread->vrsave);
#endif
+#ifdef CONFIG_SPE
+ if (cpu_has_feature(CPU_FTR_SPE) &&
+ old_thread->spefscr != new_thread->spefscr)
+ mtspr(SPRN_SPEFSCR, new_thread->spefscr);
+#endif
#ifdef CONFIG_PPC_BOOK3S_64
if (cpu_has_feature(CPU_FTR_DSCR)) {
u64 dscr = get_paca()->dscr_default;
@@ -1213,6 +1223,19 @@ struct task_struct *__switch_to(struct task_struct *prev,
__flush_tlb_pending(batch);
batch->active = 0;
}
+
+ /*
+ * On POWER9 the copy-paste buffer can only paste into
+ * foreign real addresses, so unprivileged processes can not
+ * see the data or use it in any way unless they have
+ * foreign real mappings. If the new process has the foreign
+ * real address mappings, we must issue a cp_abort to clear
+ * any state and prevent snooping, corruption or a covert
+ * channel. ISA v3.1 supports paste into local memory.
+ */
+ if (new->mm && (cpu_has_feature(CPU_FTR_ARCH_31) ||
+ atomic_read(&new->mm->context.vas_windows)))
+ asm volatile(PPC_CP_ABORT);
#endif /* CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
@@ -1248,43 +1271,48 @@ struct task_struct *__switch_to(struct task_struct *prev,
}
/*
- * Call restore_sprs() before calling _switch(). If we move it after
- * _switch() then we miss out on calling it for new tasks. The reason
- * for this is we manually create a stack frame for new tasks that
- * directly returns through ret_from_fork() or
+ * Call restore_sprs() and set_return_regs_changed() before calling
+ * _switch(). If we move it after _switch() then we miss out on calling
+ * it for new tasks. The reason for this is we manually create a stack
+ * frame for new tasks that directly returns through ret_from_fork() or
* ret_from_kernel_thread(). See copy_thread() for details.
*/
restore_sprs(old_thread, new_thread);
+ set_return_regs_changed(); /* _switch changes stack (and regs) */
+
#ifdef CONFIG_PPC32
kuap_assert_locked();
#endif
last = _switch(old_thread, new_thread);
+ /*
+ * Nothing after _switch will be run for newly created tasks,
+ * because they switch directly to ret_from_fork/ret_from_kernel_thread
+ * etc. Code added here should have a comment explaining why that is
+ * okay.
+ */
+
#ifdef CONFIG_PPC_BOOK3S_64
+ /*
+ * This applies to a process that was context switched while inside
+ * arch_enter_lazy_mmu_mode(), to re-activate the batch that was
+ * deactivated above, before _switch(). This will never be the case
+ * for new tasks.
+ */
if (current_thread_info()->local_flags & _TLF_LAZY_MMU) {
current_thread_info()->local_flags &= ~_TLF_LAZY_MMU;
batch = this_cpu_ptr(&ppc64_tlb_batch);
batch->active = 1;
}
- if (current->thread.regs) {
+ /*
+ * Math facilities are masked out of the child MSR in copy_thread.
+ * A new task does not need to restore_math because it will
+ * demand fault them.
+ */
+ if (current->thread.regs)
restore_math(current->thread.regs);
-
- /*
- * On POWER9 the copy-paste buffer can only paste into
- * foreign real addresses, so unprivileged processes can not
- * see the data or use it in any way unless they have
- * foreign real mappings. If the new process has the foreign
- * real address mappings, we must issue a cp_abort to clear
- * any state and prevent snooping, corruption or a covert
- * channel. ISA v3.1 supports paste into local memory.
- */
- if (current->mm &&
- (cpu_has_feature(CPU_FTR_ARCH_31) ||
- atomic_read(&current->mm->context.vas_windows)))
- asm volatile(PPC_CP_ABORT);
- }
#endif /* CONFIG_PPC_BOOK3S_64 */
return last;
@@ -1736,6 +1764,9 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
#ifdef CONFIG_ALTIVEC
p->thread.vr_save_area = NULL;
#endif
+#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
+ p->thread.kuap = KUAP_NONE;
+#endif
setup_ksp_vsid(p, sp);
@@ -1838,13 +1869,14 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
}
regs->gpr[2] = toc;
}
- regs->nip = entry;
- regs->msr = MSR_USER64;
+ regs_set_return_ip(regs, entry);
+ regs_set_return_msr(regs, MSR_USER64);
} else {
- regs->nip = start;
regs->gpr[2] = 0;
- regs->msr = MSR_USER32;
+ regs_set_return_ip(regs, start);
+ regs_set_return_msr(regs, MSR_USER32);
}
+
#endif
#ifdef CONFIG_VSX
current->thread.used_vsr = 0;
@@ -1875,7 +1907,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
current->thread.tm_tfiar = 0;
current->thread.load_tm = 0;
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-
}
EXPORT_SYMBOL(start_thread);
@@ -1923,9 +1954,10 @@ int set_fpexc_mode(struct task_struct *tsk, unsigned int val)
if (val > PR_FP_EXC_PRECISE)
return -EINVAL;
tsk->thread.fpexc_mode = __pack_fe01(val);
- if (regs != NULL && (regs->msr & MSR_FP) != 0)
- regs->msr = (regs->msr & ~(MSR_FE0|MSR_FE1))
- | tsk->thread.fpexc_mode;
+ if (regs != NULL && (regs->msr & MSR_FP) != 0) {
+ regs_set_return_msr(regs, (regs->msr & ~(MSR_FE0|MSR_FE1))
+ | tsk->thread.fpexc_mode);
+ }
return 0;
}
@@ -1971,9 +2003,9 @@ int set_endian(struct task_struct *tsk, unsigned int val)
return -EINVAL;
if (val == PR_ENDIAN_BIG)
- regs->msr &= ~MSR_LE;
+ regs_set_return_msr(regs, regs->msr & ~MSR_LE);
else if (val == PR_ENDIAN_LITTLE || val == PR_ENDIAN_PPC_LITTLE)
- regs->msr |= MSR_LE;
+ regs_set_return_msr(regs, regs->msr | MSR_LE);
else
return -EINVAL;
@@ -2084,7 +2116,7 @@ static unsigned long __get_wchan(struct task_struct *p)
unsigned long ip, sp;
int count = 0;
- if (!p || p == current || p->state == TASK_RUNNING)
+ if (!p || p == current || task_is_running(p))
return 0;
sp = p->thread.ksp;
@@ -2094,7 +2126,7 @@ static unsigned long __get_wchan(struct task_struct *p)
do {
sp = *(unsigned long *)sp;
if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD) ||
- p->state == TASK_RUNNING)
+ task_is_running(p))
return 0;
if (count > 0) {
ip = ((unsigned long *)sp)[STACK_FRAME_LR_SAVE];
@@ -2121,8 +2153,9 @@ unsigned long get_wchan(struct task_struct *p)
static int kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH;
-void show_stack(struct task_struct *tsk, unsigned long *stack,
- const char *loglvl)
+void __no_sanitize_address show_stack(struct task_struct *tsk,
+ unsigned long *stack,
+ const char *loglvl)
{
unsigned long sp, ip, lr, newsp;
int count = 0;
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index fbe9deebc8e1..f620e04dc9bf 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -758,7 +758,7 @@ void __init early_init_devtree(void *params)
first_memblock_size = min_t(u64, first_memblock_size, memory_limit);
setup_initial_memory_limit(memstart_addr, first_memblock_size);
/* Reserve MEMBLOCK regions used by kernel, initrd, dt, etc... */
- memblock_reserve(PHYSICAL_START, __pa(klimit) - PHYSICAL_START);
+ memblock_reserve(PHYSICAL_START, __pa(_end) - PHYSICAL_START);
/* If relocatable, reserve first 32k for interrupt vectors etc. */
if (PHYSICAL_START > MEMORY_START)
memblock_reserve(MEMORY_START, 0x8000);
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 41ed7e33d897..a5bf355ce1d6 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -27,10 +27,12 @@
#include <linux/initrd.h>
#include <linux/bitops.h>
#include <linux/pgtable.h>
+#include <linux/printk.h>
#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/page.h>
#include <asm/processor.h>
+#include <asm/interrupt.h>
#include <asm/irq.h>
#include <asm/io.h>
#include <asm/smp.h>
@@ -242,13 +244,31 @@ static int __init prom_strcmp(const char *cs, const char *ct)
return 0;
}
-static char __init *prom_strcpy(char *dest, const char *src)
+static ssize_t __init prom_strscpy_pad(char *dest, const char *src, size_t n)
{
- char *tmp = dest;
+ ssize_t rc;
+ size_t i;
- while ((*dest++ = *src++) != '\0')
- /* nothing */;
- return tmp;
+ if (n == 0 || n > INT_MAX)
+ return -E2BIG;
+
+ // Copy up to n bytes
+ for (i = 0; i < n && src[i] != '\0'; i++)
+ dest[i] = src[i];
+
+ rc = i;
+
+ // If we copied all n then we have run out of space for the nul
+ if (rc == n) {
+ // Rewind by one character to ensure nul termination
+ i--;
+ rc = -E2BIG;
+ }
+
+ for (; i < n; i++)
+ dest[i] = '\0';
+
+ return rc;
}
static int __init prom_strncmp(const char *cs, const char *ct, size_t count)
@@ -701,13 +721,12 @@ static int __init prom_setprop(phandle node, const char *nodename,
}
/* We can't use the standard versions because of relocation headaches. */
-#define isxdigit(c) (('0' <= (c) && (c) <= '9') \
- || ('a' <= (c) && (c) <= 'f') \
- || ('A' <= (c) && (c) <= 'F'))
+#define prom_isxdigit(c) \
+ (('0' <= (c) && (c) <= '9') || ('a' <= (c) && (c) <= 'f') || ('A' <= (c) && (c) <= 'F'))
-#define isdigit(c) ('0' <= (c) && (c) <= '9')
-#define islower(c) ('a' <= (c) && (c) <= 'z')
-#define toupper(c) (islower(c) ? ((c) - 'a' + 'A') : (c))
+#define prom_isdigit(c) ('0' <= (c) && (c) <= '9')
+#define prom_islower(c) ('a' <= (c) && (c) <= 'z')
+#define prom_toupper(c) (prom_islower(c) ? ((c) - 'a' + 'A') : (c))
static unsigned long prom_strtoul(const char *cp, const char **endp)
{
@@ -716,14 +735,14 @@ static unsigned long prom_strtoul(const char *cp, const char **endp)
if (*cp == '0') {
base = 8;
cp++;
- if (toupper(*cp) == 'X') {
+ if (prom_toupper(*cp) == 'X') {
cp++;
base = 16;
}
}
- while (isxdigit(*cp) &&
- (value = isdigit(*cp) ? *cp - '0' : toupper(*cp) - 'A' + 10) < base) {
+ while (prom_isxdigit(*cp) &&
+ (value = prom_isdigit(*cp) ? *cp - '0' : prom_toupper(*cp) - 'A' + 10) < base) {
result = result * base + value;
cp++;
}
@@ -927,6 +946,10 @@ struct option_vector6 {
u8 os_name;
} __packed;
+struct option_vector7 {
+ u8 os_id[256];
+} __packed;
+
struct ibm_arch_vec {
struct { u32 mask, val; } pvrs[14];
@@ -949,6 +972,9 @@ struct ibm_arch_vec {
u8 vec6_len;
struct option_vector6 vec6;
+
+ u8 vec7_len;
+ struct option_vector7 vec7;
} __packed;
static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = {
@@ -1095,6 +1121,9 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = {
.secondary_pteg = 0,
.os_name = OV6_LINUX,
},
+
+ /* option vector 7: OS Identification */
+ .vec7_len = VECTOR_LENGTH(sizeof(struct option_vector7)),
};
static struct ibm_arch_vec __prombss ibm_architecture_vec ____cacheline_aligned;
@@ -1323,6 +1352,8 @@ static void __init prom_check_platform_support(void)
memcpy(&ibm_architecture_vec, &ibm_architecture_vec_template,
sizeof(ibm_architecture_vec));
+ prom_strscpy_pad(ibm_architecture_vec.vec7.os_id, linux_banner, 256);
+
if (prop_len > 1) {
int i;
u8 vec[8];
@@ -1762,6 +1793,8 @@ static int prom_rtas_hcall(uint64_t args)
asm volatile("sc 1\n" : "=r" (arg1) :
"r" (arg1),
"r" (arg2) :);
+ srr_regs_clobbered();
+
return arg1;
}
@@ -2702,7 +2735,7 @@ static void __init flatten_device_tree(void)
/* Add "phandle" in there, we'll need it */
namep = make_room(&mem_start, &mem_end, 16, 1);
- prom_strcpy(namep, "phandle");
+ prom_strscpy_pad(namep, "phandle", sizeof("phandle"));
mem_start = (unsigned long)namep + prom_strlen(namep) + 1;
/* Build string array */
@@ -3210,54 +3243,6 @@ static void __init prom_check_initrd(unsigned long r3, unsigned long r4)
#endif /* CONFIG_BLK_DEV_INITRD */
}
-#ifdef CONFIG_PPC64
-#ifdef CONFIG_RELOCATABLE
-static void reloc_toc(void)
-{
-}
-
-static void unreloc_toc(void)
-{
-}
-#else
-static void __reloc_toc(unsigned long offset, unsigned long nr_entries)
-{
- unsigned long i;
- unsigned long *toc_entry;
-
- /* Get the start of the TOC by using r2 directly. */
- asm volatile("addi %0,2,-0x8000" : "=b" (toc_entry));
-
- for (i = 0; i < nr_entries; i++) {
- *toc_entry = *toc_entry + offset;
- toc_entry++;
- }
-}
-
-static void reloc_toc(void)
-{
- unsigned long offset = reloc_offset();
- unsigned long nr_entries =
- (__prom_init_toc_end - __prom_init_toc_start) / sizeof(long);
-
- __reloc_toc(offset, nr_entries);
-
- mb();
-}
-
-static void unreloc_toc(void)
-{
- unsigned long offset = reloc_offset();
- unsigned long nr_entries =
- (__prom_init_toc_end - __prom_init_toc_start) / sizeof(long);
-
- mb();
-
- __reloc_toc(-offset, nr_entries);
-}
-#endif
-#endif
-
#ifdef CONFIG_PPC_SVM
/*
* Perform the Enter Secure Mode ultracall.
@@ -3291,14 +3276,12 @@ static void __init setup_secure_guest(unsigned long kbase, unsigned long fdt)
* relocated it so the check will fail. Restore the original image by
* relocating it back to the kernel virtual base address.
*/
- if (IS_ENABLED(CONFIG_RELOCATABLE))
- relocate(KERNELBASE);
+ relocate(KERNELBASE);
ret = enter_secure_mode(kbase, fdt);
/* Relocate the kernel again. */
- if (IS_ENABLED(CONFIG_RELOCATABLE))
- relocate(kbase);
+ relocate(kbase);
if (ret != U_SUCCESS) {
prom_printf("Returned %d from switching to secure mode.\n", ret);
@@ -3326,8 +3309,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
#ifdef CONFIG_PPC32
unsigned long offset = reloc_offset();
reloc_got2(offset);
-#else
- reloc_toc();
#endif
/*
@@ -3504,8 +3485,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
#ifdef CONFIG_PPC32
reloc_got2(-offset);
-#else
- unreloc_toc();
#endif
/* Move to secure memory if we're supposed to be secure guests. */
diff --git a/arch/powerpc/kernel/ptrace/ptrace-adv.c b/arch/powerpc/kernel/ptrace/ptrace-adv.c
index 3990c01ef8cf..399f5d94a3df 100644
--- a/arch/powerpc/kernel/ptrace/ptrace-adv.c
+++ b/arch/powerpc/kernel/ptrace/ptrace-adv.c
@@ -12,7 +12,7 @@ void user_enable_single_step(struct task_struct *task)
if (regs != NULL) {
task->thread.debug.dbcr0 &= ~DBCR0_BT;
task->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC;
- regs->msr |= MSR_DE;
+ regs_set_return_msr(regs, regs->msr | MSR_DE);
}
set_tsk_thread_flag(task, TIF_SINGLESTEP);
}
@@ -24,7 +24,7 @@ void user_enable_block_step(struct task_struct *task)
if (regs != NULL) {
task->thread.debug.dbcr0 &= ~DBCR0_IC;
task->thread.debug.dbcr0 = DBCR0_IDM | DBCR0_BT;
- regs->msr |= MSR_DE;
+ regs_set_return_msr(regs, regs->msr | MSR_DE);
}
set_tsk_thread_flag(task, TIF_SINGLESTEP);
}
@@ -50,7 +50,7 @@ void user_disable_single_step(struct task_struct *task)
* All debug events were off.....
*/
task->thread.debug.dbcr0 &= ~DBCR0_IDM;
- regs->msr &= ~MSR_DE;
+ regs_set_return_msr(regs, regs->msr & ~MSR_DE);
}
}
clear_tsk_thread_flag(task, TIF_SINGLESTEP);
@@ -82,6 +82,7 @@ int ptrace_get_debugreg(struct task_struct *child, unsigned long addr,
int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data)
{
+ struct pt_regs *regs = task->thread.regs;
#ifdef CONFIG_HAVE_HW_BREAKPOINT
int ret;
struct thread_struct *thread = &task->thread;
@@ -112,7 +113,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned l
dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W);
if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0,
task->thread.debug.dbcr1)) {
- task->thread.regs->msr &= ~MSR_DE;
+ regs_set_return_msr(regs, regs->msr & ~MSR_DE);
task->thread.debug.dbcr0 &= ~DBCR0_IDM;
}
return 0;
@@ -132,7 +133,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned l
dbcr_dac(task) |= DBCR_DAC1R;
if (data & 0x2UL)
dbcr_dac(task) |= DBCR_DAC1W;
- task->thread.regs->msr |= MSR_DE;
+ regs_set_return_msr(regs, regs->msr | MSR_DE);
return 0;
}
@@ -220,7 +221,7 @@ static long set_instruction_bp(struct task_struct *child,
}
out:
child->thread.debug.dbcr0 |= DBCR0_IDM;
- child->thread.regs->msr |= MSR_DE;
+ regs_set_return_msr(child->thread.regs, child->thread.regs->msr | MSR_DE);
return slot;
}
@@ -336,7 +337,7 @@ static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)
return -ENOSPC;
}
child->thread.debug.dbcr0 |= DBCR0_IDM;
- child->thread.regs->msr |= MSR_DE;
+ regs_set_return_msr(child->thread.regs, child->thread.regs->msr | MSR_DE);
return slot + 4;
}
@@ -430,7 +431,7 @@ static int set_dac_range(struct task_struct *child,
child->thread.debug.dbcr2 |= DBCR2_DAC12MX;
else /* PPC_BREAKPOINT_MODE_MASK */
child->thread.debug.dbcr2 |= DBCR2_DAC12MM;
- child->thread.regs->msr |= MSR_DE;
+ regs_set_return_msr(child->thread.regs, child->thread.regs->msr | MSR_DE);
return 5;
}
@@ -485,7 +486,8 @@ long ppc_del_hwdebug(struct task_struct *child, long data)
if (!DBCR_ACTIVE_EVENTS(child->thread.debug.dbcr0,
child->thread.debug.dbcr1)) {
child->thread.debug.dbcr0 &= ~DBCR0_IDM;
- child->thread.regs->msr &= ~MSR_DE;
+ regs_set_return_msr(child->thread.regs,
+ child->thread.regs->msr & ~MSR_DE);
}
}
return rc;
diff --git a/arch/powerpc/kernel/ptrace/ptrace-noadv.c b/arch/powerpc/kernel/ptrace/ptrace-noadv.c
index aa36fcad36cd..a5dd7d2e2c9e 100644
--- a/arch/powerpc/kernel/ptrace/ptrace-noadv.c
+++ b/arch/powerpc/kernel/ptrace/ptrace-noadv.c
@@ -11,10 +11,8 @@ void user_enable_single_step(struct task_struct *task)
{
struct pt_regs *regs = task->thread.regs;
- if (regs != NULL) {
- regs->msr &= ~MSR_BE;
- regs->msr |= MSR_SE;
- }
+ if (regs != NULL)
+ regs_set_return_msr(regs, (regs->msr & ~MSR_BE) | MSR_SE);
set_tsk_thread_flag(task, TIF_SINGLESTEP);
}
@@ -22,10 +20,8 @@ void user_enable_block_step(struct task_struct *task)
{
struct pt_regs *regs = task->thread.regs;
- if (regs != NULL) {
- regs->msr &= ~MSR_SE;
- regs->msr |= MSR_BE;
- }
+ if (regs != NULL)
+ regs_set_return_msr(regs, (regs->msr & ~MSR_SE) | MSR_BE);
set_tsk_thread_flag(task, TIF_SINGLESTEP);
}
@@ -34,7 +30,7 @@ void user_disable_single_step(struct task_struct *task)
struct pt_regs *regs = task->thread.regs;
if (regs != NULL)
- regs->msr &= ~(MSR_SE | MSR_BE);
+ regs_set_return_msr(regs, regs->msr & ~(MSR_SE | MSR_BE));
clear_tsk_thread_flag(task, TIF_SINGLESTEP);
}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c
index 773bcc4ca843..b8be1d6668b5 100644
--- a/arch/powerpc/kernel/ptrace/ptrace-view.c
+++ b/arch/powerpc/kernel/ptrace/ptrace-view.c
@@ -113,8 +113,9 @@ static unsigned long get_user_msr(struct task_struct *task)
static __always_inline int set_user_msr(struct task_struct *task, unsigned long msr)
{
- task->thread.regs->msr &= ~MSR_DEBUGCHANGE;
- task->thread.regs->msr |= msr & MSR_DEBUGCHANGE;
+ unsigned long newmsr = (task->thread.regs->msr & ~MSR_DEBUGCHANGE) |
+ (msr & MSR_DEBUGCHANGE);
+ regs_set_return_msr(task->thread.regs, newmsr);
return 0;
}
diff --git a/arch/powerpc/kernel/rtas-rtc.c b/arch/powerpc/kernel/rtas-rtc.c
index a28239b8b0c0..33c07c8af6c8 100644
--- a/arch/powerpc/kernel/rtas-rtc.c
+++ b/arch/powerpc/kernel/rtas-rtc.c
@@ -12,7 +12,7 @@
#define MAX_RTC_WAIT 5000 /* 5 sec */
-#define RTAS_CLOCK_BUSY (-2)
+
time64_t __init rtas_get_boot_time(void)
{
int ret[8];
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 6bada744402b..99f2cce635fb 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -25,6 +25,7 @@
#include <linux/reboot.h>
#include <linux/syscalls.h>
+#include <asm/interrupt.h>
#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/hvcall.h>
@@ -46,6 +47,13 @@
/* This is here deliberately so it's only used in this file */
void enter_rtas(unsigned long);
+static inline void do_enter_rtas(unsigned long args)
+{
+ enter_rtas(args);
+
+ srr_regs_clobbered(); /* rtas uses SRRs, invalidate */
+}
+
struct rtas_t rtas = {
.lock = __ARCH_SPIN_LOCK_UNLOCKED
};
@@ -384,7 +392,7 @@ static char *__fetch_rtas_last_error(char *altbuf)
save_args = rtas.args;
rtas.args = err_args;
- enter_rtas(__pa(&rtas.args));
+ do_enter_rtas(__pa(&rtas.args));
err_args = rtas.args;
rtas.args = save_args;
@@ -430,7 +438,7 @@ va_rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret,
for (i = 0; i < nret; ++i)
args->rets[i] = 0;
- enter_rtas(__pa(args));
+ do_enter_rtas(__pa(args));
}
void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, ...)
@@ -1138,7 +1146,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
flags = lock_rtas();
rtas.args = args;
- enter_rtas(__pa(&rtas.args));
+ do_enter_rtas(__pa(&rtas.args));
args = rtas.args;
/* A -1 return code indicates that the last command couldn't
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 0fdfcdd9d880..cc51fa52e783 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -300,9 +300,7 @@ static void stf_barrier_enable(bool enable)
void setup_stf_barrier(void)
{
enum stf_barrier_type type;
- bool enable, hv;
-
- hv = cpu_has_feature(CPU_FTR_HVMODE);
+ bool enable;
/* Default to fallback in case fw-features are not available */
if (cpu_has_feature(CPU_FTR_ARCH_300))
@@ -315,8 +313,7 @@ void setup_stf_barrier(void)
type = STF_BARRIER_NONE;
enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
- (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) ||
- (security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && hv));
+ security_ftr_enabled(SEC_FTR_STF_BARRIER);
if (type == STF_BARRIER_FALLBACK) {
pr_info("stf-barrier: fallback barrier available\n");
@@ -432,26 +429,29 @@ device_initcall(stf_barrier_debugfs_init);
static void update_branch_cache_flush(void)
{
- u32 *site;
+ u32 *site, __maybe_unused *site2;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
site = &patch__call_kvm_flush_link_stack;
+ site2 = &patch__call_kvm_flush_link_stack_p9;
// This controls the branch from guest_exit_cont to kvm_flush_link_stack
if (link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) {
- patch_instruction_site(site, ppc_inst(PPC_INST_NOP));
+ patch_instruction_site(site, ppc_inst(PPC_RAW_NOP()));
+ patch_instruction_site(site2, ppc_inst(PPC_RAW_NOP()));
} else {
// Could use HW flush, but that could also flush count cache
patch_branch_site(site, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
+ patch_branch_site(site2, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
}
#endif
// Patch out the bcctr first, then nop the rest
site = &patch__call_flush_branch_caches3;
- patch_instruction_site(site, ppc_inst(PPC_INST_NOP));
+ patch_instruction_site(site, ppc_inst(PPC_RAW_NOP()));
site = &patch__call_flush_branch_caches2;
- patch_instruction_site(site, ppc_inst(PPC_INST_NOP));
+ patch_instruction_site(site, ppc_inst(PPC_RAW_NOP()));
site = &patch__call_flush_branch_caches1;
- patch_instruction_site(site, ppc_inst(PPC_INST_NOP));
+ patch_instruction_site(site, ppc_inst(PPC_RAW_NOP()));
// This controls the branch from _switch to flush_branch_caches
if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE &&
@@ -474,12 +474,12 @@ static void update_branch_cache_flush(void)
// If we just need to flush the link stack, early return
if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE) {
patch_instruction_site(&patch__flush_link_stack_return,
- ppc_inst(PPC_INST_BLR));
+ ppc_inst(PPC_RAW_BLR()));
// If we have flush instruction, early return
} else if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW) {
patch_instruction_site(&patch__flush_count_cache_return,
- ppc_inst(PPC_INST_BLR));
+ ppc_inst(PPC_RAW_BLR()));
}
}
}
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 74a98fff2c2f..26328fd2990c 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -9,6 +9,7 @@
#undef DEBUG
#include <linux/export.h>
+#include <linux/panic_notifier.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/init.h>
@@ -91,8 +92,6 @@ EXPORT_SYMBOL_GPL(boot_cpuid);
int dcache_bsize;
int icache_bsize;
-unsigned long klimit = (unsigned long) _end;
-
/*
* This still seems to be needed... -- paulus
*/
@@ -930,7 +929,7 @@ void __init setup_arch(char **cmdline_p)
init_mm.start_code = (unsigned long)_stext;
init_mm.end_code = (unsigned long) _etext;
init_mm.end_data = (unsigned long) _edata;
- init_mm.brk = klimit;
+ init_mm.brk = (unsigned long)_end;
mm_iommu_init(&init_mm);
irqstack_early_init();
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index d7c1f92152af..7ec5c47fce0e 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -74,7 +74,7 @@ EXPORT_SYMBOL(DMA_MODE_WRITE);
*/
notrace void __init machine_init(u64 dt_ptr)
{
- struct ppc_inst *addr = (struct ppc_inst *)patch_site_addr(&patch__memset_nocache);
+ u32 *addr = (u32 *)patch_site_addr(&patch__memset_nocache);
struct ppc_inst insn;
/* Configure static keys first, now that we're relocated. */
@@ -85,7 +85,7 @@ notrace void __init machine_init(u64 dt_ptr)
/* Enable early debugging if any specified (see udbg.h) */
udbg_early_init();
- patch_instruction_site(&patch__memcpy_nocache, ppc_inst(PPC_INST_NOP));
+ patch_instruction_site(&patch__memcpy_nocache, ppc_inst(PPC_RAW_NOP()));
create_cond_branch(&insn, addr, branch_target(addr), 0x820000);
patch_instruction(addr, insn); /* replace b by bne cr0 */
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index e42b85e4f1aa..1ff258f6c76c 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -33,6 +33,7 @@
#include <linux/pgtable.h>
#include <asm/debugfs.h>
+#include <asm/kvm_guest.h>
#include <asm/io.h>
#include <asm/kdump.h>
#include <asm/prom.h>
@@ -788,7 +789,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
size_t align)
{
const unsigned long goal = __pa(MAX_DMA_ADDRESS);
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
int node = early_cpu_to_node(cpu);
void *ptr;
@@ -939,16 +940,20 @@ u64 hw_nmi_get_sample_period(int watchdog_thresh)
* disable it by default. Book3S has a soft-nmi hardlockup detector based
* on the decrementer interrupt, so it does not suffer from this problem.
*
- * It is likely to get false positives in VM guests, so disable it there
- * by default too.
+ * It is likely to get false positives in KVM guests, so disable it there
+ * by default too. PowerVM will not stop or arbitrarily oversubscribe
+ * CPUs, but give a minimum regular allotment even with SPLPAR, so enable
+ * the detector for non-KVM guests, assume PowerVM.
*/
static int __init disable_hardlockup_detector(void)
{
#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
hardlockup_detector_disable();
#else
- if (firmware_has_feature(FW_FEATURE_LPAR))
- hardlockup_detector_disable();
+ if (firmware_has_feature(FW_FEATURE_LPAR)) {
+ if (is_kvm_guest())
+ hardlockup_detector_disable();
+ }
#endif
return 0;
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index 9ded046edb0e..e600764a926c 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -214,7 +214,7 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka,
regs->gpr[0] = __NR_restart_syscall;
else
regs->gpr[3] = regs->orig_gpr3;
- regs->nip -= 4;
+ regs_add_return_ip(regs, -4);
regs->result = 0;
} else {
if (trap_is_scv(regs)) {
@@ -322,16 +322,16 @@ static unsigned long get_tm_stackpointer(struct task_struct *tsk)
* For signals taken in non-TM or suspended mode, we use the
* normal/non-checkpointed stack pointer.
*/
-
- unsigned long ret = tsk->thread.regs->gpr[1];
+ struct pt_regs *regs = tsk->thread.regs;
+ unsigned long ret = regs->gpr[1];
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
BUG_ON(tsk != current);
- if (MSR_TM_ACTIVE(tsk->thread.regs->msr)) {
+ if (MSR_TM_ACTIVE(regs->msr)) {
preempt_disable();
tm_reclaim_current(TM_CAUSE_SIGNAL);
- if (MSR_TM_TRANSACTIONAL(tsk->thread.regs->msr))
+ if (MSR_TM_TRANSACTIONAL(regs->msr))
ret = tsk->thread.ckpt_regs.gpr[1];
/*
@@ -341,7 +341,7 @@ static unsigned long get_tm_stackpointer(struct task_struct *tsk)
* (tm_recheckpoint_new_task() would recheckpoint). Besides, we
* enter the signal handler in non-transactional state.
*/
- tsk->thread.regs->msr &= ~MSR_TS_MASK;
+ regs_set_return_msr(regs, regs->msr & ~MSR_TS_MASK);
preempt_enable();
}
#endif
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 8f05ed0da292..0608581967f0 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -354,14 +354,8 @@ static void prepare_save_tm_user_regs(void)
{
WARN_ON(tm_suspend_disabled);
-#ifdef CONFIG_ALTIVEC
if (cpu_has_feature(CPU_FTR_ALTIVEC))
current->thread.ckvrsave = mfspr(SPRN_VRSAVE);
-#endif
-#ifdef CONFIG_SPE
- if (current->thread.used_spe)
- flush_spe_to_thread(current);
-#endif
}
static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame,
@@ -379,7 +373,6 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user
*/
unsafe_put_user((msr >> 32), &tm_frame->mc_gregs[PT_MSR], failed);
-#ifdef CONFIG_ALTIVEC
/* save altivec registers */
if (current->thread.used_vr) {
unsafe_copy_to_user(&frame->mc_vregs, &current->thread.ckvr_state,
@@ -412,7 +405,6 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user
else
unsafe_put_user(current->thread.ckvrsave,
(u32 __user *)&tm_frame->mc_vregs[32], failed);
-#endif /* CONFIG_ALTIVEC */
unsafe_copy_ckfpr_to_user(&frame->mc_fregs, current, failed);
if (msr & MSR_FP)
@@ -420,7 +412,6 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user
else
unsafe_copy_ckfpr_to_user(&tm_frame->mc_fregs, current, failed);
-#ifdef CONFIG_VSX
/*
* Copy VSR 0-31 upper half from thread_struct to local
* buffer, then write that to userspace. Also set MSR_VSX in
@@ -436,23 +427,6 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user
msr |= MSR_VSX;
}
-#endif /* CONFIG_VSX */
-#ifdef CONFIG_SPE
- /* SPE regs are not checkpointed with TM, so this section is
- * simply the same as in __unsafe_save_user_regs().
- */
- if (current->thread.used_spe) {
- unsafe_copy_to_user(&frame->mc_vregs, current->thread.evr,
- ELF_NEVRREG * sizeof(u32), failed);
- /* set MSR_SPE in the saved MSR value to indicate that
- * frame->mc_vregs contains valid data */
- msr |= MSR_SPE;
- }
-
- /* We always copy to/from spefscr */
- unsafe_put_user(current->thread.spefscr,
- (u32 __user *)&frame->mc_vregs + ELF_NEVRREG, failed);
-#endif /* CONFIG_SPE */
unsafe_put_user(msr, &frame->mc_gregs[PT_MSR], failed);
@@ -505,14 +479,14 @@ static long restore_user_regs(struct pt_regs *regs,
/* if doing signal return, restore the previous little-endian mode */
if (sig)
- regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
+ regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE));
#ifdef CONFIG_ALTIVEC
/*
* Force the process to reload the altivec registers from
* current->thread when it next does altivec instructions
*/
- regs->msr &= ~MSR_VEC;
+ regs_set_return_msr(regs, regs->msr & ~MSR_VEC);
if (msr & MSR_VEC) {
/* restore altivec registers from the stack */
unsafe_copy_from_user(&current->thread.vr_state, &sr->mc_vregs,
@@ -534,7 +508,7 @@ static long restore_user_regs(struct pt_regs *regs,
* Force the process to reload the VSX registers from
* current->thread when it next does VSX instruction.
*/
- regs->msr &= ~MSR_VSX;
+ regs_set_return_msr(regs, regs->msr & ~MSR_VSX);
if (msr & MSR_VSX) {
/*
* Restore altivec registers from the stack to a local
@@ -550,12 +524,12 @@ static long restore_user_regs(struct pt_regs *regs,
* force the process to reload the FP registers from
* current->thread when it next does FP instructions
*/
- regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1);
+ regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1));
#ifdef CONFIG_SPE
/* force the process to reload the spe registers from
current->thread when it next does spe instructions */
- regs->msr &= ~MSR_SPE;
+ regs_set_return_msr(regs, regs->msr & ~MSR_SPE);
if (msr & MSR_SPE) {
/* restore spe registers from the stack */
unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs,
@@ -587,9 +561,7 @@ static long restore_tm_user_regs(struct pt_regs *regs,
struct mcontext __user *tm_sr)
{
unsigned long msr, msr_hi;
-#ifdef CONFIG_VSX
int i;
-#endif
if (tm_suspend_disabled)
return 1;
@@ -608,10 +580,9 @@ static long restore_tm_user_regs(struct pt_regs *regs,
unsafe_get_user(msr, &sr->mc_gregs[PT_MSR], failed);
/* Restore the previous little-endian mode */
- regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
+ regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE));
-#ifdef CONFIG_ALTIVEC
- regs->msr &= ~MSR_VEC;
+ regs_set_return_msr(regs, regs->msr & ~MSR_VEC);
if (msr & MSR_VEC) {
/* restore altivec registers from the stack */
unsafe_copy_from_user(&current->thread.ckvr_state, &sr->mc_vregs,
@@ -629,14 +600,12 @@ static long restore_tm_user_regs(struct pt_regs *regs,
(u32 __user *)&sr->mc_vregs[32], failed);
if (cpu_has_feature(CPU_FTR_ALTIVEC))
mtspr(SPRN_VRSAVE, current->thread.ckvrsave);
-#endif /* CONFIG_ALTIVEC */
- regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1);
+ regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1));
unsafe_copy_fpr_from_user(current, &sr->mc_fregs, failed);
-#ifdef CONFIG_VSX
- regs->msr &= ~MSR_VSX;
+ regs_set_return_msr(regs, regs->msr & ~MSR_VSX);
if (msr & MSR_VSX) {
/*
* Restore altivec registers from the stack to a local
@@ -649,24 +618,6 @@ static long restore_tm_user_regs(struct pt_regs *regs,
current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
current->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
}
-#endif /* CONFIG_VSX */
-
-#ifdef CONFIG_SPE
- /* SPE regs are not checkpointed with TM, so this section is
- * simply the same as in restore_user_regs().
- */
- regs->msr &= ~MSR_SPE;
- if (msr & MSR_SPE) {
- unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs,
- ELF_NEVRREG * sizeof(u32), failed);
- current->thread.used_spe = true;
- } else if (current->thread.used_spe)
- memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32));
-
- /* Always get SPEFSCR back */
- unsafe_get_user(current->thread.spefscr,
- (u32 __user *)&sr->mc_vregs + ELF_NEVRREG, failed);
-#endif /* CONFIG_SPE */
user_read_access_end();
@@ -675,7 +626,6 @@ static long restore_tm_user_regs(struct pt_regs *regs,
unsafe_restore_general_regs(regs, tm_sr, failed);
-#ifdef CONFIG_ALTIVEC
/* restore altivec registers from the stack */
if (msr & MSR_VEC)
unsafe_copy_from_user(&current->thread.vr_state, &tm_sr->mc_vregs,
@@ -684,11 +634,9 @@ static long restore_tm_user_regs(struct pt_regs *regs,
/* Always get VRSAVE back */
unsafe_get_user(current->thread.vrsave,
(u32 __user *)&tm_sr->mc_vregs[32], failed);
-#endif /* CONFIG_ALTIVEC */
unsafe_copy_ckfpr_from_user(current, &tm_sr->mc_fregs, failed);
-#ifdef CONFIG_VSX
if (msr & MSR_VSX) {
/*
* Restore altivec registers from the stack to a local
@@ -697,7 +645,6 @@ static long restore_tm_user_regs(struct pt_regs *regs,
unsafe_copy_vsx_from_user(current, &tm_sr->mc_vsregs, failed);
current->thread.used_vsr = true;
}
-#endif /* CONFIG_VSX */
/* Get the top half of the MSR from the user context */
unsafe_get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR], failed);
@@ -725,7 +672,7 @@ static long restore_tm_user_regs(struct pt_regs *regs,
*
* Pull in the MSR TM bits from the user context
*/
- regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr_hi & MSR_TS_MASK);
+ regs_set_return_msr(regs, (regs->msr & ~MSR_TS_MASK) | (msr_hi & MSR_TS_MASK));
/* Now, recheckpoint. This loads up all of the checkpointed (older)
* registers, including FP and V[S]Rs. After recheckpointing, the
* transactional versions should be loaded.
@@ -740,14 +687,12 @@ static long restore_tm_user_regs(struct pt_regs *regs,
msr_check_and_set(msr & (MSR_FP | MSR_VEC));
if (msr & MSR_FP) {
load_fp_state(&current->thread.fp_state);
- regs->msr |= (MSR_FP | current->thread.fpexc_mode);
+ regs_set_return_msr(regs, regs->msr | (MSR_FP | current->thread.fpexc_mode));
}
-#ifdef CONFIG_ALTIVEC
if (msr & MSR_VEC) {
load_vr_state(&current->thread.vr_state);
- regs->msr |= MSR_VEC;
+ regs_set_return_msr(regs, regs->msr | MSR_VEC);
}
-#endif
preempt_enable();
@@ -828,10 +773,8 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp_rt32);
} else {
tramp = (unsigned long)mctx->mc_pad;
- /* Set up the sigreturn trampoline: li r0,sigret; sc */
- unsafe_put_user(PPC_INST_ADDI + __NR_rt_sigreturn, &mctx->mc_pad[0],
- failed);
- unsafe_put_user(PPC_INST_SC, &mctx->mc_pad[1], failed);
+ unsafe_put_user(PPC_RAW_LI(_R0, __NR_rt_sigreturn), &mctx->mc_pad[0], failed);
+ unsafe_put_user(PPC_RAW_SC(), &mctx->mc_pad[1], failed);
asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0]));
}
unsafe_put_sigset_t(&frame->uc.uc_sigmask, oldset, failed);
@@ -858,10 +801,10 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
regs->gpr[4] = (unsigned long)&frame->info;
regs->gpr[5] = (unsigned long)&frame->uc;
regs->gpr[6] = (unsigned long)frame;
- regs->nip = (unsigned long) ksig->ka.sa.sa_handler;
+ regs_set_return_ip(regs, (unsigned long) ksig->ka.sa.sa_handler);
/* enter the signal handler in native-endian mode */
- regs->msr &= ~MSR_LE;
- regs->msr |= (MSR_KERNEL & MSR_LE);
+ regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (MSR_KERNEL & MSR_LE));
+
return 0;
failed:
@@ -926,9 +869,8 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp32);
} else {
tramp = (unsigned long)mctx->mc_pad;
- /* Set up the sigreturn trampoline: li r0,sigret; sc */
- unsafe_put_user(PPC_INST_ADDI + __NR_sigreturn, &mctx->mc_pad[0], failed);
- unsafe_put_user(PPC_INST_SC, &mctx->mc_pad[1], failed);
+ unsafe_put_user(PPC_RAW_LI(_R0, __NR_sigreturn), &mctx->mc_pad[0], failed);
+ unsafe_put_user(PPC_RAW_SC(), &mctx->mc_pad[1], failed);
asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0]));
}
user_access_end();
@@ -947,10 +889,10 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
regs->gpr[1] = newsp;
regs->gpr[3] = ksig->sig;
regs->gpr[4] = (unsigned long) sc;
- regs->nip = (unsigned long)ksig->ka.sa.sa_handler;
+ regs_set_return_ip(regs, (unsigned long) ksig->ka.sa.sa_handler);
/* enter the signal handler in native-endian mode */
- regs->msr &= ~MSR_LE;
- regs->msr |= (MSR_KERNEL & MSR_LE);
+ regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (MSR_KERNEL & MSR_LE));
+
return 0;
failed:
@@ -1200,7 +1142,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
* set, and recheckpoint was not called. This avoid
* hitting a TM Bad thing at RFID
*/
- regs->msr &= ~MSR_TS_MASK;
+ regs_set_return_msr(regs, regs->msr & ~MSR_TS_MASK);
}
/* Fall through, for non-TM restore */
#endif
@@ -1289,7 +1231,7 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx,
affect the contents of these registers. After this point,
failure is a problem, anyway, and it's very unlikely unless
the user is really doing something wrong. */
- regs->msr = new_msr;
+ regs_set_return_msr(regs, new_msr);
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
current->thread.debug.dbcr0 = new_dbcr0;
#endif
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index dca66481d0c2..1831bba0582e 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -354,7 +354,7 @@ static long notrace __unsafe_restore_sigcontext(struct task_struct *tsk, sigset_
/* get MSR separately, transfer the LE bit if doing signal return */
unsafe_get_user(msr, &sc->gp_regs[PT_MSR], efault_out);
if (sig)
- regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
+ regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE));
unsafe_get_user(regs->orig_gpr3, &sc->gp_regs[PT_ORIG_R3], efault_out);
unsafe_get_user(regs->ctr, &sc->gp_regs[PT_CTR], efault_out);
unsafe_get_user(regs->link, &sc->gp_regs[PT_LNK], efault_out);
@@ -376,7 +376,7 @@ static long notrace __unsafe_restore_sigcontext(struct task_struct *tsk, sigset_
* This has to be done before copying stuff into tsk->thread.fpr/vr
* for the reasons explained in the previous comment.
*/
- regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX);
+ regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX));
#ifdef CONFIG_ALTIVEC
unsafe_get_user(v_regs, &sc->v_regs, efault_out);
@@ -468,7 +468,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
return -EINVAL;
/* pull in MSR LE from user context */
- regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
+ regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE));
/* The following non-GPR non-FPR non-VR state is also checkpointed: */
err |= __get_user(regs->ctr, &tm_sc->gp_regs[PT_CTR]);
@@ -495,7 +495,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
* This has to be done before copying stuff into tsk->thread.fpr/vr
* for the reasons explained in the previous comment.
*/
- regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX);
+ regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX));
#ifdef CONFIG_ALTIVEC
err |= __get_user(v_regs, &sc->v_regs);
@@ -565,7 +565,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
preempt_disable();
/* pull in MSR TS bits from user context */
- regs->msr |= msr & MSR_TS_MASK;
+ regs_set_return_msr(regs, regs->msr | (msr & MSR_TS_MASK));
/*
* Ensure that TM is enabled in regs->msr before we leave the signal
@@ -583,7 +583,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
* to be de-scheduled with MSR[TS] set but without calling
* tm_recheckpoint(). This can cause a bug.
*/
- regs->msr |= MSR_TM;
+ regs_set_return_msr(regs, regs->msr | MSR_TM);
/* This loads the checkpointed FP/VEC state, if used */
tm_recheckpoint(&tsk->thread);
@@ -591,11 +591,11 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
msr_check_and_set(msr & (MSR_FP | MSR_VEC));
if (msr & MSR_FP) {
load_fp_state(&tsk->thread.fp_state);
- regs->msr |= (MSR_FP | tsk->thread.fpexc_mode);
+ regs_set_return_msr(regs, regs->msr | (MSR_FP | tsk->thread.fpexc_mode));
}
if (msr & MSR_VEC) {
load_vr_state(&tsk->thread.vr_state);
- regs->msr |= MSR_VEC;
+ regs_set_return_msr(regs, regs->msr | MSR_VEC);
}
preempt_enable();
@@ -618,15 +618,12 @@ static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp)
int i;
long err = 0;
- /* bctrl # call the handler */
- err |= __put_user(PPC_INST_BCTRL, &tramp[0]);
- /* addi r1, r1, __SIGNAL_FRAMESIZE # Pop the dummy stackframe */
- err |= __put_user(PPC_INST_ADDI | __PPC_RT(R1) | __PPC_RA(R1) |
- (__SIGNAL_FRAMESIZE & 0xffff), &tramp[1]);
- /* li r0, __NR_[rt_]sigreturn| */
- err |= __put_user(PPC_INST_ADDI | (syscall & 0xffff), &tramp[2]);
- /* sc */
- err |= __put_user(PPC_INST_SC, &tramp[3]);
+ /* Call the handler and pop the dummy stackframe*/
+ err |= __put_user(PPC_RAW_BCTRL(), &tramp[0]);
+ err |= __put_user(PPC_RAW_ADDI(_R1, _R1, __SIGNAL_FRAMESIZE), &tramp[1]);
+
+ err |= __put_user(PPC_RAW_LI(_R0, syscall), &tramp[2]);
+ err |= __put_user(PPC_RAW_SC(), &tramp[3]);
/* Minimal traceback info */
for (i=TRAMP_TRACEBACK; i < TRAMP_SIZE ;i++)
@@ -720,6 +717,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
/* This returns like rt_sigreturn */
set_thread_flag(TIF_RESTOREALL);
+
return 0;
efault_out:
@@ -786,7 +784,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
* the MSR[TS] that came from user context later, at
* restore_tm_sigcontexts.
*/
- regs->msr &= ~MSR_TS_MASK;
+ regs_set_return_msr(regs, regs->msr & ~MSR_TS_MASK);
if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
goto badframe;
@@ -818,7 +816,8 @@ SYSCALL_DEFINE0(rt_sigreturn)
* MSR[TS] set, but without CPU in the proper state,
* causing a TM bad thing.
*/
- current->thread.regs->msr &= ~MSR_TS_MASK;
+ regs_set_return_msr(current->thread.regs,
+ current->thread.regs->msr & ~MSR_TS_MASK);
if (!user_read_access_begin(&uc->uc_mcontext, sizeof(uc->uc_mcontext)))
goto badframe;
@@ -832,6 +831,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
goto badframe;
set_thread_flag(TIF_RESTOREALL);
+
return 0;
badframe_block:
@@ -902,24 +902,23 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe_block);
user_write_access_end();
+ /* Save the siginfo outside of the unsafe block. */
+ if (copy_siginfo_to_user(&frame->info, &ksig->info))
+ goto badframe;
+
/* Make sure signal handler doesn't get spurious FP exceptions */
tsk->thread.fp_state.fpscr = 0;
/* Set up to return from userspace. */
if (tsk->mm->context.vdso) {
- regs->nip = VDSO64_SYMBOL(tsk->mm->context.vdso, sigtramp_rt64);
+ regs_set_return_ip(regs, VDSO64_SYMBOL(tsk->mm->context.vdso, sigtramp_rt64));
} else {
err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]);
if (err)
goto badframe;
- regs->nip = (unsigned long) &frame->tramp[0];
+ regs_set_return_ip(regs, (unsigned long) &frame->tramp[0]);
}
-
- /* Save the siginfo outside of the unsafe block. */
- if (copy_siginfo_to_user(&frame->info, &ksig->info))
- goto badframe;
-
/* Allocate a dummy caller frame for the signal handler. */
newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
err |= put_user(regs->gpr[1], (unsigned long __user *)newsp);
@@ -942,14 +941,13 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
}
/* enter the signal handler in native-endian mode */
- regs->msr &= ~MSR_LE;
- regs->msr |= (MSR_KERNEL & MSR_LE);
+ regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (MSR_KERNEL & MSR_LE));
regs->gpr[1] = newsp;
regs->gpr[3] = ksig->sig;
regs->result = 0;
if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
- err |= get_user(regs->gpr[4], (unsigned long __user *)&frame->pinfo);
- err |= get_user(regs->gpr[5], (unsigned long __user *)&frame->puc);
+ regs->gpr[4] = (unsigned long)&frame->info;
+ regs->gpr[5] = (unsigned long)&frame->uc;
regs->gpr[6] = (unsigned long) frame;
} else {
regs->gpr[4] = (unsigned long)&frame->uc.uc_mcontext;
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 2e05c783440a..447b78a87c8f 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -619,6 +619,8 @@ static void nmi_stop_this_cpu(struct pt_regs *regs)
/*
* IRQs are already hard disabled by the smp_handle_nmi_ipi.
*/
+ set_cpu_online(smp_processor_id(), false);
+
spin_begin();
while (1)
spin_cpu_relax();
@@ -634,6 +636,15 @@ void smp_send_stop(void)
static void stop_this_cpu(void *dummy)
{
hard_irq_disable();
+
+ /*
+ * Offlining CPUs in stop_this_cpu can result in scheduler warnings,
+ * (see commit de6e5d38417e), but printk_safe_flush_on_panic() wants
+ * to know other CPUs are offline before it breaks locks to flush
+ * printk buffers, in case we panic()ed while holding the lock.
+ */
+ set_cpu_online(smp_processor_id(), false);
+
spin_begin();
while (1)
spin_cpu_relax();
@@ -1047,7 +1058,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
zalloc_cpumask_var_node(&per_cpu(cpu_coregroup_map, cpu),
GFP_KERNEL, cpu_to_node(cpu));
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
/*
* numa_node_id() works after this.
*/
@@ -1541,13 +1552,16 @@ void start_secondary(void *unused)
{
unsigned int cpu = raw_smp_processor_id();
+ /* PPC64 calls setup_kup() in early_setup_secondary() */
+ if (IS_ENABLED(CONFIG_PPC32))
+ setup_kup();
+
mmgrab(&init_mm);
current->active_mm = &init_mm;
smp_store_cpu_info(cpu);
set_dec(tb_ticks_per_jiffy);
rcu_cpu_starting(cpu);
- preempt_disable();
cpu_callin_map[cpu] = 1;
if (smp_ops->setup_cpu)
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index 1deb1bf331dd..2b0d04a1b7d2 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -23,8 +23,8 @@
#include <asm/paca.h>
-void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
- struct task_struct *task, struct pt_regs *regs)
+void __no_sanitize_address arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
+ struct task_struct *task, struct pt_regs *regs)
{
unsigned long sp;
@@ -61,8 +61,8 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
*
* If the task is not 'current', the caller *must* ensure the task is inactive.
*/
-int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
- void *cookie, struct task_struct *task)
+int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
+ void *cookie, struct task_struct *task)
{
unsigned long sp;
unsigned long newsp;
@@ -172,17 +172,31 @@ static void handle_backtrace_ipi(struct pt_regs *regs)
static void raise_backtrace_ipi(cpumask_t *mask)
{
+ struct paca_struct *p;
unsigned int cpu;
+ u64 delay_us;
for_each_cpu(cpu, mask) {
- if (cpu == smp_processor_id())
+ if (cpu == smp_processor_id()) {
handle_backtrace_ipi(NULL);
- else
- smp_send_safe_nmi_ipi(cpu, handle_backtrace_ipi, 5 * USEC_PER_SEC);
- }
+ continue;
+ }
- for_each_cpu(cpu, mask) {
- struct paca_struct *p = paca_ptrs[cpu];
+ delay_us = 5 * USEC_PER_SEC;
+
+ if (smp_send_safe_nmi_ipi(cpu, handle_backtrace_ipi, delay_us)) {
+ // Now wait up to 5s for the other CPU to do its backtrace
+ while (cpumask_test_cpu(cpu, mask) && delay_us) {
+ udelay(1);
+ delay_us--;
+ }
+
+ // Other CPU cleared itself from the mask
+ if (delay_us)
+ continue;
+ }
+
+ p = paca_ptrs[cpu];
cpumask_clear_cpu(cpu, mask);
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index a552c9e68d7e..bf4ae0f0e36c 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -114,7 +114,8 @@ SYSCALL_DEFINE0(switch_endian)
{
struct thread_info *ti;
- current->thread.regs->msr ^= MSR_LE;
+ regs_set_return_msr(current->thread.regs,
+ current->thread.regs->msr ^ MSR_LE);
/*
* Set TIF_RESTOREALL so that r3 isn't clobbered on return to
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 8f052ff4058c..aef2a290e71a 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -522,7 +522,7 @@
440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
442 common mount_setattr sys_mount_setattr
-# 443 reserved for quotactl_path
+443 common quotactl_fd sys_quotactl_fd
444 common landlock_create_ruleset sys_landlock_create_ruleset
445 common landlock_add_rule sys_landlock_add_rule
446 common landlock_restrict_self sys_landlock_restrict_self
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 2e08640bb3b4..5ff0e55d0db1 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -843,14 +843,14 @@ static int register_cpu_online(unsigned int cpu)
#ifdef HAS_PPC_PMC_IBM
case PPC_PMC_IBM:
attrs = ibm_common_attrs;
- nattrs = sizeof(ibm_common_attrs) / sizeof(struct device_attribute);
+ nattrs = ARRAY_SIZE(ibm_common_attrs);
pmc_attrs = classic_pmc_attrs;
break;
#endif /* HAS_PPC_PMC_IBM */
#ifdef HAS_PPC_PMC_G4
case PPC_PMC_G4:
attrs = g4_common_attrs;
- nattrs = sizeof(g4_common_attrs) / sizeof(struct device_attribute);
+ nattrs = ARRAY_SIZE(g4_common_attrs);
pmc_attrs = classic_pmc_attrs;
break;
#endif /* HAS_PPC_PMC_G4 */
@@ -858,7 +858,7 @@ static int register_cpu_online(unsigned int cpu)
case PPC_PMC_PA6T:
/* PA Semi starts counting at PMC0 */
attrs = pa6t_attrs;
- nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute);
+ nattrs = ARRAY_SIZE(pa6t_attrs);
pmc_attrs = NULL;
break;
#endif
@@ -940,14 +940,14 @@ static int unregister_cpu_online(unsigned int cpu)
#ifdef HAS_PPC_PMC_IBM
case PPC_PMC_IBM:
attrs = ibm_common_attrs;
- nattrs = sizeof(ibm_common_attrs) / sizeof(struct device_attribute);
+ nattrs = ARRAY_SIZE(ibm_common_attrs);
pmc_attrs = classic_pmc_attrs;
break;
#endif /* HAS_PPC_PMC_IBM */
#ifdef HAS_PPC_PMC_G4
case PPC_PMC_G4:
attrs = g4_common_attrs;
- nattrs = sizeof(g4_common_attrs) / sizeof(struct device_attribute);
+ nattrs = ARRAY_SIZE(g4_common_attrs);
pmc_attrs = classic_pmc_attrs;
break;
#endif /* HAS_PPC_PMC_G4 */
@@ -955,7 +955,7 @@ static int unregister_cpu_online(unsigned int cpu)
case PPC_PMC_PA6T:
/* PA Semi starts counting at PMC0 */
attrs = pa6t_attrs;
- nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute);
+ nattrs = ARRAY_SIZE(pa6t_attrs);
pmc_attrs = NULL;
break;
#endif
diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c
index 6c31af7f4fa8..b9a047d92ec0 100644
--- a/arch/powerpc/kernel/tau_6xx.c
+++ b/arch/powerpc/kernel/tau_6xx.c
@@ -201,7 +201,7 @@ static int __init TAU_init(void)
tau_int_enable = IS_ENABLED(CONFIG_TAU_INT) &&
!strcmp(cur_cpu_spec->platform, "ppc750");
- tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1, 0);
+ tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1);
if (!tau_workq)
return -ENOMEM;
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index b67d93a609a2..e45ce427bffb 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -231,24 +231,13 @@ static u64 scan_dispatch_log(u64 stop_tb)
void notrace accumulate_stolen_time(void)
{
u64 sst, ust;
- unsigned long save_irq_soft_mask = irq_soft_mask_return();
struct cpu_accounting_data *acct = &local_paca->accounting;
- /* We are called early in the exception entry, before
- * soft/hard_enabled are sync'ed to the expected state
- * for the exception. We are hard disabled but the PACA
- * needs to reflect that so various debug stuff doesn't
- * complain
- */
- irq_soft_mask_set(IRQS_DISABLED);
-
sst = scan_dispatch_log(acct->starttime_user);
ust = scan_dispatch_log(acct->starttime);
acct->stime -= sst;
acct->utime -= ust;
acct->steal_time += ust + sst;
-
- irq_soft_mask_set(save_irq_soft_mask);
}
static inline u64 calculate_stolen_time(u64 stop_tb)
@@ -508,16 +497,6 @@ EXPORT_SYMBOL(profile_pc);
* 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
*/
#ifdef CONFIG_PPC64
-static inline unsigned long test_irq_work_pending(void)
-{
- unsigned long x;
-
- asm volatile("lbz %0,%1(13)"
- : "=r" (x)
- : "i" (offsetof(struct paca_struct, irq_work_pending)));
- return x;
-}
-
static inline void set_irq_work_pending_flag(void)
{
asm volatile("stb %0,%1(13)" : :
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index ffe9537195aa..d89c5df4f206 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -49,7 +49,7 @@ ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
addr = ppc_function_entry((void *)addr);
/* if (link) set op to 'bl' else 'b' */
- create_branch(&op, (struct ppc_inst *)ip, addr, link ? 1 : 0);
+ create_branch(&op, (u32 *)ip, addr, link ? 1 : 0);
return op;
}
@@ -79,7 +79,7 @@ ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new)
}
/* replace the text with the new text */
- if (patch_instruction((struct ppc_inst *)ip, new))
+ if (patch_instruction((u32 *)ip, new))
return -EPERM;
return 0;
@@ -94,7 +94,7 @@ static int test_24bit_addr(unsigned long ip, unsigned long addr)
addr = ppc_function_entry((void *)addr);
/* use the create_branch to verify that this offset can be branched */
- return create_branch(&op, (struct ppc_inst *)ip, addr, 0) == 0;
+ return create_branch(&op, (u32 *)ip, addr, 0) == 0;
}
static int is_bl_op(struct ppc_inst op)
@@ -162,7 +162,7 @@ __ftrace_make_nop(struct module *mod,
#ifdef CONFIG_MPROFILE_KERNEL
/* When using -mkernel_profile there is no load to jump over */
- pop = ppc_inst(PPC_INST_NOP);
+ pop = ppc_inst(PPC_RAW_NOP());
if (copy_inst_from_kernel_nofault(&op, (void *)(ip - 4))) {
pr_err("Fetching instruction at %lx failed.\n", ip - 4);
@@ -170,7 +170,7 @@ __ftrace_make_nop(struct module *mod,
}
/* We expect either a mflr r0, or a std r0, LRSAVE(r1) */
- if (!ppc_inst_equal(op, ppc_inst(PPC_INST_MFLR)) &&
+ if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_MFLR(_R0))) &&
!ppc_inst_equal(op, ppc_inst(PPC_INST_STD_LR))) {
pr_err("Unexpected instruction %s around bl _mcount\n",
ppc_inst_as_str(op));
@@ -203,12 +203,12 @@ __ftrace_make_nop(struct module *mod,
}
if (!ppc_inst_equal(op, ppc_inst(PPC_INST_LD_TOC))) {
- pr_err("Expected %08x found %s\n", PPC_INST_LD_TOC, ppc_inst_as_str(op));
+ pr_err("Expected %08lx found %s\n", PPC_INST_LD_TOC, ppc_inst_as_str(op));
return -EINVAL;
}
#endif /* CONFIG_MPROFILE_KERNEL */
- if (patch_instruction((struct ppc_inst *)ip, pop)) {
+ if (patch_instruction((u32 *)ip, pop)) {
pr_err("Patching NOP failed.\n");
return -EPERM;
}
@@ -278,9 +278,9 @@ __ftrace_make_nop(struct module *mod,
return -EINVAL;
}
- op = ppc_inst(PPC_INST_NOP);
+ op = ppc_inst(PPC_RAW_NOP());
- if (patch_instruction((struct ppc_inst *)ip, op))
+ if (patch_instruction((u32 *)ip, op))
return -EPERM;
return 0;
@@ -380,7 +380,7 @@ static int setup_mcount_compiler_tramp(unsigned long tramp)
return -1;
}
- if (patch_branch((struct ppc_inst *)tramp, ptr, 0)) {
+ if (patch_branch((u32 *)tramp, ptr, 0)) {
pr_debug("REL24 out of range!\n");
return -1;
}
@@ -424,7 +424,7 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr)
}
}
- if (patch_instruction((struct ppc_inst *)ip, ppc_inst(PPC_INST_NOP))) {
+ if (patch_instruction((u32 *)ip, ppc_inst(PPC_RAW_NOP()))) {
pr_err("Patching NOP failed.\n");
return -EPERM;
}
@@ -446,7 +446,7 @@ int ftrace_make_nop(struct module *mod,
if (test_24bit_addr(ip, addr)) {
/* within range */
old = ftrace_call_replace(ip, addr, 1);
- new = ppc_inst(PPC_INST_NOP);
+ new = ppc_inst(PPC_RAW_NOP());
return ftrace_modify_code(ip, old, new);
} else if (core_kernel_text(ip))
return __ftrace_make_nop_kernel(rec, addr);
@@ -510,7 +510,7 @@ static int
expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1)
{
/* look for patched "NOP" on ppc64 with -mprofile-kernel */
- if (!ppc_inst_equal(op0, ppc_inst(PPC_INST_NOP)))
+ if (!ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP())))
return 0;
return 1;
}
@@ -589,14 +589,14 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
int err;
struct ppc_inst op;
- unsigned long ip = rec->ip;
+ u32 *ip = (u32 *)rec->ip;
/* read where this goes */
- if (copy_inst_from_kernel_nofault(&op, (void *)ip))
+ if (copy_inst_from_kernel_nofault(&op, ip))
return -EFAULT;
/* It should be pointing to a nop */
- if (!ppc_inst_equal(op, ppc_inst(PPC_INST_NOP))) {
+ if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_NOP()))) {
pr_err("Expected NOP but have %s\n", ppc_inst_as_str(op));
return -EINVAL;
}
@@ -608,8 +608,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
}
/* create the branch to the trampoline */
- err = create_branch(&op, (struct ppc_inst *)ip,
- rec->arch.mod->arch.tramp, BRANCH_SET_LINK);
+ err = create_branch(&op, ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK);
if (err) {
pr_err("REL24 out of range!\n");
return -EINVAL;
@@ -617,7 +616,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
pr_devel("write to %lx\n", rec->ip);
- if (patch_instruction((struct ppc_inst *)ip, op))
+ if (patch_instruction(ip, op))
return -EPERM;
return 0;
@@ -653,7 +652,7 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)
return -EFAULT;
}
- if (!ppc_inst_equal(op, ppc_inst(PPC_INST_NOP))) {
+ if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_NOP()))) {
pr_err("Unexpected call sequence at %p: %s\n", ip, ppc_inst_as_str(op));
return -EINVAL;
}
@@ -684,7 +683,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
*/
if (test_24bit_addr(ip, addr)) {
/* within range */
- old = ppc_inst(PPC_INST_NOP);
+ old = ppc_inst(PPC_RAW_NOP());
new = ftrace_call_replace(ip, addr, 1);
return ftrace_modify_code(ip, old, new);
} else if (core_kernel_text(ip))
@@ -762,7 +761,7 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
/* The new target may be within range */
if (test_24bit_addr(ip, addr)) {
/* within range */
- if (patch_branch((struct ppc_inst *)ip, addr, BRANCH_SET_LINK)) {
+ if (patch_branch((u32 *)ip, addr, BRANCH_SET_LINK)) {
pr_err("REL24 out of range!\n");
return -EINVAL;
}
@@ -790,12 +789,12 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
}
/* Ensure branch is within 24 bits */
- if (create_branch(&op, (struct ppc_inst *)ip, tramp, BRANCH_SET_LINK)) {
+ if (create_branch(&op, (u32 *)ip, tramp, BRANCH_SET_LINK)) {
pr_err("Branch out of range\n");
return -EINVAL;
}
- if (patch_branch((struct ppc_inst *)ip, tramp, BRANCH_SET_LINK)) {
+ if (patch_branch((u32 *)ip, tramp, BRANCH_SET_LINK)) {
pr_err("REL24 out of range!\n");
return -EINVAL;
}
@@ -851,7 +850,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
struct ppc_inst old, new;
int ret;
- old = ppc_inst_read((struct ppc_inst *)&ftrace_call);
+ old = ppc_inst_read((u32 *)&ftrace_call);
new = ftrace_call_replace(ip, (unsigned long)func, 1);
ret = ftrace_modify_code(ip, old, new);
@@ -859,7 +858,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
/* Also update the regs callback function */
if (!ret) {
ip = (unsigned long)(&ftrace_regs_call);
- old = ppc_inst_read((struct ppc_inst *)&ftrace_regs_call);
+ old = ppc_inst_read((u32 *)&ftrace_regs_call);
new = ftrace_call_replace(ip, (unsigned long)func, 1);
ret = ftrace_modify_code(ip, old, new);
}
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index b4ab95c9e94a..dfbce527c98e 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -67,6 +67,7 @@
#include <asm/kprobes.h>
#include <asm/stacktrace.h>
#include <asm/nmi.h>
+#include <asm/disassemble.h>
#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE)
int (*__debugger)(struct pt_regs *regs) __read_mostly;
@@ -427,7 +428,7 @@ void hv_nmi_check_nonrecoverable(struct pt_regs *regs)
return;
nonrecoverable:
- regs->msr &= ~MSR_RI;
+ regs_set_return_msr(regs, regs->msr & ~MSR_RI);
#endif
}
DEFINE_INTERRUPT_HANDLER_NMI(system_reset_exception)
@@ -537,11 +538,11 @@ static inline int check_io_access(struct pt_regs *regs)
* For the debug message, we look at the preceding
* load or store.
*/
- if (*nip == PPC_INST_NOP)
+ if (*nip == PPC_RAW_NOP())
nip -= 2;
- else if (*nip == PPC_INST_ISYNC)
+ else if (*nip == PPC_RAW_ISYNC())
--nip;
- if (*nip == PPC_INST_SYNC || (*nip >> 26) == OP_TRAP) {
+ if (*nip == PPC_RAW_SYNC() || get_op(*nip) == OP_TRAP) {
unsigned int rb;
--nip;
@@ -549,8 +550,8 @@ static inline int check_io_access(struct pt_regs *regs)
printk(KERN_DEBUG "%s bad port %lx at %p\n",
(*nip & 0x100)? "OUT to": "IN from",
regs->gpr[rb] - _IO_BASE, nip);
- regs->msr |= MSR_RI;
- regs->nip = extable_fixup(entry);
+ regs_set_return_msr(regs, regs->msr | MSR_RI);
+ regs_set_return_ip(regs, extable_fixup(entry));
return 1;
}
}
@@ -586,8 +587,8 @@ static inline int check_io_access(struct pt_regs *regs)
#define REASON_BOUNDARY SRR1_BOUNDARY
#define single_stepping(regs) ((regs)->msr & MSR_SE)
-#define clear_single_step(regs) ((regs)->msr &= ~MSR_SE)
-#define clear_br_trace(regs) ((regs)->msr &= ~MSR_BE)
+#define clear_single_step(regs) (regs_set_return_msr((regs), (regs)->msr & ~MSR_SE))
+#define clear_br_trace(regs) (regs_set_return_msr((regs), (regs)->msr & ~MSR_BE))
#endif
#define inst_length(reason) (((reason) & REASON_PREFIXED) ? 8 : 4)
@@ -1031,7 +1032,7 @@ static void p9_hmi_special_emu(struct pt_regs *regs)
#endif /* !__LITTLE_ENDIAN__ */
/* Go to next instruction */
- regs->nip += 4;
+ regs_add_return_ip(regs, 4);
}
#endif /* CONFIG_VSX */
@@ -1476,7 +1477,7 @@ static void do_program_check(struct pt_regs *regs)
if (!(regs->msr & MSR_PR) && /* not user-mode */
report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) {
- regs->nip += 4;
+ regs_add_return_ip(regs, 4);
return;
}
_exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
@@ -1538,7 +1539,7 @@ static void do_program_check(struct pt_regs *regs)
if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) {
switch (emulate_instruction(regs)) {
case 0:
- regs->nip += 4;
+ regs_add_return_ip(regs, 4);
emulate_single_step(regs);
return;
case -EFAULT:
@@ -1566,7 +1567,7 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception)
*/
DEFINE_INTERRUPT_HANDLER(emulation_assist_interrupt)
{
- regs->msr |= REASON_ILLEGAL;
+ regs_set_return_msr(regs, regs->msr | REASON_ILLEGAL);
do_program_check(regs);
}
@@ -1593,7 +1594,7 @@ DEFINE_INTERRUPT_HANDLER(alignment_exception)
if (fixed == 1) {
/* skip over emulated instruction */
- regs->nip += inst_length(reason);
+ regs_add_return_ip(regs, inst_length(reason));
emulate_single_step(regs);
return;
}
@@ -1659,7 +1660,7 @@ static void tm_unavailable(struct pt_regs *regs)
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (user_mode(regs)) {
current->thread.load_tm++;
- regs->msr |= MSR_TM;
+ regs_set_return_msr(regs, regs->msr | MSR_TM);
tm_enable();
tm_restore_sprs(&current->thread);
return;
@@ -1751,7 +1752,7 @@ DEFINE_INTERRUPT_HANDLER(facility_unavailable_exception)
pr_err("DSCR based mfspr emulation failed\n");
return;
}
- regs->nip += 4;
+ regs_add_return_ip(regs, 4);
emulate_single_step(regs);
}
return;
@@ -1948,7 +1949,7 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
*/
if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0,
current->thread.debug.dbcr1))
- regs->msr |= MSR_DE;
+ regs_set_return_msr(regs, regs->msr | MSR_DE);
else
/* Make sure the IDM flag is off */
current->thread.debug.dbcr0 &= ~DBCR0_IDM;
@@ -1969,7 +1970,7 @@ DEFINE_INTERRUPT_HANDLER(DebugException)
* instead of stopping here when hitting a BT
*/
if (debug_status & DBSR_BT) {
- regs->msr &= ~MSR_DE;
+ regs_set_return_msr(regs, regs->msr & ~MSR_DE);
/* Disable BT */
mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_BT);
@@ -1980,7 +1981,7 @@ DEFINE_INTERRUPT_HANDLER(DebugException)
if (user_mode(regs)) {
current->thread.debug.dbcr0 &= ~DBCR0_BT;
current->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC;
- regs->msr |= MSR_DE;
+ regs_set_return_msr(regs, regs->msr | MSR_DE);
return;
}
@@ -1994,7 +1995,7 @@ DEFINE_INTERRUPT_HANDLER(DebugException)
if (debugger_sstep(regs))
return;
} else if (debug_status & DBSR_IC) { /* Instruction complete */
- regs->msr &= ~MSR_DE;
+ regs_set_return_msr(regs, regs->msr & ~MSR_DE);
/* Disable instruction completion */
mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_IC);
@@ -2016,7 +2017,7 @@ DEFINE_INTERRUPT_HANDLER(DebugException)
current->thread.debug.dbcr0 &= ~DBCR0_IC;
if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0,
current->thread.debug.dbcr1))
- regs->msr |= MSR_DE;
+ regs_set_return_msr(regs, regs->msr | MSR_DE);
else
/* Make sure the IDM bit is off */
current->thread.debug.dbcr0 &= ~DBCR0_IDM;
@@ -2044,7 +2045,7 @@ DEFINE_INTERRUPT_HANDLER(altivec_assist_exception)
PPC_WARN_EMULATED(altivec, regs);
err = emulate_altivec(regs);
if (err == 0) {
- regs->nip += 4; /* skip emulated instruction */
+ regs_add_return_ip(regs, 4); /* skip emulated instruction */
emulate_single_step(regs);
return;
}
@@ -2109,7 +2110,7 @@ DEFINE_INTERRUPT_HANDLER(SPEFloatingPointException)
err = do_spe_mathemu(regs);
if (err == 0) {
- regs->nip += 4; /* skip emulated instruction */
+ regs_add_return_ip(regs, 4); /* skip emulated instruction */
emulate_single_step(regs);
return;
}
@@ -2140,10 +2141,10 @@ DEFINE_INTERRUPT_HANDLER(SPEFloatingPointRoundException)
giveup_spe(current);
preempt_enable();
- regs->nip -= 4;
+ regs_add_return_ip(regs, -4);
err = speround_handler(regs);
if (err == 0) {
- regs->nip += 4; /* skip emulated instruction */
+ regs_add_return_ip(regs, 4); /* skip emulated instruction */
emulate_single_step(regs);
return;
}
diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c
index 9356b60d6030..8513aa49614e 100644
--- a/arch/powerpc/kernel/udbg_16550.c
+++ b/arch/powerpc/kernel/udbg_16550.c
@@ -296,3 +296,42 @@ void __init udbg_init_40x_realmode(void)
}
#endif /* CONFIG_PPC_EARLY_DEBUG_40x */
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_MICROWATT
+
+#define UDBG_UART_MW_ADDR ((void __iomem *)0xc0002000)
+
+static u8 udbg_uart_in_isa300_rm(unsigned int reg)
+{
+ uint64_t msr = mfmsr();
+ uint8_t c;
+
+ mtmsr(msr & ~(MSR_EE|MSR_DR));
+ isync();
+ eieio();
+ c = __raw_rm_readb(UDBG_UART_MW_ADDR + (reg << 2));
+ mtmsr(msr);
+ isync();
+ return c;
+}
+
+static void udbg_uart_out_isa300_rm(unsigned int reg, u8 val)
+{
+ uint64_t msr = mfmsr();
+
+ mtmsr(msr & ~(MSR_EE|MSR_DR));
+ isync();
+ eieio();
+ __raw_rm_writeb(val, UDBG_UART_MW_ADDR + (reg << 2));
+ mtmsr(msr);
+ isync();
+}
+
+void __init udbg_init_debug_microwatt(void)
+{
+ udbg_uart_in = udbg_uart_in_isa300_rm;
+ udbg_uart_out = udbg_uart_out_isa300_rm;
+ udbg_use_uart();
+}
+
+#endif /* CONFIG_PPC_EARLY_DEBUG_MICROWATT */
diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c
index 186f69b11e94..c6975467d9ff 100644
--- a/arch/powerpc/kernel/uprobes.c
+++ b/arch/powerpc/kernel/uprobes.c
@@ -42,7 +42,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe,
return -EINVAL;
if (cpu_has_feature(CPU_FTR_ARCH_31) &&
- ppc_inst_prefixed(auprobe->insn) &&
+ ppc_inst_prefixed(ppc_inst_read(auprobe->insn)) &&
(addr & 0x3f) == 60) {
pr_info_ratelimited("Cannot register a uprobe on 64 byte unaligned prefixed instruction\n");
return -EINVAL;
@@ -62,7 +62,7 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
autask->saved_trap_nr = current->thread.trap_nr;
current->thread.trap_nr = UPROBE_TRAP_NR;
- regs->nip = current->utask->xol_vaddr;
+ regs_set_return_ip(regs, current->utask->xol_vaddr);
user_enable_single_step(current);
return 0;
@@ -119,7 +119,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
* support doesn't exist and have to fix-up the next instruction
* to be executed.
*/
- regs->nip = (unsigned long)ppc_inst_next((void *)utask->vaddr, &auprobe->insn);
+ regs_set_return_ip(regs, (unsigned long)ppc_inst_next((void *)utask->vaddr, auprobe->insn));
user_disable_single_step(current);
return 0;
@@ -182,7 +182,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
* emulate_step() returns 1 if the insn was successfully emulated.
* For all other cases, we need to single-step in hardware.
*/
- ret = emulate_step(regs, ppc_inst_read(&auprobe->insn));
+ ret = emulate_step(regs, ppc_inst_read(auprobe->insn));
if (ret > 0)
return true;
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index f5a52f444e36..fc120fac1910 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -73,6 +73,10 @@ _GLOBAL(load_up_altivec)
addi r5,r4,THREAD /* Get THREAD */
oris r12,r12,MSR_VEC@h
std r12,_MSR(r1)
+#ifdef CONFIG_PPC_BOOK3S_64
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+#endif
#endif
li r4,1
stb r4,THREAD_LOAD_VEC(r5)
@@ -131,7 +135,9 @@ _GLOBAL(load_up_vsx)
/* enable use of VSX after return */
oris r12,r12,MSR_VSX@h
std r12,_MSR(r1)
- b fast_interrupt_return
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+ b fast_interrupt_return_srr
#endif /* CONFIG_VSX */
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 72fa3c00229a..40bdefe9caa7 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -9,6 +9,22 @@
#define EMITS_PT_NOTE
#define RO_EXCEPTION_TABLE_ALIGN 0
+#define SOFT_MASK_TABLE(align) \
+ . = ALIGN(align); \
+ __soft_mask_table : AT(ADDR(__soft_mask_table) - LOAD_OFFSET) { \
+ __start___soft_mask_table = .; \
+ KEEP(*(__soft_mask_table)) \
+ __stop___soft_mask_table = .; \
+ }
+
+#define RESTART_TABLE(align) \
+ . = ALIGN(align); \
+ __restart_table : AT(ADDR(__restart_table) - LOAD_OFFSET) { \
+ __start___restart_table = .; \
+ KEEP(*(__restart_table)) \
+ __stop___restart_table = .; \
+ }
+
#include <asm/page.h>
#include <asm-generic/vmlinux.lds.h>
#include <asm/cache.h>
@@ -124,6 +140,9 @@ SECTIONS
RO_DATA(PAGE_SIZE)
#ifdef CONFIG_PPC64
+ SOFT_MASK_TABLE(8)
+ RESTART_TABLE(8)
+
. = ALIGN(8);
__stf_entry_barrier_fixup : AT(ADDR(__stf_entry_barrier_fixup) - LOAD_OFFSET) {
__start___stf_entry_barrier_fixup = .;
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index c9a8f4781a10..a165635fd214 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -24,6 +24,7 @@
#include <linux/kdebug.h>
#include <linux/sched/debug.h>
#include <linux/delay.h>
+#include <linux/processor.h>
#include <linux/smp.h>
#include <asm/interrupt.h>
diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c
index 56da5eb2b923..48525e8b5730 100644
--- a/arch/powerpc/kexec/core.c
+++ b/arch/powerpc/kexec/core.c
@@ -68,11 +68,11 @@ void machine_kexec_cleanup(struct kimage *image)
void arch_crash_save_vmcoreinfo(void)
{
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
VMCOREINFO_SYMBOL(node_data);
VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
#endif
-#ifndef CONFIG_NEED_MULTIPLE_NODES
+#ifndef CONFIG_NUMA
VMCOREINFO_SYMBOL(contig_page_data);
#endif
#if defined(CONFIG_PPC64) && defined(CONFIG_SPARSEMEM_VMEMMAP)
diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c
index 0196d0c211ac..10f997e6bb95 100644
--- a/arch/powerpc/kexec/crash.c
+++ b/arch/powerpc/kexec/crash.c
@@ -105,8 +105,8 @@ void crash_ipi_callback(struct pt_regs *regs)
static void crash_kexec_prepare_cpus(int cpu)
{
unsigned int msecs;
- unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
- int tries = 0;
+ volatile unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
+ volatile int tries = 0;
int (*old_handler)(struct pt_regs *regs);
printk(KERN_EMERG "Sending IPI to other CPUs\n");
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 2bfeaa13befb..583c14ef596e 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -6,7 +6,7 @@
ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
KVM := ../../../virt/kvm
-common-objs-y = $(KVM)/kvm_main.o $(KVM)/eventfd.o
+common-objs-y = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/binary_stats.o
common-objs-$(CONFIG_KVM_VFIO) += $(KVM)/vfio.o
common-objs-$(CONFIG_KVM_MMIO) += $(KVM)/coalesced_mmio.o
@@ -57,6 +57,7 @@ kvm-pr-y := \
book3s_32_mmu.o
kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
+ book3s_64_entry.o \
tm.o
ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
@@ -86,6 +87,7 @@ kvm-book3s_64-builtin-tm-objs-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
book3s_hv_hmi.o \
+ book3s_hv_p9_entry.o \
book3s_hv_rmhandlers.o \
book3s_hv_rm_mmu.o \
book3s_hv_ras.o \
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 2b691f4d1f26..79833f78d1da 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -38,37 +38,66 @@
/* #define EXIT_DEBUG */
-struct kvm_stats_debugfs_item debugfs_entries[] = {
- VCPU_STAT("exits", sum_exits),
- VCPU_STAT("mmio", mmio_exits),
- VCPU_STAT("sig", signal_exits),
- VCPU_STAT("sysc", syscall_exits),
- VCPU_STAT("inst_emu", emulated_inst_exits),
- VCPU_STAT("dec", dec_exits),
- VCPU_STAT("ext_intr", ext_intr_exits),
- VCPU_STAT("queue_intr", queue_intr),
- VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
- VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
- VCPU_STAT("halt_wait_ns", halt_wait_ns),
- VCPU_STAT("halt_successful_poll", halt_successful_poll),
- VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
- VCPU_STAT("halt_successful_wait", halt_successful_wait),
- VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
- VCPU_STAT("halt_wakeup", halt_wakeup),
- VCPU_STAT("pf_storage", pf_storage),
- VCPU_STAT("sp_storage", sp_storage),
- VCPU_STAT("pf_instruc", pf_instruc),
- VCPU_STAT("sp_instruc", sp_instruc),
- VCPU_STAT("ld", ld),
- VCPU_STAT("ld_slow", ld_slow),
- VCPU_STAT("st", st),
- VCPU_STAT("st_slow", st_slow),
- VCPU_STAT("pthru_all", pthru_all),
- VCPU_STAT("pthru_host", pthru_host),
- VCPU_STAT("pthru_bad_aff", pthru_bad_aff),
- VM_STAT("largepages_2M", num_2M_pages, .mode = 0444),
- VM_STAT("largepages_1G", num_1G_pages, .mode = 0444),
- { NULL }
+const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
+ KVM_GENERIC_VM_STATS(),
+ STATS_DESC_ICOUNTER(VM, num_2M_pages),
+ STATS_DESC_ICOUNTER(VM, num_1G_pages)
+};
+static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
+ sizeof(struct kvm_vm_stat) / sizeof(u64));
+
+const struct kvm_stats_header kvm_vm_stats_header = {
+ .name_size = KVM_STATS_NAME_SIZE,
+ .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
+ .id_offset = sizeof(struct kvm_stats_header),
+ .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+ .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+ sizeof(kvm_vm_stats_desc),
+};
+
+const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
+ KVM_GENERIC_VCPU_STATS(),
+ STATS_DESC_COUNTER(VCPU, sum_exits),
+ STATS_DESC_COUNTER(VCPU, mmio_exits),
+ STATS_DESC_COUNTER(VCPU, signal_exits),
+ STATS_DESC_COUNTER(VCPU, light_exits),
+ STATS_DESC_COUNTER(VCPU, itlb_real_miss_exits),
+ STATS_DESC_COUNTER(VCPU, itlb_virt_miss_exits),
+ STATS_DESC_COUNTER(VCPU, dtlb_real_miss_exits),
+ STATS_DESC_COUNTER(VCPU, dtlb_virt_miss_exits),
+ STATS_DESC_COUNTER(VCPU, syscall_exits),
+ STATS_DESC_COUNTER(VCPU, isi_exits),
+ STATS_DESC_COUNTER(VCPU, dsi_exits),
+ STATS_DESC_COUNTER(VCPU, emulated_inst_exits),
+ STATS_DESC_COUNTER(VCPU, dec_exits),
+ STATS_DESC_COUNTER(VCPU, ext_intr_exits),
+ STATS_DESC_TIME_NSEC(VCPU, halt_wait_ns),
+ STATS_DESC_COUNTER(VCPU, halt_successful_wait),
+ STATS_DESC_COUNTER(VCPU, dbell_exits),
+ STATS_DESC_COUNTER(VCPU, gdbell_exits),
+ STATS_DESC_COUNTER(VCPU, ld),
+ STATS_DESC_COUNTER(VCPU, st),
+ STATS_DESC_COUNTER(VCPU, pf_storage),
+ STATS_DESC_COUNTER(VCPU, pf_instruc),
+ STATS_DESC_COUNTER(VCPU, sp_storage),
+ STATS_DESC_COUNTER(VCPU, sp_instruc),
+ STATS_DESC_COUNTER(VCPU, queue_intr),
+ STATS_DESC_COUNTER(VCPU, ld_slow),
+ STATS_DESC_COUNTER(VCPU, st_slow),
+ STATS_DESC_COUNTER(VCPU, pthru_all),
+ STATS_DESC_COUNTER(VCPU, pthru_host),
+ STATS_DESC_COUNTER(VCPU, pthru_bad_aff)
+};
+static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
+ sizeof(struct kvm_vcpu_stat) / sizeof(u64));
+
+const struct kvm_stats_header kvm_vcpu_stats_header = {
+ .name_size = KVM_STATS_NAME_SIZE,
+ .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
+ .id_offset = sizeof(struct kvm_stats_header),
+ .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+ .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+ sizeof(kvm_vcpu_stats_desc),
};
static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
@@ -171,6 +200,12 @@ void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags)
}
EXPORT_SYMBOL_GPL(kvmppc_core_queue_machine_check);
+void kvmppc_core_queue_syscall(struct kvm_vcpu *vcpu)
+{
+ kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_SYSCALL, 0);
+}
+EXPORT_SYMBOL(kvmppc_core_queue_syscall);
+
void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
{
/* might as well deliver this straight away */
@@ -1044,13 +1079,10 @@ static int kvmppc_book3s_init(void)
#ifdef CONFIG_KVM_XICS
#ifdef CONFIG_KVM_XIVE
if (xics_on_xive()) {
- kvmppc_xive_init_module();
kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS);
- if (kvmppc_xive_native_supported()) {
- kvmppc_xive_native_init_module();
+ if (kvmppc_xive_native_supported())
kvm_register_device_ops(&kvm_xive_native_ops,
KVM_DEV_TYPE_XIVE);
- }
} else
#endif
kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS);
@@ -1060,12 +1092,6 @@ static int kvmppc_book3s_init(void)
static void kvmppc_book3s_exit(void)
{
-#ifdef CONFIG_KVM_XICS
- if (xics_on_xive()) {
- kvmppc_xive_exit_module();
- kvmppc_xive_native_exit_module();
- }
-#endif
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
kvmppc_book3s_exit_pr();
#endif
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index e8e7b2c530d1..4b3a8d80cfa3 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -353,9 +353,6 @@ void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu)
preempt_enable();
}
-/* From mm/mmu_context_hash32.c */
-#define CTX_TO_VSID(c, id) ((((c) * (897 * 16)) + (id * 0x111)) & 0xffffff)
-
int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S
new file mode 100644
index 000000000000..983b8c18bc31
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_entry.S
@@ -0,0 +1,416 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include <asm/asm-offsets.h>
+#include <asm/cache.h>
+#include <asm/code-patching-asm.h>
+#include <asm/exception-64s.h>
+#include <asm/export.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_book3s_asm.h>
+#include <asm/mmu.h>
+#include <asm/ppc_asm.h>
+#include <asm/ptrace.h>
+#include <asm/reg.h>
+#include <asm/ultravisor-api.h>
+
+/*
+ * These are branched to from interrupt handlers in exception-64s.S which set
+ * IKVM_REAL or IKVM_VIRT, if HSTATE_IN_GUEST was found to be non-zero.
+ */
+
+/*
+ * This is a hcall, so register convention is as
+ * Documentation/powerpc/papr_hcalls.rst.
+ *
+ * This may also be a syscall from PR-KVM userspace that is to be
+ * reflected to the PR guest kernel, so registers may be set up for
+ * a system call rather than hcall. We don't currently clobber
+ * anything here, but the 0xc00 handler has already clobbered CTR
+ * and CR0, so PR-KVM can not support a guest kernel that preserves
+ * those registers across its system calls.
+ *
+ * The state of registers is as kvmppc_interrupt, except CFAR is not
+ * saved, R13 is not in SCRATCH0, and R10 does not contain the trap.
+ */
+.global kvmppc_hcall
+.balign IFETCH_ALIGN_BYTES
+kvmppc_hcall:
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ lbz r10,HSTATE_IN_GUEST(r13)
+ cmpwi r10,KVM_GUEST_MODE_HV_P9
+ beq kvmppc_p9_exit_hcall
+#endif
+ ld r10,PACA_EXGEN+EX_R13(r13)
+ SET_SCRATCH0(r10)
+ li r10,0xc00
+ /* Now we look like kvmppc_interrupt */
+ li r11,PACA_EXGEN
+ b .Lgot_save_area
+
+/*
+ * KVM interrupt entry occurs after GEN_INT_ENTRY runs, and follows that
+ * call convention:
+ *
+ * guest R9-R13, CTR, CFAR, PPR saved in PACA EX_xxx save area
+ * guest (H)DAR, (H)DSISR are also in the save area for relevant interrupts
+ * guest R13 also saved in SCRATCH0
+ * R13 = PACA
+ * R11 = (H)SRR0
+ * R12 = (H)SRR1
+ * R9 = guest CR
+ * PPR is set to medium
+ *
+ * With the addition for KVM:
+ * R10 = trap vector
+ */
+.global kvmppc_interrupt
+.balign IFETCH_ALIGN_BYTES
+kvmppc_interrupt:
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ std r10,HSTATE_SCRATCH0(r13)
+ lbz r10,HSTATE_IN_GUEST(r13)
+ cmpwi r10,KVM_GUEST_MODE_HV_P9
+ beq kvmppc_p9_exit_interrupt
+ ld r10,HSTATE_SCRATCH0(r13)
+#endif
+ li r11,PACA_EXGEN
+ cmpdi r10,0x200
+ bgt+ .Lgot_save_area
+ li r11,PACA_EXMC
+ beq .Lgot_save_area
+ li r11,PACA_EXNMI
+.Lgot_save_area:
+ add r11,r11,r13
+BEGIN_FTR_SECTION
+ ld r12,EX_CFAR(r11)
+ std r12,HSTATE_CFAR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ ld r12,EX_CTR(r11)
+ mtctr r12
+BEGIN_FTR_SECTION
+ ld r12,EX_PPR(r11)
+ std r12,HSTATE_PPR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+ ld r12,EX_R12(r11)
+ std r12,HSTATE_SCRATCH0(r13)
+ sldi r12,r9,32
+ or r12,r12,r10
+ ld r9,EX_R9(r11)
+ ld r10,EX_R10(r11)
+ ld r11,EX_R11(r11)
+
+ /*
+ * Hcalls and other interrupts come here after normalising register
+ * contents and save locations:
+ *
+ * R12 = (guest CR << 32) | interrupt vector
+ * R13 = PACA
+ * guest R12 saved in shadow HSTATE_SCRATCH0
+ * guest R13 saved in SPRN_SCRATCH0
+ */
+ std r9,HSTATE_SCRATCH2(r13)
+ lbz r9,HSTATE_IN_GUEST(r13)
+ cmpwi r9,KVM_GUEST_MODE_SKIP
+ beq- .Lmaybe_skip
+.Lno_skip:
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ cmpwi r9,KVM_GUEST_MODE_GUEST
+ beq kvmppc_interrupt_pr
+#endif
+ b kvmppc_interrupt_hv
+#else
+ b kvmppc_interrupt_pr
+#endif
+
+/*
+ * "Skip" interrupts are part of a trick KVM uses a with hash guests to load
+ * the faulting instruction in guest memory from the the hypervisor without
+ * walking page tables.
+ *
+ * When the guest takes a fault that requires the hypervisor to load the
+ * instruction (e.g., MMIO emulation), KVM is running in real-mode with HV=1
+ * and the guest MMU context loaded. It sets KVM_GUEST_MODE_SKIP, and sets
+ * MSR[DR]=1 while leaving MSR[IR]=0, so it continues to fetch HV instructions
+ * but loads and stores will access the guest context. This is used to load
+ * the faulting instruction using the faulting guest effective address.
+ *
+ * However the guest context may not be able to translate, or it may cause a
+ * machine check or other issue, which results in a fault in the host
+ * (even with KVM-HV).
+ *
+ * These faults come here because KVM_GUEST_MODE_SKIP was set, so if they
+ * are (or are likely) caused by that load, the instruction is skipped by
+ * just returning with the PC advanced +4, where it is noticed the load did
+ * not execute and it goes to the slow path which walks the page tables to
+ * read guest memory.
+ */
+.Lmaybe_skip:
+ cmpwi r12,BOOK3S_INTERRUPT_MACHINE_CHECK
+ beq 1f
+ cmpwi r12,BOOK3S_INTERRUPT_DATA_STORAGE
+ beq 1f
+ cmpwi r12,BOOK3S_INTERRUPT_DATA_SEGMENT
+ beq 1f
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ /* HSRR interrupts get 2 added to interrupt number */
+ cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE | 0x2
+ beq 2f
+#endif
+ b .Lno_skip
+1: mfspr r9,SPRN_SRR0
+ addi r9,r9,4
+ mtspr SPRN_SRR0,r9
+ ld r12,HSTATE_SCRATCH0(r13)
+ ld r9,HSTATE_SCRATCH2(r13)
+ GET_SCRATCH0(r13)
+ RFI_TO_KERNEL
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+2: mfspr r9,SPRN_HSRR0
+ addi r9,r9,4
+ mtspr SPRN_HSRR0,r9
+ ld r12,HSTATE_SCRATCH0(r13)
+ ld r9,HSTATE_SCRATCH2(r13)
+ GET_SCRATCH0(r13)
+ HRFI_TO_KERNEL
+#endif
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+
+/* Stack frame offsets for kvmppc_p9_enter_guest */
+#define SFS (144 + STACK_FRAME_MIN_SIZE)
+#define STACK_SLOT_NVGPRS (SFS - 144) /* 18 gprs */
+
+/*
+ * void kvmppc_p9_enter_guest(struct vcpu *vcpu);
+ *
+ * Enter the guest on a ISAv3.0 or later system.
+ */
+.balign IFETCH_ALIGN_BYTES
+_GLOBAL(kvmppc_p9_enter_guest)
+EXPORT_SYMBOL_GPL(kvmppc_p9_enter_guest)
+ mflr r0
+ std r0,PPC_LR_STKOFF(r1)
+ stdu r1,-SFS(r1)
+
+ std r1,HSTATE_HOST_R1(r13)
+
+ mfcr r4
+ stw r4,SFS+8(r1)
+
+ reg = 14
+ .rept 18
+ std reg,STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
+ reg = reg + 1
+ .endr
+
+ ld r4,VCPU_LR(r3)
+ mtlr r4
+ ld r4,VCPU_CTR(r3)
+ mtctr r4
+ ld r4,VCPU_XER(r3)
+ mtspr SPRN_XER,r4
+
+ ld r1,VCPU_CR(r3)
+
+BEGIN_FTR_SECTION
+ ld r4,VCPU_CFAR(r3)
+ mtspr SPRN_CFAR,r4
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+BEGIN_FTR_SECTION
+ ld r4,VCPU_PPR(r3)
+ mtspr SPRN_PPR,r4
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+ reg = 4
+ .rept 28
+ ld reg,__VCPU_GPR(reg)(r3)
+ reg = reg + 1
+ .endr
+
+ ld r4,VCPU_KVM(r3)
+ lbz r4,KVM_SECURE_GUEST(r4)
+ cmpdi r4,0
+ ld r4,VCPU_GPR(R4)(r3)
+ bne .Lret_to_ultra
+
+ mtcr r1
+
+ ld r0,VCPU_GPR(R0)(r3)
+ ld r1,VCPU_GPR(R1)(r3)
+ ld r2,VCPU_GPR(R2)(r3)
+ ld r3,VCPU_GPR(R3)(r3)
+
+ HRFI_TO_GUEST
+ b .
+
+ /*
+ * Use UV_RETURN ultracall to return control back to the Ultravisor
+ * after processing an hypercall or interrupt that was forwarded
+ * (a.k.a. reflected) to the Hypervisor.
+ *
+ * All registers have already been reloaded except the ucall requires:
+ * R0 = hcall result
+ * R2 = SRR1, so UV can detect a synthesized interrupt (if any)
+ * R3 = UV_RETURN
+ */
+.Lret_to_ultra:
+ mtcr r1
+ ld r1,VCPU_GPR(R1)(r3)
+
+ ld r0,VCPU_GPR(R3)(r3)
+ mfspr r2,SPRN_SRR1
+ LOAD_REG_IMMEDIATE(r3, UV_RETURN)
+ sc 2
+
+/*
+ * kvmppc_p9_exit_hcall and kvmppc_p9_exit_interrupt are branched to from
+ * above if the interrupt was taken for a guest that was entered via
+ * kvmppc_p9_enter_guest().
+ *
+ * The exit code recovers the host stack and vcpu pointer, saves all guest GPRs
+ * and CR, LR, XER as well as guest MSR and NIA into the VCPU, then re-
+ * establishes the host stack and registers to return from the
+ * kvmppc_p9_enter_guest() function, which saves CTR and other guest registers
+ * (SPRs and FP, VEC, etc).
+ */
+.balign IFETCH_ALIGN_BYTES
+kvmppc_p9_exit_hcall:
+ mfspr r11,SPRN_SRR0
+ mfspr r12,SPRN_SRR1
+ li r10,0xc00
+ std r10,HSTATE_SCRATCH0(r13)
+
+.balign IFETCH_ALIGN_BYTES
+kvmppc_p9_exit_interrupt:
+ /*
+ * If set to KVM_GUEST_MODE_HV_P9 but we're still in the
+ * hypervisor, that means we can't return from the entry stack.
+ */
+ rldicl. r10,r12,64-MSR_HV_LG,63
+ bne- kvmppc_p9_bad_interrupt
+
+ std r1,HSTATE_SCRATCH1(r13)
+ std r3,HSTATE_SCRATCH2(r13)
+ ld r1,HSTATE_HOST_R1(r13)
+ ld r3,HSTATE_KVM_VCPU(r13)
+
+ std r9,VCPU_CR(r3)
+
+1:
+ std r11,VCPU_PC(r3)
+ std r12,VCPU_MSR(r3)
+
+ reg = 14
+ .rept 18
+ std reg,__VCPU_GPR(reg)(r3)
+ reg = reg + 1
+ .endr
+
+ /* r1, r3, r9-r13 are saved to vcpu by C code */
+ std r0,VCPU_GPR(R0)(r3)
+ std r2,VCPU_GPR(R2)(r3)
+ reg = 4
+ .rept 5
+ std reg,__VCPU_GPR(reg)(r3)
+ reg = reg + 1
+ .endr
+
+ ld r2,PACATOC(r13)
+
+ mflr r4
+ std r4,VCPU_LR(r3)
+ mfspr r4,SPRN_XER
+ std r4,VCPU_XER(r3)
+
+ reg = 14
+ .rept 18
+ ld reg,STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
+ reg = reg + 1
+ .endr
+
+ lwz r4,SFS+8(r1)
+ mtcr r4
+
+ /*
+ * Flush the link stack here, before executing the first blr on the
+ * way out of the guest.
+ *
+ * The link stack won't match coming out of the guest anyway so the
+ * only cost is the flush itself. The call clobbers r0.
+ */
+1: nop
+ patch_site 1b patch__call_kvm_flush_link_stack_p9
+
+ addi r1,r1,SFS
+ ld r0,PPC_LR_STKOFF(r1)
+ mtlr r0
+ blr
+
+/*
+ * Took an interrupt somewhere right before HRFID to guest, so registers are
+ * in a bad way. Return things hopefully enough to run host virtual code and
+ * run the Linux interrupt handler (SRESET or MCE) to print something useful.
+ *
+ * We could be really clever and save all host registers in known locations
+ * before setting HSTATE_IN_GUEST, then restoring them all here, and setting
+ * return address to a fixup that sets them up again. But that's a lot of
+ * effort for a small bit of code. Lots of other things to do first.
+ */
+kvmppc_p9_bad_interrupt:
+BEGIN_MMU_FTR_SECTION
+ /*
+ * Hash host doesn't try to recover MMU (requires host SLB reload)
+ */
+ b .
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
+ /*
+ * Clean up guest registers to give host a chance to run.
+ */
+ li r10,0
+ mtspr SPRN_AMR,r10
+ mtspr SPRN_IAMR,r10
+ mtspr SPRN_CIABR,r10
+ mtspr SPRN_DAWRX0,r10
+BEGIN_FTR_SECTION
+ mtspr SPRN_DAWRX1,r10
+END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
+ mtspr SPRN_PID,r10
+
+ /*
+ * Switch to host MMU mode
+ */
+ ld r10, HSTATE_KVM_VCPU(r13)
+ ld r10, VCPU_KVM(r10)
+ lwz r10, KVM_HOST_LPID(r10)
+ mtspr SPRN_LPID,r10
+
+ ld r10, HSTATE_KVM_VCPU(r13)
+ ld r10, VCPU_KVM(r10)
+ ld r10, KVM_HOST_LPCR(r10)
+ mtspr SPRN_LPCR,r10
+
+ /*
+ * Set GUEST_MODE_NONE so the handler won't branch to KVM, and clear
+ * MSR_RI in r12 ([H]SRR1) so the handler won't try to return.
+ */
+ li r10,KVM_GUEST_MODE_NONE
+ stb r10,HSTATE_IN_GUEST(r13)
+ li r10,MSR_RI
+ andc r12,r12,r10
+
+ /*
+ * Go back to interrupt handler. MCE and SRESET have their specific
+ * PACA save area so they should be used directly. They set up their
+ * own stack. The other handlers all use EXGEN. They will use the
+ * guest r1 if it looks like a kernel stack, so just load the
+ * emergency stack and go to program check for all other interrupts.
+ */
+ ld r10,HSTATE_SCRATCH0(r13)
+ cmpwi r10,BOOK3S_INTERRUPT_MACHINE_CHECK
+ beq machine_check_common
+
+ cmpwi r10,BOOK3S_INTERRUPT_SYSTEM_RESET
+ beq system_reset_common
+
+ b .
+#endif
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index d909c069363e..b5905ae4377c 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -21,6 +21,7 @@
#include <asm/pte-walk.h>
#include <asm/ultravisor.h>
#include <asm/kvm_book3s_uvmem.h>
+#include <asm/plpar_wrappers.h>
/*
* Supported radix tree geometry.
@@ -318,9 +319,19 @@ void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
}
psi = shift_to_mmu_psize(pshift);
- rb = addr | (mmu_get_ap(psi) << PPC_BITLSHIFT(58));
- rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(0, 0, 1),
- lpid, rb);
+
+ if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE)) {
+ rb = addr | (mmu_get_ap(psi) << PPC_BITLSHIFT(58));
+ rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(0, 0, 1),
+ lpid, rb);
+ } else {
+ rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+ H_RPTI_TYPE_NESTED |
+ H_RPTI_TYPE_TLB,
+ psize_to_rpti_pgsize(psi),
+ addr, addr + psize);
+ }
+
if (rc)
pr_err("KVM: TLB page invalidation hcall failed, rc=%ld\n", rc);
}
@@ -334,8 +345,14 @@ static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned int lpid)
return;
}
- rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(1, 0, 1),
- lpid, TLBIEL_INVAL_SET_LPID);
+ if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
+ rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(1, 0, 1),
+ lpid, TLBIEL_INVAL_SET_LPID);
+ else
+ rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+ H_RPTI_TYPE_NESTED |
+ H_RPTI_TYPE_PWC, H_RPTI_PAGE_ALL,
+ 0, -1UL);
if (rc)
pr_err("KVM: TLB PWC invalidation hcall failed, rc=%ld\n", rc);
}
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 083a4e037718..dc6591548f0c 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -391,10 +391,6 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
/* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
/* liobn, ioba, tce); */
- /* For radix, we might be in virtual mode, so punt */
- if (kvm_is_radix(vcpu->kvm))
- return H_TOO_HARD;
-
stt = kvmppc_find_table(vcpu->kvm, liobn);
if (!stt)
return H_TOO_HARD;
@@ -489,10 +485,6 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
bool prereg = false;
struct kvmppc_spapr_tce_iommu_table *stit;
- /* For radix, we might be in virtual mode, so punt */
- if (kvm_is_radix(vcpu->kvm))
- return H_TOO_HARD;
-
/*
* used to check for invalidations in progress
*/
@@ -602,10 +594,6 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
long i, ret;
struct kvmppc_spapr_tce_iommu_table *stit;
- /* For radix, we might be in virtual mode, so punt */
- if (kvm_is_radix(vcpu->kvm))
- return H_TOO_HARD;
-
stt = kvmppc_find_table(vcpu->kvm, liobn);
if (!stt)
return H_TOO_HARD;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index bc0813644666..1d1fcc290fca 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -76,6 +76,7 @@
#include <asm/kvm_book3s_uvmem.h>
#include <asm/ultravisor.h>
#include <asm/dtl.h>
+#include <asm/plpar_wrappers.h>
#include "book3s.h"
@@ -103,13 +104,9 @@ static int target_smt_mode;
module_param(target_smt_mode, int, 0644);
MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
-static bool indep_threads_mode = true;
-module_param(indep_threads_mode, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(indep_threads_mode, "Independent-threads mode (only on POWER9)");
-
static bool one_vm_per_core;
module_param(one_vm_per_core, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(one_vm_per_core, "Only run vCPUs from the same VM on a core (requires indep_threads_mode=N)");
+MODULE_PARM_DESC(one_vm_per_core, "Only run vCPUs from the same VM on a core (requires POWER8 or older)");
#ifdef CONFIG_KVM_XICS
static const struct kernel_param_ops module_param_ops = {
@@ -134,9 +131,6 @@ static inline bool nesting_enabled(struct kvm *kvm)
return kvm->arch.nested_enable && kvm_is_radix(kvm);
}
-/* If set, the threads on each CPU core have to be in the same MMU mode */
-static bool no_mixing_hpt_and_radix __read_mostly;
-
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
/*
@@ -236,7 +230,7 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
waitp = kvm_arch_vcpu_get_wait(vcpu);
if (rcuwait_wake_up(waitp))
- ++vcpu->stat.halt_wakeup;
+ ++vcpu->stat.generic.halt_wakeup;
cpu = READ_ONCE(vcpu->arch.thread_cpu);
if (cpu >= 0 && kvmppc_ipi_thread(cpu))
@@ -807,7 +801,8 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
* KVM does not support mflags=2 (AIL=2) and AIL=1 is reserved.
* Keep this in synch with kvmppc_filter_guest_lpcr_hv.
*/
- if (mflags != 0 && mflags != 3)
+ if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG) &&
+ kvmhv_vcpu_is_radix(vcpu) && mflags == 3)
return H_UNSUPPORTED_FLAG_START;
return H_TOO_HARD;
default:
@@ -899,6 +894,10 @@ static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
* H_SUCCESS if the source vcore wasn't idle (e.g. if it may
* have useful work to do and should not confer) so we don't
* recheck that here.
+ *
+ * In the case of the P9 single vcpu per vcore case, the real
+ * mode handler is not called but no other threads are in the
+ * source vcore.
*/
spin_lock(&vcore->lock);
@@ -924,8 +923,71 @@ static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
return yield_count;
}
+/*
+ * H_RPT_INVALIDATE hcall handler for nested guests.
+ *
+ * Handles only nested process-scoped invalidation requests in L0.
+ */
+static int kvmppc_nested_h_rpt_invalidate(struct kvm_vcpu *vcpu)
+{
+ unsigned long type = kvmppc_get_gpr(vcpu, 6);
+ unsigned long pid, pg_sizes, start, end;
+
+ /*
+ * The partition-scoped invalidations aren't handled here in L0.
+ */
+ if (type & H_RPTI_TYPE_NESTED)
+ return RESUME_HOST;
+
+ pid = kvmppc_get_gpr(vcpu, 4);
+ pg_sizes = kvmppc_get_gpr(vcpu, 7);
+ start = kvmppc_get_gpr(vcpu, 8);
+ end = kvmppc_get_gpr(vcpu, 9);
+
+ do_h_rpt_invalidate_prt(pid, vcpu->arch.nested->shadow_lpid,
+ type, pg_sizes, start, end);
+
+ kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
+ return RESUME_GUEST;
+}
+
+static long kvmppc_h_rpt_invalidate(struct kvm_vcpu *vcpu,
+ unsigned long id, unsigned long target,
+ unsigned long type, unsigned long pg_sizes,
+ unsigned long start, unsigned long end)
+{
+ if (!kvm_is_radix(vcpu->kvm))
+ return H_UNSUPPORTED;
+
+ if (end < start)
+ return H_P5;
+
+ /*
+ * Partition-scoped invalidation for nested guests.
+ */
+ if (type & H_RPTI_TYPE_NESTED) {
+ if (!nesting_enabled(vcpu->kvm))
+ return H_FUNCTION;
+
+ /* Support only cores as target */
+ if (target != H_RPTI_TARGET_CMMU)
+ return H_P2;
+
+ return do_h_rpt_invalidate_pat(vcpu, id, type, pg_sizes,
+ start, end);
+ }
+
+ /*
+ * Process-scoped invalidation for L1 guests.
+ */
+ do_h_rpt_invalidate_prt(id, vcpu->kvm->arch.lpid,
+ type, pg_sizes, start, end);
+ return H_SUCCESS;
+}
+
int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
{
+ struct kvm *kvm = vcpu->kvm;
unsigned long req = kvmppc_get_gpr(vcpu, 3);
unsigned long target, ret = H_SUCCESS;
int yield_count;
@@ -937,11 +999,57 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
return RESUME_HOST;
switch (req) {
+ case H_REMOVE:
+ ret = kvmppc_h_remove(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_ENTER:
+ ret = kvmppc_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6),
+ kvmppc_get_gpr(vcpu, 7));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_READ:
+ ret = kvmppc_h_read(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_CLEAR_MOD:
+ ret = kvmppc_h_clear_mod(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_CLEAR_REF:
+ ret = kvmppc_h_clear_ref(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_PROTECT:
+ ret = kvmppc_h_protect(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_BULK_REMOVE:
+ ret = kvmppc_h_bulk_remove(vcpu);
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+
case H_CEDE:
break;
case H_PROD:
target = kvmppc_get_gpr(vcpu, 4);
- tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
+ tvcpu = kvmppc_find_vcpu(kvm, target);
if (!tvcpu) {
ret = H_PARAMETER;
break;
@@ -955,7 +1063,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
target = kvmppc_get_gpr(vcpu, 4);
if (target == -1)
break;
- tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
+ tvcpu = kvmppc_find_vcpu(kvm, target);
if (!tvcpu) {
ret = H_PARAMETER;
break;
@@ -971,12 +1079,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
kvmppc_get_gpr(vcpu, 6));
break;
case H_RTAS:
- if (list_empty(&vcpu->kvm->arch.rtas_tokens))
+ if (list_empty(&kvm->arch.rtas_tokens))
return RESUME_HOST;
- idx = srcu_read_lock(&vcpu->kvm->srcu);
+ idx = srcu_read_lock(&kvm->srcu);
rc = kvmppc_rtas_hcall(vcpu);
- srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ srcu_read_unlock(&kvm->srcu, idx);
if (rc == -ENOENT)
return RESUME_HOST;
@@ -1060,15 +1168,23 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
if (!powernv_get_random_long(&vcpu->arch.regs.gpr[4]))
ret = H_HARDWARE;
break;
+ case H_RPT_INVALIDATE:
+ ret = kvmppc_h_rpt_invalidate(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6),
+ kvmppc_get_gpr(vcpu, 7),
+ kvmppc_get_gpr(vcpu, 8),
+ kvmppc_get_gpr(vcpu, 9));
+ break;
case H_SET_PARTITION_TABLE:
ret = H_FUNCTION;
- if (nesting_enabled(vcpu->kvm))
+ if (nesting_enabled(kvm))
ret = kvmhv_set_partition_table(vcpu);
break;
case H_ENTER_NESTED:
ret = H_FUNCTION;
- if (!nesting_enabled(vcpu->kvm))
+ if (!nesting_enabled(kvm))
break;
ret = kvmhv_enter_nested_guest(vcpu);
if (ret == H_INTERRUPT) {
@@ -1083,12 +1199,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
break;
case H_TLB_INVALIDATE:
ret = H_FUNCTION;
- if (nesting_enabled(vcpu->kvm))
+ if (nesting_enabled(kvm))
ret = kvmhv_do_nested_tlbie(vcpu);
break;
case H_COPY_TOFROM_GUEST:
ret = H_FUNCTION;
- if (nesting_enabled(vcpu->kvm))
+ if (nesting_enabled(kvm))
ret = kvmhv_copy_tofrom_guest_nested(vcpu);
break;
case H_PAGE_INIT:
@@ -1099,7 +1215,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
case H_SVM_PAGE_IN:
ret = H_UNSUPPORTED;
if (kvmppc_get_srr1(vcpu) & MSR_S)
- ret = kvmppc_h_svm_page_in(vcpu->kvm,
+ ret = kvmppc_h_svm_page_in(kvm,
kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6));
@@ -1107,7 +1223,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
case H_SVM_PAGE_OUT:
ret = H_UNSUPPORTED;
if (kvmppc_get_srr1(vcpu) & MSR_S)
- ret = kvmppc_h_svm_page_out(vcpu->kvm,
+ ret = kvmppc_h_svm_page_out(kvm,
kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6));
@@ -1115,12 +1231,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
case H_SVM_INIT_START:
ret = H_UNSUPPORTED;
if (kvmppc_get_srr1(vcpu) & MSR_S)
- ret = kvmppc_h_svm_init_start(vcpu->kvm);
+ ret = kvmppc_h_svm_init_start(kvm);
break;
case H_SVM_INIT_DONE:
ret = H_UNSUPPORTED;
if (kvmppc_get_srr1(vcpu) & MSR_S)
- ret = kvmppc_h_svm_init_done(vcpu->kvm);
+ ret = kvmppc_h_svm_init_done(kvm);
break;
case H_SVM_INIT_ABORT:
/*
@@ -1130,24 +1246,26 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
* Instead the kvm->arch.secure_guest flag is checked inside
* kvmppc_h_svm_init_abort().
*/
- ret = kvmppc_h_svm_init_abort(vcpu->kvm);
+ ret = kvmppc_h_svm_init_abort(kvm);
break;
default:
return RESUME_HOST;
}
+ WARN_ON_ONCE(ret == H_TOO_HARD);
kvmppc_set_gpr(vcpu, 3, ret);
vcpu->arch.hcall_needed = 0;
return RESUME_GUEST;
}
/*
- * Handle H_CEDE in the nested virtualization case where we haven't
- * called the real-mode hcall handlers in book3s_hv_rmhandlers.S.
+ * Handle H_CEDE in the P9 path where we don't call the real-mode hcall
+ * handlers in book3s_hv_rmhandlers.S.
+ *
* This has to be done early, not in kvmppc_pseries_do_hcall(), so
* that the cede logic in kvmppc_run_single_vcpu() works properly.
*/
-static void kvmppc_nested_cede(struct kvm_vcpu *vcpu)
+static void kvmppc_cede(struct kvm_vcpu *vcpu)
{
vcpu->arch.shregs.msr |= MSR_EE;
vcpu->arch.ceded = 1;
@@ -1178,6 +1296,7 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd)
case H_XIRR_X:
#endif
case H_PAGE_INIT:
+ case H_RPT_INVALIDATE:
return 1;
}
@@ -1400,13 +1519,39 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
}
case BOOK3S_INTERRUPT_SYSCALL:
{
- /* hcall - punt to userspace */
int i;
- /* hypercall with MSR_PR has already been handled in rmode,
- * and never reaches here.
- */
+ if (unlikely(vcpu->arch.shregs.msr & MSR_PR)) {
+ /*
+ * Guest userspace executed sc 1. This can only be
+ * reached by the P9 path because the old path
+ * handles this case in realmode hcall handlers.
+ */
+ if (!kvmhv_vcpu_is_radix(vcpu)) {
+ /*
+ * A guest could be running PR KVM, so this
+ * may be a PR KVM hcall. It must be reflected
+ * to the guest kernel as a sc interrupt.
+ */
+ kvmppc_core_queue_syscall(vcpu);
+ } else {
+ /*
+ * Radix guests can not run PR KVM or nested HV
+ * hash guests which might run PR KVM, so this
+ * is always a privilege fault. Send a program
+ * check to guest kernel.
+ */
+ kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
+ }
+ r = RESUME_GUEST;
+ break;
+ }
+ /*
+ * hcall - gather args and set exit_reason. This will next be
+ * handled by kvmppc_pseries_do_hcall which may be able to deal
+ * with it and resume guest, or may punt to userspace.
+ */
run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
for (i = 0; i < 9; ++i)
run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
@@ -1419,20 +1564,102 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
* We get these next two if the guest accesses a page which it thinks
* it has mapped but which is not actually present, either because
* it is for an emulated I/O device or because the corresonding
- * host page has been paged out. Any other HDSI/HISI interrupts
- * have been handled already.
+ * host page has been paged out.
+ *
+ * Any other HDSI/HISI interrupts have been handled already for P7/8
+ * guests. For POWER9 hash guests not using rmhandlers, basic hash
+ * fault handling is done here.
*/
- case BOOK3S_INTERRUPT_H_DATA_STORAGE:
- r = RESUME_PAGE_FAULT;
+ case BOOK3S_INTERRUPT_H_DATA_STORAGE: {
+ unsigned long vsid;
+ long err;
+
+ if (vcpu->arch.fault_dsisr == HDSISR_CANARY) {
+ r = RESUME_GUEST; /* Just retry if it's the canary */
+ break;
+ }
+
+ if (kvm_is_radix(vcpu->kvm) || !cpu_has_feature(CPU_FTR_ARCH_300)) {
+ /*
+ * Radix doesn't require anything, and pre-ISAv3.0 hash
+ * already attempted to handle this in rmhandlers. The
+ * hash fault handling below is v3 only (it uses ASDR
+ * via fault_gpa).
+ */
+ r = RESUME_PAGE_FAULT;
+ break;
+ }
+
+ if (!(vcpu->arch.fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT))) {
+ kvmppc_core_queue_data_storage(vcpu,
+ vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
+ r = RESUME_GUEST;
+ break;
+ }
+
+ if (!(vcpu->arch.shregs.msr & MSR_DR))
+ vsid = vcpu->kvm->arch.vrma_slb_v;
+ else
+ vsid = vcpu->arch.fault_gpa;
+
+ err = kvmppc_hpte_hv_fault(vcpu, vcpu->arch.fault_dar,
+ vsid, vcpu->arch.fault_dsisr, true);
+ if (err == 0) {
+ r = RESUME_GUEST;
+ } else if (err == -1 || err == -2) {
+ r = RESUME_PAGE_FAULT;
+ } else {
+ kvmppc_core_queue_data_storage(vcpu,
+ vcpu->arch.fault_dar, err);
+ r = RESUME_GUEST;
+ }
break;
- case BOOK3S_INTERRUPT_H_INST_STORAGE:
+ }
+ case BOOK3S_INTERRUPT_H_INST_STORAGE: {
+ unsigned long vsid;
+ long err;
+
vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
vcpu->arch.fault_dsisr = vcpu->arch.shregs.msr &
DSISR_SRR1_MATCH_64S;
- if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE)
- vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
- r = RESUME_PAGE_FAULT;
+ if (kvm_is_radix(vcpu->kvm) || !cpu_has_feature(CPU_FTR_ARCH_300)) {
+ /*
+ * Radix doesn't require anything, and pre-ISAv3.0 hash
+ * already attempted to handle this in rmhandlers. The
+ * hash fault handling below is v3 only (it uses ASDR
+ * via fault_gpa).
+ */
+ if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE)
+ vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
+ r = RESUME_PAGE_FAULT;
+ break;
+ }
+
+ if (!(vcpu->arch.fault_dsisr & SRR1_ISI_NOPT)) {
+ kvmppc_core_queue_inst_storage(vcpu,
+ vcpu->arch.fault_dsisr);
+ r = RESUME_GUEST;
+ break;
+ }
+
+ if (!(vcpu->arch.shregs.msr & MSR_IR))
+ vsid = vcpu->kvm->arch.vrma_slb_v;
+ else
+ vsid = vcpu->arch.fault_gpa;
+
+ err = kvmppc_hpte_hv_fault(vcpu, vcpu->arch.fault_dar,
+ vsid, vcpu->arch.fault_dsisr, false);
+ if (err == 0) {
+ r = RESUME_GUEST;
+ } else if (err == -1) {
+ r = RESUME_PAGE_FAULT;
+ } else {
+ kvmppc_core_queue_inst_storage(vcpu, err);
+ r = RESUME_GUEST;
+ }
break;
+ }
+
/*
* This occurs if the guest executes an illegal instruction.
* If the guest debug is disabled, generate a program interrupt
@@ -1593,6 +1820,23 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
if (!xics_on_xive())
kvmppc_xics_rm_complete(vcpu, 0);
break;
+ case BOOK3S_INTERRUPT_SYSCALL:
+ {
+ unsigned long req = kvmppc_get_gpr(vcpu, 3);
+
+ /*
+ * The H_RPT_INVALIDATE hcalls issued by nested
+ * guests for process-scoped invalidations when
+ * GTSE=0, are handled here in L0.
+ */
+ if (req == H_RPT_INVALIDATE) {
+ r = kvmppc_nested_h_rpt_invalidate(vcpu);
+ break;
+ }
+
+ r = RESUME_HOST;
+ break;
+ }
default:
r = RESUME_HOST;
break;
@@ -1654,6 +1898,14 @@ unsigned long kvmppc_filter_lpcr_hv(struct kvm *kvm, unsigned long lpcr)
lpcr &= ~LPCR_AIL;
if ((lpcr & LPCR_AIL) != LPCR_AIL_3)
lpcr &= ~LPCR_AIL; /* LPCR[AIL]=1/2 is disallowed */
+ /*
+ * On some POWER9s we force AIL off for radix guests to prevent
+ * executing in MSR[HV]=1 mode with the MMU enabled and PIDR set to
+ * guest, which can result in Q0 translations with LPID=0 PID=PIDR to
+ * be cached, which the host TLB management does not expect.
+ */
+ if (kvm_is_radix(kvm) && cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+ lpcr &= ~LPCR_AIL;
/*
* On POWER9, allow userspace to enable large decrementer for the
@@ -2233,7 +2485,7 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
*/
static int threads_per_vcore(struct kvm *kvm)
{
- if (kvm->arch.threads_indep)
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
return 1;
return threads_per_subcore;
}
@@ -2657,7 +2909,7 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
cpumask_t *cpu_in_guest;
int i;
- cpu = cpu_first_thread_sibling(cpu);
+ cpu = cpu_first_tlb_thread_sibling(cpu);
if (nested) {
cpumask_set_cpu(cpu, &nested->need_tlb_flush);
cpu_in_guest = &nested->cpu_in_guest;
@@ -2671,9 +2923,10 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
* the other side is the first smp_mb() in kvmppc_run_core().
*/
smp_mb();
- for (i = 0; i < threads_per_core; ++i)
- if (cpumask_test_cpu(cpu + i, cpu_in_guest))
- smp_call_function_single(cpu + i, do_nothing, NULL, 1);
+ for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu);
+ i += cpu_tlb_thread_sibling_step())
+ if (cpumask_test_cpu(i, cpu_in_guest))
+ smp_call_function_single(i, do_nothing, NULL, 1);
}
static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
@@ -2704,8 +2957,8 @@ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
*/
if (prev_cpu != pcpu) {
if (prev_cpu >= 0 &&
- cpu_first_thread_sibling(prev_cpu) !=
- cpu_first_thread_sibling(pcpu))
+ cpu_first_tlb_thread_sibling(prev_cpu) !=
+ cpu_first_tlb_thread_sibling(pcpu))
radix_flush_cpu(kvm, prev_cpu, vcpu);
if (nested)
nested->prev_cpu[vcpu->arch.nested_vcpu_id] = pcpu;
@@ -2967,9 +3220,6 @@ static void prepare_threads(struct kvmppc_vcore *vc)
for_each_runnable_thread(i, vcpu, vc) {
if (signal_pending(vcpu->arch.run_task))
vcpu->arch.ret = -EINTR;
- else if (no_mixing_hpt_and_radix &&
- kvm_is_radix(vc->kvm) != radix_enabled())
- vcpu->arch.ret = -EINVAL;
else if (vcpu->arch.vpa.update_pending ||
vcpu->arch.slb_shadow.update_pending ||
vcpu->arch.dtl.update_pending)
@@ -3176,6 +3426,9 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
int trap;
bool is_power8;
+ if (WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)))
+ return;
+
/*
* Remove from the list any threads that have a signal pending
* or need a VPA update done
@@ -3203,9 +3456,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
* Make sure we are running on primary threads, and that secondary
* threads are offline. Also check if the number of threads in this
* guest are greater than the current system threads per guest.
- * On POWER9, we need to be not in independent-threads mode if
- * this is a HPT guest on a radix host machine where the
- * CPU threads may not be in different MMU modes.
*/
if ((controlled_threads > 1) &&
((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
@@ -3230,18 +3480,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
collect_piggybacks(&core_info, target_threads);
/*
- * On radix, arrange for TLB flushing if necessary.
- * This has to be done before disabling interrupts since
- * it uses smp_call_function().
- */
- pcpu = smp_processor_id();
- if (kvm_is_radix(vc->kvm)) {
- for (sub = 0; sub < core_info.n_subcores; ++sub)
- for_each_runnable_thread(i, vcpu, core_info.vc[sub])
- kvmppc_prepare_radix_vcpu(vcpu, pcpu);
- }
-
- /*
* Hard-disable interrupts, and check resched flag and signals.
* If we need to reschedule or deliver a signal, clean up
* and return without going into the guest(s).
@@ -3273,8 +3511,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
cmd_bit = stat_bit = 0;
split = core_info.n_subcores;
sip = NULL;
- is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S)
- && !cpu_has_feature(CPU_FTR_ARCH_300);
+ is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S);
if (split > 1) {
sip = &split_info;
@@ -3478,184 +3715,113 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
trace_kvmppc_run_core(vc, 1);
}
-/*
- * Load up hypervisor-mode registers on P9.
- */
-static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
- unsigned long lpcr)
+static void load_spr_state(struct kvm_vcpu *vcpu)
{
- struct kvmppc_vcore *vc = vcpu->arch.vcore;
- s64 hdec;
- u64 tb, purr, spurr;
- int trap;
- unsigned long host_hfscr = mfspr(SPRN_HFSCR);
- unsigned long host_ciabr = mfspr(SPRN_CIABR);
- unsigned long host_dawr0 = mfspr(SPRN_DAWR0);
- unsigned long host_dawrx0 = mfspr(SPRN_DAWRX0);
- unsigned long host_psscr = mfspr(SPRN_PSSCR);
- unsigned long host_pidr = mfspr(SPRN_PID);
- unsigned long host_dawr1 = 0;
- unsigned long host_dawrx1 = 0;
-
- if (cpu_has_feature(CPU_FTR_DAWR1)) {
- host_dawr1 = mfspr(SPRN_DAWR1);
- host_dawrx1 = mfspr(SPRN_DAWRX1);
- }
+ mtspr(SPRN_DSCR, vcpu->arch.dscr);
+ mtspr(SPRN_IAMR, vcpu->arch.iamr);
+ mtspr(SPRN_PSPB, vcpu->arch.pspb);
+ mtspr(SPRN_FSCR, vcpu->arch.fscr);
+ mtspr(SPRN_TAR, vcpu->arch.tar);
+ mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
+ mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
+ mtspr(SPRN_BESCR, vcpu->arch.bescr);
+ mtspr(SPRN_WORT, vcpu->arch.wort);
+ mtspr(SPRN_TIDR, vcpu->arch.tid);
+ mtspr(SPRN_AMR, vcpu->arch.amr);
+ mtspr(SPRN_UAMOR, vcpu->arch.uamor);
/*
- * P8 and P9 suppress the HDEC exception when LPCR[HDICE] = 0,
- * so set HDICE before writing HDEC.
+ * DAR, DSISR, and for nested HV, SPRGs must be set with MSR[RI]
+ * clear (or hstate set appropriately to catch those registers
+ * being clobbered if we take a MCE or SRESET), so those are done
+ * later.
*/
- mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr | LPCR_HDICE);
- isync();
-
- hdec = time_limit - mftb();
- if (hdec < 0) {
- mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr);
- isync();
- return BOOK3S_INTERRUPT_HV_DECREMENTER;
- }
- mtspr(SPRN_HDEC, hdec);
-
- if (vc->tb_offset) {
- u64 new_tb = mftb() + vc->tb_offset;
- mtspr(SPRN_TBU40, new_tb);
- tb = mftb();
- if ((tb & 0xffffff) < (new_tb & 0xffffff))
- mtspr(SPRN_TBU40, new_tb + 0x1000000);
- vc->tb_offset_applied = vc->tb_offset;
- }
-
- if (vc->pcr)
- mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
- mtspr(SPRN_DPDES, vc->dpdes);
- mtspr(SPRN_VTB, vc->vtb);
-
- local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
- local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR);
- mtspr(SPRN_PURR, vcpu->arch.purr);
- mtspr(SPRN_SPURR, vcpu->arch.spurr);
-
- if (dawr_enabled()) {
- mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
- mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
- if (cpu_has_feature(CPU_FTR_DAWR1)) {
- mtspr(SPRN_DAWR1, vcpu->arch.dawr1);
- mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1);
- }
- }
- mtspr(SPRN_CIABR, vcpu->arch.ciabr);
- mtspr(SPRN_IC, vcpu->arch.ic);
- mtspr(SPRN_PID, vcpu->arch.pid);
-
- mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
- (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
- mtspr(SPRN_HFSCR, vcpu->arch.hfscr);
-
- mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
- mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
- mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
- mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);
-
- mtspr(SPRN_AMOR, ~0UL);
-
- mtspr(SPRN_LPCR, lpcr);
- isync();
-
- kvmppc_xive_push_vcpu(vcpu);
-
- mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
- mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
-
- trap = __kvmhv_vcpu_entry_p9(vcpu);
-
- /* Advance host PURR/SPURR by the amount used by guest */
- purr = mfspr(SPRN_PURR);
- spurr = mfspr(SPRN_SPURR);
- mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr +
- purr - vcpu->arch.purr);
- mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr +
- spurr - vcpu->arch.spurr);
- vcpu->arch.purr = purr;
- vcpu->arch.spurr = spurr;
+ if (!(vcpu->arch.ctrl & 1))
+ mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1);
+}
- vcpu->arch.ic = mfspr(SPRN_IC);
- vcpu->arch.pid = mfspr(SPRN_PID);
- vcpu->arch.psscr = mfspr(SPRN_PSSCR) & PSSCR_GUEST_VIS;
+static void store_spr_state(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
- vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0);
- vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1);
- vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
- vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
+ vcpu->arch.iamr = mfspr(SPRN_IAMR);
+ vcpu->arch.pspb = mfspr(SPRN_PSPB);
+ vcpu->arch.fscr = mfspr(SPRN_FSCR);
+ vcpu->arch.tar = mfspr(SPRN_TAR);
+ vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
+ vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
+ vcpu->arch.bescr = mfspr(SPRN_BESCR);
+ vcpu->arch.wort = mfspr(SPRN_WORT);
+ vcpu->arch.tid = mfspr(SPRN_TIDR);
+ vcpu->arch.amr = mfspr(SPRN_AMR);
+ vcpu->arch.uamor = mfspr(SPRN_UAMOR);
+ vcpu->arch.dscr = mfspr(SPRN_DSCR);
+}
- /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
- mtspr(SPRN_PSSCR, host_psscr |
- (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
- mtspr(SPRN_HFSCR, host_hfscr);
- mtspr(SPRN_CIABR, host_ciabr);
- mtspr(SPRN_DAWR0, host_dawr0);
- mtspr(SPRN_DAWRX0, host_dawrx0);
- if (cpu_has_feature(CPU_FTR_DAWR1)) {
- mtspr(SPRN_DAWR1, host_dawr1);
- mtspr(SPRN_DAWRX1, host_dawrx1);
- }
- mtspr(SPRN_PID, host_pidr);
+/*
+ * Privileged (non-hypervisor) host registers to save.
+ */
+struct p9_host_os_sprs {
+ unsigned long dscr;
+ unsigned long tidr;
+ unsigned long iamr;
+ unsigned long amr;
+ unsigned long fscr;
+};
- /*
- * Since this is radix, do a eieio; tlbsync; ptesync sequence in
- * case we interrupted the guest between a tlbie and a ptesync.
- */
- asm volatile("eieio; tlbsync; ptesync");
+static void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs)
+{
+ host_os_sprs->dscr = mfspr(SPRN_DSCR);
+ host_os_sprs->tidr = mfspr(SPRN_TIDR);
+ host_os_sprs->iamr = mfspr(SPRN_IAMR);
+ host_os_sprs->amr = mfspr(SPRN_AMR);
+ host_os_sprs->fscr = mfspr(SPRN_FSCR);
+}
- /*
- * cp_abort is required if the processor supports local copy-paste
- * to clear the copy buffer that was under control of the guest.
- */
- if (cpu_has_feature(CPU_FTR_ARCH_31))
- asm volatile(PPC_CP_ABORT);
+/* vcpu guest regs must already be saved */
+static void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
+ struct p9_host_os_sprs *host_os_sprs)
+{
+ mtspr(SPRN_PSPB, 0);
+ mtspr(SPRN_WORT, 0);
+ mtspr(SPRN_UAMOR, 0);
- mtspr(SPRN_LPID, vcpu->kvm->arch.host_lpid); /* restore host LPID */
- isync();
+ mtspr(SPRN_DSCR, host_os_sprs->dscr);
+ mtspr(SPRN_TIDR, host_os_sprs->tidr);
+ mtspr(SPRN_IAMR, host_os_sprs->iamr);
- vc->dpdes = mfspr(SPRN_DPDES);
- vc->vtb = mfspr(SPRN_VTB);
- mtspr(SPRN_DPDES, 0);
- if (vc->pcr)
- mtspr(SPRN_PCR, PCR_MASK);
+ if (host_os_sprs->amr != vcpu->arch.amr)
+ mtspr(SPRN_AMR, host_os_sprs->amr);
- if (vc->tb_offset_applied) {
- u64 new_tb = mftb() - vc->tb_offset_applied;
- mtspr(SPRN_TBU40, new_tb);
- tb = mftb();
- if ((tb & 0xffffff) < (new_tb & 0xffffff))
- mtspr(SPRN_TBU40, new_tb + 0x1000000);
- vc->tb_offset_applied = 0;
- }
+ if (host_os_sprs->fscr != vcpu->arch.fscr)
+ mtspr(SPRN_FSCR, host_os_sprs->fscr);
- mtspr(SPRN_HDEC, 0x7fffffff);
- mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr);
+ /* Save guest CTRL register, set runlatch to 1 */
+ if (!(vcpu->arch.ctrl & 1))
+ mtspr(SPRN_CTRLT, 1);
+}
- return trap;
+static inline bool hcall_is_xics(unsigned long req)
+{
+ return req == H_EOI || req == H_CPPR || req == H_IPI ||
+ req == H_IPOLL || req == H_XIRR || req == H_XIRR_X;
}
/*
- * Virtual-mode guest entry for POWER9 and later when the host and
- * guest are both using the radix MMU. The LPIDR has already been set.
+ * Guest entry for POWER9 and later CPUs.
*/
static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
unsigned long lpcr)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
- unsigned long host_dscr = mfspr(SPRN_DSCR);
- unsigned long host_tidr = mfspr(SPRN_TIDR);
- unsigned long host_iamr = mfspr(SPRN_IAMR);
- unsigned long host_amr = mfspr(SPRN_AMR);
- unsigned long host_fscr = mfspr(SPRN_FSCR);
+ struct p9_host_os_sprs host_os_sprs;
s64 dec;
u64 tb;
int trap, save_pmu;
+ WARN_ON_ONCE(vcpu->arch.ceded);
+
dec = mfspr(SPRN_DEC);
tb = mftb();
if (dec < 0)
@@ -3664,7 +3830,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
if (local_paca->kvm_hstate.dec_expires < time_limit)
time_limit = local_paca->kvm_hstate.dec_expires;
- vcpu->arch.ceded = 0;
+ save_p9_host_os_sprs(&host_os_sprs);
kvmhv_save_host_pmu(); /* saves it to PACA kvm_hstate */
@@ -3693,24 +3859,20 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
#endif
mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
- mtspr(SPRN_DSCR, vcpu->arch.dscr);
- mtspr(SPRN_IAMR, vcpu->arch.iamr);
- mtspr(SPRN_PSPB, vcpu->arch.pspb);
- mtspr(SPRN_FSCR, vcpu->arch.fscr);
- mtspr(SPRN_TAR, vcpu->arch.tar);
- mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
- mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
- mtspr(SPRN_BESCR, vcpu->arch.bescr);
- mtspr(SPRN_WORT, vcpu->arch.wort);
- mtspr(SPRN_TIDR, vcpu->arch.tid);
- mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
- mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
- mtspr(SPRN_AMR, vcpu->arch.amr);
- mtspr(SPRN_UAMOR, vcpu->arch.uamor);
-
- if (!(vcpu->arch.ctrl & 1))
- mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1);
+ load_spr_state(vcpu);
+ /*
+ * When setting DEC, we must always deal with irq_work_raise via NMI vs
+ * setting DEC. The problem occurs right as we switch into guest mode
+ * if a NMI hits and sets pending work and sets DEC, then that will
+ * apply to the guest and not bring us back to the host.
+ *
+ * irq_work_raise could check a flag (or possibly LPCR[HDICE] for
+ * example) and set HDEC to 1? That wouldn't solve the nested hv
+ * case which needs to abort the hcall or zero the time limit.
+ *
+ * XXX: Another day's problem.
+ */
mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb());
if (kvmhv_on_pseries()) {
@@ -3718,7 +3880,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
* We need to save and restore the guest visible part of the
* psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor
* doesn't do this for us. Note only required if pseries since
- * this is done in kvmhv_load_hv_regs_and_go() below otherwise.
+ * this is done in kvmhv_vcpu_entry_p9() below otherwise.
*/
unsigned long host_psscr;
/* call our hypervisor to load up HV regs and go */
@@ -3738,6 +3900,8 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
hvregs.vcpu_token = vcpu->vcpu_id;
}
hvregs.hdec_expiry = time_limit;
+ mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
+ mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs),
__pa(&vcpu->arch.regs));
kvmhv_restore_hv_return_state(vcpu, &hvregs);
@@ -3750,15 +3914,41 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
/* H_CEDE has to be handled now, not later */
if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
- kvmppc_nested_cede(vcpu);
+ kvmppc_cede(vcpu);
kvmppc_set_gpr(vcpu, 3, 0);
trap = 0;
}
} else {
- trap = kvmhv_load_hv_regs_and_go(vcpu, time_limit, lpcr);
+ kvmppc_xive_push_vcpu(vcpu);
+ trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr);
+ if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
+ !(vcpu->arch.shregs.msr & MSR_PR)) {
+ unsigned long req = kvmppc_get_gpr(vcpu, 3);
+
+ /* H_CEDE has to be handled now, not later */
+ if (req == H_CEDE) {
+ kvmppc_cede(vcpu);
+ kvmppc_xive_rearm_escalation(vcpu); /* may un-cede */
+ kvmppc_set_gpr(vcpu, 3, 0);
+ trap = 0;
+
+ /* XICS hcalls must be handled before xive is pulled */
+ } else if (hcall_is_xics(req)) {
+ int ret;
+
+ ret = kvmppc_xive_xics_hcall(vcpu, req);
+ if (ret != H_TOO_HARD) {
+ kvmppc_set_gpr(vcpu, 3, ret);
+ trap = 0;
+ }
+ }
+ }
+ kvmppc_xive_pull_vcpu(vcpu);
+
+ if (kvm_is_radix(vcpu->kvm))
+ vcpu->arch.slb_max = 0;
}
- vcpu->arch.slb_max = 0;
dec = mfspr(SPRN_DEC);
if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
dec = (s32) dec;
@@ -3766,36 +3956,10 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
vcpu->arch.dec_expires = dec + tb;
vcpu->cpu = -1;
vcpu->arch.thread_cpu = -1;
- /* Save guest CTRL register, set runlatch to 1 */
- vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
- if (!(vcpu->arch.ctrl & 1))
- mtspr(SPRN_CTRLT, vcpu->arch.ctrl | 1);
- vcpu->arch.iamr = mfspr(SPRN_IAMR);
- vcpu->arch.pspb = mfspr(SPRN_PSPB);
- vcpu->arch.fscr = mfspr(SPRN_FSCR);
- vcpu->arch.tar = mfspr(SPRN_TAR);
- vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
- vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
- vcpu->arch.bescr = mfspr(SPRN_BESCR);
- vcpu->arch.wort = mfspr(SPRN_WORT);
- vcpu->arch.tid = mfspr(SPRN_TIDR);
- vcpu->arch.amr = mfspr(SPRN_AMR);
- vcpu->arch.uamor = mfspr(SPRN_UAMOR);
- vcpu->arch.dscr = mfspr(SPRN_DSCR);
+ store_spr_state(vcpu);
- mtspr(SPRN_PSPB, 0);
- mtspr(SPRN_WORT, 0);
- mtspr(SPRN_UAMOR, 0);
- mtspr(SPRN_DSCR, host_dscr);
- mtspr(SPRN_TIDR, host_tidr);
- mtspr(SPRN_IAMR, host_iamr);
-
- if (host_amr != vcpu->arch.amr)
- mtspr(SPRN_AMR, host_amr);
-
- if (host_fscr != vcpu->arch.fscr)
- mtspr(SPRN_FSCR, host_fscr);
+ restore_p9_host_os_sprs(vcpu, &host_os_sprs);
msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
store_fp_state(&vcpu->arch.fp);
@@ -3825,6 +3989,9 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
vc->in_guest = 0;
mtspr(SPRN_DEC, local_paca->kvm_hstate.dec_expires - mftb());
+ /* We may have raced with new irq work */
+ if (test_irq_work_pending())
+ set_dec(1);
mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
kvmhv_load_host_pmu();
@@ -3925,7 +4092,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
cur = start_poll = ktime_get();
if (vc->halt_poll_ns) {
ktime_t stop = ktime_add_ns(start_poll, vc->halt_poll_ns);
- ++vc->runner->stat.halt_attempted_poll;
+ ++vc->runner->stat.generic.halt_attempted_poll;
vc->vcore_state = VCORE_POLLING;
spin_unlock(&vc->lock);
@@ -3942,7 +4109,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
vc->vcore_state = VCORE_INACTIVE;
if (!do_sleep) {
- ++vc->runner->stat.halt_successful_poll;
+ ++vc->runner->stat.generic.halt_successful_poll;
goto out;
}
}
@@ -3954,7 +4121,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
do_sleep = 0;
/* If we polled, count this as a successful poll */
if (vc->halt_poll_ns)
- ++vc->runner->stat.halt_successful_poll;
+ ++vc->runner->stat.generic.halt_successful_poll;
goto out;
}
@@ -3981,13 +4148,13 @@ out:
ktime_to_ns(cur) - ktime_to_ns(start_wait);
/* Attribute failed poll time */
if (vc->halt_poll_ns)
- vc->runner->stat.halt_poll_fail_ns +=
+ vc->runner->stat.generic.halt_poll_fail_ns +=
ktime_to_ns(start_wait) -
ktime_to_ns(start_poll);
} else {
/* Attribute successful poll time */
if (vc->halt_poll_ns)
- vc->runner->stat.halt_poll_success_ns +=
+ vc->runner->stat.generic.halt_poll_success_ns +=
ktime_to_ns(cur) -
ktime_to_ns(start_poll);
}
@@ -4014,7 +4181,6 @@ out:
/*
* This never fails for a radix guest, as none of the operations it does
* for a radix guest can fail or have a way to report failure.
- * kvmhv_run_single_vcpu() relies on this fact.
*/
static int kvmhv_setup_mmu(struct kvm_vcpu *vcpu)
{
@@ -4170,7 +4336,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
{
struct kvm_run *run = vcpu->run;
int trap, r, pcpu;
- int srcu_idx, lpid;
+ int srcu_idx;
struct kvmppc_vcore *vc;
struct kvm *kvm = vcpu->kvm;
struct kvm_nested_guest *nested = vcpu->arch.nested;
@@ -4193,8 +4359,15 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
vc->runner = vcpu;
/* See if the MMU is ready to go */
- if (!kvm->arch.mmu_ready)
- kvmhv_setup_mmu(vcpu);
+ if (!kvm->arch.mmu_ready) {
+ r = kvmhv_setup_mmu(vcpu);
+ if (r) {
+ run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+ run->fail_entry.hardware_entry_failure_reason = 0;
+ vcpu->arch.ret = r;
+ return r;
+ }
+ }
if (need_resched())
cond_resched();
@@ -4207,7 +4380,8 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
preempt_disable();
pcpu = smp_processor_id();
vc->pcpu = pcpu;
- kvmppc_prepare_radix_vcpu(vcpu, pcpu);
+ if (kvm_is_radix(kvm))
+ kvmppc_prepare_radix_vcpu(vcpu, pcpu);
local_irq_disable();
hard_irq_disable();
@@ -4244,13 +4418,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
vc->vcore_state = VCORE_RUNNING;
trace_kvmppc_run_core(vc, 0);
- if (cpu_has_feature(CPU_FTR_HVMODE)) {
- lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;
- mtspr(SPRN_LPID, lpid);
- isync();
- kvmppc_check_need_tlb_flush(kvm, pcpu, nested);
- }
-
guest_enter_irqoff();
srcu_idx = srcu_read_lock(&kvm->srcu);
@@ -4269,11 +4436,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
srcu_read_unlock(&kvm->srcu, srcu_idx);
- if (cpu_has_feature(CPU_FTR_HVMODE)) {
- mtspr(SPRN_LPID, kvm->arch.host_lpid);
- isync();
- }
-
set_irq_happened(trap);
kvmppc_set_host_core(pcpu);
@@ -4419,19 +4581,23 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
do {
- /*
- * The TLB prefetch bug fixup is only in the kvmppc_run_vcpu
- * path, which also handles hash and dependent threads mode.
- */
- if (kvm->arch.threads_indep && kvm_is_radix(kvm) &&
- !cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
r = kvmhv_run_single_vcpu(vcpu, ~(u64)0,
vcpu->arch.vcore->lpcr);
else
r = kvmppc_run_vcpu(vcpu);
- if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
- !(vcpu->arch.shregs.msr & MSR_PR)) {
+ if (run->exit_reason == KVM_EXIT_PAPR_HCALL) {
+ if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_PR)) {
+ /*
+ * These should have been caught reflected
+ * into the guest by now. Final sanity check:
+ * don't allow userspace to execute hcalls in
+ * the hypervisor.
+ */
+ r = RESUME_GUEST;
+ continue;
+ }
trace_kvm_hcall_enter(vcpu);
r = kvmppc_pseries_do_hcall(vcpu);
trace_kvm_hcall_exit(vcpu, r);
@@ -4460,6 +4626,9 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
atomic_dec(&kvm->arch.vcpus_running);
+
+ srr_regs_clobbered();
+
return r;
}
@@ -4758,8 +4927,8 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
/* Look up the VMA for the start of this memory slot */
hva = memslot->userspace_addr;
mmap_read_lock(kvm->mm);
- vma = find_vma(kvm->mm, hva);
- if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO))
+ vma = vma_lookup(kvm->mm, hva);
+ if (!vma || (vma->vm_flags & VM_IO))
goto up_out;
psize = vma_kernel_pagesize(vma);
@@ -5038,18 +5207,8 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
/*
* Track that we now have a HV mode VM active. This blocks secondary
* CPU threads from coming online.
- * On POWER9, we only need to do this if the "indep_threads_mode"
- * module parameter has been set to N.
*/
- if (cpu_has_feature(CPU_FTR_ARCH_300)) {
- if (!indep_threads_mode && !cpu_has_feature(CPU_FTR_HVMODE)) {
- pr_warn("KVM: Ignoring indep_threads_mode=N in nested hypervisor\n");
- kvm->arch.threads_indep = true;
- } else {
- kvm->arch.threads_indep = indep_threads_mode;
- }
- }
- if (!kvm->arch.threads_indep)
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
kvm_hv_vm_activated();
/*
@@ -5090,7 +5249,7 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
{
debugfs_remove_recursive(kvm->arch.debugfs_dir);
- if (!kvm->arch.threads_indep)
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
kvm_hv_vm_deactivated();
kvmppc_free_vcores(kvm);
@@ -5511,7 +5670,9 @@ static int kvmhv_enable_nested(struct kvm *kvm)
{
if (!nested)
return -EPERM;
- if (!cpu_has_feature(CPU_FTR_ARCH_300) || no_mixing_hpt_and_radix)
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ return -ENODEV;
+ if (!radix_enabled())
return -ENODEV;
/* kvm == NULL means the caller is testing if the capability exists */
@@ -5674,11 +5835,25 @@ static int kvmhv_enable_dawr1(struct kvm *kvm)
static bool kvmppc_hash_v3_possible(void)
{
- if (radix_enabled() && no_mixing_hpt_and_radix)
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ return false;
+
+ if (!cpu_has_feature(CPU_FTR_HVMODE))
return false;
- return cpu_has_feature(CPU_FTR_ARCH_300) &&
- cpu_has_feature(CPU_FTR_HVMODE);
+ /*
+ * POWER9 chips before version 2.02 can't have some threads in
+ * HPT mode and some in radix mode on the same core.
+ */
+ if (radix_enabled()) {
+ unsigned int pvr = mfspr(SPRN_PVR);
+ if ((pvr >> 16) == PVR_POWER9 &&
+ (((pvr & 0xe000) == 0 && (pvr & 0xfff) < 0x202) ||
+ ((pvr & 0xe000) == 0x2000 && (pvr & 0xfff) < 0x101)))
+ return false;
+ }
+
+ return true;
}
static struct kvmppc_ops kvm_ops_hv = {
@@ -5822,18 +5997,6 @@ static int kvmppc_book3s_init_hv(void)
if (kvmppc_radix_possible())
r = kvmppc_radix_init();
- /*
- * POWER9 chips before version 2.02 can't have some threads in
- * HPT mode and some in radix mode on the same core.
- */
- if (cpu_has_feature(CPU_FTR_ARCH_300)) {
- unsigned int pvr = mfspr(SPRN_PVR);
- if ((pvr >> 16) == PVR_POWER9 &&
- (((pvr & 0xe000) == 0 && (pvr & 0xfff) < 0x202) ||
- ((pvr & 0xe000) == 0x2000 && (pvr & 0xfff) < 0x101)))
- no_mixing_hpt_and_radix = true;
- }
-
r = kvmppc_uvmem_init();
if (r < 0)
pr_err("KVM-HV: kvmppc_uvmem_init failed %d\n", r);
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 7a0e33a9c980..be8ef1c5b1bf 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -35,21 +35,6 @@
#include "book3s_xive.h"
/*
- * The XIVE module will populate these when it loads
- */
-unsigned long (*__xive_vm_h_xirr)(struct kvm_vcpu *vcpu);
-unsigned long (*__xive_vm_h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server);
-int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
- unsigned long mfrr);
-int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr);
-int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr);
-EXPORT_SYMBOL_GPL(__xive_vm_h_xirr);
-EXPORT_SYMBOL_GPL(__xive_vm_h_ipoll);
-EXPORT_SYMBOL_GPL(__xive_vm_h_ipi);
-EXPORT_SYMBOL_GPL(__xive_vm_h_cppr);
-EXPORT_SYMBOL_GPL(__xive_vm_h_eoi);
-
-/*
* Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
* should be power of 2.
*/
@@ -196,16 +181,9 @@ int kvmppc_hwrng_present(void)
}
EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
-long kvmppc_h_random(struct kvm_vcpu *vcpu)
+long kvmppc_rm_h_random(struct kvm_vcpu *vcpu)
{
- int r;
-
- /* Only need to do the expensive mfmsr() on radix */
- if (kvm_is_radix(vcpu->kvm) && (mfmsr() & MSR_IR))
- r = powernv_get_random_long(&vcpu->arch.regs.gpr[4]);
- else
- r = powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]);
- if (r)
+ if (powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]))
return H_SUCCESS;
return H_HARDWARE;
@@ -221,15 +199,6 @@ void kvmhv_rm_send_ipi(int cpu)
void __iomem *xics_phys;
unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
- /* For a nested hypervisor, use the XICS via hcall */
- if (kvmhv_on_pseries()) {
- unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
- plpar_hcall_raw(H_IPI, retbuf, get_hard_smp_processor_id(cpu),
- IPI_PRIORITY);
- return;
- }
-
/* On POWER9 we can use msgsnd for any destination cpu. */
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
msg |= get_hard_smp_processor_id(cpu);
@@ -442,19 +411,12 @@ static long kvmppc_read_one_intr(bool *again)
return 1;
/* Now read the interrupt from the ICP */
- if (kvmhv_on_pseries()) {
- unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
- rc = plpar_hcall_raw(H_XIRR, retbuf, 0xFF);
- xirr = cpu_to_be32(retbuf[0]);
- } else {
- xics_phys = local_paca->kvm_hstate.xics_phys;
- rc = 0;
- if (!xics_phys)
- rc = opal_int_get_xirr(&xirr, false);
- else
- xirr = __raw_rm_readl(xics_phys + XICS_XIRR);
- }
+ xics_phys = local_paca->kvm_hstate.xics_phys;
+ rc = 0;
+ if (!xics_phys)
+ rc = opal_int_get_xirr(&xirr, false);
+ else
+ xirr = __raw_rm_readl(xics_phys + XICS_XIRR);
if (rc < 0)
return 1;
@@ -483,13 +445,7 @@ static long kvmppc_read_one_intr(bool *again)
*/
if (xisr == XICS_IPI) {
rc = 0;
- if (kvmhv_on_pseries()) {
- unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
- plpar_hcall_raw(H_IPI, retbuf,
- hard_smp_processor_id(), 0xff);
- plpar_hcall_raw(H_EOI, retbuf, h_xirr);
- } else if (xics_phys) {
+ if (xics_phys) {
__raw_rm_writeb(0xff, xics_phys + XICS_MFRR);
__raw_rm_writel(xirr, xics_phys + XICS_XIRR);
} else {
@@ -515,13 +471,7 @@ static long kvmppc_read_one_intr(bool *again)
/* We raced with the host,
* we need to resend that IPI, bummer
*/
- if (kvmhv_on_pseries()) {
- unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
- plpar_hcall_raw(H_IPI, retbuf,
- hard_smp_processor_id(),
- IPI_PRIORITY);
- } else if (xics_phys)
+ if (xics_phys)
__raw_rm_writeb(IPI_PRIORITY,
xics_phys + XICS_MFRR);
else
@@ -541,22 +491,13 @@ static long kvmppc_read_one_intr(bool *again)
}
#ifdef CONFIG_KVM_XICS
-static inline bool is_rm(void)
-{
- return !(mfmsr() & MSR_DR);
-}
-
unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
{
if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD;
- if (xics_on_xive()) {
- if (is_rm())
- return xive_rm_h_xirr(vcpu);
- if (unlikely(!__xive_vm_h_xirr))
- return H_NOT_AVAILABLE;
- return __xive_vm_h_xirr(vcpu);
- } else
+ if (xics_on_xive())
+ return xive_rm_h_xirr(vcpu);
+ else
return xics_rm_h_xirr(vcpu);
}
@@ -565,13 +506,9 @@ unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu)
if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD;
vcpu->arch.regs.gpr[5] = get_tb();
- if (xics_on_xive()) {
- if (is_rm())
- return xive_rm_h_xirr(vcpu);
- if (unlikely(!__xive_vm_h_xirr))
- return H_NOT_AVAILABLE;
- return __xive_vm_h_xirr(vcpu);
- } else
+ if (xics_on_xive())
+ return xive_rm_h_xirr(vcpu);
+ else
return xics_rm_h_xirr(vcpu);
}
@@ -579,13 +516,9 @@ unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
{
if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD;
- if (xics_on_xive()) {
- if (is_rm())
- return xive_rm_h_ipoll(vcpu, server);
- if (unlikely(!__xive_vm_h_ipoll))
- return H_NOT_AVAILABLE;
- return __xive_vm_h_ipoll(vcpu, server);
- } else
+ if (xics_on_xive())
+ return xive_rm_h_ipoll(vcpu, server);
+ else
return H_TOO_HARD;
}
@@ -594,13 +527,9 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
{
if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD;
- if (xics_on_xive()) {
- if (is_rm())
- return xive_rm_h_ipi(vcpu, server, mfrr);
- if (unlikely(!__xive_vm_h_ipi))
- return H_NOT_AVAILABLE;
- return __xive_vm_h_ipi(vcpu, server, mfrr);
- } else
+ if (xics_on_xive())
+ return xive_rm_h_ipi(vcpu, server, mfrr);
+ else
return xics_rm_h_ipi(vcpu, server, mfrr);
}
@@ -608,13 +537,9 @@ int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
{
if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD;
- if (xics_on_xive()) {
- if (is_rm())
- return xive_rm_h_cppr(vcpu, cppr);
- if (unlikely(!__xive_vm_h_cppr))
- return H_NOT_AVAILABLE;
- return __xive_vm_h_cppr(vcpu, cppr);
- } else
+ if (xics_on_xive())
+ return xive_rm_h_cppr(vcpu, cppr);
+ else
return xics_rm_h_cppr(vcpu, cppr);
}
@@ -622,13 +547,9 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
{
if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD;
- if (xics_on_xive()) {
- if (is_rm())
- return xive_rm_h_eoi(vcpu, xirr);
- if (unlikely(!__xive_vm_h_eoi))
- return H_NOT_AVAILABLE;
- return __xive_vm_h_eoi(vcpu, xirr);
- } else
+ if (xics_on_xive())
+ return xive_rm_h_eoi(vcpu, xirr);
+ else
return xics_rm_h_eoi(vcpu, xirr);
}
#endif /* CONFIG_KVM_XICS */
@@ -800,7 +721,7 @@ void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu,
* Thus we make all 4 threads use the same bit.
*/
if (cpu_has_feature(CPU_FTR_ARCH_300))
- pcpu = cpu_first_thread_sibling(pcpu);
+ pcpu = cpu_first_tlb_thread_sibling(pcpu);
if (nested)
need_tlb_flush = &nested->need_tlb_flush;
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 327417d79eac..4444f83cb133 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -58,7 +58,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
/*
* Put whatever is in the decrementer into the
* hypervisor decrementer.
- * Because of a hardware deviation in P8 and P9,
+ * Because of a hardware deviation in P8,
* we need to set LPCR[HDICE] before writing HDEC.
*/
ld r5, HSTATE_KVM_VCORE(r13)
@@ -67,15 +67,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
ori r8, r9, LPCR_HDICE
mtspr SPRN_LPCR, r8
isync
- andis. r0, r9, LPCR_LD@h
mfspr r8,SPRN_DEC
mftb r7
-BEGIN_FTR_SECTION
- /* On POWER9, don't sign-extend if host LPCR[LD] bit is set */
- bne 32f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
extsw r8,r8
-32: mtspr SPRN_HDEC,r8
+ mtspr SPRN_HDEC,r8
add r8,r8,r7
std r8,HSTATE_DECEXP(r13)
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 60724f674421..8543ad538b0c 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -19,6 +19,7 @@
#include <asm/pgalloc.h>
#include <asm/pte-walk.h>
#include <asm/reg.h>
+#include <asm/plpar_wrappers.h>
static struct patb_entry *pseries_partition_tb;
@@ -53,7 +54,8 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
hr->dawrx1 = vcpu->arch.dawrx1;
}
-static void byteswap_pt_regs(struct pt_regs *regs)
+/* Use noinline_for_stack due to https://bugs.llvm.org/show_bug.cgi?id=49610 */
+static noinline_for_stack void byteswap_pt_regs(struct pt_regs *regs)
{
unsigned long *addr = (unsigned long *) regs;
@@ -467,8 +469,15 @@ static void kvmhv_flush_lpid(unsigned int lpid)
return;
}
- rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(2, 0, 1),
- lpid, TLBIEL_INVAL_SET_LPID);
+ if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
+ rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(2, 0, 1),
+ lpid, TLBIEL_INVAL_SET_LPID);
+ else
+ rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+ H_RPTI_TYPE_NESTED |
+ H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
+ H_RPTI_TYPE_PAT,
+ H_RPTI_PAGE_ALL, 0, -1UL);
if (rc)
pr_err("KVM: TLB LPID invalidation hcall failed, rc=%ld\n", rc);
}
@@ -1214,6 +1223,113 @@ long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu)
return H_SUCCESS;
}
+static long do_tlb_invalidate_nested_all(struct kvm_vcpu *vcpu,
+ unsigned long lpid, unsigned long ric)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_nested_guest *gp;
+
+ gp = kvmhv_get_nested(kvm, lpid, false);
+ if (gp) {
+ kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
+ kvmhv_put_nested(gp);
+ }
+ return H_SUCCESS;
+}
+
+/*
+ * Number of pages above which we invalidate the entire LPID rather than
+ * flush individual pages.
+ */
+static unsigned long tlb_range_flush_page_ceiling __read_mostly = 33;
+
+static long do_tlb_invalidate_nested_tlb(struct kvm_vcpu *vcpu,
+ unsigned long lpid,
+ unsigned long pg_sizes,
+ unsigned long start,
+ unsigned long end)
+{
+ int ret = H_P4;
+ unsigned long addr, nr_pages;
+ struct mmu_psize_def *def;
+ unsigned long psize, ap, page_size;
+ bool flush_lpid;
+
+ for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+ def = &mmu_psize_defs[psize];
+ if (!(pg_sizes & def->h_rpt_pgsize))
+ continue;
+
+ nr_pages = (end - start) >> def->shift;
+ flush_lpid = nr_pages > tlb_range_flush_page_ceiling;
+ if (flush_lpid)
+ return do_tlb_invalidate_nested_all(vcpu, lpid,
+ RIC_FLUSH_TLB);
+ addr = start;
+ ap = mmu_get_ap(psize);
+ page_size = 1UL << def->shift;
+ do {
+ ret = kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap,
+ get_epn(addr));
+ if (ret)
+ return H_P4;
+ addr += page_size;
+ } while (addr < end);
+ }
+ return ret;
+}
+
+/*
+ * Performs partition-scoped invalidations for nested guests
+ * as part of H_RPT_INVALIDATE hcall.
+ */
+long do_h_rpt_invalidate_pat(struct kvm_vcpu *vcpu, unsigned long lpid,
+ unsigned long type, unsigned long pg_sizes,
+ unsigned long start, unsigned long end)
+{
+ /*
+ * If L2 lpid isn't valid, we need to return H_PARAMETER.
+ *
+ * However, nested KVM issues a L2 lpid flush call when creating
+ * partition table entries for L2. This happens even before the
+ * corresponding shadow lpid is created in HV which happens in
+ * H_ENTER_NESTED call. Since we can't differentiate this case from
+ * the invalid case, we ignore such flush requests and return success.
+ */
+ if (!kvmhv_find_nested(vcpu->kvm, lpid))
+ return H_SUCCESS;
+
+ /*
+ * A flush all request can be handled by a full lpid flush only.
+ */
+ if ((type & H_RPTI_TYPE_NESTED_ALL) == H_RPTI_TYPE_NESTED_ALL)
+ return do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_ALL);
+
+ /*
+ * We don't need to handle a PWC flush like process table here,
+ * because intermediate partition scoped table in nested guest doesn't
+ * really have PWC. Only level we have PWC is in L0 and for nested
+ * invalidate at L0 we always do kvm_flush_lpid() which does
+ * radix__flush_all_lpid(). For range invalidate at any level, we
+ * are not removing the higher level page tables and hence there is
+ * no PWC invalidate needed.
+ *
+ * if (type & H_RPTI_TYPE_PWC) {
+ * ret = do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_PWC);
+ * if (ret)
+ * return H_P4;
+ * }
+ */
+
+ if (start == 0 && end == -1)
+ return do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_TLB);
+
+ if (type & H_RPTI_TYPE_TLB)
+ return do_tlb_invalidate_nested_tlb(vcpu, lpid, pg_sizes,
+ start, end);
+ return H_SUCCESS;
+}
+
/* Used to convert a nested guest real address to a L1 guest real address */
static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu,
struct kvm_nested_guest *gp,
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c
new file mode 100644
index 000000000000..83f592eadcd2
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -0,0 +1,508 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <asm/asm-prototypes.h>
+#include <asm/dbell.h>
+#include <asm/kvm_ppc.h>
+#include <asm/ppc-opcode.h>
+
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+static void __start_timing(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next)
+{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ u64 tb = mftb() - vc->tb_offset_applied;
+
+ vcpu->arch.cur_activity = next;
+ vcpu->arch.cur_tb_start = tb;
+}
+
+static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next)
+{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ struct kvmhv_tb_accumulator *curr;
+ u64 tb = mftb() - vc->tb_offset_applied;
+ u64 prev_tb;
+ u64 delta;
+ u64 seq;
+
+ curr = vcpu->arch.cur_activity;
+ vcpu->arch.cur_activity = next;
+ prev_tb = vcpu->arch.cur_tb_start;
+ vcpu->arch.cur_tb_start = tb;
+
+ if (!curr)
+ return;
+
+ delta = tb - prev_tb;
+
+ seq = curr->seqcount;
+ curr->seqcount = seq + 1;
+ smp_wmb();
+ curr->tb_total += delta;
+ if (seq == 0 || delta < curr->tb_min)
+ curr->tb_min = delta;
+ if (delta > curr->tb_max)
+ curr->tb_max = delta;
+ smp_wmb();
+ curr->seqcount = seq + 2;
+}
+
+#define start_timing(vcpu, next) __start_timing(vcpu, next)
+#define end_timing(vcpu) __start_timing(vcpu, NULL)
+#define accumulate_time(vcpu, next) __accumulate_time(vcpu, next)
+#else
+#define start_timing(vcpu, next) do {} while (0)
+#define end_timing(vcpu) do {} while (0)
+#define accumulate_time(vcpu, next) do {} while (0)
+#endif
+
+static inline void mfslb(unsigned int idx, u64 *slbee, u64 *slbev)
+{
+ asm volatile("slbmfev %0,%1" : "=r" (*slbev) : "r" (idx));
+ asm volatile("slbmfee %0,%1" : "=r" (*slbee) : "r" (idx));
+}
+
+static inline void mtslb(u64 slbee, u64 slbev)
+{
+ asm volatile("slbmte %0,%1" :: "r" (slbev), "r" (slbee));
+}
+
+static inline void clear_slb_entry(unsigned int idx)
+{
+ mtslb(idx, 0);
+}
+
+static inline void slb_clear_invalidate_partition(void)
+{
+ clear_slb_entry(0);
+ asm volatile(PPC_SLBIA(6));
+}
+
+/*
+ * Malicious or buggy radix guests may have inserted SLB entries
+ * (only 0..3 because radix always runs with UPRT=1), so these must
+ * be cleared here to avoid side-channels. slbmte is used rather
+ * than slbia, as it won't clear cached translations.
+ */
+static void radix_clear_slb(void)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ clear_slb_entry(i);
+}
+
+static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
+{
+ struct kvm_nested_guest *nested = vcpu->arch.nested;
+ u32 lpid;
+
+ lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;
+
+ /*
+ * All the isync()s are overkill but trivially follow the ISA
+ * requirements. Some can likely be replaced with justification
+ * comment for why they are not needed.
+ */
+ isync();
+ mtspr(SPRN_LPID, lpid);
+ isync();
+ mtspr(SPRN_LPCR, lpcr);
+ isync();
+ mtspr(SPRN_PID, vcpu->arch.pid);
+ isync();
+}
+
+static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
+{
+ u32 lpid;
+ int i;
+
+ lpid = kvm->arch.lpid;
+
+ mtspr(SPRN_LPID, lpid);
+ mtspr(SPRN_LPCR, lpcr);
+ mtspr(SPRN_PID, vcpu->arch.pid);
+
+ for (i = 0; i < vcpu->arch.slb_max; i++)
+ mtslb(vcpu->arch.slb[i].orige, vcpu->arch.slb[i].origv);
+
+ isync();
+}
+
+static void switch_mmu_to_host(struct kvm *kvm, u32 pid)
+{
+ isync();
+ mtspr(SPRN_PID, pid);
+ isync();
+ mtspr(SPRN_LPID, kvm->arch.host_lpid);
+ isync();
+ mtspr(SPRN_LPCR, kvm->arch.host_lpcr);
+ isync();
+
+ if (!radix_enabled())
+ slb_restore_bolted_realmode();
+}
+
+static void save_clear_host_mmu(struct kvm *kvm)
+{
+ if (!radix_enabled()) {
+ /*
+ * Hash host could save and restore host SLB entries to
+ * reduce SLB fault overheads of VM exits, but for now the
+ * existing code clears all entries and restores just the
+ * bolted ones when switching back to host.
+ */
+ slb_clear_invalidate_partition();
+ }
+}
+
+static void save_clear_guest_mmu(struct kvm *kvm, struct kvm_vcpu *vcpu)
+{
+ if (kvm_is_radix(kvm)) {
+ radix_clear_slb();
+ } else {
+ int i;
+ int nr = 0;
+
+ /*
+ * This must run before switching to host (radix host can't
+ * access all SLBs).
+ */
+ for (i = 0; i < vcpu->arch.slb_nr; i++) {
+ u64 slbee, slbev;
+ mfslb(i, &slbee, &slbev);
+ if (slbee & SLB_ESID_V) {
+ vcpu->arch.slb[nr].orige = slbee | i;
+ vcpu->arch.slb[nr].origv = slbev;
+ nr++;
+ }
+ }
+ vcpu->arch.slb_max = nr;
+ slb_clear_invalidate_partition();
+ }
+}
+
+int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_nested_guest *nested = vcpu->arch.nested;
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ s64 hdec;
+ u64 tb, purr, spurr;
+ u64 *exsave;
+ bool ri_set;
+ int trap;
+ unsigned long msr;
+ unsigned long host_hfscr;
+ unsigned long host_ciabr;
+ unsigned long host_dawr0;
+ unsigned long host_dawrx0;
+ unsigned long host_psscr;
+ unsigned long host_pidr;
+ unsigned long host_dawr1;
+ unsigned long host_dawrx1;
+
+ hdec = time_limit - mftb();
+ if (hdec < 0)
+ return BOOK3S_INTERRUPT_HV_DECREMENTER;
+
+ WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_HV);
+ WARN_ON_ONCE(!(vcpu->arch.shregs.msr & MSR_ME));
+
+ start_timing(vcpu, &vcpu->arch.rm_entry);
+
+ vcpu->arch.ceded = 0;
+
+ if (vc->tb_offset) {
+ u64 new_tb = mftb() + vc->tb_offset;
+ mtspr(SPRN_TBU40, new_tb);
+ tb = mftb();
+ if ((tb & 0xffffff) < (new_tb & 0xffffff))
+ mtspr(SPRN_TBU40, new_tb + 0x1000000);
+ vc->tb_offset_applied = vc->tb_offset;
+ }
+
+ msr = mfmsr();
+
+ host_hfscr = mfspr(SPRN_HFSCR);
+ host_ciabr = mfspr(SPRN_CIABR);
+ host_dawr0 = mfspr(SPRN_DAWR0);
+ host_dawrx0 = mfspr(SPRN_DAWRX0);
+ host_psscr = mfspr(SPRN_PSSCR);
+ host_pidr = mfspr(SPRN_PID);
+ if (cpu_has_feature(CPU_FTR_DAWR1)) {
+ host_dawr1 = mfspr(SPRN_DAWR1);
+ host_dawrx1 = mfspr(SPRN_DAWRX1);
+ }
+
+ if (vc->pcr)
+ mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
+ mtspr(SPRN_DPDES, vc->dpdes);
+ mtspr(SPRN_VTB, vc->vtb);
+
+ local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
+ local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR);
+ mtspr(SPRN_PURR, vcpu->arch.purr);
+ mtspr(SPRN_SPURR, vcpu->arch.spurr);
+
+ if (dawr_enabled()) {
+ mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
+ mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
+ if (cpu_has_feature(CPU_FTR_DAWR1)) {
+ mtspr(SPRN_DAWR1, vcpu->arch.dawr1);
+ mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1);
+ }
+ }
+ mtspr(SPRN_CIABR, vcpu->arch.ciabr);
+ mtspr(SPRN_IC, vcpu->arch.ic);
+
+ mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
+ (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
+
+ mtspr(SPRN_HFSCR, vcpu->arch.hfscr);
+
+ mtspr(SPRN_HSRR0, vcpu->arch.regs.nip);
+ mtspr(SPRN_HSRR1, (vcpu->arch.shregs.msr & ~MSR_HV) | MSR_ME);
+
+ /*
+ * On POWER9 DD2.1 and below, sometimes on a Hypervisor Data Storage
+ * Interrupt (HDSI) the HDSISR is not be updated at all.
+ *
+ * To work around this we put a canary value into the HDSISR before
+ * returning to a guest and then check for this canary when we take a
+ * HDSI. If we find the canary on a HDSI, we know the hardware didn't
+ * update the HDSISR. In this case we return to the guest to retake the
+ * HDSI which should correctly update the HDSISR the second time HDSI
+ * entry.
+ *
+ * Just do this on all p9 processors for now.
+ */
+ mtspr(SPRN_HDSISR, HDSISR_CANARY);
+
+ mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
+ mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
+ mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
+ mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);
+
+ mtspr(SPRN_AMOR, ~0UL);
+
+ local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_HV_P9;
+
+ /*
+ * Hash host, hash guest, or radix guest with prefetch bug, all have
+ * to disable the MMU before switching to guest MMU state.
+ */
+ if (!radix_enabled() || !kvm_is_radix(kvm) ||
+ cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+ __mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0);
+
+ save_clear_host_mmu(kvm);
+
+ if (kvm_is_radix(kvm)) {
+ switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
+ if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+ __mtmsrd(0, 1); /* clear RI */
+
+ } else {
+ switch_mmu_to_guest_hpt(kvm, vcpu, lpcr);
+ }
+
+ /* TLBIEL uses LPID=LPIDR, so run this after setting guest LPID */
+ kvmppc_check_need_tlb_flush(kvm, vc->pcpu, nested);
+
+ /*
+ * P9 suppresses the HDEC exception when LPCR[HDICE] = 0,
+ * so set guest LPCR (with HDICE) before writing HDEC.
+ */
+ mtspr(SPRN_HDEC, hdec);
+
+ mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
+ mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
+ mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
+ mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
+
+ accumulate_time(vcpu, &vcpu->arch.guest_time);
+
+ kvmppc_p9_enter_guest(vcpu);
+
+ accumulate_time(vcpu, &vcpu->arch.rm_intr);
+
+ /* XXX: Could get these from r11/12 and paca exsave instead */
+ vcpu->arch.shregs.srr0 = mfspr(SPRN_SRR0);
+ vcpu->arch.shregs.srr1 = mfspr(SPRN_SRR1);
+ vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
+ vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
+
+ /* 0x2 bit for HSRR is only used by PR and P7/8 HV paths, clear it */
+ trap = local_paca->kvm_hstate.scratch0 & ~0x2;
+
+ /* HSRR interrupts leave MSR[RI] unchanged, SRR interrupts clear it. */
+ ri_set = false;
+ if (likely(trap > BOOK3S_INTERRUPT_MACHINE_CHECK)) {
+ if (trap != BOOK3S_INTERRUPT_SYSCALL &&
+ (vcpu->arch.shregs.msr & MSR_RI))
+ ri_set = true;
+ exsave = local_paca->exgen;
+ } else if (trap == BOOK3S_INTERRUPT_SYSTEM_RESET) {
+ exsave = local_paca->exnmi;
+ } else { /* trap == 0x200 */
+ exsave = local_paca->exmc;
+ }
+
+ vcpu->arch.regs.gpr[1] = local_paca->kvm_hstate.scratch1;
+ vcpu->arch.regs.gpr[3] = local_paca->kvm_hstate.scratch2;
+
+ /*
+ * Only set RI after reading machine check regs (DAR, DSISR, SRR0/1)
+ * and hstate scratch (which we need to move into exsave to make
+ * re-entrant vs SRESET/MCE)
+ */
+ if (ri_set) {
+ if (unlikely(!(mfmsr() & MSR_RI))) {
+ __mtmsrd(MSR_RI, 1);
+ WARN_ON_ONCE(1);
+ }
+ } else {
+ WARN_ON_ONCE(mfmsr() & MSR_RI);
+ __mtmsrd(MSR_RI, 1);
+ }
+
+ vcpu->arch.regs.gpr[9] = exsave[EX_R9/sizeof(u64)];
+ vcpu->arch.regs.gpr[10] = exsave[EX_R10/sizeof(u64)];
+ vcpu->arch.regs.gpr[11] = exsave[EX_R11/sizeof(u64)];
+ vcpu->arch.regs.gpr[12] = exsave[EX_R12/sizeof(u64)];
+ vcpu->arch.regs.gpr[13] = exsave[EX_R13/sizeof(u64)];
+ vcpu->arch.ppr = exsave[EX_PPR/sizeof(u64)];
+ vcpu->arch.cfar = exsave[EX_CFAR/sizeof(u64)];
+ vcpu->arch.regs.ctr = exsave[EX_CTR/sizeof(u64)];
+
+ vcpu->arch.last_inst = KVM_INST_FETCH_FAILED;
+
+ if (unlikely(trap == BOOK3S_INTERRUPT_MACHINE_CHECK)) {
+ vcpu->arch.fault_dar = exsave[EX_DAR/sizeof(u64)];
+ vcpu->arch.fault_dsisr = exsave[EX_DSISR/sizeof(u64)];
+ kvmppc_realmode_machine_check(vcpu);
+
+ } else if (unlikely(trap == BOOK3S_INTERRUPT_HMI)) {
+ kvmppc_realmode_hmi_handler();
+
+ } else if (trap == BOOK3S_INTERRUPT_H_EMUL_ASSIST) {
+ vcpu->arch.emul_inst = mfspr(SPRN_HEIR);
+
+ } else if (trap == BOOK3S_INTERRUPT_H_DATA_STORAGE) {
+ vcpu->arch.fault_dar = exsave[EX_DAR/sizeof(u64)];
+ vcpu->arch.fault_dsisr = exsave[EX_DSISR/sizeof(u64)];
+ vcpu->arch.fault_gpa = mfspr(SPRN_ASDR);
+
+ } else if (trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
+ vcpu->arch.fault_gpa = mfspr(SPRN_ASDR);
+
+ } else if (trap == BOOK3S_INTERRUPT_H_FAC_UNAVAIL) {
+ vcpu->arch.hfscr = mfspr(SPRN_HFSCR);
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ /*
+ * Softpatch interrupt for transactional memory emulation cases
+ * on POWER9 DD2.2. This is early in the guest exit path - we
+ * haven't saved registers or done a treclaim yet.
+ */
+ } else if (trap == BOOK3S_INTERRUPT_HV_SOFTPATCH) {
+ vcpu->arch.emul_inst = mfspr(SPRN_HEIR);
+
+ /*
+ * The cases we want to handle here are those where the guest
+ * is in real suspend mode and is trying to transition to
+ * transactional mode.
+ */
+ if (local_paca->kvm_hstate.fake_suspend &&
+ (vcpu->arch.shregs.msr & MSR_TS_S)) {
+ if (kvmhv_p9_tm_emulation_early(vcpu)) {
+ /* Prevent it being handled again. */
+ trap = 0;
+ }
+ }
+#endif
+ }
+
+ accumulate_time(vcpu, &vcpu->arch.rm_exit);
+
+ /* Advance host PURR/SPURR by the amount used by guest */
+ purr = mfspr(SPRN_PURR);
+ spurr = mfspr(SPRN_SPURR);
+ mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr +
+ purr - vcpu->arch.purr);
+ mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr +
+ spurr - vcpu->arch.spurr);
+ vcpu->arch.purr = purr;
+ vcpu->arch.spurr = spurr;
+
+ vcpu->arch.ic = mfspr(SPRN_IC);
+ vcpu->arch.pid = mfspr(SPRN_PID);
+ vcpu->arch.psscr = mfspr(SPRN_PSSCR) & PSSCR_GUEST_VIS;
+
+ vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0);
+ vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1);
+ vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
+ vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
+
+ /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
+ mtspr(SPRN_PSSCR, host_psscr |
+ (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
+ mtspr(SPRN_HFSCR, host_hfscr);
+ mtspr(SPRN_CIABR, host_ciabr);
+ mtspr(SPRN_DAWR0, host_dawr0);
+ mtspr(SPRN_DAWRX0, host_dawrx0);
+ if (cpu_has_feature(CPU_FTR_DAWR1)) {
+ mtspr(SPRN_DAWR1, host_dawr1);
+ mtspr(SPRN_DAWRX1, host_dawrx1);
+ }
+
+ if (kvm_is_radix(kvm)) {
+ /*
+ * Since this is radix, do a eieio; tlbsync; ptesync sequence
+ * in case we interrupted the guest between a tlbie and a
+ * ptesync.
+ */
+ asm volatile("eieio; tlbsync; ptesync");
+ }
+
+ /*
+ * cp_abort is required if the processor supports local copy-paste
+ * to clear the copy buffer that was under control of the guest.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ asm volatile(PPC_CP_ABORT);
+
+ vc->dpdes = mfspr(SPRN_DPDES);
+ vc->vtb = mfspr(SPRN_VTB);
+ mtspr(SPRN_DPDES, 0);
+ if (vc->pcr)
+ mtspr(SPRN_PCR, PCR_MASK);
+
+ if (vc->tb_offset_applied) {
+ u64 new_tb = mftb() - vc->tb_offset_applied;
+ mtspr(SPRN_TBU40, new_tb);
+ tb = mftb();
+ if ((tb & 0xffffff) < (new_tb & 0xffffff))
+ mtspr(SPRN_TBU40, new_tb + 0x1000000);
+ vc->tb_offset_applied = 0;
+ }
+
+ mtspr(SPRN_HDEC, 0x7fffffff);
+
+ save_clear_guest_mmu(kvm, vcpu);
+ switch_mmu_to_host(kvm, host_pidr);
+ local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_NONE;
+
+ /*
+ * If we are in real mode, only switch MMU on after the MMU is
+ * switched to host, to avoid the P9_RADIX_PREFETCH_BUG.
+ */
+ __mtmsrd(msr, 0);
+
+ end_timing(vcpu);
+
+ return trap;
+}
+EXPORT_SYMBOL_GPL(kvmhv_vcpu_entry_p9);
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 7a0f12404e0e..632b2545072b 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -46,6 +46,10 @@ static int global_invalidates(struct kvm *kvm)
else
global = 1;
+ /* LPID has been switched to host if in virt mode so can't do local */
+ if (!global && (mfmsr() & (MSR_IR|MSR_DR)))
+ global = 1;
+
if (!global) {
/* any other core might now have stale TLB entries... */
smp_wmb();
@@ -56,7 +60,7 @@ static int global_invalidates(struct kvm *kvm)
* so use the bit for the first thread to represent the core.
*/
if (cpu_has_feature(CPU_FTR_ARCH_300))
- cpu = cpu_first_thread_sibling(cpu);
+ cpu = cpu_first_tlb_thread_sibling(cpu);
cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush);
}
@@ -398,6 +402,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
vcpu->arch.pgdir, true,
&vcpu->arch.regs.gpr[4]);
}
+EXPORT_SYMBOL_GPL(kvmppc_h_enter);
#ifdef __BIG_ENDIAN__
#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token))
@@ -542,6 +547,7 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn,
&vcpu->arch.regs.gpr[4]);
}
+EXPORT_SYMBOL_GPL(kvmppc_h_remove);
long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
{
@@ -660,6 +666,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
return ret;
}
+EXPORT_SYMBOL_GPL(kvmppc_h_bulk_remove);
long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index, unsigned long avpn)
@@ -730,6 +737,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
return H_SUCCESS;
}
+EXPORT_SYMBOL_GPL(kvmppc_h_protect);
long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index)
@@ -770,6 +778,7 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
}
return H_SUCCESS;
}
+EXPORT_SYMBOL_GPL(kvmppc_h_read);
long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index)
@@ -818,6 +827,7 @@ long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
return ret;
}
+EXPORT_SYMBOL_GPL(kvmppc_h_clear_ref);
long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index)
@@ -865,6 +875,7 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
return ret;
}
+EXPORT_SYMBOL_GPL(kvmppc_h_clear_mod);
static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq,
unsigned long gpa, int writing, unsigned long *hpa,
@@ -1283,3 +1294,4 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
return -1; /* send fault up to host kernel mode */
}
+EXPORT_SYMBOL_GPL(kvmppc_hpte_hv_fault);
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index c2c9c733f359..0a11ec88a0ae 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -141,13 +141,6 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
return;
}
- if (xive_enabled() && kvmhv_on_pseries()) {
- /* No XICS access or hypercalls available, too hard */
- this_icp->rm_action |= XICS_RM_KICK_VCPU;
- this_icp->rm_kick_target = vcpu;
- return;
- }
-
/*
* Check if the core is loaded,
* if not, find an available host core to post to wake the VCPU,
@@ -771,14 +764,6 @@ static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again)
void __iomem *xics_phys;
int64_t rc;
- if (kvmhv_on_pseries()) {
- unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
- iosync();
- plpar_hcall_raw(H_EOI, retbuf, hwirq);
- return;
- }
-
rc = pnv_opal_pci_msi_eoi(c, hwirq);
if (rc)
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 004f0d4e665f..8dd437d7a2c6 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -25,18 +25,10 @@
#include <asm/export.h>
#include <asm/tm.h>
#include <asm/opal.h>
-#include <asm/xive-regs.h>
#include <asm/thread_info.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>
#include <asm/cpuidle.h>
-#include <asm/ultravisor-api.h>
-
-/* Sign-extend HDEC if not on POWER9 */
-#define EXTEND_HDEC(reg) \
-BEGIN_FTR_SECTION; \
- extsw reg, reg; \
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
/* Values in HSTATE_NAPPING(r13) */
#define NAPPING_CEDE 1
@@ -44,9 +36,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
#define NAPPING_UNSPLIT 3
/* Stack frame offsets for kvmppc_hv_entry */
-#define SFS 208
+#define SFS 160
#define STACK_SLOT_TRAP (SFS-4)
-#define STACK_SLOT_SHORT_PATH (SFS-8)
#define STACK_SLOT_TID (SFS-16)
#define STACK_SLOT_PSSCR (SFS-24)
#define STACK_SLOT_PID (SFS-32)
@@ -57,11 +48,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
#define STACK_SLOT_HFSCR (SFS-72)
#define STACK_SLOT_AMR (SFS-80)
#define STACK_SLOT_UAMOR (SFS-88)
-#define STACK_SLOT_DAWR1 (SFS-96)
-#define STACK_SLOT_DAWRX1 (SFS-104)
-#define STACK_SLOT_FSCR (SFS-112)
-/* the following is used by the P9 short path */
-#define STACK_SLOT_NVGPRS (SFS-152) /* 18 gprs */
+#define STACK_SLOT_FSCR (SFS-96)
/*
* Call kvmppc_hv_entry in real mode.
@@ -137,15 +124,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
/* Return the trap number on this thread as the return value */
mr r3, r12
- /*
- * If we came back from the guest via a relocation-on interrupt,
- * we will be in virtual mode at this point, which makes it a
- * little easier to get back to the caller.
- */
- mfmsr r0
- andi. r0, r0, MSR_IR /* in real mode? */
- bne .Lvirt_return
-
/* RFI into the highmem handler */
mfmsr r6
li r0, MSR_RI
@@ -155,11 +133,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
mtsrr1 r7
RFI_TO_KERNEL
- /* Virtual-mode return */
-.Lvirt_return:
- mtlr r8
- blr
-
kvmppc_primary_no_guest:
/* We handle this much like a ceded vcpu */
/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
@@ -246,7 +219,7 @@ kvm_novcpu_wakeup:
/* See if our timeslice has expired (HDEC is negative) */
mfspr r0, SPRN_HDEC
- EXTEND_HDEC(r0)
+ extsw r0, r0
li r12, BOOK3S_INTERRUPT_HV_DECREMENTER
cmpdi r0, 0
blt kvm_novcpu_exit
@@ -348,10 +321,8 @@ kvm_secondary_got_guest:
lbz r4, HSTATE_PTID(r13)
cmpwi r4, 0
bne 63f
- LOAD_REG_ADDR(r6, decrementer_max)
- ld r6, 0(r6)
+ lis r6,0x7fff /* MAX_INT@h */
mtspr SPRN_HDEC, r6
-BEGIN_FTR_SECTION
/* and set per-LPAR registers, if doing dynamic micro-threading */
ld r6, HSTATE_SPLIT_MODE(r13)
cmpdi r6, 0
@@ -363,7 +334,6 @@ BEGIN_FTR_SECTION
ld r0, KVM_SPLIT_LDBAR(r6)
mtspr SPRN_LDBAR, r0
isync
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
63:
/* Order load of vcpu after load of vcore */
lwsync
@@ -434,7 +404,6 @@ kvm_no_guest:
blr
53:
-BEGIN_FTR_SECTION
HMT_LOW
ld r5, HSTATE_KVM_VCORE(r13)
cmpdi r5, 0
@@ -449,14 +418,6 @@ BEGIN_FTR_SECTION
b kvm_unsplit_nap
60: HMT_MEDIUM
b kvm_secondary_got_guest
-FTR_SECTION_ELSE
- HMT_LOW
- ld r5, HSTATE_KVM_VCORE(r13)
- cmpdi r5, 0
- beq kvm_no_guest
- HMT_MEDIUM
- b kvm_secondary_got_guest
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
54: li r0, KVM_HWTHREAD_IN_KVM
stb r0, HSTATE_HWTHREAD_STATE(r13)
@@ -582,13 +543,11 @@ kvmppc_hv_entry:
bne 10f
lwz r7,KVM_LPID(r9)
-BEGIN_FTR_SECTION
ld r6,KVM_SDR1(r9)
li r0,LPID_RSVD /* switch to reserved LPID */
mtspr SPRN_LPID,r0
ptesync
mtspr SPRN_SDR1,r6 /* switch to partition page table */
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
mtspr SPRN_LPID,r7
isync
@@ -669,16 +628,6 @@ kvmppc_got_guest:
/* Save host values of some registers */
BEGIN_FTR_SECTION
- mfspr r5, SPRN_TIDR
- mfspr r6, SPRN_PSSCR
- mfspr r7, SPRN_PID
- std r5, STACK_SLOT_TID(r1)
- std r6, STACK_SLOT_PSSCR(r1)
- std r7, STACK_SLOT_PID(r1)
- mfspr r5, SPRN_HFSCR
- std r5, STACK_SLOT_HFSCR(r1)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-BEGIN_FTR_SECTION
mfspr r5, SPRN_CIABR
mfspr r6, SPRN_DAWR0
mfspr r7, SPRN_DAWRX0
@@ -690,12 +639,6 @@ BEGIN_FTR_SECTION
mfspr r5, SPRN_FSCR
std r5, STACK_SLOT_FSCR(r1)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-BEGIN_FTR_SECTION
- mfspr r6, SPRN_DAWR1
- mfspr r7, SPRN_DAWRX1
- std r6, STACK_SLOT_DAWR1(r1)
- std r7, STACK_SLOT_DAWRX1(r1)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S | CPU_FTR_DAWR1)
mfspr r5, SPRN_AMR
std r5, STACK_SLOT_AMR(r1)
@@ -713,13 +656,9 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/*
- * Branch around the call if both CPU_FTR_TM and
- * CPU_FTR_P9_TM_HV_ASSIST are off.
- */
BEGIN_FTR_SECTION
b 91f
-END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
+END_FTR_SECTION_IFCLR(CPU_FTR_TM)
/*
* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
*/
@@ -786,12 +725,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
ld r6, VCPU_DAWRX0(r4)
mtspr SPRN_DAWR0, r5
mtspr SPRN_DAWRX0, r6
-BEGIN_FTR_SECTION
- ld r5, VCPU_DAWR1(r4)
- ld r6, VCPU_DAWRX1(r4)
- mtspr SPRN_DAWR1, r5
- mtspr SPRN_DAWRX1, r6
-END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
1:
ld r7, VCPU_CIABR(r4)
ld r8, VCPU_TAR(r4)
@@ -809,7 +742,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
mtspr SPRN_BESCR, r6
mtspr SPRN_PID, r7
mtspr SPRN_WORT, r8
-BEGIN_FTR_SECTION
/* POWER8-only registers */
ld r5, VCPU_TCSCR(r4)
ld r6, VCPU_ACOP(r4)
@@ -820,18 +752,6 @@ BEGIN_FTR_SECTION
mtspr SPRN_CSIGR, r7
mtspr SPRN_TACR, r8
nop
-FTR_SECTION_ELSE
- /* POWER9-only registers */
- ld r5, VCPU_TID(r4)
- ld r6, VCPU_PSSCR(r4)
- lbz r8, HSTATE_FAKE_SUSPEND(r13)
- oris r6, r6, PSSCR_EC@h /* This makes stop trap to HV */
- rldimi r6, r8, PSSCR_FAKE_SUSPEND_LG, 63 - PSSCR_FAKE_SUSPEND_LG
- ld r7, VCPU_HFSCR(r4)
- mtspr SPRN_TIDR, r5
- mtspr SPRN_PSSCR, r6
- mtspr SPRN_HFSCR, r7
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
8:
ld r5, VCPU_SPRG0(r4)
@@ -901,23 +821,15 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
/* Check if HDEC expires soon */
mfspr r3, SPRN_HDEC
- EXTEND_HDEC(r3)
+ extsw r3, r3
cmpdi r3, 512 /* 1 microsecond */
blt hdec_soon
- ld r6, VCPU_KVM(r4)
- lbz r0, KVM_RADIX(r6)
- cmpwi r0, 0
- bne 9f
-
- /* For hash guest, clear out and reload the SLB */
-BEGIN_MMU_FTR_SECTION
- /* Radix host won't have populated the SLB, so no need to clear */
+ /* Clear out and reload the SLB */
li r6, 0
slbmte r6, r6
PPC_SLBIA(6)
ptesync
-END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
/* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
lwz r5,VCPU_SLB_MAX(r4)
@@ -932,96 +844,9 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
bdnz 1b
9:
-#ifdef CONFIG_KVM_XICS
- /* We are entering the guest on that thread, push VCPU to XIVE */
- ld r11, VCPU_XIVE_SAVED_STATE(r4)
- li r9, TM_QW1_OS
- lwz r8, VCPU_XIVE_CAM_WORD(r4)
- cmpwi r8, 0
- beq no_xive
- li r7, TM_QW1_OS + TM_WORD2
- mfmsr r0
- andi. r0, r0, MSR_DR /* in real mode? */
- beq 2f
- ld r10, HSTATE_XIVE_TIMA_VIRT(r13)
- cmpldi cr1, r10, 0
- beq cr1, no_xive
- eieio
- stdx r11,r9,r10
- stwx r8,r7,r10
- b 3f
-2: ld r10, HSTATE_XIVE_TIMA_PHYS(r13)
- cmpldi cr1, r10, 0
- beq cr1, no_xive
- eieio
- stdcix r11,r9,r10
- stwcix r8,r7,r10
-3: li r9, 1
- stb r9, VCPU_XIVE_PUSHED(r4)
- eieio
-
- /*
- * We clear the irq_pending flag. There is a small chance of a
- * race vs. the escalation interrupt happening on another
- * processor setting it again, but the only consequence is to
- * cause a spurrious wakeup on the next H_CEDE which is not an
- * issue.
- */
- li r0,0
- stb r0, VCPU_IRQ_PENDING(r4)
-
- /*
- * In single escalation mode, if the escalation interrupt is
- * on, we mask it.
- */
- lbz r0, VCPU_XIVE_ESC_ON(r4)
- cmpwi cr1, r0,0
- beq cr1, 1f
- li r9, XIVE_ESB_SET_PQ_01
- beq 4f /* in real mode? */
- ld r10, VCPU_XIVE_ESC_VADDR(r4)
- ldx r0, r10, r9
- b 5f
-4: ld r10, VCPU_XIVE_ESC_RADDR(r4)
- ldcix r0, r10, r9
-5: sync
-
- /* We have a possible subtle race here: The escalation interrupt might
- * have fired and be on its way to the host queue while we mask it,
- * and if we unmask it early enough (re-cede right away), there is
- * a theorical possibility that it fires again, thus landing in the
- * target queue more than once which is a big no-no.
- *
- * Fortunately, solving this is rather easy. If the above load setting
- * PQ to 01 returns a previous value where P is set, then we know the
- * escalation interrupt is somewhere on its way to the host. In that
- * case we simply don't clear the xive_esc_on flag below. It will be
- * eventually cleared by the handler for the escalation interrupt.
- *
- * Then, when doing a cede, we check that flag again before re-enabling
- * the escalation interrupt, and if set, we abort the cede.
- */
- andi. r0, r0, XIVE_ESB_VAL_P
- bne- 1f
-
- /* Now P is 0, we can clear the flag */
- li r0, 0
- stb r0, VCPU_XIVE_ESC_ON(r4)
-1:
-no_xive:
-#endif /* CONFIG_KVM_XICS */
-
- li r0, 0
- stw r0, STACK_SLOT_SHORT_PATH(r1)
-
deliver_guest_interrupt: /* r4 = vcpu, r13 = paca */
/* Check if we can deliver an external or decrementer interrupt now */
ld r0, VCPU_PENDING_EXC(r4)
-BEGIN_FTR_SECTION
- /* On POWER9, also check for emulated doorbell interrupt */
- lbz r3, VCPU_DBELL_REQ(r4)
- or r0, r0, r3
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
cmpdi r0, 0
beq 71f
mr r3, r4
@@ -1033,7 +858,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
mtspr SPRN_SRR0, r6
mtspr SPRN_SRR1, r7
-fast_guest_entry_c:
ld r10, VCPU_PC(r4)
ld r11, VCPU_MSR(r4)
/* r11 = vcpu->arch.msr & ~MSR_HV */
@@ -1095,18 +919,8 @@ BEGIN_FTR_SECTION
mtspr SPRN_PPR, r0
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-/* Move canary into DSISR to check for later */
-BEGIN_FTR_SECTION
- li r0, 0x7fff
- mtspr SPRN_HDSISR, r0
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-
- ld r6, VCPU_KVM(r4)
- lbz r7, KVM_SECURE_GUEST(r6)
- cmpdi r7, 0
ld r6, VCPU_GPR(R6)(r4)
ld r7, VCPU_GPR(R7)(r4)
- bne ret_to_ultra
ld r0, VCPU_CR(r4)
mtcr r0
@@ -1117,117 +931,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
ld r4, VCPU_GPR(R4)(r4)
HRFI_TO_GUEST
b .
-/*
- * Use UV_RETURN ultracall to return control back to the Ultravisor after
- * processing an hypercall or interrupt that was forwarded (a.k.a. reflected)
- * to the Hypervisor.
- *
- * All registers have already been loaded, except:
- * R0 = hcall result
- * R2 = SRR1, so UV can detect a synthesized interrupt (if any)
- * R3 = UV_RETURN
- */
-ret_to_ultra:
- ld r0, VCPU_CR(r4)
- mtcr r0
-
- ld r0, VCPU_GPR(R3)(r4)
- mfspr r2, SPRN_SRR1
- li r3, 0
- ori r3, r3, UV_RETURN
- ld r4, VCPU_GPR(R4)(r4)
- sc 2
-
-/*
- * Enter the guest on a P9 or later system where we have exactly
- * one vcpu per vcore and we don't need to go to real mode
- * (which implies that host and guest are both using radix MMU mode).
- * r3 = vcpu pointer
- * Most SPRs and all the VSRs have been loaded already.
- */
-_GLOBAL(__kvmhv_vcpu_entry_p9)
-EXPORT_SYMBOL_GPL(__kvmhv_vcpu_entry_p9)
- mflr r0
- std r0, PPC_LR_STKOFF(r1)
- stdu r1, -SFS(r1)
-
- li r0, 1
- stw r0, STACK_SLOT_SHORT_PATH(r1)
-
- std r3, HSTATE_KVM_VCPU(r13)
- mfcr r4
- stw r4, SFS+8(r1)
-
- std r1, HSTATE_HOST_R1(r13)
-
- reg = 14
- .rept 18
- std reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
- reg = reg + 1
- .endr
-
- reg = 14
- .rept 18
- ld reg, __VCPU_GPR(reg)(r3)
- reg = reg + 1
- .endr
-
- mfmsr r10
- std r10, HSTATE_HOST_MSR(r13)
-
- mr r4, r3
- b fast_guest_entry_c
-guest_exit_short_path:
- /*
- * Malicious or buggy radix guests may have inserted SLB entries
- * (only 0..3 because radix always runs with UPRT=1), so these must
- * be cleared here to avoid side-channels. slbmte is used rather
- * than slbia, as it won't clear cached translations.
- */
- li r0,0
- slbmte r0,r0
- li r4,1
- slbmte r0,r4
- li r4,2
- slbmte r0,r4
- li r4,3
- slbmte r0,r4
-
- li r0, KVM_GUEST_MODE_NONE
- stb r0, HSTATE_IN_GUEST(r13)
-
- reg = 14
- .rept 18
- std reg, __VCPU_GPR(reg)(r9)
- reg = reg + 1
- .endr
-
- reg = 14
- .rept 18
- ld reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
- reg = reg + 1
- .endr
-
- lwz r4, SFS+8(r1)
- mtcr r4
-
- mr r3, r12 /* trap number */
-
- addi r1, r1, SFS
- ld r0, PPC_LR_STKOFF(r1)
- mtlr r0
-
- /* If we are in real mode, do a rfid to get back to the caller */
- mfmsr r4
- andi. r5, r4, MSR_IR
- bnelr
- rldicl r5, r4, 64 - MSR_TS_S_LG, 62 /* extract TS field */
- mtspr SPRN_SRR0, r0
- ld r10, HSTATE_HOST_MSR(r13)
- rldimi r10, r5, MSR_TS_S_LG, 63 - MSR_TS_T_LG
- mtspr SPRN_SRR1, r10
- RFI_TO_KERNEL
- b .
secondary_too_late:
li r12, 0
@@ -1268,21 +971,16 @@ hdec_soon:
kvmppc_interrupt_hv:
/*
* Register contents:
+ * R9 = HSTATE_IN_GUEST
* R12 = (guest CR << 32) | interrupt vector
* R13 = PACA
* guest R12 saved in shadow VCPU SCRATCH0
* guest R13 saved in SPRN_SCRATCH0
+ * guest R9 saved in HSTATE_SCRATCH2
*/
- std r9, HSTATE_SCRATCH2(r13)
- lbz r9, HSTATE_IN_GUEST(r13)
- cmpwi r9, KVM_GUEST_MODE_HOST_HV
- beq kvmppc_bad_host_intr
-#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
- cmpwi r9, KVM_GUEST_MODE_GUEST
- ld r9, HSTATE_SCRATCH2(r13)
- beq kvmppc_interrupt_pr
-#endif
/* We're now back in the host but in guest MMU context */
+ cmpwi r9,KVM_GUEST_MODE_HOST_HV
+ beq kvmppc_bad_host_intr
li r9, KVM_GUEST_MODE_HOST_HV
stb r9, HSTATE_IN_GUEST(r13)
@@ -1400,7 +1098,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
bne 2f
mfspr r3,SPRN_HDEC
- EXTEND_HDEC(r3)
+ extsw r3, r3
cmpdi r3,0
mr r4,r9
bge fast_guest_return
@@ -1412,14 +1110,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
/* Hypervisor doorbell - exit only if host IPI flag set */
cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
bne 3f
-BEGIN_FTR_SECTION
- PPC_MSGSYNC
- lwsync
- /* always exit if we're running a nested guest */
- ld r0, VCPU_NESTED(r9)
- cmpdi r0, 0
- bne guest_exit_cont
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
lbz r0, HSTATE_HOST_IPI(r13)
cmpwi r0, 0
beq maybe_reenter_guest
@@ -1449,62 +1139,16 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
mr r4, r9
bl kvmhv_accumulate_time
#endif
-#ifdef CONFIG_KVM_XICS
- /* We are exiting, pull the VP from the XIVE */
- lbz r0, VCPU_XIVE_PUSHED(r9)
- cmpwi cr0, r0, 0
- beq 1f
- li r7, TM_SPC_PULL_OS_CTX
- li r6, TM_QW1_OS
- mfmsr r0
- andi. r0, r0, MSR_DR /* in real mode? */
- beq 2f
- ld r10, HSTATE_XIVE_TIMA_VIRT(r13)
- cmpldi cr0, r10, 0
- beq 1f
- /* First load to pull the context, we ignore the value */
- eieio
- lwzx r11, r7, r10
- /* Second load to recover the context state (Words 0 and 1) */
- ldx r11, r6, r10
- b 3f
-2: ld r10, HSTATE_XIVE_TIMA_PHYS(r13)
- cmpldi cr0, r10, 0
- beq 1f
- /* First load to pull the context, we ignore the value */
- eieio
- lwzcix r11, r7, r10
- /* Second load to recover the context state (Words 0 and 1) */
- ldcix r11, r6, r10
-3: std r11, VCPU_XIVE_SAVED_STATE(r9)
- /* Fixup some of the state for the next load */
- li r10, 0
- li r0, 0xff
- stb r10, VCPU_XIVE_PUSHED(r9)
- stb r10, (VCPU_XIVE_SAVED_STATE+3)(r9)
- stb r0, (VCPU_XIVE_SAVED_STATE+4)(r9)
- eieio
-1:
-#endif /* CONFIG_KVM_XICS */
/*
* Possibly flush the link stack here, before we do a blr in
- * guest_exit_short_path.
+ * kvmhv_switch_to_host.
*/
1: nop
patch_site 1b patch__call_kvm_flush_link_stack
- /* If we came in through the P9 short path, go back out to C now */
- lwz r0, STACK_SLOT_SHORT_PATH(r1)
- cmpwi r0, 0
- bne guest_exit_short_path
-
/* For hash guest, read the guest SLB and save it away */
- ld r5, VCPU_KVM(r9)
- lbz r0, KVM_RADIX(r5)
li r5, 0
- cmpwi r0, 0
- bne 0f /* for radix, save 0 entries */
lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */
mtctr r0
li r6,0
@@ -1528,9 +1172,6 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
stw r5,VCPU_SLB_MAX(r9)
/* load host SLB entries */
-BEGIN_MMU_FTR_SECTION
- b guest_bypass
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
ld r8,PACA_SLBSHADOWPTR(r13)
.rept SLB_NUM_BOLTED
@@ -1543,21 +1184,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
slbmte r6,r5
1: addi r8,r8,16
.endr
- b guest_bypass
-
-0: /*
- * Sanitise radix guest SLB, see guest_exit_short_path comment.
- * We clear vcpu->arch.slb_max to match earlier behaviour.
- */
- li r0,0
- stw r0,VCPU_SLB_MAX(r9)
- slbmte r0,r0
- li r4,1
- slbmte r0,r4
- li r4,2
- slbmte r0,r4
- li r4,3
- slbmte r0,r4
guest_bypass:
stw r12, STACK_SLOT_TRAP(r1)
@@ -1567,12 +1193,6 @@ guest_bypass:
ld r3, HSTATE_KVM_VCORE(r13)
mfspr r5,SPRN_DEC
mftb r6
- /* On P9, if the guest has large decr enabled, don't sign extend */
-BEGIN_FTR_SECTION
- ld r4, VCORE_LPCR(r3)
- andis. r4, r4, LPCR_LD@h
- bne 16f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
extsw r5,r5
16: add r5,r5,r6
/* r5 is a guest timebase value here, convert to host TB */
@@ -1646,7 +1266,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
std r6, VCPU_BESCR(r9)
stw r7, VCPU_GUEST_PID(r9)
std r8, VCPU_WORT(r9)
-BEGIN_FTR_SECTION
mfspr r5, SPRN_TCSCR
mfspr r6, SPRN_ACOP
mfspr r7, SPRN_CSIGR
@@ -1655,17 +1274,6 @@ BEGIN_FTR_SECTION
std r6, VCPU_ACOP(r9)
std r7, VCPU_CSIGR(r9)
std r8, VCPU_TACR(r9)
-FTR_SECTION_ELSE
- mfspr r5, SPRN_TIDR
- mfspr r6, SPRN_PSSCR
- std r5, VCPU_TID(r9)
- rldicl r6, r6, 4, 50 /* r6 &= PSSCR_GUEST_VIS */
- rotldi r6, r6, 60
- std r6, VCPU_PSSCR(r9)
- /* Restore host HFSCR value */
- ld r7, STACK_SLOT_HFSCR(r1)
- mtspr SPRN_HFSCR, r7
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
BEGIN_FTR_SECTION
ld r5, STACK_SLOT_FSCR(r1)
mtspr SPRN_FSCR, r5
@@ -1677,13 +1285,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
li r0, 0
mtspr SPRN_PSPB, r0
mtspr SPRN_WORT, r0
-BEGIN_FTR_SECTION
mtspr SPRN_TCSCR, r0
/* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
li r0, 1
sldi r0, r0, 31
mtspr SPRN_MMCRS, r0
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
/* Save and restore AMR, IAMR and UAMOR before turning on the MMU */
ld r8, STACK_SLOT_IAMR(r1)
@@ -1740,13 +1346,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
bl kvmppc_save_fp
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/*
- * Branch around the call if both CPU_FTR_TM and
- * CPU_FTR_P9_TM_HV_ASSIST are off.
- */
BEGIN_FTR_SECTION
b 91f
-END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
+END_FTR_SECTION_IFCLR(CPU_FTR_TM)
/*
* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
*/
@@ -1792,80 +1394,6 @@ BEGIN_FTR_SECTION
mtspr SPRN_DAWR0, r6
mtspr SPRN_DAWRX0, r7
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-BEGIN_FTR_SECTION
- ld r6, STACK_SLOT_DAWR1(r1)
- ld r7, STACK_SLOT_DAWRX1(r1)
- mtspr SPRN_DAWR1, r6
- mtspr SPRN_DAWRX1, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S | CPU_FTR_DAWR1)
-BEGIN_FTR_SECTION
- ld r5, STACK_SLOT_TID(r1)
- ld r6, STACK_SLOT_PSSCR(r1)
- ld r7, STACK_SLOT_PID(r1)
- mtspr SPRN_TIDR, r5
- mtspr SPRN_PSSCR, r6
- mtspr SPRN_PID, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-
-#ifdef CONFIG_PPC_RADIX_MMU
- /*
- * Are we running hash or radix ?
- */
- ld r5, VCPU_KVM(r9)
- lbz r0, KVM_RADIX(r5)
- cmpwi cr2, r0, 0
- beq cr2, 2f
-
- /*
- * Radix: do eieio; tlbsync; ptesync sequence in case we
- * interrupted the guest between a tlbie and a ptesync.
- */
- eieio
- tlbsync
- ptesync
-
-BEGIN_FTR_SECTION
- /* Radix: Handle the case where the guest used an illegal PID */
- LOAD_REG_ADDR(r4, mmu_base_pid)
- lwz r3, VCPU_GUEST_PID(r9)
- lwz r5, 0(r4)
- cmpw cr0,r3,r5
- blt 2f
-
- /*
- * Illegal PID, the HW might have prefetched and cached in the TLB
- * some translations for the LPID 0 / guest PID combination which
- * Linux doesn't know about, so we need to flush that PID out of
- * the TLB. First we need to set LPIDR to 0 so tlbiel applies to
- * the right context.
- */
- li r0,0
- mtspr SPRN_LPID,r0
- isync
-
- /* Then do a congruence class local flush */
- ld r6,VCPU_KVM(r9)
- lwz r0,KVM_TLB_SETS(r6)
- mtctr r0
- li r7,0x400 /* IS field = 0b01 */
- ptesync
- sldi r0,r3,32 /* RS has PID */
-1: PPC_TLBIEL(7,0,2,1,1) /* RIC=2, PRS=1, R=1 */
- addi r7,r7,0x1000
- bdnz 1b
- ptesync
-END_FTR_SECTION_IFSET(CPU_FTR_P9_RADIX_PREFETCH_BUG)
-
-2:
-#endif /* CONFIG_PPC_RADIX_MMU */
-
- /*
- * cp_abort is required if the processor supports local copy-paste
- * to clear the copy buffer that was under control of the guest.
- */
-BEGIN_FTR_SECTION
- PPC_CP_ABORT
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
/*
* POWER7/POWER8 guest -> host partition switch code.
@@ -1902,13 +1430,11 @@ kvmhv_switch_to_host:
/* Primary thread switches back to host partition */
lwz r7,KVM_HOST_LPID(r4)
-BEGIN_FTR_SECTION
ld r6,KVM_HOST_SDR1(r4)
li r8,LPID_RSVD /* switch to reserved LPID */
mtspr SPRN_LPID,r8
ptesync
mtspr SPRN_SDR1,r6 /* switch to host page table */
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
mtspr SPRN_LPID,r7
isync
@@ -2117,26 +1643,13 @@ kvmppc_tm_emul:
* reflect the HDSI to the guest as a DSI.
*/
kvmppc_hdsi:
- ld r3, VCPU_KVM(r9)
- lbz r0, KVM_RADIX(r3)
mfspr r4, SPRN_HDAR
mfspr r6, SPRN_HDSISR
-BEGIN_FTR_SECTION
- /* Look for DSISR canary. If we find it, retry instruction */
- cmpdi r6, 0x7fff
- beq 6f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
- cmpwi r0, 0
- bne .Lradix_hdsi /* on radix, just save DAR/DSISR/ASDR */
/* HPTE not found fault or protection fault? */
andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
beq 1f /* if not, send it to the guest */
andi. r0, r11, MSR_DR /* data relocation enabled? */
beq 3f
-BEGIN_FTR_SECTION
- mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */
- b 4f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
clrrdi r0, r4, 28
PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */
li r0, BOOK3S_INTERRUPT_DATA_SEGMENT
@@ -2204,31 +1717,15 @@ fast_interrupt_c_return:
stb r0, HSTATE_IN_GUEST(r13)
b guest_exit_cont
-.Lradix_hdsi:
- std r4, VCPU_FAULT_DAR(r9)
- stw r6, VCPU_FAULT_DSISR(r9)
-.Lradix_hisi:
- mfspr r5, SPRN_ASDR
- std r5, VCPU_FAULT_GPA(r9)
- b guest_exit_cont
-
/*
* Similarly for an HISI, reflect it to the guest as an ISI unless
* it is an HPTE not found fault for a page that we have paged out.
*/
kvmppc_hisi:
- ld r3, VCPU_KVM(r9)
- lbz r0, KVM_RADIX(r3)
- cmpwi r0, 0
- bne .Lradix_hisi /* for radix, just save ASDR */
andis. r0, r11, SRR1_ISI_NOPT@h
beq 1f
andi. r0, r11, MSR_IR /* instruction relocation enabled? */
beq 3f
-BEGIN_FTR_SECTION
- mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */
- b 4f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
clrrdi r0, r10, 28
PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */
li r0, BOOK3S_INTERRUPT_INST_SEGMENT
@@ -2276,10 +1773,6 @@ hcall_try_real_mode:
andi. r0,r11,MSR_PR
/* sc 1 from userspace - reflect to guest syscall */
bne sc_1_fast_return
- /* sc 1 from nested guest - give it to L1 to handle */
- ld r0, VCPU_NESTED(r9)
- cmpdi r0, 0
- bne guest_exit_cont
clrrdi r3,r3,2
cmpldi r3,hcall_real_table_end - hcall_real_table
bge guest_exit_cont
@@ -2544,7 +2037,7 @@ hcall_real_table:
#else
.long 0 /* 0x2fc - H_XIRR_X*/
#endif
- .long DOTSYM(kvmppc_h_random) - hcall_real_table
+ .long DOTSYM(kvmppc_rm_h_random) - hcall_real_table
.globl hcall_real_table_end
hcall_real_table_end:
@@ -2675,13 +2168,9 @@ _GLOBAL(kvmppc_h_cede) /* r3 = vcpu pointer, r11 = msr, r13 = paca */
bl kvmppc_save_fp
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/*
- * Branch around the call if both CPU_FTR_TM and
- * CPU_FTR_P9_TM_HV_ASSIST are off.
- */
BEGIN_FTR_SECTION
b 91f
-END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
+END_FTR_SECTION_IFCLR(CPU_FTR_TM)
/*
* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
*/
@@ -2701,15 +2190,8 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
mfspr r3, SPRN_DEC
mfspr r4, SPRN_HDEC
mftb r5
-BEGIN_FTR_SECTION
- /* On P9 check whether the guest has large decrementer mode enabled */
- ld r6, HSTATE_KVM_VCORE(r13)
- ld r6, VCORE_LPCR(r6)
- andis. r6, r6, LPCR_LD@h
- bne 68f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
extsw r3, r3
-68: EXTEND_HDEC(r4)
+ extsw r4, r4
cmpd r3, r4
ble 67f
mtspr SPRN_DEC, r4
@@ -2754,28 +2236,11 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
kvm_nap_sequence: /* desired LPCR value in r5 */
-BEGIN_FTR_SECTION
- /*
- * PSSCR bits: exit criterion = 1 (wakeup based on LPCR at sreset)
- * enable state loss = 1 (allow SMT mode switch)
- * requested level = 0 (just stop dispatching)
- */
- lis r3, (PSSCR_EC | PSSCR_ESL)@h
- /* Set LPCR_PECE_HVEE bit to enable wakeup by HV interrupts */
- li r4, LPCR_PECE_HVEE@higher
- sldi r4, r4, 32
- or r5, r5, r4
-FTR_SECTION_ELSE
li r3, PNV_THREAD_NAP
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
mtspr SPRN_LPCR,r5
isync
-BEGIN_FTR_SECTION
- bl isa300_idle_stop_mayloss
-FTR_SECTION_ELSE
bl isa206_idle_insn_mayloss
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
mfspr r0, SPRN_CTRLF
ori r0, r0, 1
@@ -2794,10 +2259,8 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
beq kvm_end_cede
cmpwi r0, NAPPING_NOVCPU
beq kvm_novcpu_wakeup
-BEGIN_FTR_SECTION
cmpwi r0, NAPPING_UNSPLIT
beq kvm_unsplit_wakeup
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
twi 31,0,0 /* Nap state must not be zero */
33: mr r4, r3
@@ -2817,13 +2280,9 @@ kvm_end_cede:
#endif
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/*
- * Branch around the call if both CPU_FTR_TM and
- * CPU_FTR_P9_TM_HV_ASSIST are off.
- */
BEGIN_FTR_SECTION
b 91f
-END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
+END_FTR_SECTION_IFCLR(CPU_FTR_TM)
/*
* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
*/
@@ -2913,47 +2372,7 @@ kvm_cede_prodded:
/* we've ceded but we want to give control to the host */
kvm_cede_exit:
ld r9, HSTATE_KVM_VCPU(r13)
-#ifdef CONFIG_KVM_XICS
- /* are we using XIVE with single escalation? */
- ld r10, VCPU_XIVE_ESC_VADDR(r9)
- cmpdi r10, 0
- beq 3f
- li r6, XIVE_ESB_SET_PQ_00
- /*
- * If we still have a pending escalation, abort the cede,
- * and we must set PQ to 10 rather than 00 so that we don't
- * potentially end up with two entries for the escalation
- * interrupt in the XIVE interrupt queue. In that case
- * we also don't want to set xive_esc_on to 1 here in
- * case we race with xive_esc_irq().
- */
- lbz r5, VCPU_XIVE_ESC_ON(r9)
- cmpwi r5, 0
- beq 4f
- li r0, 0
- stb r0, VCPU_CEDED(r9)
- /*
- * The escalation interrupts are special as we don't EOI them.
- * There is no need to use the load-after-store ordering offset
- * to set PQ to 10 as we won't use StoreEOI.
- */
- li r6, XIVE_ESB_SET_PQ_10
- b 5f
-4: li r0, 1
- stb r0, VCPU_XIVE_ESC_ON(r9)
- /* make sure store to xive_esc_on is seen before xive_esc_irq runs */
- sync
-5: /* Enable XIVE escalation */
- mfmsr r0
- andi. r0, r0, MSR_DR /* in real mode? */
- beq 1f
- ldx r0, r10, r6
- b 2f
-1: ld r10, VCPU_XIVE_ESC_RADDR(r9)
- ldcix r0, r10, r6
-2: sync
-#endif /* CONFIG_KVM_XICS */
-3: b guest_exit_cont
+ b guest_exit_cont
/* Try to do machine check recovery in real mode */
machine_check_realmode:
@@ -3030,10 +2449,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
PPC_MSGCLR(6)
/* see if it's a host IPI */
li r3, 1
-BEGIN_FTR_SECTION
- PPC_MSGSYNC
- lwsync
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
lbz r0, HSTATE_HOST_IPI(r13)
cmpwi r0, 0
bnelr
@@ -3342,73 +2757,12 @@ kvmppc_bad_host_intr:
std r3, STACK_FRAME_OVERHEAD-16(r1)
/*
- * On POWER9 do a minimal restore of the MMU and call C code,
- * which will print a message and panic.
* XXX On POWER7 and POWER8, we just spin here since we don't
* know what the other threads are doing (and we don't want to
* coordinate with them) - but at least we now have register state
* in memory that we might be able to look at from another CPU.
*/
-BEGIN_FTR_SECTION
b .
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
- ld r9, HSTATE_KVM_VCPU(r13)
- ld r10, VCPU_KVM(r9)
-
- li r0, 0
- mtspr SPRN_AMR, r0
- mtspr SPRN_IAMR, r0
- mtspr SPRN_CIABR, r0
- mtspr SPRN_DAWRX0, r0
-BEGIN_FTR_SECTION
- mtspr SPRN_DAWRX1, r0
-END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
-
- /* Clear hash and radix guest SLB, see guest_exit_short_path comment. */
- slbmte r0, r0
- PPC_SLBIA(6)
-
-BEGIN_MMU_FTR_SECTION
- b 4f
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
-
- ptesync
- ld r8, PACA_SLBSHADOWPTR(r13)
- .rept SLB_NUM_BOLTED
- li r3, SLBSHADOW_SAVEAREA
- LDX_BE r5, r8, r3
- addi r3, r3, 8
- LDX_BE r6, r8, r3
- andis. r7, r5, SLB_ESID_V@h
- beq 3f
- slbmte r6, r5
-3: addi r8, r8, 16
- .endr
-
-4: lwz r7, KVM_HOST_LPID(r10)
- mtspr SPRN_LPID, r7
- mtspr SPRN_PID, r0
- ld r8, KVM_HOST_LPCR(r10)
- mtspr SPRN_LPCR, r8
- isync
- li r0, KVM_GUEST_MODE_NONE
- stb r0, HSTATE_IN_GUEST(r13)
-
- /*
- * Turn on the MMU and jump to C code
- */
- bcl 20, 31, .+4
-5: mflr r3
- addi r3, r3, 9f - 5b
- li r4, -1
- rldimi r3, r4, 62, 0 /* ensure 0xc000000000000000 bits are set */
- ld r4, PACAKMSR(r13)
- mtspr SPRN_SRR0, r3
- mtspr SPRN_SRR1, r4
- RFI_TO_KERNEL
-9: addi r3, r1, STACK_FRAME_OVERHEAD
- bl kvmppc_bad_interrupt
- b 9b
/*
* This mimics the MSR transition on IRQ delivery. The new guest MSR is taken
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index 84e5a2dc8be5..a7061ee3b157 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -90,6 +90,7 @@
#include <linux/migrate.h>
#include <linux/kvm_host.h>
#include <linux/ksm.h>
+#include <linux/of.h>
#include <asm/ultravisor.h>
#include <asm/mman.h>
#include <asm/kvm_ppc.h>
@@ -614,7 +615,7 @@ void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *slot,
/* Fetch the VMA if addr is not in the latest fetched one */
if (!vma || addr >= vma->vm_end) {
- vma = find_vma_intersection(kvm->mm, addr, addr+1);
+ vma = vma_lookup(kvm->mm, addr);
if (!vma) {
pr_err("Can't find VMA for gfn:0x%lx\n", gfn);
break;
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index d7733b07f489..6bc9425acb32 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -25,6 +25,7 @@
#include <asm/cputable.h>
#include <asm/cacheflush.h>
#include <linux/uaccess.h>
+#include <asm/interrupt.h>
#include <asm/io.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
@@ -493,7 +494,7 @@ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
if (!vcpu->arch.pending_exceptions) {
kvm_vcpu_block(vcpu);
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
- vcpu->stat.halt_wakeup++;
+ vcpu->stat.generic.halt_wakeup++;
/* Unset POW bit after we woke up */
msr &= ~MSR_POW;
@@ -1848,6 +1849,7 @@ static int kvmppc_vcpu_run_pr(struct kvm_vcpu *vcpu)
/* Make sure we save the guest TAR/EBB/DSCR state */
kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
+ srr_regs_clobbered();
out:
vcpu->mode = OUTSIDE_GUEST_MODE;
return ret;
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index 031c8015864a..ac14239f3424 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -378,7 +378,7 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
kvmppc_set_msr_fast(vcpu, kvmppc_get_msr(vcpu) | MSR_EE);
kvm_vcpu_block(vcpu);
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
- vcpu->stat.halt_wakeup++;
+ vcpu->stat.generic.halt_wakeup++;
return EMULATE_DONE;
case H_LOGICAL_CI_LOAD:
return kvmppc_h_pr_logical_ci_load(vcpu);
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index 1f492aa4c8d6..202046a83fc1 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -164,12 +164,15 @@ kvmppc_interrupt_pr:
/* 64-bit entry. Register usage at this point:
*
* SPRG_SCRATCH0 = guest R13
+ * R9 = HSTATE_IN_GUEST
* R12 = (guest CR << 32) | exit handler id
* R13 = PACA
* HSTATE.SCRATCH0 = guest R12
+ * HSTATE.SCRATCH2 = guest R9
*/
#ifdef CONFIG_PPC64
/* Match 32-bit entry */
+ ld r9,HSTATE_SCRATCH2(r13)
rotldi r12, r12, 32 /* Flip R12 halves for stw */
stw r12, HSTATE_SCRATCH1(r13) /* CR is now in the low half */
srdi r12, r12, 32 /* shift trap into low half */
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index e7219b6f5f9a..8cfab3547494 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -14,6 +14,7 @@
#include <linux/percpu.h>
#include <linux/cpumask.h>
#include <linux/uaccess.h>
+#include <linux/irqdomain.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
@@ -128,6 +129,71 @@ void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
EXPORT_SYMBOL_GPL(kvmppc_xive_push_vcpu);
/*
+ * Pull a vcpu's context from the XIVE on guest exit.
+ * This assumes we are in virtual mode (MMU on)
+ */
+void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu)
+{
+ void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
+
+ if (!vcpu->arch.xive_pushed)
+ return;
+
+ /*
+ * Should not have been pushed if there is no tima
+ */
+ if (WARN_ON(!tima))
+ return;
+
+ eieio();
+ /* First load to pull the context, we ignore the value */
+ __raw_readl(tima + TM_SPC_PULL_OS_CTX);
+ /* Second load to recover the context state (Words 0 and 1) */
+ vcpu->arch.xive_saved_state.w01 = __raw_readq(tima + TM_QW1_OS);
+
+ /* Fixup some of the state for the next load */
+ vcpu->arch.xive_saved_state.lsmfb = 0;
+ vcpu->arch.xive_saved_state.ack = 0xff;
+ vcpu->arch.xive_pushed = 0;
+ eieio();
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_pull_vcpu);
+
+void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu)
+{
+ void __iomem *esc_vaddr = (void __iomem *)vcpu->arch.xive_esc_vaddr;
+
+ if (!esc_vaddr)
+ return;
+
+ /* we are using XIVE with single escalation */
+
+ if (vcpu->arch.xive_esc_on) {
+ /*
+ * If we still have a pending escalation, abort the cede,
+ * and we must set PQ to 10 rather than 00 so that we don't
+ * potentially end up with two entries for the escalation
+ * interrupt in the XIVE interrupt queue. In that case
+ * we also don't want to set xive_esc_on to 1 here in
+ * case we race with xive_esc_irq().
+ */
+ vcpu->arch.ceded = 0;
+ /*
+ * The escalation interrupts are special as we don't EOI them.
+ * There is no need to use the load-after-store ordering offset
+ * to set PQ to 10 as we won't use StoreEOI.
+ */
+ __raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_10);
+ } else {
+ vcpu->arch.xive_esc_on = true;
+ mb();
+ __raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_00);
+ }
+ mb();
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_rearm_escalation);
+
+/*
* This is a simple trigger for a generic XIVE IRQ. This must
* only be called for interrupts that support a trigger page
*/
@@ -2075,6 +2141,36 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
return 0;
}
+int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
+{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+ /* The VM should have configured XICS mode before doing XICS hcalls. */
+ if (!kvmppc_xics_enabled(vcpu))
+ return H_TOO_HARD;
+
+ switch (req) {
+ case H_XIRR:
+ return xive_vm_h_xirr(vcpu);
+ case H_CPPR:
+ return xive_vm_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
+ case H_EOI:
+ return xive_vm_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
+ case H_IPI:
+ return xive_vm_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5));
+ case H_IPOLL:
+ return xive_vm_h_ipoll(vcpu, kvmppc_get_gpr(vcpu, 4));
+ case H_XIRR_X:
+ xive_vm_h_xirr(vcpu);
+ kvmppc_set_gpr(vcpu, 5, get_tb() + vc->tb_offset);
+ return H_SUCCESS;
+ }
+
+ return H_UNSUPPORTED;
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_xics_hcall);
+
int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
@@ -2257,21 +2353,3 @@ struct kvm_device_ops kvm_xive_ops = {
.get_attr = xive_get_attr,
.has_attr = xive_has_attr,
};
-
-void kvmppc_xive_init_module(void)
-{
- __xive_vm_h_xirr = xive_vm_h_xirr;
- __xive_vm_h_ipoll = xive_vm_h_ipoll;
- __xive_vm_h_ipi = xive_vm_h_ipi;
- __xive_vm_h_cppr = xive_vm_h_cppr;
- __xive_vm_h_eoi = xive_vm_h_eoi;
-}
-
-void kvmppc_xive_exit_module(void)
-{
- __xive_vm_h_xirr = NULL;
- __xive_vm_h_ipoll = NULL;
- __xive_vm_h_ipi = NULL;
- __xive_vm_h_cppr = NULL;
- __xive_vm_h_eoi = NULL;
-}
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
index 86c24a4ad809..afe9eeac6d56 100644
--- a/arch/powerpc/kvm/book3s_xive.h
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -289,13 +289,6 @@ extern int xive_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
extern int xive_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
extern int xive_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
-extern unsigned long (*__xive_vm_h_xirr)(struct kvm_vcpu *vcpu);
-extern unsigned long (*__xive_vm_h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server);
-extern int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
- unsigned long mfrr);
-extern int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr);
-extern int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr);
-
/*
* Common Xive routines for XICS-over-XIVE and XIVE native
*/
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index 76800c84f2a3..573ecaab3597 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -12,6 +12,7 @@
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/file.h>
+#include <linux/irqdomain.h>
#include <asm/uaccess.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
@@ -1281,13 +1282,3 @@ struct kvm_device_ops kvm_xive_native_ops = {
.has_attr = kvmppc_xive_native_has_attr,
.mmap = kvmppc_xive_native_mmap,
};
-
-void kvmppc_xive_native_init_module(void)
-{
- ;
-}
-
-void kvmppc_xive_native_exit_module(void)
-{
- ;
-}
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 7d5fe43f85c4..551b30d84aee 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -36,29 +36,59 @@
unsigned long kvmppc_booke_handlers;
-struct kvm_stats_debugfs_item debugfs_entries[] = {
- VCPU_STAT("mmio", mmio_exits),
- VCPU_STAT("sig", signal_exits),
- VCPU_STAT("itlb_r", itlb_real_miss_exits),
- VCPU_STAT("itlb_v", itlb_virt_miss_exits),
- VCPU_STAT("dtlb_r", dtlb_real_miss_exits),
- VCPU_STAT("dtlb_v", dtlb_virt_miss_exits),
- VCPU_STAT("sysc", syscall_exits),
- VCPU_STAT("isi", isi_exits),
- VCPU_STAT("dsi", dsi_exits),
- VCPU_STAT("inst_emu", emulated_inst_exits),
- VCPU_STAT("dec", dec_exits),
- VCPU_STAT("ext_intr", ext_intr_exits),
- VCPU_STAT("halt_successful_poll", halt_successful_poll),
- VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
- VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
- VCPU_STAT("halt_wakeup", halt_wakeup),
- VCPU_STAT("doorbell", dbell_exits),
- VCPU_STAT("guest doorbell", gdbell_exits),
- VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
- VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
- VM_STAT("remote_tlb_flush", remote_tlb_flush),
- { NULL }
+const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
+ KVM_GENERIC_VM_STATS(),
+ STATS_DESC_ICOUNTER(VM, num_2M_pages),
+ STATS_DESC_ICOUNTER(VM, num_1G_pages)
+};
+static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
+ sizeof(struct kvm_vm_stat) / sizeof(u64));
+
+const struct kvm_stats_header kvm_vm_stats_header = {
+ .name_size = KVM_STATS_NAME_SIZE,
+ .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
+ .id_offset = sizeof(struct kvm_stats_header),
+ .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+ .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+ sizeof(kvm_vm_stats_desc),
+};
+
+const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
+ KVM_GENERIC_VCPU_STATS(),
+ STATS_DESC_COUNTER(VCPU, sum_exits),
+ STATS_DESC_COUNTER(VCPU, mmio_exits),
+ STATS_DESC_COUNTER(VCPU, signal_exits),
+ STATS_DESC_COUNTER(VCPU, light_exits),
+ STATS_DESC_COUNTER(VCPU, itlb_real_miss_exits),
+ STATS_DESC_COUNTER(VCPU, itlb_virt_miss_exits),
+ STATS_DESC_COUNTER(VCPU, dtlb_real_miss_exits),
+ STATS_DESC_COUNTER(VCPU, dtlb_virt_miss_exits),
+ STATS_DESC_COUNTER(VCPU, syscall_exits),
+ STATS_DESC_COUNTER(VCPU, isi_exits),
+ STATS_DESC_COUNTER(VCPU, dsi_exits),
+ STATS_DESC_COUNTER(VCPU, emulated_inst_exits),
+ STATS_DESC_COUNTER(VCPU, dec_exits),
+ STATS_DESC_COUNTER(VCPU, ext_intr_exits),
+ STATS_DESC_TIME_NSEC(VCPU, halt_wait_ns),
+ STATS_DESC_COUNTER(VCPU, halt_successful_wait),
+ STATS_DESC_COUNTER(VCPU, dbell_exits),
+ STATS_DESC_COUNTER(VCPU, gdbell_exits),
+ STATS_DESC_COUNTER(VCPU, ld),
+ STATS_DESC_COUNTER(VCPU, st),
+ STATS_DESC_COUNTER(VCPU, pthru_all),
+ STATS_DESC_COUNTER(VCPU, pthru_host),
+ STATS_DESC_COUNTER(VCPU, pthru_bad_aff)
+};
+static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
+ sizeof(struct kvm_vcpu_stat) / sizeof(u64));
+
+const struct kvm_stats_header kvm_vcpu_stats_header = {
+ .name_size = KVM_STATS_NAME_SIZE,
+ .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
+ .id_offset = sizeof(struct kvm_stats_header),
+ .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+ .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+ sizeof(kvm_vcpu_stats_desc),
};
/* TODO: use vcpu_printf() */
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index a2a68a958fa0..be33b5321a76 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -682,6 +682,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = !!(hv_enabled && kvmppc_hv_ops->enable_dawr1 &&
!kvmppc_hv_ops->enable_dawr1(NULL));
break;
+ case KVM_CAP_PPC_RPT_INVALIDATE:
+ r = 1;
+ break;
#endif
default:
r = 0;
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index cc1a8a0f311e..99a7c9132422 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -39,7 +39,7 @@ extra-$(CONFIG_PPC64) += crtsavres.o
endif
obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
- memcpy_power7.o
+ memcpy_power7.o restart_table.o
obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
memcpy_64.o copy_mc_64.o
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 870b30d9be2f..f9a3019e37b4 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -18,8 +18,7 @@
#include <asm/setup.h>
#include <asm/inst.h>
-static int __patch_instruction(struct ppc_inst *exec_addr, struct ppc_inst instr,
- struct ppc_inst *patch_addr)
+static int __patch_instruction(u32 *exec_addr, struct ppc_inst instr, u32 *patch_addr)
{
if (!ppc_inst_prefixed(instr)) {
u32 val = ppc_inst_val(instr);
@@ -40,7 +39,7 @@ failed:
return -EFAULT;
}
-int raw_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr)
+int raw_patch_instruction(u32 *addr, struct ppc_inst instr)
{
return __patch_instruction(addr, instr, addr);
}
@@ -70,22 +69,16 @@ static int text_area_cpu_down(unsigned int cpu)
}
/*
- * Run as a late init call. This allows all the boot time patching to be done
- * simply by patching the code, and then we're called here prior to
- * mark_rodata_ro(), which happens after all init calls are run. Although
- * BUG_ON() is rude, in this case it should only happen if ENOMEM, and we judge
- * it as being preferable to a kernel that will crash later when someone tries
- * to use patch_instruction().
+ * Although BUG_ON() is rude, in this case it should only happen if ENOMEM, and
+ * we judge it as being preferable to a kernel that will crash later when
+ * someone tries to use patch_instruction().
*/
-static int __init setup_text_poke_area(void)
+void __init poking_init(void)
{
BUG_ON(!cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
"powerpc/text_poke:online", text_area_cpu_up,
text_area_cpu_down));
-
- return 0;
}
-late_initcall(setup_text_poke_area);
/*
* This can be called for kernel text or a module.
@@ -148,10 +141,10 @@ static inline int unmap_patch_area(unsigned long addr)
return 0;
}
-static int do_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr)
+static int do_patch_instruction(u32 *addr, struct ppc_inst instr)
{
int err;
- struct ppc_inst *patch_addr = NULL;
+ u32 *patch_addr = NULL;
unsigned long flags;
unsigned long text_poke_addr;
unsigned long kaddr = (unsigned long)addr;
@@ -172,7 +165,7 @@ static int do_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr)
goto out;
}
- patch_addr = (struct ppc_inst *)(text_poke_addr + (kaddr & ~PAGE_MASK));
+ patch_addr = (u32 *)(text_poke_addr + (kaddr & ~PAGE_MASK));
__patch_instruction(addr, instr, patch_addr);
@@ -187,14 +180,14 @@ out:
}
#else /* !CONFIG_STRICT_KERNEL_RWX */
-static int do_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr)
+static int do_patch_instruction(u32 *addr, struct ppc_inst instr)
{
return raw_patch_instruction(addr, instr);
}
#endif /* CONFIG_STRICT_KERNEL_RWX */
-int patch_instruction(struct ppc_inst *addr, struct ppc_inst instr)
+int patch_instruction(u32 *addr, struct ppc_inst instr)
{
/* Make sure we aren't patching a freed init section */
if (init_mem_is_free && init_section_contains(addr, 4)) {
@@ -205,7 +198,7 @@ int patch_instruction(struct ppc_inst *addr, struct ppc_inst instr)
}
NOKPROBE_SYMBOL(patch_instruction);
-int patch_branch(struct ppc_inst *addr, unsigned long target, int flags)
+int patch_branch(u32 *addr, unsigned long target, int flags)
{
struct ppc_inst instr;
@@ -257,8 +250,7 @@ bool is_conditional_branch(struct ppc_inst instr)
}
NOKPROBE_SYMBOL(is_conditional_branch);
-int create_branch(struct ppc_inst *instr,
- const struct ppc_inst *addr,
+int create_branch(struct ppc_inst *instr, const u32 *addr,
unsigned long target, int flags)
{
long offset;
@@ -278,7 +270,7 @@ int create_branch(struct ppc_inst *instr,
return 0;
}
-int create_cond_branch(struct ppc_inst *instr, const struct ppc_inst *addr,
+int create_cond_branch(struct ppc_inst *instr, const u32 *addr,
unsigned long target, int flags)
{
long offset;
@@ -325,39 +317,39 @@ int instr_is_relative_link_branch(struct ppc_inst instr)
return instr_is_relative_branch(instr) && (ppc_inst_val(instr) & BRANCH_SET_LINK);
}
-static unsigned long branch_iform_target(const struct ppc_inst *instr)
+static unsigned long branch_iform_target(const u32 *instr)
{
signed long imm;
- imm = ppc_inst_val(*instr) & 0x3FFFFFC;
+ imm = ppc_inst_val(ppc_inst_read(instr)) & 0x3FFFFFC;
/* If the top bit of the immediate value is set this is negative */
if (imm & 0x2000000)
imm -= 0x4000000;
- if ((ppc_inst_val(*instr) & BRANCH_ABSOLUTE) == 0)
+ if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0)
imm += (unsigned long)instr;
return (unsigned long)imm;
}
-static unsigned long branch_bform_target(const struct ppc_inst *instr)
+static unsigned long branch_bform_target(const u32 *instr)
{
signed long imm;
- imm = ppc_inst_val(*instr) & 0xFFFC;
+ imm = ppc_inst_val(ppc_inst_read(instr)) & 0xFFFC;
/* If the top bit of the immediate value is set this is negative */
if (imm & 0x8000)
imm -= 0x10000;
- if ((ppc_inst_val(*instr) & BRANCH_ABSOLUTE) == 0)
+ if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0)
imm += (unsigned long)instr;
return (unsigned long)imm;
}
-unsigned long branch_target(const struct ppc_inst *instr)
+unsigned long branch_target(const u32 *instr)
{
if (instr_is_branch_iform(ppc_inst_read(instr)))
return branch_iform_target(instr);
@@ -367,17 +359,7 @@ unsigned long branch_target(const struct ppc_inst *instr)
return 0;
}
-int instr_is_branch_to_addr(const struct ppc_inst *instr, unsigned long addr)
-{
- if (instr_is_branch_iform(ppc_inst_read(instr)) ||
- instr_is_branch_bform(ppc_inst_read(instr)))
- return branch_target(instr) == addr;
-
- return 0;
-}
-
-int translate_branch(struct ppc_inst *instr, const struct ppc_inst *dest,
- const struct ppc_inst *src)
+int translate_branch(struct ppc_inst *instr, const u32 *dest, const u32 *src)
{
unsigned long target;
target = branch_target(src);
@@ -404,12 +386,21 @@ void __patch_exception(int exc, unsigned long addr)
* instruction of the exception, not the first one
*/
- patch_branch((struct ppc_inst *)(ibase + (exc / 4) + 1), addr, 0);
+ patch_branch(ibase + (exc / 4) + 1, addr, 0);
}
#endif
#ifdef CONFIG_CODE_PATCHING_SELFTEST
+static int instr_is_branch_to_addr(const u32 *instr, unsigned long addr)
+{
+ if (instr_is_branch_iform(ppc_inst_read(instr)) ||
+ instr_is_branch_bform(ppc_inst_read(instr)))
+ return branch_target(instr) == addr;
+
+ return 0;
+}
+
static void __init test_trampoline(void)
{
asm ("nop;\n");
@@ -422,9 +413,9 @@ static void __init test_branch_iform(void)
{
int err;
struct ppc_inst instr;
- unsigned long addr;
-
- addr = (unsigned long)&instr;
+ u32 tmp[2];
+ u32 *iptr = tmp;
+ unsigned long addr = (unsigned long)tmp;
/* The simplest case, branch to self, no flags */
check(instr_is_branch_iform(ppc_inst(0x48000000)));
@@ -445,63 +436,68 @@ static void __init test_branch_iform(void)
check(!instr_is_branch_iform(ppc_inst(0x7bfffffd)));
/* Absolute branch to 0x100 */
- instr = ppc_inst(0x48000103);
- check(instr_is_branch_to_addr(&instr, 0x100));
+ patch_instruction(iptr, ppc_inst(0x48000103));
+ check(instr_is_branch_to_addr(iptr, 0x100));
/* Absolute branch to 0x420fc */
- instr = ppc_inst(0x480420ff);
- check(instr_is_branch_to_addr(&instr, 0x420fc));
+ patch_instruction(iptr, ppc_inst(0x480420ff));
+ check(instr_is_branch_to_addr(iptr, 0x420fc));
/* Maximum positive relative branch, + 20MB - 4B */
- instr = ppc_inst(0x49fffffc);
- check(instr_is_branch_to_addr(&instr, addr + 0x1FFFFFC));
+ patch_instruction(iptr, ppc_inst(0x49fffffc));
+ check(instr_is_branch_to_addr(iptr, addr + 0x1FFFFFC));
/* Smallest negative relative branch, - 4B */
- instr = ppc_inst(0x4bfffffc);
- check(instr_is_branch_to_addr(&instr, addr - 4));
+ patch_instruction(iptr, ppc_inst(0x4bfffffc));
+ check(instr_is_branch_to_addr(iptr, addr - 4));
/* Largest negative relative branch, - 32 MB */
- instr = ppc_inst(0x4a000000);
- check(instr_is_branch_to_addr(&instr, addr - 0x2000000));
+ patch_instruction(iptr, ppc_inst(0x4a000000));
+ check(instr_is_branch_to_addr(iptr, addr - 0x2000000));
/* Branch to self, with link */
- err = create_branch(&instr, &instr, addr, BRANCH_SET_LINK);
- check(instr_is_branch_to_addr(&instr, addr));
+ err = create_branch(&instr, iptr, addr, BRANCH_SET_LINK);
+ patch_instruction(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr));
/* Branch to self - 0x100, with link */
- err = create_branch(&instr, &instr, addr - 0x100, BRANCH_SET_LINK);
- check(instr_is_branch_to_addr(&instr, addr - 0x100));
+ err = create_branch(&instr, iptr, addr - 0x100, BRANCH_SET_LINK);
+ patch_instruction(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr - 0x100));
/* Branch to self + 0x100, no link */
- err = create_branch(&instr, &instr, addr + 0x100, 0);
- check(instr_is_branch_to_addr(&instr, addr + 0x100));
+ err = create_branch(&instr, iptr, addr + 0x100, 0);
+ patch_instruction(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr + 0x100));
/* Maximum relative negative offset, - 32 MB */
- err = create_branch(&instr, &instr, addr - 0x2000000, BRANCH_SET_LINK);
- check(instr_is_branch_to_addr(&instr, addr - 0x2000000));
+ err = create_branch(&instr, iptr, addr - 0x2000000, BRANCH_SET_LINK);
+ patch_instruction(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr - 0x2000000));
/* Out of range relative negative offset, - 32 MB + 4*/
- err = create_branch(&instr, &instr, addr - 0x2000004, BRANCH_SET_LINK);
+ err = create_branch(&instr, iptr, addr - 0x2000004, BRANCH_SET_LINK);
check(err);
/* Out of range relative positive offset, + 32 MB */
- err = create_branch(&instr, &instr, addr + 0x2000000, BRANCH_SET_LINK);
+ err = create_branch(&instr, iptr, addr + 0x2000000, BRANCH_SET_LINK);
check(err);
/* Unaligned target */
- err = create_branch(&instr, &instr, addr + 3, BRANCH_SET_LINK);
+ err = create_branch(&instr, iptr, addr + 3, BRANCH_SET_LINK);
check(err);
/* Check flags are masked correctly */
- err = create_branch(&instr, &instr, addr, 0xFFFFFFFC);
- check(instr_is_branch_to_addr(&instr, addr));
+ err = create_branch(&instr, iptr, addr, 0xFFFFFFFC);
+ patch_instruction(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr));
check(ppc_inst_equal(instr, ppc_inst(0x48000000)));
}
static void __init test_create_function_call(void)
{
- struct ppc_inst *iptr;
+ u32 *iptr;
unsigned long dest;
struct ppc_inst instr;
/* Check we can create a function call */
- iptr = (struct ppc_inst *)ppc_function_entry(test_trampoline);
+ iptr = (u32 *)ppc_function_entry(test_trampoline);
dest = ppc_function_entry(test_create_function_call);
create_branch(&instr, iptr, dest, BRANCH_SET_LINK);
patch_instruction(iptr, instr);
@@ -512,10 +508,11 @@ static void __init test_branch_bform(void)
{
int err;
unsigned long addr;
- struct ppc_inst *iptr, instr;
+ struct ppc_inst instr;
+ u32 tmp[2];
+ u32 *iptr = tmp;
unsigned int flags;
- iptr = &instr;
addr = (unsigned long)iptr;
/* The simplest case, branch to self, no flags */
@@ -528,39 +525,43 @@ static void __init test_branch_bform(void)
check(!instr_is_branch_bform(ppc_inst(0x7bffffff)));
/* Absolute conditional branch to 0x100 */
- instr = ppc_inst(0x43ff0103);
- check(instr_is_branch_to_addr(&instr, 0x100));
+ patch_instruction(iptr, ppc_inst(0x43ff0103));
+ check(instr_is_branch_to_addr(iptr, 0x100));
/* Absolute conditional branch to 0x20fc */
- instr = ppc_inst(0x43ff20ff);
- check(instr_is_branch_to_addr(&instr, 0x20fc));
+ patch_instruction(iptr, ppc_inst(0x43ff20ff));
+ check(instr_is_branch_to_addr(iptr, 0x20fc));
/* Maximum positive relative conditional branch, + 32 KB - 4B */
- instr = ppc_inst(0x43ff7ffc);
- check(instr_is_branch_to_addr(&instr, addr + 0x7FFC));
+ patch_instruction(iptr, ppc_inst(0x43ff7ffc));
+ check(instr_is_branch_to_addr(iptr, addr + 0x7FFC));
/* Smallest negative relative conditional branch, - 4B */
- instr = ppc_inst(0x43fffffc);
- check(instr_is_branch_to_addr(&instr, addr - 4));
+ patch_instruction(iptr, ppc_inst(0x43fffffc));
+ check(instr_is_branch_to_addr(iptr, addr - 4));
/* Largest negative relative conditional branch, - 32 KB */
- instr = ppc_inst(0x43ff8000);
- check(instr_is_branch_to_addr(&instr, addr - 0x8000));
+ patch_instruction(iptr, ppc_inst(0x43ff8000));
+ check(instr_is_branch_to_addr(iptr, addr - 0x8000));
/* All condition code bits set & link */
flags = 0x3ff000 | BRANCH_SET_LINK;
/* Branch to self */
err = create_cond_branch(&instr, iptr, addr, flags);
- check(instr_is_branch_to_addr(&instr, addr));
+ patch_instruction(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr));
/* Branch to self - 0x100 */
err = create_cond_branch(&instr, iptr, addr - 0x100, flags);
- check(instr_is_branch_to_addr(&instr, addr - 0x100));
+ patch_instruction(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr - 0x100));
/* Branch to self + 0x100 */
err = create_cond_branch(&instr, iptr, addr + 0x100, flags);
- check(instr_is_branch_to_addr(&instr, addr + 0x100));
+ patch_instruction(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr + 0x100));
/* Maximum relative negative offset, - 32 KB */
err = create_cond_branch(&instr, iptr, addr - 0x8000, flags);
- check(instr_is_branch_to_addr(&instr, addr - 0x8000));
+ patch_instruction(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr - 0x8000));
/* Out of range relative negative offset, - 32 KB + 4*/
err = create_cond_branch(&instr, iptr, addr - 0x8004, flags);
@@ -576,7 +577,8 @@ static void __init test_branch_bform(void)
/* Check flags are masked correctly */
err = create_cond_branch(&instr, iptr, addr, 0xFFFFFFFC);
- check(instr_is_branch_to_addr(&instr, addr));
+ patch_instruction(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr));
check(ppc_inst_equal(instr, ppc_inst(0x43FF0000)));
}
@@ -715,9 +717,9 @@ static void __init test_prefixed_patching(void)
extern unsigned int code_patching_test1_expected[];
extern unsigned int end_code_patching_test1[];
- __patch_instruction((struct ppc_inst *)code_patching_test1,
+ __patch_instruction(code_patching_test1,
ppc_inst_prefix(OP_PREFIX << 26, 0x00000000),
- (struct ppc_inst *)code_patching_test1);
+ code_patching_test1);
check(!memcmp(code_patching_test1,
code_patching_test1_expected,
diff --git a/arch/powerpc/lib/error-inject.c b/arch/powerpc/lib/error-inject.c
index 407b992fb02f..e834079d2b5c 100644
--- a/arch/powerpc/lib/error-inject.c
+++ b/arch/powerpc/lib/error-inject.c
@@ -11,6 +11,6 @@ void override_function_with_return(struct pt_regs *regs)
* function in the kernel/module, captured on a kprobe. We don't need
* to worry about 32-bit userspace on a 64-bit kernel.
*/
- regs->nip = regs->link;
+ regs_set_return_ip(regs, regs->link);
}
NOKPROBE_SYMBOL(override_function_with_return);
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index fe26f2fa0f3f..cda17bee5afe 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -17,6 +17,7 @@
#include <linux/stop_machine.h>
#include <asm/cputable.h>
#include <asm/code-patching.h>
+#include <asm/interrupt.h>
#include <asm/page.h>
#include <asm/sections.h>
#include <asm/setup.h>
@@ -33,26 +34,25 @@ struct fixup_entry {
long alt_end_off;
};
-static struct ppc_inst *calc_addr(struct fixup_entry *fcur, long offset)
+static u32 *calc_addr(struct fixup_entry *fcur, long offset)
{
/*
* We store the offset to the code as a negative offset from
* the start of the alt_entry, to support the VDSO. This
* routine converts that back into an actual address.
*/
- return (struct ppc_inst *)((unsigned long)fcur + offset);
+ return (u32 *)((unsigned long)fcur + offset);
}
-static int patch_alt_instruction(struct ppc_inst *src, struct ppc_inst *dest,
- struct ppc_inst *alt_start, struct ppc_inst *alt_end)
+static int patch_alt_instruction(u32 *src, u32 *dest, u32 *alt_start, u32 *alt_end)
{
int err;
struct ppc_inst instr;
instr = ppc_inst_read(src);
- if (instr_is_relative_branch(*src)) {
- struct ppc_inst *target = (struct ppc_inst *)branch_target(src);
+ if (instr_is_relative_branch(ppc_inst_read(src))) {
+ u32 *target = (u32 *)branch_target(src);
/* Branch within the section doesn't need translating */
if (target < alt_start || target > alt_end) {
@@ -69,7 +69,7 @@ static int patch_alt_instruction(struct ppc_inst *src, struct ppc_inst *dest,
static int patch_feature_section(unsigned long value, struct fixup_entry *fcur)
{
- struct ppc_inst *start, *end, *alt_start, *alt_end, *src, *dest, nop;
+ u32 *start, *end, *alt_start, *alt_end, *src, *dest;
start = calc_addr(fcur, fcur->start_off);
end = calc_addr(fcur, fcur->end_off);
@@ -91,9 +91,8 @@ static int patch_feature_section(unsigned long value, struct fixup_entry *fcur)
return 1;
}
- nop = ppc_inst(PPC_INST_NOP);
- for (; dest < end; dest = ppc_inst_next(dest, &nop))
- raw_patch_instruction(dest, nop);
+ for (; dest < end; dest++)
+ raw_patch_instruction(dest, ppc_inst(PPC_RAW_NOP()));
return 0;
}
@@ -128,21 +127,21 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types)
start = PTRRELOC(&__start___stf_entry_barrier_fixup);
end = PTRRELOC(&__stop___stf_entry_barrier_fixup);
- instrs[0] = 0x60000000; /* nop */
- instrs[1] = 0x60000000; /* nop */
- instrs[2] = 0x60000000; /* nop */
+ instrs[0] = PPC_RAW_NOP();
+ instrs[1] = PPC_RAW_NOP();
+ instrs[2] = PPC_RAW_NOP();
i = 0;
if (types & STF_BARRIER_FALLBACK) {
- instrs[i++] = 0x7d4802a6; /* mflr r10 */
- instrs[i++] = 0x60000000; /* branch patched below */
- instrs[i++] = 0x7d4803a6; /* mtlr r10 */
+ instrs[i++] = PPC_RAW_MFLR(_R10);
+ instrs[i++] = PPC_RAW_NOP(); /* branch patched below */
+ instrs[i++] = PPC_RAW_MTLR(_R10);
} else if (types & STF_BARRIER_EIEIO) {
- instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */
+ instrs[i++] = PPC_RAW_EIEIO() | 0x02000000; /* eieio + bit 6 hint */
} else if (types & STF_BARRIER_SYNC_ORI) {
- instrs[i++] = 0x7c0004ac; /* hwsync */
- instrs[i++] = 0xe94d0000; /* ld r10,0(r13) */
- instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+ instrs[i++] = PPC_RAW_SYNC();
+ instrs[i++] = PPC_RAW_LD(_R10, _R13, 0);
+ instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
}
for (i = 0; start < end; start++, i++) {
@@ -152,14 +151,14 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types)
// See comment in do_entry_flush_fixups() RE order of patching
if (types & STF_BARRIER_FALLBACK) {
- patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
- patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
- patch_branch((struct ppc_inst *)(dest + 1),
+ patch_instruction(dest, ppc_inst(instrs[0]));
+ patch_instruction(dest + 2, ppc_inst(instrs[2]));
+ patch_branch(dest + 1,
(unsigned long)&stf_barrier_fallback, BRANCH_SET_LINK);
} else {
- patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
- patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
- patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+ patch_instruction(dest + 1, ppc_inst(instrs[1]));
+ patch_instruction(dest + 2, ppc_inst(instrs[2]));
+ patch_instruction(dest, ppc_inst(instrs[0]));
}
}
@@ -180,32 +179,31 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
start = PTRRELOC(&__start___stf_exit_barrier_fixup);
end = PTRRELOC(&__stop___stf_exit_barrier_fixup);
- instrs[0] = 0x60000000; /* nop */
- instrs[1] = 0x60000000; /* nop */
- instrs[2] = 0x60000000; /* nop */
- instrs[3] = 0x60000000; /* nop */
- instrs[4] = 0x60000000; /* nop */
- instrs[5] = 0x60000000; /* nop */
+ instrs[0] = PPC_RAW_NOP();
+ instrs[1] = PPC_RAW_NOP();
+ instrs[2] = PPC_RAW_NOP();
+ instrs[3] = PPC_RAW_NOP();
+ instrs[4] = PPC_RAW_NOP();
+ instrs[5] = PPC_RAW_NOP();
i = 0;
if (types & STF_BARRIER_FALLBACK || types & STF_BARRIER_SYNC_ORI) {
if (cpu_has_feature(CPU_FTR_HVMODE)) {
- instrs[i++] = 0x7db14ba6; /* mtspr 0x131, r13 (HSPRG1) */
- instrs[i++] = 0x7db04aa6; /* mfspr r13, 0x130 (HSPRG0) */
+ instrs[i++] = PPC_RAW_MTSPR(SPRN_HSPRG1, _R13);
+ instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_HSPRG0);
} else {
- instrs[i++] = 0x7db243a6; /* mtsprg 2,r13 */
- instrs[i++] = 0x7db142a6; /* mfsprg r13,1 */
+ instrs[i++] = PPC_RAW_MTSPR(SPRN_SPRG2, _R13);
+ instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_SPRG1);
}
- instrs[i++] = 0x7c0004ac; /* hwsync */
- instrs[i++] = 0xe9ad0000; /* ld r13,0(r13) */
- instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
- if (cpu_has_feature(CPU_FTR_HVMODE)) {
- instrs[i++] = 0x7db14aa6; /* mfspr r13, 0x131 (HSPRG1) */
- } else {
- instrs[i++] = 0x7db242a6; /* mfsprg r13,2 */
- }
+ instrs[i++] = PPC_RAW_SYNC();
+ instrs[i++] = PPC_RAW_LD(_R13, _R13, 0);
+ instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
+ if (cpu_has_feature(CPU_FTR_HVMODE))
+ instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_HSPRG1);
+ else
+ instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_SPRG2);
} else if (types & STF_BARRIER_EIEIO) {
- instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */
+ instrs[i++] = PPC_RAW_EIEIO() | 0x02000000; /* eieio + bit 6 hint */
}
for (i = 0; start < end; start++, i++) {
@@ -213,12 +211,12 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
pr_devel("patching dest %lx\n", (unsigned long)dest);
- patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
- patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
- patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
- patch_instruction((struct ppc_inst *)(dest + 3), ppc_inst(instrs[3]));
- patch_instruction((struct ppc_inst *)(dest + 4), ppc_inst(instrs[4]));
- patch_instruction((struct ppc_inst *)(dest + 5), ppc_inst(instrs[5]));
+ patch_instruction(dest, ppc_inst(instrs[0]));
+ patch_instruction(dest + 1, ppc_inst(instrs[1]));
+ patch_instruction(dest + 2, ppc_inst(instrs[2]));
+ patch_instruction(dest + 3, ppc_inst(instrs[3]));
+ patch_instruction(dest + 4, ppc_inst(instrs[4]));
+ patch_instruction(dest + 5, ppc_inst(instrs[5]));
}
printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", i,
(types == STF_BARRIER_NONE) ? "no" :
@@ -228,6 +226,9 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
: "unknown");
}
+static bool stf_exit_reentrant = false;
+static bool rfi_exit_reentrant = false;
+
static int __do_stf_barrier_fixups(void *data)
{
enum stf_barrier_type *types = data;
@@ -242,11 +243,27 @@ void do_stf_barrier_fixups(enum stf_barrier_type types)
{
/*
* The call to the fallback entry flush, and the fallback/sync-ori exit
- * flush can not be safely patched in/out while other CPUs are executing
- * them. So call __do_stf_barrier_fixups() on one CPU while all other CPUs
- * spin in the stop machine core with interrupts hard disabled.
+ * flush can not be safely patched in/out while other CPUs are
+ * executing them. So call __do_stf_barrier_fixups() on one CPU while
+ * all other CPUs spin in the stop machine core with interrupts hard
+ * disabled.
+ *
+ * The branch to mark interrupt exits non-reentrant is enabled first,
+ * then stop_machine runs which will ensure all CPUs are out of the
+ * low level interrupt exit code before patching. After the patching,
+ * if allowed, then flip the branch to allow fast exits.
*/
+ static_branch_enable(&interrupt_exit_not_reentrant);
+
stop_machine(__do_stf_barrier_fixups, &types, NULL);
+
+ if ((types & STF_BARRIER_FALLBACK) || (types & STF_BARRIER_SYNC_ORI))
+ stf_exit_reentrant = false;
+ else
+ stf_exit_reentrant = true;
+
+ if (stf_exit_reentrant && rfi_exit_reentrant)
+ static_branch_disable(&interrupt_exit_not_reentrant);
}
void do_uaccess_flush_fixups(enum l1d_flush_type types)
@@ -258,35 +275,35 @@ void do_uaccess_flush_fixups(enum l1d_flush_type types)
start = PTRRELOC(&__start___uaccess_flush_fixup);
end = PTRRELOC(&__stop___uaccess_flush_fixup);
- instrs[0] = 0x60000000; /* nop */
- instrs[1] = 0x60000000; /* nop */
- instrs[2] = 0x60000000; /* nop */
- instrs[3] = 0x4e800020; /* blr */
+ instrs[0] = PPC_RAW_NOP();
+ instrs[1] = PPC_RAW_NOP();
+ instrs[2] = PPC_RAW_NOP();
+ instrs[3] = PPC_RAW_BLR();
i = 0;
if (types == L1D_FLUSH_FALLBACK) {
- instrs[3] = 0x60000000; /* nop */
+ instrs[3] = PPC_RAW_NOP();
/* fallthrough to fallback flush */
}
if (types & L1D_FLUSH_ORI) {
- instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
- instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/
+ instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
+ instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */
}
if (types & L1D_FLUSH_MTTRIG)
- instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
+ instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0);
for (i = 0; start < end; start++, i++) {
dest = (void *)start + *start;
pr_devel("patching dest %lx\n", (unsigned long)dest);
- patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+ patch_instruction(dest, ppc_inst(instrs[0]));
- patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
- patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
- patch_instruction((struct ppc_inst *)(dest + 3), ppc_inst(instrs[3]));
+ patch_instruction(dest + 1, ppc_inst(instrs[1]));
+ patch_instruction(dest + 2, ppc_inst(instrs[2]));
+ patch_instruction(dest + 3, ppc_inst(instrs[3]));
}
printk(KERN_DEBUG "uaccess-flush: patched %d locations (%s flush)\n", i,
@@ -306,24 +323,24 @@ static int __do_entry_flush_fixups(void *data)
long *start, *end;
int i;
- instrs[0] = 0x60000000; /* nop */
- instrs[1] = 0x60000000; /* nop */
- instrs[2] = 0x60000000; /* nop */
+ instrs[0] = PPC_RAW_NOP();
+ instrs[1] = PPC_RAW_NOP();
+ instrs[2] = PPC_RAW_NOP();
i = 0;
if (types == L1D_FLUSH_FALLBACK) {
- instrs[i++] = 0x7d4802a6; /* mflr r10 */
- instrs[i++] = 0x60000000; /* branch patched below */
- instrs[i++] = 0x7d4803a6; /* mtlr r10 */
+ instrs[i++] = PPC_RAW_MFLR(_R10);
+ instrs[i++] = PPC_RAW_NOP(); /* branch patched below */
+ instrs[i++] = PPC_RAW_MTLR(_R10);
}
if (types & L1D_FLUSH_ORI) {
- instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
- instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/
+ instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
+ instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */
}
if (types & L1D_FLUSH_MTTRIG)
- instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
+ instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0);
/*
* If we're patching in or out the fallback flush we need to be careful about the
@@ -358,14 +375,14 @@ static int __do_entry_flush_fixups(void *data)
pr_devel("patching dest %lx\n", (unsigned long)dest);
if (types == L1D_FLUSH_FALLBACK) {
- patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
- patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
- patch_branch((struct ppc_inst *)(dest + 1),
+ patch_instruction(dest, ppc_inst(instrs[0]));
+ patch_instruction(dest + 2, ppc_inst(instrs[2]));
+ patch_branch(dest + 1,
(unsigned long)&entry_flush_fallback, BRANCH_SET_LINK);
} else {
- patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
- patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
- patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+ patch_instruction(dest + 1, ppc_inst(instrs[1]));
+ patch_instruction(dest + 2, ppc_inst(instrs[2]));
+ patch_instruction(dest, ppc_inst(instrs[0]));
}
}
@@ -377,14 +394,14 @@ static int __do_entry_flush_fixups(void *data)
pr_devel("patching dest %lx\n", (unsigned long)dest);
if (types == L1D_FLUSH_FALLBACK) {
- patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
- patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
- patch_branch((struct ppc_inst *)(dest + 1),
+ patch_instruction(dest, ppc_inst(instrs[0]));
+ patch_instruction(dest + 2, ppc_inst(instrs[2]));
+ patch_branch(dest + 1,
(unsigned long)&scv_entry_flush_fallback, BRANCH_SET_LINK);
} else {
- patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
- patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
- patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+ patch_instruction(dest + 1, ppc_inst(instrs[1]));
+ patch_instruction(dest + 2, ppc_inst(instrs[2]));
+ patch_instruction(dest, ppc_inst(instrs[0]));
}
}
@@ -412,8 +429,9 @@ void do_entry_flush_fixups(enum l1d_flush_type types)
stop_machine(__do_entry_flush_fixups, &types, NULL);
}
-void do_rfi_flush_fixups(enum l1d_flush_type types)
+static int __do_rfi_flush_fixups(void *data)
{
+ enum l1d_flush_type types = *(enum l1d_flush_type *)data;
unsigned int instrs[3], *dest;
long *start, *end;
int i;
@@ -421,31 +439,31 @@ void do_rfi_flush_fixups(enum l1d_flush_type types)
start = PTRRELOC(&__start___rfi_flush_fixup);
end = PTRRELOC(&__stop___rfi_flush_fixup);
- instrs[0] = 0x60000000; /* nop */
- instrs[1] = 0x60000000; /* nop */
- instrs[2] = 0x60000000; /* nop */
+ instrs[0] = PPC_RAW_NOP();
+ instrs[1] = PPC_RAW_NOP();
+ instrs[2] = PPC_RAW_NOP();
if (types & L1D_FLUSH_FALLBACK)
/* b .+16 to fallback flush */
- instrs[0] = 0x48000010;
+ instrs[0] = PPC_INST_BRANCH | 16;
i = 0;
if (types & L1D_FLUSH_ORI) {
- instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
- instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/
+ instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
+ instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */
}
if (types & L1D_FLUSH_MTTRIG)
- instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
+ instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0);
for (i = 0; start < end; start++, i++) {
dest = (void *)start + *start;
pr_devel("patching dest %lx\n", (unsigned long)dest);
- patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
- patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
- patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+ patch_instruction(dest, ppc_inst(instrs[0]));
+ patch_instruction(dest + 1, ppc_inst(instrs[1]));
+ patch_instruction(dest + 2, ppc_inst(instrs[2]));
}
printk(KERN_DEBUG "rfi-flush: patched %d locations (%s flush)\n", i,
@@ -456,6 +474,29 @@ void do_rfi_flush_fixups(enum l1d_flush_type types)
: "ori type" :
(types & L1D_FLUSH_MTTRIG) ? "mttrig type"
: "unknown");
+
+ return 0;
+}
+
+void do_rfi_flush_fixups(enum l1d_flush_type types)
+{
+ /*
+ * stop_machine gets all CPUs out of the interrupt exit handler same
+ * as do_stf_barrier_fixups. do_rfi_flush_fixups patching can run
+ * without stop_machine, so this could be achieved with a broadcast
+ * IPI instead, but this matches the stf sequence.
+ */
+ static_branch_enable(&interrupt_exit_not_reentrant);
+
+ stop_machine(__do_rfi_flush_fixups, &types, NULL);
+
+ if (types & L1D_FLUSH_FALLBACK)
+ rfi_exit_reentrant = false;
+ else
+ rfi_exit_reentrant = true;
+
+ if (stf_exit_reentrant && rfi_exit_reentrant)
+ static_branch_disable(&interrupt_exit_not_reentrant);
}
void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end)
@@ -467,18 +508,18 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_
start = fixup_start;
end = fixup_end;
- instr = 0x60000000; /* nop */
+ instr = PPC_RAW_NOP();
if (enable) {
pr_info("barrier-nospec: using ORI speculation barrier\n");
- instr = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+ instr = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
}
for (i = 0; start < end; start++, i++) {
dest = (void *)start + *start;
pr_devel("patching dest %lx\n", (unsigned long)dest);
- patch_instruction((struct ppc_inst *)dest, ppc_inst(instr));
+ patch_instruction(dest, ppc_inst(instr));
}
printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i);
@@ -508,21 +549,21 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_
start = fixup_start;
end = fixup_end;
- instr[0] = PPC_INST_NOP;
- instr[1] = PPC_INST_NOP;
+ instr[0] = PPC_RAW_NOP();
+ instr[1] = PPC_RAW_NOP();
if (enable) {
pr_info("barrier-nospec: using isync; sync as speculation barrier\n");
- instr[0] = PPC_INST_ISYNC;
- instr[1] = PPC_INST_SYNC;
+ instr[0] = PPC_RAW_ISYNC();
+ instr[1] = PPC_RAW_SYNC();
}
for (i = 0; start < end; start++, i++) {
dest = (void *)start + *start;
pr_devel("patching dest %lx\n", (unsigned long)dest);
- patch_instruction((struct ppc_inst *)dest, ppc_inst(instr[0]));
- patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instr[1]));
+ patch_instruction(dest, ppc_inst(instr[0]));
+ patch_instruction(dest + 1, ppc_inst(instr[1]));
}
printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i);
@@ -536,7 +577,7 @@ static void patch_btb_flush_section(long *curr)
end = (void *)curr + *(curr + 1);
for (; start < end; start++) {
pr_devel("patching dest %lx\n", (unsigned long)start);
- patch_instruction((struct ppc_inst *)start, ppc_inst(PPC_INST_NOP));
+ patch_instruction(start, ppc_inst(PPC_RAW_NOP()));
}
}
@@ -555,7 +596,7 @@ void do_btb_flush_fixups(void)
void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
{
long *start, *end;
- struct ppc_inst *dest;
+ u32 *dest;
if (!(value & CPU_FTR_LWSYNC))
return ;
@@ -572,13 +613,14 @@ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
static void do_final_fixups(void)
{
#if defined(CONFIG_PPC64) && defined(CONFIG_RELOCATABLE)
- struct ppc_inst inst, *src, *dest, *end;
+ struct ppc_inst inst;
+ u32 *src, *dest, *end;
if (PHYSICAL_START == 0)
return;
- src = (struct ppc_inst *)(KERNELBASE + PHYSICAL_START);
- dest = (struct ppc_inst *)KERNELBASE;
+ src = (u32 *)(KERNELBASE + PHYSICAL_START);
+ dest = (u32 *)KERNELBASE;
end = (void *)src + (__end_interrupts - _stext);
while (src < end) {
diff --git a/arch/powerpc/lib/restart_table.c b/arch/powerpc/lib/restart_table.c
new file mode 100644
index 000000000000..bccb662c1b7b
--- /dev/null
+++ b/arch/powerpc/lib/restart_table.c
@@ -0,0 +1,56 @@
+#include <asm/interrupt.h>
+#include <asm/kprobes.h>
+
+struct soft_mask_table_entry {
+ unsigned long start;
+ unsigned long end;
+};
+
+struct restart_table_entry {
+ unsigned long start;
+ unsigned long end;
+ unsigned long fixup;
+};
+
+extern struct soft_mask_table_entry __start___soft_mask_table[];
+extern struct soft_mask_table_entry __stop___soft_mask_table[];
+
+extern struct restart_table_entry __start___restart_table[];
+extern struct restart_table_entry __stop___restart_table[];
+
+/* Given an address, look for it in the soft mask table */
+bool search_kernel_soft_mask_table(unsigned long addr)
+{
+ struct soft_mask_table_entry *smte = __start___soft_mask_table;
+
+ while (smte < __stop___soft_mask_table) {
+ unsigned long start = smte->start;
+ unsigned long end = smte->end;
+
+ if (addr >= start && addr < end)
+ return true;
+
+ smte++;
+ }
+ return false;
+}
+NOKPROBE_SYMBOL(search_kernel_soft_mask_table);
+
+/* Given an address, look for it in the kernel exception table */
+unsigned long search_kernel_restart_table(unsigned long addr)
+{
+ struct restart_table_entry *rte = __start___restart_table;
+
+ while (rte < __stop___restart_table) {
+ unsigned long start = rte->start;
+ unsigned long end = rte->end;
+ unsigned long fixup = rte->fixup;
+
+ if (addr >= start && addr < end)
+ return fixup;
+
+ rte++;
+ }
+ return 0;
+}
+NOKPROBE_SYMBOL(search_kernel_restart_table);
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 45bda2520755..d8d5f901cee1 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1700,6 +1700,28 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
op->val = regs->ccr & imm;
goto compute_done;
+ case 128: /* setb */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ /*
+ * 'ra' encodes the CR field number (bfa) in the top 3 bits.
+ * Since each CR field is 4 bits,
+ * we can simply mask off the bottom two bits (bfa * 4)
+ * to yield the first bit in the CR field.
+ */
+ ra = ra & ~0x3;
+ /* 'val' stores bits of the CR field (bfa) */
+ val = regs->ccr >> (CR0_SHIFT - ra);
+ /* checks if the LT bit of CR field (bfa) is set */
+ if (val & 8)
+ op->val = -1;
+ /* checks if the GT bit of CR field (bfa) is set */
+ else if (val & 4)
+ op->val = 1;
+ else
+ op->val = 0;
+ goto compute_done;
+
case 144: /* mtcrf */
op->type = COMPUTE + SETCC;
imm = 0xf0000000UL;
@@ -3203,7 +3225,7 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op)
default:
WARN_ON_ONCE(1);
}
- regs->nip = next_pc;
+ regs_set_return_ip(regs, next_pc);
}
NOKPROBE_SYMBOL(emulate_update_regs);
@@ -3541,7 +3563,7 @@ int emulate_step(struct pt_regs *regs, struct ppc_inst instr)
/* can't step mtmsr[d] that would clear MSR_RI */
return -1;
/* here op.val is the mask of bits to change */
- regs->msr = (regs->msr & ~op.val) | (val & op.val);
+ regs_set_return_msr(regs, (regs->msr & ~op.val) | (val & op.val));
goto instr_done;
#ifdef CONFIG_PPC64
@@ -3554,7 +3576,7 @@ int emulate_step(struct pt_regs *regs, struct ppc_inst instr)
if (IS_ENABLED(CONFIG_PPC_FAST_ENDIAN_SWITCH) &&
cpu_has_feature(CPU_FTR_REAL_LE) &&
regs->gpr[0] == 0x1ebe) {
- regs->msr ^= MSR_LE;
+ regs_set_return_msr(regs, regs->msr ^ MSR_LE);
goto instr_done;
}
regs->gpr[9] = regs->gpr[13];
@@ -3562,8 +3584,8 @@ int emulate_step(struct pt_regs *regs, struct ppc_inst instr)
regs->gpr[11] = regs->nip + 4;
regs->gpr[12] = regs->msr & MSR_MASK;
regs->gpr[13] = (unsigned long) get_paca();
- regs->nip = (unsigned long) &system_call_common;
- regs->msr = MSR_KERNEL;
+ regs_set_return_ip(regs, (unsigned long) &system_call_common);
+ regs_set_return_msr(regs, MSR_KERNEL);
return 1;
#ifdef CONFIG_PPC_BOOK3S_64
@@ -3573,8 +3595,8 @@ int emulate_step(struct pt_regs *regs, struct ppc_inst instr)
regs->gpr[11] = regs->nip + 4;
regs->gpr[12] = regs->msr & MSR_MASK;
regs->gpr[13] = (unsigned long) get_paca();
- regs->nip = (unsigned long) &system_call_vectored_emulate;
- regs->msr = MSR_KERNEL;
+ regs_set_return_ip(regs, (unsigned long) &system_call_vectored_emulate);
+ regs_set_return_msr(regs, MSR_KERNEL);
return 1;
#endif
@@ -3585,7 +3607,8 @@ int emulate_step(struct pt_regs *regs, struct ppc_inst instr)
return 0;
instr_done:
- regs->nip = truncate_if_32bit(regs->msr, regs->nip + GETLENGTH(op.type));
+ regs_set_return_ip(regs,
+ truncate_if_32bit(regs->msr, regs->nip + GETLENGTH(op.type)));
return 1;
}
NOKPROBE_SYMBOL(emulate_step);
diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c
index 783d1b85ecfe..8b4f6b3e96c4 100644
--- a/arch/powerpc/lib/test_emulate_step.c
+++ b/arch/powerpc/lib/test_emulate_step.c
@@ -53,6 +53,8 @@
ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \
PPC_RAW_ADDI(t, a, i))
+#define TEST_SETB(t, bfa) ppc_inst(PPC_INST_SETB | ___PPC_RT(t) | ___PPC_RA((bfa & 0x7) << 2))
+
static void __init init_pt_regs(struct pt_regs *regs)
{
@@ -824,8 +826,7 @@ static void __init test_plxvp_pstxvp(void)
* XTp = 32xTX + 2xTp
* let RA=3 R=0 D=d0||d1=0 R=0 Tp=1 TX=1
*/
- instr = ppc_inst_prefix(PPC_RAW_PLXVP(34, 0, 3, 0) >> 32,
- PPC_RAW_PLXVP(34, 0, 3, 0) & 0xffffffff);
+ instr = ppc_inst_prefix(PPC_RAW_PLXVP_P(34, 0, 3, 0), PPC_RAW_PLXVP_S(34, 0, 3, 0));
stepped = emulate_step(&regs, instr);
if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) {
@@ -853,8 +854,7 @@ static void __init test_plxvp_pstxvp(void)
* XSp = 32xSX + 2xSp
* let RA=3 D=d0||d1=0 R=0 Sp=1 SX=1
*/
- instr = ppc_inst_prefix(PPC_RAW_PSTXVP(34, 0, 3, 0) >> 32,
- PPC_RAW_PSTXVP(34, 0, 3, 0) & 0xffffffff);
+ instr = ppc_inst_prefix(PPC_RAW_PSTXVP_P(34, 0, 3, 0), PPC_RAW_PSTXVP_S(34, 0, 3, 0));
stepped = emulate_step(&regs, instr);
@@ -922,7 +922,7 @@ static struct compute_test compute_tests[] = {
.subtests = {
{
.descr = "R0 = LONG_MAX",
- .instr = ppc_inst(PPC_INST_NOP),
+ .instr = ppc_inst(PPC_RAW_NOP()),
.regs = {
.gpr[0] = LONG_MAX,
}
@@ -930,6 +930,33 @@ static struct compute_test compute_tests[] = {
}
},
{
+ .mnemonic = "setb",
+ .cpu_feature = CPU_FTR_ARCH_300,
+ .subtests = {
+ {
+ .descr = "BFA = 1, CR = GT",
+ .instr = TEST_SETB(20, 1),
+ .regs = {
+ .ccr = 0x4000000,
+ }
+ },
+ {
+ .descr = "BFA = 4, CR = LT",
+ .instr = TEST_SETB(20, 4),
+ .regs = {
+ .ccr = 0x8000,
+ }
+ },
+ {
+ .descr = "BFA = 5, CR = EQ",
+ .instr = TEST_SETB(20, 5),
+ .regs = {
+ .ccr = 0x200,
+ }
+ }
+ }
+ },
+ {
.mnemonic = "add",
.subtests = {
{
@@ -1582,6 +1609,7 @@ static int __init emulate_compute_instr(struct pt_regs *regs,
if (!regs || !ppc_inst_val(instr))
return -EINVAL;
+ /* This is not a return frame regs */
regs->nip = patch_site_addr(&patch__exec_instr);
analysed = analyse_instr(&op, regs, instr);
diff --git a/arch/powerpc/math-emu/math.c b/arch/powerpc/math-emu/math.c
index 327165f26ca6..36761bd00f38 100644
--- a/arch/powerpc/math-emu/math.c
+++ b/arch/powerpc/math-emu/math.c
@@ -453,7 +453,7 @@ do_mathemu(struct pt_regs *regs)
break;
}
- regs->nip += 4;
+ regs_add_return_ip(regs, 4);
return 0;
illegal:
diff --git a/arch/powerpc/math-emu/math_efp.c b/arch/powerpc/math-emu/math_efp.c
index 0a05e51964c1..39b84e7452e1 100644
--- a/arch/powerpc/math-emu/math_efp.c
+++ b/arch/powerpc/math-emu/math_efp.c
@@ -710,7 +710,7 @@ update_regs:
illegal:
if (have_e500_cpu_a005_erratum) {
/* according to e500 cpu a005 erratum, reissue efp inst */
- regs->nip -= 4;
+ regs_add_return_ip(regs, -4);
pr_debug("re-issue efp inst: %08lx\n", speinsn);
return 0;
}
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index c3df3a8501d4..eae4ec2988fc 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -5,7 +5,7 @@
ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
-obj-y := fault.o mem.o pgtable.o mmap.o maccess.o \
+obj-y := fault.o mem.o pgtable.o mmap.o maccess.o pageattr.o \
init_$(BITS).o pgtable_$(BITS).o \
pgtable-frag.o ioremap.o ioremap_$(BITS).o \
init-common.o mmu_context.o drmem.o \
@@ -13,7 +13,7 @@ obj-y := fault.o mem.o pgtable.o mmap.o maccess.o \
obj-$(CONFIG_PPC_MMU_NOHASH) += nohash/
obj-$(CONFIG_PPC_BOOK3S_32) += book3s32/
obj-$(CONFIG_PPC_BOOK3S_64) += book3s64/
-obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o
+obj-$(CONFIG_NUMA) += numa.o
obj-$(CONFIG_PPC_MM_SLICES) += slice.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
diff --git a/arch/powerpc/mm/book3s32/Makefile b/arch/powerpc/mm/book3s32/Makefile
index 7f0c8a78ba0c..15f4773643d2 100644
--- a/arch/powerpc/mm/book3s32/Makefile
+++ b/arch/powerpc/mm/book3s32/Makefile
@@ -10,3 +10,4 @@ obj-y += mmu.o mmu_context.o
obj-$(CONFIG_PPC_BOOK3S_603) += nohash_low.o
obj-$(CONFIG_PPC_BOOK3S_604) += hash_low.o tlb.o
obj-$(CONFIG_PPC_KUEP) += kuep.o
+obj-$(CONFIG_PPC_KUAP) += kuap.o
diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S
index fb4233a5bdf7..6925ce998557 100644
--- a/arch/powerpc/mm/book3s32/hash_low.S
+++ b/arch/powerpc/mm/book3s32/hash_low.S
@@ -27,8 +27,10 @@
#include <asm/code-patching-asm.h>
#ifdef CONFIG_PTE_64BIT
+#define PTE_T_SIZE 8
#define PTE_FLAGS_OFFSET 4 /* offset of PTE flags, in bytes */
#else
+#define PTE_T_SIZE 4
#define PTE_FLAGS_OFFSET 0
#endif
@@ -488,7 +490,7 @@ _GLOBAL(flush_hash_pages)
bne 2f
ble cr1,19f
addi r4,r4,0x1000
- addi r5,r5,PTE_SIZE
+ addi r5,r5,PTE_T_SIZE
addi r6,r6,-1
b 1b
@@ -573,7 +575,7 @@ _GLOBAL(flush_hash_pages)
8: ble cr1,9f /* if all ptes checked */
81: addi r6,r6,-1
- addi r5,r5,PTE_SIZE
+ addi r5,r5,PTE_T_SIZE
addi r4,r4,0x1000
lwz r0,0(r5) /* check next pte */
cmpwi cr1,r6,1
diff --git a/arch/powerpc/mm/book3s32/kuap.c b/arch/powerpc/mm/book3s32/kuap.c
new file mode 100644
index 000000000000..0f920f09af57
--- /dev/null
+++ b/arch/powerpc/mm/book3s32/kuap.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <asm/kup.h>
+#include <asm/smp.h>
+
+struct static_key_false disable_kuap_key;
+EXPORT_SYMBOL(disable_kuap_key);
+
+void kuap_lock_all_ool(void)
+{
+ kuap_lock_all();
+}
+EXPORT_SYMBOL(kuap_lock_all_ool);
+
+void kuap_unlock_all_ool(void)
+{
+ kuap_unlock_all();
+}
+EXPORT_SYMBOL(kuap_unlock_all_ool);
+
+void setup_kuap(bool disabled)
+{
+ if (!disabled)
+ kuap_lock_all_ool();
+
+ if (smp_processor_id() != boot_cpuid)
+ return;
+
+ if (disabled)
+ static_branch_enable(&disable_kuap_key);
+ else
+ pr_info("Activating Kernel Userspace Access Protection\n");
+}
diff --git a/arch/powerpc/mm/book3s32/kuep.c b/arch/powerpc/mm/book3s32/kuep.c
index 8ed1b8634839..c20733d6e02c 100644
--- a/arch/powerpc/mm/book3s32/kuep.c
+++ b/arch/powerpc/mm/book3s32/kuep.c
@@ -1,40 +1,20 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <asm/kup.h>
-#include <asm/reg.h>
-#include <asm/task_size_32.h>
-#include <asm/mmu.h>
+#include <asm/smp.h>
-#define KUEP_UPDATE_TWO_USER_SEGMENTS(n) do { \
- if (TASK_SIZE > ((n) << 28)) \
- mtsr(val1, (n) << 28); \
- if (TASK_SIZE > (((n) + 1) << 28)) \
- mtsr(val2, ((n) + 1) << 28); \
- val1 = (val1 + 0x222) & 0xf0ffffff; \
- val2 = (val2 + 0x222) & 0xf0ffffff; \
-} while (0)
+struct static_key_false disable_kuep_key;
-static __always_inline void kuep_update(u32 val)
+void setup_kuep(bool disabled)
{
- int val1 = val;
- int val2 = (val + 0x111) & 0xf0ffffff;
+ if (!disabled)
+ kuep_lock();
- KUEP_UPDATE_TWO_USER_SEGMENTS(0);
- KUEP_UPDATE_TWO_USER_SEGMENTS(2);
- KUEP_UPDATE_TWO_USER_SEGMENTS(4);
- KUEP_UPDATE_TWO_USER_SEGMENTS(6);
- KUEP_UPDATE_TWO_USER_SEGMENTS(8);
- KUEP_UPDATE_TWO_USER_SEGMENTS(10);
- KUEP_UPDATE_TWO_USER_SEGMENTS(12);
- KUEP_UPDATE_TWO_USER_SEGMENTS(14);
-}
+ if (smp_processor_id() != boot_cpuid)
+ return;
-void kuep_lock(void)
-{
- kuep_update(mfsr(0) | SR_NX);
-}
-
-void kuep_unlock(void)
-{
- kuep_update(mfsr(0) & ~SR_NX);
+ if (disabled)
+ static_branch_enable(&disable_kuep_key);
+ else
+ pr_info("Activating Kernel Userspace Execution Prevention\n");
}
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index 159930351d9f..27061583a010 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -445,26 +445,6 @@ void __init print_system_hash_info(void)
pr_info("Hash_mask = 0x%lx\n", Hash_mask);
}
-#ifdef CONFIG_PPC_KUEP
-void __init setup_kuep(bool disabled)
-{
- pr_info("Activating Kernel Userspace Execution Prevention\n");
-
- if (disabled)
- pr_warn("KUEP cannot be disabled yet on 6xx when compiled in\n");
-}
-#endif
-
-#ifdef CONFIG_PPC_KUAP
-void __init setup_kuap(bool disabled)
-{
- pr_info("Activating Kernel Userspace Access Protection\n");
-
- if (disabled)
- pr_warn("KUAP cannot be disabled yet on 6xx when compiled in\n");
-}
-#endif
-
void __init early_init_mmu(void)
{
}
diff --git a/arch/powerpc/mm/book3s32/mmu_context.c b/arch/powerpc/mm/book3s32/mmu_context.c
index 218996e40a8e..e2708e387dc3 100644
--- a/arch/powerpc/mm/book3s32/mmu_context.c
+++ b/arch/powerpc/mm/book3s32/mmu_context.c
@@ -24,6 +24,12 @@
#include <asm/mmu_context.h>
/*
+ * Room for two PTE pointers, usually the kernel and current user pointers
+ * to their respective root page table.
+ */
+void *abatron_pteptrs[2];
+
+/*
* On 32-bit PowerPC 6xx/7xx/7xxx CPUs, we use a set of 16 VSIDs
* (virtual segment identifiers) for each context. Although the
* hardware supports 24-bit VSIDs, and thus >1 million contexts,
@@ -39,19 +45,6 @@
#define LAST_CONTEXT 32767
#define FIRST_CONTEXT 1
-/*
- * This function defines the mapping from contexts to VSIDs (virtual
- * segment IDs). We use a skew on both the context and the high 4 bits
- * of the 32-bit virtual address (the "effective segment ID") in order
- * to spread out the entries in the MMU hash table. Note, if this
- * function is changed then arch/ppc/mm/hashtable.S will have to be
- * changed to correspond.
- *
- *
- * CTX_TO_VSID(ctx, va) (((ctx) * (897 * 16) + ((va) >> 28) * 0x111) \
- * & 0xffffff)
- */
-
static unsigned long next_mmu_context;
static unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1];
@@ -111,3 +104,32 @@ void __init mmu_context_init(void)
context_map[0] = (1 << FIRST_CONTEXT) - 1;
next_mmu_context = FIRST_CONTEXT;
}
+
+void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk)
+{
+ long id = next->context.id;
+ unsigned long val;
+
+ if (id < 0)
+ panic("mm_struct %p has no context ID", next);
+
+ isync();
+
+ val = CTX_TO_VSID(id, 0);
+ if (!kuep_is_disabled())
+ val |= SR_NX;
+ if (!kuap_is_disabled())
+ val |= SR_KS;
+
+ update_user_segments(val);
+
+ if (IS_ENABLED(CONFIG_BDI_SWITCH))
+ abatron_pteptrs[1] = next->pgd;
+
+ if (!mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ mtspr(SPRN_SDR1, rol32(__pa(next->pgd), 4) & 0xffff01ff);
+
+ mb(); /* sync */
+ isync();
+}
+EXPORT_SYMBOL(switch_mmu_context);
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 96d9aa164007..ac5720371c0d 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1522,8 +1522,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
}
EXPORT_SYMBOL_GPL(hash_page);
-DECLARE_INTERRUPT_HANDLER_RET(__do_hash_fault);
-DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
+DECLARE_INTERRUPT_HANDLER(__do_hash_fault);
+DEFINE_INTERRUPT_HANDLER(__do_hash_fault)
{
unsigned long ea = regs->dar;
unsigned long dsisr = regs->dsisr;
@@ -1533,6 +1533,11 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
unsigned int region_id;
long err;
+ if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) {
+ hash__do_page_fault(regs);
+ return;
+ }
+
region_id = get_region_id(ea);
if ((region_id == VMALLOC_REGION_ID) || (region_id == IO_REGION_ID))
mm = &init_mm;
@@ -1571,9 +1576,10 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
bad_page_fault(regs, SIGBUS);
}
err = 0;
- }
- return err;
+ } else if (err) {
+ hash__do_page_fault(regs);
+ }
}
/*
@@ -1582,13 +1588,6 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
*/
DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault)
{
- unsigned long dsisr = regs->dsisr;
-
- if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) {
- hash__do_page_fault(regs);
- return 0;
- }
-
/*
* If we are in an "NMI" (e.g., an interrupt when soft-disabled), then
* don't call hash_page, just fail the fault. This is required to
@@ -1607,8 +1606,7 @@ DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault)
return 0;
}
- if (__do_hash_fault(regs))
- hash__do_page_fault(regs);
+ __do_hash_fault(regs);
return 0;
}
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 5fef8db3b463..2176a5f70746 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -11,6 +11,7 @@
#include <linux/kernel.h>
#include <linux/sched/mm.h>
#include <linux/memblock.h>
+#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
@@ -357,30 +358,19 @@ static void __init radix_init_pgtable(void)
}
/* Find out how many PID bits are supported */
- if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
- if (!mmu_pid_bits)
- mmu_pid_bits = 20;
- mmu_base_pid = 1;
- } else if (cpu_has_feature(CPU_FTR_HVMODE)) {
- if (!mmu_pid_bits)
- mmu_pid_bits = 20;
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ if (!cpu_has_feature(CPU_FTR_HVMODE) &&
+ cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
/*
- * When KVM is possible, we only use the top half of the
- * PID space to avoid collisions between host and guest PIDs
- * which can cause problems due to prefetch when exiting the
- * guest with AIL=3
+ * Older versions of KVM on these machines perfer if the
+ * guest only uses the low 19 PID bits.
*/
- mmu_base_pid = 1 << (mmu_pid_bits - 1);
-#else
- mmu_base_pid = 1;
-#endif
- } else {
- /* The guest uses the bottom half of the PID space */
if (!mmu_pid_bits)
mmu_pid_bits = 19;
- mmu_base_pid = 1;
+ } else {
+ if (!mmu_pid_bits)
+ mmu_pid_bits = 20;
}
+ mmu_base_pid = 1;
/*
* Allocate Partition table and process table for the
@@ -486,6 +476,7 @@ static int __init radix_dt_scan_page_sizes(unsigned long node,
def = &mmu_psize_defs[idx];
def->shift = shift;
def->ap = ap;
+ def->h_rpt_pgsize = psize_to_rpti_pgsize(idx);
}
/* needed ? */
@@ -560,9 +551,13 @@ void __init radix__early_init_devtree(void)
*/
mmu_psize_defs[MMU_PAGE_4K].shift = 12;
mmu_psize_defs[MMU_PAGE_4K].ap = 0x0;
+ mmu_psize_defs[MMU_PAGE_4K].h_rpt_pgsize =
+ psize_to_rpti_pgsize(MMU_PAGE_4K);
mmu_psize_defs[MMU_PAGE_64K].shift = 16;
mmu_psize_defs[MMU_PAGE_64K].ap = 0x5;
+ mmu_psize_defs[MMU_PAGE_64K].h_rpt_pgsize =
+ psize_to_rpti_pgsize(MMU_PAGE_64K);
}
/*
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 409e61210789..ae12fb5559cb 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -20,10 +20,6 @@
#include "internal.h"
-#define RIC_FLUSH_TLB 0
-#define RIC_FLUSH_PWC 1
-#define RIC_FLUSH_ALL 2
-
/*
* tlbiel instruction for radix, set invalidation
* i.e., r=1 and is=01 or is=10 or is=11
@@ -130,6 +126,21 @@ static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
+static __always_inline void __tlbie_pid_lpid(unsigned long pid,
+ unsigned long lpid,
+ unsigned long ric)
+{
+ unsigned long rb, rs, prs, r;
+
+ rb = PPC_BIT(53); /* IS = 1 */
+ rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
+ prs = 1; /* process scoped */
+ r = 1; /* radix format */
+
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
+}
static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
unsigned long rb,rs,prs,r;
@@ -190,6 +201,23 @@ static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
+static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid,
+ unsigned long lpid,
+ unsigned long ap, unsigned long ric)
+{
+ unsigned long rb, rs, prs, r;
+
+ rb = va & ~(PPC_BITMASK(52, 63));
+ rb |= ap << PPC_BITLSHIFT(58);
+ rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
+ prs = 1; /* process scoped */
+ r = 1; /* radix format */
+
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
+}
+
static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
unsigned long ap, unsigned long ric)
{
@@ -235,6 +263,22 @@ static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
}
}
+static inline void fixup_tlbie_va_range_lpid(unsigned long va,
+ unsigned long pid,
+ unsigned long lpid,
+ unsigned long ap)
+{
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync" : : : "memory");
+ __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync" : : : "memory");
+ __tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB);
+ }
+}
+
static inline void fixup_tlbie_pid(unsigned long pid)
{
/*
@@ -254,6 +298,25 @@ static inline void fixup_tlbie_pid(unsigned long pid)
}
}
+static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid)
+{
+ /*
+ * We can use any address for the invalidation, pick one which is
+ * probably unused as an optimisation.
+ */
+ unsigned long va = ((1UL << 52) - 1);
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync" : : : "memory");
+ __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync" : : : "memory");
+ __tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K),
+ RIC_FLUSH_TLB);
+ }
+}
static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
unsigned long ap)
@@ -291,22 +354,30 @@ static inline void fixup_tlbie_lpid(unsigned long lpid)
/*
* We use 128 set in radix mode and 256 set in hpt mode.
*/
-static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
+static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
int set;
asm volatile("ptesync": : :"memory");
- /*
- * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
- * also flush the entire Page Walk Cache.
- */
- __tlbiel_pid(pid, 0, ric);
+ switch (ric) {
+ case RIC_FLUSH_PWC:
- /* For PWC, only one flush is needed */
- if (ric == RIC_FLUSH_PWC) {
+ /* For PWC, only one flush is needed */
+ __tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
ppc_after_tlbiel_barrier();
return;
+ case RIC_FLUSH_TLB:
+ __tlbiel_pid(pid, 0, RIC_FLUSH_TLB);
+ break;
+ case RIC_FLUSH_ALL:
+ default:
+ /*
+ * Flush the first set of the TLB, and if
+ * we're doing a RIC_FLUSH_ALL, also flush
+ * the entire Page Walk Cache.
+ */
+ __tlbiel_pid(pid, 0, RIC_FLUSH_ALL);
}
if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
@@ -344,6 +415,31 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
+static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
+ unsigned long ric)
+{
+ asm volatile("ptesync" : : : "memory");
+
+ /*
+ * Workaround the fact that the "ric" argument to __tlbie_pid
+ * must be a compile-time contraint to match the "i" constraint
+ * in the asm statement.
+ */
+ switch (ric) {
+ case RIC_FLUSH_TLB:
+ __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
+ fixup_tlbie_pid_lpid(pid, lpid);
+ break;
+ case RIC_FLUSH_PWC:
+ __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
+ break;
+ case RIC_FLUSH_ALL:
+ default:
+ __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
+ fixup_tlbie_pid_lpid(pid, lpid);
+ }
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
struct tlbiel_pid {
unsigned long pid;
unsigned long ric;
@@ -469,6 +565,20 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end,
fixup_tlbie_va_range(addr - page_size, pid, ap);
}
+static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long lpid,
+ unsigned long page_size,
+ unsigned long psize)
+{
+ unsigned long addr;
+ unsigned long ap = mmu_get_ap(psize);
+
+ for (addr = start; addr < end; addr += page_size)
+ __tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB);
+
+ fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap);
+}
+
static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
unsigned long psize, unsigned long ric)
{
@@ -549,6 +659,18 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
+static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long lpid,
+ unsigned long page_size,
+ unsigned long psize, bool also_pwc)
+{
+ asm volatile("ptesync" : : : "memory");
+ if (also_pwc)
+ __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
+ __tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize);
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
+
static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
unsigned long start, unsigned long end,
unsigned long pid, unsigned long page_size,
@@ -1176,7 +1298,7 @@ void radix__tlb_flush(struct mmu_gather *tlb)
}
}
-static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
+static void __radix__flush_tlb_range_psize(struct mm_struct *mm,
unsigned long start, unsigned long end,
int psize, bool also_pwc)
{
@@ -1338,47 +1460,57 @@ void radix__flush_tlb_all(void)
}
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
+/*
+ * Performs process-scoped invalidations for a given LPID
+ * as part of H_RPT_INVALIDATE hcall.
+ */
+void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid,
+ unsigned long type, unsigned long pg_sizes,
+ unsigned long start, unsigned long end)
{
- unsigned long pid = mm->context.id;
+ unsigned long psize, nr_pages;
+ struct mmu_psize_def *def;
+ bool flush_pid;
- if (unlikely(pid == MMU_NO_CONTEXT))
+ /*
+ * A H_RPTI_TYPE_ALL request implies RIC=3, hence
+ * do a single IS=1 based flush.
+ */
+ if ((type & H_RPTI_TYPE_ALL) == H_RPTI_TYPE_ALL) {
+ _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
return;
+ }
- if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
- return;
+ if (type & H_RPTI_TYPE_PWC)
+ _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
- /*
- * If this context hasn't run on that CPU before and KVM is
- * around, there's a slim chance that the guest on another
- * CPU just brought in obsolete translation into the TLB of
- * this CPU due to a bad prefetch using the guest PID on
- * the way into the hypervisor.
- *
- * We work around this here. If KVM is possible, we check if
- * any sibling thread is in KVM. If it is, the window may exist
- * and thus we flush that PID from the core.
- *
- * A potential future improvement would be to mark which PIDs
- * have never been used on the system and avoid it if the PID
- * is new and the process has no other cpumask bit set.
- */
- if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
- int cpu = smp_processor_id();
- int sib = cpu_first_thread_sibling(cpu);
- bool flush = false;
-
- for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
- if (sib == cpu)
- continue;
- if (!cpu_possible(sib))
- continue;
- if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
- flush = true;
+ /* Full PID flush */
+ if (start == 0 && end == -1)
+ return _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
+
+ /* Do range invalidation for all the valid page sizes */
+ for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+ def = &mmu_psize_defs[psize];
+ if (!(pg_sizes & def->h_rpt_pgsize))
+ continue;
+
+ nr_pages = (end - start) >> def->shift;
+ flush_pid = nr_pages > tlb_single_page_flush_ceiling;
+
+ /*
+ * If the number of pages spanning the range is above
+ * the ceiling, convert the request into a full PID flush.
+ * And since PID flush takes out all the page sizes, there
+ * is no need to consider remaining page sizes.
+ */
+ if (flush_pid) {
+ _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
+ return;
}
- if (flush)
- _tlbiel_pid(pid, RIC_FLUSH_ALL);
+ _tlbie_va_range_lpid(start, end, pid, lpid,
+ (1UL << def->shift), psize, false);
}
}
-EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
+EXPORT_SYMBOL_GPL(do_h_rpt_invalidate_prt);
+
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
diff --git a/arch/powerpc/mm/ioremap_32.c b/arch/powerpc/mm/ioremap_32.c
index 743e11384dea..9d13143b8be4 100644
--- a/arch/powerpc/mm/ioremap_32.c
+++ b/arch/powerpc/mm/ioremap_32.c
@@ -70,10 +70,10 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *call
*/
pr_warn("ioremap() called early from %pS. Use early_ioremap() instead\n", caller);
- err = early_ioremap_range(ioremap_bot - size, p, size, prot);
+ err = early_ioremap_range(ioremap_bot - size - PAGE_SIZE, p, size, prot);
if (err)
return NULL;
- ioremap_bot -= size;
+ ioremap_bot -= size + PAGE_SIZE;
return (void __iomem *)ioremap_bot + offset;
}
diff --git a/arch/powerpc/mm/ioremap_64.c b/arch/powerpc/mm/ioremap_64.c
index ba5cbb0d66bd..3acece00b33e 100644
--- a/arch/powerpc/mm/ioremap_64.c
+++ b/arch/powerpc/mm/ioremap_64.c
@@ -38,7 +38,7 @@ void __iomem *__ioremap_caller(phys_addr_t addr, unsigned long size,
return NULL;
ret = (void __iomem *)ioremap_bot + offset;
- ioremap_bot += size;
+ ioremap_bot += size + PAGE_SIZE;
return ret;
}
diff --git a/arch/powerpc/mm/maccess.c b/arch/powerpc/mm/maccess.c
index a3c30a884076..aad7c47e0030 100644
--- a/arch/powerpc/mm/maccess.c
+++ b/arch/powerpc/mm/maccess.c
@@ -12,7 +12,7 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
return is_kernel_addr((unsigned long)unsafe_src);
}
-int copy_inst_from_kernel_nofault(struct ppc_inst *inst, struct ppc_inst *src)
+int copy_inst_from_kernel_nofault(struct ppc_inst *inst, u32 *src)
{
unsigned int val, suffix;
int err;
@@ -21,7 +21,7 @@ int copy_inst_from_kernel_nofault(struct ppc_inst *inst, struct ppc_inst *src)
if (err)
return err;
if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) {
- err = copy_from_kernel_nofault(&suffix, (void *)src + 4, 4);
+ err = copy_from_kernel_nofault(&suffix, src + 1, sizeof(suffix));
*inst = ppc_inst_prefix(val, suffix);
} else {
*inst = ppc_inst(val);
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 043bbeaf407c..ad198b439222 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -20,6 +20,7 @@
#include <asm/machdep.h>
#include <asm/rtas.h>
#include <asm/kasan.h>
+#include <asm/sparsemem.h>
#include <asm/svm.h>
#include <mm/mmu_decl.h>
@@ -27,6 +28,9 @@
unsigned long long memory_limit;
bool init_mem_is_free;
+unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
+EXPORT_SYMBOL(empty_zero_page);
+
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t vma_prot)
{
@@ -126,7 +130,7 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
}
#endif
-#ifndef CONFIG_NEED_MULTIPLE_NODES
+#ifndef CONFIG_NUMA
void __init mem_topology_setup(void)
{
max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
@@ -161,7 +165,7 @@ static int __init mark_nonram_nosave(void)
return 0;
}
-#else /* CONFIG_NEED_MULTIPLE_NODES */
+#else /* CONFIG_NUMA */
static int __init mark_nonram_nosave(void)
{
return 0;
@@ -298,6 +302,10 @@ void __init mem_init(void)
ioremap_bot, IOREMAP_TOP);
pr_info(" * 0x%08lx..0x%08lx : vmalloc & ioremap\n",
VMALLOC_START, VMALLOC_END);
+#ifdef MODULES_VADDR
+ pr_info(" * 0x%08lx..0x%08lx : modules\n",
+ MODULES_VADDR, MODULES_END);
+#endif
#endif /* CONFIG_PPC32 */
}
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index a857af401738..74246536b832 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -83,9 +83,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
if (cpu_has_feature(CPU_FTR_ALTIVEC))
asm volatile ("dssall");
- if (new_on_cpu)
- radix_kvm_prefetch_workaround(next);
- else
+ if (!new_on_cpu)
membarrier_arch_switch_mm(prev, next, tsk);
/*
diff --git a/arch/powerpc/mm/nohash/44x.c b/arch/powerpc/mm/nohash/44x.c
index 3d6ae7c72412..e079f26b267e 100644
--- a/arch/powerpc/mm/nohash/44x.c
+++ b/arch/powerpc/mm/nohash/44x.c
@@ -25,6 +25,7 @@
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/code-patching.h>
+#include <asm/smp.h>
#include <mm/mmu_decl.h>
@@ -239,3 +240,19 @@ void __init mmu_init_secondary(int cpu)
}
}
#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_PPC_KUEP
+void setup_kuep(bool disabled)
+{
+ if (smp_processor_id() != boot_cpuid)
+ return;
+
+ if (disabled)
+ patch_instruction_site(&patch__tlb_44x_kuep, ppc_inst(PPC_RAW_NOP()));
+ else
+ pr_info("Activating Kernel Userspace Execution Prevention\n");
+
+ if (IS_ENABLED(CONFIG_PPC_47x) && disabled)
+ patch_instruction_site(&patch__tlb_47x_kuep, ppc_inst(PPC_RAW_NOP()));
+}
+#endif
diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index 71bfdbedacee..60780e089118 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -212,37 +212,6 @@ void __init setup_initial_memory_limit(phys_addr_t first_memblock_base,
memblock_set_current_limit(min_t(u64, first_memblock_size, SZ_32M));
}
-/*
- * Set up to use a given MMU context.
- * id is context number, pgd is PGD pointer.
- *
- * We place the physical address of the new task page directory loaded
- * into the MMU base register, and set the ASID compare register with
- * the new "context."
- */
-void set_context(unsigned long id, pgd_t *pgd)
-{
- s16 offset = (s16)(__pa(swapper_pg_dir));
-
- /* Context switch the PTE pointer for the Abatron BDI2000.
- * The PGDIR is passed as second argument.
- */
- if (IS_ENABLED(CONFIG_BDI_SWITCH))
- abatron_pteptrs[1] = pgd;
-
- /* Register M_TWB will contain base address of level 1 table minus the
- * lower part of the kernel PGDIR base address, so that all accesses to
- * level 1 table are done relative to lower part of kernel PGDIR base
- * address.
- */
- mtspr(SPRN_M_TWB, __pa(pgd) - offset);
-
- /* Update context */
- mtspr(SPRN_M_CASID, id - 1);
- /* sync */
- mb();
-}
-
#ifdef CONFIG_PPC_KUEP
void __init setup_kuep(bool disabled)
{
@@ -256,12 +225,17 @@ void __init setup_kuep(bool disabled)
#endif
#ifdef CONFIG_PPC_KUAP
+struct static_key_false disable_kuap_key;
+EXPORT_SYMBOL(disable_kuap_key);
+
void __init setup_kuap(bool disabled)
{
- pr_info("Activating Kernel Userspace Access Protection\n");
+ if (disabled) {
+ static_branch_enable(&disable_kuap_key);
+ return;
+ }
- if (disabled)
- pr_warn("KUAP cannot be disabled yet on 8xx when compiled in\n");
+ pr_info("Activating Kernel Userspace Access Protection\n");
mtspr(SPRN_MD_AP, MD_APG_KUAP);
}
diff --git a/arch/powerpc/mm/nohash/mmu_context.c b/arch/powerpc/mm/nohash/mmu_context.c
index aac81c9f84a5..44b2b5e7cabe 100644
--- a/arch/powerpc/mm/nohash/mmu_context.c
+++ b/arch/powerpc/mm/nohash/mmu_context.c
@@ -21,21 +21,6 @@
* also clear mm->cpu_vm_mask bits when processes are migrated
*/
-//#define DEBUG_MAP_CONSISTENCY
-//#define DEBUG_CLAMP_LAST_CONTEXT 31
-//#define DEBUG_HARDER
-
-/* We don't use DEBUG because it tends to be compiled in always nowadays
- * and this would generate way too much output
- */
-#ifdef DEBUG_HARDER
-#define pr_hard(args...) printk(KERN_DEBUG args)
-#define pr_hardcont(args...) printk(KERN_CONT args)
-#else
-#define pr_hard(args...) do { } while(0)
-#define pr_hardcont(args...) do { } while(0)
-#endif
-
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/init.h>
@@ -47,10 +32,17 @@
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
+#include <asm/smp.h>
#include <mm/mmu_decl.h>
/*
+ * Room for two PTE table pointers, usually the kernel and current user
+ * pointer to their respective root page table (pgdir).
+ */
+void *abatron_pteptrs[2];
+
+/*
* The MPC8xx has only 16 contexts. We rotate through them on each task switch.
* A better way would be to keep track of tasks that own contexts, and implement
* an LRU usage. That way very active tasks don't always have to pay the TLB
@@ -68,9 +60,7 @@
* -- BenH
*/
#define FIRST_CONTEXT 1
-#ifdef DEBUG_CLAMP_LAST_CONTEXT
-#define LAST_CONTEXT DEBUG_CLAMP_LAST_CONTEXT
-#elif defined(CONFIG_PPC_8xx)
+#if defined(CONFIG_PPC_8xx)
#define LAST_CONTEXT 16
#elif defined(CONFIG_PPC_47x)
#define LAST_CONTEXT 65535
@@ -80,9 +70,7 @@
static unsigned int next_context, nr_free_contexts;
static unsigned long *context_map;
-#ifdef CONFIG_SMP
static unsigned long *stale_map[NR_CPUS];
-#endif
static struct mm_struct **context_mm;
static DEFINE_RAW_SPINLOCK(context_lock);
@@ -105,7 +93,6 @@ static DEFINE_RAW_SPINLOCK(context_lock);
* the stale map as we can just flush the local CPU
* -- benh
*/
-#ifdef CONFIG_SMP
static unsigned int steal_context_smp(unsigned int id)
{
struct mm_struct *mm;
@@ -127,7 +114,6 @@ static unsigned int steal_context_smp(unsigned int id)
id = FIRST_CONTEXT;
continue;
}
- pr_hardcont(" | steal %d from 0x%p", id, mm);
/* Mark this mm has having no context anymore */
mm->context.id = MMU_NO_CONTEXT;
@@ -158,34 +144,25 @@ static unsigned int steal_context_smp(unsigned int id)
/* This will cause the caller to try again */
return MMU_NO_CONTEXT;
}
-#endif /* CONFIG_SMP */
static unsigned int steal_all_contexts(void)
{
struct mm_struct *mm;
-#ifdef CONFIG_SMP
int cpu = smp_processor_id();
-#endif
unsigned int id;
for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) {
/* Pick up the victim mm */
mm = context_mm[id];
- pr_hardcont(" | steal %d from 0x%p", id, mm);
-
/* Mark this mm as having no context anymore */
mm->context.id = MMU_NO_CONTEXT;
if (id != FIRST_CONTEXT) {
context_mm[id] = NULL;
__clear_bit(id, context_map);
-#ifdef DEBUG_MAP_CONSISTENCY
- mm->context.active = 0;
-#endif
}
-#ifdef CONFIG_SMP
- __clear_bit(id, stale_map[cpu]);
-#endif
+ if (IS_ENABLED(CONFIG_SMP))
+ __clear_bit(id, stale_map[cpu]);
}
/* Flush the TLB for all contexts (not to be used on SMP) */
@@ -204,15 +181,11 @@ static unsigned int steal_all_contexts(void)
static unsigned int steal_context_up(unsigned int id)
{
struct mm_struct *mm;
-#ifdef CONFIG_SMP
int cpu = smp_processor_id();
-#endif
/* Pick up the victim mm */
mm = context_mm[id];
- pr_hardcont(" | steal %d from 0x%p", id, mm);
-
/* Flush the TLB for that context */
local_flush_tlb_mm(mm);
@@ -220,81 +193,64 @@ static unsigned int steal_context_up(unsigned int id)
mm->context.id = MMU_NO_CONTEXT;
/* XXX This clear should ultimately be part of local_flush_tlb_mm */
-#ifdef CONFIG_SMP
- __clear_bit(id, stale_map[cpu]);
-#endif
+ if (IS_ENABLED(CONFIG_SMP))
+ __clear_bit(id, stale_map[cpu]);
return id;
}
-#ifdef DEBUG_MAP_CONSISTENCY
-static void context_check_map(void)
+static void set_context(unsigned long id, pgd_t *pgd)
{
- unsigned int id, nrf, nact;
+ if (IS_ENABLED(CONFIG_PPC_8xx)) {
+ s16 offset = (s16)(__pa(swapper_pg_dir));
+
+ /*
+ * Register M_TWB will contain base address of level 1 table minus the
+ * lower part of the kernel PGDIR base address, so that all accesses to
+ * level 1 table are done relative to lower part of kernel PGDIR base
+ * address.
+ */
+ mtspr(SPRN_M_TWB, __pa(pgd) - offset);
- nrf = nact = 0;
- for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) {
- int used = test_bit(id, context_map);
- if (!used)
- nrf++;
- if (used != (context_mm[id] != NULL))
- pr_err("MMU: Context %d is %s and MM is %p !\n",
- id, used ? "used" : "free", context_mm[id]);
- if (context_mm[id] != NULL)
- nact += context_mm[id]->context.active;
- }
- if (nrf != nr_free_contexts) {
- pr_err("MMU: Free context count out of sync ! (%d vs %d)\n",
- nr_free_contexts, nrf);
- nr_free_contexts = nrf;
+ /* Update context */
+ mtspr(SPRN_M_CASID, id - 1);
+
+ /* sync */
+ mb();
+ } else {
+ if (IS_ENABLED(CONFIG_40x))
+ mb(); /* sync */
+
+ mtspr(SPRN_PID, id);
+ isync();
}
- if (nact > num_online_cpus())
- pr_err("MMU: More active contexts than CPUs ! (%d vs %d)\n",
- nact, num_online_cpus());
- if (FIRST_CONTEXT > 0 && !test_bit(0, context_map))
- pr_err("MMU: Context 0 has been freed !!!\n");
}
-#else
-static void context_check_map(void) { }
-#endif
void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
unsigned int id;
-#ifdef CONFIG_SMP
unsigned int i, cpu = smp_processor_id();
-#endif
unsigned long *map;
/* No lockless fast path .. yet */
raw_spin_lock(&context_lock);
- pr_hard("[%d] activating context for mm @%p, active=%d, id=%d",
- cpu, next, next->context.active, next->context.id);
-
-#ifdef CONFIG_SMP
- /* Mark us active and the previous one not anymore */
- next->context.active++;
- if (prev) {
- pr_hardcont(" (old=0x%p a=%d)", prev, prev->context.active);
- WARN_ON(prev->context.active < 1);
- prev->context.active--;
+ if (IS_ENABLED(CONFIG_SMP)) {
+ /* Mark us active and the previous one not anymore */
+ next->context.active++;
+ if (prev) {
+ WARN_ON(prev->context.active < 1);
+ prev->context.active--;
+ }
}
again:
-#endif /* CONFIG_SMP */
/* If we already have a valid assigned context, skip all that */
id = next->context.id;
- if (likely(id != MMU_NO_CONTEXT)) {
-#ifdef DEBUG_MAP_CONSISTENCY
- if (context_mm[id] != next)
- pr_err("MMU: mm 0x%p has id %d but context_mm[%d] says 0x%p\n",
- next, id, id, context_mm[id]);
-#endif
+ if (likely(id != MMU_NO_CONTEXT))
goto ctxt_ok;
- }
/* We really don't have a context, let's try to acquire one */
id = next_context;
@@ -304,14 +260,12 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
/* No more free contexts, let's try to steal one */
if (nr_free_contexts == 0) {
-#ifdef CONFIG_SMP
if (num_online_cpus() > 1) {
id = steal_context_smp(id);
if (id == MMU_NO_CONTEXT)
goto again;
goto stolen;
}
-#endif /* CONFIG_SMP */
if (IS_ENABLED(CONFIG_PPC_8xx))
id = steal_all_contexts();
else
@@ -330,20 +284,13 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
next_context = id + 1;
context_mm[id] = next;
next->context.id = id;
- pr_hardcont(" | new id=%d,nrf=%d", id, nr_free_contexts);
- context_check_map();
ctxt_ok:
/* If that context got marked stale on this CPU, then flush the
* local TLB for it and unmark it before we use it
*/
-#ifdef CONFIG_SMP
- if (test_bit(id, stale_map[cpu])) {
- pr_hardcont(" | stale flush %d [%d..%d]",
- id, cpu_first_thread_sibling(cpu),
- cpu_last_thread_sibling(cpu));
-
+ if (IS_ENABLED(CONFIG_SMP) && test_bit(id, stale_map[cpu])) {
local_flush_tlb_mm(next);
/* XXX This clear should ultimately be part of local_flush_tlb_mm */
@@ -353,10 +300,10 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
__clear_bit(id, stale_map[i]);
}
}
-#endif
/* Flick the MMU and release lock */
- pr_hardcont(" -> %d\n", id);
+ if (IS_ENABLED(CONFIG_BDI_SWITCH))
+ abatron_pteptrs[1] = next->pgd;
set_context(id, next->pgd);
raw_spin_unlock(&context_lock);
}
@@ -366,8 +313,6 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
*/
int init_new_context(struct task_struct *t, struct mm_struct *mm)
{
- pr_hard("initing context for mm @%p\n", mm);
-
/*
* We have MMU_NO_CONTEXT set to be ~0. Hence check
* explicitly against context.id == 0. This ensures that we properly
@@ -401,16 +346,12 @@ void destroy_context(struct mm_struct *mm)
if (id != MMU_NO_CONTEXT) {
__clear_bit(id, context_map);
mm->context.id = MMU_NO_CONTEXT;
-#ifdef DEBUG_MAP_CONSISTENCY
- mm->context.active = 0;
-#endif
context_mm[id] = NULL;
nr_free_contexts++;
}
raw_spin_unlock_irqrestore(&context_lock, flags);
}
-#ifdef CONFIG_SMP
static int mmu_ctx_cpu_prepare(unsigned int cpu)
{
/* We don't touch CPU 0 map, it's allocated at aboot and kept
@@ -419,7 +360,6 @@ static int mmu_ctx_cpu_prepare(unsigned int cpu)
if (cpu == boot_cpuid)
return 0;
- pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu);
stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
return 0;
}
@@ -430,7 +370,6 @@ static int mmu_ctx_cpu_dead(unsigned int cpu)
if (cpu == boot_cpuid)
return 0;
- pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu);
kfree(stale_map[cpu]);
stale_map[cpu] = NULL;
@@ -440,8 +379,6 @@ static int mmu_ctx_cpu_dead(unsigned int cpu)
return 0;
}
-#endif /* CONFIG_SMP */
-
/*
* Initialize the context management stuff.
*/
@@ -465,16 +402,16 @@ void __init mmu_context_init(void)
if (!context_mm)
panic("%s: Failed to allocate %zu bytes\n", __func__,
sizeof(void *) * (LAST_CONTEXT + 1));
-#ifdef CONFIG_SMP
- stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES);
- if (!stale_map[boot_cpuid])
- panic("%s: Failed to allocate %zu bytes\n", __func__,
- CTX_MAP_SIZE);
-
- cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE,
- "powerpc/mmu/ctx:prepare",
- mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead);
-#endif
+ if (IS_ENABLED(CONFIG_SMP)) {
+ stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES);
+ if (!stale_map[boot_cpuid])
+ panic("%s: Failed to allocate %zu bytes\n", __func__,
+ CTX_MAP_SIZE);
+
+ cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE,
+ "powerpc/mmu/ctx:prepare",
+ mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead);
+ }
printk(KERN_INFO
"MMU: Allocated %zu bytes of context maps for %d contexts\n",
diff --git a/arch/powerpc/mm/nohash/tlb_low.S b/arch/powerpc/mm/nohash/tlb_low.S
index 68797e072f55..4613bf8e9aae 100644
--- a/arch/powerpc/mm/nohash/tlb_low.S
+++ b/arch/powerpc/mm/nohash/tlb_low.S
@@ -360,19 +360,6 @@ _GLOBAL(_tlbivax_bcast)
sync
wrtee r10
blr
-
-_GLOBAL(set_context)
-#ifdef CONFIG_BDI_SWITCH
- /* Context switch the PTE pointer for the Abatron BDI2000.
- * The PGDIR is the second parameter.
- */
- lis r5, abatron_pteptrs@h
- ori r5, r5, abatron_pteptrs@l
- stw r4, 0x4(r5)
-#endif
- mtspr SPRN_PID,r3
- isync /* Force context change */
- blr
#else
#error Unsupported processor type !
#endif
diff --git a/arch/powerpc/mm/pageattr.c b/arch/powerpc/mm/pageattr.c
new file mode 100644
index 000000000000..0876216ceee6
--- /dev/null
+++ b/arch/powerpc/mm/pageattr.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * MMU-generic set_memory implementation for powerpc
+ *
+ * Copyright 2019-2021, IBM Corporation.
+ */
+
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/set_memory.h>
+
+#include <asm/mmu.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+
+/*
+ * Updates the attributes of a page in three steps:
+ *
+ * 1. invalidate the page table entry
+ * 2. flush the TLB
+ * 3. install the new entry with the updated attributes
+ *
+ * Invalidating the pte means there are situations where this will not work
+ * when in theory it should.
+ * For example:
+ * - removing write from page whilst it is being executed
+ * - setting a page read-only whilst it is being read by another CPU
+ *
+ */
+static int change_page_attr(pte_t *ptep, unsigned long addr, void *data)
+{
+ long action = (long)data;
+ pte_t pte;
+
+ spin_lock(&init_mm.page_table_lock);
+
+ /* invalidate the PTE so it's safe to modify */
+ pte = ptep_get_and_clear(&init_mm, addr, ptep);
+ flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+
+ /* modify the PTE bits as desired, then apply */
+ switch (action) {
+ case SET_MEMORY_RO:
+ pte = pte_wrprotect(pte);
+ break;
+ case SET_MEMORY_RW:
+ pte = pte_mkwrite(pte_mkdirty(pte));
+ break;
+ case SET_MEMORY_NX:
+ pte = pte_exprotect(pte);
+ break;
+ case SET_MEMORY_X:
+ pte = pte_mkexec(pte);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ set_pte_at(&init_mm, addr, ptep, pte);
+
+ /* See ptesync comment in radix__set_pte_at() */
+ if (radix_enabled())
+ asm volatile("ptesync": : :"memory");
+ spin_unlock(&init_mm.page_table_lock);
+
+ return 0;
+}
+
+int change_memory_attr(unsigned long addr, int numpages, long action)
+{
+ unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE);
+ unsigned long size = numpages * PAGE_SIZE;
+
+ if (!numpages)
+ return 0;
+
+ if (WARN_ON_ONCE(is_vmalloc_or_module_addr((void *)addr) &&
+ is_vm_area_hugepages((void *)addr)))
+ return -EINVAL;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ /*
+ * On hash, the linear mapping is not in the Linux page table so
+ * apply_to_existing_page_range() will have no effect. If in the future
+ * the set_memory_* functions are used on the linear map this will need
+ * to be updated.
+ */
+ if (!radix_enabled()) {
+ int region = get_region_id(addr);
+
+ if (WARN_ON_ONCE(region != VMALLOC_REGION_ID && region != IO_REGION_ID))
+ return -EINVAL;
+ }
+#endif
+
+ return apply_to_existing_page_range(&init_mm, start, size,
+ change_page_attr, (void *)action);
+}
+
+/*
+ * Set the attributes of a page:
+ *
+ * This function is used by PPC32 at the end of init to set final kernel memory
+ * protection. It includes changing the maping of the page it is executing from
+ * and data pages it is using.
+ */
+static int set_page_attr(pte_t *ptep, unsigned long addr, void *data)
+{
+ pgprot_t prot = __pgprot((unsigned long)data);
+
+ spin_lock(&init_mm.page_table_lock);
+
+ set_pte_at(&init_mm, addr, ptep, pte_modify(*ptep, prot));
+ flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+
+ spin_unlock(&init_mm.page_table_lock);
+
+ return 0;
+}
+
+int set_memory_attr(unsigned long addr, int numpages, pgprot_t prot)
+{
+ unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE);
+ unsigned long sz = numpages * PAGE_SIZE;
+
+ if (numpages <= 0)
+ return 0;
+
+ return apply_to_existing_page_range(&init_mm, start, sz, set_page_attr,
+ (void *)pgprot_val(prot));
+}
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 354611940118..cd16b407f47e 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -28,6 +28,14 @@
#include <asm/hugetlb.h>
#include <asm/pte-walk.h>
+#ifdef CONFIG_PPC64
+#define PGD_ALIGN (sizeof(pgd_t) * MAX_PTRS_PER_PGD)
+#else
+#define PGD_ALIGN PAGE_SIZE
+#endif
+
+pgd_t swapper_pg_dir[MAX_PTRS_PER_PGD] __section(".bss..page_aligned") __aligned(PGD_ALIGN);
+
static inline int is_exec_fault(void)
{
return current->thread.regs && TRAP(current->thread.regs) == 0x400;
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index e0ec67a16887..dcf5ecca19d9 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -23,6 +23,7 @@
#include <linux/highmem.h>
#include <linux/memblock.h>
#include <linux/slab.h>
+#include <linux/set_memory.h>
#include <asm/pgalloc.h>
#include <asm/fixmap.h>
@@ -132,64 +133,20 @@ void __init mapin_ram(void)
}
}
-static int __change_page_attr_noflush(struct page *page, pgprot_t prot)
-{
- pte_t *kpte;
- unsigned long address;
-
- BUG_ON(PageHighMem(page));
- address = (unsigned long)page_address(page);
-
- if (v_block_mapped(address))
- return 0;
- kpte = virt_to_kpte(address);
- if (!kpte)
- return -EINVAL;
- __set_pte_at(&init_mm, address, kpte, mk_pte(page, prot), 0);
-
- return 0;
-}
-
-/*
- * Change the page attributes of an page in the linear mapping.
- *
- * THIS DOES NOTHING WITH BAT MAPPINGS, DEBUG USE ONLY
- */
-static int change_page_attr(struct page *page, int numpages, pgprot_t prot)
-{
- int i, err = 0;
- unsigned long flags;
- struct page *start = page;
-
- local_irq_save(flags);
- for (i = 0; i < numpages; i++, page++) {
- err = __change_page_attr_noflush(page, prot);
- if (err)
- break;
- }
- wmb();
- local_irq_restore(flags);
- flush_tlb_kernel_range((unsigned long)page_address(start),
- (unsigned long)page_address(page));
- return err;
-}
-
void mark_initmem_nx(void)
{
- struct page *page = virt_to_page(_sinittext);
unsigned long numpages = PFN_UP((unsigned long)_einittext) -
PFN_DOWN((unsigned long)_sinittext);
if (v_block_mapped((unsigned long)_sinittext))
mmu_mark_initmem_nx();
else
- change_page_attr(page, numpages, PAGE_KERNEL);
+ set_memory_attr((unsigned long)_sinittext, numpages, PAGE_KERNEL);
}
#ifdef CONFIG_STRICT_KERNEL_RWX
void mark_rodata_ro(void)
{
- struct page *page;
unsigned long numpages;
if (v_block_mapped((unsigned long)_stext + 1)) {
@@ -198,20 +155,18 @@ void mark_rodata_ro(void)
return;
}
- page = virt_to_page(_stext);
numpages = PFN_UP((unsigned long)_etext) -
PFN_DOWN((unsigned long)_stext);
- change_page_attr(page, numpages, PAGE_KERNEL_ROX);
+ set_memory_attr((unsigned long)_stext, numpages, PAGE_KERNEL_ROX);
/*
* mark .rodata as read only. Use __init_begin rather than __end_rodata
* to cover NOTES and EXCEPTION_TABLE.
*/
- page = virt_to_page(__start_rodata);
numpages = PFN_UP((unsigned long)__init_begin) -
PFN_DOWN((unsigned long)__start_rodata);
- change_page_attr(page, numpages, PAGE_KERNEL_RO);
+ set_memory_attr((unsigned long)__start_rodata, numpages, PAGE_KERNEL_RO);
// mark_initmem_nx() should have already run by now
ptdump_check_wx();
@@ -221,9 +176,14 @@ void mark_rodata_ro(void)
#ifdef CONFIG_DEBUG_PAGEALLOC
void __kernel_map_pages(struct page *page, int numpages, int enable)
{
+ unsigned long addr = (unsigned long)page_address(page);
+
if (PageHighMem(page))
return;
- change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
+ if (enable)
+ set_memory_attr(addr, numpages, PAGE_KERNEL);
+ else
+ set_memory_attr(addr, numpages, __pgprot(0));
}
#endif /* CONFIG_DEBUG_PAGEALLOC */
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index aca354fb670b..5062c58b1e5b 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -58,8 +58,6 @@ struct pg_state {
const struct addr_marker *marker;
unsigned long start_address;
unsigned long start_pa;
- unsigned long last_pa;
- unsigned long page_size;
unsigned int level;
u64 current_flags;
bool check_wx;
@@ -163,8 +161,6 @@ static void dump_flag_info(struct pg_state *st, const struct flag_info
static void dump_addr(struct pg_state *st, unsigned long addr)
{
- unsigned long delta;
-
#ifdef CONFIG_PPC64
#define REG "0x%016lx"
#else
@@ -172,14 +168,8 @@ static void dump_addr(struct pg_state *st, unsigned long addr)
#endif
pt_dump_seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1);
- if (st->start_pa == st->last_pa && st->start_address + st->page_size != addr) {
- pt_dump_seq_printf(st->seq, "[" REG "]", st->start_pa);
- delta = st->page_size >> 10;
- } else {
- pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa);
- delta = (addr - st->start_address) >> 10;
- }
- pt_dump_size(st->seq, delta);
+ pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa);
+ pt_dump_size(st->seq, (addr - st->start_address) >> 10);
}
static void note_prot_wx(struct pg_state *st, unsigned long addr)
@@ -208,7 +198,6 @@ static void note_page_update_state(struct pg_state *st, unsigned long addr,
st->current_flags = flag;
st->start_address = addr;
st->start_pa = pa;
- st->page_size = page_size;
while (addr >= st->marker[1].start_address) {
st->marker++;
@@ -220,7 +209,6 @@ static void note_page(struct pg_state *st, unsigned long addr,
unsigned int level, u64 val, unsigned long page_size)
{
u64 flag = val & pg_level[level].mask;
- u64 pa = val & PTE_RPN_MASK;
/* At first no level is set */
if (!st->level) {
@@ -232,12 +220,9 @@ static void note_page(struct pg_state *st, unsigned long addr,
* - we change levels in the tree.
* - the address is in a different section of memory and is thus
* used for a different purpose, regardless of the flags.
- * - the pa of this page is not adjacent to the last inspected page
*/
} else if (flag != st->current_flags || level != st->level ||
- addr >= st->marker[1].start_address ||
- (pa != st->last_pa + st->page_size &&
- (pa != st->start_pa || st->start_pa != st->last_pa))) {
+ addr >= st->marker[1].start_address) {
/* Check the PTE flags */
if (st->current_flags) {
@@ -259,7 +244,6 @@ static void note_page(struct pg_state *st, unsigned long addr,
*/
note_page_update_state(st, addr, level, val, page_size);
}
- st->last_pa = pa;
}
static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 798ac4350a82..53aefee3fe70 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -237,6 +237,7 @@ skip_codegen_passes:
fp->jited_len = alloclen;
bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
+ bpf_jit_binary_lock_ro(bpf_hdr);
if (!fp->is_func || extra_pass) {
bpf_prog_fill_jited_linfo(fp, addrs);
out_addrs:
@@ -257,15 +258,3 @@ out:
return fp;
}
-
-/* Overriding bpf_jit_free() as we don't set images read-only. */
-void bpf_jit_free(struct bpf_prog *fp)
-{
- unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
- struct bpf_binary_header *bpf_hdr = (void *)addr;
-
- if (fp->jited)
- bpf_jit_binary_free(bpf_hdr);
-
- bpf_prog_unlock_free(fp);
-}
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index bbb16099e8c7..cbe5b399ed86 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -108,20 +108,20 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
int i;
/* First arg comes in as a 32 bits pointer. */
- EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_1), __REG_R3));
+ EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_1), _R3));
EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_1) - 1, 0));
- EMIT(PPC_RAW_STWU(__REG_R1, __REG_R1, -BPF_PPC_STACKFRAME(ctx)));
+ EMIT(PPC_RAW_STWU(_R1, _R1, -BPF_PPC_STACKFRAME(ctx)));
/*
* Initialize tail_call_cnt in stack frame if we do tail calls.
* Otherwise, put in NOPs so that it can be skipped when we are
* invoked through a tail call.
*/
- if (ctx->seen & SEEN_TAILCALL) {
- EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_1) - 1, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
- } else {
+ if (ctx->seen & SEEN_TAILCALL)
+ EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_1) - 1, _R1,
+ bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
+ else
EMIT(PPC_RAW_NOP());
- }
#define BPF_TAILCALL_PROLOGUE_SIZE 16
@@ -130,30 +130,30 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
* save/restore LR unless we call other functions
*/
if (ctx->seen & SEEN_FUNC)
- EMIT(PPC_RAW_MFLR(__REG_R0));
+ EMIT(PPC_RAW_MFLR(_R0));
/*
* Back up non-volatile regs -- registers r18-r31
*/
for (i = BPF_PPC_NVR_MIN; i <= 31; i++)
if (bpf_is_seen_register(ctx, i))
- EMIT(PPC_RAW_STW(i, __REG_R1, bpf_jit_stack_offsetof(ctx, i)));
+ EMIT(PPC_RAW_STW(i, _R1, bpf_jit_stack_offsetof(ctx, i)));
/* If needed retrieve arguments 9 and 10, ie 5th 64 bits arg.*/
if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) {
- EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5) - 1, __REG_R1, BPF_PPC_STACKFRAME(ctx)) + 8);
- EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5), __REG_R1, BPF_PPC_STACKFRAME(ctx)) + 12);
+ EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5) - 1, _R1, BPF_PPC_STACKFRAME(ctx)) + 8);
+ EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5), _R1, BPF_PPC_STACKFRAME(ctx)) + 12);
}
/* Setup frame pointer to point to the bpf stack area */
if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_FP))) {
EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_FP) - 1, 0));
- EMIT(PPC_RAW_ADDI(bpf_to_ppc(ctx, BPF_REG_FP), __REG_R1,
+ EMIT(PPC_RAW_ADDI(bpf_to_ppc(ctx, BPF_REG_FP), _R1,
STACK_FRAME_MIN_SIZE + ctx->stack_size));
}
if (ctx->seen & SEEN_FUNC)
- EMIT(PPC_RAW_STW(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
+ EMIT(PPC_RAW_STW(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
}
static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx)
@@ -163,24 +163,24 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
/* Restore NVRs */
for (i = BPF_PPC_NVR_MIN; i <= 31; i++)
if (bpf_is_seen_register(ctx, i))
- EMIT(PPC_RAW_LWZ(i, __REG_R1, bpf_jit_stack_offsetof(ctx, i)));
+ EMIT(PPC_RAW_LWZ(i, _R1, bpf_jit_stack_offsetof(ctx, i)));
}
void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
{
- EMIT(PPC_RAW_MR(__REG_R3, bpf_to_ppc(ctx, BPF_REG_0)));
+ EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(ctx, BPF_REG_0)));
bpf_jit_emit_common_epilogue(image, ctx);
/* Tear down our stack frame */
if (ctx->seen & SEEN_FUNC)
- EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
+ EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
- EMIT(PPC_RAW_ADDI(__REG_R1, __REG_R1, BPF_PPC_STACKFRAME(ctx)));
+ EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME(ctx)));
if (ctx->seen & SEEN_FUNC)
- EMIT(PPC_RAW_MTLR(__REG_R0));
+ EMIT(PPC_RAW_MTLR(_R0));
EMIT(PPC_RAW_BLR());
}
@@ -193,10 +193,10 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 fun
PPC_BL_ABS(func);
} else {
/* Load function address into r0 */
- EMIT(PPC_RAW_LIS(__REG_R0, IMM_H(func)));
- EMIT(PPC_RAW_ORI(__REG_R0, __REG_R0, IMM_L(func)));
- EMIT(PPC_RAW_MTLR(__REG_R0));
- EMIT(PPC_RAW_BLRL());
+ EMIT(PPC_RAW_LIS(_R0, IMM_H(func)));
+ EMIT(PPC_RAW_ORI(_R0, _R0, IMM_L(func)));
+ EMIT(PPC_RAW_MTCTR(_R0));
+ EMIT(PPC_RAW_BCTRL());
}
}
@@ -215,47 +215,47 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
* if (index >= array->map.max_entries)
* goto out;
*/
- EMIT(PPC_RAW_LWZ(__REG_R0, b2p_bpf_array, offsetof(struct bpf_array, map.max_entries)));
- EMIT(PPC_RAW_CMPLW(b2p_index, __REG_R0));
- EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
+ EMIT(PPC_RAW_LWZ(_R0, b2p_bpf_array, offsetof(struct bpf_array, map.max_entries)));
+ EMIT(PPC_RAW_CMPLW(b2p_index, _R0));
+ EMIT(PPC_RAW_LWZ(_R0, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
PPC_BCC(COND_GE, out);
/*
* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
* goto out;
*/
- EMIT(PPC_RAW_CMPLWI(__REG_R0, MAX_TAIL_CALL_CNT));
+ EMIT(PPC_RAW_CMPLWI(_R0, MAX_TAIL_CALL_CNT));
/* tail_call_cnt++; */
- EMIT(PPC_RAW_ADDIC(__REG_R0, __REG_R0, 1));
+ EMIT(PPC_RAW_ADDIC(_R0, _R0, 1));
PPC_BCC(COND_GT, out);
/* prog = array->ptrs[index]; */
- EMIT(PPC_RAW_RLWINM(__REG_R3, b2p_index, 2, 0, 29));
- EMIT(PPC_RAW_ADD(__REG_R3, __REG_R3, b2p_bpf_array));
- EMIT(PPC_RAW_LWZ(__REG_R3, __REG_R3, offsetof(struct bpf_array, ptrs)));
- EMIT(PPC_RAW_STW(__REG_R0, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
+ EMIT(PPC_RAW_RLWINM(_R3, b2p_index, 2, 0, 29));
+ EMIT(PPC_RAW_ADD(_R3, _R3, b2p_bpf_array));
+ EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_array, ptrs)));
+ EMIT(PPC_RAW_STW(_R0, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
/*
* if (prog == NULL)
* goto out;
*/
- EMIT(PPC_RAW_CMPLWI(__REG_R3, 0));
+ EMIT(PPC_RAW_CMPLWI(_R3, 0));
PPC_BCC(COND_EQ, out);
/* goto *(prog->bpf_func + prologue_size); */
- EMIT(PPC_RAW_LWZ(__REG_R3, __REG_R3, offsetof(struct bpf_prog, bpf_func)));
+ EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_prog, bpf_func)));
if (ctx->seen & SEEN_FUNC)
- EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
+ EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
- EMIT(PPC_RAW_ADDIC(__REG_R3, __REG_R3, BPF_TAILCALL_PROLOGUE_SIZE));
+ EMIT(PPC_RAW_ADDIC(_R3, _R3, BPF_TAILCALL_PROLOGUE_SIZE));
if (ctx->seen & SEEN_FUNC)
- EMIT(PPC_RAW_MTLR(__REG_R0));
+ EMIT(PPC_RAW_MTLR(_R0));
- EMIT(PPC_RAW_MTCTR(__REG_R3));
+ EMIT(PPC_RAW_MTCTR(_R3));
- EMIT(PPC_RAW_MR(__REG_R3, bpf_to_ppc(ctx, BPF_REG_1)));
+ EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(ctx, BPF_REG_1)));
/* tear restore NVRs, ... */
bpf_jit_emit_common_epilogue(image, ctx);
@@ -352,8 +352,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
if (imm >= -32768 && imm < 32768) {
EMIT(PPC_RAW_ADDIC(dst_reg, dst_reg, imm));
} else {
- PPC_LI32(__REG_R0, imm);
- EMIT(PPC_RAW_ADDC(dst_reg, dst_reg, __REG_R0));
+ PPC_LI32(_R0, imm);
+ EMIT(PPC_RAW_ADDC(dst_reg, dst_reg, _R0));
}
if (imm >= 0)
EMIT(PPC_RAW_ADDZE(dst_reg_h, dst_reg_h));
@@ -362,11 +362,11 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
break;
case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */
bpf_set_seen_register(ctx, tmp_reg);
- EMIT(PPC_RAW_MULW(__REG_R0, dst_reg, src_reg_h));
+ EMIT(PPC_RAW_MULW(_R0, dst_reg, src_reg_h));
EMIT(PPC_RAW_MULW(dst_reg_h, dst_reg_h, src_reg));
EMIT(PPC_RAW_MULHWU(tmp_reg, dst_reg, src_reg));
EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg));
- EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, __REG_R0));
+ EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, _R0));
EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, tmp_reg));
break;
case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */
@@ -376,8 +376,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
if (imm >= -32768 && imm < 32768) {
EMIT(PPC_RAW_MULI(dst_reg, dst_reg, imm));
} else {
- PPC_LI32(__REG_R0, imm);
- EMIT(PPC_RAW_MULW(dst_reg, dst_reg, __REG_R0));
+ PPC_LI32(_R0, imm);
+ EMIT(PPC_RAW_MULW(dst_reg, dst_reg, _R0));
}
break;
case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */
@@ -398,17 +398,17 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
EMIT(PPC_RAW_MULW(dst_reg_h, dst_reg_h, tmp_reg));
if (imm < 0)
EMIT(PPC_RAW_SUB(dst_reg_h, dst_reg_h, dst_reg));
- EMIT(PPC_RAW_MULHWU(__REG_R0, dst_reg, tmp_reg));
+ EMIT(PPC_RAW_MULHWU(_R0, dst_reg, tmp_reg));
EMIT(PPC_RAW_MULW(dst_reg, dst_reg, tmp_reg));
- EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, __REG_R0));
+ EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, _R0));
break;
case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, src_reg));
break;
case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
- EMIT(PPC_RAW_DIVWU(__REG_R0, dst_reg, src_reg));
- EMIT(PPC_RAW_MULW(__REG_R0, src_reg, __REG_R0));
- EMIT(PPC_RAW_SUB(dst_reg, dst_reg, __REG_R0));
+ EMIT(PPC_RAW_DIVWU(_R0, dst_reg, src_reg));
+ EMIT(PPC_RAW_MULW(_R0, src_reg, _R0));
+ EMIT(PPC_RAW_SUB(dst_reg, dst_reg, _R0));
break;
case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
return -EOPNOTSUPP;
@@ -420,8 +420,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
if (imm == 1)
break;
- PPC_LI32(__REG_R0, imm);
- EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, __REG_R0));
+ PPC_LI32(_R0, imm);
+ EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, _R0));
break;
case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */
if (!imm)
@@ -430,9 +430,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
if (!is_power_of_2((u32)imm)) {
bpf_set_seen_register(ctx, tmp_reg);
PPC_LI32(tmp_reg, imm);
- EMIT(PPC_RAW_DIVWU(__REG_R0, dst_reg, tmp_reg));
- EMIT(PPC_RAW_MULW(__REG_R0, tmp_reg, __REG_R0));
- EMIT(PPC_RAW_SUB(dst_reg, dst_reg, __REG_R0));
+ EMIT(PPC_RAW_DIVWU(_R0, dst_reg, tmp_reg));
+ EMIT(PPC_RAW_MULW(_R0, tmp_reg, _R0));
+ EMIT(PPC_RAW_SUB(dst_reg, dst_reg, _R0));
break;
}
if (imm == 1)
@@ -503,8 +503,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0,
32 - fls(imm), 32 - ffs(imm)));
} else {
- PPC_LI32(__REG_R0, imm);
- EMIT(PPC_RAW_AND(dst_reg, dst_reg, __REG_R0));
+ PPC_LI32(_R0, imm);
+ EMIT(PPC_RAW_AND(dst_reg, dst_reg, _R0));
}
break;
case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
@@ -555,12 +555,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
break;
case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */
bpf_set_seen_register(ctx, tmp_reg);
- EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32));
+ EMIT(PPC_RAW_SUBFIC(_R0, src_reg, 32));
EMIT(PPC_RAW_SLW(dst_reg_h, dst_reg_h, src_reg));
EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32));
- EMIT(PPC_RAW_SRW(__REG_R0, dst_reg, __REG_R0));
+ EMIT(PPC_RAW_SRW(_R0, dst_reg, _R0));
EMIT(PPC_RAW_SLW(tmp_reg, dst_reg, tmp_reg));
- EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, __REG_R0));
+ EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, _R0));
EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg));
EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, tmp_reg));
break;
@@ -591,12 +591,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
break;
case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */
bpf_set_seen_register(ctx, tmp_reg);
- EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32));
+ EMIT(PPC_RAW_SUBFIC(_R0, src_reg, 32));
EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg));
EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32));
- EMIT(PPC_RAW_SLW(__REG_R0, dst_reg_h, __REG_R0));
+ EMIT(PPC_RAW_SLW(_R0, dst_reg_h, _R0));
EMIT(PPC_RAW_SRW(tmp_reg, dst_reg_h, tmp_reg));
- EMIT(PPC_RAW_OR(dst_reg, dst_reg, __REG_R0));
+ EMIT(PPC_RAW_OR(dst_reg, dst_reg, _R0));
EMIT(PPC_RAW_SRW(dst_reg_h, dst_reg_h, src_reg));
EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg));
break;
@@ -627,15 +627,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
break;
case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */
bpf_set_seen_register(ctx, tmp_reg);
- EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32));
+ EMIT(PPC_RAW_SUBFIC(_R0, src_reg, 32));
EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg));
- EMIT(PPC_RAW_SLW(__REG_R0, dst_reg_h, __REG_R0));
+ EMIT(PPC_RAW_SLW(_R0, dst_reg_h, _R0));
EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32));
- EMIT(PPC_RAW_OR(dst_reg, dst_reg, __REG_R0));
- EMIT(PPC_RAW_RLWINM(__REG_R0, tmp_reg, 0, 26, 26));
+ EMIT(PPC_RAW_OR(dst_reg, dst_reg, _R0));
+ EMIT(PPC_RAW_RLWINM(_R0, tmp_reg, 0, 26, 26));
EMIT(PPC_RAW_SRAW(tmp_reg, dst_reg_h, tmp_reg));
EMIT(PPC_RAW_SRAW(dst_reg_h, dst_reg_h, src_reg));
- EMIT(PPC_RAW_SLW(tmp_reg, tmp_reg, __REG_R0));
+ EMIT(PPC_RAW_SLW(tmp_reg, tmp_reg, _R0));
EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg));
break;
case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */
@@ -702,24 +702,24 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
* 2 bytes are already in their final position
* -- byte 2 and 4 (of bytes 1, 2, 3 and 4)
*/
- EMIT(PPC_RAW_RLWINM(__REG_R0, dst_reg, 8, 0, 31));
+ EMIT(PPC_RAW_RLWINM(_R0, dst_reg, 8, 0, 31));
/* Rotate 24 bits and insert byte 1 */
- EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg, 24, 0, 7));
+ EMIT(PPC_RAW_RLWIMI(_R0, dst_reg, 24, 0, 7));
/* Rotate 24 bits and insert byte 3 */
- EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg, 24, 16, 23));
- EMIT(PPC_RAW_MR(dst_reg, __REG_R0));
+ EMIT(PPC_RAW_RLWIMI(_R0, dst_reg, 24, 16, 23));
+ EMIT(PPC_RAW_MR(dst_reg, _R0));
break;
case 64:
bpf_set_seen_register(ctx, tmp_reg);
EMIT(PPC_RAW_RLWINM(tmp_reg, dst_reg, 8, 0, 31));
- EMIT(PPC_RAW_RLWINM(__REG_R0, dst_reg_h, 8, 0, 31));
+ EMIT(PPC_RAW_RLWINM(_R0, dst_reg_h, 8, 0, 31));
/* Rotate 24 bits and insert byte 1 */
EMIT(PPC_RAW_RLWIMI(tmp_reg, dst_reg, 24, 0, 7));
- EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg_h, 24, 0, 7));
+ EMIT(PPC_RAW_RLWIMI(_R0, dst_reg_h, 24, 0, 7));
/* Rotate 24 bits and insert byte 3 */
EMIT(PPC_RAW_RLWIMI(tmp_reg, dst_reg, 24, 16, 23));
- EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg_h, 24, 16, 23));
- EMIT(PPC_RAW_MR(dst_reg, __REG_R0));
+ EMIT(PPC_RAW_RLWIMI(_R0, dst_reg_h, 24, 16, 23));
+ EMIT(PPC_RAW_MR(dst_reg, _R0));
EMIT(PPC_RAW_MR(dst_reg_h, tmp_reg));
break;
}
@@ -744,32 +744,32 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
EMIT(PPC_RAW_STB(src_reg, dst_reg, off));
break;
case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
- PPC_LI32(__REG_R0, imm);
- EMIT(PPC_RAW_STB(__REG_R0, dst_reg, off));
+ PPC_LI32(_R0, imm);
+ EMIT(PPC_RAW_STB(_R0, dst_reg, off));
break;
case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */
EMIT(PPC_RAW_STH(src_reg, dst_reg, off));
break;
case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */
- PPC_LI32(__REG_R0, imm);
- EMIT(PPC_RAW_STH(__REG_R0, dst_reg, off));
+ PPC_LI32(_R0, imm);
+ EMIT(PPC_RAW_STH(_R0, dst_reg, off));
break;
case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
EMIT(PPC_RAW_STW(src_reg, dst_reg, off));
break;
case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
- PPC_LI32(__REG_R0, imm);
- EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off));
+ PPC_LI32(_R0, imm);
+ EMIT(PPC_RAW_STW(_R0, dst_reg, off));
break;
case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */
EMIT(PPC_RAW_STW(src_reg_h, dst_reg, off));
EMIT(PPC_RAW_STW(src_reg, dst_reg, off + 4));
break;
case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
- PPC_LI32(__REG_R0, imm);
- EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off + 4));
- PPC_EX32(__REG_R0, imm);
- EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off));
+ PPC_LI32(_R0, imm);
+ EMIT(PPC_RAW_STW(_R0, dst_reg, off + 4));
+ PPC_EX32(_R0, imm);
+ EMIT(PPC_RAW_STW(_R0, dst_reg, off));
break;
/*
@@ -780,11 +780,11 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
/* Get offset into TMP_REG */
EMIT(PPC_RAW_LI(tmp_reg, off));
/* load value from memory into r0 */
- EMIT(PPC_RAW_LWARX(__REG_R0, tmp_reg, dst_reg, 0));
+ EMIT(PPC_RAW_LWARX(_R0, tmp_reg, dst_reg, 0));
/* add value from src_reg into this */
- EMIT(PPC_RAW_ADD(__REG_R0, __REG_R0, src_reg));
+ EMIT(PPC_RAW_ADD(_R0, _R0, src_reg));
/* store result back */
- EMIT(PPC_RAW_STWCX(__REG_R0, tmp_reg, dst_reg));
+ EMIT(PPC_RAW_STWCX(_R0, tmp_reg, dst_reg));
/* we're done if this succeeded */
PPC_BCC_SHORT(COND_NE, (ctx->idx - 3) * 4);
break;
@@ -852,14 +852,14 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
return ret;
if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) {
- EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5) - 1, __REG_R1, 8));
- EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5), __REG_R1, 12));
+ EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5) - 1, _R1, 8));
+ EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5), _R1, 12));
}
bpf_jit_emit_func_call_rel(image, ctx, func_addr);
- EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0) - 1, __REG_R3));
- EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0), __REG_R4));
+ EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0) - 1, _R3));
+ EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0), _R4));
break;
/*
@@ -967,12 +967,12 @@ cond_branch:
EMIT(PPC_RAW_CMPW(dst_reg, src_reg));
break;
case BPF_JMP | BPF_JSET | BPF_X:
- EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg_h, src_reg_h));
+ EMIT(PPC_RAW_AND_DOT(_R0, dst_reg_h, src_reg_h));
PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
- EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, src_reg));
+ EMIT(PPC_RAW_AND_DOT(_R0, dst_reg, src_reg));
break;
case BPF_JMP32 | BPF_JSET | BPF_X: {
- EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, src_reg));
+ EMIT(PPC_RAW_AND_DOT(_R0, dst_reg, src_reg));
break;
case BPF_JMP | BPF_JNE | BPF_K:
case BPF_JMP | BPF_JEQ | BPF_K:
@@ -990,11 +990,11 @@ cond_branch:
EMIT(PPC_RAW_CMPLWI(dst_reg, imm));
} else {
/* sign-extending load ... but unsigned comparison */
- PPC_EX32(__REG_R0, imm);
- EMIT(PPC_RAW_CMPLW(dst_reg_h, __REG_R0));
- PPC_LI32(__REG_R0, imm);
+ PPC_EX32(_R0, imm);
+ EMIT(PPC_RAW_CMPLW(dst_reg_h, _R0));
+ PPC_LI32(_R0, imm);
PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
- EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0));
+ EMIT(PPC_RAW_CMPLW(dst_reg, _R0));
}
break;
case BPF_JMP32 | BPF_JNE | BPF_K:
@@ -1006,8 +1006,8 @@ cond_branch:
if (imm >= 0 && imm < 65536) {
EMIT(PPC_RAW_CMPLWI(dst_reg, imm));
} else {
- PPC_LI32(__REG_R0, imm);
- EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0));
+ PPC_LI32(_R0, imm);
+ EMIT(PPC_RAW_CMPLW(dst_reg, _R0));
}
break;
}
@@ -1022,9 +1022,9 @@ cond_branch:
} else {
/* sign-extending load */
EMIT(PPC_RAW_CMPWI(dst_reg_h, imm < 0 ? -1 : 0));
- PPC_LI32(__REG_R0, imm);
+ PPC_LI32(_R0, imm);
PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
- EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0));
+ EMIT(PPC_RAW_CMPLW(dst_reg, _R0));
}
break;
case BPF_JMP32 | BPF_JSGT | BPF_K:
@@ -1039,32 +1039,32 @@ cond_branch:
EMIT(PPC_RAW_CMPWI(dst_reg, imm));
} else {
/* sign-extending load */
- PPC_LI32(__REG_R0, imm);
- EMIT(PPC_RAW_CMPW(dst_reg, __REG_R0));
+ PPC_LI32(_R0, imm);
+ EMIT(PPC_RAW_CMPW(dst_reg, _R0));
}
break;
case BPF_JMP | BPF_JSET | BPF_K:
/* andi does not sign-extend the immediate */
if (imm >= 0 && imm < 32768) {
/* PPC_ANDI is _only/always_ dot-form */
- EMIT(PPC_RAW_ANDI(__REG_R0, dst_reg, imm));
+ EMIT(PPC_RAW_ANDI(_R0, dst_reg, imm));
} else {
- PPC_LI32(__REG_R0, imm);
+ PPC_LI32(_R0, imm);
if (imm < 0) {
EMIT(PPC_RAW_CMPWI(dst_reg_h, 0));
PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
}
- EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, __REG_R0));
+ EMIT(PPC_RAW_AND_DOT(_R0, dst_reg, _R0));
}
break;
case BPF_JMP32 | BPF_JSET | BPF_K:
/* andi does not sign-extend the immediate */
if (imm >= -32768 && imm < 32768) {
/* PPC_ANDI is _only/always_ dot-form */
- EMIT(PPC_RAW_ANDI(__REG_R0, dst_reg, imm));
+ EMIT(PPC_RAW_ANDI(_R0, dst_reg, imm));
} else {
- PPC_LI32(__REG_R0, imm);
- EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, __REG_R0));
+ PPC_LI32(_R0, imm);
+ EMIT(PPC_RAW_AND_DOT(_R0, dst_reg, _R0));
}
break;
}
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 57a8c1153851..5cad5b5a7e97 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -94,7 +94,7 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
* save/restore LR unless we call other functions
*/
if (ctx->seen & SEEN_FUNC) {
- EMIT(PPC_INST_MFLR | __PPC_RT(R0));
+ EMIT(PPC_RAW_MFLR(_R0));
PPC_BPF_STL(0, 1, PPC_LR_STKOFF);
}
@@ -153,8 +153,8 @@ static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx,
PPC_LI64(b2p[TMP_REG_2], func);
/* Load actual entry point from function descriptor */
PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
- /* ... and move it to LR */
- EMIT(PPC_RAW_MTLR(b2p[TMP_REG_1]));
+ /* ... and move it to CTR */
+ EMIT(PPC_RAW_MTCTR(b2p[TMP_REG_1]));
/*
* Load TOC from function descriptor at offset 8.
* We can clobber r2 since we get called through a
@@ -165,9 +165,9 @@ static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx,
#else
/* We can clobber r12 */
PPC_FUNC_ADDR(12, func);
- EMIT(PPC_RAW_MTLR(12));
+ EMIT(PPC_RAW_MTCTR(12));
#endif
- EMIT(PPC_RAW_BLRL());
+ EMIT(PPC_RAW_BCTRL());
}
void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func)
@@ -202,8 +202,8 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 fun
PPC_BPF_LL(12, 12, 0);
#endif
- EMIT(PPC_RAW_MTLR(12));
- EMIT(PPC_RAW_BLRL());
+ EMIT(PPC_RAW_MTCTR(12));
+ EMIT(PPC_RAW_BCTRL());
}
static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index c02854dea2b2..2f46e31c7612 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -1,9 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_PERF_EVENTS) += callchain.o callchain_$(BITS).o perf_regs.o
-ifdef CONFIG_COMPAT
-obj-$(CONFIG_PERF_EVENTS) += callchain_32.o
-endif
+obj-y += callchain.o callchain_$(BITS).o perf_regs.o
+obj-$(CONFIG_COMPAT) += callchain_32.o
obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o
obj64-$(CONFIG_PPC_PERF_CTRS) += ppc970-pmu.o power5-pmu.o \
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index 6c028ee513c0..082f6d0308a4 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -40,7 +40,7 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp)
return 0;
}
-void
+void __no_sanitize_address
perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
{
unsigned long sp, next_sp;
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 16d4d1b6a1ff..bb0ee716de91 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -460,7 +460,7 @@ static __u64 power_pmu_bhrb_to(u64 addr)
sizeof(instr)))
return 0;
- return branch_target((struct ppc_inst *)&instr);
+ return branch_target(&instr);
}
/* Userspace: need copy instruction here then translate it */
@@ -468,7 +468,7 @@ static __u64 power_pmu_bhrb_to(u64 addr)
sizeof(instr)))
return 0;
- target = branch_target((struct ppc_inst *)&instr);
+ target = branch_target(&instr);
if ((!target) || (instr & BRANCH_ABSOLUTE))
return target;
@@ -2254,7 +2254,7 @@ unsigned long perf_instruction_pointer(struct pt_regs *regs)
bool use_siar = regs_use_siar(regs);
unsigned long siar = mfspr(SPRN_SIAR);
- if (ppmu->flags & PPMU_P10_DD1) {
+ if (ppmu && (ppmu->flags & PPMU_P10_DD1)) {
if (siar)
return siar;
else
diff --git a/arch/powerpc/perf/generic-compat-pmu.c b/arch/powerpc/perf/generic-compat-pmu.c
index eb8a6aaf4cc1..695975227e60 100644
--- a/arch/powerpc/perf/generic-compat-pmu.c
+++ b/arch/powerpc/perf/generic-compat-pmu.c
@@ -14,45 +14,119 @@
*
* 28 24 20 16 12 8 4 0
* | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
- * [ pmc ] [unit ] [ ] m [ pmcxsel ]
- * | |
- * | *- mark
- * |
- * |
- * *- combine
- *
- * Below uses IBM bit numbering.
- *
- * MMCR1[x:y] = unit (PMCxUNIT)
- * MMCR1[24] = pmc1combine[0]
- * MMCR1[25] = pmc1combine[1]
- * MMCR1[26] = pmc2combine[0]
- * MMCR1[27] = pmc2combine[1]
- * MMCR1[28] = pmc3combine[0]
- * MMCR1[29] = pmc3combine[1]
- * MMCR1[30] = pmc4combine[0]
- * MMCR1[31] = pmc4combine[1]
- *
+ * [ pmc ] [ pmcxsel ]
*/
/*
- * Some power9 event codes.
+ * Event codes defined in ISA v3.0B
*/
#define EVENT(_name, _code) _name = _code,
enum {
-EVENT(PM_CYC, 0x0001e)
-EVENT(PM_INST_CMPL, 0x00002)
+ /* Cycles, alternate code */
+ EVENT(PM_CYC_ALT, 0x100f0)
+ /* One or more instructions completed in a cycle */
+ EVENT(PM_CYC_INST_CMPL, 0x100f2)
+ /* Floating-point instruction completed */
+ EVENT(PM_FLOP_CMPL, 0x100f4)
+ /* Instruction ERAT/L1-TLB miss */
+ EVENT(PM_L1_ITLB_MISS, 0x100f6)
+ /* All instructions completed and none available */
+ EVENT(PM_NO_INST_AVAIL, 0x100f8)
+ /* A load-type instruction completed (ISA v3.0+) */
+ EVENT(PM_LD_CMPL, 0x100fc)
+ /* Instruction completed, alternate code (ISA v3.0+) */
+ EVENT(PM_INST_CMPL_ALT, 0x100fe)
+ /* A store-type instruction completed */
+ EVENT(PM_ST_CMPL, 0x200f0)
+ /* Instruction Dispatched */
+ EVENT(PM_INST_DISP, 0x200f2)
+ /* Run_cycles */
+ EVENT(PM_RUN_CYC, 0x200f4)
+ /* Data ERAT/L1-TLB miss/reload */
+ EVENT(PM_L1_DTLB_RELOAD, 0x200f6)
+ /* Taken branch completed */
+ EVENT(PM_BR_TAKEN_CMPL, 0x200fa)
+ /* Demand iCache Miss */
+ EVENT(PM_L1_ICACHE_MISS, 0x200fc)
+ /* L1 Dcache reload from memory */
+ EVENT(PM_L1_RELOAD_FROM_MEM, 0x200fe)
+ /* L1 Dcache store miss */
+ EVENT(PM_ST_MISS_L1, 0x300f0)
+ /* Alternate code for PM_INST_DISP */
+ EVENT(PM_INST_DISP_ALT, 0x300f2)
+ /* Branch direction or target mispredicted */
+ EVENT(PM_BR_MISPREDICT, 0x300f6)
+ /* Data TLB miss/reload */
+ EVENT(PM_DTLB_MISS, 0x300fc)
+ /* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
+ EVENT(PM_DATA_FROM_L3MISS, 0x300fe)
+ /* L1 Dcache load miss */
+ EVENT(PM_LD_MISS_L1, 0x400f0)
+ /* Cycle when instruction(s) dispatched */
+ EVENT(PM_CYC_INST_DISP, 0x400f2)
+ /* Branch or branch target mispredicted */
+ EVENT(PM_BR_MPRED_CMPL, 0x400f6)
+ /* Instructions completed with run latch set */
+ EVENT(PM_RUN_INST_CMPL, 0x400fa)
+ /* Instruction TLB miss/reload */
+ EVENT(PM_ITLB_MISS, 0x400fc)
+ /* Load data not cached */
+ EVENT(PM_LD_NOT_CACHED, 0x400fe)
+ /* Instructions */
+ EVENT(PM_INST_CMPL, 0x500fa)
+ /* Cycles */
+ EVENT(PM_CYC, 0x600f4)
};
#undef EVENT
+/* Table of alternatives, sorted in increasing order of column 0 */
+/* Note that in each row, column 0 must be the smallest */
+static const unsigned int generic_event_alternatives[][MAX_ALT] = {
+ { PM_CYC_ALT, PM_CYC },
+ { PM_INST_CMPL_ALT, PM_INST_CMPL },
+ { PM_INST_DISP, PM_INST_DISP_ALT },
+};
+
+static int generic_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{
+ int num_alt = 0;
+
+ num_alt = isa207_get_alternatives(event, alt,
+ ARRAY_SIZE(generic_event_alternatives), flags,
+ generic_event_alternatives);
+
+ return num_alt;
+}
+
GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC);
GENERIC_EVENT_ATTR(instructions, PM_INST_CMPL);
+GENERIC_EVENT_ATTR(stalled-cycles-frontend, PM_NO_INST_AVAIL);
+GENERIC_EVENT_ATTR(branch-misses, PM_BR_MPRED_CMPL);
+GENERIC_EVENT_ATTR(cache-misses, PM_LD_MISS_L1);
+
+CACHE_EVENT_ATTR(L1-dcache-load-misses, PM_LD_MISS_L1);
+CACHE_EVENT_ATTR(L1-dcache-store-misses, PM_ST_MISS_L1);
+CACHE_EVENT_ATTR(L1-icache-load-misses, PM_L1_ICACHE_MISS);
+CACHE_EVENT_ATTR(LLC-load-misses, PM_DATA_FROM_L3MISS);
+CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL);
+CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS);
+CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS);
static struct attribute *generic_compat_events_attr[] = {
GENERIC_EVENT_PTR(PM_CYC),
GENERIC_EVENT_PTR(PM_INST_CMPL),
+ GENERIC_EVENT_PTR(PM_NO_INST_AVAIL),
+ GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
+ GENERIC_EVENT_PTR(PM_LD_MISS_L1),
+ CACHE_EVENT_PTR(PM_LD_MISS_L1),
+ CACHE_EVENT_PTR(PM_ST_MISS_L1),
+ CACHE_EVENT_PTR(PM_L1_ICACHE_MISS),
+ CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
+ CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
+ CACHE_EVENT_PTR(PM_DTLB_MISS),
+ CACHE_EVENT_PTR(PM_ITLB_MISS),
NULL
};
@@ -63,17 +137,11 @@ static struct attribute_group generic_compat_pmu_events_group = {
PMU_FORMAT_ATTR(event, "config:0-19");
PMU_FORMAT_ATTR(pmcxsel, "config:0-7");
-PMU_FORMAT_ATTR(mark, "config:8");
-PMU_FORMAT_ATTR(combine, "config:10-11");
-PMU_FORMAT_ATTR(unit, "config:12-15");
PMU_FORMAT_ATTR(pmc, "config:16-19");
static struct attribute *generic_compat_pmu_format_attr[] = {
&format_attr_event.attr,
&format_attr_pmcxsel.attr,
- &format_attr_mark.attr,
- &format_attr_combine.attr,
- &format_attr_unit.attr,
&format_attr_pmc.attr,
NULL,
};
@@ -92,6 +160,9 @@ static const struct attribute_group *generic_compat_pmu_attr_groups[] = {
static int compat_generic_events[] = {
[PERF_COUNT_HW_CPU_CYCLES] = PM_CYC,
[PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PM_NO_INST_AVAIL,
+ [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL,
+ [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1,
};
#define C(x) PERF_COUNT_HW_CACHE_##x
@@ -105,11 +176,11 @@ static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[ C(L1D) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
+ [ C(RESULT_MISS) ] = PM_LD_MISS_L1,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
+ [ C(RESULT_MISS) ] = PM_ST_MISS_L1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0,
@@ -119,7 +190,7 @@ static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[ C(L1I) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
+ [ C(RESULT_MISS) ] = PM_L1_ICACHE_MISS,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0,
@@ -133,7 +204,7 @@ static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[ C(LL) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
+ [ C(RESULT_MISS) ] = PM_DATA_FROM_L3MISS,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0,
@@ -147,7 +218,7 @@ static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[ C(DTLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
+ [ C(RESULT_MISS) ] = PM_DTLB_MISS,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
@@ -161,7 +232,7 @@ static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[ C(ITLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
+ [ C(RESULT_MISS) ] = PM_ITLB_MISS,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
@@ -175,7 +246,7 @@ static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[ C(BPU) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
+ [ C(RESULT_MISS) ] = PM_BR_MPRED_CMPL,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
@@ -204,13 +275,30 @@ static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
#undef C
+/*
+ * We set MMCR0[CC5-6RUN] so we can use counters 5 and 6 for
+ * PM_INST_CMPL and PM_CYC.
+ */
+static int generic_compute_mmcr(u64 event[], int n_ev,
+ unsigned int hwc[], struct mmcr_regs *mmcr,
+ struct perf_event *pevents[], u32 flags)
+{
+ int ret;
+
+ ret = isa207_compute_mmcr(event, n_ev, hwc, mmcr, pevents, flags);
+ if (!ret)
+ mmcr->mmcr0 |= MMCR0_C56RUN;
+ return ret;
+}
+
static struct power_pmu generic_compat_pmu = {
.name = "GENERIC_COMPAT",
.n_counter = MAX_PMU_COUNTERS,
.add_fields = ISA207_ADD_FIELDS,
.test_adder = ISA207_TEST_ADDER,
- .compute_mmcr = isa207_compute_mmcr,
+ .compute_mmcr = generic_compute_mmcr,
.get_constraint = isa207_get_constraint,
+ .get_alternatives = generic_get_alternatives,
.disable_pmc = isa207_disable_pmc,
.flags = PPMU_HAS_SIER | PPMU_ARCH_207S,
.n_generic = ARRAY_SIZE(compat_generic_events),
@@ -223,6 +311,16 @@ int init_generic_compat_pmu(void)
{
int rc = 0;
+ /*
+ * From ISA v2.07 on, PMU features are architected;
+ * we require >= v3.0 because (a) that has PM_LD_CMPL and
+ * PM_INST_CMPL_ALT, which v2.07 doesn't have, and
+ * (b) we don't expect any non-IBM Power ISA
+ * implementations that conform to v2.07 but not v3.0.
+ */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ return -ENODEV;
+
rc = register_power_pmu(&generic_compat_pmu);
if (rc)
return rc;
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
index 8c0d324f657e..3823df235f25 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
@@ -582,6 +582,7 @@ static long mpc52xx_wdt_ioctl(struct file *file, unsigned int cmd,
if (ret)
break;
/* fall through and return the timeout */
+ fallthrough;
case WDIOC_GETTIMEOUT:
/* we need to round here as to avoid e.g. the following
diff --git a/arch/powerpc/platforms/86xx/mpc86xx_smp.c b/arch/powerpc/platforms/86xx/mpc86xx_smp.c
index 87f524e4b09c..8a7e55acf090 100644
--- a/arch/powerpc/platforms/86xx/mpc86xx_smp.c
+++ b/arch/powerpc/platforms/86xx/mpc86xx_smp.c
@@ -73,7 +73,7 @@ smp_86xx_kick_cpu(int nr)
/* Setup fake reset vector to call __secondary_start_mpc86xx. */
target = (unsigned long) __secondary_start_mpc86xx;
- patch_branch((struct ppc_inst *)vector, target, BRANCH_SET_LINK);
+ patch_branch(vector, target, BRANCH_SET_LINK);
/* Kick that CPU */
smp_86xx_release_core(nr);
@@ -83,7 +83,7 @@ smp_86xx_kick_cpu(int nr)
mdelay(1);
/* Restore the exception vector */
- patch_instruction((struct ppc_inst *)vector, ppc_inst(save_vector));
+ patch_instruction(vector, ppc_inst(save_vector));
local_irq_restore(flags);
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 7a5e8f4541e3..e02d29a9d12f 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -20,6 +20,8 @@ source "arch/powerpc/platforms/embedded6xx/Kconfig"
source "arch/powerpc/platforms/44x/Kconfig"
source "arch/powerpc/platforms/40x/Kconfig"
source "arch/powerpc/platforms/amigaone/Kconfig"
+source "arch/powerpc/platforms/book3s/Kconfig"
+source "arch/powerpc/platforms/microwatt/Kconfig"
config KVM_GUEST
bool "KVM Guest support"
@@ -49,6 +51,7 @@ config PPC_NATIVE
config PPC_OF_BOOT_TRAMPOLINE
bool "Support booting from Open Firmware or yaboot"
depends on PPC_BOOK3S_32 || PPC64
+ select RELOCATABLE if PPC64
default y
help
Support from booting from Open Firmware or yaboot using an
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index f998e655b570..0e1bb1cedd13 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -61,6 +61,7 @@ config 44x
select 4xx_SOC
select HAVE_PCI
select PHYS_64BIT
+ select PPC_HAVE_KUEP
endchoice
@@ -111,6 +112,7 @@ config PPC_BOOK3E_64
select PPC_FPU # Make it a choice ?
select PPC_SMP_MUXED_IPI
select PPC_DOORBELL
+ select ZONE_DMA
endchoice
@@ -389,7 +391,7 @@ config PPC_HAVE_KUEP
config PPC_KUEP
bool "Kernel Userspace Execution Prevention"
depends on PPC_HAVE_KUEP
- default y if !PPC_BOOK3S_32
+ default y
help
Enable support for Kernel Userspace Execution Prevention (KUEP)
@@ -401,7 +403,7 @@ config PPC_HAVE_KUAP
config PPC_KUAP
bool "Kernel Userspace Access Protection"
depends on PPC_HAVE_KUAP
- default y if !PPC_BOOK3S_32
+ default y
help
Enable support for Kernel Userspace Access Protection (KUAP)
@@ -424,10 +426,6 @@ config PPC_MMU_NOHASH
def_bool y
depends on !PPC_BOOK3S
-config PPC_MMU_NOHASH_32
- def_bool y
- depends on PPC_MMU_NOHASH && PPC32
-
config PPC_BOOK3E_MMU
def_bool y
depends on FSL_BOOKE || PPC_BOOK3E
@@ -476,9 +474,9 @@ config SMP
If you don't know what to do here, say N.
config NR_CPUS
- int "Maximum number of CPUs (2-8192)"
- range 2 8192
- depends on SMP
+ int "Maximum number of CPUs (2-8192)" if SMP
+ range 2 8192 if SMP
+ default "1" if !SMP
default "32" if PPC64
default "4"
diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile
index 143d4417f6cc..94470fb27c99 100644
--- a/arch/powerpc/platforms/Makefile
+++ b/arch/powerpc/platforms/Makefile
@@ -22,3 +22,5 @@ obj-$(CONFIG_PPC_CELL) += cell/
obj-$(CONFIG_PPC_PS3) += ps3/
obj-$(CONFIG_EMBEDDED6xx) += embedded6xx/
obj-$(CONFIG_AMIGAONE) += amigaone/
+obj-$(CONFIG_PPC_BOOK3S) += book3s/
+obj-$(CONFIG_PPC_MICROWATT) += microwatt/
diff --git a/arch/powerpc/platforms/book3s/Kconfig b/arch/powerpc/platforms/book3s/Kconfig
new file mode 100644
index 000000000000..34c931592ef0
--- /dev/null
+++ b/arch/powerpc/platforms/book3s/Kconfig
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_VAS
+ bool "IBM Virtual Accelerator Switchboard (VAS)"
+ depends on (PPC_POWERNV || PPC_PSERIES) && PPC_64K_PAGES
+ default y
+ help
+ This enables support for IBM Virtual Accelerator Switchboard (VAS).
+
+ VAS devices are found in POWER9-based and later systems, they
+ provide access to accelerator coprocessors such as NX-GZIP and
+ NX-842. This config allows the kernel to use NX-842 accelerators,
+ and user-mode APIs for the NX-GZIP accelerator on POWER9 PowerNV
+ and POWER10 PowerVM platforms.
+
+ If unsure, say "N".
diff --git a/arch/powerpc/platforms/book3s/Makefile b/arch/powerpc/platforms/book3s/Makefile
new file mode 100644
index 000000000000..e790f1910f61
--- /dev/null
+++ b/arch/powerpc/platforms/book3s/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_PPC_VAS) += vas-api.o
diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c
new file mode 100644
index 000000000000..30172e52e16b
--- /dev/null
+++ b/arch/powerpc/platforms/book3s/vas-api.c
@@ -0,0 +1,493 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * VAS user space API for its accelerators (Only NX-GZIP is supported now)
+ * Copyright (C) 2019 Haren Myneni, IBM Corp
+ */
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/cdev.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/kthread.h>
+#include <linux/sched/signal.h>
+#include <linux/mmu_context.h>
+#include <linux/io.h>
+#include <asm/vas.h>
+#include <uapi/asm/vas-api.h>
+
+/*
+ * The driver creates the device node that can be used as follows:
+ * For NX-GZIP
+ *
+ * fd = open("/dev/crypto/nx-gzip", O_RDWR);
+ * rc = ioctl(fd, VAS_TX_WIN_OPEN, &attr);
+ * paste_addr = mmap(NULL, PAGE_SIZE, prot, MAP_SHARED, fd, 0ULL).
+ * vas_copy(&crb, 0, 1);
+ * vas_paste(paste_addr, 0, 1);
+ * close(fd) or exit process to close window.
+ *
+ * where "vas_copy" and "vas_paste" are defined in copy-paste.h.
+ * copy/paste returns to the user space directly. So refer NX hardware
+ * documententation for exact copy/paste usage and completion / error
+ * conditions.
+ */
+
+/*
+ * Wrapper object for the nx-gzip device - there is just one instance of
+ * this node for the whole system.
+ */
+static struct coproc_dev {
+ struct cdev cdev;
+ struct device *device;
+ char *name;
+ dev_t devt;
+ struct class *class;
+ enum vas_cop_type cop_type;
+ const struct vas_user_win_ops *vops;
+} coproc_device;
+
+struct coproc_instance {
+ struct coproc_dev *coproc;
+ struct vas_window *txwin;
+};
+
+static char *coproc_devnode(struct device *dev, umode_t *mode)
+{
+ return kasprintf(GFP_KERNEL, "crypto/%s", dev_name(dev));
+}
+
+/*
+ * Take reference to pid and mm
+ */
+int get_vas_user_win_ref(struct vas_user_win_ref *task_ref)
+{
+ /*
+ * Window opened by a child thread may not be closed when
+ * it exits. So take reference to its pid and release it
+ * when the window is free by parent thread.
+ * Acquire a reference to the task's pid to make sure
+ * pid will not be re-used - needed only for multithread
+ * applications.
+ */
+ task_ref->pid = get_task_pid(current, PIDTYPE_PID);
+ /*
+ * Acquire a reference to the task's mm.
+ */
+ task_ref->mm = get_task_mm(current);
+ if (!task_ref->mm) {
+ put_pid(task_ref->pid);
+ pr_err("VAS: pid(%d): mm_struct is not found\n",
+ current->pid);
+ return -EPERM;
+ }
+
+ mmgrab(task_ref->mm);
+ mmput(task_ref->mm);
+ /*
+ * Process closes window during exit. In the case of
+ * multithread application, the child thread can open
+ * window and can exit without closing it. So takes tgid
+ * reference until window closed to make sure tgid is not
+ * reused.
+ */
+ task_ref->tgid = find_get_pid(task_tgid_vnr(current));
+
+ return 0;
+}
+
+/*
+ * Successful return must release the task reference with
+ * put_task_struct
+ */
+static bool ref_get_pid_and_task(struct vas_user_win_ref *task_ref,
+ struct task_struct **tskp, struct pid **pidp)
+{
+ struct task_struct *tsk;
+ struct pid *pid;
+
+ pid = task_ref->pid;
+ tsk = get_pid_task(pid, PIDTYPE_PID);
+ if (!tsk) {
+ pid = task_ref->tgid;
+ tsk = get_pid_task(pid, PIDTYPE_PID);
+ /*
+ * Parent thread (tgid) will be closing window when it
+ * exits. So should not get here.
+ */
+ if (WARN_ON_ONCE(!tsk))
+ return false;
+ }
+
+ /* Return if the task is exiting. */
+ if (tsk->flags & PF_EXITING) {
+ put_task_struct(tsk);
+ return false;
+ }
+
+ *tskp = tsk;
+ *pidp = pid;
+
+ return true;
+}
+
+/*
+ * Update the CSB to indicate a translation error.
+ *
+ * User space will be polling on CSB after the request is issued.
+ * If NX can handle the request without any issues, it updates CSB.
+ * Whereas if NX encounters page fault, the kernel will handle the
+ * fault and update CSB with translation error.
+ *
+ * If we are unable to update the CSB means copy_to_user failed due to
+ * invalid csb_addr, send a signal to the process.
+ */
+void vas_update_csb(struct coprocessor_request_block *crb,
+ struct vas_user_win_ref *task_ref)
+{
+ struct coprocessor_status_block csb;
+ struct kernel_siginfo info;
+ struct task_struct *tsk;
+ void __user *csb_addr;
+ struct pid *pid;
+ int rc;
+
+ /*
+ * NX user space windows can not be opened for task->mm=NULL
+ * and faults will not be generated for kernel requests.
+ */
+ if (WARN_ON_ONCE(!task_ref->mm))
+ return;
+
+ csb_addr = (void __user *)be64_to_cpu(crb->csb_addr);
+
+ memset(&csb, 0, sizeof(csb));
+ csb.cc = CSB_CC_FAULT_ADDRESS;
+ csb.ce = CSB_CE_TERMINATION;
+ csb.cs = 0;
+ csb.count = 0;
+
+ /*
+ * NX operates and returns in BE format as defined CRB struct.
+ * So saves fault_storage_addr in BE as NX pastes in FIFO and
+ * expects user space to convert to CPU format.
+ */
+ csb.address = crb->stamp.nx.fault_storage_addr;
+ csb.flags = 0;
+
+ /*
+ * Process closes send window after all pending NX requests are
+ * completed. In multi-thread applications, a child thread can
+ * open a window and can exit without closing it. May be some
+ * requests are pending or this window can be used by other
+ * threads later. We should handle faults if NX encounters
+ * pages faults on these requests. Update CSB with translation
+ * error and fault address. If csb_addr passed by user space is
+ * invalid, send SEGV signal to pid saved in window. If the
+ * child thread is not running, send the signal to tgid.
+ * Parent thread (tgid) will close this window upon its exit.
+ *
+ * pid and mm references are taken when window is opened by
+ * process (pid). So tgid is used only when child thread opens
+ * a window and exits without closing it.
+ */
+
+ if (!ref_get_pid_and_task(task_ref, &tsk, &pid))
+ return;
+
+ kthread_use_mm(task_ref->mm);
+ rc = copy_to_user(csb_addr, &csb, sizeof(csb));
+ /*
+ * User space polls on csb.flags (first byte). So add barrier
+ * then copy first byte with csb flags update.
+ */
+ if (!rc) {
+ csb.flags = CSB_V;
+ /* Make sure update to csb.flags is visible now */
+ smp_mb();
+ rc = copy_to_user(csb_addr, &csb, sizeof(u8));
+ }
+ kthread_unuse_mm(task_ref->mm);
+ put_task_struct(tsk);
+
+ /* Success */
+ if (!rc)
+ return;
+
+
+ pr_debug("Invalid CSB address 0x%p signalling pid(%d)\n",
+ csb_addr, pid_vnr(pid));
+
+ clear_siginfo(&info);
+ info.si_signo = SIGSEGV;
+ info.si_errno = EFAULT;
+ info.si_code = SEGV_MAPERR;
+ info.si_addr = csb_addr;
+ /*
+ * process will be polling on csb.flags after request is sent to
+ * NX. So generally CSB update should not fail except when an
+ * application passes invalid csb_addr. So an error message will
+ * be displayed and leave it to user space whether to ignore or
+ * handle this signal.
+ */
+ rcu_read_lock();
+ rc = kill_pid_info(SIGSEGV, &info, pid);
+ rcu_read_unlock();
+
+ pr_devel("%s(): pid %d kill_proc_info() rc %d\n", __func__,
+ pid_vnr(pid), rc);
+}
+
+void vas_dump_crb(struct coprocessor_request_block *crb)
+{
+ struct data_descriptor_entry *dde;
+ struct nx_fault_stamp *nx;
+
+ dde = &crb->source;
+ pr_devel("SrcDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n",
+ be64_to_cpu(dde->address), be32_to_cpu(dde->length),
+ dde->count, dde->index, dde->flags);
+
+ dde = &crb->target;
+ pr_devel("TgtDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n",
+ be64_to_cpu(dde->address), be32_to_cpu(dde->length),
+ dde->count, dde->index, dde->flags);
+
+ nx = &crb->stamp.nx;
+ pr_devel("NX Stamp: PSWID 0x%x, FSA 0x%llx, flags 0x%x, FS 0x%x\n",
+ be32_to_cpu(nx->pswid),
+ be64_to_cpu(crb->stamp.nx.fault_storage_addr),
+ nx->flags, nx->fault_status);
+}
+
+static int coproc_open(struct inode *inode, struct file *fp)
+{
+ struct coproc_instance *cp_inst;
+
+ cp_inst = kzalloc(sizeof(*cp_inst), GFP_KERNEL);
+ if (!cp_inst)
+ return -ENOMEM;
+
+ cp_inst->coproc = container_of(inode->i_cdev, struct coproc_dev,
+ cdev);
+ fp->private_data = cp_inst;
+
+ return 0;
+}
+
+static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg)
+{
+ void __user *uptr = (void __user *)arg;
+ struct vas_tx_win_open_attr uattr;
+ struct coproc_instance *cp_inst;
+ struct vas_window *txwin;
+ int rc;
+
+ cp_inst = fp->private_data;
+
+ /*
+ * One window for file descriptor
+ */
+ if (cp_inst->txwin)
+ return -EEXIST;
+
+ rc = copy_from_user(&uattr, uptr, sizeof(uattr));
+ if (rc) {
+ pr_err("%s(): copy_from_user() returns %d\n", __func__, rc);
+ return -EFAULT;
+ }
+
+ if (uattr.version != 1) {
+ pr_err("Invalid window open API version\n");
+ return -EINVAL;
+ }
+
+ if (!cp_inst->coproc->vops && !cp_inst->coproc->vops->open_win) {
+ pr_err("VAS API is not registered\n");
+ return -EACCES;
+ }
+
+ txwin = cp_inst->coproc->vops->open_win(uattr.vas_id, uattr.flags,
+ cp_inst->coproc->cop_type);
+ if (IS_ERR(txwin)) {
+ pr_err("%s() VAS window open failed, %ld\n", __func__,
+ PTR_ERR(txwin));
+ return PTR_ERR(txwin);
+ }
+
+ cp_inst->txwin = txwin;
+
+ return 0;
+}
+
+static int coproc_release(struct inode *inode, struct file *fp)
+{
+ struct coproc_instance *cp_inst = fp->private_data;
+ int rc;
+
+ if (cp_inst->txwin) {
+ if (cp_inst->coproc->vops &&
+ cp_inst->coproc->vops->close_win) {
+ rc = cp_inst->coproc->vops->close_win(cp_inst->txwin);
+ if (rc)
+ return rc;
+ }
+ cp_inst->txwin = NULL;
+ }
+
+ kfree(cp_inst);
+ fp->private_data = NULL;
+
+ /*
+ * We don't know here if user has other receive windows
+ * open, so we can't really call clear_thread_tidr().
+ * So, once the process calls set_thread_tidr(), the
+ * TIDR value sticks around until process exits, resulting
+ * in an extra copy in restore_sprs().
+ */
+
+ return 0;
+}
+
+static int coproc_mmap(struct file *fp, struct vm_area_struct *vma)
+{
+ struct coproc_instance *cp_inst = fp->private_data;
+ struct vas_window *txwin;
+ unsigned long pfn;
+ u64 paste_addr;
+ pgprot_t prot;
+ int rc;
+
+ txwin = cp_inst->txwin;
+
+ if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
+ pr_debug("%s(): size 0x%zx, PAGE_SIZE 0x%zx\n", __func__,
+ (vma->vm_end - vma->vm_start), PAGE_SIZE);
+ return -EINVAL;
+ }
+
+ /* Ensure instance has an open send window */
+ if (!txwin) {
+ pr_err("%s(): No send window open?\n", __func__);
+ return -EINVAL;
+ }
+
+ if (!cp_inst->coproc->vops && !cp_inst->coproc->vops->paste_addr) {
+ pr_err("%s(): VAS API is not registered\n", __func__);
+ return -EACCES;
+ }
+
+ paste_addr = cp_inst->coproc->vops->paste_addr(txwin);
+ if (!paste_addr) {
+ pr_err("%s(): Window paste address failed\n", __func__);
+ return -EINVAL;
+ }
+
+ pfn = paste_addr >> PAGE_SHIFT;
+
+ /* flags, page_prot from cxl_mmap(), except we want cachable */
+ vma->vm_flags |= VM_IO | VM_PFNMAP;
+ vma->vm_page_prot = pgprot_cached(vma->vm_page_prot);
+
+ prot = __pgprot(pgprot_val(vma->vm_page_prot) | _PAGE_DIRTY);
+
+ rc = remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff,
+ vma->vm_end - vma->vm_start, prot);
+
+ pr_devel("%s(): paste addr %llx at %lx, rc %d\n", __func__,
+ paste_addr, vma->vm_start, rc);
+
+ return rc;
+}
+
+static long coproc_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
+{
+ switch (cmd) {
+ case VAS_TX_WIN_OPEN:
+ return coproc_ioc_tx_win_open(fp, arg);
+ default:
+ return -EINVAL;
+ }
+}
+
+static struct file_operations coproc_fops = {
+ .open = coproc_open,
+ .release = coproc_release,
+ .mmap = coproc_mmap,
+ .unlocked_ioctl = coproc_ioctl,
+};
+
+/*
+ * Supporting only nx-gzip coprocessor type now, but this API code
+ * extended to other coprocessor types later.
+ */
+int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type,
+ const char *name,
+ const struct vas_user_win_ops *vops)
+{
+ int rc = -EINVAL;
+ dev_t devno;
+
+ rc = alloc_chrdev_region(&coproc_device.devt, 1, 1, name);
+ if (rc) {
+ pr_err("Unable to allocate coproc major number: %i\n", rc);
+ return rc;
+ }
+
+ pr_devel("%s device allocated, dev [%i,%i]\n", name,
+ MAJOR(coproc_device.devt), MINOR(coproc_device.devt));
+
+ coproc_device.class = class_create(mod, name);
+ if (IS_ERR(coproc_device.class)) {
+ rc = PTR_ERR(coproc_device.class);
+ pr_err("Unable to create %s class %d\n", name, rc);
+ goto err_class;
+ }
+ coproc_device.class->devnode = coproc_devnode;
+ coproc_device.cop_type = cop_type;
+ coproc_device.vops = vops;
+
+ coproc_fops.owner = mod;
+ cdev_init(&coproc_device.cdev, &coproc_fops);
+
+ devno = MKDEV(MAJOR(coproc_device.devt), 0);
+ rc = cdev_add(&coproc_device.cdev, devno, 1);
+ if (rc) {
+ pr_err("cdev_add() failed %d\n", rc);
+ goto err_cdev;
+ }
+
+ coproc_device.device = device_create(coproc_device.class, NULL,
+ devno, NULL, name, MINOR(devno));
+ if (IS_ERR(coproc_device.device)) {
+ rc = PTR_ERR(coproc_device.device);
+ pr_err("Unable to create coproc-%d %d\n", MINOR(devno), rc);
+ goto err;
+ }
+
+ pr_devel("%s: Added dev [%d,%d]\n", __func__, MAJOR(devno),
+ MINOR(devno));
+
+ return 0;
+
+err:
+ cdev_del(&coproc_device.cdev);
+err_cdev:
+ class_destroy(coproc_device.class);
+err_class:
+ unregister_chrdev_region(coproc_device.devt, 1);
+ return rc;
+}
+
+void vas_unregister_coproc_api(void)
+{
+ dev_t devno;
+
+ cdev_del(&coproc_device.cdev);
+ devno = MKDEV(MAJOR(coproc_device.devt), 0);
+ device_destroy(coproc_device.class, devno);
+
+ class_destroy(coproc_device.class);
+ unregister_chrdev_region(coproc_device.devt, 1);
+}
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index e7c976bcadff..cb70c5f25bc6 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -35,6 +35,7 @@ config PPC_IBM_CELL_BLADE
config AXON_MSI
bool
depends on PPC_IBM_CELL_BLADE && PCI_MSI
+ select IRQ_DOMAIN_NOMAP
default y
menu "Cell Broadband Engine options"
diff --git a/arch/powerpc/platforms/cell/pmu.c b/arch/powerpc/platforms/cell/pmu.c
index 35bbd15582af..b207a7f99be5 100644
--- a/arch/powerpc/platforms/cell/pmu.c
+++ b/arch/powerpc/platforms/cell/pmu.c
@@ -10,6 +10,7 @@
*/
#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
#include <linux/types.h>
#include <linux/export.h>
#include <asm/io.h>
diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c
index 93ea41680f54..a1c293f42a1f 100644
--- a/arch/powerpc/platforms/cell/spider-pci.c
+++ b/arch/powerpc/platforms/cell/spider-pci.c
@@ -25,10 +25,9 @@ struct spiderpci_iowa_private {
static void spiderpci_io_flush(struct iowa_bus *bus)
{
struct spiderpci_iowa_private *priv;
- u32 val;
priv = bus->private;
- val = in_be32(priv->regs + SPIDER_PCI_DUMMY_READ);
+ in_be32(priv->regs + SPIDER_PCI_DUMMY_READ);
iosync();
}
diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c
index d56b4e3241cd..b41e81b22fdc 100644
--- a/arch/powerpc/platforms/cell/spufs/switch.c
+++ b/arch/powerpc/platforms/cell/spufs/switch.c
@@ -1657,14 +1657,13 @@ static inline void restore_spu_mb(struct spu_state *csa, struct spu *spu)
static inline void check_ppu_mb_stat(struct spu_state *csa, struct spu *spu)
{
struct spu_problem __iomem *prob = spu->problem;
- u32 dummy = 0;
/* Restore, Step 66:
* If CSA.MB_Stat[P]=0 (mailbox empty) then
* read from the PPU_MB register.
*/
if ((csa->prob.mb_stat_R & 0xFF) == 0) {
- dummy = in_be32(&prob->pu_mb_R);
+ in_be32(&prob->pu_mb_R);
eieio();
}
}
@@ -1672,14 +1671,13 @@ static inline void check_ppu_mb_stat(struct spu_state *csa, struct spu *spu)
static inline void check_ppuint_mb_stat(struct spu_state *csa, struct spu *spu)
{
struct spu_priv2 __iomem *priv2 = spu->priv2;
- u64 dummy = 0UL;
/* Restore, Step 66:
* If CSA.MB_Stat[I]=0 (mailbox empty) then
* read from the PPUINT_MB register.
*/
if ((csa->prob.mb_stat_R & 0xFF0000) == 0) {
- dummy = in_be64(&priv2->puint_mb_R);
+ in_be64(&priv2->puint_mb_R);
eieio();
spu_int_stat_clear(spu, 2, CLASS2_ENABLE_MAILBOX_INTR);
eieio();
diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
index d39a9213a3e6..609bda2ad5dd 100644
--- a/arch/powerpc/platforms/embedded6xx/flipper-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
@@ -12,6 +12,7 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/irq.h>
+#include <linux/irqdomain.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <asm/io.h>
diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c
index 53065d564161..85521b3e7098 100644
--- a/arch/powerpc/platforms/embedded6xx/holly.c
+++ b/arch/powerpc/platforms/embedded6xx/holly.c
@@ -251,8 +251,8 @@ static int ppc750_machine_check_exception(struct pt_regs *regs)
/* Are we prepared to handle this fault */
if ((entry = search_exception_tables(regs->nip)) != NULL) {
tsi108_clear_pci_cfg_error();
- regs->msr |= MSR_RI;
- regs->nip = extable_fixup(entry);
+ regs_set_return_msr(regs, regs->msr | MSR_RI);
+ regs_set_return_ip(regs, extable_fixup(entry));
return 1;
}
return 0;
diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
index 5565647dc879..d8da6a483e59 100644
--- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
+++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
@@ -173,8 +173,8 @@ static int mpc7448_machine_check_exception(struct pt_regs *regs)
/* Are we prepared to handle this fault */
if ((entry = search_exception_tables(regs->nip)) != NULL) {
tsi108_clear_pci_cfg_error();
- regs->msr |= MSR_RI;
- regs->nip = extable_fixup(entry);
+ regs_set_return_msr(regs, regs->msr | MSR_RI);
+ regs_set_return_ip(regs, extable_fixup(entry));
return 1;
}
return 0;
diff --git a/arch/powerpc/platforms/microwatt/Kconfig b/arch/powerpc/platforms/microwatt/Kconfig
new file mode 100644
index 000000000000..8f6a81978461
--- /dev/null
+++ b/arch/powerpc/platforms/microwatt/Kconfig
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_MICROWATT
+ depends on PPC_BOOK3S_64 && !SMP
+ bool "Microwatt SoC platform"
+ select PPC_XICS
+ select PPC_ICS_NATIVE
+ select PPC_ICP_NATIVE
+ select PPC_NATIVE
+ select PPC_UDBG_16550
+ select ARCH_RANDOM
+ help
+ This option enables support for FPGA-based Microwatt implementations.
+
diff --git a/arch/powerpc/platforms/microwatt/Makefile b/arch/powerpc/platforms/microwatt/Makefile
new file mode 100644
index 000000000000..116d6d3ad3f0
--- /dev/null
+++ b/arch/powerpc/platforms/microwatt/Makefile
@@ -0,0 +1 @@
+obj-y += setup.o rng.o
diff --git a/arch/powerpc/platforms/microwatt/rng.c b/arch/powerpc/platforms/microwatt/rng.c
new file mode 100644
index 000000000000..3d8ee6eb7dad
--- /dev/null
+++ b/arch/powerpc/platforms/microwatt/rng.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Derived from arch/powerpc/platforms/powernv/rng.c, which is:
+ * Copyright 2013, Michael Ellerman, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "microwatt-rng: " fmt
+
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <asm/archrandom.h>
+#include <asm/cputable.h>
+#include <asm/machdep.h>
+
+#define DARN_ERR 0xFFFFFFFFFFFFFFFFul
+
+int microwatt_get_random_darn(unsigned long *v)
+{
+ unsigned long val;
+
+ /* Using DARN with L=1 - 64-bit conditioned random number */
+ asm volatile(PPC_DARN(%0, 1) : "=r"(val));
+
+ if (val == DARN_ERR)
+ return 0;
+
+ *v = val;
+
+ return 1;
+}
+
+static __init int rng_init(void)
+{
+ unsigned long val;
+ int i;
+
+ for (i = 0; i < 10; i++) {
+ if (microwatt_get_random_darn(&val)) {
+ ppc_md.get_random_seed = microwatt_get_random_darn;
+ return 0;
+ }
+ }
+
+ pr_warn("Unable to use DARN for get_random_seed()\n");
+
+ return -EIO;
+}
+machine_subsys_initcall(, rng_init);
diff --git a/arch/powerpc/platforms/microwatt/setup.c b/arch/powerpc/platforms/microwatt/setup.c
new file mode 100644
index 000000000000..0b02603bdb74
--- /dev/null
+++ b/arch/powerpc/platforms/microwatt/setup.c
@@ -0,0 +1,41 @@
+/*
+ * Microwatt FPGA-based SoC platform setup code.
+ *
+ * Copyright 2020 Paul Mackerras (paulus@ozlabs.org), IBM Corp.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+
+#include <asm/machdep.h>
+#include <asm/time.h>
+#include <asm/xics.h>
+#include <asm/udbg.h>
+
+static void __init microwatt_init_IRQ(void)
+{
+ xics_init();
+}
+
+static int __init microwatt_probe(void)
+{
+ return of_machine_is_compatible("microwatt-soc");
+}
+
+static int __init microwatt_populate(void)
+{
+ return of_platform_default_populate(NULL, NULL, NULL);
+}
+machine_arch_initcall(microwatt, microwatt_populate);
+
+define_machine(microwatt) {
+ .name = "microwatt",
+ .probe = microwatt_probe,
+ .init_IRQ = microwatt_init_IRQ,
+ .progress = udbg_progress,
+ .calibrate_decr = generic_calibrate_decr,
+};
diff --git a/arch/powerpc/platforms/pasemi/idle.c b/arch/powerpc/platforms/pasemi/idle.c
index 1c954c90b0f4..9b88e3cded7d 100644
--- a/arch/powerpc/platforms/pasemi/idle.c
+++ b/arch/powerpc/platforms/pasemi/idle.c
@@ -37,7 +37,7 @@ static int pasemi_system_reset_exception(struct pt_regs *regs)
*/
if (regs->msr & SRR1_WAKEMASK)
- regs->nip = regs->link;
+ regs_set_return_ip(regs, regs->link);
switch (regs->msr & SRR1_WAKEMASK) {
case SRR1_WAKEDEC:
@@ -58,7 +58,7 @@ static int pasemi_system_reset_exception(struct pt_regs *regs)
restore_astate(hard_smp_processor_id());
/* everything handled */
- regs->msr |= MSR_RI;
+ regs_set_return_msr(regs, regs->msr | MSR_RI);
return 1;
}
diff --git a/arch/powerpc/platforms/powermac/Kconfig b/arch/powerpc/platforms/powermac/Kconfig
index c02d8c503b29..b97bf12801eb 100644
--- a/arch/powerpc/platforms/powermac/Kconfig
+++ b/arch/powerpc/platforms/powermac/Kconfig
@@ -24,6 +24,7 @@ config PPC_PMAC32_PSURGE
bool "Support for powersurge upgrade cards" if EXPERT
depends on SMP && PPC32 && PPC_PMAC
select PPC_SMP_MUXED_IPI
+ select IRQ_DOMAIN_NOMAP
default y
help
The powersurge cpu boards can be used in the generation
diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c
index 9d4ecd292255..d20ef35e6d9d 100644
--- a/arch/powerpc/platforms/powermac/bootx_init.c
+++ b/arch/powerpc/platforms/powermac/bootx_init.c
@@ -433,7 +433,7 @@ static void __init btext_welcome(boot_infos_t *bi)
bootx_printf("\nframe buffer at : 0x%x", bi->dispDeviceBase);
bootx_printf(" (phys), 0x%x", bi->logicalDisplayBase);
bootx_printf(" (log)");
- bootx_printf("\nklimit : 0x%x",(unsigned long)klimit);
+ bootx_printf("\nklimit : 0x%x",(unsigned long)_end);
bootx_printf("\nboot_info at : 0x%x", bi);
__asm__ __volatile__ ("mfmsr %0" : "=r" (flags));
bootx_printf("\nMSR : 0x%x", flags);
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index adae2a6712e1..bdfea6d6ab69 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -810,7 +810,7 @@ static int smp_core99_kick_cpu(int nr)
* b __secondary_start_pmac_0 + nr*8
*/
target = (unsigned long) __secondary_start_pmac_0 + nr * 8;
- patch_branch((struct ppc_inst *)vector, target, BRANCH_SET_LINK);
+ patch_branch(vector, target, BRANCH_SET_LINK);
/* Put some life in our friend */
pmac_call_feature(PMAC_FTR_RESET_CPU, NULL, nr, 0);
@@ -823,7 +823,7 @@ static int smp_core99_kick_cpu(int nr)
mdelay(1);
/* Restore our exception vector */
- patch_instruction((struct ppc_inst *)vector, ppc_inst(save_vector));
+ patch_instruction(vector, ppc_inst(save_vector));
local_irq_restore(flags);
if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347);
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index 619b093a0657..043eefbbdd28 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -33,20 +33,6 @@ config PPC_MEMTRACE
Enabling this option allows for runtime allocation of memory (RAM)
for hardware tracing.
-config PPC_VAS
- bool "IBM Virtual Accelerator Switchboard (VAS)"
- depends on PPC_POWERNV && PPC_64K_PAGES
- default y
- help
- This enables support for IBM Virtual Accelerator Switchboard (VAS).
-
- VAS allows accelerators in co-processors like NX-GZIP and NX-842
- to be accessible to kernel subsystems and user processes.
-
- VAS adapters are found in POWER9 based systems.
-
- If unsure, say N.
-
config SCOM_DEBUGFS
bool "Expose SCOM controllers via debugfs"
depends on DEBUG_FS
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index be2546b96816..dc7b37c23b60 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -18,7 +18,7 @@ obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o
obj-$(CONFIG_OPAL_PRD) += opal-prd.o
obj-$(CONFIG_PERF_EVENTS) += opal-imc.o
obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o
-obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o vas-fault.o vas-api.o
+obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o vas-fault.o
obj-$(CONFIG_OCXL_BASE) += ocxl.o
obj-$(CONFIG_SCOM_DEBUGFS) += opal-xscom.o
obj-$(CONFIG_PPC_SECURE_BOOT) += opal-secvar.o
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 999997d9e9a9..528a7e0cf83a 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -604,7 +604,7 @@ struct p9_sprs {
u64 uamor;
};
-static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
+static unsigned long power9_idle_stop(unsigned long psscr)
{
int cpu = raw_smp_processor_id();
int first = cpu_first_thread_sibling(cpu);
@@ -620,8 +620,6 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
/* EC=ESL=0 case */
- BUG_ON(!mmu_on);
-
/*
* Wake synchronously. SRESET via xscom may still cause
* a 0x100 powersave wakeup with SRR1 reason!
@@ -803,8 +801,7 @@ core_woken:
__slb_restore_bolted_realmode();
out:
- if (mmu_on)
- mtmsr(MSR_KERNEL);
+ mtmsr(MSR_KERNEL);
return srr1;
}
@@ -895,7 +892,7 @@ struct p10_sprs {
*/
};
-static unsigned long power10_idle_stop(unsigned long psscr, bool mmu_on)
+static unsigned long power10_idle_stop(unsigned long psscr)
{
int cpu = raw_smp_processor_id();
int first = cpu_first_thread_sibling(cpu);
@@ -909,8 +906,6 @@ static unsigned long power10_idle_stop(unsigned long psscr, bool mmu_on)
if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
/* EC=ESL=0 case */
- BUG_ON(!mmu_on);
-
/*
* Wake synchronously. SRESET via xscom may still cause
* a 0x100 powersave wakeup with SRR1 reason!
@@ -991,8 +986,7 @@ core_woken:
__slb_restore_bolted_realmode();
out:
- if (mmu_on)
- mtmsr(MSR_KERNEL);
+ mtmsr(MSR_KERNEL);
return srr1;
}
@@ -1002,40 +996,10 @@ static unsigned long arch300_offline_stop(unsigned long psscr)
{
unsigned long srr1;
-#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- __ppc64_runlatch_off();
if (cpu_has_feature(CPU_FTR_ARCH_31))
- srr1 = power10_idle_stop(psscr, true);
+ srr1 = power10_idle_stop(psscr);
else
- srr1 = power9_idle_stop(psscr, true);
- __ppc64_runlatch_on();
-#else
- /*
- * Tell KVM we're entering idle.
- * This does not have to be done in real mode because the P9 MMU
- * is independent per-thread. Some steppings share radix/hash mode
- * between threads, but in that case KVM has a barrier sync in real
- * mode before and after switching between radix and hash.
- *
- * kvm_start_guest must still be called in real mode though, hence
- * the false argument.
- */
- local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
-
- __ppc64_runlatch_off();
- if (cpu_has_feature(CPU_FTR_ARCH_31))
- srr1 = power10_idle_stop(psscr, false);
- else
- srr1 = power9_idle_stop(psscr, false);
- __ppc64_runlatch_on();
-
- local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
- /* Order setting hwthread_state vs. testing hwthread_req */
- smp_mb();
- if (local_paca->kvm_hstate.hwthread_req)
- srr1 = idle_kvm_start_guest(srr1);
- mtmsr(MSR_KERNEL);
-#endif
+ srr1 = power9_idle_stop(psscr);
return srr1;
}
@@ -1055,9 +1019,9 @@ void arch300_idle_type(unsigned long stop_psscr_val,
__ppc64_runlatch_off();
if (cpu_has_feature(CPU_FTR_ARCH_31))
- srr1 = power10_idle_stop(psscr, true);
+ srr1 = power10_idle_stop(psscr);
else
- srr1 = power9_idle_stop(psscr, true);
+ srr1 = power9_idle_stop(psscr);
__ppc64_runlatch_on();
fini_irq_for_idle_irqsoff();
diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c
index 01401e3da7ca..f812c74c61e5 100644
--- a/arch/powerpc/platforms/powernv/opal-call.c
+++ b/arch/powerpc/platforms/powernv/opal-call.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/percpu.h>
#include <linux/jump_label.h>
+#include <asm/interrupt.h>
#include <asm/opal-api.h>
#include <asm/trace.h>
#include <asm/asm-prototypes.h>
@@ -100,6 +101,9 @@ static int64_t opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3,
bool mmu = (msr & (MSR_IR|MSR_DR));
int64_t ret;
+ /* OPAL call / firmware may use SRR and/or HSRR */
+ srr_regs_clobbered();
+
msr &= ~MSR_EE;
if (unlikely(!mmu))
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 303d7c775740..2835376e61a4 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -773,7 +773,7 @@ bool opal_mce_check_early_recovery(struct pt_regs *regs)
* Setup regs->nip to rfi into fixup address.
*/
if (recover_addr)
- regs->nip = recover_addr;
+ regs_set_return_ip(regs, recover_addr);
out:
return !!recover_addr;
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index b18468dc31ff..6bb3c52633fb 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -711,7 +711,7 @@ int pnv_pci_cfg_write(struct pci_dn *pdn,
return PCIBIOS_SUCCESSFUL;
}
-#if CONFIG_EEH
+#ifdef CONFIG_EEH
static bool pnv_pci_cfg_check(struct pci_dn *pdn)
{
struct eeh_dev *edev = NULL;
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
index 73207b53dc2b..7e98b00ea2e8 100644
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -169,6 +169,16 @@ static void update_hid_in_slw(u64 hid0)
}
}
+static inline void update_power8_hid0(unsigned long hid0)
+{
+ /*
+ * The HID0 update on Power8 should at the very least be
+ * preceded by a SYNC instruction followed by an ISYNC
+ * instruction
+ */
+ asm volatile("sync; mtspr %0,%1; isync":: "i"(SPRN_HID0), "r"(hid0));
+}
+
static void unsplit_core(void)
{
u64 hid0, mask;
diff --git a/arch/powerpc/platforms/powernv/vas-api.c b/arch/powerpc/platforms/powernv/vas-api.c
deleted file mode 100644
index 98ed5d8c5441..000000000000
--- a/arch/powerpc/platforms/powernv/vas-api.c
+++ /dev/null
@@ -1,278 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * VAS user space API for its accelerators (Only NX-GZIP is supported now)
- * Copyright (C) 2019 Haren Myneni, IBM Corp
- */
-
-#include <linux/kernel.h>
-#include <linux/device.h>
-#include <linux/cdev.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
-#include <asm/vas.h>
-#include <uapi/asm/vas-api.h>
-#include "vas.h"
-
-/*
- * The driver creates the device node that can be used as follows:
- * For NX-GZIP
- *
- * fd = open("/dev/crypto/nx-gzip", O_RDWR);
- * rc = ioctl(fd, VAS_TX_WIN_OPEN, &attr);
- * paste_addr = mmap(NULL, PAGE_SIZE, prot, MAP_SHARED, fd, 0ULL).
- * vas_copy(&crb, 0, 1);
- * vas_paste(paste_addr, 0, 1);
- * close(fd) or exit process to close window.
- *
- * where "vas_copy" and "vas_paste" are defined in copy-paste.h.
- * copy/paste returns to the user space directly. So refer NX hardware
- * documententation for exact copy/paste usage and completion / error
- * conditions.
- */
-
-/*
- * Wrapper object for the nx-gzip device - there is just one instance of
- * this node for the whole system.
- */
-static struct coproc_dev {
- struct cdev cdev;
- struct device *device;
- char *name;
- dev_t devt;
- struct class *class;
- enum vas_cop_type cop_type;
-} coproc_device;
-
-struct coproc_instance {
- struct coproc_dev *coproc;
- struct vas_window *txwin;
-};
-
-static char *coproc_devnode(struct device *dev, umode_t *mode)
-{
- return kasprintf(GFP_KERNEL, "crypto/%s", dev_name(dev));
-}
-
-static int coproc_open(struct inode *inode, struct file *fp)
-{
- struct coproc_instance *cp_inst;
-
- cp_inst = kzalloc(sizeof(*cp_inst), GFP_KERNEL);
- if (!cp_inst)
- return -ENOMEM;
-
- cp_inst->coproc = container_of(inode->i_cdev, struct coproc_dev,
- cdev);
- fp->private_data = cp_inst;
-
- return 0;
-}
-
-static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg)
-{
- void __user *uptr = (void __user *)arg;
- struct vas_tx_win_attr txattr = {};
- struct vas_tx_win_open_attr uattr;
- struct coproc_instance *cp_inst;
- struct vas_window *txwin;
- int rc, vasid;
-
- cp_inst = fp->private_data;
-
- /*
- * One window for file descriptor
- */
- if (cp_inst->txwin)
- return -EEXIST;
-
- rc = copy_from_user(&uattr, uptr, sizeof(uattr));
- if (rc) {
- pr_err("%s(): copy_from_user() returns %d\n", __func__, rc);
- return -EFAULT;
- }
-
- if (uattr.version != 1) {
- pr_err("Invalid version\n");
- return -EINVAL;
- }
-
- vasid = uattr.vas_id;
-
- vas_init_tx_win_attr(&txattr, cp_inst->coproc->cop_type);
-
- txattr.lpid = mfspr(SPRN_LPID);
- txattr.pidr = mfspr(SPRN_PID);
- txattr.user_win = true;
- txattr.rsvd_txbuf_count = false;
- txattr.pswid = false;
-
- pr_devel("Pid %d: Opening txwin, PIDR %ld\n", txattr.pidr,
- mfspr(SPRN_PID));
-
- txwin = vas_tx_win_open(vasid, cp_inst->coproc->cop_type, &txattr);
- if (IS_ERR(txwin)) {
- pr_err("%s() vas_tx_win_open() failed, %ld\n", __func__,
- PTR_ERR(txwin));
- return PTR_ERR(txwin);
- }
-
- cp_inst->txwin = txwin;
-
- return 0;
-}
-
-static int coproc_release(struct inode *inode, struct file *fp)
-{
- struct coproc_instance *cp_inst = fp->private_data;
-
- if (cp_inst->txwin) {
- vas_win_close(cp_inst->txwin);
- cp_inst->txwin = NULL;
- }
-
- kfree(cp_inst);
- fp->private_data = NULL;
-
- /*
- * We don't know here if user has other receive windows
- * open, so we can't really call clear_thread_tidr().
- * So, once the process calls set_thread_tidr(), the
- * TIDR value sticks around until process exits, resulting
- * in an extra copy in restore_sprs().
- */
-
- return 0;
-}
-
-static int coproc_mmap(struct file *fp, struct vm_area_struct *vma)
-{
- struct coproc_instance *cp_inst = fp->private_data;
- struct vas_window *txwin;
- unsigned long pfn;
- u64 paste_addr;
- pgprot_t prot;
- int rc;
-
- txwin = cp_inst->txwin;
-
- if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
- pr_debug("%s(): size 0x%zx, PAGE_SIZE 0x%zx\n", __func__,
- (vma->vm_end - vma->vm_start), PAGE_SIZE);
- return -EINVAL;
- }
-
- /* Ensure instance has an open send window */
- if (!txwin) {
- pr_err("%s(): No send window open?\n", __func__);
- return -EINVAL;
- }
-
- vas_win_paste_addr(txwin, &paste_addr, NULL);
- pfn = paste_addr >> PAGE_SHIFT;
-
- /* flags, page_prot from cxl_mmap(), except we want cachable */
- vma->vm_flags |= VM_IO | VM_PFNMAP;
- vma->vm_page_prot = pgprot_cached(vma->vm_page_prot);
-
- prot = __pgprot(pgprot_val(vma->vm_page_prot) | _PAGE_DIRTY);
-
- rc = remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff,
- vma->vm_end - vma->vm_start, prot);
-
- pr_devel("%s(): paste addr %llx at %lx, rc %d\n", __func__,
- paste_addr, vma->vm_start, rc);
-
- return rc;
-}
-
-static long coproc_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
-{
- switch (cmd) {
- case VAS_TX_WIN_OPEN:
- return coproc_ioc_tx_win_open(fp, arg);
- default:
- return -EINVAL;
- }
-}
-
-static struct file_operations coproc_fops = {
- .open = coproc_open,
- .release = coproc_release,
- .mmap = coproc_mmap,
- .unlocked_ioctl = coproc_ioctl,
-};
-
-/*
- * Supporting only nx-gzip coprocessor type now, but this API code
- * extended to other coprocessor types later.
- */
-int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type,
- const char *name)
-{
- int rc = -EINVAL;
- dev_t devno;
-
- rc = alloc_chrdev_region(&coproc_device.devt, 1, 1, name);
- if (rc) {
- pr_err("Unable to allocate coproc major number: %i\n", rc);
- return rc;
- }
-
- pr_devel("%s device allocated, dev [%i,%i]\n", name,
- MAJOR(coproc_device.devt), MINOR(coproc_device.devt));
-
- coproc_device.class = class_create(mod, name);
- if (IS_ERR(coproc_device.class)) {
- rc = PTR_ERR(coproc_device.class);
- pr_err("Unable to create %s class %d\n", name, rc);
- goto err_class;
- }
- coproc_device.class->devnode = coproc_devnode;
- coproc_device.cop_type = cop_type;
-
- coproc_fops.owner = mod;
- cdev_init(&coproc_device.cdev, &coproc_fops);
-
- devno = MKDEV(MAJOR(coproc_device.devt), 0);
- rc = cdev_add(&coproc_device.cdev, devno, 1);
- if (rc) {
- pr_err("cdev_add() failed %d\n", rc);
- goto err_cdev;
- }
-
- coproc_device.device = device_create(coproc_device.class, NULL,
- devno, NULL, name, MINOR(devno));
- if (IS_ERR(coproc_device.device)) {
- rc = PTR_ERR(coproc_device.device);
- pr_err("Unable to create coproc-%d %d\n", MINOR(devno), rc);
- goto err;
- }
-
- pr_devel("%s: Added dev [%d,%d]\n", __func__, MAJOR(devno),
- MINOR(devno));
-
- return 0;
-
-err:
- cdev_del(&coproc_device.cdev);
-err_cdev:
- class_destroy(coproc_device.class);
-err_class:
- unregister_chrdev_region(coproc_device.devt, 1);
- return rc;
-}
-EXPORT_SYMBOL_GPL(vas_register_coproc_api);
-
-void vas_unregister_coproc_api(void)
-{
- dev_t devno;
-
- cdev_del(&coproc_device.cdev);
- devno = MKDEV(MAJOR(coproc_device.devt), 0);
- device_destroy(coproc_device.class, devno);
-
- class_destroy(coproc_device.class);
- unregister_chrdev_region(coproc_device.devt, 1);
-}
-EXPORT_SYMBOL_GPL(vas_unregister_coproc_api);
diff --git a/arch/powerpc/platforms/powernv/vas-debug.c b/arch/powerpc/platforms/powernv/vas-debug.c
index 41fa90d2f4ab..3ce89a4b54be 100644
--- a/arch/powerpc/platforms/powernv/vas-debug.c
+++ b/arch/powerpc/platforms/powernv/vas-debug.c
@@ -9,6 +9,7 @@
#include <linux/slab.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
+#include <asm/vas.h>
#include "vas.h"
static struct dentry *vas_debugfs;
@@ -28,7 +29,7 @@ static char *cop_to_str(int cop)
static int info_show(struct seq_file *s, void *private)
{
- struct vas_window *window = s->private;
+ struct pnv_vas_window *window = s->private;
mutex_lock(&vas_mutex);
@@ -36,9 +37,9 @@ static int info_show(struct seq_file *s, void *private)
if (!window->hvwc_map)
goto unlock;
- seq_printf(s, "Type: %s, %s\n", cop_to_str(window->cop),
+ seq_printf(s, "Type: %s, %s\n", cop_to_str(window->vas_win.cop),
window->tx_win ? "Send" : "Receive");
- seq_printf(s, "Pid : %d\n", vas_window_pid(window));
+ seq_printf(s, "Pid : %d\n", vas_window_pid(&window->vas_win));
unlock:
mutex_unlock(&vas_mutex);
@@ -47,7 +48,7 @@ unlock:
DEFINE_SHOW_ATTRIBUTE(info);
-static inline void print_reg(struct seq_file *s, struct vas_window *win,
+static inline void print_reg(struct seq_file *s, struct pnv_vas_window *win,
char *name, u32 reg)
{
seq_printf(s, "0x%016llx %s\n", read_hvwc_reg(win, name, reg), name);
@@ -55,7 +56,7 @@ static inline void print_reg(struct seq_file *s, struct vas_window *win,
static int hvwc_show(struct seq_file *s, void *private)
{
- struct vas_window *window = s->private;
+ struct pnv_vas_window *window = s->private;
mutex_lock(&vas_mutex);
@@ -103,8 +104,10 @@ unlock:
DEFINE_SHOW_ATTRIBUTE(hvwc);
-void vas_window_free_dbgdir(struct vas_window *window)
+void vas_window_free_dbgdir(struct pnv_vas_window *pnv_win)
{
+ struct vas_window *window = &pnv_win->vas_win;
+
if (window->dbgdir) {
debugfs_remove_recursive(window->dbgdir);
kfree(window->dbgname);
@@ -113,21 +116,21 @@ void vas_window_free_dbgdir(struct vas_window *window)
}
}
-void vas_window_init_dbgdir(struct vas_window *window)
+void vas_window_init_dbgdir(struct pnv_vas_window *window)
{
struct dentry *d;
if (!window->vinst->dbgdir)
return;
- window->dbgname = kzalloc(16, GFP_KERNEL);
- if (!window->dbgname)
+ window->vas_win.dbgname = kzalloc(16, GFP_KERNEL);
+ if (!window->vas_win.dbgname)
return;
- snprintf(window->dbgname, 16, "w%d", window->winid);
+ snprintf(window->vas_win.dbgname, 16, "w%d", window->vas_win.winid);
- d = debugfs_create_dir(window->dbgname, window->vinst->dbgdir);
- window->dbgdir = d;
+ d = debugfs_create_dir(window->vas_win.dbgname, window->vinst->dbgdir);
+ window->vas_win.dbgdir = d;
debugfs_create_file("info", 0444, d, window, &info_fops);
debugfs_create_file("hvwc", 0444, d, window, &hvwc_fops);
diff --git a/arch/powerpc/platforms/powernv/vas-fault.c b/arch/powerpc/platforms/powernv/vas-fault.c
index 3d21fce254b7..a7aabc18039e 100644
--- a/arch/powerpc/platforms/powernv/vas-fault.c
+++ b/arch/powerpc/platforms/powernv/vas-fault.c
@@ -26,150 +26,6 @@
*/
#define VAS_FAULT_WIN_FIFO_SIZE (4 << 20)
-static void dump_crb(struct coprocessor_request_block *crb)
-{
- struct data_descriptor_entry *dde;
- struct nx_fault_stamp *nx;
-
- dde = &crb->source;
- pr_devel("SrcDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n",
- be64_to_cpu(dde->address), be32_to_cpu(dde->length),
- dde->count, dde->index, dde->flags);
-
- dde = &crb->target;
- pr_devel("TgtDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n",
- be64_to_cpu(dde->address), be32_to_cpu(dde->length),
- dde->count, dde->index, dde->flags);
-
- nx = &crb->stamp.nx;
- pr_devel("NX Stamp: PSWID 0x%x, FSA 0x%llx, flags 0x%x, FS 0x%x\n",
- be32_to_cpu(nx->pswid),
- be64_to_cpu(crb->stamp.nx.fault_storage_addr),
- nx->flags, nx->fault_status);
-}
-
-/*
- * Update the CSB to indicate a translation error.
- *
- * User space will be polling on CSB after the request is issued.
- * If NX can handle the request without any issues, it updates CSB.
- * Whereas if NX encounters page fault, the kernel will handle the
- * fault and update CSB with translation error.
- *
- * If we are unable to update the CSB means copy_to_user failed due to
- * invalid csb_addr, send a signal to the process.
- */
-static void update_csb(struct vas_window *window,
- struct coprocessor_request_block *crb)
-{
- struct coprocessor_status_block csb;
- struct kernel_siginfo info;
- struct task_struct *tsk;
- void __user *csb_addr;
- struct pid *pid;
- int rc;
-
- /*
- * NX user space windows can not be opened for task->mm=NULL
- * and faults will not be generated for kernel requests.
- */
- if (WARN_ON_ONCE(!window->mm || !window->user_win))
- return;
-
- csb_addr = (void __user *)be64_to_cpu(crb->csb_addr);
-
- memset(&csb, 0, sizeof(csb));
- csb.cc = CSB_CC_FAULT_ADDRESS;
- csb.ce = CSB_CE_TERMINATION;
- csb.cs = 0;
- csb.count = 0;
-
- /*
- * NX operates and returns in BE format as defined CRB struct.
- * So saves fault_storage_addr in BE as NX pastes in FIFO and
- * expects user space to convert to CPU format.
- */
- csb.address = crb->stamp.nx.fault_storage_addr;
- csb.flags = 0;
-
- pid = window->pid;
- tsk = get_pid_task(pid, PIDTYPE_PID);
- /*
- * Process closes send window after all pending NX requests are
- * completed. In multi-thread applications, a child thread can
- * open a window and can exit without closing it. May be some
- * requests are pending or this window can be used by other
- * threads later. We should handle faults if NX encounters
- * pages faults on these requests. Update CSB with translation
- * error and fault address. If csb_addr passed by user space is
- * invalid, send SEGV signal to pid saved in window. If the
- * child thread is not running, send the signal to tgid.
- * Parent thread (tgid) will close this window upon its exit.
- *
- * pid and mm references are taken when window is opened by
- * process (pid). So tgid is used only when child thread opens
- * a window and exits without closing it.
- */
- if (!tsk) {
- pid = window->tgid;
- tsk = get_pid_task(pid, PIDTYPE_PID);
- /*
- * Parent thread (tgid) will be closing window when it
- * exits. So should not get here.
- */
- if (WARN_ON_ONCE(!tsk))
- return;
- }
-
- /* Return if the task is exiting. */
- if (tsk->flags & PF_EXITING) {
- put_task_struct(tsk);
- return;
- }
-
- kthread_use_mm(window->mm);
- rc = copy_to_user(csb_addr, &csb, sizeof(csb));
- /*
- * User space polls on csb.flags (first byte). So add barrier
- * then copy first byte with csb flags update.
- */
- if (!rc) {
- csb.flags = CSB_V;
- /* Make sure update to csb.flags is visible now */
- smp_mb();
- rc = copy_to_user(csb_addr, &csb, sizeof(u8));
- }
- kthread_unuse_mm(window->mm);
- put_task_struct(tsk);
-
- /* Success */
- if (!rc)
- return;
-
- pr_debug("Invalid CSB address 0x%p signalling pid(%d)\n",
- csb_addr, pid_vnr(pid));
-
- clear_siginfo(&info);
- info.si_signo = SIGSEGV;
- info.si_errno = EFAULT;
- info.si_code = SEGV_MAPERR;
- info.si_addr = csb_addr;
-
- /*
- * process will be polling on csb.flags after request is sent to
- * NX. So generally CSB update should not fail except when an
- * application passes invalid csb_addr. So an error message will
- * be displayed and leave it to user space whether to ignore or
- * handle this signal.
- */
- rcu_read_lock();
- rc = kill_pid_info(SIGSEGV, &info, pid);
- rcu_read_unlock();
-
- pr_devel("%s(): pid %d kill_proc_info() rc %d\n", __func__,
- pid_vnr(pid), rc);
-}
-
static void dump_fifo(struct vas_instance *vinst, void *entry)
{
unsigned long *end = vinst->fault_fifo + vinst->fault_fifo_size;
@@ -212,7 +68,7 @@ irqreturn_t vas_fault_thread_fn(int irq, void *data)
struct vas_instance *vinst = data;
struct coprocessor_request_block *crb, *entry;
struct coprocessor_request_block buf;
- struct vas_window *window;
+ struct pnv_vas_window *window;
unsigned long flags;
void *fifo;
@@ -272,7 +128,7 @@ irqreturn_t vas_fault_thread_fn(int irq, void *data)
vinst->vas_id, vinst->fault_fifo, fifo,
vinst->fault_crbs);
- dump_crb(crb);
+ vas_dump_crb(crb);
window = vas_pswid_to_window(vinst,
be32_to_cpu(crb->stamp.nx.pswid));
@@ -293,7 +149,14 @@ irqreturn_t vas_fault_thread_fn(int irq, void *data)
WARN_ON_ONCE(1);
} else {
- update_csb(window, crb);
+ /*
+ * NX sees faults only with user space windows.
+ */
+ if (window->user_win)
+ vas_update_csb(crb, &window->vas_win.task_ref);
+ else
+ WARN_ON_ONCE(!window->user_win);
+
/*
* Return credit for send window after processing
* fault CRB.
@@ -336,6 +199,7 @@ irqreturn_t vas_fault_handler(int irq, void *dev_id)
int vas_setup_fault_window(struct vas_instance *vinst)
{
struct vas_rx_win_attr attr;
+ struct vas_window *win;
vinst->fault_fifo_size = VAS_FAULT_WIN_FIFO_SIZE;
vinst->fault_fifo = kzalloc(vinst->fault_fifo_size, GFP_KERNEL);
@@ -364,18 +228,17 @@ int vas_setup_fault_window(struct vas_instance *vinst)
attr.lnotify_pid = mfspr(SPRN_PID);
attr.lnotify_tid = mfspr(SPRN_PID);
- vinst->fault_win = vas_rx_win_open(vinst->vas_id, VAS_COP_TYPE_FAULT,
- &attr);
-
- if (IS_ERR(vinst->fault_win)) {
- pr_err("VAS: Error %ld opening FaultWin\n",
- PTR_ERR(vinst->fault_win));
+ win = vas_rx_win_open(vinst->vas_id, VAS_COP_TYPE_FAULT, &attr);
+ if (IS_ERR(win)) {
+ pr_err("VAS: Error %ld opening FaultWin\n", PTR_ERR(win));
kfree(vinst->fault_fifo);
- return PTR_ERR(vinst->fault_win);
+ return PTR_ERR(win);
}
+ vinst->fault_win = container_of(win, struct pnv_vas_window, vas_win);
+
pr_devel("VAS: Created FaultWin %d, LPID/PID/TID [%d/%d/%d]\n",
- vinst->fault_win->winid, attr.lnotify_lpid,
+ vinst->fault_win->vas_win.winid, attr.lnotify_lpid,
attr.lnotify_pid, attr.lnotify_tid);
return 0;
diff --git a/arch/powerpc/platforms/powernv/vas-trace.h b/arch/powerpc/platforms/powernv/vas-trace.h
index a449b9f0c12e..ca2e08f2ddc0 100644
--- a/arch/powerpc/platforms/powernv/vas-trace.h
+++ b/arch/powerpc/platforms/powernv/vas-trace.h
@@ -80,7 +80,7 @@ TRACE_EVENT( vas_tx_win_open,
TRACE_EVENT( vas_paste_crb,
TP_PROTO(struct task_struct *tsk,
- struct vas_window *win),
+ struct pnv_vas_window *win),
TP_ARGS(tsk, win),
@@ -96,7 +96,7 @@ TRACE_EVENT( vas_paste_crb,
TP_fast_assign(
__entry->pid = tsk->pid;
__entry->vasid = win->vinst->vas_id;
- __entry->winid = win->winid;
+ __entry->winid = win->vas_win.winid;
__entry->paste_kaddr = (unsigned long)win->paste_kaddr
),
diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c
index 5f5fe63a3d1c..0f8d39fbf2b2 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -16,6 +16,7 @@
#include <linux/mmu_context.h>
#include <asm/switch_to.h>
#include <asm/ppc-opcode.h>
+#include <asm/vas.h>
#include "vas.h"
#include "copy-paste.h"
@@ -26,14 +27,14 @@
* Compute the paste address region for the window @window using the
* ->paste_base_addr and ->paste_win_id_shift we got from device tree.
*/
-void vas_win_paste_addr(struct vas_window *window, u64 *addr, int *len)
+void vas_win_paste_addr(struct pnv_vas_window *window, u64 *addr, int *len)
{
int winid;
u64 base, shift;
base = window->vinst->paste_base_addr;
shift = window->vinst->paste_win_id_shift;
- winid = window->winid;
+ winid = window->vas_win.winid;
*addr = base + (winid << shift);
if (len)
@@ -42,23 +43,23 @@ void vas_win_paste_addr(struct vas_window *window, u64 *addr, int *len)
pr_debug("Txwin #%d: Paste addr 0x%llx\n", winid, *addr);
}
-static inline void get_hvwc_mmio_bar(struct vas_window *window,
+static inline void get_hvwc_mmio_bar(struct pnv_vas_window *window,
u64 *start, int *len)
{
u64 pbaddr;
pbaddr = window->vinst->hvwc_bar_start;
- *start = pbaddr + window->winid * VAS_HVWC_SIZE;
+ *start = pbaddr + window->vas_win.winid * VAS_HVWC_SIZE;
*len = VAS_HVWC_SIZE;
}
-static inline void get_uwc_mmio_bar(struct vas_window *window,
+static inline void get_uwc_mmio_bar(struct pnv_vas_window *window,
u64 *start, int *len)
{
u64 pbaddr;
pbaddr = window->vinst->uwc_bar_start;
- *start = pbaddr + window->winid * VAS_UWC_SIZE;
+ *start = pbaddr + window->vas_win.winid * VAS_UWC_SIZE;
*len = VAS_UWC_SIZE;
}
@@ -67,7 +68,7 @@ static inline void get_uwc_mmio_bar(struct vas_window *window,
* space. Unlike MMIO regions (map_mmio_region() below), paste region must
* be mapped cache-able and is only applicable to send windows.
*/
-static void *map_paste_region(struct vas_window *txwin)
+static void *map_paste_region(struct pnv_vas_window *txwin)
{
int len;
void *map;
@@ -75,7 +76,7 @@ static void *map_paste_region(struct vas_window *txwin)
u64 start;
name = kasprintf(GFP_KERNEL, "window-v%d-w%d", txwin->vinst->vas_id,
- txwin->winid);
+ txwin->vas_win.winid);
if (!name)
goto free_name;
@@ -132,7 +133,7 @@ static void unmap_region(void *addr, u64 start, int len)
/*
* Unmap the paste address region for a window.
*/
-static void unmap_paste_region(struct vas_window *window)
+static void unmap_paste_region(struct pnv_vas_window *window)
{
int len;
u64 busaddr_start;
@@ -153,7 +154,7 @@ static void unmap_paste_region(struct vas_window *window)
* path, just minimize the time we hold the mutex for now. We can add
* a per-instance mutex later if necessary.
*/
-static void unmap_winctx_mmio_bars(struct vas_window *window)
+static void unmap_winctx_mmio_bars(struct pnv_vas_window *window)
{
int len;
void *uwc_map;
@@ -186,7 +187,7 @@ static void unmap_winctx_mmio_bars(struct vas_window *window)
* OS/User Window Context (UWC) MMIO Base Address Region for the given window.
* Map these bus addresses and save the mapped kernel addresses in @window.
*/
-static int map_winctx_mmio_bars(struct vas_window *window)
+static int map_winctx_mmio_bars(struct pnv_vas_window *window)
{
int len;
u64 start;
@@ -214,7 +215,7 @@ static int map_winctx_mmio_bars(struct vas_window *window)
* registers are not sequential. And, we can only write to offsets
* with valid registers.
*/
-static void reset_window_regs(struct vas_window *window)
+static void reset_window_regs(struct pnv_vas_window *window)
{
write_hvwc_reg(window, VREG(LPID), 0ULL);
write_hvwc_reg(window, VREG(PID), 0ULL);
@@ -270,7 +271,7 @@ static void reset_window_regs(struct vas_window *window)
* want to add fields to vas_winctx and move the initialization to
* init_vas_winctx_regs().
*/
-static void init_xlate_regs(struct vas_window *window, bool user_win)
+static void init_xlate_regs(struct pnv_vas_window *window, bool user_win)
{
u64 lpcr, val;
@@ -335,7 +336,7 @@ static void init_xlate_regs(struct vas_window *window, bool user_win)
*
* TODO: Reserved (aka dedicated) send buffers are not supported yet.
*/
-static void init_rsvd_tx_buf_count(struct vas_window *txwin,
+static void init_rsvd_tx_buf_count(struct pnv_vas_window *txwin,
struct vas_winctx *winctx)
{
write_hvwc_reg(txwin, VREG(TX_RSVD_BUF_COUNT), 0ULL);
@@ -357,7 +358,7 @@ static void init_rsvd_tx_buf_count(struct vas_window *txwin,
* as a one-time task? That could work for NX but what about other
* receivers? Let the receivers tell us the rx-fifo buffers for now.
*/
-static void init_winctx_regs(struct vas_window *window,
+static void init_winctx_regs(struct pnv_vas_window *window,
struct vas_winctx *winctx)
{
u64 val;
@@ -519,10 +520,10 @@ static int vas_assign_window_id(struct ida *ida)
return winid;
}
-static void vas_window_free(struct vas_window *window)
+static void vas_window_free(struct pnv_vas_window *window)
{
- int winid = window->winid;
struct vas_instance *vinst = window->vinst;
+ int winid = window->vas_win.winid;
unmap_winctx_mmio_bars(window);
@@ -533,10 +534,10 @@ static void vas_window_free(struct vas_window *window)
vas_release_window_id(&vinst->ida, winid);
}
-static struct vas_window *vas_window_alloc(struct vas_instance *vinst)
+static struct pnv_vas_window *vas_window_alloc(struct vas_instance *vinst)
{
int winid;
- struct vas_window *window;
+ struct pnv_vas_window *window;
winid = vas_assign_window_id(&vinst->ida);
if (winid < 0)
@@ -547,7 +548,7 @@ static struct vas_window *vas_window_alloc(struct vas_instance *vinst)
goto out_free;
window->vinst = vinst;
- window->winid = winid;
+ window->vas_win.winid = winid;
if (map_winctx_mmio_bars(window))
goto out_free;
@@ -562,7 +563,7 @@ out_free:
return ERR_PTR(-ENOMEM);
}
-static void put_rx_win(struct vas_window *rxwin)
+static void put_rx_win(struct pnv_vas_window *rxwin)
{
/* Better not be a send window! */
WARN_ON_ONCE(rxwin->tx_win);
@@ -578,10 +579,11 @@ static void put_rx_win(struct vas_window *rxwin)
*
* NOTE: We access ->windows[] table and assume that vinst->mutex is held.
*/
-static struct vas_window *get_user_rxwin(struct vas_instance *vinst, u32 pswid)
+static struct pnv_vas_window *get_user_rxwin(struct vas_instance *vinst,
+ u32 pswid)
{
int vasid, winid;
- struct vas_window *rxwin;
+ struct pnv_vas_window *rxwin;
decode_pswid(pswid, &vasid, &winid);
@@ -590,7 +592,7 @@ static struct vas_window *get_user_rxwin(struct vas_instance *vinst, u32 pswid)
rxwin = vinst->windows[winid];
- if (!rxwin || rxwin->tx_win || rxwin->cop != VAS_COP_TYPE_FTW)
+ if (!rxwin || rxwin->tx_win || rxwin->vas_win.cop != VAS_COP_TYPE_FTW)
return ERR_PTR(-EINVAL);
return rxwin;
@@ -602,10 +604,10 @@ static struct vas_window *get_user_rxwin(struct vas_instance *vinst, u32 pswid)
*
* See also function header of set_vinst_win().
*/
-static struct vas_window *get_vinst_rxwin(struct vas_instance *vinst,
+static struct pnv_vas_window *get_vinst_rxwin(struct vas_instance *vinst,
enum vas_cop_type cop, u32 pswid)
{
- struct vas_window *rxwin;
+ struct pnv_vas_window *rxwin;
mutex_lock(&vinst->mutex);
@@ -638,9 +640,9 @@ static struct vas_window *get_vinst_rxwin(struct vas_instance *vinst,
* window, we also save the window in the ->rxwin[] table.
*/
static void set_vinst_win(struct vas_instance *vinst,
- struct vas_window *window)
+ struct pnv_vas_window *window)
{
- int id = window->winid;
+ int id = window->vas_win.winid;
mutex_lock(&vinst->mutex);
@@ -649,8 +651,8 @@ static void set_vinst_win(struct vas_instance *vinst,
* unless its a user (FTW) window.
*/
if (!window->user_win && !window->tx_win) {
- WARN_ON_ONCE(vinst->rxwin[window->cop]);
- vinst->rxwin[window->cop] = window;
+ WARN_ON_ONCE(vinst->rxwin[window->vas_win.cop]);
+ vinst->rxwin[window->vas_win.cop] = window;
}
WARN_ON_ONCE(vinst->windows[id] != NULL);
@@ -663,16 +665,16 @@ static void set_vinst_win(struct vas_instance *vinst,
* Clear this window from the table(s) of windows for this VAS instance.
* See also function header of set_vinst_win().
*/
-static void clear_vinst_win(struct vas_window *window)
+static void clear_vinst_win(struct pnv_vas_window *window)
{
- int id = window->winid;
+ int id = window->vas_win.winid;
struct vas_instance *vinst = window->vinst;
mutex_lock(&vinst->mutex);
if (!window->user_win && !window->tx_win) {
- WARN_ON_ONCE(!vinst->rxwin[window->cop]);
- vinst->rxwin[window->cop] = NULL;
+ WARN_ON_ONCE(!vinst->rxwin[window->vas_win.cop]);
+ vinst->rxwin[window->vas_win.cop] = NULL;
}
WARN_ON_ONCE(vinst->windows[id] != window);
@@ -681,7 +683,7 @@ static void clear_vinst_win(struct vas_window *window)
mutex_unlock(&vinst->mutex);
}
-static void init_winctx_for_rxwin(struct vas_window *rxwin,
+static void init_winctx_for_rxwin(struct pnv_vas_window *rxwin,
struct vas_rx_win_attr *rxattr,
struct vas_winctx *winctx)
{
@@ -702,7 +704,7 @@ static void init_winctx_for_rxwin(struct vas_window *rxwin,
winctx->rx_fifo = rxattr->rx_fifo;
winctx->rx_fifo_size = rxattr->rx_fifo_size;
- winctx->wcreds_max = rxwin->wcreds_max;
+ winctx->wcreds_max = rxwin->vas_win.wcreds_max;
winctx->pin_win = rxattr->pin_win;
winctx->nx_win = rxattr->nx_win;
@@ -851,7 +853,7 @@ EXPORT_SYMBOL_GPL(vas_init_rx_win_attr);
struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop,
struct vas_rx_win_attr *rxattr)
{
- struct vas_window *rxwin;
+ struct pnv_vas_window *rxwin;
struct vas_winctx winctx;
struct vas_instance *vinst;
@@ -870,21 +872,21 @@ struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop,
rxwin = vas_window_alloc(vinst);
if (IS_ERR(rxwin)) {
pr_devel("Unable to allocate memory for Rx window\n");
- return rxwin;
+ return (struct vas_window *)rxwin;
}
rxwin->tx_win = false;
rxwin->nx_win = rxattr->nx_win;
rxwin->user_win = rxattr->user_win;
- rxwin->cop = cop;
- rxwin->wcreds_max = rxattr->wcreds_max;
+ rxwin->vas_win.cop = cop;
+ rxwin->vas_win.wcreds_max = rxattr->wcreds_max;
init_winctx_for_rxwin(rxwin, rxattr, &winctx);
init_winctx_regs(rxwin, &winctx);
set_vinst_win(vinst, rxwin);
- return rxwin;
+ return &rxwin->vas_win;
}
EXPORT_SYMBOL_GPL(vas_rx_win_open);
@@ -905,7 +907,7 @@ void vas_init_tx_win_attr(struct vas_tx_win_attr *txattr, enum vas_cop_type cop)
}
EXPORT_SYMBOL_GPL(vas_init_tx_win_attr);
-static void init_winctx_for_txwin(struct vas_window *txwin,
+static void init_winctx_for_txwin(struct pnv_vas_window *txwin,
struct vas_tx_win_attr *txattr,
struct vas_winctx *winctx)
{
@@ -926,7 +928,7 @@ static void init_winctx_for_txwin(struct vas_window *txwin,
*/
memset(winctx, 0, sizeof(struct vas_winctx));
- winctx->wcreds_max = txwin->wcreds_max;
+ winctx->wcreds_max = txwin->vas_win.wcreds_max;
winctx->user_win = txattr->user_win;
winctx->nx_win = txwin->rxwin->nx_win;
@@ -946,13 +948,13 @@ static void init_winctx_for_txwin(struct vas_window *txwin,
winctx->lpid = txattr->lpid;
winctx->pidr = txattr->pidr;
- winctx->rx_win_id = txwin->rxwin->winid;
+ winctx->rx_win_id = txwin->rxwin->vas_win.winid;
/*
* IRQ and fault window setup is successful. Set fault window
* for the send window so that ready to handle faults.
*/
if (txwin->vinst->virq)
- winctx->fault_win_id = txwin->vinst->fault_win->winid;
+ winctx->fault_win_id = txwin->vinst->fault_win->vas_win.winid;
winctx->dma_type = VAS_DMA_TYPE_INJECT;
winctx->tc_mode = txattr->tc_mode;
@@ -962,7 +964,8 @@ static void init_winctx_for_txwin(struct vas_window *txwin,
winctx->irq_port = txwin->vinst->irq_port;
winctx->pswid = txattr->pswid ? txattr->pswid :
- encode_pswid(txwin->vinst->vas_id, txwin->winid);
+ encode_pswid(txwin->vinst->vas_id,
+ txwin->vas_win.winid);
}
static bool tx_win_args_valid(enum vas_cop_type cop,
@@ -993,8 +996,8 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
struct vas_tx_win_attr *attr)
{
int rc;
- struct vas_window *txwin;
- struct vas_window *rxwin;
+ struct pnv_vas_window *txwin;
+ struct pnv_vas_window *rxwin;
struct vas_winctx winctx;
struct vas_instance *vinst;
@@ -1020,7 +1023,7 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
rxwin = get_vinst_rxwin(vinst, cop, attr->pswid);
if (IS_ERR(rxwin)) {
pr_devel("No RxWin for vasid %d, cop %d\n", vasid, cop);
- return rxwin;
+ return (struct vas_window *)rxwin;
}
txwin = vas_window_alloc(vinst);
@@ -1029,12 +1032,12 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
goto put_rxwin;
}
- txwin->cop = cop;
+ txwin->vas_win.cop = cop;
txwin->tx_win = 1;
txwin->rxwin = rxwin;
txwin->nx_win = txwin->rxwin->nx_win;
txwin->user_win = attr->user_win;
- txwin->wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT;
+ txwin->vas_win.wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT;
init_winctx_for_txwin(txwin, attr, &winctx);
@@ -1064,56 +1067,16 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
rc = -ENODEV;
goto free_window;
}
-
- /*
- * Window opened by a child thread may not be closed when
- * it exits. So take reference to its pid and release it
- * when the window is free by parent thread.
- * Acquire a reference to the task's pid to make sure
- * pid will not be re-used - needed only for multithread
- * applications.
- */
- txwin->pid = get_task_pid(current, PIDTYPE_PID);
- /*
- * Acquire a reference to the task's mm.
- */
- txwin->mm = get_task_mm(current);
-
- if (!txwin->mm) {
- put_pid(txwin->pid);
- pr_err("VAS: pid(%d): mm_struct is not found\n",
- current->pid);
- rc = -EPERM;
+ rc = get_vas_user_win_ref(&txwin->vas_win.task_ref);
+ if (rc)
goto free_window;
- }
- mmgrab(txwin->mm);
- mmput(txwin->mm);
- mm_context_add_vas_window(txwin->mm);
- /*
- * Process closes window during exit. In the case of
- * multithread application, the child thread can open
- * window and can exit without closing it. Expects parent
- * thread to use and close the window. So do not need
- * to take pid reference for parent thread.
- */
- txwin->tgid = find_get_pid(task_tgid_vnr(current));
- /*
- * Even a process that has no foreign real address mapping can
- * use an unpaired COPY instruction (to no real effect). Issue
- * CP_ABORT to clear any pending COPY and prevent a covert
- * channel.
- *
- * __switch_to() will issue CP_ABORT on future context switches
- * if process / thread has any open VAS window (Use
- * current->mm->context.vas_windows).
- */
- asm volatile(PPC_CP_ABORT);
+ vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
}
set_vinst_win(vinst, txwin);
- return txwin;
+ return &txwin->vas_win;
free_window:
vas_window_free(txwin);
@@ -1132,12 +1095,14 @@ int vas_copy_crb(void *crb, int offset)
EXPORT_SYMBOL_GPL(vas_copy_crb);
#define RMA_LSMP_REPORT_ENABLE PPC_BIT(53)
-int vas_paste_crb(struct vas_window *txwin, int offset, bool re)
+int vas_paste_crb(struct vas_window *vwin, int offset, bool re)
{
+ struct pnv_vas_window *txwin;
int rc;
void *addr;
uint64_t val;
+ txwin = container_of(vwin, struct pnv_vas_window, vas_win);
trace_vas_paste_crb(current, txwin);
/*
@@ -1167,7 +1132,7 @@ int vas_paste_crb(struct vas_window *txwin, int offset, bool re)
else
rc = -EINVAL;
- pr_debug("Txwin #%d: Msg count %llu\n", txwin->winid,
+ pr_debug("Txwin #%d: Msg count %llu\n", txwin->vas_win.winid,
read_hvwc_reg(txwin, VREG(LRFIFO_PUSH)));
return rc;
@@ -1187,7 +1152,7 @@ EXPORT_SYMBOL_GPL(vas_paste_crb);
* user space. (NX-842 driver waits for CSB and Fast thread-wakeup
* doesn't use credit checking).
*/
-static void poll_window_credits(struct vas_window *window)
+static void poll_window_credits(struct pnv_vas_window *window)
{
u64 val;
int creds, mode;
@@ -1217,7 +1182,7 @@ retry:
* and issue CRB Kill to stop all pending requests. Need only
* if there is a bug in NX or fault handling in kernel.
*/
- if (creds < window->wcreds_max) {
+ if (creds < window->vas_win.wcreds_max) {
val = 0;
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(msecs_to_jiffies(10));
@@ -1228,7 +1193,8 @@ retry:
*/
if (!(count % 1000))
pr_warn_ratelimited("VAS: pid %d stuck. Waiting for credits returned for Window(%d). creds %d, Retries %d\n",
- vas_window_pid(window), window->winid,
+ vas_window_pid(&window->vas_win),
+ window->vas_win.winid,
creds, count);
goto retry;
@@ -1240,7 +1206,7 @@ retry:
* short time to queue a CRB, so window should not be busy for too long.
* Trying 5ms intervals.
*/
-static void poll_window_busy_state(struct vas_window *window)
+static void poll_window_busy_state(struct pnv_vas_window *window)
{
int busy;
u64 val;
@@ -1260,7 +1226,8 @@ retry:
*/
if (!(count % 1000))
pr_warn_ratelimited("VAS: pid %d stuck. Window (ID=%d) is in busy state. Retries %d\n",
- vas_window_pid(window), window->winid, count);
+ vas_window_pid(&window->vas_win),
+ window->vas_win.winid, count);
goto retry;
}
@@ -1282,7 +1249,7 @@ retry:
* casting out becomes necessary we should consider offloading the
* job to a worker thread, so the window close can proceed quickly.
*/
-static void poll_window_castout(struct vas_window *window)
+static void poll_window_castout(struct pnv_vas_window *window)
{
/* stub for now */
}
@@ -1291,7 +1258,7 @@ static void poll_window_castout(struct vas_window *window)
* Unpin and close a window so no new requests are accepted and the
* hardware can evict this window from cache if necessary.
*/
-static void unpin_close_window(struct vas_window *window)
+static void unpin_close_window(struct pnv_vas_window *window)
{
u64 val;
@@ -1313,11 +1280,15 @@ static void unpin_close_window(struct vas_window *window)
*
* Besides the hardware, kernel has some bookkeeping of course.
*/
-int vas_win_close(struct vas_window *window)
+int vas_win_close(struct vas_window *vwin)
{
- if (!window)
+ struct pnv_vas_window *window;
+
+ if (!vwin)
return 0;
+ window = container_of(vwin, struct pnv_vas_window, vas_win);
+
if (!window->tx_win && atomic_read(&window->num_txwins) != 0) {
pr_devel("Attempting to close an active Rx window!\n");
WARN_ON_ONCE(1);
@@ -1339,12 +1310,8 @@ int vas_win_close(struct vas_window *window)
/* if send window, drop reference to matching receive window */
if (window->tx_win) {
if (window->user_win) {
- /* Drop references to pid and mm */
- put_pid(window->pid);
- if (window->mm) {
- mm_context_remove_vas_window(window->mm);
- mmdrop(window->mm);
- }
+ put_vas_user_win_ref(&vwin->task_ref);
+ mm_context_remove_vas_window(vwin->task_ref.mm);
}
put_rx_win(window->rxwin);
}
@@ -1377,7 +1344,7 @@ EXPORT_SYMBOL_GPL(vas_win_close);
* - The kernel with return credit on fault window after reading entry
* from fault FIFO.
*/
-void vas_return_credit(struct vas_window *window, bool tx)
+void vas_return_credit(struct pnv_vas_window *window, bool tx)
{
uint64_t val;
@@ -1391,10 +1358,10 @@ void vas_return_credit(struct vas_window *window, bool tx)
}
}
-struct vas_window *vas_pswid_to_window(struct vas_instance *vinst,
+struct pnv_vas_window *vas_pswid_to_window(struct vas_instance *vinst,
uint32_t pswid)
{
- struct vas_window *window;
+ struct pnv_vas_window *window;
int winid;
if (!pswid) {
@@ -1431,13 +1398,74 @@ struct vas_window *vas_pswid_to_window(struct vas_instance *vinst,
* by NX).
*/
if (!window->tx_win || !window->user_win || !window->nx_win ||
- window->cop == VAS_COP_TYPE_FAULT ||
- window->cop == VAS_COP_TYPE_FTW) {
+ window->vas_win.cop == VAS_COP_TYPE_FAULT ||
+ window->vas_win.cop == VAS_COP_TYPE_FTW) {
pr_err("PSWID decode: id %d, tx %d, user %d, nx %d, cop %d\n",
winid, window->tx_win, window->user_win,
- window->nx_win, window->cop);
+ window->nx_win, window->vas_win.cop);
WARN_ON(1);
}
return window;
}
+
+static struct vas_window *vas_user_win_open(int vas_id, u64 flags,
+ enum vas_cop_type cop_type)
+{
+ struct vas_tx_win_attr txattr = {};
+
+ vas_init_tx_win_attr(&txattr, cop_type);
+
+ txattr.lpid = mfspr(SPRN_LPID);
+ txattr.pidr = mfspr(SPRN_PID);
+ txattr.user_win = true;
+ txattr.rsvd_txbuf_count = false;
+ txattr.pswid = false;
+
+ pr_devel("Pid %d: Opening txwin, PIDR %ld\n", txattr.pidr,
+ mfspr(SPRN_PID));
+
+ return vas_tx_win_open(vas_id, cop_type, &txattr);
+}
+
+static u64 vas_user_win_paste_addr(struct vas_window *txwin)
+{
+ struct pnv_vas_window *win;
+ u64 paste_addr;
+
+ win = container_of(txwin, struct pnv_vas_window, vas_win);
+ vas_win_paste_addr(win, &paste_addr, NULL);
+
+ return paste_addr;
+}
+
+static int vas_user_win_close(struct vas_window *txwin)
+{
+ vas_win_close(txwin);
+
+ return 0;
+}
+
+static const struct vas_user_win_ops vops = {
+ .open_win = vas_user_win_open,
+ .paste_addr = vas_user_win_paste_addr,
+ .close_win = vas_user_win_close,
+};
+
+/*
+ * Supporting only nx-gzip coprocessor type now, but this API code
+ * extended to other coprocessor types later.
+ */
+int vas_register_api_powernv(struct module *mod, enum vas_cop_type cop_type,
+ const char *name)
+{
+
+ return vas_register_coproc_api(mod, cop_type, name, &vops);
+}
+EXPORT_SYMBOL_GPL(vas_register_api_powernv);
+
+void vas_unregister_api_powernv(void)
+{
+ vas_unregister_coproc_api();
+}
+EXPORT_SYMBOL_GPL(vas_unregister_api_powernv);
diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h
index c7db3190baca..8bb08e395de0 100644
--- a/arch/powerpc/platforms/powernv/vas.h
+++ b/arch/powerpc/platforms/powernv/vas.h
@@ -334,11 +334,11 @@ struct vas_instance {
int fifo_in_progress; /* To wake up thread or return IRQ_HANDLED */
spinlock_t fault_lock; /* Protects fifo_in_progress update */
void *fault_fifo;
- struct vas_window *fault_win; /* Fault window */
+ struct pnv_vas_window *fault_win; /* Fault window */
struct mutex mutex;
- struct vas_window *rxwin[VAS_COP_TYPE_MAX];
- struct vas_window *windows[VAS_WINDOWS_PER_CHIP];
+ struct pnv_vas_window *rxwin[VAS_COP_TYPE_MAX];
+ struct pnv_vas_window *windows[VAS_WINDOWS_PER_CHIP];
char *name;
char *dbgname;
@@ -346,32 +346,24 @@ struct vas_instance {
};
/*
- * In-kernel state a VAS window. One per window.
+ * In-kernel state a VAS window on PowerNV. One per window.
*/
-struct vas_window {
+struct pnv_vas_window {
+ struct vas_window vas_win;
/* Fields common to send and receive windows */
struct vas_instance *vinst;
- int winid;
bool tx_win; /* True if send window */
bool nx_win; /* True if NX window */
bool user_win; /* True if user space window */
void *hvwc_map; /* HV window context */
void *uwc_map; /* OS/User window context */
- struct pid *pid; /* Linux process id of owner */
- struct pid *tgid; /* Thread group ID of owner */
- struct mm_struct *mm; /* Linux process mm_struct */
- int wcreds_max; /* Window credits */
-
- char *dbgname;
- struct dentry *dbgdir;
/* Fields applicable only to send windows */
void *paste_kaddr;
char *paste_addr_name;
- struct vas_window *rxwin;
+ struct pnv_vas_window *rxwin;
- /* Feilds applicable only to receive windows */
- enum vas_cop_type cop;
+ /* Fields applicable only to receive windows */
atomic_t num_txwins;
};
@@ -430,32 +422,32 @@ extern struct mutex vas_mutex;
extern struct vas_instance *find_vas_instance(int vasid);
extern void vas_init_dbgdir(void);
extern void vas_instance_init_dbgdir(struct vas_instance *vinst);
-extern void vas_window_init_dbgdir(struct vas_window *win);
-extern void vas_window_free_dbgdir(struct vas_window *win);
+extern void vas_window_init_dbgdir(struct pnv_vas_window *win);
+extern void vas_window_free_dbgdir(struct pnv_vas_window *win);
extern int vas_setup_fault_window(struct vas_instance *vinst);
extern irqreturn_t vas_fault_thread_fn(int irq, void *data);
extern irqreturn_t vas_fault_handler(int irq, void *dev_id);
-extern void vas_return_credit(struct vas_window *window, bool tx);
-extern struct vas_window *vas_pswid_to_window(struct vas_instance *vinst,
+extern void vas_return_credit(struct pnv_vas_window *window, bool tx);
+extern struct pnv_vas_window *vas_pswid_to_window(struct vas_instance *vinst,
uint32_t pswid);
-extern void vas_win_paste_addr(struct vas_window *window, u64 *addr,
- int *len);
+extern void vas_win_paste_addr(struct pnv_vas_window *window, u64 *addr,
+ int *len);
static inline int vas_window_pid(struct vas_window *window)
{
- return pid_vnr(window->pid);
+ return pid_vnr(window->task_ref.pid);
}
-static inline void vas_log_write(struct vas_window *win, char *name,
+static inline void vas_log_write(struct pnv_vas_window *win, char *name,
void *regptr, u64 val)
{
if (val)
pr_debug("%swin #%d: %s reg %p, val 0x%016llx\n",
- win->tx_win ? "Tx" : "Rx", win->winid, name,
- regptr, val);
+ win->tx_win ? "Tx" : "Rx", win->vas_win.winid,
+ name, regptr, val);
}
-static inline void write_uwc_reg(struct vas_window *win, char *name,
+static inline void write_uwc_reg(struct pnv_vas_window *win, char *name,
s32 reg, u64 val)
{
void *regptr;
@@ -466,7 +458,7 @@ static inline void write_uwc_reg(struct vas_window *win, char *name,
out_be64(regptr, val);
}
-static inline void write_hvwc_reg(struct vas_window *win, char *name,
+static inline void write_hvwc_reg(struct pnv_vas_window *win, char *name,
s32 reg, u64 val)
{
void *regptr;
@@ -477,7 +469,7 @@ static inline void write_hvwc_reg(struct vas_window *win, char *name,
out_be64(regptr, val);
}
-static inline u64 read_hvwc_reg(struct vas_window *win,
+static inline u64 read_hvwc_reg(struct pnv_vas_window *win,
char *name __maybe_unused, s32 reg)
{
return in_be64(win->hvwc_map+reg);
diff --git a/arch/powerpc/platforms/ps3/Kconfig b/arch/powerpc/platforms/ps3/Kconfig
index e32406e918d0..a4048b8c8c50 100644
--- a/arch/powerpc/platforms/ps3/Kconfig
+++ b/arch/powerpc/platforms/ps3/Kconfig
@@ -7,6 +7,7 @@ config PPC_PS3
select USB_OHCI_BIG_ENDIAN_MMIO
select USB_EHCI_BIG_ENDIAN_MMIO
select HAVE_PCI
+ select IRQ_DOMAIN_NOMAP
help
This option enables support for the Sony PS3 game console
and other platforms using the PS3 hypervisor. Enabling this
@@ -85,6 +86,15 @@ config PS3_SYS_MANAGER
This support is required for PS3 system control. In
general, all users will say Y or M.
+config PS3_VERBOSE_RESULT
+ bool "PS3 Verbose LV1 hypercall results" if PS3_ADVANCED
+ depends on PPC_PS3
+ help
+ Enables more verbose log mesages for LV1 hypercall results.
+
+ If in doubt, say N here and reduce the size of the kernel by a
+ small amount.
+
config PS3_REPOSITORY_WRITE
bool "PS3 Repository write support" if PS3_ADVANCED
depends on PPC_PS3
diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c
index 78f2339ed5cb..49871427f599 100644
--- a/arch/powerpc/platforms/ps3/interrupt.c
+++ b/arch/powerpc/platforms/ps3/interrupt.c
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/irq.h>
+#include <linux/irqdomain.h>
#include <asm/machdep.h>
#include <asm/udbg.h>
@@ -45,7 +46,7 @@
* implementation equates HV plug value to Linux virq value, constrains each
* interrupt to have a system wide unique plug number, and limits the range
* of the plug values to map into the first dword of the bitmaps. This
- * gives a usable range of plug values of {NUM_ISA_INTERRUPTS..63}. Note
+ * gives a usable range of plug values of {NR_IRQS_LEGACY..63}. Note
* that there is no constraint on how many in this set an individual thread
* can acquire.
*
@@ -721,7 +722,7 @@ static unsigned int ps3_get_irq(void)
}
#if defined(DEBUG)
- if (unlikely(plug < NUM_ISA_INTERRUPTS || plug > PS3_PLUG_MAX)) {
+ if (unlikely(plug < NR_IRQS_LEGACY || plug > PS3_PLUG_MAX)) {
dump_bmp(&per_cpu(ps3_private, 0));
dump_bmp(&per_cpu(ps3_private, 1));
BUG();
diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c
index d094321964fb..a81eac35d900 100644
--- a/arch/powerpc/platforms/ps3/mm.c
+++ b/arch/powerpc/platforms/ps3/mm.c
@@ -6,6 +6,7 @@
* Copyright 2006 Sony Corp.
*/
+#include <linux/dma-mapping.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/memblock.h>
@@ -1118,6 +1119,7 @@ int ps3_dma_region_init(struct ps3_system_bus_device *dev,
enum ps3_dma_region_type region_type, void *addr, unsigned long len)
{
unsigned long lpar_addr;
+ int result;
lpar_addr = addr ? ps3_mm_phys_to_lpar(__pa(addr)) : 0;
@@ -1129,6 +1131,16 @@ int ps3_dma_region_init(struct ps3_system_bus_device *dev,
r->offset -= map.r1.offset;
r->len = len ? len : ALIGN(map.total, 1 << r->page_size);
+ dev->core.dma_mask = &r->dma_mask;
+
+ result = dma_set_mask_and_coherent(&dev->core, DMA_BIT_MASK(32));
+
+ if (result < 0) {
+ dev_err(&dev->core, "%s:%d: dma_set_mask_and_coherent failed: %d\n",
+ __func__, __LINE__, result);
+ return result;
+ }
+
switch (dev->dev_type) {
case PS3_DEVICE_TYPE_SB:
r->region_ops = (USE_DYNAMIC_DMA)
diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c
index e9ae5dd03593..3de9145c20bc 100644
--- a/arch/powerpc/platforms/ps3/setup.c
+++ b/arch/powerpc/platforms/ps3/setup.c
@@ -36,6 +36,7 @@ DEFINE_MUTEX(ps3_gpu_mutex);
EXPORT_SYMBOL_GPL(ps3_gpu_mutex);
static union ps3_firmware_version ps3_firmware_version;
+static char ps3_firmware_version_str[16];
void ps3_get_firmware_version(union ps3_firmware_version *v)
{
@@ -182,6 +183,40 @@ static int ps3_set_dabr(unsigned long dabr, unsigned long dabrx)
return lv1_set_dabr(dabr, dabrx) ? -1 : 0;
}
+static ssize_t ps3_fw_version_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%s", ps3_firmware_version_str);
+}
+
+static int __init ps3_setup_sysfs(void)
+{
+ static struct kobj_attribute attr = __ATTR(fw-version, S_IRUGO,
+ ps3_fw_version_show, NULL);
+ static struct kobject *kobj;
+ int result;
+
+ kobj = kobject_create_and_add("ps3", firmware_kobj);
+
+ if (!kobj) {
+ pr_warn("%s:%d: kobject_create_and_add failed.\n", __func__,
+ __LINE__);
+ return -ENOMEM;
+ }
+
+ result = sysfs_create_file(kobj, &attr.attr);
+
+ if (result) {
+ pr_warn("%s:%d: sysfs_create_file failed.\n", __func__,
+ __LINE__);
+ kobject_put(kobj);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+core_initcall(ps3_setup_sysfs);
+
static void __init ps3_setup_arch(void)
{
u64 tmp;
@@ -190,9 +225,11 @@ static void __init ps3_setup_arch(void)
lv1_get_version_info(&ps3_firmware_version.raw, &tmp);
- printk(KERN_INFO "PS3 firmware version %u.%u.%u\n",
- ps3_firmware_version.major, ps3_firmware_version.minor,
- ps3_firmware_version.rev);
+ snprintf(ps3_firmware_version_str, sizeof(ps3_firmware_version_str),
+ "%u.%u.%u", ps3_firmware_version.major,
+ ps3_firmware_version.minor, ps3_firmware_version.rev);
+
+ printk(KERN_INFO "PS3 firmware version %s\n", ps3_firmware_version_str);
ps3_spu_set_platform();
diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c
index b431f41c6cb5..1a5665875165 100644
--- a/arch/powerpc/platforms/ps3/system-bus.c
+++ b/arch/powerpc/platforms/ps3/system-bus.c
@@ -64,9 +64,10 @@ static int ps3_open_hv_device_sb(struct ps3_system_bus_device *dev)
result = lv1_open_device(dev->bus_id, dev->dev_id, 0);
if (result) {
- pr_debug("%s:%d: lv1_open_device failed: %s\n", __func__,
- __LINE__, ps3_result(result));
- result = -EPERM;
+ pr_warn("%s:%d: lv1_open_device dev=%u.%u(%s) failed: %s\n",
+ __func__, __LINE__, dev->match_id, dev->match_sub_id,
+ dev_name(&dev->core), ps3_result(result));
+ result = -EPERM;
}
done:
@@ -120,7 +121,7 @@ static int ps3_open_hv_device_gpu(struct ps3_system_bus_device *dev)
result = lv1_gpu_open(0);
if (result) {
- pr_debug("%s:%d: lv1_gpu_open failed: %s\n", __func__,
+ pr_warn("%s:%d: lv1_gpu_open failed: %s\n", __func__,
__LINE__, ps3_result(result));
result = -EPERM;
}
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index c8a2b0b05ac0..4cda0ef87be0 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -30,3 +30,4 @@ obj-$(CONFIG_PPC_SVM) += svm.o
obj-$(CONFIG_FA_DUMP) += rtas-fadump.o
obj-$(CONFIG_SUSPEND) += suspend.o
+obj-$(CONFIG_PPC_VAS) += vas.o
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 3ac70790ec7a..b1f01ac0c29e 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -289,8 +289,7 @@ int dlpar_acquire_drc(u32 drc_index)
{
int dr_status, rc;
- rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
- DR_ENTITY_SENSE, drc_index);
+ rc = rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &dr_status);
if (rc || dr_status != DR_ENTITY_UNUSABLE)
return -1;
@@ -311,8 +310,7 @@ int dlpar_release_drc(u32 drc_index)
{
int dr_status, rc;
- rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
- DR_ENTITY_SENSE, drc_index);
+ rc = rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &dr_status);
if (rc || dr_status != DR_ENTITY_PRESENT)
return -1;
@@ -333,8 +331,7 @@ int dlpar_unisolate_drc(u32 drc_index)
{
int dr_status, rc;
- rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
- DR_ENTITY_SENSE, drc_index);
+ rc = rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &dr_status);
if (rc || dr_status != DR_ENTITY_PRESENT)
return -1;
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 8377f1f7c78e..377d852f5a9a 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -348,7 +348,8 @@ static int pseries_remove_mem_node(struct device_node *np)
static bool lmb_is_removable(struct drmem_lmb *lmb)
{
- if (!(lmb->flags & DRCONF_MEM_ASSIGNED))
+ if ((lmb->flags & DRCONF_MEM_RESERVED) ||
+ !(lmb->flags & DRCONF_MEM_ASSIGNED))
return false;
#ifdef CONFIG_FA_DUMP
@@ -401,7 +402,7 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb)
static int dlpar_memory_remove_by_count(u32 lmbs_to_remove)
{
struct drmem_lmb *lmb;
- int lmbs_removed = 0;
+ int lmbs_reserved = 0;
int lmbs_available = 0;
int rc;
@@ -435,12 +436,12 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove)
*/
drmem_mark_lmb_reserved(lmb);
- lmbs_removed++;
- if (lmbs_removed == lmbs_to_remove)
+ lmbs_reserved++;
+ if (lmbs_reserved == lmbs_to_remove)
break;
}
- if (lmbs_removed != lmbs_to_remove) {
+ if (lmbs_reserved != lmbs_to_remove) {
pr_err("Memory hot-remove failed, adding LMB's back\n");
for_each_drmem_lmb(lmb) {
@@ -453,6 +454,10 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove)
lmb->drc_index);
drmem_remove_lmb_reservation(lmb);
+
+ lmbs_reserved--;
+ if (lmbs_reserved == 0)
+ break;
}
rc = -EINVAL;
@@ -466,6 +471,10 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove)
lmb->base_addr);
drmem_remove_lmb_reservation(lmb);
+
+ lmbs_reserved--;
+ if (lmbs_reserved == 0)
+ break;
}
rc = 0;
}
@@ -508,7 +517,6 @@ static int dlpar_memory_remove_by_index(u32 drc_index)
static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
{
struct drmem_lmb *lmb, *start_lmb, *end_lmb;
- int lmbs_available = 0;
int rc;
pr_info("Attempting to hot-remove %u LMB(s) at %x\n",
@@ -521,18 +529,29 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
if (rc)
return -EINVAL;
- /* Validate that there are enough LMBs to satisfy the request */
+ /*
+ * Validate that all LMBs in range are not reserved. Note that it
+ * is ok if they are !ASSIGNED since our goal here is to remove the
+ * LMB range, regardless of whether some LMBs were already removed
+ * by any other reason.
+ *
+ * This is a contrast to what is done in remove_by_count() where we
+ * check for both RESERVED and !ASSIGNED (via lmb_is_removable()),
+ * because we want to remove a fixed amount of LMBs in that function.
+ */
for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
- if (lmb->flags & DRCONF_MEM_RESERVED)
- break;
-
- lmbs_available++;
+ if (lmb->flags & DRCONF_MEM_RESERVED) {
+ pr_err("Memory at %llx (drc index %x) is reserved\n",
+ lmb->base_addr, lmb->drc_index);
+ return -EINVAL;
+ }
}
- if (lmbs_available < lmbs_to_remove)
- return -EINVAL;
-
for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+ /*
+ * dlpar_remove_lmb() will error out if the LMB is already
+ * !ASSIGNED, but this case is a no-op for us.
+ */
if (!(lmb->flags & DRCONF_MEM_ASSIGNED))
continue;
@@ -551,6 +570,13 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
if (!drmem_lmb_reserved(lmb))
continue;
+ /*
+ * Setting the isolation state of an UNISOLATED/CONFIGURED
+ * device to UNISOLATE is a no-op, but the hypervisor can
+ * use it as a hint that the LMB removal failed.
+ */
+ dlpar_unisolate_drc(lmb->drc_index);
+
rc = dlpar_add_lmb(lmb);
if (rc)
pr_err("Failed to add LMB, drc index %x\n",
@@ -585,10 +611,6 @@ static inline int pseries_remove_mem_node(struct device_node *np)
{
return 0;
}
-static inline int dlpar_memory_remove(struct pseries_hp_errorlog *hp_elog)
-{
- return -EOPNOTSUPP;
-}
static int dlpar_remove_lmb(struct drmem_lmb *lmb)
{
return -EOPNOTSUPP;
@@ -651,7 +673,7 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add)
{
struct drmem_lmb *lmb;
int lmbs_available = 0;
- int lmbs_added = 0;
+ int lmbs_reserved = 0;
int rc;
pr_info("Attempting to hot-add %d LMB(s)\n", lmbs_to_add);
@@ -661,6 +683,9 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add)
/* Validate that there are enough LMBs to satisfy the request */
for_each_drmem_lmb(lmb) {
+ if (lmb->flags & DRCONF_MEM_RESERVED)
+ continue;
+
if (!(lmb->flags & DRCONF_MEM_ASSIGNED))
lmbs_available++;
@@ -689,13 +714,12 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add)
* requested LMBs cannot be added.
*/
drmem_mark_lmb_reserved(lmb);
-
- lmbs_added++;
- if (lmbs_added == lmbs_to_add)
+ lmbs_reserved++;
+ if (lmbs_reserved == lmbs_to_add)
break;
}
- if (lmbs_added != lmbs_to_add) {
+ if (lmbs_reserved != lmbs_to_add) {
pr_err("Memory hot-add failed, removing any added LMBs\n");
for_each_drmem_lmb(lmb) {
@@ -710,6 +734,10 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add)
dlpar_release_drc(lmb->drc_index);
drmem_remove_lmb_reservation(lmb);
+ lmbs_reserved--;
+
+ if (lmbs_reserved == 0)
+ break;
}
rc = -EINVAL;
} else {
@@ -720,6 +748,10 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add)
pr_debug("Memory at %llx (drc index %x) was hot-added\n",
lmb->base_addr, lmb->drc_index);
drmem_remove_lmb_reservation(lmb);
+ lmbs_reserved--;
+
+ if (lmbs_reserved == 0)
+ break;
}
rc = 0;
}
@@ -764,7 +796,6 @@ static int dlpar_memory_add_by_index(u32 drc_index)
static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index)
{
struct drmem_lmb *lmb, *start_lmb, *end_lmb;
- int lmbs_available = 0;
int rc;
pr_info("Attempting to hot-add %u LMB(s) at index %x\n",
@@ -779,15 +810,14 @@ static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index)
/* Validate that the LMBs in this range are not reserved */
for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
- if (lmb->flags & DRCONF_MEM_RESERVED)
- break;
-
- lmbs_available++;
+ /* Fail immediately if the whole range can't be hot-added */
+ if (lmb->flags & DRCONF_MEM_RESERVED) {
+ pr_err("Memory at %llx (drc index %x) is reserved\n",
+ lmb->base_addr, lmb->drc_index);
+ return -EINVAL;
+ }
}
- if (lmbs_available < lmbs_to_add)
- return -EINVAL;
-
for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
if (lmb->flags & DRCONF_MEM_ASSIGNED)
continue;
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index 8a2b8d64265b..ab9fc6506861 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -108,6 +108,10 @@ _GLOBAL_TOC(plpar_hcall_norets_notrace)
mfcr r0
stw r0,8(r1)
HVSC /* invoke the hypervisor */
+
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+
lwz r0,8(r1)
mtcrf 0xff,r0
blr /* return r3 = status */
@@ -120,6 +124,9 @@ _GLOBAL_TOC(plpar_hcall_norets)
HCALL_BRANCH(plpar_hcall_norets_trace)
HVSC /* invoke the hypervisor */
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+
lwz r0,8(r1)
mtcrf 0xff,r0
blr /* return r3 = status */
@@ -129,6 +136,10 @@ plpar_hcall_norets_trace:
HCALL_INST_PRECALL(R4)
HVSC
HCALL_INST_POSTCALL_NORETS
+
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+
lwz r0,8(r1)
mtcrf 0xff,r0
blr
@@ -159,6 +170,9 @@ _GLOBAL_TOC(plpar_hcall)
std r6, 16(r12)
std r7, 24(r12)
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+
lwz r0,8(r1)
mtcrf 0xff,r0
@@ -188,6 +202,9 @@ plpar_hcall_trace:
HCALL_INST_POSTCALL(r12)
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+
lwz r0,8(r1)
mtcrf 0xff,r0
@@ -223,6 +240,9 @@ _GLOBAL(plpar_hcall_raw)
std r6, 16(r12)
std r7, 24(r12)
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+
lwz r0,8(r1)
mtcrf 0xff,r0
@@ -262,6 +282,9 @@ _GLOBAL_TOC(plpar_hcall9)
std r11,56(r12)
std r0, 64(r12)
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+
lwz r0,8(r1)
mtcrf 0xff,r0
@@ -300,6 +323,9 @@ plpar_hcall9_trace:
HCALL_INST_POSTCALL(r12)
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+
lwz r0,8(r1)
mtcrf 0xff,r0
@@ -339,6 +365,9 @@ _GLOBAL(plpar_hcall9_raw)
std r11,56(r12)
std r0, 64(r12)
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+
lwz r0,8(r1)
mtcrf 0xff,r0
diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c
index a15ab33646b3..c6c79ef55e13 100644
--- a/arch/powerpc/platforms/pseries/ibmebus.c
+++ b/arch/powerpc/platforms/pseries/ibmebus.c
@@ -42,6 +42,7 @@
#include <linux/kobject.h>
#include <linux/dma-map-ops.h>
#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/stat.h>
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index ef26fe40efb0..f48e87ac89c9 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -18,6 +18,7 @@
#include <asm/plpar_wrappers.h>
#include <asm/papr_pdsm.h>
#include <asm/mce.h>
+#include <asm/unaligned.h>
#define BIND_ANY_ADDR (~0ul)
@@ -114,6 +115,9 @@ struct papr_scm_priv {
/* Health information for the dimm */
u64 health_bitmap;
+ /* Holds the last known dirty shutdown counter value */
+ u64 dirty_shutdown_counter;
+
/* length of the stat buffer as expected by phyp */
size_t stat_buffer_len;
};
@@ -260,7 +264,7 @@ err_out:
* Query the Dimm performance stats from PHYP and copy them (if returned) to
* provided struct papr_scm_perf_stats instance 'stats' that can hold atleast
* (num_stats + header) bytes.
- * - If buff_stats == NULL the return value is the size in byes of the buffer
+ * - If buff_stats == NULL the return value is the size in bytes of the buffer
* needed to hold all supported performance-statistics.
* - If buff_stats != NULL and num_stats == 0 then we copy all known
* performance-statistics to 'buff_stat' and expect to be large enough to
@@ -310,6 +314,13 @@ static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p,
dev_err(&p->pdev->dev,
"Unknown performance stats, Err:0x%016lX\n", ret[0]);
return -ENOENT;
+ } else if (rc == H_AUTHORITY) {
+ dev_info(&p->pdev->dev,
+ "Permission denied while accessing performance stats");
+ return -EPERM;
+ } else if (rc == H_UNSUPPORTED) {
+ dev_dbg(&p->pdev->dev, "Performance stats unsupported\n");
+ return -EOPNOTSUPP;
} else if (rc != H_SUCCESS) {
dev_err(&p->pdev->dev,
"Failed to query performance stats, Err:%lld\n", rc);
@@ -596,6 +607,16 @@ free_stats:
return rc;
}
+/* Add the dirty-shutdown-counter value to the pdsm */
+static int papr_pdsm_dsc(struct papr_scm_priv *p,
+ union nd_pdsm_payload *payload)
+{
+ payload->health.extension_flags |= PDSM_DIMM_DSC_VALID;
+ payload->health.dimm_dsc = p->dirty_shutdown_counter;
+
+ return sizeof(struct nd_papr_pdsm_health);
+}
+
/* Fetch the DIMM health info and populate it in provided package. */
static int papr_pdsm_health(struct papr_scm_priv *p,
union nd_pdsm_payload *payload)
@@ -639,6 +660,8 @@ static int papr_pdsm_health(struct papr_scm_priv *p,
/* Populate the fuel gauge meter in the payload */
papr_pdsm_fuel_gauge(p, payload);
+ /* Populate the dirty-shutdown-counter field */
+ papr_pdsm_dsc(p, payload);
rc = sizeof(struct nd_papr_pdsm_health);
@@ -900,15 +923,41 @@ static ssize_t flags_show(struct device *dev,
}
DEVICE_ATTR_RO(flags);
+static ssize_t dirty_shutdown_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nvdimm *dimm = to_nvdimm(dev);
+ struct papr_scm_priv *p = nvdimm_provider_data(dimm);
+
+ return sysfs_emit(buf, "%llu\n", p->dirty_shutdown_counter);
+}
+DEVICE_ATTR_RO(dirty_shutdown);
+
+static umode_t papr_nd_attribute_visible(struct kobject *kobj,
+ struct attribute *attr, int n)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+ struct papr_scm_priv *p = nvdimm_provider_data(nvdimm);
+
+ /* For if perf-stats not available remove perf_stats sysfs */
+ if (attr == &dev_attr_perf_stats.attr && p->stat_buffer_len == 0)
+ return 0;
+
+ return attr->mode;
+}
+
/* papr_scm specific dimm attributes */
static struct attribute *papr_nd_attributes[] = {
&dev_attr_flags.attr,
&dev_attr_perf_stats.attr,
+ &dev_attr_dirty_shutdown.attr,
NULL,
};
static struct attribute_group papr_nd_attribute_group = {
.name = "papr",
+ .is_visible = papr_nd_attribute_visible,
.attrs = papr_nd_attributes,
};
@@ -924,7 +973,6 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
struct nd_region_desc ndr_desc;
unsigned long dimm_flags;
int target_nid, online_nid;
- ssize_t stat_size;
p->bus_desc.ndctl = papr_scm_ndctl;
p->bus_desc.module = THIS_MODULE;
@@ -1009,16 +1057,6 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
list_add_tail(&p->region_list, &papr_nd_regions);
mutex_unlock(&papr_ndr_lock);
- /* Try retriving the stat buffer and see if its supported */
- stat_size = drc_pmem_query_stats(p, NULL, 0);
- if (stat_size > 0) {
- p->stat_buffer_len = stat_size;
- dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n",
- p->stat_buffer_len);
- } else {
- dev_info(&p->pdev->dev, "Dimm performance stats unavailable\n");
- }
-
return 0;
err: nvdimm_bus_unregister(p->bus);
@@ -1094,8 +1132,10 @@ static int papr_scm_probe(struct platform_device *pdev)
u32 drc_index, metadata_size;
u64 blocks, block_size;
struct papr_scm_priv *p;
+ u8 uuid_raw[UUID_SIZE];
const char *uuid_str;
- u64 uuid[2];
+ ssize_t stat_size;
+ uuid_t uuid;
int rc;
/* check we have all the required DT properties */
@@ -1137,17 +1177,28 @@ static int papr_scm_probe(struct platform_device *pdev)
p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required");
p->hcall_flush_required = of_property_read_bool(dn, "ibm,hcall-flush-required");
+ if (of_property_read_u64(dn, "ibm,persistence-failed-count",
+ &p->dirty_shutdown_counter))
+ p->dirty_shutdown_counter = 0;
+
/* We just need to ensure that set cookies are unique across */
- uuid_parse(uuid_str, (uuid_t *) uuid);
+ uuid_parse(uuid_str, &uuid);
+
/*
- * cookie1 and cookie2 are not really little endian
- * we store a little endian representation of the
- * uuid str so that we can compare this with the label
- * area cookie irrespective of the endian config with which
- * the kernel is built.
+ * The cookie1 and cookie2 are not really little endian.
+ * We store a raw buffer representation of the
+ * uuid string so that we can compare this with the label
+ * area cookie irrespective of the endian configuration
+ * with which the kernel is built.
+ *
+ * Historically we stored the cookie in the below format.
+ * for a uuid string 72511b67-0b3b-42fd-8d1d-5be3cae8bcaa
+ * cookie1 was 0xfd423b0b671b5172
+ * cookie2 was 0xaabce8cae35b1d8d
*/
- p->nd_set.cookie1 = cpu_to_le64(uuid[0]);
- p->nd_set.cookie2 = cpu_to_le64(uuid[1]);
+ export_uuid(uuid_raw, &uuid);
+ p->nd_set.cookie1 = get_unaligned_le64(&uuid_raw[0]);
+ p->nd_set.cookie2 = get_unaligned_le64(&uuid_raw[8]);
/* might be zero */
p->metadata_size = metadata_size;
@@ -1172,6 +1223,14 @@ static int papr_scm_probe(struct platform_device *pdev)
p->res.name = pdev->name;
p->res.flags = IORESOURCE_MEM;
+ /* Try retrieving the stat buffer and see if its supported */
+ stat_size = drc_pmem_query_stats(p, NULL, 0);
+ if (stat_size > 0) {
+ p->stat_buffer_len = stat_size;
+ dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n",
+ p->stat_buffer_len);
+ }
+
rc = papr_scm_nvdimm_init(p);
if (rc)
goto err2;
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 9d4ef65da7f3..167f2e1b8d39 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -487,8 +487,8 @@ int pSeries_system_reset_exception(struct pt_regs *regs)
if ((be64_to_cpu(regs->msr) &
(MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR|
MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) {
- regs->nip = be64_to_cpu((__be64)regs->nip);
- regs->msr = 0;
+ regs_set_return_ip(regs, be64_to_cpu((__be64)regs->nip));
+ regs_set_return_msr(regs, 0);
}
#endif
@@ -593,8 +593,6 @@ static int mce_handle_err_virtmode(struct pt_regs *regs,
mce_err.severity = MCE_SEV_SEVERE;
else if (severity == RTAS_SEVERITY_ERROR)
mce_err.severity = MCE_SEV_SEVERE;
- else if (severity == RTAS_SEVERITY_FATAL)
- mce_err.severity = MCE_SEV_FATAL;
else
mce_err.severity = MCE_SEV_FATAL;
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 754e493b7c05..631a0d57b6cd 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -549,6 +549,15 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);
+ if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY)
+ security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY);
+
+ if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS)
+ security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS);
+
+ if (result->behaviour & H_CPU_BEHAV_NO_STF_BARRIER)
+ security_ftr_clear(SEC_FTR_STF_BARRIER);
+
if (!(result->behaviour & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR))
security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
}
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index c70b4be9f0a5..096629f54576 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -211,7 +211,9 @@ static __init void pSeries_smp_probe(void)
if (!cpu_has_feature(CPU_FTR_SMT))
return;
- if (check_kvm_guest()) {
+ check_kvm_guest();
+
+ if (is_kvm_guest()) {
/*
* KVM emulates doorbells by disabling FSCR[MSGP] so msgsndp
* faults to the hypervisor which then reads the instruction
diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
new file mode 100644
index 000000000000..b5c1cf1bc64d
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -0,0 +1,595 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2020-21 IBM Corp.
+ */
+
+#define pr_fmt(fmt) "vas: " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <asm/machdep.h>
+#include <asm/hvcall.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/vas.h>
+#include "vas.h"
+
+#define VAS_INVALID_WIN_ADDRESS 0xFFFFFFFFFFFFFFFFul
+#define VAS_DEFAULT_DOMAIN_ID 0xFFFFFFFFFFFFFFFFul
+/* The hypervisor allows one credit per window right now */
+#define DEF_WIN_CREDS 1
+
+static struct vas_all_caps caps_all;
+static bool copypaste_feat;
+
+static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE];
+static DEFINE_MUTEX(vas_pseries_mutex);
+
+static long hcall_return_busy_check(long rc)
+{
+ /* Check if we are stalled for some time */
+ if (H_IS_LONG_BUSY(rc)) {
+ msleep(get_longbusy_msecs(rc));
+ rc = H_BUSY;
+ } else if (rc == H_BUSY) {
+ cond_resched();
+ }
+
+ return rc;
+}
+
+/*
+ * Allocate VAS window hcall
+ */
+static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain,
+ u8 wintype, u16 credits)
+{
+ long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
+ long rc;
+
+ do {
+ rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype,
+ credits, domain[0], domain[1], domain[2],
+ domain[3], domain[4], domain[5]);
+
+ rc = hcall_return_busy_check(rc);
+ } while (rc == H_BUSY);
+
+ if (rc == H_SUCCESS) {
+ if (win->win_addr == VAS_INVALID_WIN_ADDRESS) {
+ pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n");
+ return -ENOTSUPP;
+ }
+ win->vas_win.winid = retbuf[0];
+ win->win_addr = retbuf[1];
+ win->complete_irq = retbuf[2];
+ win->fault_irq = retbuf[3];
+ return 0;
+ }
+
+ pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n",
+ rc, wintype, credits);
+
+ return -EIO;
+}
+
+/*
+ * Deallocate VAS window hcall.
+ */
+static int h_deallocate_vas_window(u64 winid)
+{
+ long rc;
+
+ do {
+ rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid);
+
+ rc = hcall_return_busy_check(rc);
+ } while (rc == H_BUSY);
+
+ if (rc == H_SUCCESS)
+ return 0;
+
+ pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n",
+ rc, winid);
+ return -EIO;
+}
+
+/*
+ * Modify VAS window.
+ * After the window is opened with allocate window hcall, configure it
+ * with flags and LPAR PID before using.
+ */
+static int h_modify_vas_window(struct pseries_vas_window *win)
+{
+ long rc;
+ u32 lpid = mfspr(SPRN_PID);
+
+ /*
+ * AMR value is not supported in Linux VAS implementation.
+ * The hypervisor ignores it if 0 is passed.
+ */
+ do {
+ rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW,
+ win->vas_win.winid, lpid, 0,
+ VAS_MOD_WIN_FLAGS, 0);
+
+ rc = hcall_return_busy_check(rc);
+ } while (rc == H_BUSY);
+
+ if (rc == H_SUCCESS)
+ return 0;
+
+ pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u lpid %u\n",
+ rc, win->vas_win.winid, lpid);
+ return -EIO;
+}
+
+/*
+ * This hcall is used to determine the capabilities from the hypervisor.
+ * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES
+ * @query_type: If 0 is passed, the hypervisor returns the overall
+ * capabilities which provides all feature(s) that are
+ * available. Then query the hypervisor to get the
+ * corresponding capabilities for the specific feature.
+ * Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS
+ * and VAS GZIP Default capabilities.
+ * H_QUERY_NX_CAPABILITIES provides NX GZIP
+ * capabilities.
+ * @result: Return buffer to save capabilities.
+ */
+int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result)
+{
+ long rc;
+
+ rc = plpar_hcall_norets(hcall, query_type, result);
+
+ if (rc == H_SUCCESS)
+ return 0;
+
+ pr_err("HCALL(%llx) error %ld, query_type %u, result buffer 0x%llx\n",
+ hcall, rc, query_type, result);
+ return -EIO;
+}
+EXPORT_SYMBOL_GPL(h_query_vas_capabilities);
+
+/*
+ * hcall to get fault CRB from the hypervisor.
+ */
+static int h_get_nx_fault(u32 winid, u64 buffer)
+{
+ long rc;
+
+ rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer);
+
+ if (rc == H_SUCCESS)
+ return 0;
+
+ pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n",
+ rc, winid, buffer);
+ return -EIO;
+
+}
+
+/*
+ * Handle the fault interrupt.
+ * When the fault interrupt is received for each window, query the
+ * hypervisor to get the fault CRB on the specific fault. Then
+ * process the CRB by updating CSB or send signal if the user space
+ * CSB is invalid.
+ * Note: The hypervisor forwards an interrupt for each fault request.
+ * So one fault CRB to process for each H_GET_NX_FAULT hcall.
+ */
+irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
+{
+ struct pseries_vas_window *txwin = data;
+ struct coprocessor_request_block crb;
+ struct vas_user_win_ref *tsk_ref;
+ int rc;
+
+ rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
+ if (!rc) {
+ tsk_ref = &txwin->vas_win.task_ref;
+ vas_dump_crb(&crb);
+ vas_update_csb(&crb, tsk_ref);
+ }
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * Allocate window and setup IRQ mapping.
+ */
+static int allocate_setup_window(struct pseries_vas_window *txwin,
+ u64 *domain, u8 wintype)
+{
+ int rc;
+
+ rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS);
+ if (rc)
+ return rc;
+ /*
+ * On PowerVM, the hypervisor setup and forwards the fault
+ * interrupt per window. So the IRQ setup and fault handling
+ * will be done for each open window separately.
+ */
+ txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq);
+ if (!txwin->fault_virq) {
+ pr_err("Failed irq mapping %d\n", txwin->fault_irq);
+ rc = -EINVAL;
+ goto out_win;
+ }
+
+ txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d",
+ txwin->vas_win.winid);
+ if (!txwin->name) {
+ rc = -ENOMEM;
+ goto out_irq;
+ }
+
+ rc = request_threaded_irq(txwin->fault_virq, NULL,
+ pseries_vas_fault_thread_fn, IRQF_ONESHOT,
+ txwin->name, txwin);
+ if (rc) {
+ pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n",
+ txwin->vas_win.winid, txwin->fault_virq, rc);
+ goto out_free;
+ }
+
+ txwin->vas_win.wcreds_max = DEF_WIN_CREDS;
+
+ return 0;
+out_free:
+ kfree(txwin->name);
+out_irq:
+ irq_dispose_mapping(txwin->fault_virq);
+out_win:
+ h_deallocate_vas_window(txwin->vas_win.winid);
+ return rc;
+}
+
+static inline void free_irq_setup(struct pseries_vas_window *txwin)
+{
+ free_irq(txwin->fault_virq, txwin);
+ kfree(txwin->name);
+ irq_dispose_mapping(txwin->fault_virq);
+}
+
+static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
+ enum vas_cop_type cop_type)
+{
+ long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
+ struct vas_cop_feat_caps *cop_feat_caps;
+ struct vas_caps *caps;
+ struct pseries_vas_window *txwin;
+ int rc;
+
+ txwin = kzalloc(sizeof(*txwin), GFP_KERNEL);
+ if (!txwin)
+ return ERR_PTR(-ENOMEM);
+
+ /*
+ * A VAS window can have many credits which means that many
+ * requests can be issued simultaneously. But the hypervisor
+ * restricts one credit per window.
+ * The hypervisor introduces 2 different types of credits:
+ * Default credit type (Uses normal priority FIFO):
+ * A limited number of credits are assigned to partitions
+ * based on processor entitlement. But these credits may be
+ * over-committed on a system depends on whether the CPUs
+ * are in shared or dedicated modes - that is, more requests
+ * may be issued across the system than NX can service at
+ * once which can result in paste command failure (RMA_busy).
+ * Then the process has to resend requests or fall-back to
+ * SW compression.
+ * Quality of Service (QoS) credit type (Uses high priority FIFO):
+ * To avoid NX HW contention, the system admins can assign
+ * QoS credits for each LPAR so that this partition is
+ * guaranteed access to NX resources. These credits are
+ * assigned to partitions via the HMC.
+ * Refer PAPR for more information.
+ *
+ * Allocate window with QoS credits if user requested. Otherwise
+ * default credits are used.
+ */
+ if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT)
+ caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE];
+ else
+ caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE];
+
+ cop_feat_caps = &caps->caps;
+
+ if (atomic_inc_return(&cop_feat_caps->used_lpar_creds) >
+ atomic_read(&cop_feat_caps->target_lpar_creds)) {
+ pr_err("Credits are not available to allocate window\n");
+ rc = -EINVAL;
+ goto out;
+ }
+
+ if (vas_id == -1) {
+ /*
+ * The user space is requesting to allocate a window on
+ * a VAS instance where the process is executing.
+ * On PowerVM, domain values are passed to the hypervisor
+ * to select VAS instance. Useful if the process is
+ * affinity to NUMA node.
+ * The hypervisor selects VAS instance if
+ * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values.
+ * The h_allocate_vas_window hcall is defined to take a
+ * domain values as specified by h_home_node_associativity,
+ * So no unpacking needs to be done.
+ */
+ rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain,
+ VPHN_FLAG_VCPU, smp_processor_id());
+ if (rc != H_SUCCESS) {
+ pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc);
+ goto out;
+ }
+ }
+
+ /*
+ * Allocate / Deallocate window hcalls and setup / free IRQs
+ * have to be protected with mutex.
+ * Open VAS window: Allocate window hcall and setup IRQ
+ * Close VAS window: Deallocate window hcall and free IRQ
+ * The hypervisor waits until all NX requests are
+ * completed before closing the window. So expects OS
+ * to handle NX faults, means IRQ can be freed only
+ * after the deallocate window hcall is returned.
+ * So once the window is closed with deallocate hcall before
+ * the IRQ is freed, it can be assigned to new allocate
+ * hcall with the same fault IRQ by the hypervisor. It can
+ * result in setup IRQ fail for the new window since the
+ * same fault IRQ is not freed by the OS before.
+ */
+ mutex_lock(&vas_pseries_mutex);
+ rc = allocate_setup_window(txwin, (u64 *)&domain[0],
+ cop_feat_caps->win_type);
+ mutex_unlock(&vas_pseries_mutex);
+ if (rc)
+ goto out;
+
+ /*
+ * Modify window and it is ready to use.
+ */
+ rc = h_modify_vas_window(txwin);
+ if (!rc)
+ rc = get_vas_user_win_ref(&txwin->vas_win.task_ref);
+ if (rc)
+ goto out_free;
+
+ vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
+ txwin->win_type = cop_feat_caps->win_type;
+ mutex_lock(&vas_pseries_mutex);
+ list_add(&txwin->win_list, &caps->list);
+ mutex_unlock(&vas_pseries_mutex);
+
+ return &txwin->vas_win;
+
+out_free:
+ /*
+ * Window is not operational. Free IRQ before closing
+ * window so that do not have to hold mutex.
+ */
+ free_irq_setup(txwin);
+ h_deallocate_vas_window(txwin->vas_win.winid);
+out:
+ atomic_dec(&cop_feat_caps->used_lpar_creds);
+ kfree(txwin);
+ return ERR_PTR(rc);
+}
+
+static u64 vas_paste_address(struct vas_window *vwin)
+{
+ struct pseries_vas_window *win;
+
+ win = container_of(vwin, struct pseries_vas_window, vas_win);
+ return win->win_addr;
+}
+
+static int deallocate_free_window(struct pseries_vas_window *win)
+{
+ int rc = 0;
+
+ /*
+ * The hypervisor waits for all requests including faults
+ * are processed before closing the window - Means all
+ * credits have to be returned. In the case of fault
+ * request, a credit is returned after OS issues
+ * H_GET_NX_FAULT hcall.
+ * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW
+ * hcall.
+ */
+ rc = h_deallocate_vas_window(win->vas_win.winid);
+ if (!rc)
+ free_irq_setup(win);
+
+ return rc;
+}
+
+static int vas_deallocate_window(struct vas_window *vwin)
+{
+ struct pseries_vas_window *win;
+ struct vas_cop_feat_caps *caps;
+ int rc = 0;
+
+ if (!vwin)
+ return -EINVAL;
+
+ win = container_of(vwin, struct pseries_vas_window, vas_win);
+
+ /* Should not happen */
+ if (win->win_type >= VAS_MAX_FEAT_TYPE) {
+ pr_err("Window (%u): Invalid window type %u\n",
+ vwin->winid, win->win_type);
+ return -EINVAL;
+ }
+
+ caps = &vascaps[win->win_type].caps;
+ mutex_lock(&vas_pseries_mutex);
+ rc = deallocate_free_window(win);
+ if (rc) {
+ mutex_unlock(&vas_pseries_mutex);
+ return rc;
+ }
+
+ list_del(&win->win_list);
+ atomic_dec(&caps->used_lpar_creds);
+ mutex_unlock(&vas_pseries_mutex);
+
+ put_vas_user_win_ref(&vwin->task_ref);
+ mm_context_remove_vas_window(vwin->task_ref.mm);
+
+ kfree(win);
+ return 0;
+}
+
+static const struct vas_user_win_ops vops_pseries = {
+ .open_win = vas_allocate_window, /* Open and configure window */
+ .paste_addr = vas_paste_address, /* To do copy/paste */
+ .close_win = vas_deallocate_window, /* Close window */
+};
+
+/*
+ * Supporting only nx-gzip coprocessor type now, but this API code
+ * extended to other coprocessor types later.
+ */
+int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type,
+ const char *name)
+{
+ int rc;
+
+ if (!copypaste_feat)
+ return -ENOTSUPP;
+
+ rc = vas_register_coproc_api(mod, cop_type, name, &vops_pseries);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(vas_register_api_pseries);
+
+void vas_unregister_api_pseries(void)
+{
+ vas_unregister_coproc_api();
+}
+EXPORT_SYMBOL_GPL(vas_unregister_api_pseries);
+
+/*
+ * Get the specific capabilities based on the feature type.
+ * Right now supports GZIP default and GZIP QoS capabilities.
+ */
+static int get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
+ struct hv_vas_cop_feat_caps *hv_caps)
+{
+ struct vas_cop_feat_caps *caps;
+ struct vas_caps *vcaps;
+ int rc = 0;
+
+ vcaps = &vascaps[type];
+ memset(vcaps, 0, sizeof(*vcaps));
+ INIT_LIST_HEAD(&vcaps->list);
+
+ caps = &vcaps->caps;
+
+ rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat,
+ (u64)virt_to_phys(hv_caps));
+ if (rc)
+ return rc;
+
+ caps->user_mode = hv_caps->user_mode;
+ if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) {
+ pr_err("User space COPY/PASTE is not supported\n");
+ return -ENOTSUPP;
+ }
+
+ caps->descriptor = be64_to_cpu(hv_caps->descriptor);
+ caps->win_type = hv_caps->win_type;
+ if (caps->win_type >= VAS_MAX_FEAT_TYPE) {
+ pr_err("Unsupported window type %u\n", caps->win_type);
+ return -EINVAL;
+ }
+ caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds);
+ caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds);
+ atomic_set(&caps->target_lpar_creds,
+ be16_to_cpu(hv_caps->target_lpar_creds));
+ if (feat == VAS_GZIP_DEF_FEAT) {
+ caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds);
+
+ if (caps->max_win_creds < DEF_WIN_CREDS) {
+ pr_err("Window creds(%u) > max allowed window creds(%u)\n",
+ DEF_WIN_CREDS, caps->max_win_creds);
+ return -EINVAL;
+ }
+ }
+
+ copypaste_feat = true;
+
+ return 0;
+}
+
+static int __init pseries_vas_init(void)
+{
+ struct hv_vas_cop_feat_caps *hv_cop_caps;
+ struct hv_vas_all_caps *hv_caps;
+ int rc;
+
+ /*
+ * Linux supports user space COPY/PASTE only with Radix
+ */
+ if (!radix_enabled()) {
+ pr_err("API is supported only with radix page tables\n");
+ return -ENOTSUPP;
+ }
+
+ hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL);
+ if (!hv_caps)
+ return -ENOMEM;
+ /*
+ * Get VAS overall capabilities by passing 0 to feature type.
+ */
+ rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0,
+ (u64)virt_to_phys(hv_caps));
+ if (rc)
+ goto out;
+
+ caps_all.descriptor = be64_to_cpu(hv_caps->descriptor);
+ caps_all.feat_type = be64_to_cpu(hv_caps->feat_type);
+
+ hv_cop_caps = kmalloc(sizeof(*hv_cop_caps), GFP_KERNEL);
+ if (!hv_cop_caps) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ /*
+ * QOS capabilities available
+ */
+ if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) {
+ rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT,
+ VAS_GZIP_QOS_FEAT_TYPE, hv_cop_caps);
+
+ if (rc)
+ goto out_cop;
+ }
+ /*
+ * Default capabilities available
+ */
+ if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) {
+ rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT,
+ VAS_GZIP_DEF_FEAT_TYPE, hv_cop_caps);
+ if (rc)
+ goto out_cop;
+ }
+
+ pr_info("GZIP feature is available\n");
+
+out_cop:
+ kfree(hv_cop_caps);
+out:
+ kfree(hv_caps);
+ return rc;
+}
+machine_device_initcall(pseries, pseries_vas_init);
diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h
new file mode 100644
index 000000000000..4ecb3fcabd10
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/vas.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2020-21 IBM Corp.
+ */
+
+#ifndef _VAS_H
+#define _VAS_H
+#include <asm/vas.h>
+#include <linux/mutex.h>
+#include <linux/stringify.h>
+
+/*
+ * VAS window modify flags
+ */
+#define VAS_MOD_WIN_CLOSE PPC_BIT(0)
+#define VAS_MOD_WIN_JOBS_KILL PPC_BIT(1)
+#define VAS_MOD_WIN_DR PPC_BIT(3)
+#define VAS_MOD_WIN_PR PPC_BIT(4)
+#define VAS_MOD_WIN_SF PPC_BIT(5)
+#define VAS_MOD_WIN_TA PPC_BIT(6)
+#define VAS_MOD_WIN_FLAGS (VAS_MOD_WIN_JOBS_KILL | VAS_MOD_WIN_DR | \
+ VAS_MOD_WIN_PR | VAS_MOD_WIN_SF)
+
+#define VAS_WIN_ACTIVE 0x0
+#define VAS_WIN_CLOSED 0x1
+#define VAS_WIN_INACTIVE 0x2 /* Inactive due to HW failure */
+/* Process of being modified, deallocated, or quiesced */
+#define VAS_WIN_MOD_IN_PROCESS 0x3
+
+#define VAS_COPY_PASTE_USER_MODE 0x00000001
+#define VAS_COP_OP_USER_MODE 0x00000010
+
+/*
+ * Co-processor feature - GZIP QoS windows or GZIP default windows
+ */
+enum vas_cop_feat_type {
+ VAS_GZIP_QOS_FEAT_TYPE,
+ VAS_GZIP_DEF_FEAT_TYPE,
+ VAS_MAX_FEAT_TYPE,
+};
+
+/*
+ * Use to get feature specific capabilities from the
+ * hypervisor.
+ */
+struct hv_vas_cop_feat_caps {
+ __be64 descriptor;
+ u8 win_type; /* Default or QoS type */
+ u8 user_mode;
+ __be16 max_lpar_creds;
+ __be16 max_win_creds;
+ union {
+ __be16 reserved;
+ __be16 def_lpar_creds; /* Used for default capabilities */
+ };
+ __be16 target_lpar_creds;
+} __packed __aligned(0x1000);
+
+/*
+ * Feature specific (QoS or default) capabilities.
+ */
+struct vas_cop_feat_caps {
+ u64 descriptor;
+ u8 win_type; /* Default or QoS type */
+ u8 user_mode; /* User mode copy/paste or COP HCALL */
+ u16 max_lpar_creds; /* Max credits available in LPAR */
+ /* Max credits can be assigned per window */
+ u16 max_win_creds;
+ union {
+ u16 reserved; /* Used for QoS credit type */
+ u16 def_lpar_creds; /* Used for default credit type */
+ };
+ /* Total LPAR available credits. Can be different from max LPAR */
+ /* credits due to DLPAR operation */
+ atomic_t target_lpar_creds;
+ atomic_t used_lpar_creds; /* Used credits so far */
+ u16 avail_lpar_creds; /* Remaining available credits */
+};
+
+/*
+ * Feature (QoS or Default) specific to store capabilities and
+ * the list of open windows.
+ */
+struct vas_caps {
+ struct vas_cop_feat_caps caps;
+ struct list_head list; /* List of open windows */
+};
+
+/*
+ * To get window information from the hypervisor.
+ */
+struct hv_vas_win_lpar {
+ __be16 version;
+ u8 win_type;
+ u8 status;
+ __be16 credits; /* No of credits assigned to this window */
+ __be16 reserved;
+ __be32 pid; /* LPAR Process ID */
+ __be32 tid; /* LPAR Thread ID */
+ __be64 win_addr; /* Paste address */
+ __be32 interrupt; /* Interrupt when NX request completes */
+ __be32 fault; /* Interrupt when NX sees fault */
+ /* Associativity Domain Identifiers as returned in */
+ /* H_HOME_NODE_ASSOCIATIVITY */
+ __be64 domain[6];
+ __be64 win_util; /* Number of bytes processed */
+} __packed __aligned(0x1000);
+
+struct pseries_vas_window {
+ struct vas_window vas_win;
+ u64 win_addr; /* Physical paste address */
+ u8 win_type; /* QoS or Default window */
+ u32 complete_irq; /* Completion interrupt */
+ u32 fault_irq; /* Fault interrupt */
+ u64 domain[6]; /* Associativity domain Ids */
+ /* this window is allocated */
+ u64 util;
+
+ /* List of windows opened which is used for LPM */
+ struct list_head win_list;
+ u64 flags;
+ char *name;
+ int fault_virq;
+};
+#endif /* _VAS_H */
diff --git a/arch/powerpc/sysdev/ehv_pic.c b/arch/powerpc/sysdev/ehv_pic.c
index 48866e6c1efb..00705258ecf9 100644
--- a/arch/powerpc/sysdev/ehv_pic.c
+++ b/arch/powerpc/sysdev/ehv_pic.c
@@ -14,6 +14,7 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/irq.h>
+#include <linux/irqdomain.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
diff --git a/arch/powerpc/sysdev/fsl_mpic_err.c b/arch/powerpc/sysdev/fsl_mpic_err.c
index 13583bbc3e8e..5fa5fa215541 100644
--- a/arch/powerpc/sysdev/fsl_mpic_err.c
+++ b/arch/powerpc/sysdev/fsl_mpic_err.c
@@ -8,6 +8,7 @@
#include <linux/irq.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
#include <asm/io.h>
#include <asm/irq.h>
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index 69af73765783..b8f76f3fd994 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -1072,7 +1072,7 @@ int fsl_pci_mcheck_exception(struct pt_regs *regs)
ret = get_kernel_nofault(inst, (void *)regs->nip);
if (!ret && mcheck_handle_load(regs, inst)) {
- regs->nip += 4;
+ regs_add_return_ip(regs, 4);
return 1;
}
}
diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c
index 07c164f7f8cf..5a95b8ea23d8 100644
--- a/arch/powerpc/sysdev/fsl_rio.c
+++ b/arch/powerpc/sysdev/fsl_rio.c
@@ -108,8 +108,8 @@ int fsl_rio_mcheck_exception(struct pt_regs *regs)
__func__);
out_be32((u32 *)(rio_regs_win + RIO_LTLEDCSR),
0);
- regs->msr |= MSR_RI;
- regs->nip = extable_fixup(entry);
+ regs_set_return_msr(regs, regs->msr | MSR_RI);
+ regs_set_return_ip(regs, extable_fixup(entry));
return 1;
}
}
diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c
index c1d76c344351..dc1a151c63d7 100644
--- a/arch/powerpc/sysdev/i8259.c
+++ b/arch/powerpc/sysdev/i8259.c
@@ -260,7 +260,8 @@ void i8259_init(struct device_node *node, unsigned long intack_addr)
raw_spin_unlock_irqrestore(&i8259_lock, flags);
/* create a legacy host */
- i8259_host = irq_domain_add_legacy_isa(node, &i8259_host_ops, NULL);
+ i8259_host = irq_domain_add_legacy(node, NR_IRQS_LEGACY, 0, 0,
+ &i8259_host_ops, NULL);
if (i8259_host == NULL) {
printk(KERN_ERR "i8259: failed to allocate irq host !\n");
return;
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index b0426f28946a..995fb2ada507 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -602,7 +602,7 @@ static void __init mpic_scan_ht_pics(struct mpic *mpic)
/* Find an mpic associated with a given linux interrupt */
static struct mpic *mpic_find(unsigned int irq)
{
- if (irq < NUM_ISA_INTERRUPTS)
+ if (irq < NR_IRQS_LEGACY)
return NULL;
return irq_get_chip_data(irq);
diff --git a/arch/powerpc/sysdev/tsi108_pci.c b/arch/powerpc/sysdev/tsi108_pci.c
index 49f9541954f8..042bb38fa5c2 100644
--- a/arch/powerpc/sysdev/tsi108_pci.c
+++ b/arch/powerpc/sysdev/tsi108_pci.c
@@ -404,7 +404,8 @@ void __init tsi108_pci_int_init(struct device_node *node)
{
DBG("Tsi108_pci_int_init: initializing PCI interrupts\n");
- pci_irq_host = irq_domain_add_legacy_isa(node, &pci_irq_domain_ops, NULL);
+ pci_irq_host = irq_domain_add_legacy(node, NR_IRQS_LEGACY, 0, 0,
+ &pci_irq_domain_ops, NULL);
if (pci_irq_host == NULL) {
printk(KERN_ERR "pci_irq_host: failed to allocate irq domain!\n");
return;
diff --git a/arch/powerpc/sysdev/xics/Kconfig b/arch/powerpc/sysdev/xics/Kconfig
index 304614c920aa..063d9195891f 100644
--- a/arch/powerpc/sysdev/xics/Kconfig
+++ b/arch/powerpc/sysdev/xics/Kconfig
@@ -12,3 +12,6 @@ config PPC_ICP_HV
config PPC_ICS_RTAS
def_bool n
+
+config PPC_ICS_NATIVE
+ def_bool n
diff --git a/arch/powerpc/sysdev/xics/Makefile b/arch/powerpc/sysdev/xics/Makefile
index ba1e3117b1c0..747063927c6c 100644
--- a/arch/powerpc/sysdev/xics/Makefile
+++ b/arch/powerpc/sysdev/xics/Makefile
@@ -4,4 +4,5 @@ obj-y += xics-common.o
obj-$(CONFIG_PPC_ICP_NATIVE) += icp-native.o
obj-$(CONFIG_PPC_ICP_HV) += icp-hv.o
obj-$(CONFIG_PPC_ICS_RTAS) += ics-rtas.o
+obj-$(CONFIG_PPC_ICS_NATIVE) += ics-native.o
obj-$(CONFIG_PPC_POWERNV) += ics-opal.o icp-opal.o
diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c
index 21b9d1bf39ff..6765d9e264a3 100644
--- a/arch/powerpc/sysdev/xics/icp-hv.c
+++ b/arch/powerpc/sysdev/xics/icp-hv.c
@@ -7,6 +7,7 @@
#include <linux/irq.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
#include <linux/cpu.h>
#include <linux/of.h>
diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c
index 68fd2540b093..675d708863d5 100644
--- a/arch/powerpc/sysdev/xics/icp-opal.c
+++ b/arch/powerpc/sysdev/xics/icp-opal.c
@@ -7,6 +7,7 @@
#include <linux/irq.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
#include <linux/cpu.h>
#include <linux/of.h>
diff --git a/arch/powerpc/sysdev/xics/ics-native.c b/arch/powerpc/sysdev/xics/ics-native.c
new file mode 100644
index 000000000000..d450502f4053
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/ics-native.c
@@ -0,0 +1,257 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * ICS backend for OPAL managed interrupts.
+ *
+ * Copyright 2011 IBM Corp.
+ */
+
+//#define DEBUG
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/spinlock.h>
+#include <linux/msi.h>
+#include <linux/list.h>
+
+#include <asm/prom.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xics.h>
+#include <asm/opal.h>
+#include <asm/firmware.h>
+
+struct ics_native {
+ struct ics ics;
+ struct device_node *node;
+ void __iomem *base;
+ u32 ibase;
+ u32 icount;
+};
+#define to_ics_native(_ics) container_of(_ics, struct ics_native, ics)
+
+static void __iomem *ics_native_xive(struct ics_native *in, unsigned int vec)
+{
+ return in->base + 0x800 + ((vec - in->ibase) << 2);
+}
+
+static void ics_native_unmask_irq(struct irq_data *d)
+{
+ unsigned int vec = (unsigned int)irqd_to_hwirq(d);
+ struct ics *ics = irq_data_get_irq_chip_data(d);
+ struct ics_native *in = to_ics_native(ics);
+ unsigned int server;
+
+ pr_devel("ics-native: unmask virq %d [hw 0x%x]\n", d->irq, vec);
+
+ if (vec < in->ibase || vec >= (in->ibase + in->icount))
+ return;
+
+ server = xics_get_irq_server(d->irq, irq_data_get_affinity_mask(d), 0);
+ out_be32(ics_native_xive(in, vec), (server << 8) | DEFAULT_PRIORITY);
+}
+
+static unsigned int ics_native_startup(struct irq_data *d)
+{
+#ifdef CONFIG_PCI_MSI
+ /*
+ * The generic MSI code returns with the interrupt disabled on the
+ * card, using the MSI mask bits. Firmware doesn't appear to unmask
+ * at that level, so we do it here by hand.
+ */
+ if (irq_data_get_msi_desc(d))
+ pci_msi_unmask_irq(d);
+#endif
+
+ /* unmask it */
+ ics_native_unmask_irq(d);
+ return 0;
+}
+
+static void ics_native_do_mask(struct ics_native *in, unsigned int vec)
+{
+ out_be32(ics_native_xive(in, vec), 0xff);
+}
+
+static void ics_native_mask_irq(struct irq_data *d)
+{
+ unsigned int vec = (unsigned int)irqd_to_hwirq(d);
+ struct ics *ics = irq_data_get_irq_chip_data(d);
+ struct ics_native *in = to_ics_native(ics);
+
+ pr_devel("ics-native: mask virq %d [hw 0x%x]\n", d->irq, vec);
+
+ if (vec < in->ibase || vec >= (in->ibase + in->icount))
+ return;
+ ics_native_do_mask(in, vec);
+}
+
+static int ics_native_set_affinity(struct irq_data *d,
+ const struct cpumask *cpumask,
+ bool force)
+{
+ unsigned int vec = (unsigned int)irqd_to_hwirq(d);
+ struct ics *ics = irq_data_get_irq_chip_data(d);
+ struct ics_native *in = to_ics_native(ics);
+ int server;
+ u32 xive;
+
+ if (vec < in->ibase || vec >= (in->ibase + in->icount))
+ return -EINVAL;
+
+ server = xics_get_irq_server(d->irq, cpumask, 1);
+ if (server == -1) {
+ pr_warn("%s: No online cpus in the mask %*pb for irq %d\n",
+ __func__, cpumask_pr_args(cpumask), d->irq);
+ return -1;
+ }
+
+ xive = in_be32(ics_native_xive(in, vec));
+ xive = (xive & 0xff) | (server << 8);
+ out_be32(ics_native_xive(in, vec), xive);
+
+ return IRQ_SET_MASK_OK;
+}
+
+static struct irq_chip ics_native_irq_chip = {
+ .name = "ICS",
+ .irq_startup = ics_native_startup,
+ .irq_mask = ics_native_mask_irq,
+ .irq_unmask = ics_native_unmask_irq,
+ .irq_eoi = NULL, /* Patched at init time */
+ .irq_set_affinity = ics_native_set_affinity,
+ .irq_set_type = xics_set_irq_type,
+ .irq_retrigger = xics_retrigger,
+};
+
+static int ics_native_map(struct ics *ics, unsigned int virq)
+{
+ unsigned int vec = (unsigned int)virq_to_hw(virq);
+ struct ics_native *in = to_ics_native(ics);
+
+ pr_devel("%s: vec=0x%x\n", __func__, vec);
+
+ if (vec < in->ibase || vec >= (in->ibase + in->icount))
+ return -EINVAL;
+
+ irq_set_chip_and_handler(virq, &ics_native_irq_chip, handle_fasteoi_irq);
+ irq_set_chip_data(virq, ics);
+
+ return 0;
+}
+
+static void ics_native_mask_unknown(struct ics *ics, unsigned long vec)
+{
+ struct ics_native *in = to_ics_native(ics);
+
+ if (vec < in->ibase || vec >= (in->ibase + in->icount))
+ return;
+
+ ics_native_do_mask(in, vec);
+}
+
+static long ics_native_get_server(struct ics *ics, unsigned long vec)
+{
+ struct ics_native *in = to_ics_native(ics);
+ u32 xive;
+
+ if (vec < in->ibase || vec >= (in->ibase + in->icount))
+ return -EINVAL;
+
+ xive = in_be32(ics_native_xive(in, vec));
+ return (xive >> 8) & 0xfff;
+}
+
+static int ics_native_host_match(struct ics *ics, struct device_node *node)
+{
+ struct ics_native *in = to_ics_native(ics);
+
+ return in->node == node;
+}
+
+static struct ics ics_native_template = {
+ .map = ics_native_map,
+ .mask_unknown = ics_native_mask_unknown,
+ .get_server = ics_native_get_server,
+ .host_match = ics_native_host_match,
+};
+
+static int __init ics_native_add_one(struct device_node *np)
+{
+ struct ics_native *ics;
+ u32 ranges[2];
+ int rc, count;
+
+ ics = kzalloc(sizeof(struct ics_native), GFP_KERNEL);
+ if (!ics)
+ return -ENOMEM;
+ ics->node = of_node_get(np);
+ memcpy(&ics->ics, &ics_native_template, sizeof(struct ics));
+
+ ics->base = of_iomap(np, 0);
+ if (!ics->base) {
+ pr_err("Failed to map %pOFP\n", np);
+ rc = -ENOMEM;
+ goto fail;
+ }
+
+ count = of_property_count_u32_elems(np, "interrupt-ranges");
+ if (count < 2 || count & 1) {
+ pr_err("Failed to read interrupt-ranges of %pOFP\n", np);
+ rc = -EINVAL;
+ goto fail;
+ }
+ if (count > 2) {
+ pr_warn("ICS %pOFP has %d ranges, only one supported\n",
+ np, count >> 1);
+ }
+ rc = of_property_read_u32_array(np, "interrupt-ranges",
+ ranges, 2);
+ if (rc) {
+ pr_err("Failed to read interrupt-ranges of %pOFP\n", np);
+ goto fail;
+ }
+ ics->ibase = ranges[0];
+ ics->icount = ranges[1];
+
+ pr_info("ICS native initialized for sources %d..%d\n",
+ ics->ibase, ics->ibase + ics->icount - 1);
+
+ /* Register ourselves */
+ xics_register_ics(&ics->ics);
+
+ return 0;
+fail:
+ of_node_put(ics->node);
+ kfree(ics);
+ return rc;
+}
+
+int __init ics_native_init(void)
+{
+ struct device_node *ics;
+ bool found_one = false;
+
+ /* We need to patch our irq chip's EOI to point to the
+ * right ICP
+ */
+ ics_native_irq_chip.irq_eoi = icp_ops->eoi;
+
+ /* Find native ICS in the device-tree */
+ for_each_compatible_node(ics, NULL, "openpower,xics-sources") {
+ if (ics_native_add_one(ics) == 0)
+ found_one = true;
+ }
+
+ if (found_one)
+ pr_info("ICS native backend registered\n");
+
+ return found_one ? 0 : -ENODEV;
+}
diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c
index 7e4305c01bac..b14c502e56a8 100644
--- a/arch/powerpc/sysdev/xics/xics-common.c
+++ b/arch/powerpc/sysdev/xics/xics-common.c
@@ -201,7 +201,7 @@ void xics_migrate_irqs_away(void)
struct ics *ics;
/* We can't set affinity on ISA interrupts */
- if (virq < NUM_ISA_INTERRUPTS)
+ if (virq < NR_IRQS_LEGACY)
continue;
/* We only need to migrate enabled IRQS */
if (!desc->action)
@@ -477,6 +477,8 @@ void __init xics_init(void)
if (rc < 0)
rc = ics_opal_init();
if (rc < 0)
+ rc = ics_native_init();
+ if (rc < 0)
pr_warn("XICS: Cannot find a Source Controller !\n");
/* Initialize common bits */
diff --git a/arch/powerpc/sysdev/xive/Kconfig b/arch/powerpc/sysdev/xive/Kconfig
index 785c292d104b..97796c6b63f0 100644
--- a/arch/powerpc/sysdev/xive/Kconfig
+++ b/arch/powerpc/sysdev/xive/Kconfig
@@ -3,6 +3,7 @@ config PPC_XIVE
bool
select PPC_SMP_MUXED_IPI
select HARDIRQS_SW_RESEND
+ select IRQ_DOMAIN_NOMAP
config PPC_XIVE_NATIVE
bool
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index c8173e92f19d..da4d7f225a40 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -70,6 +70,9 @@ static cpumask_t cpus_in_xmon = CPU_MASK_NONE;
static unsigned long xmon_taken = 1;
static int xmon_owner;
static int xmon_gate;
+static int xmon_batch;
+static unsigned long xmon_batch_start_cpu;
+static cpumask_t xmon_batch_cpus = CPU_MASK_NONE;
#else
#define xmon_owner 0
#endif /* CONFIG_SMP */
@@ -100,7 +103,7 @@ static long *xmon_fault_jmp[NR_CPUS];
/* Breakpoint stuff */
struct bpt {
unsigned long address;
- struct ppc_inst *instr;
+ u32 *instr;
atomic_t ref_count;
int enabled;
unsigned long pad;
@@ -133,6 +136,12 @@ static void prdump(unsigned long, long);
static int ppc_inst_dump(unsigned long, long, int);
static void dump_log_buf(void);
+#ifdef CONFIG_SMP
+static int xmon_switch_cpu(unsigned long);
+static int xmon_batch_next_cpu(void);
+static int batch_cmds(struct pt_regs *);
+#endif
+
#ifdef CONFIG_PPC_POWERNV
static void dump_opal_msglog(void);
#else
@@ -216,7 +225,8 @@ Commands:\n\
#ifdef CONFIG_SMP
"\
c print cpus stopped in xmon\n\
- c# try to switch to cpu number h (in hex)\n"
+ c# try to switch to cpu number h (in hex)\n\
+ c# $ run command '$' (one of 'r','S' or 't') on all cpus in xmon\n"
#endif
"\
C checksum\n\
@@ -489,10 +499,10 @@ static void xmon_touch_watchdogs(void)
touch_nmi_watchdog();
}
-static int xmon_core(struct pt_regs *regs, int fromipi)
+static int xmon_core(struct pt_regs *regs, volatile int fromipi)
{
- int cmd = 0;
- struct bpt *bp;
+ volatile int cmd = 0;
+ struct bpt *volatile bp;
long recurse_jmp[JMP_BUF_LEN];
bool locked_down;
unsigned long offset;
@@ -514,7 +524,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
bp = in_breakpoint_table(regs->nip, &offset);
if (bp != NULL) {
- regs->nip = bp->address + offset;
+ regs_set_return_ip(regs, bp->address + offset);
atomic_dec(&bp->ref_count);
}
@@ -644,7 +654,12 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
spin_cpu_relax();
touch_nmi_watchdog();
} else {
- if (!locked_down)
+ cmd = 1;
+#ifdef CONFIG_SMP
+ if (xmon_batch)
+ cmd = batch_cmds(regs);
+#endif
+ if (!locked_down && cmd)
cmd = cmds(regs);
if (locked_down || cmd != 0) {
/* exiting xmon */
@@ -702,7 +717,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
if (regs->msr & MSR_DE) {
bp = at_breakpoint(regs->nip);
if (bp != NULL) {
- regs->nip = (unsigned long) &bp->instr[0];
+ regs_set_return_ip(regs, (unsigned long) &bp->instr[0]);
atomic_inc(&bp->ref_count);
}
}
@@ -712,7 +727,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
if (bp != NULL) {
int stepped = emulate_step(regs, ppc_inst_read(bp->instr));
if (stepped == 0) {
- regs->nip = (unsigned long) &bp->instr[0];
+ regs_set_return_ip(regs, (unsigned long) &bp->instr[0]);
atomic_inc(&bp->ref_count);
} else if (stepped < 0) {
printf("Couldn't single-step %s instruction\n",
@@ -766,7 +781,7 @@ static int xmon_bpt(struct pt_regs *regs)
/* Are we at the trap at bp->instr[1] for some bp? */
bp = in_breakpoint_table(regs->nip, &offset);
if (bp != NULL && (offset == 4 || offset == 8)) {
- regs->nip = bp->address + offset;
+ regs_set_return_ip(regs, bp->address + offset);
atomic_dec(&bp->ref_count);
return 1;
}
@@ -836,7 +851,7 @@ static int xmon_fault_handler(struct pt_regs *regs)
if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT)) {
bp = in_breakpoint_table(regs->nip, &offset);
if (bp != NULL) {
- regs->nip = bp->address + offset;
+ regs_set_return_ip(regs, bp->address + offset);
atomic_dec(&bp->ref_count);
}
}
@@ -857,7 +872,7 @@ static inline void force_enable_xmon(void)
static struct bpt *at_breakpoint(unsigned long pc)
{
int i;
- struct bpt *bp;
+ struct bpt *volatile bp;
bp = bpts;
for (i = 0; i < NBPTS; ++i, ++bp)
@@ -946,11 +961,11 @@ static void insert_bpts(void)
}
patch_instruction(bp->instr, instr);
- patch_instruction(ppc_inst_next(bp->instr, &instr),
+ patch_instruction(ppc_inst_next(bp->instr, bp->instr),
ppc_inst(bpinstr));
if (bp->enabled & BP_CIABR)
continue;
- if (patch_instruction((struct ppc_inst *)bp->address,
+ if (patch_instruction((u32 *)bp->address,
ppc_inst(bpinstr)) != 0) {
printf("Couldn't write instruction at %lx, "
"disabling breakpoint there\n", bp->address);
@@ -992,7 +1007,7 @@ static void remove_bpts(void)
if (mread_instr(bp->address, &instr)
&& ppc_inst_equal(instr, ppc_inst(bpinstr))
&& patch_instruction(
- (struct ppc_inst *)bp->address, ppc_inst_read(bp->instr)) != 0)
+ (u32 *)bp->address, ppc_inst_read(bp->instr)) != 0)
printf("Couldn't remove breakpoint at %lx\n",
bp->address);
}
@@ -1188,7 +1203,7 @@ cmds(struct pt_regs *excp)
#ifdef CONFIG_BOOKE
static int do_step(struct pt_regs *regs)
{
- regs->msr |= MSR_DE;
+ regs_set_return_msr(regs, regs->msr | MSR_DE);
mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
return 1;
}
@@ -1221,7 +1236,7 @@ static int do_step(struct pt_regs *regs)
}
}
}
- regs->msr |= MSR_SE;
+ regs_set_return_msr(regs, regs->msr | MSR_SE);
return 1;
}
#endif
@@ -1243,11 +1258,112 @@ static void bootcmds(void)
}
}
+#ifdef CONFIG_SMP
+static int xmon_switch_cpu(unsigned long cpu)
+{
+ int timeout;
+
+ xmon_taken = 0;
+ mb();
+ xmon_owner = cpu;
+ timeout = 10000000;
+ while (!xmon_taken) {
+ if (--timeout == 0) {
+ if (test_and_set_bit(0, &xmon_taken))
+ break;
+ /* take control back */
+ mb();
+ xmon_owner = smp_processor_id();
+ printf("cpu 0x%lx didn't take control\n", cpu);
+ return 0;
+ }
+ barrier();
+ }
+ return 1;
+}
+
+static int xmon_batch_next_cpu(void)
+{
+ unsigned long cpu;
+
+ while (!cpumask_empty(&xmon_batch_cpus)) {
+ cpu = cpumask_next_wrap(smp_processor_id(), &xmon_batch_cpus,
+ xmon_batch_start_cpu, true);
+ if (cpu == nr_cpumask_bits)
+ break;
+ if (xmon_batch_start_cpu == -1)
+ xmon_batch_start_cpu = cpu;
+ if (xmon_switch_cpu(cpu))
+ return 0;
+ cpumask_clear_cpu(cpu, &xmon_batch_cpus);
+ }
+
+ xmon_batch = 0;
+ printf("%x:mon> \n", smp_processor_id());
+ return 1;
+}
+
+static int batch_cmds(struct pt_regs *excp)
+{
+ int cmd;
+
+ /* simulate command entry */
+ cmd = xmon_batch;
+ termch = '\n';
+
+ last_cmd = NULL;
+ xmon_regs = excp;
+
+ printf("%x:", smp_processor_id());
+ printf("mon> ");
+ printf("%c\n", (char)cmd);
+
+ switch (cmd) {
+ case 'r':
+ prregs(excp); /* print regs */
+ break;
+ case 'S':
+ super_regs();
+ break;
+ case 't':
+ backtrace(excp);
+ break;
+ }
+
+ cpumask_clear_cpu(smp_processor_id(), &xmon_batch_cpus);
+
+ return xmon_batch_next_cpu();
+}
+
static int cpu_cmd(void)
{
-#ifdef CONFIG_SMP
unsigned long cpu, first_cpu, last_cpu;
- int timeout;
+
+ cpu = skipbl();
+ if (cpu == '#') {
+ xmon_batch = skipbl();
+ if (xmon_batch) {
+ switch (xmon_batch) {
+ case 'r':
+ case 'S':
+ case 't':
+ cpumask_copy(&xmon_batch_cpus, &cpus_in_xmon);
+ if (cpumask_weight(&xmon_batch_cpus) <= 1) {
+ printf("There are no other cpus in xmon\n");
+ break;
+ }
+ xmon_batch_start_cpu = -1;
+ if (!xmon_batch_next_cpu())
+ return 1;
+ break;
+ default:
+ printf("c# only supports 'r', 'S' and 't' commands\n");
+ }
+ xmon_batch = 0;
+ return 0;
+ }
+ }
+ termch = cpu;
if (!scanhex(&cpu)) {
/* print cpus waiting or in xmon */
@@ -1279,27 +1395,15 @@ static int cpu_cmd(void)
#endif
return 0;
}
- xmon_taken = 0;
- mb();
- xmon_owner = cpu;
- timeout = 10000000;
- while (!xmon_taken) {
- if (--timeout == 0) {
- if (test_and_set_bit(0, &xmon_taken))
- break;
- /* take control back */
- mb();
- xmon_owner = smp_processor_id();
- printf("cpu 0x%lx didn't take control\n", cpu);
- return 0;
- }
- barrier();
- }
- return 1;
+
+ return xmon_switch_cpu(cpu);
+}
#else
+static int cpu_cmd(void)
+{
return 0;
-#endif /* CONFIG_SMP */
}
+#endif /* CONFIG_SMP */
static unsigned short fcstab[256] = {
0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf,
@@ -2214,7 +2318,7 @@ mread_instr(unsigned long adrs, struct ppc_inst *instr)
if (setjmp(bus_error_jmp) == 0) {
catch_memory_errors = 1;
sync();
- *instr = ppc_inst_read((struct ppc_inst *)adrs);
+ *instr = ppc_inst_read((u32 *)adrs);
sync();
/* wait a little while to see if we get a machine check */
__delay(200);
@@ -2975,7 +3079,7 @@ static void
dump_log_buf(void)
{
struct kmsg_dump_iter iter;
- unsigned char buf[128];
+ static unsigned char buf[1024];
size_t len;
if (setjmp(bus_error_jmp) != 0) {
@@ -3005,7 +3109,7 @@ static void dump_opal_msglog(void)
{
unsigned char buf[128];
ssize_t res;
- loff_t pos = 0;
+ volatile loff_t pos = 0;
if (!firmware_has_feature(FW_FEATURE_OPAL)) {
printf("Machine is not running OPAL firmware.\n");
@@ -3160,8 +3264,9 @@ memzcan(void)
printf("%.8lx\n", a - mskip);
}
-static void show_task(struct task_struct *tsk)
+static void show_task(struct task_struct *volatile tsk)
{
+ unsigned int p_state = READ_ONCE(tsk->__state);
char state;
/*
@@ -3169,14 +3274,14 @@ static void show_task(struct task_struct *tsk)
* appropriate for calling from xmon. This could be moved
* to a common, generic, routine used by both.
*/
- state = (tsk->state == 0) ? 'R' :
- (tsk->state < 0) ? 'U' :
- (tsk->state & TASK_UNINTERRUPTIBLE) ? 'D' :
- (tsk->state & TASK_STOPPED) ? 'T' :
- (tsk->state & TASK_TRACED) ? 'C' :
+ state = (p_state == 0) ? 'R' :
+ (p_state < 0) ? 'U' :
+ (p_state & TASK_UNINTERRUPTIBLE) ? 'D' :
+ (p_state & TASK_STOPPED) ? 'T' :
+ (p_state & TASK_TRACED) ? 'C' :
(tsk->exit_state & EXIT_ZOMBIE) ? 'Z' :
(tsk->exit_state & EXIT_DEAD) ? 'E' :
- (tsk->state & TASK_INTERRUPTIBLE) ? 'S' : '?';
+ (p_state & TASK_INTERRUPTIBLE) ? 'S' : '?';
printf("%16px %16lx %16px %6d %6d %c %2d %s\n", tsk,
tsk->thread.ksp, tsk->thread.regs,
@@ -3204,7 +3309,7 @@ static void format_pte(void *ptep, unsigned long pte)
static void show_pte(unsigned long addr)
{
unsigned long tskv = 0;
- struct task_struct *tsk = NULL;
+ struct task_struct *volatile tsk = NULL;
struct mm_struct *mm;
pgd_t *pgdp;
p4d_t *p4dp;
@@ -3299,7 +3404,7 @@ static void show_pte(unsigned long addr)
static void show_tasks(void)
{
unsigned long tskv;
- struct task_struct *tsk = NULL;
+ struct task_struct *volatile tsk = NULL;
printf(" task_struct ->thread.ksp ->thread.regs PID PPID S P CMD\n");
@@ -3622,7 +3727,7 @@ static void xmon_print_symbol(unsigned long address, const char *mid,
const char *after)
{
char *modname;
- const char *name = NULL;
+ const char *volatile name = NULL;
unsigned long offset, size;
printf(REG, address);
@@ -4054,7 +4159,7 @@ void xmon_register_spus(struct list_head *list)
static void stop_spus(void)
{
struct spu *spu;
- int i;
+ volatile int i;
u64 tmp;
for (i = 0; i < XMON_NUM_SPUS; i++) {
@@ -4095,7 +4200,7 @@ static void stop_spus(void)
static void restart_spus(void)
{
struct spu *spu;
- int i;
+ volatile int i;
for (i = 0; i < XMON_NUM_SPUS; i++) {
if (!spu_info[i].spu)