summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-01-14 15:17:26 +0100
committerLinus Torvalds <torvalds@linux-foundation.org>2022-01-14 15:17:26 +0100
commit29ec39fcf11e4583eb8d5174f756ea109c77cc44 (patch)
tree656f5c7166efe176ab2c7e24042f4e38a86b4473 /arch/powerpc/kernel
parentMerge tag 'mips_5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/mips/l... (diff)
parentpowerpc/xmon: Dump XIVE information for online-only processors. (diff)
downloadlinux-29ec39fcf11e4583eb8d5174f756ea109c77cc44.tar.xz
linux-29ec39fcf11e4583eb8d5174f756ea109c77cc44.zip
Merge tag 'powerpc-5.17-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman: - Optimise radix KVM guest entry/exit by 2x on Power9/Power10. - Allow firmware to tell us whether to disable the entry and uaccess flushes on Power10 or later CPUs. - Add BPF_PROBE_MEM support for 32 and 64-bit BPF jits. - Several fixes and improvements to our hard lockup watchdog. - Activate HAVE_DYNAMIC_FTRACE_WITH_REGS on 32-bit. - Allow building the 64-bit Book3S kernel without hash MMU support, ie. Radix only. - Add KUAP (SMAP) support for 40x, 44x, 8xx, Book3E (64-bit). - Add new encodings for perf_mem_data_src.mem_hops field, and use them on Power10. - A series of small performance improvements to 64-bit interrupt entry. - Several commits fixing issues when building with the clang integrated assembler. - Many other small features and fixes. Thanks to Alan Modra, Alexey Kardashevskiy, Ammar Faizi, Anders Roxell, Arnd Bergmann, Athira Rajeev, Cédric Le Goater, Christophe JAILLET, Christophe Leroy, Christoph Hellwig, Daniel Axtens, David Yang, Erhard Furtner, Fabiano Rosas, Greg Kroah-Hartman, Guo Ren, Hari Bathini, Jason Wang, Joel Stanley, Julia Lawall, Kajol Jain, Kees Cook, Laurent Dufour, Madhavan Srinivasan, Mark Brown, Minghao Chi, Nageswara R Sastry, Naresh Kamboju, Nathan Chancellor, Nathan Lynch, Nicholas Piggin, Nick Child, Oliver O'Halloran, Peiwei Hu, Randy Dunlap, Ravi Bangoria, Rob Herring, Russell Currey, Sachin Sant, Sean Christopherson, Segher Boessenkool, Thadeu Lima de Souza Cascardo, Tyrel Datwyler, Xiang wangx, and Yang Guang. * tag 'powerpc-5.17-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (240 commits) powerpc/xmon: Dump XIVE information for online-only processors. powerpc/opal: use default_groups in kobj_type powerpc/cacheinfo: use default_groups in kobj_type powerpc/sched: Remove unused TASK_SIZE_OF powerpc/xive: Add missing null check after calling kmalloc powerpc/floppy: Remove usage of the deprecated "pci-dma-compat.h" API selftests/powerpc: Add a test of sigreturning to an unaligned address powerpc/64s: Use EMIT_WARN_ENTRY for SRR debug warnings powerpc/64s: Mask NIP before checking against SRR0 powerpc/perf: Fix spelling of "its" powerpc/32: Fix boot failure with GCC latent entropy plugin powerpc/code-patching: Replace patch_instruction() by ppc_inst_write() in selftests powerpc/code-patching: Move code patching selftests in its own file powerpc/code-patching: Move instr_is_branch_{i/b}form() in code-patching.h powerpc/code-patching: Move patch_exception() outside code-patching.c powerpc/code-patching: Use test_trampoline for prefixed patch test powerpc/code-patching: Fix patch_branch() return on out-of-range failure powerpc/code-patching: Reorganise do_patch_instruction() to ease error handling powerpc/code-patching: Fix unmap_patch_area() error handling powerpc/code-patching: Fix error handling in do_patch_instruction() ...
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/Makefile1
-rw-r--r--arch/powerpc/kernel/align.c4
-rw-r--r--arch/powerpc/kernel/asm-offsets.c5
-rw-r--r--arch/powerpc/kernel/btext.c16
-rw-r--r--arch/powerpc/kernel/cacheinfo.c5
-rw-r--r--arch/powerpc/kernel/cpu_setup_power.c12
-rw-r--r--arch/powerpc/kernel/dbell.c3
-rw-r--r--arch/powerpc/kernel/dt_cpu_ftrs.c24
-rw-r--r--arch/powerpc/kernel/eeh_cache.c2
-rw-r--r--arch/powerpc/kernel/eeh_driver.c162
-rw-r--r--arch/powerpc/kernel/entry_32.S54
-rw-r--r--arch/powerpc/kernel/entry_64.S4
-rw-r--r--arch/powerpc/kernel/epapr_paravirt.c2
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S14
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S174
-rw-r--r--arch/powerpc/kernel/fadump.c26
-rw-r--r--arch/powerpc/kernel/fpu.S5
-rw-r--r--arch/powerpc/kernel/head_32.h9
-rw-r--r--arch/powerpc/kernel/head_40x.S17
-rw-r--r--arch/powerpc/kernel/head_44x.S26
-rw-r--r--arch/powerpc/kernel/head_64.S20
-rw-r--r--arch/powerpc/kernel/head_book3s_32.S4
-rw-r--r--arch/powerpc/kernel/head_booke.h3
-rw-r--r--arch/powerpc/kernel/head_fsl_booke.S13
-rw-r--r--arch/powerpc/kernel/hw_breakpoint.c4
-rw-r--r--arch/powerpc/kernel/hw_breakpoint_constraints.c4
-rw-r--r--arch/powerpc/kernel/idle.c2
-rw-r--r--arch/powerpc/kernel/idle_6xx.S2
-rw-r--r--arch/powerpc/kernel/interrupt.c3
-rw-r--r--arch/powerpc/kernel/interrupt_64.S46
-rw-r--r--arch/powerpc/kernel/irq.c5
-rw-r--r--arch/powerpc/kernel/kgdb.c4
-rw-r--r--arch/powerpc/kernel/kprobes.c4
-rw-r--r--arch/powerpc/kernel/l2cr_6xx.S6
-rw-r--r--arch/powerpc/kernel/mce.c2
-rw-r--r--arch/powerpc/kernel/mce_power.c18
-rw-r--r--arch/powerpc/kernel/module.c11
-rw-r--r--arch/powerpc/kernel/module_32.c33
-rw-r--r--arch/powerpc/kernel/nvram_64.c6
-rw-r--r--arch/powerpc/kernel/optprobes.c12
-rw-r--r--arch/powerpc/kernel/optprobes_head.S4
-rw-r--r--arch/powerpc/kernel/paca.c18
-rw-r--r--arch/powerpc/kernel/pci-common.c2
-rw-r--r--arch/powerpc/kernel/pci_32.c4
-rw-r--r--arch/powerpc/kernel/process.c58
-rw-r--r--arch/powerpc/kernel/prom.c6
-rw-r--r--arch/powerpc/kernel/prom_init.c14
-rw-r--r--arch/powerpc/kernel/rtas.c104
-rw-r--r--arch/powerpc/kernel/rtasd.c6
-rw-r--r--arch/powerpc/kernel/security.c4
-rw-r--r--arch/powerpc/kernel/setup-common.c2
-rw-r--r--arch/powerpc/kernel/setup.h2
-rw-r--r--arch/powerpc/kernel/setup_32.c4
-rw-r--r--arch/powerpc/kernel/setup_64.c23
-rw-r--r--arch/powerpc/kernel/signal_32.c14
-rw-r--r--arch/powerpc/kernel/smp.c47
-rw-r--r--arch/powerpc/kernel/swsusp_32.S2
-rw-r--r--arch/powerpc/kernel/swsusp_asm64.S2
-rw-r--r--arch/powerpc/kernel/sysfs.c10
-rw-r--r--arch/powerpc/kernel/time.c87
-rw-r--r--arch/powerpc/kernel/tm.S15
-rw-r--r--arch/powerpc/kernel/trace/ftrace.c107
-rw-r--r--arch/powerpc/kernel/trace/ftrace_32.S118
-rw-r--r--arch/powerpc/kernel/trace/ftrace_64_mprofile.S15
-rw-r--r--arch/powerpc/kernel/udbg_16550.c10
-rw-r--r--arch/powerpc/kernel/vecemu.c2
-rw-r--r--arch/powerpc/kernel/vector.S10
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S16
-rw-r--r--arch/powerpc/kernel/watchdog.c223
69 files changed, 1133 insertions, 563 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index b039877c743d..4d7829399570 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -11,6 +11,7 @@ CFLAGS_prom_init.o += -fPIC
CFLAGS_btext.o += -fPIC
endif
+CFLAGS_early_32.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_cputable.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_prom_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index bf96b954a4eb..3e37ece06739 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -105,7 +105,7 @@ static struct aligninfo spe_aligninfo[32] = {
* so we don't need the address swizzling.
*/
static int emulate_spe(struct pt_regs *regs, unsigned int reg,
- struct ppc_inst ppc_instr)
+ ppc_inst_t ppc_instr)
{
union {
u64 ll;
@@ -300,7 +300,7 @@ Efault_write:
int fix_alignment(struct pt_regs *regs)
{
- struct ppc_inst instr;
+ ppc_inst_t instr;
struct instruction_op op;
int r, type;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index cc05522f50bf..7582f3e3a330 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -54,7 +54,7 @@
#endif
#ifdef CONFIG_PPC32
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#ifdef CONFIG_BOOKE_OR_40x
#include "head_booke.h"
#endif
#endif
@@ -139,6 +139,7 @@ int main(void)
OFFSET(THR11, thread_struct, r11);
OFFSET(THLR, thread_struct, lr);
OFFSET(THCTR, thread_struct, ctr);
+ OFFSET(THSR0, thread_struct, sr0);
#endif
#ifdef CONFIG_SPE
OFFSET(THREAD_EVR0, thread_struct, evr[0]);
@@ -218,10 +219,12 @@ int main(void)
OFFSET(PACA_EXGEN, paca_struct, exgen);
OFFSET(PACA_EXMC, paca_struct, exmc);
OFFSET(PACA_EXNMI, paca_struct, exnmi);
+#ifdef CONFIG_PPC_64S_HASH_MMU
OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr);
OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid);
OFFSET(SLBSHADOW_STACKESID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid);
OFFSET(SLBSHADOW_SAVEAREA, slb_shadow, save_area);
+#endif
OFFSET(LPPACA_PMCINUSE, lppaca, pmcregs_in_use);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
OFFSET(PACA_PMCINUSE, paca_struct, pmcregs_in_use);
diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index 803c2a45b22a..9d9d56b574cc 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -161,7 +161,7 @@ void btext_map(void)
boot_text_mapped = 1;
}
-static int btext_initialize(struct device_node *np)
+static int __init btext_initialize(struct device_node *np)
{
unsigned int width, height, depth, pitch;
unsigned long address = 0;
@@ -241,8 +241,10 @@ int __init btext_find_display(int allow_nonstdout)
rc = btext_initialize(np);
printk("result: %d\n", rc);
}
- if (rc == 0)
+ if (rc == 0) {
+ of_node_put(np);
break;
+ }
}
return rc;
}
@@ -290,7 +292,7 @@ void btext_update_display(unsigned long phys, int width, int height,
}
EXPORT_SYMBOL(btext_update_display);
-void btext_clearscreen(void)
+void __init btext_clearscreen(void)
{
unsigned int *base = (unsigned int *)calc_base(0, 0);
unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) *
@@ -308,7 +310,7 @@ void btext_clearscreen(void)
rmci_maybe_off();
}
-void btext_flushscreen(void)
+void __init btext_flushscreen(void)
{
unsigned int *base = (unsigned int *)calc_base(0, 0);
unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) *
@@ -327,7 +329,7 @@ void btext_flushscreen(void)
__asm__ __volatile__ ("sync" ::: "memory");
}
-void btext_flushline(void)
+void __init btext_flushline(void)
{
unsigned int *base = (unsigned int *)calc_base(0, g_loc_Y << 4);
unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) *
@@ -542,7 +544,7 @@ void btext_drawstring(const char *c)
btext_drawchar(*c++);
}
-void btext_drawtext(const char *c, unsigned int len)
+void __init btext_drawtext(const char *c, unsigned int len)
{
if (!boot_text_mapped)
return;
@@ -550,7 +552,7 @@ void btext_drawtext(const char *c, unsigned int len)
btext_drawchar(*c++);
}
-void btext_drawhex(unsigned long v)
+void __init btext_drawhex(unsigned long v)
{
if (!boot_text_mapped)
return;
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index cf1be75b7833..00b0992be3e7 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -710,7 +710,7 @@ static struct kobj_attribute cache_shared_cpu_list_attr =
__ATTR(shared_cpu_list, 0444, shared_cpu_list_show, NULL);
/* Attributes which should always be created -- the kobject/sysfs core
- * does this automatically via kobj_type->default_attrs. This is the
+ * does this automatically via kobj_type->default_groups. This is the
* minimum data required to uniquely identify a cache.
*/
static struct attribute *cache_index_default_attrs[] = {
@@ -720,6 +720,7 @@ static struct attribute *cache_index_default_attrs[] = {
&cache_shared_cpu_list_attr.attr,
NULL,
};
+ATTRIBUTE_GROUPS(cache_index_default);
/* Attributes which should be created if the cache device node has the
* right properties -- see cacheinfo_create_index_opt_attrs
@@ -738,7 +739,7 @@ static const struct sysfs_ops cache_index_ops = {
static struct kobj_type cache_index_type = {
.release = cache_index_release,
.sysfs_ops = &cache_index_ops,
- .default_attrs = cache_index_default_attrs,
+ .default_groups = cache_index_default_groups,
};
static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
diff --git a/arch/powerpc/kernel/cpu_setup_power.c b/arch/powerpc/kernel/cpu_setup_power.c
index 3cca88ee96d7..3dc61e203f37 100644
--- a/arch/powerpc/kernel/cpu_setup_power.c
+++ b/arch/powerpc/kernel/cpu_setup_power.c
@@ -109,7 +109,7 @@ static void init_PMU_HV_ISA207(void)
static void init_PMU(void)
{
mtspr(SPRN_MMCRA, 0);
- mtspr(SPRN_MMCR0, 0);
+ mtspr(SPRN_MMCR0, MMCR0_FC);
mtspr(SPRN_MMCR1, 0);
mtspr(SPRN_MMCR2, 0);
}
@@ -123,7 +123,7 @@ static void init_PMU_ISA31(void)
{
mtspr(SPRN_MMCR3, 0);
mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
- mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
+ mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
}
/*
@@ -137,6 +137,7 @@ void __setup_cpu_power7(unsigned long offset, struct cpu_spec *t)
return;
mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH);
}
@@ -150,6 +151,7 @@ void __restore_cpu_power7(void)
return;
mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH);
}
@@ -164,6 +166,7 @@ void __setup_cpu_power8(unsigned long offset, struct cpu_spec *t)
return;
mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */
init_HFSCR();
@@ -184,6 +187,7 @@ void __restore_cpu_power8(void)
return;
mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */
init_HFSCR();
@@ -202,6 +206,7 @@ void __setup_cpu_power9(unsigned long offset, struct cpu_spec *t)
mtspr(SPRN_PSSCR, 0);
mtspr(SPRN_LPID, 0);
mtspr(SPRN_PID, 0);
+ mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
@@ -223,6 +228,7 @@ void __restore_cpu_power9(void)
mtspr(SPRN_PSSCR, 0);
mtspr(SPRN_LPID, 0);
mtspr(SPRN_PID, 0);
+ mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
@@ -242,6 +248,7 @@ void __setup_cpu_power10(unsigned long offset, struct cpu_spec *t)
mtspr(SPRN_PSSCR, 0);
mtspr(SPRN_LPID, 0);
mtspr(SPRN_PID, 0);
+ mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
@@ -264,6 +271,7 @@ void __restore_cpu_power10(void)
mtspr(SPRN_PSSCR, 0);
mtspr(SPRN_LPID, 0);
mtspr(SPRN_PID, 0);
+ mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index 5545c9cd17c1..f55c6fb34a3a 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -27,7 +27,8 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception)
ppc_msgsync();
- may_hard_irq_enable();
+ if (should_hard_irq_enable())
+ do_hard_irq_enable();
kvmppc_clear_host_ipi(smp_processor_id());
__this_cpu_inc(irq_stat.doorbell_irqs);
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index ba527fb52993..7d1b2c4a4891 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -80,6 +80,7 @@ static void __restore_cpu_cpufeatures(void)
mtspr(SPRN_LPCR, system_registers.lpcr);
if (hv_mode) {
mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_HFSCR, system_registers.hfscr);
mtspr(SPRN_PCR, system_registers.pcr);
}
@@ -216,6 +217,7 @@ static int __init feat_enable_hv(struct dt_cpu_feature *f)
}
mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_AMOR, ~0);
lpcr = mfspr(SPRN_LPCR);
lpcr &= ~LPCR_LPES0; /* HV external interrupts */
@@ -271,6 +273,9 @@ static int __init feat_enable_mmu_hash(struct dt_cpu_feature *f)
{
u64 lpcr;
+ if (!IS_ENABLED(CONFIG_PPC_64S_HASH_MMU))
+ return 0;
+
lpcr = mfspr(SPRN_LPCR);
lpcr &= ~LPCR_ISL;
@@ -290,6 +295,9 @@ static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f)
{
u64 lpcr;
+ if (!IS_ENABLED(CONFIG_PPC_64S_HASH_MMU))
+ return 0;
+
lpcr = mfspr(SPRN_LPCR);
lpcr &= ~(LPCR_ISL | LPCR_UPRT | LPCR_HR);
mtspr(SPRN_LPCR, lpcr);
@@ -303,15 +311,15 @@ static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f)
static int __init feat_enable_mmu_radix(struct dt_cpu_feature *f)
{
-#ifdef CONFIG_PPC_RADIX_MMU
+ if (!IS_ENABLED(CONFIG_PPC_RADIX_MMU))
+ return 0;
+
+ cur_cpu_spec->mmu_features |= MMU_FTR_KERNEL_RO;
cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX;
- cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE;
cur_cpu_spec->mmu_features |= MMU_FTR_GTSE;
cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU;
return 1;
-#endif
- return 0;
}
static int __init feat_enable_dscr(struct dt_cpu_feature *f)
@@ -336,7 +344,7 @@ static int __init feat_enable_dscr(struct dt_cpu_feature *f)
return 1;
}
-static void hfscr_pmu_enable(void)
+static void __init hfscr_pmu_enable(void)
{
u64 hfscr = mfspr(SPRN_HFSCR);
hfscr |= PPC_BIT(60);
@@ -351,7 +359,7 @@ static void init_pmu_power8(void)
}
mtspr(SPRN_MMCRA, 0);
- mtspr(SPRN_MMCR0, 0);
+ mtspr(SPRN_MMCR0, MMCR0_FC);
mtspr(SPRN_MMCR1, 0);
mtspr(SPRN_MMCR2, 0);
mtspr(SPRN_MMCRS, 0);
@@ -390,7 +398,7 @@ static void init_pmu_power9(void)
mtspr(SPRN_MMCRC, 0);
mtspr(SPRN_MMCRA, 0);
- mtspr(SPRN_MMCR0, 0);
+ mtspr(SPRN_MMCR0, MMCR0_FC);
mtspr(SPRN_MMCR1, 0);
mtspr(SPRN_MMCR2, 0);
}
@@ -426,7 +434,7 @@ static void init_pmu_power10(void)
mtspr(SPRN_MMCR3, 0);
mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
- mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
+ mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
}
static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f)
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index 9bdaaf7fddc9..2f9dbf8ad2ee 100644
--- a/arch/powerpc/kernel/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -280,7 +280,7 @@ static int eeh_addr_cache_show(struct seq_file *s, void *v)
}
DEFINE_SHOW_ATTRIBUTE(eeh_addr_cache);
-void eeh_cache_debugfs_init(void)
+void __init eeh_cache_debugfs_init(void)
{
debugfs_create_file_unsafe("eeh_address_cache", 0400,
arch_debugfs_dir, NULL,
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 350dab18e137..422f80b5b27b 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -905,18 +905,19 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
}
#endif /* CONFIG_STACKTRACE */
+ eeh_for_each_pe(pe, tmp_pe)
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp)
+ edev->mode &= ~EEH_DEV_NO_HANDLER;
+
eeh_pe_update_time_stamp(pe);
pe->freeze_count++;
if (pe->freeze_count > eeh_max_freezes) {
pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n",
pe->phb->global_number, pe->addr,
pe->freeze_count);
- result = PCI_ERS_RESULT_DISCONNECT;
- }
- eeh_for_each_pe(pe, tmp_pe)
- eeh_pe_for_each_dev(tmp_pe, edev, tmp)
- edev->mode &= ~EEH_DEV_NO_HANDLER;
+ goto recover_failed;
+ }
/* Walk the various device drivers attached to this slot through
* a reset sequence, giving each an opportunity to do what it needs
@@ -928,39 +929,38 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
* the error. Override the result if necessary to have partially
* hotplug for this case.
*/
- if (result != PCI_ERS_RESULT_DISCONNECT) {
- pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
- pe->freeze_count, eeh_max_freezes);
- pr_info("EEH: Notify device drivers to shutdown\n");
- eeh_set_channel_state(pe, pci_channel_io_frozen);
- eeh_set_irq_state(pe, false);
- eeh_pe_report("error_detected(IO frozen)", pe,
- eeh_report_error, &result);
- if ((pe->type & EEH_PE_PHB) &&
- result != PCI_ERS_RESULT_NONE &&
- result != PCI_ERS_RESULT_NEED_RESET)
- result = PCI_ERS_RESULT_NEED_RESET;
- }
+ pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
+ pe->freeze_count, eeh_max_freezes);
+ pr_info("EEH: Notify device drivers to shutdown\n");
+ eeh_set_channel_state(pe, pci_channel_io_frozen);
+ eeh_set_irq_state(pe, false);
+ eeh_pe_report("error_detected(IO frozen)", pe,
+ eeh_report_error, &result);
+ if (result == PCI_ERS_RESULT_DISCONNECT)
+ goto recover_failed;
+
+ /*
+ * Error logged on a PHB are always fences which need a full
+ * PHB reset to clear so force that to happen.
+ */
+ if ((pe->type & EEH_PE_PHB) && result != PCI_ERS_RESULT_NONE)
+ result = PCI_ERS_RESULT_NEED_RESET;
/* Get the current PCI slot state. This can take a long time,
* sometimes over 300 seconds for certain systems.
*/
- if (result != PCI_ERS_RESULT_DISCONNECT) {
- rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
- if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
- pr_warn("EEH: Permanent failure\n");
- result = PCI_ERS_RESULT_DISCONNECT;
- }
+ rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY * 1000);
+ if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
+ pr_warn("EEH: Permanent failure\n");
+ goto recover_failed;
}
/* Since rtas may enable MMIO when posting the error log,
* don't post the error log until after all dev drivers
* have been informed.
*/
- if (result != PCI_ERS_RESULT_DISCONNECT) {
- pr_info("EEH: Collect temporary log\n");
- eeh_slot_error_detail(pe, EEH_LOG_TEMP);
- }
+ pr_info("EEH: Collect temporary log\n");
+ eeh_slot_error_detail(pe, EEH_LOG_TEMP);
/* If all device drivers were EEH-unaware, then shut
* down all of the device drivers, and hope they
@@ -970,9 +970,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
pr_info("EEH: Reset with hotplug activity\n");
rc = eeh_reset_device(pe, bus, NULL, false);
if (rc) {
- pr_warn("%s: Unable to reset, err=%d\n",
- __func__, rc);
- result = PCI_ERS_RESULT_DISCONNECT;
+ pr_warn("%s: Unable to reset, err=%d\n", __func__, rc);
+ goto recover_failed;
}
}
@@ -980,10 +979,10 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
if (result == PCI_ERS_RESULT_CAN_RECOVER) {
pr_info("EEH: Enable I/O for affected devices\n");
rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
+ if (rc < 0)
+ goto recover_failed;
- if (rc < 0) {
- result = PCI_ERS_RESULT_DISCONNECT;
- } else if (rc) {
+ if (rc) {
result = PCI_ERS_RESULT_NEED_RESET;
} else {
pr_info("EEH: Notify device drivers to resume I/O\n");
@@ -991,15 +990,13 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
eeh_report_mmio_enabled, &result);
}
}
-
- /* If all devices reported they can proceed, then re-enable DMA */
if (result == PCI_ERS_RESULT_CAN_RECOVER) {
pr_info("EEH: Enabled DMA for affected devices\n");
rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
+ if (rc < 0)
+ goto recover_failed;
- if (rc < 0) {
- result = PCI_ERS_RESULT_DISCONNECT;
- } else if (rc) {
+ if (rc) {
result = PCI_ERS_RESULT_NEED_RESET;
} else {
/*
@@ -1017,16 +1014,15 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
pr_info("EEH: Reset without hotplug activity\n");
rc = eeh_reset_device(pe, bus, &rmv_data, true);
if (rc) {
- pr_warn("%s: Cannot reset, err=%d\n",
- __func__, rc);
- result = PCI_ERS_RESULT_DISCONNECT;
- } else {
- result = PCI_ERS_RESULT_NONE;
- eeh_set_channel_state(pe, pci_channel_io_normal);
- eeh_set_irq_state(pe, true);
- eeh_pe_report("slot_reset", pe, eeh_report_reset,
- &result);
+ pr_warn("%s: Cannot reset, err=%d\n", __func__, rc);
+ goto recover_failed;
}
+
+ result = PCI_ERS_RESULT_NONE;
+ eeh_set_channel_state(pe, pci_channel_io_normal);
+ eeh_set_irq_state(pe, true);
+ eeh_pe_report("slot_reset", pe, eeh_report_reset,
+ &result);
}
if ((result == PCI_ERS_RESULT_RECOVERED) ||
@@ -1054,45 +1050,47 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
}
pr_info("EEH: Recovery successful.\n");
- } else {
- /*
- * About 90% of all real-life EEH failures in the field
- * are due to poorly seated PCI cards. Only 10% or so are
- * due to actual, failed cards.
- */
- pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
- "Please try reseating or replacing it\n",
- pe->phb->global_number, pe->addr);
+ goto out;
+ }
- eeh_slot_error_detail(pe, EEH_LOG_PERM);
+recover_failed:
+ /*
+ * About 90% of all real-life EEH failures in the field
+ * are due to poorly seated PCI cards. Only 10% or so are
+ * due to actual, failed cards.
+ */
+ pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
+ "Please try reseating or replacing it\n",
+ pe->phb->global_number, pe->addr);
- /* Notify all devices that they're about to go down. */
- eeh_set_channel_state(pe, pci_channel_io_perm_failure);
- eeh_set_irq_state(pe, false);
- eeh_pe_report("error_detected(permanent failure)", pe,
- eeh_report_failure, NULL);
+ eeh_slot_error_detail(pe, EEH_LOG_PERM);
- /* Mark the PE to be removed permanently */
- eeh_pe_state_mark(pe, EEH_PE_REMOVED);
+ /* Notify all devices that they're about to go down. */
+ eeh_set_channel_state(pe, pci_channel_io_perm_failure);
+ eeh_set_irq_state(pe, false);
+ eeh_pe_report("error_detected(permanent failure)", pe,
+ eeh_report_failure, NULL);
- /*
- * Shut down the device drivers for good. We mark
- * all removed devices correctly to avoid access
- * the their PCI config any more.
- */
- if (pe->type & EEH_PE_VF) {
- eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
- eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
- } else {
- eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
- eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+ /* Mark the PE to be removed permanently */
+ eeh_pe_state_mark(pe, EEH_PE_REMOVED);
- pci_lock_rescan_remove();
- pci_hp_remove_devices(bus);
- pci_unlock_rescan_remove();
- /* The passed PE should no longer be used */
- return;
- }
+ /*
+ * Shut down the device drivers for good. We mark
+ * all removed devices correctly to avoid access
+ * the their PCI config any more.
+ */
+ if (pe->type & EEH_PE_VF) {
+ eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
+ eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+ } else {
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
+ eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+
+ pci_lock_rescan_remove();
+ pci_hp_remove_devices(bus);
+ pci_unlock_rescan_remove();
+ /* The passed PE should no longer be used */
+ return;
}
out:
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 61fdd53cdd9a..7748c278d13c 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -73,13 +73,39 @@ prepare_transfer_to_handler:
_ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler)
#endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */
+#if defined(CONFIG_PPC_KUEP) && defined(CONFIG_PPC_BOOK3S_32)
+ .globl __kuep_lock
+__kuep_lock:
+ lwz r9, THREAD+THSR0(r2)
+ update_user_segments_by_4 r9, r10, r11, r12
+ blr
+
+__kuep_unlock:
+ lwz r9, THREAD+THSR0(r2)
+ rlwinm r9,r9,0,~SR_NX
+ update_user_segments_by_4 r9, r10, r11, r12
+ blr
+
+.macro kuep_lock
+ bl __kuep_lock
+.endm
+.macro kuep_unlock
+ bl __kuep_unlock
+.endm
+#else
+.macro kuep_lock
+.endm
+.macro kuep_unlock
+.endm
+#endif
+
.globl transfer_to_syscall
transfer_to_syscall:
stw r11, GPR1(r1)
stw r11, 0(r1)
mflr r12
stw r12, _LINK(r1)
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#ifdef CONFIG_BOOKE_OR_40x
rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
#endif
lis r12,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
@@ -90,10 +116,10 @@ transfer_to_syscall:
stw r12,8(r1)
stw r2,_TRAP(r1)
SAVE_GPR(0, r1)
- SAVE_4GPRS(3, r1)
- SAVE_2GPRS(7, r1)
+ SAVE_GPRS(3, 8, r1)
addi r2,r10,-THREAD
SAVE_NVGPRS(r1)
+ kuep_lock
/* Calling convention has r9 = orig r0, r10 = regs */
addi r10,r1,STACK_FRAME_OVERHEAD
@@ -110,6 +136,7 @@ ret_from_syscall:
cmplwi cr0,r5,0
bne- 2f
#endif /* CONFIG_PPC_47x */
+ kuep_unlock
lwz r4,_LINK(r1)
lwz r5,_CCR(r1)
mtlr r4
@@ -139,7 +166,7 @@ syscall_exit_finish:
mtxer r5
lwz r0,GPR0(r1)
lwz r3,GPR3(r1)
- REST_8GPRS(4,r1)
+ REST_GPRS(4, 11, r1)
lwz r12,GPR12(r1)
b 1b
@@ -232,9 +259,9 @@ fast_exception_return:
beq 3f /* if not, we've got problems */
#endif
-2: REST_4GPRS(3, r11)
+2: REST_GPRS(3, 6, r11)
lwz r10,_CCR(r11)
- REST_2GPRS(1, r11)
+ REST_GPRS(1, 2, r11)
mtcr r10
lwz r10,_LINK(r11)
mtlr r10
@@ -273,6 +300,7 @@ interrupt_return:
beq .Lkernel_interrupt_return
bl interrupt_exit_user_prepare
cmpwi r3,0
+ kuep_unlock
bne- .Lrestore_nvgprs
.Lfast_user_interrupt_return:
@@ -298,16 +326,14 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
* the reliable stack unwinder later on. Clear it.
*/
stw r0,8(r1)
- REST_4GPRS(7, r1)
- REST_2GPRS(11, r1)
+ REST_GPRS(7, 12, r1)
mtcr r3
mtlr r4
mtctr r5
mtspr SPRN_XER,r6
- REST_4GPRS(2, r1)
- REST_GPR(6, r1)
+ REST_GPRS(2, 6, r1)
REST_GPR(0, r1)
REST_GPR(1, r1)
rfi
@@ -341,8 +367,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
lwz r6,_CCR(r1)
li r0,0
- REST_4GPRS(7, r1)
- REST_2GPRS(11, r1)
+ REST_GPRS(7, 12, r1)
mtlr r3
mtctr r4
@@ -354,7 +379,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
*/
stw r0,8(r1)
- REST_4GPRS(2, r1)
+ REST_GPRS(2, 5, r1)
bne- cr1,1f /* emulate stack store */
mtcr r6
@@ -430,8 +455,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return)
bne interrupt_return; \
lwz r0,GPR0(r1); \
lwz r2,GPR2(r1); \
- REST_4GPRS(3, r1); \
- REST_2GPRS(7, r1); \
+ REST_GPRS(3, 8, r1); \
lwz r10,_XER(r1); \
lwz r11,_CTR(r1); \
mtspr SPRN_XER,r10; \
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 70cff7b49e17..9581906b5ee9 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -180,7 +180,7 @@ _GLOBAL(_switch)
#endif
ld r8,KSP(r4) /* new stack pointer */
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION
b 2f
END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
@@ -232,7 +232,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
slbmte r7,r0
isync
2:
-#endif /* CONFIG_PPC_BOOK3S_64 */
+#endif /* CONFIG_PPC_64S_HASH_MMU */
clrrdi r7, r8, THREAD_SHIFT /* base of new stack */
/* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE
diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c
index 93b0f3ec8fb0..d4b8aff20815 100644
--- a/arch/powerpc/kernel/epapr_paravirt.c
+++ b/arch/powerpc/kernel/epapr_paravirt.c
@@ -37,7 +37,7 @@ static int __init early_init_dt_scan_epapr(unsigned long node,
return -1;
for (i = 0; i < (len / 4); i++) {
- struct ppc_inst inst = ppc_inst(be32_to_cpu(insts[i]));
+ ppc_inst_t inst = ppc_inst(be32_to_cpu(insts[i]));
patch_instruction(epapr_hypercall_start + i, inst);
#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
patch_instruction(epapr_ev_idle_start + i, inst);
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 711c66b76df1..67dc4e3179a0 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -198,8 +198,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
stdcx. r0,0,r1 /* to clear the reservation */
- REST_4GPRS(2, r1)
- REST_4GPRS(6, r1)
+ REST_GPRS(2, 9, r1)
ld r10,_CTR(r1)
ld r11,_XER(r1)
@@ -375,9 +374,7 @@ ret_from_mc_except:
exc_##n##_common: \
std r0,GPR0(r1); /* save r0 in stackframe */ \
std r2,GPR2(r1); /* save r2 in stackframe */ \
- SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
- SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \
- std r9,GPR9(r1); /* save r9 in stackframe */ \
+ SAVE_GPRS(3, 9, r1); /* save r3 - r9 in stackframe */ \
std r10,_NIP(r1); /* save SRR0 to stackframe */ \
std r11,_MSR(r1); /* save SRR1 to stackframe */ \
beq 2f; /* if from kernel mode */ \
@@ -1061,9 +1058,7 @@ bad_stack_book3e:
std r11,_ESR(r1)
std r0,GPR0(r1); /* save r0 in stackframe */ \
std r2,GPR2(r1); /* save r2 in stackframe */ \
- SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
- SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \
- std r9,GPR9(r1); /* save r9 in stackframe */ \
+ SAVE_GPRS(3, 9, r1); /* save r3 - r9 in stackframe */ \
ld r3,PACA_EXGEN+EX_R10(r13);/* get back r10 */ \
ld r4,PACA_EXGEN+EX_R11(r13);/* get back r11 */ \
mfspr r5,SPRN_SPRG_GEN_SCRATCH;/* get back r13 XXX can be wrong */ \
@@ -1077,8 +1072,7 @@ bad_stack_book3e:
std r10,_LINK(r1)
std r11,_CTR(r1)
std r12,_XER(r1)
- SAVE_10GPRS(14,r1)
- SAVE_8GPRS(24,r1)
+ SAVE_GPRS(14, 31, r1)
lhz r12,PACA_TRAP_SAVE(r13)
std r12,_TRAP(r1)
addi r11,r1,INT_FRAME_SIZE
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index eaf1f72131a1..55caeee37c08 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -48,7 +48,7 @@
.balign IFETCH_ALIGN_BYTES; \
.global name; \
_ASM_NOKPROBE_SYMBOL(name); \
- DEFINE_FIXED_SYMBOL(name); \
+ DEFINE_FIXED_SYMBOL(name, text); \
name:
#define TRAMP_REAL_BEGIN(name) \
@@ -76,31 +76,18 @@ name:
ld reg,PACAKBASE(r13); /* get high part of &label */ \
ori reg,reg,FIXED_SYMBOL_ABS_ADDR(label)
-#define __LOAD_HANDLER(reg, label) \
+#define __LOAD_HANDLER(reg, label, section) \
ld reg,PACAKBASE(r13); \
- ori reg,reg,(ABS_ADDR(label))@l
+ ori reg,reg,(ABS_ADDR(label, section))@l
/*
* Branches from unrelocated code (e.g., interrupts) to labels outside
* head-y require >64K offsets.
*/
-#define __LOAD_FAR_HANDLER(reg, label) \
+#define __LOAD_FAR_HANDLER(reg, label, section) \
ld reg,PACAKBASE(r13); \
- ori reg,reg,(ABS_ADDR(label))@l; \
- addis reg,reg,(ABS_ADDR(label))@h
-
-/*
- * Branch to label using its 0xC000 address. This results in instruction
- * address suitable for MSR[IR]=0 or 1, which allows relocation to be turned
- * on using mtmsr rather than rfid.
- *
- * This could set the 0xc bits for !RELOCATABLE as an immediate, rather than
- * load KBASE for a slight optimisation.
- */
-#define BRANCH_TO_C000(reg, label) \
- __LOAD_FAR_HANDLER(reg, label); \
- mtctr reg; \
- bctr
+ ori reg,reg,(ABS_ADDR(label, section))@l; \
+ addis reg,reg,(ABS_ADDR(label, section))@h
/*
* Interrupt code generation macros
@@ -111,9 +98,10 @@ name:
#define IAREA .L_IAREA_\name\() /* PACA save area */
#define IVIRT .L_IVIRT_\name\() /* Has virt mode entry point */
#define IISIDE .L_IISIDE_\name\() /* Uses SRR0/1 not DAR/DSISR */
+#define ICFAR .L_ICFAR_\name\() /* Uses CFAR */
+#define ICFAR_IF_HVMODE .L_ICFAR_IF_HVMODE_\name\() /* Uses CFAR if HV */
#define IDAR .L_IDAR_\name\() /* Uses DAR (or SRR0) */
#define IDSISR .L_IDSISR_\name\() /* Uses DSISR (or SRR1) */
-#define ISET_RI .L_ISET_RI_\name\() /* Run common code w/ MSR[RI]=1 */
#define IBRANCH_TO_COMMON .L_IBRANCH_TO_COMMON_\name\() /* ENTRY branch to common */
#define IREALMODE_COMMON .L_IREALMODE_COMMON_\name\() /* Common runs in realmode */
#define IMASK .L_IMASK_\name\() /* IRQ soft-mask bit */
@@ -151,15 +139,18 @@ do_define_int n
.ifndef IISIDE
IISIDE=0
.endif
+ .ifndef ICFAR
+ ICFAR=1
+ .endif
+ .ifndef ICFAR_IF_HVMODE
+ ICFAR_IF_HVMODE=0
+ .endif
.ifndef IDAR
IDAR=0
.endif
.ifndef IDSISR
IDSISR=0
.endif
- .ifndef ISET_RI
- ISET_RI=1
- .endif
.ifndef IBRANCH_TO_COMMON
IBRANCH_TO_COMMON=1
.endif
@@ -291,9 +282,21 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
HMT_MEDIUM
std r10,IAREA+EX_R10(r13) /* save r10 - r12 */
+ .if ICFAR
BEGIN_FTR_SECTION
mfspr r10,SPRN_CFAR
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ .elseif ICFAR_IF_HVMODE
+BEGIN_FTR_SECTION
+ BEGIN_FTR_SECTION_NESTED(69)
+ mfspr r10,SPRN_CFAR
+ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 69)
+FTR_SECTION_ELSE
+ BEGIN_FTR_SECTION_NESTED(69)
+ li r10,0
+ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 69)
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .endif
.if \ool
.if !\virt
b tramp_real_\name
@@ -309,9 +312,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
BEGIN_FTR_SECTION
std r9,IAREA+EX_PPR(r13)
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+ .if ICFAR || ICFAR_IF_HVMODE
BEGIN_FTR_SECTION
std r10,IAREA+EX_CFAR(r13)
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ .endif
INTERRUPT_TO_KERNEL
mfctr r10
std r10,IAREA+EX_CTR(r13)
@@ -376,7 +381,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
* This switches to virtual mode and sets MSR[RI].
*/
.macro __GEN_COMMON_ENTRY name
-DEFINE_FIXED_SYMBOL(\name\()_common_real)
+DEFINE_FIXED_SYMBOL(\name\()_common_real, text)
\name\()_common_real:
.if IKVM_REAL
KVMTEST \name kvm_interrupt
@@ -399,7 +404,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real)
.endif
.balign IFETCH_ALIGN_BYTES
-DEFINE_FIXED_SYMBOL(\name\()_common_virt)
+DEFINE_FIXED_SYMBOL(\name\()_common_virt, text)
\name\()_common_virt:
.if IKVM_VIRT
KVMTEST \name kvm_interrupt
@@ -413,7 +418,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_virt)
* want to run in real mode.
*/
.macro __GEN_REALMODE_COMMON_ENTRY name
-DEFINE_FIXED_SYMBOL(\name\()_common_real)
+DEFINE_FIXED_SYMBOL(\name\()_common_real, text)
\name\()_common_real:
.if IKVM_REAL
KVMTEST \name kvm_interrupt
@@ -512,11 +517,6 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real)
stb r10,PACASRR_VALID(r13)
.endif
- .if ISET_RI
- li r10,MSR_RI
- mtmsrd r10,1 /* Set MSR_RI */
- .endif
-
.if ISTACK
.if IKUAP
kuap_save_amr_and_lock r9, r10, cr1, cr0
@@ -568,14 +568,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
.endif
BEGIN_FTR_SECTION
+ .if ICFAR || ICFAR_IF_HVMODE
ld r10,IAREA+EX_CFAR(r13)
+ .else
+ li r10,0
+ .endif
std r10,ORIG_GPR3(r1)
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
ld r10,IAREA+EX_CTR(r13)
std r10,_CTR(r1)
std r2,GPR2(r1) /* save r2 in stackframe */
- SAVE_4GPRS(3, r1) /* save r3 - r6 in stackframe */
- SAVE_2GPRS(7, r1) /* save r7, r8 in stackframe */
+ SAVE_GPRS(3, 8, r1) /* save r3 - r8 in stackframe */
mflr r9 /* Get LR, later save to stack */
ld r2,PACATOC(r13) /* get kernel TOC into r2 */
std r9,_LINK(r1)
@@ -693,8 +696,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
mtlr r9
ld r9,_CCR(r1)
mtcr r9
- REST_8GPRS(2, r1)
- REST_4GPRS(10, r1)
+ REST_GPRS(2, 13, r1)
REST_GPR(0, r1)
/* restore original r1. */
ld r1,GPR1(r1)
@@ -850,12 +852,12 @@ SOFT_MASK_TABLE(0xc000000000003000, 0xc000000000004000)
#ifdef CONFIG_RELOCATABLE
TRAMP_VIRT_BEGIN(system_call_vectored_tramp)
- __LOAD_HANDLER(r10, system_call_vectored_common)
+ __LOAD_HANDLER(r10, system_call_vectored_common, virt_trampolines)
mtctr r10
bctr
TRAMP_VIRT_BEGIN(system_call_vectored_sigill_tramp)
- __LOAD_HANDLER(r10, system_call_vectored_sigill)
+ __LOAD_HANDLER(r10, system_call_vectored_sigill, virt_trampolines)
mtctr r10
bctr
#endif
@@ -902,11 +904,6 @@ INT_DEFINE_BEGIN(system_reset)
IVEC=0x100
IAREA=PACA_EXNMI
IVIRT=0 /* no virt entry point */
- /*
- * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is
- * being used, so a nested NMI exception would corrupt it.
- */
- ISET_RI=0
ISTACK=0
IKVM_REAL=1
INT_DEFINE_END(system_reset)
@@ -964,7 +961,9 @@ TRAMP_REAL_BEGIN(system_reset_idle_wake)
/* We are waking up from idle, so may clobber any volatile register */
cmpwi cr1,r5,2
bltlr cr1 /* no state loss, return to idle caller with r3=SRR1 */
- BRANCH_TO_C000(r12, DOTSYM(idle_return_gpr_loss))
+ __LOAD_FAR_HANDLER(r12, DOTSYM(idle_return_gpr_loss), real_trampolines)
+ mtctr r12
+ bctr
#endif
#ifdef CONFIG_PPC_PSERIES
@@ -979,16 +978,14 @@ TRAMP_REAL_BEGIN(system_reset_fwnmi)
EXC_COMMON_BEGIN(system_reset_common)
__GEN_COMMON_ENTRY system_reset
/*
- * Increment paca->in_nmi then enable MSR_RI. SLB or MCE will be able
- * to recover, but nested NMI will notice in_nmi and not recover
- * because of the use of the NMI stack. in_nmi reentrancy is tested in
- * system_reset_exception.
+ * Increment paca->in_nmi. When the interrupt entry wrapper later
+ * enable MSR_RI, then SLB or MCE will be able to recover, but a nested
+ * NMI will notice in_nmi and not recover because of the use of the NMI
+ * stack. in_nmi reentrancy is tested in system_reset_exception.
*/
lhz r10,PACA_IN_NMI(r13)
addi r10,r10,1
sth r10,PACA_IN_NMI(r13)
- li r10,MSR_RI
- mtmsrd r10,1
mr r10,r1
ld r1,PACA_NMI_EMERG_SP(r13)
@@ -1062,12 +1059,6 @@ INT_DEFINE_BEGIN(machine_check_early)
IAREA=PACA_EXMC
IVIRT=0 /* no virt entry point */
IREALMODE_COMMON=1
- /*
- * MSR_RI is not enabled, because PACA_EXMC is being used, so a
- * nested machine check corrupts it. machine_check_common enables
- * MSR_RI.
- */
- ISET_RI=0
ISTACK=0
IDAR=1
IDSISR=1
@@ -1078,7 +1069,6 @@ INT_DEFINE_BEGIN(machine_check)
IVEC=0x200
IAREA=PACA_EXMC
IVIRT=0 /* no virt entry point */
- ISET_RI=0
IDAR=1
IDSISR=1
IKVM_REAL=1
@@ -1148,9 +1138,6 @@ EXC_COMMON_BEGIN(machine_check_early_common)
BEGIN_FTR_SECTION
bl enable_machine_check
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
- li r10,MSR_RI
- mtmsrd r10,1
-
addi r3,r1,STACK_FRAME_OVERHEAD
bl machine_check_early
std r3,RESULT(r1) /* Save result */
@@ -1238,10 +1225,6 @@ EXC_COMMON_BEGIN(machine_check_common)
* save area: PACA_EXMC instead of PACA_EXGEN.
*/
GEN_COMMON machine_check
-
- /* Enable MSR_RI when finished with PACA_EXMC */
- li r10,MSR_RI
- mtmsrd r10,1
addi r3,r1,STACK_FRAME_OVERHEAD
bl machine_check_exception_async
b interrupt_return_srr
@@ -1369,11 +1352,15 @@ EXC_COMMON_BEGIN(data_access_common)
addi r3,r1,STACK_FRAME_OVERHEAD
andis. r0,r4,DSISR_DABRMATCH@h
bne- 1f
+#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION
bl do_hash_fault
MMU_FTR_SECTION_ELSE
bl do_page_fault
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+#else
+ bl do_page_fault
+#endif
b interrupt_return_srr
1: bl do_break
@@ -1416,6 +1403,7 @@ EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)
EXC_VIRT_END(data_access_slb, 0x4380, 0x80)
EXC_COMMON_BEGIN(data_access_slb_common)
GEN_COMMON data_access_slb
+#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION
/* HPT case, do SLB fault */
addi r3,r1,STACK_FRAME_OVERHEAD
@@ -1428,9 +1416,12 @@ MMU_FTR_SECTION_ELSE
/* Radix case, access is outside page table range */
li r3,-EFAULT
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+#else
+ li r3,-EFAULT
+#endif
std r3,RESULT(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_bad_slb_fault
+ bl do_bad_segment_interrupt
b interrupt_return_srr
@@ -1462,11 +1453,15 @@ EXC_VIRT_END(instruction_access, 0x4400, 0x80)
EXC_COMMON_BEGIN(instruction_access_common)
GEN_COMMON instruction_access
addi r3,r1,STACK_FRAME_OVERHEAD
+#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION
bl do_hash_fault
MMU_FTR_SECTION_ELSE
bl do_page_fault
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+#else
+ bl do_page_fault
+#endif
b interrupt_return_srr
@@ -1496,6 +1491,7 @@ EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80)
EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80)
EXC_COMMON_BEGIN(instruction_access_slb_common)
GEN_COMMON instruction_access_slb
+#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION
/* HPT case, do SLB fault */
addi r3,r1,STACK_FRAME_OVERHEAD
@@ -1508,9 +1504,12 @@ MMU_FTR_SECTION_ELSE
/* Radix case, access is outside page table range */
li r3,-EFAULT
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+#else
+ li r3,-EFAULT
+#endif
std r3,RESULT(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_bad_slb_fault
+ bl do_bad_segment_interrupt
b interrupt_return_srr
@@ -1536,6 +1535,12 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
*
* If soft masked, the masked handler will note the pending interrupt for
* replay, and clear MSR[EE] in the interrupted context.
+ *
+ * CFAR is not required because this is an asynchronous interrupt that in
+ * general won't have much bearing on the state of the CPU, with the possible
+ * exception of crash/debug IPIs, but those are generally moving to use SRESET
+ * IPIs. Unless this is an HV interrupt and KVM HV is possible, in which case
+ * it may be exiting the guest and need CFAR to be saved.
*/
INT_DEFINE_BEGIN(hardware_interrupt)
IVEC=0x500
@@ -1543,6 +1548,10 @@ INT_DEFINE_BEGIN(hardware_interrupt)
IMASK=IRQS_DISABLED
IKVM_REAL=1
IKVM_VIRT=1
+ ICFAR=0
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ ICFAR_IF_HVMODE=1
+#endif
INT_DEFINE_END(hardware_interrupt)
EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
@@ -1764,6 +1773,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
* If PPC_WATCHDOG is configured, the soft masked handler will actually set
* things back up to run soft_nmi_interrupt as a regular interrupt handler
* on the emergency stack.
+ *
+ * CFAR is not required because this is asynchronous (see hardware_interrupt).
+ * A watchdog interrupt may like to have CFAR, but usually the interesting
+ * branch is long gone by that point (e.g., infinite loop).
*/
INT_DEFINE_BEGIN(decrementer)
IVEC=0x900
@@ -1771,6 +1784,7 @@ INT_DEFINE_BEGIN(decrementer)
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
#endif
+ ICFAR=0
INT_DEFINE_END(decrementer)
EXC_REAL_BEGIN(decrementer, 0x900, 0x80)
@@ -1846,6 +1860,8 @@ EXC_COMMON_BEGIN(hdecrementer_common)
* If soft masked, the masked handler will note the pending interrupt for
* replay, leaving MSR[EE] enabled in the interrupted context because the
* doorbells are edge triggered.
+ *
+ * CFAR is not required, similarly to hardware_interrupt.
*/
INT_DEFINE_BEGIN(doorbell_super)
IVEC=0xa00
@@ -1853,6 +1869,7 @@ INT_DEFINE_BEGIN(doorbell_super)
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
#endif
+ ICFAR=0
INT_DEFINE_END(doorbell_super)
EXC_REAL_BEGIN(doorbell_super, 0xa00, 0x100)
@@ -1904,6 +1921,7 @@ INT_DEFINE_BEGIN(system_call)
IVEC=0xc00
IKVM_REAL=1
IKVM_VIRT=1
+ ICFAR=0
INT_DEFINE_END(system_call)
.macro SYSTEM_CALL virt
@@ -1942,12 +1960,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
HMT_MEDIUM
.if ! \virt
- __LOAD_HANDLER(r10, system_call_common_real)
+ __LOAD_HANDLER(r10, system_call_common_real, real_vectors)
mtctr r10
bctr
.else
#ifdef CONFIG_RELOCATABLE
- __LOAD_HANDLER(r10, system_call_common)
+ __LOAD_HANDLER(r10, system_call_common, virt_vectors)
mtctr r10
bctr
#else
@@ -2001,7 +2019,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
* Requires __LOAD_FAR_HANDLER beause kvmppc_hcall lives
* outside the head section.
*/
- __LOAD_FAR_HANDLER(r10, kvmppc_hcall)
+ __LOAD_FAR_HANDLER(r10, kvmppc_hcall, real_trampolines)
mtctr r10
bctr
#else
@@ -2202,6 +2220,11 @@ EXC_COMMON_BEGIN(hmi_exception_common)
* Interrupt 0xe80 - Directed Hypervisor Doorbell Interrupt.
* This is an asynchronous interrupt in response to a msgsnd doorbell.
* Similar to the 0xa00 doorbell but for host rather than guest.
+ *
+ * CFAR is not required (similar to doorbell_interrupt), unless KVM HV
+ * is enabled, in which case it may be a guest exit. Most PowerNV kernels
+ * include KVM support so it would be nice if this could be dynamically
+ * patched out if KVM was not currently running any guests.
*/
INT_DEFINE_BEGIN(h_doorbell)
IVEC=0xe80
@@ -2209,6 +2232,9 @@ INT_DEFINE_BEGIN(h_doorbell)
IMASK=IRQS_DISABLED
IKVM_REAL=1
IKVM_VIRT=1
+#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ ICFAR=0
+#endif
INT_DEFINE_END(h_doorbell)
EXC_REAL_BEGIN(h_doorbell, 0xe80, 0x20)
@@ -2232,6 +2258,9 @@ EXC_COMMON_BEGIN(h_doorbell_common)
* Interrupt 0xea0 - Hypervisor Virtualization Interrupt.
* This is an asynchronous interrupt in response to an "external exception".
* Similar to 0x500 but for host only.
+ *
+ * Like h_doorbell, CFAR is only required for KVM HV because this can be
+ * a guest exit.
*/
INT_DEFINE_BEGIN(h_virt_irq)
IVEC=0xea0
@@ -2239,6 +2268,9 @@ INT_DEFINE_BEGIN(h_virt_irq)
IMASK=IRQS_DISABLED
IKVM_REAL=1
IKVM_VIRT=1
+#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ ICFAR=0
+#endif
INT_DEFINE_END(h_virt_irq)
EXC_REAL_BEGIN(h_virt_irq, 0xea0, 0x20)
@@ -2275,6 +2307,8 @@ EXC_VIRT_NONE(0x4ee0, 0x20)
*
* If soft masked, the masked handler will note the pending interrupt for
* replay, and clear MSR[EE] in the interrupted context.
+ *
+ * CFAR is not used by perf interrupts so not required.
*/
INT_DEFINE_BEGIN(performance_monitor)
IVEC=0xf00
@@ -2282,6 +2316,7 @@ INT_DEFINE_BEGIN(performance_monitor)
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
#endif
+ ICFAR=0
INT_DEFINE_END(performance_monitor)
EXC_REAL_BEGIN(performance_monitor, 0xf00, 0x20)
@@ -2706,6 +2741,7 @@ EXC_VIRT_NONE(0x5800, 0x100)
INT_DEFINE_BEGIN(soft_nmi)
IVEC=0x900
ISTACK=0
+ ICFAR=0
INT_DEFINE_END(soft_nmi)
/*
@@ -3025,7 +3061,7 @@ USE_FIXED_SECTION(virt_trampolines)
.align 7
.globl __end_interrupts
__end_interrupts:
-DEFINE_FIXED_SYMBOL(__end_interrupts)
+DEFINE_FIXED_SYMBOL(__end_interrupts, virt_trampolines)
CLOSE_FIXED_SECTION(real_vectors);
CLOSE_FIXED_SECTION(real_trampolines);
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index b7ceb041743c..d03e488cfe9c 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -251,7 +251,7 @@ bool is_fadump_reserved_mem_contiguous(void)
}
/* Print firmware assisted dump configurations for debugging purpose. */
-static void fadump_show_config(void)
+static void __init fadump_show_config(void)
{
int i;
@@ -353,7 +353,7 @@ static __init u64 fadump_calculate_reserve_size(void)
* Calculate the total memory size required to be reserved for
* firmware-assisted dump registration.
*/
-static unsigned long get_fadump_area_size(void)
+static unsigned long __init get_fadump_area_size(void)
{
unsigned long size = 0;
@@ -462,7 +462,7 @@ static int __init fadump_get_boot_mem_regions(void)
* with the given memory range.
* False, otherwise.
*/
-static bool overlaps_reserved_ranges(u64 base, u64 end, int *idx)
+static bool __init overlaps_reserved_ranges(u64 base, u64 end, int *idx)
{
bool ret = false;
int i;
@@ -737,7 +737,7 @@ void crash_fadump(struct pt_regs *regs, const char *str)
fw_dump.ops->fadump_trigger(fdh, str);
}
-u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
+u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
{
struct elf_prstatus prstatus;
@@ -752,7 +752,7 @@ u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
return buf;
}
-void fadump_update_elfcore_header(char *bufp)
+void __init fadump_update_elfcore_header(char *bufp)
{
struct elf_phdr *phdr;
@@ -770,7 +770,7 @@ void fadump_update_elfcore_header(char *bufp)
return;
}
-static void *fadump_alloc_buffer(unsigned long size)
+static void *__init fadump_alloc_buffer(unsigned long size)
{
unsigned long count, i;
struct page *page;
@@ -792,7 +792,7 @@ static void fadump_free_buffer(unsigned long vaddr, unsigned long size)
free_reserved_area((void *)vaddr, (void *)(vaddr + size), -1, NULL);
}
-s32 fadump_setup_cpu_notes_buf(u32 num_cpus)
+s32 __init fadump_setup_cpu_notes_buf(u32 num_cpus)
{
/* Allocate buffer to hold cpu crash notes. */
fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
@@ -1447,7 +1447,7 @@ static ssize_t release_mem_store(struct kobject *kobj,
}
/* Release the reserved memory and disable the FADump */
-static void unregister_fadump(void)
+static void __init unregister_fadump(void)
{
fadump_cleanup();
fadump_release_memory(fw_dump.reserve_dump_area_start,
@@ -1547,7 +1547,7 @@ ATTRIBUTE_GROUPS(fadump);
DEFINE_SHOW_ATTRIBUTE(fadump_region);
-static void fadump_init_files(void)
+static void __init fadump_init_files(void)
{
int rc = 0;
@@ -1641,6 +1641,14 @@ int __init setup_fadump(void)
else if (fw_dump.reserve_dump_area_size)
fw_dump.ops->fadump_init_mem_struct(&fw_dump);
+ /*
+ * In case of panic, fadump is triggered via ppc_panic_event()
+ * panic notifier. Setting crash_kexec_post_notifiers to 'true'
+ * lets panic() function take crash friendly path before panic
+ * notifiers are invoked.
+ */
+ crash_kexec_post_notifiers = true;
+
return 1;
}
subsys_initcall(setup_fadump);
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index ba4afe3b5a9c..f71f2bbd4de6 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -81,7 +81,12 @@ EXPORT_SYMBOL(store_fp_state)
*/
_GLOBAL(load_up_fpu)
mfmsr r5
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* interrupt doesn't set MSR[RI] and HPT can fault on current access */
+ ori r5,r5,MSR_FP|MSR_RI
+#else
ori r5,r5,MSR_FP
+#endif
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
oris r5,r5,MSR_VSX@h
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 349c4a820231..c3286260a7d1 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -115,8 +115,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
stw r10,8(r1)
li r10, \trapno
stw r10,_TRAP(r1)
- SAVE_4GPRS(3, r1)
- SAVE_2GPRS(7, r1)
+ SAVE_GPRS(3, 8, r1)
SAVE_NVGPRS(r1)
stw r2,GPR2(r1)
stw r12,_NIP(r1)
@@ -136,6 +135,12 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
andi. r12,r9,MSR_PR
bne 777f
bl prepare_transfer_to_handler
+#ifdef CONFIG_PPC_KUEP
+ b 778f
+777:
+ bl __kuep_lock
+778:
+#endif
777:
#endif
.endm
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 7d72ee5ab387..b6c6d1de5fd5 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -27,6 +27,7 @@
#include <linux/init.h>
#include <linux/pgtable.h>
+#include <linux/sizes.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/mmu.h>
@@ -297,6 +298,10 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
3:
mfspr r11,SPRN_SPRG_THREAD
lwz r11,PGDIR(r11)
+#ifdef CONFIG_PPC_KUAP
+ rlwinm. r9, r9, 0, 0xff
+ beq 5f /* Kuap fault */
+#endif
4:
tophys(r11, r11)
rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */
@@ -377,6 +382,10 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
3:
mfspr r11,SPRN_SPRG_THREAD
lwz r11,PGDIR(r11)
+#ifdef CONFIG_PPC_KUAP
+ rlwinm. r9, r9, 0, 0xff
+ beq 5f /* Kuap fault */
+#endif
4:
tophys(r11, r11)
rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */
@@ -650,7 +659,7 @@ start_here:
b . /* prevent prefetch past rfi */
/* Set up the initial MMU state so we can do the first level of
- * kernel initialization. This maps the first 16 MBytes of memory 1:1
+ * kernel initialization. This maps the first 32 MBytes of memory 1:1
* virtual to physical and more importantly sets the cache mode.
*/
initial_mmu:
@@ -687,6 +696,12 @@ initial_mmu:
tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */
tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */
+ li r0,62 /* TLB slot 62 */
+ addis r4,r4,SZ_16M@h
+ addis r3,r3,SZ_16M@h
+ tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */
+ tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */
+
isync
/* Establish the exception vector base
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index 02d2928d1e01..b73a56466903 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -334,6 +334,10 @@ interrupt_base:
mfspr r12,SPRN_MMUCR
mfspr r13,SPRN_PID /* Get PID */
rlwimi r12,r13,0,24,31 /* Set TID */
+#ifdef CONFIG_PPC_KUAP
+ cmpwi r13,0
+ beq 2f /* KUAP Fault */
+#endif
4:
mtspr SPRN_MMUCR,r12
@@ -444,6 +448,10 @@ interrupt_base:
mfspr r12,SPRN_MMUCR
mfspr r13,SPRN_PID /* Get PID */
rlwimi r12,r13,0,24,31 /* Set TID */
+#ifdef CONFIG_PPC_KUAP
+ cmpwi r13,0
+ beq 2f /* KUAP Fault */
+#endif
4:
mtspr SPRN_MMUCR,r12
@@ -532,10 +540,7 @@ finish_tlb_load_44x:
andi. r10,r12,_PAGE_USER /* User page ? */
beq 1f /* nope, leave U bits empty */
rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */
-#ifdef CONFIG_PPC_KUEP
-0: rlwinm r11,r11,0,~PPC44x_TLB_SX /* Clear SX if User page */
- patch_site 0b, patch__tlb_44x_kuep
-#endif
+ rlwinm r11,r11,0,~PPC44x_TLB_SX /* Clear SX if User page */
1: tlbwe r11,r13,PPC44x_TLB_ATTRIB /* Write ATTRIB */
/* Done...restore registers and get out of here.
@@ -575,6 +580,10 @@ finish_tlb_load_44x:
3: mfspr r11,SPRN_SPRG3
lwz r11,PGDIR(r11)
mfspr r12,SPRN_PID /* Get PID */
+#ifdef CONFIG_PPC_KUAP
+ cmpwi r12,0
+ beq 2f /* KUAP Fault */
+#endif
4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */
/* Mask of required permission bits. Note that while we
@@ -672,6 +681,10 @@ finish_tlb_load_44x:
3: mfspr r11,SPRN_SPRG_THREAD
lwz r11,PGDIR(r11)
mfspr r12,SPRN_PID /* Get PID */
+#ifdef CONFIG_PPC_KUAP
+ cmpwi r12,0
+ beq 2f /* KUAP Fault */
+#endif
4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */
/* Make up the required permissions */
@@ -747,10 +760,7 @@ finish_tlb_load_47x:
andi. r10,r12,_PAGE_USER /* User page ? */
beq 1f /* nope, leave U bits empty */
rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */
-#ifdef CONFIG_PPC_KUEP
-0: rlwinm r11,r11,0,~PPC47x_TLB2_SX /* Clear SX if User page */
- patch_site 0b, patch__tlb_47x_kuep
-#endif
+ rlwinm r11,r11,0,~PPC47x_TLB2_SX /* Clear SX if User page */
1: tlbwe r11,r13,2
/* Done...restore registers and get out of here.
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index f17ae2083733..5c5181e8d5f1 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -126,7 +126,7 @@ __secondary_hold_acknowledge:
. = 0x5c
.globl __run_at_load
__run_at_load:
-DEFINE_FIXED_SYMBOL(__run_at_load)
+DEFINE_FIXED_SYMBOL(__run_at_load, first_256B)
.long RUN_AT_LOAD_DEFAULT
#endif
@@ -156,7 +156,7 @@ __secondary_hold:
/* Tell the master cpu we're here */
/* Relocation is off & we are located at an address less */
/* than 0x100, so only need to grab low order offset. */
- std r24,(ABS_ADDR(__secondary_hold_acknowledge))(0)
+ std r24,(ABS_ADDR(__secondary_hold_acknowledge, first_256B))(0)
sync
li r26,0
@@ -164,7 +164,7 @@ __secondary_hold:
tovirt(r26,r26)
#endif
/* All secondary cpus wait here until told to start. */
-100: ld r12,(ABS_ADDR(__secondary_hold_spinloop))(r26)
+100: ld r12,(ABS_ADDR(__secondary_hold_spinloop, first_256B))(r26)
cmpdi 0,r12,0
beq 100b
@@ -649,15 +649,15 @@ __after_prom_start:
3:
#endif
/* # bytes of memory to copy */
- lis r5,(ABS_ADDR(copy_to_here))@ha
- addi r5,r5,(ABS_ADDR(copy_to_here))@l
+ lis r5,(ABS_ADDR(copy_to_here, text))@ha
+ addi r5,r5,(ABS_ADDR(copy_to_here, text))@l
bl copy_and_flush /* copy the first n bytes */
/* this includes the code being */
/* executed here. */
/* Jump to the copy of this code that we just made */
- addis r8,r3,(ABS_ADDR(4f))@ha
- addi r12,r8,(ABS_ADDR(4f))@l
+ addis r8,r3,(ABS_ADDR(4f, text))@ha
+ addi r12,r8,(ABS_ADDR(4f, text))@l
mtctr r12
bctr
@@ -669,8 +669,8 @@ p_end: .8byte _end - copy_to_here
* Now copy the rest of the kernel up to _end, add
* _end - copy_to_here to the copy limit and run again.
*/
- addis r8,r26,(ABS_ADDR(p_end))@ha
- ld r8,(ABS_ADDR(p_end))@l(r8)
+ addis r8,r26,(ABS_ADDR(p_end, text))@ha
+ ld r8,(ABS_ADDR(p_end, text))@l(r8)
add r5,r5,r8
5: bl copy_and_flush /* copy the rest */
@@ -904,7 +904,7 @@ _GLOBAL(relative_toc)
blr
.balign 8
-p_toc: .8byte __toc_start + 0x8000 - 0b
+p_toc: .8byte .TOC. - 0b
/*
* This is where the main kernel code starts.
diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S
index 68e5c0a7e99d..fa84744d6b24 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -931,7 +931,11 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
_GLOBAL(load_segment_registers)
li r0, NUM_USER_SEGMENTS /* load up user segment register values */
mtctr r0 /* for context 0 */
+#ifdef CONFIG_PPC_KUEP
+ lis r3, SR_NX@h /* Kp = 0, Ks = 0, VSID = 0 */
+#else
li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */
+#endif
li r4, 0
3: mtsrin r3, r4
addi r3, r3, 0x111 /* increment VSID */
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index ef8d1b1c234e..bb6d5d0fc4ac 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -87,8 +87,7 @@ END_BTB_FLUSH_SECTION
stw r10, 8(r1)
li r10, \trapno
stw r10,_TRAP(r1)
- SAVE_4GPRS(3, r1)
- SAVE_2GPRS(7, r1)
+ SAVE_GPRS(3, 8, r1)
SAVE_NVGPRS(r1)
stw r2,GPR2(r1)
stw r12,_NIP(r1)
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 0a9a0f301474..ac2b4dcf5fd3 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -462,6 +462,12 @@ END_BTB_FLUSH_SECTION
mfspr r11,SPRN_SPRG_THREAD
lwz r11,PGDIR(r11)
+#ifdef CONFIG_PPC_KUAP
+ mfspr r12, SPRN_MAS1
+ rlwinm. r12,r12,0,0x3fff0000
+ beq 2f /* KUAP fault */
+#endif
+
4:
/* Mask of required permission bits. Note that while we
* do copy ESR:ST to _PAGE_RW position as trying to write
@@ -571,6 +577,12 @@ END_BTB_FLUSH_SECTION
mfspr r11,SPRN_SPRG_THREAD
lwz r11,PGDIR(r11)
+#ifdef CONFIG_PPC_KUAP
+ mfspr r12, SPRN_MAS1
+ rlwinm. r12,r12,0,0x3fff0000
+ beq 2f /* KUAP fault */
+#endif
+
/* Make up the required permissions for user code */
#ifdef CONFIG_PTE_64BIT
li r13,_PAGE_PRESENT | _PAGE_BAP_UX
@@ -777,6 +789,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS)
andi. r10, r11, _PAGE_USER /* Test for _PAGE_USER */
slwi r10, r12, 1
or r10, r10, r12
+ rlwinm r10, r10, 0, ~_PAGE_EXEC /* Clear SX on user pages */
iseleq r12, r12, r10
rlwimi r13, r12, 0, 20, 31 /* Get RPN from PTE, merge w/ perms */
mtspr SPRN_MAS3, r13
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 91a3be14808b..2669f80b3a49 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -523,7 +523,7 @@ static void larx_stcx_err(struct perf_event *bp, struct arch_hw_breakpoint *info
static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp,
struct arch_hw_breakpoint **info, int *hit,
- struct ppc_inst instr)
+ ppc_inst_t instr)
{
int i;
int stepped;
@@ -616,7 +616,7 @@ int hw_breakpoint_handler(struct die_args *args)
int hit[HBP_NUM_MAX] = {0};
int nr_hit = 0;
bool ptrace_bp = false;
- struct ppc_inst instr = ppc_inst(0);
+ ppc_inst_t instr = ppc_inst(0);
int type = 0;
int size = 0;
unsigned long ea;
diff --git a/arch/powerpc/kernel/hw_breakpoint_constraints.c b/arch/powerpc/kernel/hw_breakpoint_constraints.c
index 42b967e3d85c..a74623025f3a 100644
--- a/arch/powerpc/kernel/hw_breakpoint_constraints.c
+++ b/arch/powerpc/kernel/hw_breakpoint_constraints.c
@@ -80,7 +80,7 @@ static bool check_dawrx_constraints(struct pt_regs *regs, int type,
* Return true if the event is valid wrt dawr configuration,
* including extraneous exception. Otherwise return false.
*/
-bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr,
+bool wp_check_constraints(struct pt_regs *regs, ppc_inst_t instr,
unsigned long ea, int type, int size,
struct arch_hw_breakpoint *info)
{
@@ -127,7 +127,7 @@ bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr,
return false;
}
-void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr,
+void wp_get_instr_detail(struct pt_regs *regs, ppc_inst_t *instr,
int *type, int *size, unsigned long *ea)
{
struct instruction_op op;
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index 1f835539fda4..4ad79eb638c6 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -82,7 +82,7 @@ void power4_idle(void)
return;
if (cpu_has_feature(CPU_FTR_ALTIVEC))
- asm volatile("DSSALL ; sync" ::: "memory");
+ asm volatile(PPC_DSSALL " ; sync" ::: "memory");
power4_idle_nap();
diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S
index 13cad9297d82..3c097356366b 100644
--- a/arch/powerpc/kernel/idle_6xx.S
+++ b/arch/powerpc/kernel/idle_6xx.S
@@ -129,7 +129,7 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFCLR(CPU_FTR_NO_DPM)
mtspr SPRN_HID0,r4
BEGIN_FTR_SECTION
- DSSALL
+ PPC_DSSALL
sync
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
lwz r8,TI_LOCAL_FLAGS(r2) /* set napping bit */
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 563ebfcd16e2..7cd6ce3ec423 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -81,7 +81,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
{
syscall_fn f;
- kuep_lock();
+ kuap_lock();
regs->orig_gpr3 = r3;
@@ -406,7 +406,6 @@ again:
/* Restore user access locks last */
kuap_user_restore(regs);
- kuep_unlock();
return ret;
}
diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
index ec950b08a8dc..92088f848266 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -30,21 +30,23 @@ COMPAT_SYS_CALL_TABLE:
.ifc \srr,srr
mfspr r11,SPRN_SRR0
ld r12,_NIP(r1)
+ clrrdi r12,r12,2
100: tdne r11,r12
- EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+ EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
mfspr r11,SPRN_SRR1
ld r12,_MSR(r1)
100: tdne r11,r12
- EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+ EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
.else
mfspr r11,SPRN_HSRR0
ld r12,_NIP(r1)
+ clrrdi r12,r12,2
100: tdne r11,r12
- EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+ EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
mfspr r11,SPRN_HSRR1
ld r12,_MSR(r1)
100: tdne r11,r12
- EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+ EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
.endif
#endif
.endm
@@ -162,10 +164,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
* The value of AMR only matters while we're in the kernel.
*/
mtcr r2
- ld r2,GPR2(r1)
- ld r3,GPR3(r1)
- ld r13,GPR13(r1)
- ld r1,GPR1(r1)
+ REST_GPRS(2, 3, r1)
+ REST_GPR(13, r1)
+ REST_GPR(1, r1)
RFSCV_TO_USER
b . /* prevent speculative execution */
@@ -183,9 +184,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
mtctr r3
mtlr r4
mtspr SPRN_XER,r5
- REST_10GPRS(2, r1)
- REST_2GPRS(12, r1)
- ld r1,GPR1(r1)
+ REST_GPRS(2, 13, r1)
+ REST_GPR(1, r1)
RFI_TO_USER
.Lsyscall_vectored_\name\()_rst_end:
@@ -374,10 +374,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
* The value of AMR only matters while we're in the kernel.
*/
mtcr r2
- ld r2,GPR2(r1)
- ld r3,GPR3(r1)
- ld r13,GPR13(r1)
- ld r1,GPR1(r1)
+ REST_GPRS(2, 3, r1)
+ REST_GPR(13, r1)
+ REST_GPR(1, r1)
RFI_TO_USER
b . /* prevent speculative execution */
@@ -388,8 +387,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
mtctr r3
mtspr SPRN_XER,r4
ld r0,GPR0(r1)
- REST_8GPRS(4, r1)
- ld r12,GPR12(r1)
+ REST_GPRS(4, 12, r1)
b .Lsyscall_restore_regs_cont
.Lsyscall_rst_end:
@@ -518,17 +516,14 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
ld r6,_XER(r1)
li r0,0
- REST_4GPRS(7, r1)
- REST_2GPRS(11, r1)
- REST_GPR(13, r1)
+ REST_GPRS(7, 13, r1)
mtcr r3
mtlr r4
mtctr r5
mtspr SPRN_XER,r6
- REST_4GPRS(2, r1)
- REST_GPR(6, r1)
+ REST_GPRS(2, 6, r1)
REST_GPR(0, r1)
REST_GPR(1, r1)
.ifc \srr,srr
@@ -625,8 +620,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
ld r6,_CCR(r1)
li r0,0
- REST_4GPRS(7, r1)
- REST_2GPRS(11, r1)
+ REST_GPRS(7, 12, r1)
mtlr r3
mtctr r4
@@ -638,7 +632,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
*/
std r0,STACK_FRAME_OVERHEAD-16(r1)
- REST_4GPRS(2, r1)
+ REST_GPRS(2, 5, r1)
bne- cr1,1f /* emulate stack store */
mtcr r6
@@ -703,7 +697,7 @@ interrupt_return_macro hsrr
.globl __end_soft_masked
__end_soft_masked:
-DEFINE_FIXED_SYMBOL(__end_soft_masked)
+DEFINE_FIXED_SYMBOL(__end_soft_masked, text)
#endif /* CONFIG_PPC_BOOK3S */
#ifdef CONFIG_PPC_BOOK3S
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index c4f1d6b7d992..2cf31a97126c 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -745,7 +745,8 @@ void __do_irq(struct pt_regs *regs)
irq = ppc_md.get_irq();
/* We can hard enable interrupts now to allow perf interrupts */
- may_hard_irq_enable();
+ if (should_hard_irq_enable())
+ do_hard_irq_enable();
/* And finally process it */
if (unlikely(!irq))
@@ -811,7 +812,7 @@ void __init init_IRQ(void)
ppc_md.init_IRQ();
}
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#ifdef CONFIG_BOOKE_OR_40x
void *critirq_ctx[NR_CPUS] __read_mostly;
void *dbgirq_ctx[NR_CPUS] __read_mostly;
void *mcheckirq_ctx[NR_CPUS] __read_mostly;
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index bdee7262c080..9f8d0fa7b718 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -48,7 +48,7 @@ static struct hard_trap_info
{ 0x0800, 0x08 /* SIGFPE */ }, /* fp unavailable */
{ 0x0900, 0x0e /* SIGALRM */ }, /* decrementer */
{ 0x0c00, 0x14 /* SIGCHLD */ }, /* system call */
-#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
+#ifdef CONFIG_BOOKE_OR_40x
{ 0x2002, 0x05 /* SIGTRAP */ }, /* debug */
#if defined(CONFIG_FSL_BOOKE)
{ 0x2010, 0x08 /* SIGFPE */ }, /* spe unavailable */
@@ -67,7 +67,7 @@ static struct hard_trap_info
{ 0x2010, 0x08 /* SIGFPE */ }, /* fp unavailable */
{ 0x2020, 0x08 /* SIGFPE */ }, /* ap unavailable */
#endif
-#else /* ! (defined(CONFIG_40x) || defined(CONFIG_BOOKE)) */
+#else /* !CONFIG_BOOKE_OR_40x */
{ 0x0d00, 0x05 /* SIGTRAP */ }, /* single-step */
#if defined(CONFIG_PPC_8xx)
{ 0x1000, 0x04 /* SIGILL */ }, /* software emulation */
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 86d77ff056a6..9a492fdec1df 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -124,7 +124,7 @@ int arch_prepare_kprobe(struct kprobe *p)
{
int ret = 0;
struct kprobe *prev;
- struct ppc_inst insn = ppc_inst_read(p->addr);
+ ppc_inst_t insn = ppc_inst_read(p->addr);
if ((unsigned long)p->addr & 0x03) {
printk("Attempt to register kprobe at an unaligned address\n");
@@ -244,7 +244,7 @@ NOKPROBE_SYMBOL(arch_prepare_kretprobe);
static int try_to_emulate(struct kprobe *p, struct pt_regs *regs)
{
int ret;
- struct ppc_inst insn = ppc_inst_read(p->ainsn.insn);
+ ppc_inst_t insn = ppc_inst_read(p->ainsn.insn);
/* regs->nip is also adjusted if emulate_step returns 1 */
ret = emulate_step(regs, insn);
diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S
index 225511d73bef..f2e03ed423d0 100644
--- a/arch/powerpc/kernel/l2cr_6xx.S
+++ b/arch/powerpc/kernel/l2cr_6xx.S
@@ -96,7 +96,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L2CR)
/* Stop DST streams */
BEGIN_FTR_SECTION
- DSSALL
+ PPC_DSSALL
sync
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
@@ -292,7 +292,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L3CR)
isync
/* Stop DST streams */
- DSSALL
+ PPC_DSSALL
sync
/* Get the current enable bit of the L3CR into r4 */
@@ -401,7 +401,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_L3CR)
_GLOBAL(__flush_disable_L1)
/* Stop pending alitvec streams and memory accesses */
BEGIN_FTR_SECTION
- DSSALL
+ PPC_DSSALL
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
sync
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index fd829f7f25a4..2503dd4713b9 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -586,7 +586,7 @@ void machine_check_print_event_info(struct machine_check_event *evt,
mc_error_class[evt->error_class] : "Unknown";
printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
/* Display faulty slb contents for SLB errors. */
if (evt->error_type == MCE_ERROR_TYPE_SLB && !in_guest)
slb_dump_contents(local_paca->mce_faulty_slbs);
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index c2f55fe7092d..71e8f2a92e36 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -77,15 +77,15 @@ static bool mce_in_guest(void)
}
/* flush SLBs and reload */
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
void flush_and_reload_slb(void)
{
- /* Invalidate all SLBs */
- slb_flush_all_realmode();
-
if (early_radix_enabled())
return;
+ /* Invalidate all SLBs */
+ slb_flush_all_realmode();
+
/*
* This probably shouldn't happen, but it may be possible it's
* called in early boot before SLB shadows are allocated.
@@ -99,7 +99,7 @@ void flush_and_reload_slb(void)
void flush_erat(void)
{
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
flush_and_reload_slb();
return;
@@ -114,7 +114,7 @@ void flush_erat(void)
static int mce_flush(int what)
{
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (what == MCE_FLUSH_SLB) {
flush_and_reload_slb();
return 1;
@@ -455,7 +455,7 @@ static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr,
* in real-mode is tricky and can lead to recursive
* faults
*/
- struct ppc_inst instr;
+ ppc_inst_t instr;
unsigned long pfn, instr_addr;
struct instruction_op op;
struct pt_regs tmp = *regs;
@@ -499,8 +499,10 @@ static int mce_handle_ierror(struct pt_regs *regs, unsigned long srr1,
/* attempt to correct the error */
switch (table[i].error_type) {
case MCE_ERROR_TYPE_SLB:
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (local_paca->in_mce == 1)
slb_save_contents(local_paca->mce_faulty_slbs);
+#endif
handled = mce_flush(MCE_FLUSH_SLB);
break;
case MCE_ERROR_TYPE_ERAT:
@@ -588,8 +590,10 @@ static int mce_handle_derror(struct pt_regs *regs,
/* attempt to correct the error */
switch (table[i].error_type) {
case MCE_ERROR_TYPE_SLB:
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (local_paca->in_mce == 1)
slb_save_contents(local_paca->mce_faulty_slbs);
+#endif
if (mce_flush(MCE_FLUSH_SLB))
handled = 1;
break;
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index ed04a3ba66fe..40a583e9d3c7 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -90,16 +90,17 @@ int module_finalize(const Elf_Ehdr *hdr,
}
static __always_inline void *
-__module_alloc(unsigned long size, unsigned long start, unsigned long end)
+__module_alloc(unsigned long size, unsigned long start, unsigned long end, bool nowarn)
{
pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : PAGE_KERNEL_EXEC;
+ gfp_t gfp = GFP_KERNEL | (nowarn ? __GFP_NOWARN : 0);
/*
* Don't do huge page allocations for modules yet until more testing
* is done. STRICT_MODULE_RWX may require extra work to support this
* too.
*/
- return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, prot,
+ return __vmalloc_node_range(size, 1, start, end, gfp, prot,
VM_FLUSH_RESET_PERMS | VM_NO_HUGE_VMAP,
NUMA_NO_NODE, __builtin_return_address(0));
}
@@ -114,13 +115,13 @@ void *module_alloc(unsigned long size)
/* First try within 32M limit from _etext to avoid branch trampolines */
if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit)
- ptr = __module_alloc(size, limit, MODULES_END);
+ ptr = __module_alloc(size, limit, MODULES_END, true);
if (!ptr)
- ptr = __module_alloc(size, MODULES_VADDR, MODULES_END);
+ ptr = __module_alloc(size, MODULES_VADDR, MODULES_END, false);
return ptr;
#else
- return __module_alloc(size, VMALLOC_START, VMALLOC_END);
+ return __module_alloc(size, VMALLOC_START, VMALLOC_END, false);
#endif
}
diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c
index f417afc08d33..a491ad481d85 100644
--- a/arch/powerpc/kernel/module_32.c
+++ b/arch/powerpc/kernel/module_32.c
@@ -273,6 +273,31 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,
}
#ifdef CONFIG_DYNAMIC_FTRACE
+int module_trampoline_target(struct module *mod, unsigned long addr,
+ unsigned long *target)
+{
+ unsigned int jmp[4];
+
+ /* Find where the trampoline jumps to */
+ if (copy_from_kernel_nofault(jmp, (void *)addr, sizeof(jmp)))
+ return -EFAULT;
+
+ /* verify that this is what we expect it to be */
+ if ((jmp[0] & 0xffff0000) != PPC_RAW_LIS(_R12, 0) ||
+ (jmp[1] & 0xffff0000) != PPC_RAW_ADDI(_R12, _R12, 0) ||
+ jmp[2] != PPC_RAW_MTCTR(_R12) ||
+ jmp[3] != PPC_RAW_BCTR())
+ return -EINVAL;
+
+ addr = (jmp[1] & 0xffff) | ((jmp[0] & 0xffff) << 16);
+ if (addr & 0x8000)
+ addr -= 0x10000;
+
+ *target = addr;
+
+ return 0;
+}
+
int module_finalize_ftrace(struct module *module, const Elf_Shdr *sechdrs)
{
module->arch.tramp = do_plt_call(module->core_layout.base,
@@ -281,6 +306,14 @@ int module_finalize_ftrace(struct module *module, const Elf_Shdr *sechdrs)
if (!module->arch.tramp)
return -ENOENT;
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ module->arch.tramp_regs = do_plt_call(module->core_layout.base,
+ (unsigned long)ftrace_regs_caller,
+ sechdrs, module);
+ if (!module->arch.tramp_regs)
+ return -ENOENT;
+#endif
+
return 0;
}
#endif
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 3c8d9bbb51cf..0d9f9cd41e13 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -540,7 +540,7 @@ static struct pstore_info nvram_pstore_info = {
.write = nvram_pstore_write,
};
-static int nvram_pstore_init(void)
+static int __init nvram_pstore_init(void)
{
int rc = 0;
@@ -562,7 +562,7 @@ static int nvram_pstore_init(void)
return rc;
}
#else
-static int nvram_pstore_init(void)
+static int __init nvram_pstore_init(void)
{
return -1;
}
@@ -755,7 +755,7 @@ static unsigned char __init nvram_checksum(struct nvram_header *p)
* Per the criteria passed via nvram_remove_partition(), should this
* partition be removed? 1=remove, 0=keep
*/
-static int nvram_can_remove_partition(struct nvram_partition *part,
+static int __init nvram_can_remove_partition(struct nvram_partition *part,
const char *name, int sig, const char *exceptions[])
{
if (part->header.signature != sig)
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index ce1903064031..3b1c2236cbee 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -153,7 +153,7 @@ static void patch_imm_load_insns(unsigned long val, int reg, kprobe_opcode_t *ad
int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
{
- struct ppc_inst branch_op_callback, branch_emulate_step, temp;
+ ppc_inst_t branch_op_callback, branch_emulate_step, temp;
unsigned long op_callback_addr, emulate_step_addr;
kprobe_opcode_t *buff;
long b_offset;
@@ -228,12 +228,8 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
/*
* 3. load instruction to be emulated into relevant register, and
*/
- if (IS_ENABLED(CONFIG_PPC64)) {
- temp = ppc_inst_read(p->ainsn.insn);
- patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX);
- } else {
- patch_imm_load_insns((unsigned long)p->ainsn.insn, 4, buff + TMPL_INSN_IDX);
- }
+ temp = ppc_inst_read(p->ainsn.insn);
+ patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX);
/*
* 4. branch back from trampoline
@@ -269,7 +265,7 @@ int arch_check_optimized_kprobe(struct optimized_kprobe *op)
void arch_optimize_kprobes(struct list_head *oplist)
{
- struct ppc_inst instr;
+ ppc_inst_t instr;
struct optimized_kprobe *op;
struct optimized_kprobe *tmp;
diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S
index 19ea3312403c..5c7f0b4b784b 100644
--- a/arch/powerpc/kernel/optprobes_head.S
+++ b/arch/powerpc/kernel/optprobes_head.S
@@ -10,8 +10,8 @@
#include <asm/asm-offsets.h>
#ifdef CONFIG_PPC64
-#define SAVE_30GPRS(base) SAVE_10GPRS(2,base); SAVE_10GPRS(12,base); SAVE_10GPRS(22,base)
-#define REST_30GPRS(base) REST_10GPRS(2,base); REST_10GPRS(12,base); REST_10GPRS(22,base)
+#define SAVE_30GPRS(base) SAVE_GPRS(2, 31, base)
+#define REST_30GPRS(base) REST_GPRS(2, 31, base)
#define TEMPLATE_FOR_IMM_LOAD_INSNS nop; nop; nop; nop; nop
#else
#define SAVE_30GPRS(base) stmw r2, GPR2(base)
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 4208b4044d12..39da688a9455 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -139,8 +139,7 @@ static struct lppaca * __init new_lppaca(int cpu, unsigned long limit)
}
#endif /* CONFIG_PPC_PSERIES */
-#ifdef CONFIG_PPC_BOOK3S_64
-
+#ifdef CONFIG_PPC_64S_HASH_MMU
/*
* 3 persistent SLBs are allocated here. The buffer will be zero
* initially, hence will all be invaild until we actually write them.
@@ -169,8 +168,7 @@ static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit)
return s;
}
-
-#endif /* CONFIG_PPC_BOOK3S_64 */
+#endif /* CONFIG_PPC_64S_HASH_MMU */
#ifdef CONFIG_PPC_PSERIES
/**
@@ -226,7 +224,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
new_paca->kexec_state = KEXEC_STATE_NONE;
new_paca->__current = &init_task;
new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL;
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
new_paca->slb_shadow_ptr = NULL;
#endif
@@ -307,7 +305,7 @@ void __init allocate_paca(int cpu)
#ifdef CONFIG_PPC_PSERIES
paca->lppaca_ptr = new_lppaca(cpu, limit);
#endif
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
paca->slb_shadow_ptr = new_slb_shadow(cpu, limit);
#endif
#ifdef CONFIG_PPC_PSERIES
@@ -328,7 +326,7 @@ void __init free_unused_pacas(void)
paca_nr_cpu_ids = nr_cpu_ids;
paca_ptrs_size = new_ptrs_size;
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (early_radix_enabled()) {
/* Ugly fixup, see new_slb_shadow() */
memblock_phys_free(__pa(paca_ptrs[boot_cpuid]->slb_shadow_ptr),
@@ -341,9 +339,9 @@ void __init free_unused_pacas(void)
paca_ptrs_size + paca_struct_size, nr_cpu_ids);
}
+#ifdef CONFIG_PPC_64S_HASH_MMU
void copy_mm_to_paca(struct mm_struct *mm)
{
-#ifdef CONFIG_PPC_BOOK3S
mm_context_t *context = &mm->context;
#ifdef CONFIG_PPC_MM_SLICES
@@ -356,7 +354,5 @@ void copy_mm_to_paca(struct mm_struct *mm)
get_paca()->mm_ctx_user_psize = context->user_psize;
get_paca()->mm_ctx_sllp = context->sllp;
#endif
-#else /* !CONFIG_PPC_BOOK3S */
- return;
-#endif
}
+#endif /* CONFIG_PPC_64S_HASH_MMU */
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 6749905932f4..8bc9cf62cd93 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -62,7 +62,7 @@ EXPORT_SYMBOL(isa_mem_base);
static const struct dma_map_ops *pci_dma_ops;
-void set_pci_dma_ops(const struct dma_map_ops *dma_ops)
+void __init set_pci_dma_ops(const struct dma_map_ops *dma_ops)
{
pci_dma_ops = dma_ops;
}
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index b49e1060a3bf..48537964fba1 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -37,7 +37,7 @@ int pcibios_assign_bus_offset = 1;
EXPORT_SYMBOL(isa_io_base);
EXPORT_SYMBOL(pci_dram_offset);
-void pcibios_make_OF_bus_map(void);
+void __init pcibios_make_OF_bus_map(void);
static void fixup_cpc710_pci64(struct pci_dev* dev);
static u8* pci_to_OF_bus_map;
@@ -109,7 +109,7 @@ make_one_node_map(struct device_node* node, u8 pci_bus)
}
}
-void
+void __init
pcibios_make_OF_bus_map(void)
{
int i;
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 406d7ee9e322..984813a4d5dc 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -628,7 +628,7 @@ static void do_break_handler(struct pt_regs *regs)
{
struct arch_hw_breakpoint null_brk = {0};
struct arch_hw_breakpoint *info;
- struct ppc_inst instr = ppc_inst(0);
+ ppc_inst_t instr = ppc_inst(0);
int type = 0;
int size = 0;
unsigned long ea;
@@ -1156,6 +1156,40 @@ static inline void save_sprs(struct thread_struct *t)
#endif
}
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+void kvmppc_save_user_regs(void)
+{
+ unsigned long usermsr;
+
+ if (!current->thread.regs)
+ return;
+
+ usermsr = current->thread.regs->msr;
+
+ if (usermsr & MSR_FP)
+ save_fpu(current);
+
+ if (usermsr & MSR_VEC)
+ save_altivec(current);
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (usermsr & MSR_TM) {
+ current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
+ current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
+ current->thread.tm_texasr = mfspr(SPRN_TEXASR);
+ current->thread.regs->msr &= ~MSR_TM;
+ }
+#endif
+}
+EXPORT_SYMBOL_GPL(kvmppc_save_user_regs);
+
+void kvmppc_save_current_sprs(void)
+{
+ save_sprs(&current->thread);
+}
+EXPORT_SYMBOL_GPL(kvmppc_save_current_sprs);
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+
static inline void restore_sprs(struct thread_struct *old_thread,
struct thread_struct *new_thread)
{
@@ -1206,7 +1240,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
{
struct thread_struct *new_thread, *old_thread;
struct task_struct *last;
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
struct ppc64_tlb_batch *batch;
#endif
@@ -1215,7 +1249,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
WARN_ON(!irqs_disabled());
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
batch = this_cpu_ptr(&ppc64_tlb_batch);
if (batch->active) {
current_thread_info()->local_flags |= _TLF_LAZY_MMU;
@@ -1281,9 +1315,9 @@ struct task_struct *__switch_to(struct task_struct *prev,
set_return_regs_changed(); /* _switch changes stack (and regs) */
-#ifdef CONFIG_PPC32
- kuap_assert_locked();
-#endif
+ if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+ kuap_assert_locked();
+
last = _switch(old_thread, new_thread);
/*
@@ -1294,6 +1328,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
*/
#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
/*
* This applies to a process that was context switched while inside
* arch_enter_lazy_mmu_mode(), to re-activate the batch that was
@@ -1305,6 +1340,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
batch = this_cpu_ptr(&ppc64_tlb_batch);
batch->active = 1;
}
+#endif
/*
* Math facilities are masked out of the child MSR in copy_thread.
@@ -1655,7 +1691,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
static void setup_ksp_vsid(struct task_struct *p, unsigned long sp)
{
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
unsigned long sp_vsid;
unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp;
@@ -1767,6 +1803,9 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
p->thread.kuap = KUAP_NONE;
#endif
+#if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP)
+ p->thread.pid = MMU_NO_CONTEXT;
+#endif
setup_ksp_vsid(p, sp);
@@ -2299,10 +2338,9 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
* the heap, we can put it above 1TB so it is backed by a 1TB
* segment. Otherwise the heap will be in the bottom 1TB
* which always uses 256MB segments and this may result in a
- * performance penalty. We don't need to worry about radix. For
- * radix, mmu_highuser_ssize remains unchanged from 256MB.
+ * performance penalty.
*/
- if (!is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T))
+ if (!radix_enabled() && !is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T))
base = max_t(unsigned long, mm->brk, 1UL << SID_SHIFT_1T);
#endif
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index ad1230c4f3fe..3d30d40a0e9c 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -231,7 +231,7 @@ static void __init check_cpu_pa_features(unsigned long node)
ibm_pa_features, ARRAY_SIZE(ibm_pa_features));
}
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
static void __init init_mmu_slb_size(unsigned long node)
{
const __be32 *slb_size_ptr;
@@ -447,7 +447,7 @@ static int __init early_init_dt_scan_chosen_ppc(unsigned long node,
*/
#ifdef CONFIG_SPARSEMEM
-static bool validate_mem_limit(u64 base, u64 *size)
+static bool __init validate_mem_limit(u64 base, u64 *size)
{
u64 max_mem = 1UL << (MAX_PHYSMEM_BITS);
@@ -458,7 +458,7 @@ static bool validate_mem_limit(u64 base, u64 *size)
return true;
}
#else
-static bool validate_mem_limit(u64 base, u64 *size)
+static bool __init validate_mem_limit(u64 base, u64 *size)
{
return true;
}
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 18b04b08b983..0ac5faacc909 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -672,7 +672,7 @@ static inline int __init prom_getproplen(phandle node, const char *pname)
return call_prom("getproplen", 2, 1, node, ADDR(pname));
}
-static void add_string(char **str, const char *q)
+static void __init add_string(char **str, const char *q)
{
char *p = *str;
@@ -682,7 +682,7 @@ static void add_string(char **str, const char *q)
*str = p;
}
-static char *tohex(unsigned int x)
+static char *__init tohex(unsigned int x)
{
static const char digits[] __initconst = "0123456789abcdef";
static char result[9] __prombss;
@@ -728,7 +728,7 @@ static int __init prom_setprop(phandle node, const char *nodename,
#define prom_islower(c) ('a' <= (c) && (c) <= 'z')
#define prom_toupper(c) (prom_islower(c) ? ((c) - 'a' + 'A') : (c))
-static unsigned long prom_strtoul(const char *cp, const char **endp)
+static unsigned long __init prom_strtoul(const char *cp, const char **endp)
{
unsigned long result = 0, base = 10, value;
@@ -753,7 +753,7 @@ static unsigned long prom_strtoul(const char *cp, const char **endp)
return result;
}
-static unsigned long prom_memparse(const char *ptr, const char **retptr)
+static unsigned long __init prom_memparse(const char *ptr, const char **retptr)
{
unsigned long ret = prom_strtoul(ptr, retptr);
int shift = 0;
@@ -1786,7 +1786,7 @@ static void __init prom_close_stdin(void)
}
#ifdef CONFIG_PPC_SVM
-static int prom_rtas_hcall(uint64_t args)
+static int __init prom_rtas_hcall(uint64_t args)
{
register uint64_t arg1 asm("r3") = H_RTAS;
register uint64_t arg2 asm("r4") = args;
@@ -2991,7 +2991,7 @@ static void __init fixup_device_tree_efika_add_phy(void)
/* Check if the phy-handle property exists - bail if it does */
rv = prom_getprop(node, "phy-handle", prop, sizeof(prop));
- if (!rv)
+ if (rv <= 0)
return;
/*
@@ -3248,7 +3248,7 @@ static void __init prom_check_initrd(unsigned long r3, unsigned long r4)
/*
* Perform the Enter Secure Mode ultracall.
*/
-static int enter_secure_mode(unsigned long kbase, unsigned long fdt)
+static int __init enter_secure_mode(unsigned long kbase, unsigned long fdt)
{
register unsigned long r3 asm("r3") = UV_ESM;
register unsigned long r4 asm("r4") = kbase;
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index ff80bbad22a5..733e6ef36758 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -492,8 +492,25 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...)
}
EXPORT_SYMBOL(rtas_call);
-/* For RTAS_BUSY (-2), delay for 1 millisecond. For an extended busy status
- * code of 990n, perform the hinted delay of 10^n (last digit) milliseconds.
+/**
+ * rtas_busy_delay_time() - From an RTAS status value, calculate the
+ * suggested delay time in milliseconds.
+ *
+ * @status: a value returned from rtas_call() or similar APIs which return
+ * the status of a RTAS function call.
+ *
+ * Context: Any context.
+ *
+ * Return:
+ * * 100000 - If @status is 9905.
+ * * 10000 - If @status is 9904.
+ * * 1000 - If @status is 9903.
+ * * 100 - If @status is 9902.
+ * * 10 - If @status is 9901.
+ * * 1 - If @status is either 9900 or -2. This is "wrong" for -2, but
+ * some callers depend on this behavior, and the worst outcome
+ * is that they will delay for longer than necessary.
+ * * 0 - If @status is not a busy or extended delay value.
*/
unsigned int rtas_busy_delay_time(int status)
{
@@ -513,17 +530,77 @@ unsigned int rtas_busy_delay_time(int status)
}
EXPORT_SYMBOL(rtas_busy_delay_time);
-/* For an RTAS busy status code, perform the hinted delay. */
-unsigned int rtas_busy_delay(int status)
+/**
+ * rtas_busy_delay() - helper for RTAS busy and extended delay statuses
+ *
+ * @status: a value returned from rtas_call() or similar APIs which return
+ * the status of a RTAS function call.
+ *
+ * Context: Process context. May sleep or schedule.
+ *
+ * Return:
+ * * true - @status is RTAS_BUSY or an extended delay hint. The
+ * caller may assume that the CPU has been yielded if necessary,
+ * and that an appropriate delay for @status has elapsed.
+ * Generally the caller should reattempt the RTAS call which
+ * yielded @status.
+ *
+ * * false - @status is not @RTAS_BUSY nor an extended delay hint. The
+ * caller is responsible for handling @status.
+ */
+bool rtas_busy_delay(int status)
{
unsigned int ms;
+ bool ret;
- might_sleep();
- ms = rtas_busy_delay_time(status);
- if (ms && need_resched())
- msleep(ms);
+ switch (status) {
+ case RTAS_EXTENDED_DELAY_MIN...RTAS_EXTENDED_DELAY_MAX:
+ ret = true;
+ ms = rtas_busy_delay_time(status);
+ /*
+ * The extended delay hint can be as high as 100 seconds.
+ * Surely any function returning such a status is either
+ * buggy or isn't going to be significantly slowed by us
+ * polling at 1HZ. Clamp the sleep time to one second.
+ */
+ ms = clamp(ms, 1U, 1000U);
+ /*
+ * The delay hint is an order-of-magnitude suggestion, not
+ * a minimum. It is fine, possibly even advantageous, for
+ * us to pause for less time than hinted. For small values,
+ * use usleep_range() to ensure we don't sleep much longer
+ * than actually needed.
+ *
+ * See Documentation/timers/timers-howto.rst for
+ * explanation of the threshold used here. In effect we use
+ * usleep_range() for 9900 and 9901, msleep() for
+ * 9902-9905.
+ */
+ if (ms <= 20)
+ usleep_range(ms * 100, ms * 1000);
+ else
+ msleep(ms);
+ break;
+ case RTAS_BUSY:
+ ret = true;
+ /*
+ * We should call again immediately if there's no other
+ * work to do.
+ */
+ cond_resched();
+ break;
+ default:
+ ret = false;
+ /*
+ * Not a busy or extended delay status; the caller should
+ * handle @status itself. Ensure we warn on misuses in
+ * atomic context regardless.
+ */
+ might_sleep();
+ break;
+ }
- return ms;
+ return ret;
}
EXPORT_SYMBOL(rtas_busy_delay);
@@ -809,13 +886,13 @@ void rtas_os_term(char *str)
/**
* rtas_activate_firmware() - Activate a new version of firmware.
*
+ * Context: This function may sleep.
+ *
* Activate a new version of partition firmware. The OS must call this
* after resuming from a partition hibernation or migration in order
* to maintain the ability to perform live firmware updates. It's not
* catastrophic for this method to be absent or to fail; just log the
* condition in that case.
- *
- * Context: This function may sleep.
*/
void rtas_activate_firmware(void)
{
@@ -890,11 +967,12 @@ int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...)
#endif /* CONFIG_PPC_PSERIES */
/**
- * Find a specific pseries error log in an RTAS extended event log.
+ * get_pseries_errorlog() - Find a specific pseries error log in an RTAS
+ * extended event log.
* @log: RTAS error/event log
* @section_id: two character section identifier
*
- * Returns a pointer to the specified errorlog or NULL if not found.
+ * Return: A pointer to the specified errorlog or NULL if not found.
*/
struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log,
uint16_t section_id)
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 32ee17753eb4..cf0f42909ddf 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -455,7 +455,7 @@ static void rtas_event_scan(struct work_struct *w)
}
#ifdef CONFIG_PPC64
-static void retrieve_nvram_error_log(void)
+static void __init retrieve_nvram_error_log(void)
{
unsigned int err_type ;
int rc ;
@@ -473,12 +473,12 @@ static void retrieve_nvram_error_log(void)
}
}
#else /* CONFIG_PPC64 */
-static void retrieve_nvram_error_log(void)
+static void __init retrieve_nvram_error_log(void)
{
}
#endif /* CONFIG_PPC64 */
-static void start_event_scan(void)
+static void __init start_event_scan(void)
{
printk(KERN_DEBUG "RTAS daemon started\n");
pr_debug("rtasd: will sleep for %d milliseconds\n",
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 15fb5ea1b9ea..e159d4093d98 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -44,7 +44,7 @@ static void enable_barrier_nospec(bool enable)
do_barrier_nospec_fixups(enable);
}
-void setup_barrier_nospec(void)
+void __init setup_barrier_nospec(void)
{
bool enable;
@@ -132,7 +132,7 @@ early_param("nospectre_v2", handle_nospectre_v2);
#endif /* CONFIG_PPC_FSL_BOOK3E || CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_PPC_FSL_BOOK3E
-void setup_spectre_v2(void)
+void __init setup_spectre_v2(void)
{
if (no_spectrev2 || cpu_mitigations_off())
do_btb_flush_fixups();
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 4f1322b65760..f8da937df918 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -582,7 +582,7 @@ static __init int add_pcspkr(void)
device_initcall(add_pcspkr);
#endif /* CONFIG_PCSPKR_PLATFORM */
-void probe_machine(void)
+static __init void probe_machine(void)
{
extern struct machdep_calls __machine_desc_start;
extern struct machdep_calls __machine_desc_end;
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
index 84058bbc8fe9..93f22da12abe 100644
--- a/arch/powerpc/kernel/setup.h
+++ b/arch/powerpc/kernel/setup.h
@@ -29,7 +29,7 @@ void setup_tlb_core_data(void);
static inline void setup_tlb_core_data(void) { }
#endif
-#if defined(CONFIG_PPC_BOOK3E) || defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#ifdef CONFIG_BOOKE_OR_40x
void exc_lvl_early_init(void);
#else
static inline void exc_lvl_early_init(void) { }
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 7ec5c47fce0e..a6e9d36d7c01 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -75,7 +75,7 @@ EXPORT_SYMBOL(DMA_MODE_WRITE);
notrace void __init machine_init(u64 dt_ptr)
{
u32 *addr = (u32 *)patch_site_addr(&patch__memset_nocache);
- struct ppc_inst insn;
+ ppc_inst_t insn;
/* Configure static keys first, now that we're relocated. */
setup_feature_keys();
@@ -175,7 +175,7 @@ void __init emergency_stack_init(void)
}
#endif
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#ifdef CONFIG_BOOKE_OR_40x
void __init exc_lvl_early_init(void)
{
unsigned int i, hw_cpu;
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 6052f5d5ded3..d87f7c1103ce 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -499,7 +499,7 @@ void smp_release_cpus(void)
* routines and/or provided to userland
*/
-static void init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize,
+static void __init init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize,
u32 bsize, u32 sets)
{
info->size = size;
@@ -880,14 +880,23 @@ void __init setup_per_cpu_areas(void)
int rc = -EINVAL;
/*
- * Linear mapping is one of 4K, 1M and 16M. For 4K, no need
- * to group units. For larger mappings, use 1M atom which
- * should be large enough to contain a number of units.
+ * BookE and BookS radix are historical values and should be revisited.
*/
- if (mmu_linear_psize == MMU_PAGE_4K)
+ if (IS_ENABLED(CONFIG_PPC_BOOK3E)) {
+ atom_size = SZ_1M;
+ } else if (radix_enabled()) {
atom_size = PAGE_SIZE;
- else
- atom_size = 1 << 20;
+ } else if (IS_ENABLED(CONFIG_PPC_64S_HASH_MMU)) {
+ /*
+ * Linear mapping is one of 4K, 1M and 16M. For 4K, no need
+ * to group units. For larger mappings, use 1M atom which
+ * should be large enough to contain a number of units.
+ */
+ if (mmu_linear_psize == MMU_PAGE_4K)
+ atom_size = PAGE_SIZE;
+ else
+ atom_size = SZ_1M;
+ }
if (pcpu_chosen_fc != PCPU_FC_PAGE) {
rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 3e053e2fd6b6..d84c434b2b78 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -527,16 +527,20 @@ static long restore_user_regs(struct pt_regs *regs,
regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1));
#ifdef CONFIG_SPE
- /* force the process to reload the spe registers from
- current->thread when it next does spe instructions */
+ /*
+ * Force the process to reload the spe registers from
+ * current->thread when it next does spe instructions.
+ * Since this is user ABI, we must enforce the sizing.
+ */
+ BUILD_BUG_ON(sizeof(current->thread.spe) != ELF_NEVRREG * sizeof(u32));
regs_set_return_msr(regs, regs->msr & ~MSR_SPE);
if (msr & MSR_SPE) {
/* restore spe registers from the stack */
- unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs,
- ELF_NEVRREG * sizeof(u32), failed);
+ unsafe_copy_from_user(&current->thread.spe, &sr->mc_vregs,
+ sizeof(current->thread.spe), failed);
current->thread.used_spe = true;
} else if (current->thread.used_spe)
- memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32));
+ memset(&current->thread.spe, 0, sizeof(current->thread.spe));
/* Always get SPEFSCR back */
unsafe_get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs + ELF_NEVRREG, failed);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index c23ee842c4c3..b7fd6a72aa76 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -61,6 +61,7 @@
#include <asm/cpu_has_feature.h>
#include <asm/ftrace.h>
#include <asm/kup.h>
+#include <asm/fadump.h>
#ifdef DEBUG
#include <asm/udbg.h>
@@ -621,6 +622,45 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
#endif
#ifdef CONFIG_NMI_IPI
+static void crash_stop_this_cpu(struct pt_regs *regs)
+#else
+static void crash_stop_this_cpu(void *dummy)
+#endif
+{
+ /*
+ * Just busy wait here and avoid marking CPU as offline to ensure
+ * register data is captured appropriately.
+ */
+ while (1)
+ cpu_relax();
+}
+
+void crash_smp_send_stop(void)
+{
+ static bool stopped = false;
+
+ /*
+ * In case of fadump, register data for all CPUs is captured by f/w
+ * on ibm,os-term rtas call. Skip IPI callbacks to other CPUs before
+ * this rtas call to avoid tricky post processing of those CPUs'
+ * backtraces.
+ */
+ if (should_fadump_crash())
+ return;
+
+ if (stopped)
+ return;
+
+ stopped = true;
+
+#ifdef CONFIG_NMI_IPI
+ smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_stop_this_cpu, 1000000);
+#else
+ smp_call_function(crash_stop_this_cpu, NULL, 0);
+#endif /* CONFIG_NMI_IPI */
+}
+
+#ifdef CONFIG_NMI_IPI
static void nmi_stop_this_cpu(struct pt_regs *regs)
{
/*
@@ -896,7 +936,8 @@ out:
return tg;
}
-static int update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg, int cpu, int cpu_group_start)
+static int __init update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg,
+ int cpu, int cpu_group_start)
{
int first_thread = cpu_first_thread_sibling(cpu);
int i;
@@ -1635,12 +1676,14 @@ void start_secondary(void *unused)
BUG();
}
+#ifdef CONFIG_PROFILING
int setup_profiling_timer(unsigned int multiplier)
{
return 0;
}
+#endif
-static void fixup_topology(void)
+static void __init fixup_topology(void)
{
int i;
diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S
index f73f4d72fea4..e0cbd63007f2 100644
--- a/arch/powerpc/kernel/swsusp_32.S
+++ b/arch/powerpc/kernel/swsusp_32.S
@@ -181,7 +181,7 @@ _GLOBAL(swsusp_arch_resume)
#ifdef CONFIG_ALTIVEC
/* Stop pending alitvec streams and memory accesses */
BEGIN_FTR_SECTION
- DSSALL
+ PPC_DSSALL
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
sync
diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S
index 96bb20715aa9..9f1903c7f540 100644
--- a/arch/powerpc/kernel/swsusp_asm64.S
+++ b/arch/powerpc/kernel/swsusp_asm64.S
@@ -141,7 +141,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR)
_GLOBAL(swsusp_arch_resume)
/* Stop pending alitvec streams and memory accesses */
BEGIN_FTR_SECTION
- DSSALL
+ PPC_DSSALL
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
sync
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 08d8072d6e7a..d45a415d5374 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -214,7 +214,7 @@ static ssize_t __used store_dscr_default(struct device *dev,
static DEVICE_ATTR(dscr_default, 0600,
show_dscr_default, store_dscr_default);
-static void sysfs_create_dscr_default(void)
+static void __init sysfs_create_dscr_default(void)
{
if (cpu_has_feature(CPU_FTR_DSCR)) {
int cpu;
@@ -744,12 +744,12 @@ static ssize_t show_svm(struct device *dev, struct device_attribute *attr, char
}
static DEVICE_ATTR(svm, 0444, show_svm, NULL);
-static void create_svm_file(void)
+static void __init create_svm_file(void)
{
device_create_file(cpu_subsys.dev_root, &dev_attr_svm);
}
#else
-static void create_svm_file(void)
+static void __init create_svm_file(void)
{
}
#endif /* CONFIG_PPC_SVM */
@@ -1110,7 +1110,7 @@ EXPORT_SYMBOL_GPL(cpu_remove_dev_attr_group);
/* NUMA stuff */
#ifdef CONFIG_NUMA
-static void register_nodes(void)
+static void __init register_nodes(void)
{
int i;
@@ -1134,7 +1134,7 @@ void sysfs_remove_device_from_node(struct device *dev, int nid)
EXPORT_SYMBOL_GPL(sysfs_remove_device_from_node);
#else
-static void register_nodes(void)
+static void __init register_nodes(void)
{
return;
}
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index cae8f03a44fe..62361cc7281c 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -88,6 +88,7 @@ static struct clocksource clocksource_timebase = {
#define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF
u64 decrementer_max = DECREMENTER_DEFAULT_MAX;
+EXPORT_SYMBOL_GPL(decrementer_max); /* for KVM HDEC */
static int decrementer_set_next_event(unsigned long evt,
struct clock_event_device *dev);
@@ -107,6 +108,7 @@ struct clock_event_device decrementer_clockevent = {
EXPORT_SYMBOL(decrementer_clockevent);
DEFINE_PER_CPU(u64, decrementers_next_tb);
+EXPORT_SYMBOL_GPL(decrementers_next_tb);
static DEFINE_PER_CPU(struct clock_event_device, decrementers);
#define XSEC_PER_SEC (1024*1024)
@@ -496,6 +498,16 @@ EXPORT_SYMBOL(profile_pc);
* 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
*/
#ifdef CONFIG_PPC64
+static inline unsigned long test_irq_work_pending(void)
+{
+ unsigned long x;
+
+ asm volatile("lbz %0,%1(13)"
+ : "=r" (x)
+ : "i" (offsetof(struct paca_struct, irq_work_pending)));
+ return x;
+}
+
static inline void set_irq_work_pending_flag(void)
{
asm volatile("stb %0,%1(13)" : :
@@ -539,13 +551,44 @@ void arch_irq_work_raise(void)
preempt_enable();
}
+static void set_dec_or_work(u64 val)
+{
+ set_dec(val);
+ /* We may have raced with new irq work */
+ if (unlikely(test_irq_work_pending()))
+ set_dec(1);
+}
+
#else /* CONFIG_IRQ_WORK */
#define test_irq_work_pending() 0
#define clear_irq_work_pending()
+static void set_dec_or_work(u64 val)
+{
+ set_dec(val);
+}
#endif /* CONFIG_IRQ_WORK */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+void timer_rearm_host_dec(u64 now)
+{
+ u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
+
+ WARN_ON_ONCE(!arch_irqs_disabled());
+ WARN_ON_ONCE(mfmsr() & MSR_EE);
+
+ if (now >= *next_tb) {
+ local_paca->irq_happened |= PACA_IRQ_DEC;
+ } else {
+ now = *next_tb - now;
+ if (now <= decrementer_max)
+ set_dec_or_work(now);
+ }
+}
+EXPORT_SYMBOL_GPL(timer_rearm_host_dec);
+#endif
+
/*
* timer_interrupt - gets called when the decrementer overflows,
* with interrupts disabled.
@@ -566,22 +609,23 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
return;
}
- /* Ensure a positive value is written to the decrementer, or else
- * some CPUs will continue to take decrementer exceptions. When the
- * PPC_WATCHDOG (decrementer based) is configured, keep this at most
- * 31 bits, which is about 4 seconds on most systems, which gives
- * the watchdog a chance of catching timer interrupt hard lockups.
- */
- if (IS_ENABLED(CONFIG_PPC_WATCHDOG))
- set_dec(0x7fffffff);
- else
- set_dec(decrementer_max);
-
- /* Conditionally hard-enable interrupts now that the DEC has been
- * bumped to its maximum value
- */
- may_hard_irq_enable();
+ /* Conditionally hard-enable interrupts. */
+ if (should_hard_irq_enable()) {
+ /*
+ * Ensure a positive value is written to the decrementer, or
+ * else some CPUs will continue to take decrementer exceptions.
+ * When the PPC_WATCHDOG (decrementer based) is configured,
+ * keep this at most 31 bits, which is about 4 seconds on most
+ * systems, which gives the watchdog a chance of catching timer
+ * interrupt hard lockups.
+ */
+ if (IS_ENABLED(CONFIG_PPC_WATCHDOG))
+ set_dec(0x7fffffff);
+ else
+ set_dec(decrementer_max);
+ do_hard_irq_enable();
+ }
#if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
if (atomic_read(&ppc_n_lost_interrupts) != 0)
@@ -606,10 +650,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
} else {
now = *next_tb - now;
if (now <= decrementer_max)
- set_dec(now);
- /* We may have raced with new irq work */
- if (test_irq_work_pending())
- set_dec(1);
+ set_dec_or_work(now);
__this_cpu_inc(irq_stat.timer_irqs_others);
}
@@ -730,7 +771,7 @@ static int __init get_freq(char *name, int cells, unsigned long *val)
static void start_cpu_decrementer(void)
{
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#ifdef CONFIG_BOOKE_OR_40x
unsigned int tcr;
/* Clear any pending timer interrupts */
@@ -843,11 +884,7 @@ static int decrementer_set_next_event(unsigned long evt,
struct clock_event_device *dev)
{
__this_cpu_write(decrementers_next_tb, get_tb() + evt);
- set_dec(evt);
-
- /* We may have raced with new irq work */
- if (test_irq_work_pending())
- set_dec(1);
+ set_dec_or_work(evt);
return 0;
}
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 2b91f233b05d..3beecc32940b 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -226,11 +226,8 @@ _GLOBAL(tm_reclaim)
/* Sync the userland GPRs 2-12, 14-31 to thread->regs: */
SAVE_GPR(0, r7) /* user r0 */
- SAVE_GPR(2, r7) /* user r2 */
- SAVE_4GPRS(3, r7) /* user r3-r6 */
- SAVE_GPR(8, r7) /* user r8 */
- SAVE_GPR(9, r7) /* user r9 */
- SAVE_GPR(10, r7) /* user r10 */
+ SAVE_GPRS(2, 6, r7) /* user r2-r6 */
+ SAVE_GPRS(8, 10, r7) /* user r8-r10 */
ld r3, GPR1(r1) /* user r1 */
ld r4, GPR7(r1) /* user r7 */
ld r5, GPR11(r1) /* user r11 */
@@ -445,12 +442,8 @@ restore_gprs:
ld r6, THREAD_TM_PPR(r3)
REST_GPR(0, r7) /* GPR0 */
- REST_2GPRS(2, r7) /* GPR2-3 */
- REST_GPR(4, r7) /* GPR4 */
- REST_4GPRS(8, r7) /* GPR8-11 */
- REST_2GPRS(12, r7) /* GPR12-13 */
-
- REST_NVGPRS(r7) /* GPR14-31 */
+ REST_GPRS(2, 4, r7) /* GPR2-4 */
+ REST_GPRS(8, 31, r7) /* GPR8-31 */
/* Load up PPR and DSCR here so we don't run with user values for long */
mtspr SPRN_DSCR, r5
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index d89c5df4f206..80b6285769f2 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -41,10 +41,10 @@
#define NUM_FTRACE_TRAMPS 8
static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS];
-static struct ppc_inst
+static ppc_inst_t
ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
{
- struct ppc_inst op;
+ ppc_inst_t op;
addr = ppc_function_entry((void *)addr);
@@ -55,9 +55,9 @@ ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
}
static int
-ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new)
+ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new)
{
- struct ppc_inst replaced;
+ ppc_inst_t replaced;
/*
* Note:
@@ -90,24 +90,24 @@ ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new)
*/
static int test_24bit_addr(unsigned long ip, unsigned long addr)
{
- struct ppc_inst op;
+ ppc_inst_t op;
addr = ppc_function_entry((void *)addr);
/* use the create_branch to verify that this offset can be branched */
return create_branch(&op, (u32 *)ip, addr, 0) == 0;
}
-static int is_bl_op(struct ppc_inst op)
+static int is_bl_op(ppc_inst_t op)
{
return (ppc_inst_val(op) & 0xfc000003) == 0x48000001;
}
-static int is_b_op(struct ppc_inst op)
+static int is_b_op(ppc_inst_t op)
{
return (ppc_inst_val(op) & 0xfc000003) == 0x48000000;
}
-static unsigned long find_bl_target(unsigned long ip, struct ppc_inst op)
+static unsigned long find_bl_target(unsigned long ip, ppc_inst_t op)
{
int offset;
@@ -127,7 +127,7 @@ __ftrace_make_nop(struct module *mod,
{
unsigned long entry, ptr, tramp;
unsigned long ip = rec->ip;
- struct ppc_inst op, pop;
+ ppc_inst_t op, pop;
/* read where this goes */
if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
@@ -221,10 +221,9 @@ static int
__ftrace_make_nop(struct module *mod,
struct dyn_ftrace *rec, unsigned long addr)
{
- struct ppc_inst op;
- unsigned int jmp[4];
+ ppc_inst_t op;
unsigned long ip = rec->ip;
- unsigned long tramp;
+ unsigned long tramp, ptr;
if (copy_from_kernel_nofault(&op, (void *)ip, MCOUNT_INSN_SIZE))
return -EFAULT;
@@ -238,41 +237,13 @@ __ftrace_make_nop(struct module *mod,
/* lets find where the pointer goes */
tramp = find_bl_target(ip, op);
- /*
- * On PPC32 the trampoline looks like:
- * 0x3d, 0x80, 0x00, 0x00 lis r12,sym@ha
- * 0x39, 0x8c, 0x00, 0x00 addi r12,r12,sym@l
- * 0x7d, 0x89, 0x03, 0xa6 mtctr r12
- * 0x4e, 0x80, 0x04, 0x20 bctr
- */
-
- pr_devel("ip:%lx jumps to %lx", ip, tramp);
-
/* Find where the trampoline jumps to */
- if (copy_from_kernel_nofault(jmp, (void *)tramp, sizeof(jmp))) {
- pr_err("Failed to read %lx\n", tramp);
+ if (module_trampoline_target(mod, tramp, &ptr)) {
+ pr_err("Failed to get trampoline target\n");
return -EFAULT;
}
- pr_devel(" %08x %08x ", jmp[0], jmp[1]);
-
- /* verify that this is what we expect it to be */
- if (((jmp[0] & 0xffff0000) != 0x3d800000) ||
- ((jmp[1] & 0xffff0000) != 0x398c0000) ||
- (jmp[2] != 0x7d8903a6) ||
- (jmp[3] != 0x4e800420)) {
- pr_err("Not a trampoline\n");
- return -EINVAL;
- }
-
- tramp = (jmp[1] & 0xffff) |
- ((jmp[0] & 0xffff) << 16);
- if (tramp & 0x8000)
- tramp -= 0x10000;
-
- pr_devel(" %lx ", tramp);
-
- if (tramp != addr) {
+ if (ptr != addr) {
pr_err("Trampoline location %08lx does not match addr\n",
tramp);
return -EINVAL;
@@ -291,7 +262,7 @@ __ftrace_make_nop(struct module *mod,
static unsigned long find_ftrace_tramp(unsigned long ip)
{
int i;
- struct ppc_inst instr;
+ ppc_inst_t instr;
/*
* We have the compiler generated long_branch tramps at the end
@@ -329,9 +300,9 @@ static int add_ftrace_tramp(unsigned long tramp)
static int setup_mcount_compiler_tramp(unsigned long tramp)
{
int i;
- struct ppc_inst op;
+ ppc_inst_t op;
unsigned long ptr;
- struct ppc_inst instr;
+ ppc_inst_t instr;
static unsigned long ftrace_plt_tramps[NUM_FTRACE_TRAMPS];
/* Is this a known long jump tramp? */
@@ -396,7 +367,7 @@ static int setup_mcount_compiler_tramp(unsigned long tramp)
static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr)
{
unsigned long tramp, ip = rec->ip;
- struct ppc_inst op;
+ ppc_inst_t op;
/* Read where this goes */
if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
@@ -436,7 +407,7 @@ int ftrace_make_nop(struct module *mod,
struct dyn_ftrace *rec, unsigned long addr)
{
unsigned long ip = rec->ip;
- struct ppc_inst old, new;
+ ppc_inst_t old, new;
/*
* If the calling address is more that 24 bits away,
@@ -489,7 +460,7 @@ int ftrace_make_nop(struct module *mod,
*/
#ifndef CONFIG_MPROFILE_KERNEL
static int
-expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1)
+expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1)
{
/*
* We expect to see:
@@ -507,7 +478,7 @@ expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1)
}
#else
static int
-expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1)
+expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1)
{
/* look for patched "NOP" on ppc64 with -mprofile-kernel */
if (!ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP())))
@@ -519,8 +490,8 @@ expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1)
static int
__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
- struct ppc_inst op[2];
- struct ppc_inst instr;
+ ppc_inst_t op[2];
+ ppc_inst_t instr;
void *ip = (void *)rec->ip;
unsigned long entry, ptr, tramp;
struct module *mod = rec->arch.mod;
@@ -588,8 +559,10 @@ static int
__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
int err;
- struct ppc_inst op;
+ ppc_inst_t op;
u32 *ip = (u32 *)rec->ip;
+ struct module *mod = rec->arch.mod;
+ unsigned long tramp;
/* read where this goes */
if (copy_inst_from_kernel_nofault(&op, ip))
@@ -602,13 +575,23 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
}
/* If we never set up a trampoline to ftrace_caller, then bail */
- if (!rec->arch.mod->arch.tramp) {
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ if (!mod->arch.tramp || !mod->arch.tramp_regs) {
+#else
+ if (!mod->arch.tramp) {
+#endif
pr_err("No ftrace trampoline\n");
return -EINVAL;
}
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ if (rec->flags & FTRACE_FL_REGS)
+ tramp = mod->arch.tramp_regs;
+ else
+#endif
+ tramp = mod->arch.tramp;
/* create the branch to the trampoline */
- err = create_branch(&op, ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK);
+ err = create_branch(&op, ip, tramp, BRANCH_SET_LINK);
if (err) {
pr_err("REL24 out of range!\n");
return -EINVAL;
@@ -626,7 +609,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)
{
- struct ppc_inst op;
+ ppc_inst_t op;
void *ip = (void *)rec->ip;
unsigned long tramp, entry, ptr;
@@ -674,7 +657,7 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
unsigned long ip = rec->ip;
- struct ppc_inst old, new;
+ ppc_inst_t old, new;
/*
* If the calling address is more that 24 bits away,
@@ -713,7 +696,7 @@ static int
__ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
unsigned long addr)
{
- struct ppc_inst op;
+ ppc_inst_t op;
unsigned long ip = rec->ip;
unsigned long entry, ptr, tramp;
struct module *mod = rec->arch.mod;
@@ -807,7 +790,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
unsigned long addr)
{
unsigned long ip = rec->ip;
- struct ppc_inst old, new;
+ ppc_inst_t old, new;
/*
* If the calling address is more that 24 bits away,
@@ -847,7 +830,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
int ftrace_update_ftrace_func(ftrace_func_t func)
{
unsigned long ip = (unsigned long)(&ftrace_call);
- struct ppc_inst old, new;
+ ppc_inst_t old, new;
int ret;
old = ppc_inst_read((u32 *)&ftrace_call);
@@ -932,7 +915,7 @@ int ftrace_enable_ftrace_graph_caller(void)
unsigned long ip = (unsigned long)(&ftrace_graph_call);
unsigned long addr = (unsigned long)(&ftrace_graph_caller);
unsigned long stub = (unsigned long)(&ftrace_graph_stub);
- struct ppc_inst old, new;
+ ppc_inst_t old, new;
old = ftrace_call_replace(ip, stub, 0);
new = ftrace_call_replace(ip, addr, 0);
@@ -945,7 +928,7 @@ int ftrace_disable_ftrace_graph_caller(void)
unsigned long ip = (unsigned long)(&ftrace_graph_call);
unsigned long addr = (unsigned long)(&ftrace_graph_caller);
unsigned long stub = (unsigned long)(&ftrace_graph_stub);
- struct ppc_inst old, new;
+ ppc_inst_t old, new;
old = ftrace_call_replace(ip, addr, 0);
new = ftrace_call_replace(ip, stub, 0);
diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S
index e023ae59c429..0a02c0cb12d9 100644
--- a/arch/powerpc/kernel/trace/ftrace_32.S
+++ b/arch/powerpc/kernel/trace/ftrace_32.S
@@ -9,55 +9,135 @@
#include <asm/asm-offsets.h>
#include <asm/ftrace.h>
#include <asm/export.h>
+#include <asm/ptrace.h>
_GLOBAL(mcount)
_GLOBAL(_mcount)
/*
* It is required that _mcount on PPC32 must preserve the
- * link register. But we have r0 to play with. We use r0
+ * link register. But we have r12 to play with. We use r12
* to push the return address back to the caller of mcount
* into the ctr register, restore the link register and
* then jump back using the ctr register.
*/
- mflr r0
- mtctr r0
- lwz r0, 4(r1)
+ mflr r12
+ mtctr r12
mtlr r0
bctr
+EXPORT_SYMBOL(_mcount)
_GLOBAL(ftrace_caller)
MCOUNT_SAVE_FRAME
/* r3 ends up with link register */
subi r3, r3, MCOUNT_INSN_SIZE
+ lis r5,function_trace_op@ha
+ lwz r5,function_trace_op@l(r5)
+ li r6, 0
.globl ftrace_call
ftrace_call:
bl ftrace_stub
nop
+ MCOUNT_RESTORE_FRAME
+ftrace_caller_common:
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
.globl ftrace_graph_call
ftrace_graph_call:
b ftrace_graph_stub
_GLOBAL(ftrace_graph_stub)
#endif
- MCOUNT_RESTORE_FRAME
/* old link register ends up in ctr reg */
bctr
-EXPORT_SYMBOL(_mcount)
_GLOBAL(ftrace_stub)
blr
+_GLOBAL(ftrace_regs_caller)
+ /* Save the original return address in A's stack frame */
+ stw r0,LRSAVE(r1)
+
+ /* Create our stack frame + pt_regs */
+ stwu r1,-INT_FRAME_SIZE(r1)
+
+ /* Save all gprs to pt_regs */
+ stw r0, GPR0(r1)
+ stmw r2, GPR2(r1)
+
+ /* Save previous stack pointer (r1) */
+ addi r8, r1, INT_FRAME_SIZE
+ stw r8, GPR1(r1)
+
+ /* Load special regs for save below */
+ mfmsr r8
+ mfctr r9
+ mfxer r10
+ mfcr r11
+
+ /* Get the _mcount() call site out of LR */
+ mflr r7
+ /* Save it as pt_regs->nip */
+ stw r7, _NIP(r1)
+ /* Save the read LR in pt_regs->link */
+ stw r0, _LINK(r1)
+
+ lis r3,function_trace_op@ha
+ lwz r5,function_trace_op@l(r3)
+
+ /* Calculate ip from nip-4 into r3 for call below */
+ subi r3, r7, MCOUNT_INSN_SIZE
+
+ /* Put the original return address in r4 as parent_ip */
+ mr r4, r0
+
+ /* Save special regs */
+ stw r8, _MSR(r1)
+ stw r9, _CTR(r1)
+ stw r10, _XER(r1)
+ stw r11, _CCR(r1)
+
+ /* Load &pt_regs in r6 for call below */
+ addi r6, r1, STACK_FRAME_OVERHEAD
+
+ /* ftrace_call(r3, r4, r5, r6) */
+.globl ftrace_regs_call
+ftrace_regs_call:
+ bl ftrace_stub
+ nop
+
+ /* Load ctr with the possibly modified NIP */
+ lwz r3, _NIP(r1)
+ mtctr r3
+
+ /* Restore gprs */
+ lmw r2, GPR2(r1)
+
+ /* Restore possibly modified LR */
+ lwz r0, _LINK(r1)
+ mtlr r0
+
+ /* Pop our stack frame */
+ addi r1, r1, INT_FRAME_SIZE
+
+ b ftrace_caller_common
+
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
_GLOBAL(ftrace_graph_caller)
+ stwu r1,-48(r1)
+ stw r3, 12(r1)
+ stw r4, 16(r1)
+ stw r5, 20(r1)
+ stw r6, 24(r1)
+ stw r7, 28(r1)
+ stw r8, 32(r1)
+ stw r9, 36(r1)
+ stw r10,40(r1)
+
addi r5, r1, 48
- /* load r4 with local address */
- lwz r4, 44(r1)
+ mfctr r4 /* ftrace_caller has moved local addr here */
+ stw r4, 44(r1)
+ mflr r3 /* ftrace_caller has restored LR from stack */
subi r4, r4, MCOUNT_INSN_SIZE
- /* Grab the LR out of the caller stack frame */
- lwz r3,52(r1)
-
bl prepare_ftrace_return
nop
@@ -66,9 +146,21 @@ _GLOBAL(ftrace_graph_caller)
* Change the LR in the callers stack frame to this.
*/
stw r3,52(r1)
+ mtlr r3
+ lwz r0,44(r1)
+ mtctr r0
+
+ lwz r3, 12(r1)
+ lwz r4, 16(r1)
+ lwz r5, 20(r1)
+ lwz r6, 24(r1)
+ lwz r7, 28(r1)
+ lwz r8, 32(r1)
+ lwz r9, 36(r1)
+ lwz r10,40(r1)
+
+ addi r1, r1, 48
- MCOUNT_RESTORE_FRAME
- /* old link register ends up in ctr reg */
bctr
_GLOBAL(return_to_handler)
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index f9fd5f743eba..d636fc755f60 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -41,15 +41,14 @@ _GLOBAL(ftrace_regs_caller)
/* Save all gprs to pt_regs */
SAVE_GPR(0, r1)
- SAVE_10GPRS(2, r1)
+ SAVE_GPRS(2, 11, r1)
/* Ok to continue? */
lbz r3, PACA_FTRACE_ENABLED(r13)
cmpdi r3, 0
beq ftrace_no_trace
- SAVE_10GPRS(12, r1)
- SAVE_10GPRS(22, r1)
+ SAVE_GPRS(12, 31, r1)
/* Save previous stack pointer (r1) */
addi r8, r1, SWITCH_FRAME_SIZE
@@ -108,10 +107,8 @@ ftrace_regs_call:
#endif
/* Restore gprs */
- REST_GPR(0,r1)
- REST_10GPRS(2,r1)
- REST_10GPRS(12,r1)
- REST_10GPRS(22,r1)
+ REST_GPR(0, r1)
+ REST_GPRS(2, 31, r1)
/* Restore possibly modified LR */
ld r0, _LINK(r1)
@@ -157,7 +154,7 @@ _GLOBAL(ftrace_caller)
stdu r1, -SWITCH_FRAME_SIZE(r1)
/* Save all gprs to pt_regs */
- SAVE_8GPRS(3, r1)
+ SAVE_GPRS(3, 10, r1)
lbz r3, PACA_FTRACE_ENABLED(r13)
cmpdi r3, 0
@@ -194,7 +191,7 @@ ftrace_call:
mtctr r3
/* Restore gprs */
- REST_8GPRS(3,r1)
+ REST_GPRS(3, 10, r1)
/* Restore callee's TOC */
ld r2, 24(r1)
diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c
index 8513aa49614e..d3942de254c6 100644
--- a/arch/powerpc/kernel/udbg_16550.c
+++ b/arch/powerpc/kernel/udbg_16550.c
@@ -84,7 +84,7 @@ static int udbg_uart_getc(void)
return udbg_uart_in(UART_RBR);
}
-static void udbg_use_uart(void)
+static void __init udbg_use_uart(void)
{
udbg_putc = udbg_uart_putc;
udbg_flush = udbg_uart_flush;
@@ -92,7 +92,7 @@ static void udbg_use_uart(void)
udbg_getc_poll = udbg_uart_getc_poll;
}
-void udbg_uart_setup(unsigned int speed, unsigned int clock)
+void __init udbg_uart_setup(unsigned int speed, unsigned int clock)
{
unsigned int dll, base_bauds;
@@ -121,7 +121,7 @@ void udbg_uart_setup(unsigned int speed, unsigned int clock)
udbg_uart_out(UART_FCR, 0x7);
}
-unsigned int udbg_probe_uart_speed(unsigned int clock)
+unsigned int __init udbg_probe_uart_speed(unsigned int clock)
{
unsigned int dll, dlm, divisor, prescaler, speed;
u8 old_lcr;
@@ -172,7 +172,7 @@ static void udbg_uart_out_pio(unsigned int reg, u8 data)
outb(data, udbg_uart.pio_base + (reg * udbg_uart_stride));
}
-void udbg_uart_init_pio(unsigned long port, unsigned int stride)
+void __init udbg_uart_init_pio(unsigned long port, unsigned int stride)
{
if (!port)
return;
@@ -194,7 +194,7 @@ static void udbg_uart_out_mmio(unsigned int reg, u8 data)
}
-void udbg_uart_init_mmio(void __iomem *addr, unsigned int stride)
+void __init udbg_uart_init_mmio(void __iomem *addr, unsigned int stride)
{
if (!addr)
return;
diff --git a/arch/powerpc/kernel/vecemu.c b/arch/powerpc/kernel/vecemu.c
index ae632569446f..fd9432875ebc 100644
--- a/arch/powerpc/kernel/vecemu.c
+++ b/arch/powerpc/kernel/vecemu.c
@@ -261,7 +261,7 @@ static unsigned int rfin(unsigned int x)
int emulate_altivec(struct pt_regs *regs)
{
- struct ppc_inst instr;
+ ppc_inst_t instr;
unsigned int i, word;
unsigned int va, vb, vc, vd;
vector128 *vrs;
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index ba03eedfdcd8..5cc24d8cce94 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -47,6 +47,10 @@ EXPORT_SYMBOL(store_vr_state)
*/
_GLOBAL(load_up_altivec)
mfmsr r5 /* grab the current MSR */
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* interrupt doesn't set MSR[RI] and HPT can fault on current access */
+ ori r5,r5,MSR_RI
+#endif
oris r5,r5,MSR_VEC@h
MTMSRD(r5) /* enable use of AltiVec now */
isync
@@ -126,6 +130,12 @@ _GLOBAL(load_up_vsx)
andis. r5,r12,MSR_VEC@h
beql+ load_up_altivec /* skip if already loaded */
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* interrupt doesn't set MSR[RI] and HPT can fault on current access */
+ li r5,MSR_RI
+ mtmsrd r5,1
+#endif
+
ld r4,PACACURRENT(r13)
addi r4,r4,THREAD /* Get THREAD */
li r6,1
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 18e42c74abdd..2bcca818136a 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -322,10 +322,6 @@ SECTIONS
#ifdef CONFIG_PPC32
.data : AT(ADDR(.data) - LOAD_OFFSET) {
DATA_DATA
-#ifdef CONFIG_UBSAN
- *(.data..Lubsan_data*)
- *(.data..Lubsan_type*)
-#endif
*(.data.rel*)
*(SDATA_MAIN)
*(.sdata2)
@@ -336,24 +332,18 @@ SECTIONS
#else
.data : AT(ADDR(.data) - LOAD_OFFSET) {
DATA_DATA
-#ifdef CONFIG_UBSAN
- *(.data..Lubsan_data*)
- *(.data..Lubsan_type*)
-#endif
*(.data.rel*)
*(.toc1)
*(.branch_lt)
}
- . = ALIGN(256);
- .got : AT(ADDR(.got) - LOAD_OFFSET) {
- __toc_start = .;
+ .got : AT(ADDR(.got) - LOAD_OFFSET) ALIGN(256) {
+ *(.got)
#ifndef CONFIG_RELOCATABLE
__prom_init_toc_start = .;
- arch/powerpc/kernel/prom_init.o*(.toc .got)
+ arch/powerpc/kernel/prom_init.o*(.toc)
__prom_init_toc_end = .;
#endif
- *(.got)
*(.toc)
}
#endif
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 3fa6d240bade..bfc27496fe7e 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -85,10 +85,37 @@ static DEFINE_PER_CPU(u64, wd_timer_tb);
/* SMP checker bits */
static unsigned long __wd_smp_lock;
+static unsigned long __wd_reporting;
+static unsigned long __wd_nmi_output;
static cpumask_t wd_smp_cpus_pending;
static cpumask_t wd_smp_cpus_stuck;
static u64 wd_smp_last_reset_tb;
+/*
+ * Try to take the exclusive watchdog action / NMI IPI / printing lock.
+ * wd_smp_lock must be held. If this fails, we should return and wait
+ * for the watchdog to kick in again (or another CPU to trigger it).
+ *
+ * Importantly, if hardlockup_panic is set, wd_try_report failure should
+ * not delay the panic, because whichever other CPU is reporting will
+ * call panic.
+ */
+static bool wd_try_report(void)
+{
+ if (__wd_reporting)
+ return false;
+ __wd_reporting = 1;
+ return true;
+}
+
+/* End printing after successful wd_try_report. wd_smp_lock not required. */
+static void wd_end_reporting(void)
+{
+ smp_mb(); /* End printing "critical section" */
+ WARN_ON_ONCE(__wd_reporting == 0);
+ WRITE_ONCE(__wd_reporting, 0);
+}
+
static inline void wd_smp_lock(unsigned long *flags)
{
/*
@@ -128,109 +155,182 @@ static void wd_lockup_ipi(struct pt_regs *regs)
else
dump_stack();
+ /*
+ * __wd_nmi_output must be set after we printk from NMI context.
+ *
+ * printk from NMI context defers printing to the console to irq_work.
+ * If that NMI was taken in some code that is hard-locked, then irqs
+ * are disabled so irq_work will never fire. That can result in the
+ * hard lockup messages being delayed (indefinitely, until something
+ * else kicks the console drivers).
+ *
+ * Setting __wd_nmi_output will cause another CPU to notice and kick
+ * the console drivers for us.
+ *
+ * xchg is not needed here (it could be a smp_mb and store), but xchg
+ * gives the memory ordering and atomicity required.
+ */
+ xchg(&__wd_nmi_output, 1);
+
/* Do not panic from here because that can recurse into NMI IPI layer */
}
-static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb)
+static bool set_cpu_stuck(int cpu)
{
- cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask);
- cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask);
+ cpumask_set_cpu(cpu, &wd_smp_cpus_stuck);
+ cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
+ /*
+ * See wd_smp_clear_cpu_pending()
+ */
+ smp_mb();
if (cpumask_empty(&wd_smp_cpus_pending)) {
- wd_smp_last_reset_tb = tb;
+ wd_smp_last_reset_tb = get_tb();
cpumask_andnot(&wd_smp_cpus_pending,
&wd_cpus_enabled,
&wd_smp_cpus_stuck);
+ return true;
}
-}
-static void set_cpu_stuck(int cpu, u64 tb)
-{
- set_cpumask_stuck(cpumask_of(cpu), tb);
+ return false;
}
-static void watchdog_smp_panic(int cpu, u64 tb)
+static void watchdog_smp_panic(int cpu)
{
+ static cpumask_t wd_smp_cpus_ipi; // protected by reporting
unsigned long flags;
+ u64 tb, last_reset;
int c;
wd_smp_lock(&flags);
/* Double check some things under lock */
- if ((s64)(tb - wd_smp_last_reset_tb) < (s64)wd_smp_panic_timeout_tb)
+ tb = get_tb();
+ last_reset = wd_smp_last_reset_tb;
+ if ((s64)(tb - last_reset) < (s64)wd_smp_panic_timeout_tb)
goto out;
if (cpumask_test_cpu(cpu, &wd_smp_cpus_pending))
goto out;
- if (cpumask_weight(&wd_smp_cpus_pending) == 0)
+ if (!wd_try_report())
+ goto out;
+ for_each_online_cpu(c) {
+ if (!cpumask_test_cpu(c, &wd_smp_cpus_pending))
+ continue;
+ if (c == cpu)
+ continue; // should not happen
+
+ __cpumask_set_cpu(c, &wd_smp_cpus_ipi);
+ if (set_cpu_stuck(c))
+ break;
+ }
+ if (cpumask_empty(&wd_smp_cpus_ipi)) {
+ wd_end_reporting();
goto out;
+ }
+ wd_smp_unlock(&flags);
pr_emerg("CPU %d detected hard LOCKUP on other CPUs %*pbl\n",
- cpu, cpumask_pr_args(&wd_smp_cpus_pending));
+ cpu, cpumask_pr_args(&wd_smp_cpus_ipi));
pr_emerg("CPU %d TB:%lld, last SMP heartbeat TB:%lld (%lldms ago)\n",
- cpu, tb, wd_smp_last_reset_tb,
- tb_to_ns(tb - wd_smp_last_reset_tb) / 1000000);
+ cpu, tb, last_reset, tb_to_ns(tb - last_reset) / 1000000);
if (!sysctl_hardlockup_all_cpu_backtrace) {
/*
* Try to trigger the stuck CPUs, unless we are going to
* get a backtrace on all of them anyway.
*/
- for_each_cpu(c, &wd_smp_cpus_pending) {
- if (c == cpu)
- continue;
+ for_each_cpu(c, &wd_smp_cpus_ipi) {
smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000);
+ __cpumask_clear_cpu(c, &wd_smp_cpus_ipi);
}
- }
-
- /* Take the stuck CPUs out of the watch group */
- set_cpumask_stuck(&wd_smp_cpus_pending, tb);
-
- wd_smp_unlock(&flags);
-
- if (sysctl_hardlockup_all_cpu_backtrace)
+ } else {
trigger_allbutself_cpu_backtrace();
-
- /*
- * Force flush any remote buffers that might be stuck in IRQ context
- * and therefore could not run their irq_work.
- */
- printk_trigger_flush();
+ cpumask_clear(&wd_smp_cpus_ipi);
+ }
if (hardlockup_panic)
nmi_panic(NULL, "Hard LOCKUP");
+ wd_end_reporting();
+
return;
out:
wd_smp_unlock(&flags);
}
-static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
+static void wd_smp_clear_cpu_pending(int cpu)
{
if (!cpumask_test_cpu(cpu, &wd_smp_cpus_pending)) {
if (unlikely(cpumask_test_cpu(cpu, &wd_smp_cpus_stuck))) {
struct pt_regs *regs = get_irq_regs();
unsigned long flags;
- wd_smp_lock(&flags);
-
pr_emerg("CPU %d became unstuck TB:%lld\n",
- cpu, tb);
+ cpu, get_tb());
print_irqtrace_events(current);
if (regs)
show_regs(regs);
else
dump_stack();
+ wd_smp_lock(&flags);
cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck);
wd_smp_unlock(&flags);
+ } else {
+ /*
+ * The last CPU to clear pending should have reset the
+ * watchdog so we generally should not find it empty
+ * here if our CPU was clear. However it could happen
+ * due to a rare race with another CPU taking the
+ * last CPU out of the mask concurrently.
+ *
+ * We can't add a warning for it. But just in case
+ * there is a problem with the watchdog that is causing
+ * the mask to not be reset, try to kick it along here.
+ */
+ if (unlikely(cpumask_empty(&wd_smp_cpus_pending)))
+ goto none_pending;
}
return;
}
+
+ /*
+ * All other updates to wd_smp_cpus_pending are performed under
+ * wd_smp_lock. All of them are atomic except the case where the
+ * mask becomes empty and is reset. This will not happen here because
+ * cpu was tested to be in the bitmap (above), and a CPU only clears
+ * its own bit. _Except_ in the case where another CPU has detected a
+ * hard lockup on our CPU and takes us out of the pending mask. So in
+ * normal operation there will be no race here, no problem.
+ *
+ * In the lockup case, this atomic clear-bit vs a store that refills
+ * other bits in the accessed word wll not be a problem. The bit clear
+ * is atomic so it will not cause the store to get lost, and the store
+ * will never set this bit so it will not overwrite the bit clear. The
+ * only way for a stuck CPU to return to the pending bitmap is to
+ * become unstuck itself.
+ */
cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
+
+ /*
+ * Order the store to clear pending with the load(s) to check all
+ * words in the pending mask to check they are all empty. This orders
+ * with the same barrier on another CPU. This prevents two CPUs
+ * clearing the last 2 pending bits, but neither seeing the other's
+ * store when checking if the mask is empty, and missing an empty
+ * mask, which ends with a false positive.
+ */
+ smp_mb();
if (cpumask_empty(&wd_smp_cpus_pending)) {
unsigned long flags;
+none_pending:
+ /*
+ * Double check under lock because more than one CPU could see
+ * a clear mask with the lockless check after clearing their
+ * pending bits.
+ */
wd_smp_lock(&flags);
if (cpumask_empty(&wd_smp_cpus_pending)) {
- wd_smp_last_reset_tb = tb;
+ wd_smp_last_reset_tb = get_tb();
cpumask_andnot(&wd_smp_cpus_pending,
&wd_cpus_enabled,
&wd_smp_cpus_stuck);
@@ -245,10 +345,21 @@ static void watchdog_timer_interrupt(int cpu)
per_cpu(wd_timer_tb, cpu) = tb;
- wd_smp_clear_cpu_pending(cpu, tb);
+ wd_smp_clear_cpu_pending(cpu);
if ((s64)(tb - wd_smp_last_reset_tb) >= (s64)wd_smp_panic_timeout_tb)
- watchdog_smp_panic(cpu, tb);
+ watchdog_smp_panic(cpu);
+
+ if (__wd_nmi_output && xchg(&__wd_nmi_output, 0)) {
+ /*
+ * Something has called printk from NMI context. It might be
+ * stuck, so this this triggers a flush that will get that
+ * printk output to the console.
+ *
+ * See wd_lockup_ipi.
+ */
+ printk_trigger_flush();
+ }
}
DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
@@ -267,12 +378,27 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
tb = get_tb();
if (tb - per_cpu(wd_timer_tb, cpu) >= wd_panic_timeout_tb) {
+ /*
+ * Taking wd_smp_lock here means it is a soft-NMI lock, which
+ * means we can't take any regular or irqsafe spin locks while
+ * holding this lock. This is why timers can't printk while
+ * holding the lock.
+ */
wd_smp_lock(&flags);
if (cpumask_test_cpu(cpu, &wd_smp_cpus_stuck)) {
wd_smp_unlock(&flags);
return 0;
}
- set_cpu_stuck(cpu, tb);
+ if (!wd_try_report()) {
+ wd_smp_unlock(&flags);
+ /* Couldn't report, try again in 100ms */
+ mtspr(SPRN_DEC, 100 * tb_ticks_per_usec * 1000);
+ return 0;
+ }
+
+ set_cpu_stuck(cpu);
+
+ wd_smp_unlock(&flags);
pr_emerg("CPU %d self-detected hard LOCKUP @ %pS\n",
cpu, (void *)regs->nip);
@@ -283,14 +409,21 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
print_irqtrace_events(current);
show_regs(regs);
- wd_smp_unlock(&flags);
+ xchg(&__wd_nmi_output, 1); // see wd_lockup_ipi
if (sysctl_hardlockup_all_cpu_backtrace)
trigger_allbutself_cpu_backtrace();
if (hardlockup_panic)
nmi_panic(regs, "Hard LOCKUP");
+
+ wd_end_reporting();
}
+ /*
+ * We are okay to change DEC in soft_nmi_interrupt because the masked
+ * handler has marked a DEC as pending, so the timer interrupt will be
+ * replayed as soon as local irqs are enabled again.
+ */
if (wd_panic_timeout_tb < 0x7fffffff)
mtspr(SPRN_DEC, wd_panic_timeout_tb);
@@ -318,11 +451,15 @@ void arch_touch_nmi_watchdog(void)
{
unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000;
int cpu = smp_processor_id();
- u64 tb = get_tb();
+ u64 tb;
+ if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
+ return;
+
+ tb = get_tb();
if (tb - per_cpu(wd_timer_tb, cpu) >= ticks) {
per_cpu(wd_timer_tb, cpu) = tb;
- wd_smp_clear_cpu_pending(cpu, tb);
+ wd_smp_clear_cpu_pending(cpu);
}
}
EXPORT_SYMBOL(arch_touch_nmi_watchdog);
@@ -380,7 +517,7 @@ static void stop_watchdog(void *arg)
cpumask_clear_cpu(cpu, &wd_cpus_enabled);
wd_smp_unlock(&flags);
- wd_smp_clear_cpu_pending(cpu, get_tb());
+ wd_smp_clear_cpu_pending(cpu);
}
static int stop_watchdog_on_cpu(unsigned int cpu)