Diffstat (limited to 'arch/powerpc/kernel')
60 files changed, 1145 insertions, 823 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 244542ae2a91..d4d5946224f8 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -45,11 +45,10 @@ obj-y := cputable.o syscalls.o \ signal.o sysfs.o cacheinfo.o time.o \ prom.o traps.o setup-common.o \ udbg.o misc.o io.o misc_$(BITS).o \ - of_platform.o prom_parse.o + of_platform.o prom_parse.o firmware.o obj-y += ptrace/ obj-$(CONFIG_PPC64) += setup_64.o \ - paca.o nvram_64.o firmware.o note.o \ - syscall_64.o + paca.o nvram_64.o note.o syscall_64.o obj-$(CONFIG_COMPAT) += sys_ppc32.o signal_32.o obj-$(CONFIG_VDSO32) += vdso32/ obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o @@ -71,7 +70,7 @@ obj-$(CONFIG_PPC_RTAS_DAEMON) += rtasd.o obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o obj-$(CONFIG_RTAS_PROC) += rtas-proc.o obj-$(CONFIG_PPC_DT_CPU_FTRS) += dt_cpu_ftrs.o -obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \ +obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_cache.o \ eeh_driver.o eeh_event.o eeh_sysfs.o obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index 1f1ce8b86d5b..c7797eb958c7 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -178,11 +178,11 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg, ret |= __get_user_inatomic(temp.v[1], p++); ret |= __get_user_inatomic(temp.v[2], p++); ret |= __get_user_inatomic(temp.v[3], p++); - /* fall through */ + fallthrough; case 4: ret |= __get_user_inatomic(temp.v[4], p++); ret |= __get_user_inatomic(temp.v[5], p++); - /* fall through */ + fallthrough; case 2: ret |= __get_user_inatomic(temp.v[6], p++); ret |= __get_user_inatomic(temp.v[7], p++); @@ -263,11 +263,11 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg, ret |= __put_user_inatomic(data.v[1], p++); ret |= __put_user_inatomic(data.v[2], p++); ret |= __put_user_inatomic(data.v[3], p++); - /* fall through */ + fallthrough; case 4: ret |= __put_user_inatomic(data.v[4], p++); ret |= __put_user_inatomic(data.v[5], p++); - /* fall through */ + fallthrough; case 2: ret |= __put_user_inatomic(data.v[6], p++); ret |= __put_user_inatomic(data.v[7], p++); diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 6657dc6b2336..8711c2164b45 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -559,6 +559,8 @@ int main(void) OFFSET(VCPU_IRQ_PENDING, kvm_vcpu, arch.irq_pending); OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request); OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr); + OFFSET(VCPU_MMCRA, kvm_vcpu, arch.mmcra); + OFFSET(VCPU_MMCRS, kvm_vcpu, arch.mmcrs); OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc); OFFSET(VCPU_SPMC, kvm_vcpu, arch.spmc); OFFSET(VCPU_SIAR, kvm_vcpu, arch.siar); @@ -696,6 +698,9 @@ int main(void) HSTATE_FIELD(HSTATE_SDAR, host_mmcr[4]); HSTATE_FIELD(HSTATE_MMCR2, host_mmcr[5]); HSTATE_FIELD(HSTATE_SIER, host_mmcr[6]); + HSTATE_FIELD(HSTATE_MMCR3, host_mmcr[7]); + HSTATE_FIELD(HSTATE_SIER2, host_mmcr[8]); + HSTATE_FIELD(HSTATE_SIER3, host_mmcr[9]); HSTATE_FIELD(HSTATE_PMC1, host_pmc[0]); HSTATE_FIELD(HSTATE_PMC2, host_pmc[1]); HSTATE_FIELD(HSTATE_PMC3, host_pmc[2]); diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 470336277c67..65ab9fcebd31 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -7,6 +7,8 @@ * Author: Nathan Lynch */ +#define pr_fmt(fmt) "cacheinfo: " fmt + #include <linux/cpu.h> 
#include <linux/cpumask.h> #include <linux/kernel.h> @@ -166,7 +168,7 @@ static void release_cache_debugcheck(struct cache *cache) list_for_each_entry(iter, &cache_list, list) WARN_ONCE(iter->next_local == cache, - "cache for %pOF(%s) refers to cache for %pOF(%s)\n", + "cache for %pOFP(%s) refers to cache for %pOFP(%s)\n", iter->ofnode, cache_type_string(iter), cache->ofnode, @@ -178,7 +180,7 @@ static void release_cache(struct cache *cache) if (!cache) return; - pr_debug("freeing L%d %s cache for %pOF\n", cache->level, + pr_debug("freeing L%d %s cache for %pOFP\n", cache->level, cache_type_string(cache), cache->ofnode); release_cache_debugcheck(cache); @@ -193,7 +195,7 @@ static void cache_cpu_set(struct cache *cache, int cpu) while (next) { WARN_ONCE(cpumask_test_cpu(cpu, &next->shared_cpu_map), - "CPU %i already accounted in %pOF(%s)\n", + "CPU %i already accounted in %pOFP(%s)\n", cpu, next->ofnode, cache_type_string(next)); cpumask_set_cpu(cpu, &next->shared_cpu_map); @@ -352,7 +354,7 @@ static int cache_is_unified_d(const struct device_node *np) static struct cache *cache_do_one_devnode_unified(struct device_node *node, int level) { - pr_debug("creating L%d ucache for %pOF\n", level, node); + pr_debug("creating L%d ucache for %pOFP\n", level, node); return new_cache(cache_is_unified_d(node), level, node); } @@ -362,7 +364,7 @@ static struct cache *cache_do_one_devnode_split(struct device_node *node, { struct cache *dcache, *icache; - pr_debug("creating L%d dcache and icache for %pOF\n", level, + pr_debug("creating L%d dcache and icache for %pOFP\n", level, node); dcache = new_cache(CACHE_TYPE_DATA, level, node); @@ -418,12 +420,27 @@ static void link_cache_lists(struct cache *smaller, struct cache *bigger) } smaller->next_local = bigger; + + /* + * The cache->next_local list sorts by level ascending: + * L1d -> L1i -> L2 -> L3 ... 
+ */ + WARN_ONCE((smaller->level == 1 && bigger->level > 2) || + (smaller->level > 1 && bigger->level != smaller->level + 1), + "linking L%i cache %pOFP to L%i cache %pOFP; skipped a level?\n", + smaller->level, smaller->ofnode, bigger->level, bigger->ofnode); } static void do_subsidiary_caches_debugcheck(struct cache *cache) { - WARN_ON_ONCE(cache->level != 1); - WARN_ON_ONCE(!of_node_is_type(cache->ofnode, "cpu")); + WARN_ONCE(cache->level != 1, + "instantiating cache chain from L%d %s cache for " + "%pOFP instead of an L1\n", cache->level, + cache_type_string(cache), cache->ofnode); + WARN_ONCE(!of_node_is_type(cache->ofnode, "cpu"), + "instantiating cache chain from node %pOFP of type '%s' " + "instead of a cpu node\n", cache->ofnode, + of_node_get_device_type(cache->ofnode)); } static void do_subsidiary_caches(struct cache *cache) @@ -647,12 +664,13 @@ static const struct cpumask *get_big_core_shared_cpu_map(int cpu, struct cache * return &cache->shared_cpu_map; } -static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf) +static ssize_t +show_shared_cpumap(struct kobject *k, struct kobj_attribute *attr, char *buf, bool list) { struct cache_index_dir *index; struct cache *cache; const struct cpumask *mask; - int ret, cpu; + int cpu; index = kobj_to_cache_index_dir(k); cache = index->cache; @@ -664,16 +682,25 @@ static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *att mask = &cache->shared_cpu_map; } - ret = scnprintf(buf, PAGE_SIZE - 1, "%*pb\n", - cpumask_pr_args(mask)); - buf[ret++] = '\n'; - buf[ret] = '\0'; - return ret; + return cpumap_print_to_pagebuf(list, buf, mask); +} + +static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf) +{ + return show_shared_cpumap(k, attr, buf, false); +} + +static ssize_t shared_cpu_list_show(struct kobject *k, struct kobj_attribute *attr, char *buf) +{ + return show_shared_cpumap(k, attr, buf, true); } static struct kobj_attribute cache_shared_cpu_map_attr = __ATTR(shared_cpu_map, 0444, shared_cpu_map_show, NULL); +static struct kobj_attribute cache_shared_cpu_list_attr = + __ATTR(shared_cpu_list, 0444, shared_cpu_list_show, NULL); + /* Attributes which should always be created -- the kobject/sysfs core * does this automatically via kobj_type->default_attrs. This is the * minimum data required to uniquely identify a cache. 
@@ -682,6 +709,7 @@ static struct attribute *cache_index_default_attrs[] = { &cache_type_attr.attr, &cache_level_attr.attr, &cache_shared_cpu_map_attr.attr, + &cache_shared_cpu_list_attr.attr, NULL, }; @@ -733,13 +761,13 @@ static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir) rc = attr->show(&dir->kobj, attr, buf); if (rc <= 0) { pr_debug("not creating %s attribute for " - "%pOF(%s) (rc = %zd)\n", + "%pOFP(%s) (rc = %zd)\n", attr->attr.name, cache->ofnode, cache_type, rc); continue; } if (sysfs_create_file(&dir->kobj, &attr->attr)) - pr_debug("could not create %s attribute for %pOF(%s)\n", + pr_debug("could not create %s attribute for %pOFP(%s)\n", attr->attr.name, cache->ofnode, cache_type); } @@ -855,7 +883,7 @@ static void cache_cpu_clear(struct cache *cache, int cpu) struct cache *next = cache->next_local; WARN_ONCE(!cpumask_test_cpu(cpu, &cache->shared_cpu_map), - "CPU %i not accounted in %pOF(%s)\n", + "CPU %i not accounted in %pOFP(%s)\n", cpu, cache->ofnode, cache_type_string(cache)); diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index efdcfa714106..704e8b9501ee 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -94,13 +94,15 @@ _GLOBAL(__restore_cpu_power8) _GLOBAL(__setup_cpu_power10) mflr r11 bl __init_FSCR_power10 + bl __init_PMU + bl __init_PMU_ISA31 b 1f _GLOBAL(__setup_cpu_power9) mflr r11 - bl __init_FSCR -1: bl __init_PMU - bl __init_hvmode_206 + bl __init_FSCR_power9 + bl __init_PMU +1: bl __init_hvmode_206 mtlr r11 beqlr li r0,0 @@ -124,13 +126,15 @@ _GLOBAL(__setup_cpu_power9) _GLOBAL(__restore_cpu_power10) mflr r11 bl __init_FSCR_power10 + bl __init_PMU + bl __init_PMU_ISA31 b 1f _GLOBAL(__restore_cpu_power9) mflr r11 - bl __init_FSCR -1: bl __init_PMU - mfmsr r3 + bl __init_FSCR_power9 + bl __init_PMU +1: mfmsr r3 rldicl. 
r0,r3,4,63 mtlr r11 beqlr @@ -198,6 +202,12 @@ __init_FSCR_power10: mtspr SPRN_FSCR, r3 // fall through +__init_FSCR_power9: + mfspr r3, SPRN_FSCR + ori r3, r3, FSCR_SCV + mtspr SPRN_FSCR, r3 + // fall through + __init_FSCR: mfspr r3,SPRN_FSCR ori r3,r3,FSCR_TAR|FSCR_EBB @@ -233,3 +243,10 @@ __init_PMU_ISA207: li r5,0 mtspr SPRN_MMCRS,r5 blr + +__init_PMU_ISA31: + li r5,0 + mtspr SPRN_MMCR3,r5 + LOAD_REG_IMMEDIATE(r5, MMCRA_BHRB_DISABLE) + mtspr SPRN_MMCRA,r5 + blr diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index b4066354f073..3d406a9626e8 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -120,7 +120,8 @@ extern void __restore_cpu_e6500(void); #define COMMON_USER2_POWER9 (COMMON_USER2_POWER8 | \ PPC_FEATURE2_ARCH_3_00 | \ PPC_FEATURE2_HAS_IEEE128 | \ - PPC_FEATURE2_DARN ) + PPC_FEATURE2_DARN | \ + PPC_FEATURE2_SCV) #define COMMON_USER_POWER10 COMMON_USER_POWER9 #define COMMON_USER2_POWER10 (COMMON_USER2_POWER9 | \ PPC_FEATURE2_ARCH_3_1 | \ diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c index 500f52fa4711..cdc2dccb987d 100644 --- a/arch/powerpc/kernel/dawr.c +++ b/arch/powerpc/kernel/dawr.c @@ -37,7 +37,7 @@ int set_dawr(int nr, struct arch_hw_breakpoint *brk) dawrx |= (mrd & 0x3f) << (63 - 53); if (ppc_md.set_dawr) - return ppc_md.set_dawr(dawr, dawrx); + return ppc_md.set_dawr(nr, dawr, dawrx); if (nr == 0) { mtspr(SPRN_DAWR0, dawr); diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index f17ff1200eaa..52680cf07c9d 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -18,61 +18,6 @@ #ifdef CONFIG_SMP -/* - * Doorbells must only be used if CPU_FTR_DBELL is available. - * msgsnd is used in HV, and msgsndp is used in !HV. - * - * These should be used by platform code that is aware of restrictions. - * Other arch code should use ->cause_ipi. - * - * doorbell_global_ipi() sends a dbell to any target CPU. - * Must be used only by architectures that address msgsnd target - * by PIR/get_hard_smp_processor_id. - */ -void doorbell_global_ipi(int cpu) -{ - u32 tag = get_hard_smp_processor_id(cpu); - - kvmppc_set_host_ipi(cpu); - /* Order previous accesses vs. msgsnd, which is treated as a store */ - ppc_msgsnd_sync(); - ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag); -} - -/* - * doorbell_core_ipi() sends a dbell to a target CPU in the same core. - * Must be used only by architectures that address msgsnd target - * by TIR/cpu_thread_in_core. - */ -void doorbell_core_ipi(int cpu) -{ - u32 tag = cpu_thread_in_core(cpu); - - kvmppc_set_host_ipi(cpu); - /* Order previous accesses vs. msgsnd, which is treated as a store */ - ppc_msgsnd_sync(); - ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag); -} - -/* - * Attempt to cause a core doorbell if destination is on the same core. - * Returns 1 on success, 0 on failure. 
- */ -int doorbell_try_core_ipi(int cpu) -{ - int this_cpu = get_cpu(); - int ret = 0; - - if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu))) { - doorbell_core_ipi(cpu); - ret = 1; - } - - put_cpu(); - - return ret; -} - void doorbell_exception(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 3a409517c031..6f8c0c6b937a 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -24,7 +24,6 @@ /* Device-tree visible constants follow */ -#define ISA_V2_07B 2070 #define ISA_V3_0B 3000 #define ISA_V3_1 3100 @@ -67,6 +66,7 @@ struct dt_cpu_feature { extern long __machine_check_early_realmode_p8(struct pt_regs *regs); extern long __machine_check_early_realmode_p9(struct pt_regs *regs); +extern long __machine_check_early_realmode_p10(struct pt_regs *regs); static int hv_mode; @@ -337,6 +337,7 @@ static int __init feat_enable_mmu_radix(struct dt_cpu_feature *f) #ifdef CONFIG_PPC_RADIX_MMU cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX; cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE; + cur_cpu_spec->mmu_features |= MMU_FTR_GTSE; cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU; return 1; @@ -450,6 +451,39 @@ static int __init feat_enable_pmu_power9(struct dt_cpu_feature *f) return 1; } +static void init_pmu_power10(void) +{ + init_pmu_power9(); + + mtspr(SPRN_MMCR3, 0); + mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE); +} + +static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f) +{ + hfscr_pmu_enable(); + + init_pmu_power10(); + init_pmu_registers = init_pmu_power10; + + cur_cpu_spec->cpu_features |= CPU_FTR_MMCRA; + cur_cpu_spec->cpu_user_features |= PPC_FEATURE_PSERIES_PERFMON_COMPAT; + + cur_cpu_spec->num_pmcs = 6; + cur_cpu_spec->pmc_type = PPC_PMC_IBM; + cur_cpu_spec->oprofile_cpu_type = "ppc64/power10"; + + return 1; +} + +static int __init feat_enable_mce_power10(struct dt_cpu_feature *f) +{ + cur_cpu_spec->platform = "power10"; + cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p10; + + return 1; +} + static int __init feat_enable_tm(struct dt_cpu_feature *f) { #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -587,6 +621,7 @@ static struct dt_cpu_feature_match __initdata {"little-endian", feat_enable_le, CPU_FTR_REAL_LE}, {"smt", feat_enable_smt, 0}, {"interrupt-facilities", feat_enable, 0}, + {"system-call-vectored", feat_enable, 0}, {"timer-facilities", feat_enable, 0}, {"timer-facilities-v3", feat_enable, 0}, {"debug-facilities", feat_enable, 0}, @@ -622,7 +657,7 @@ static struct dt_cpu_feature_match __initdata {"processor-control-facility-v3", feat_enable_dbell, CPU_FTR_DBELL}, {"processor-utilization-of-resources-register", feat_enable_purr, 0}, {"no-execute", feat_enable, 0}, - {"strong-access-ordering", feat_enable, CPU_FTR_SAO}, + /* strong-access-ordering is unused */ {"cache-inhibited-large-page", feat_enable_large_ci, 0}, {"coprocessor-icswx", feat_enable, 0}, {"hypervisor-virtualization-interrupt", feat_enable_hvi, 0}, @@ -638,7 +673,9 @@ static struct dt_cpu_feature_match __initdata {"group-start-register", feat_enable, 0}, {"pc-relative-addressing", feat_enable, 0}, {"machine-check-power9", feat_enable_mce_power9, 0}, + {"machine-check-power10", feat_enable_mce_power10, 0}, {"performance-monitor-power9", feat_enable_pmu_power9, 0}, + {"performance-monitor-power10", feat_enable_pmu_power10, 0}, {"event-based-branch-v3", feat_enable, 0}, {"random-number-generator", feat_enable, 0}, {"system-call-vectored", 
feat_disable, 0}, @@ -649,6 +686,7 @@ static struct dt_cpu_feature_match __initdata {"wait-v3", feat_enable, 0}, {"prefix-instructions", feat_enable, 0}, {"matrix-multiply-assist", feat_enable_mma, 0}, + {"debug-facilities-v31", feat_enable, CPU_FTR_DAWR1}, }; static bool __initdata using_dt_cpu_ftrs; @@ -674,12 +712,12 @@ static void __init cpufeatures_setup_start(u32 isa) { pr_info("setup for ISA %d\n", isa); - if (isa >= 3000) { + if (isa >= ISA_V3_0B) { cur_cpu_spec->cpu_features |= CPU_FTR_ARCH_300; cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_ARCH_3_00; } - if (isa >= 3100) { + if (isa >= ISA_V3_1) { cur_cpu_spec->cpu_features |= CPU_FTR_ARCH_31; cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_ARCH_3_1; } @@ -776,12 +814,6 @@ static __init void cpufeatures_cpu_quirks(void) } update_tlbie_feature_flag(version); - /* - * PKEY was not in the initial base or feature node - * specification, but it should become optional in the next - * cpu feature version sequence. - */ - cur_cpu_spec->cpu_features |= CPU_FTR_PKEY; } static void __init cpufeatures_setup_finished(void) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index d407981dec76..94682382fc8c 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -167,39 +167,33 @@ void eeh_show_enabled(void) */ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) { - struct pci_dn *pdn = eeh_dev_to_pdn(edev); u32 cfg; int cap, i; int n = 0, l = 0; char buffer[128]; - if (!pdn) { - pr_warn("EEH: Note: No error log for absent device.\n"); - return 0; - } - n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n", - pdn->phb->global_number, pdn->busno, - PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); + edev->pe->phb->global_number, edev->bdfn >> 8, + PCI_SLOT(edev->bdfn), PCI_FUNC(edev->bdfn)); pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n", - pdn->phb->global_number, pdn->busno, - PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); + edev->pe->phb->global_number, edev->bdfn >> 8, + PCI_SLOT(edev->bdfn), PCI_FUNC(edev->bdfn)); - eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg); + eeh_ops->read_config(edev, PCI_VENDOR_ID, 4, &cfg); n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg); pr_warn("EEH: PCI device/vendor: %08x\n", cfg); - eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cfg); + eeh_ops->read_config(edev, PCI_COMMAND, 4, &cfg); n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg); pr_warn("EEH: PCI cmd/status register: %08x\n", cfg); /* Gather bridge-specific registers */ if (edev->mode & EEH_DEV_BRIDGE) { - eeh_ops->read_config(pdn, PCI_SEC_STATUS, 2, &cfg); + eeh_ops->read_config(edev, PCI_SEC_STATUS, 2, &cfg); n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg); pr_warn("EEH: Bridge secondary status: %04x\n", cfg); - eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg); + eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &cfg); n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg); pr_warn("EEH: Bridge control: %04x\n", cfg); } @@ -207,11 +201,11 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) /* Dump out the PCI-X command and status regs */ cap = edev->pcix_cap; if (cap) { - eeh_ops->read_config(pdn, cap, 4, &cfg); + eeh_ops->read_config(edev, cap, 4, &cfg); n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg); pr_warn("EEH: PCI-X cmd: %08x\n", cfg); - eeh_ops->read_config(pdn, cap+4, 4, &cfg); + eeh_ops->read_config(edev, cap+4, 4, &cfg); n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg); pr_warn("EEH: PCI-X status: %08x\n", cfg); } @@ -223,7 +217,7 @@ static size_t 
eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) pr_warn("EEH: PCI-E capabilities and status follow:\n"); for (i=0; i<=8; i++) { - eeh_ops->read_config(pdn, cap+4*i, 4, &cfg); + eeh_ops->read_config(edev, cap+4*i, 4, &cfg); n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); if ((i % 4) == 0) { @@ -250,7 +244,7 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) pr_warn("EEH: PCI-E AER capability register set follows:\n"); for (i=0; i<=13; i++) { - eeh_ops->read_config(pdn, cap+4*i, 4, &cfg); + eeh_ops->read_config(edev, cap+4*i, 4, &cfg); n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); if ((i % 4) == 0) { @@ -726,7 +720,6 @@ static void eeh_disable_and_save_dev_state(struct eeh_dev *edev, static void eeh_restore_dev_state(struct eeh_dev *edev, void *userdata) { - struct pci_dn *pdn = eeh_dev_to_pdn(edev); struct pci_dev *pdev = eeh_dev_to_pci_dev(edev); struct pci_dev *dev = userdata; @@ -734,73 +727,14 @@ static void eeh_restore_dev_state(struct eeh_dev *edev, void *userdata) return; /* Apply customization from firmware */ - if (pdn && eeh_ops->restore_config) - eeh_ops->restore_config(pdn); + if (eeh_ops->restore_config) + eeh_ops->restore_config(edev); /* The caller should restore state for the specified device */ if (pdev != dev) pci_restore_state(pdev); } -int eeh_restore_vf_config(struct pci_dn *pdn) -{ - struct eeh_dev *edev = pdn_to_eeh_dev(pdn); - u32 devctl, cmd, cap2, aer_capctl; - int old_mps; - - if (edev->pcie_cap) { - /* Restore MPS */ - old_mps = (ffs(pdn->mps) - 8) << 5; - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, &devctl); - devctl &= ~PCI_EXP_DEVCTL_PAYLOAD; - devctl |= old_mps; - eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, devctl); - - /* Disable Completion Timeout if possible */ - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2, - 4, &cap2); - if (cap2 & PCI_EXP_DEVCAP2_COMP_TMOUT_DIS) { - eeh_ops->read_config(pdn, - edev->pcie_cap + PCI_EXP_DEVCTL2, - 4, &cap2); - cap2 |= PCI_EXP_DEVCTL2_COMP_TMOUT_DIS; - eeh_ops->write_config(pdn, - edev->pcie_cap + PCI_EXP_DEVCTL2, - 4, cap2); - } - } - - /* Enable SERR and parity checking */ - eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd); - cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR); - eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd); - - /* Enable report various errors */ - if (edev->pcie_cap) { - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, &devctl); - devctl &= ~PCI_EXP_DEVCTL_CERE; - devctl |= (PCI_EXP_DEVCTL_NFERE | - PCI_EXP_DEVCTL_FERE | - PCI_EXP_DEVCTL_URRE); - eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, devctl); - } - - /* Enable ECRC generation and check */ - if (edev->pcie_cap && edev->aer_cap) { - eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP, - 4, &aer_capctl); - aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE); - eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP, - 4, aer_capctl); - } - - return 0; -} - /** * pcibios_set_pcie_reset_state - Set PCI-E reset state * @dev: pci device struct @@ -977,15 +911,13 @@ int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed) */ void eeh_save_bars(struct eeh_dev *edev) { - struct pci_dn *pdn; int i; - pdn = eeh_dev_to_pdn(edev); - if (!pdn) + if (!edev) return; for (i = 0; i < 16; i++) - eeh_ops->read_config(pdn, i * 4, 4, &edev->config_space[i]); + eeh_ops->read_config(edev, i * 4, 4, &edev->config_space[i]); /* * For PCI bridges including root port, we need enable bus @@ -1096,7 +1028,7 @@ static int 
eeh_init(void) /* Initialize PHB PEs */ list_for_each_entry_safe(hose, tmp, &hose_list, list_node) - eeh_dev_phb_init_dynamic(hose); + eeh_phb_pe_create(hose); eeh_addr_cache_init(); @@ -1175,7 +1107,7 @@ void eeh_probe_device(struct pci_dev *dev) * FIXME: HEY MA, LOOK AT ME, NO LOCKING! */ if (edev->pdev && edev->pdev != dev) { - eeh_rmv_from_parent_pe(edev); + eeh_pe_tree_remove(edev); eeh_addr_cache_rmv_dev(edev->pdev); eeh_sysfs_remove_device(edev->pdev); @@ -1254,7 +1186,7 @@ void eeh_remove_device(struct pci_dev *dev) edev->in_error = false; dev->dev.archdata.edev = NULL; if (!(edev->pe->state & EEH_PE_KEEP)) - eeh_rmv_from_parent_pe(edev); + eeh_pe_tree_remove(edev); else edev->mode |= EEH_DEV_DISCONNECTED; } diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c deleted file mode 100644 index 7370185c7a05..000000000000 --- a/arch/powerpc/kernel/eeh_dev.c +++ /dev/null @@ -1,67 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * The file intends to implement dynamic creation of EEH device, which will - * be bound with OF node and PCI device simutaneously. The EEH devices would - * be foundamental information for EEH core components to work proerly. Besides, - * We have to support multiple situations where dynamic creation of EEH device - * is required: - * - * 1) Before PCI emunation starts, we need create EEH devices according to the - * PCI sensitive OF nodes. - * 2) When PCI emunation is done, we need do the binding between PCI device and - * the associated EEH device. - * 3) DR (Dynamic Reconfiguration) would create PCI sensitive OF node. EEH device - * will be created while PCI sensitive OF node is detected from DR. - * 4) PCI hotplug needs redoing the binding between PCI device and EEH device. If - * PHB is newly inserted, we also need create EEH devices accordingly. - * - * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012. - */ - -#include <linux/export.h> -#include <linux/gfp.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/string.h> - -#include <asm/pci-bridge.h> -#include <asm/ppc-pci.h> - -/** - * eeh_dev_init - Create EEH device according to OF node - * @pdn: PCI device node - * - * It will create EEH device according to the given OF node. The function - * might be called by PCI emunation, DR, PHB hotplug. 
- */ -struct eeh_dev *eeh_dev_init(struct pci_dn *pdn) -{ - struct eeh_dev *edev; - - /* Allocate EEH device */ - edev = kzalloc(sizeof(*edev), GFP_KERNEL); - if (!edev) - return NULL; - - /* Associate EEH device with OF node */ - pdn->edev = edev; - edev->pdn = pdn; - edev->bdfn = (pdn->busno << 8) | pdn->devfn; - edev->controller = pdn->phb; - - return edev; -} - -/** - * eeh_dev_phb_init_dynamic - Create EEH devices for devices included in PHB - * @phb: PHB - * - * Scan the PHB OF node and its child association, then create the - * EEH devices accordingly - */ -void eeh_dev_phb_init_dynamic(struct pci_controller *phb) -{ - /* EEH PE for PHB */ - eeh_phb_pe_create(phb); -} diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 7b048cee767c..4197e4559f65 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -425,8 +425,8 @@ static enum pci_ers_result eeh_report_resume(struct eeh_dev *edev, pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_RECOVERED); #ifdef CONFIG_PCI_IOV - if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev)) - eeh_ops->notify_resume(eeh_dev_to_pdn(edev)); + if (eeh_ops->notify_resume) + eeh_ops->notify_resume(edev); #endif return PCI_ERS_RESULT_NONE; } @@ -477,7 +477,7 @@ static void *eeh_add_virt_device(struct eeh_dev *edev) } #ifdef CONFIG_PCI_IOV - pci_iov_add_virtfn(edev->physfn, eeh_dev_to_pdn(edev)->vf_index); + pci_iov_add_virtfn(edev->physfn, edev->vf_index); #endif return NULL; } @@ -521,9 +521,7 @@ static void eeh_rmv_device(struct eeh_dev *edev, void *userdata) if (edev->physfn) { #ifdef CONFIG_PCI_IOV - struct pci_dn *pdn = eeh_dev_to_pdn(edev); - - pci_iov_remove_virtfn(edev->physfn, pdn->vf_index); + pci_iov_remove_virtfn(edev->physfn, edev->vf_index); edev->pdev = NULL; #endif if (rmv_data) @@ -544,7 +542,7 @@ static void *eeh_pe_detach_dev(struct eeh_pe *pe, void *userdata) continue; edev->mode &= ~(EEH_DEV_DISCONNECTED | EEH_DEV_IRQ_DISABLED); - eeh_rmv_from_parent_pe(edev); + eeh_pe_tree_remove(edev); } return NULL; diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 177852e39a25..d2aaaa73fdd5 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -319,56 +319,22 @@ struct eeh_pe *eeh_pe_get(struct pci_controller *phb, } /** - * eeh_pe_get_parent - Retrieve the parent PE + * eeh_pe_tree_insert - Add EEH device to parent PE * @edev: EEH device + * @new_pe_parent: PE to create additional PEs under * - * The whole PEs existing in the system are organized as hierarchy - * tree. The function is used to retrieve the parent PE according - * to the parent EEH device. - */ -static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev) -{ - struct eeh_dev *parent; - struct pci_dn *pdn = eeh_dev_to_pdn(edev); - - /* - * It might have the case for the indirect parent - * EEH device already having associated PE, but - * the direct parent EEH device doesn't have yet. - */ - if (edev->physfn) - pdn = pci_get_pdn(edev->physfn); - else - pdn = pdn ? pdn->parent : NULL; - while (pdn) { - /* We're poking out of PCI territory */ - parent = pdn_to_eeh_dev(pdn); - if (!parent) - return NULL; - - if (parent->pe) - return parent->pe; - - pdn = pdn->parent; - } - - return NULL; -} - -/** - * eeh_add_to_parent_pe - Add EEH device to parent PE - * @edev: EEH device + * Add EEH device to the PE in edev->pe_config_addr. If a PE already + * exists with that address then @edev is added to that PE. 
Otherwise + * a new PE is created and inserted into the PE tree as a child of + * @new_pe_parent. * - * Add EEH device to the parent PE. If the parent PE already - * exists, the PE type will be changed to EEH_PE_BUS. Otherwise, - * we have to create new PE to hold the EEH device and the new - * PE will be linked to its parent PE as well. + * If @new_pe_parent is NULL then the new PE will be inserted + * directly under the PHB. */ -int eeh_add_to_parent_pe(struct eeh_dev *edev) +int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent) { + struct pci_controller *hose = edev->controller; struct eeh_pe *pe, *parent; - struct pci_dn *pdn = eeh_dev_to_pdn(edev); - int config_addr = (pdn->busno << 8) | (pdn->devfn); /* Check if the PE number is valid */ if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) { @@ -382,7 +348,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) * PE should be composed of PCI bus and its subordinate * components. */ - pe = eeh_pe_get(pdn->phb, edev->pe_config_addr, config_addr); + pe = eeh_pe_get(hose, edev->pe_config_addr, edev->bdfn); if (pe) { if (pe->type & EEH_PE_INVALID) { list_add_tail(&edev->entry, &pe->edevs); @@ -399,8 +365,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) parent = parent->parent; } - eeh_edev_dbg(edev, - "Added to device PE (parent: PE#%x)\n", + eeh_edev_dbg(edev, "Added to existing PE (parent: PE#%x)\n", pe->parent->addr); } else { /* Mark the PE as type of PCI bus */ @@ -416,15 +381,15 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) /* Create a new EEH PE */ if (edev->physfn) - pe = eeh_pe_alloc(pdn->phb, EEH_PE_VF); + pe = eeh_pe_alloc(hose, EEH_PE_VF); else - pe = eeh_pe_alloc(pdn->phb, EEH_PE_DEVICE); + pe = eeh_pe_alloc(hose, EEH_PE_DEVICE); if (!pe) { pr_err("%s: out of memory!\n", __func__); return -ENOMEM; } pe->addr = edev->pe_config_addr; - pe->config_addr = config_addr; + pe->config_addr = edev->bdfn; /* * Put the new EEH PE into hierarchy tree. If the parent * to PHB directly. Otherwise, we have to associate the * PE with its parent. */ - parent = eeh_pe_get_parent(edev); - if (!parent) { - parent = eeh_phb_pe_get(pdn->phb); - if (!parent) { + if (!new_pe_parent) { + new_pe_parent = eeh_phb_pe_get(hose); + if (!new_pe_parent) { pr_err("%s: No PHB PE is found (PHB Domain=%d)\n", - __func__, pdn->phb->global_number); + __func__, hose->global_number); edev->pe = NULL; kfree(pe); return -EEXIST; } } - pe->parent = parent; + + /* link new PE into the tree */ + pe->parent = new_pe_parent; + list_add_tail(&pe->child, &new_pe_parent->child_list); /* * Put the newly created PE into the child list and * link the EEH device accordingly. */ - list_add_tail(&pe->child, &parent->child_list); list_add_tail(&edev->entry, &pe->edevs); edev->pe = pe; - eeh_edev_dbg(edev, "Added to device PE (parent: PE#%x)\n", - pe->parent->addr); + eeh_edev_dbg(edev, "Added to new (parent: PE#%x)\n", + new_pe_parent->addr); return 0; } /** - * eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE + * eeh_pe_tree_remove - Remove one EEH device from the associated PE * @edev: EEH device * * The PE hierarchy tree might be changed when doing PCI hotplug. @@ -467,7 +433,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) * during EEH recovery. So we have to call the function remove the * corresponding PE accordingly if necessary. 
*/ -int eeh_rmv_from_parent_pe(struct eeh_dev *edev) +int eeh_pe_tree_remove(struct eeh_dev *edev) { struct eeh_pe *pe, *parent, *child; bool keep, recover; @@ -698,7 +664,6 @@ void eeh_pe_state_clear(struct eeh_pe *root, int state, bool include_passed) */ static void eeh_bridge_check_link(struct eeh_dev *edev) { - struct pci_dn *pdn = eeh_dev_to_pdn(edev); int cap; uint32_t val; int timeout = 0; @@ -714,32 +679,32 @@ static void eeh_bridge_check_link(struct eeh_dev *edev) /* Check slot status */ cap = edev->pcie_cap; - eeh_ops->read_config(pdn, cap + PCI_EXP_SLTSTA, 2, &val); + eeh_ops->read_config(edev, cap + PCI_EXP_SLTSTA, 2, &val); if (!(val & PCI_EXP_SLTSTA_PDS)) { eeh_edev_dbg(edev, "No card in the slot (0x%04x) !\n", val); return; } /* Check power status if we have the capability */ - eeh_ops->read_config(pdn, cap + PCI_EXP_SLTCAP, 2, &val); + eeh_ops->read_config(edev, cap + PCI_EXP_SLTCAP, 2, &val); if (val & PCI_EXP_SLTCAP_PCP) { - eeh_ops->read_config(pdn, cap + PCI_EXP_SLTCTL, 2, &val); + eeh_ops->read_config(edev, cap + PCI_EXP_SLTCTL, 2, &val); if (val & PCI_EXP_SLTCTL_PCC) { eeh_edev_dbg(edev, "In power-off state, power it on ...\n"); val &= ~(PCI_EXP_SLTCTL_PCC | PCI_EXP_SLTCTL_PIC); val |= (0x0100 & PCI_EXP_SLTCTL_PIC); - eeh_ops->write_config(pdn, cap + PCI_EXP_SLTCTL, 2, val); + eeh_ops->write_config(edev, cap + PCI_EXP_SLTCTL, 2, val); msleep(2 * 1000); } } /* Enable link */ - eeh_ops->read_config(pdn, cap + PCI_EXP_LNKCTL, 2, &val); + eeh_ops->read_config(edev, cap + PCI_EXP_LNKCTL, 2, &val); val &= ~PCI_EXP_LNKCTL_LD; - eeh_ops->write_config(pdn, cap + PCI_EXP_LNKCTL, 2, val); + eeh_ops->write_config(edev, cap + PCI_EXP_LNKCTL, 2, val); /* Check link */ - eeh_ops->read_config(pdn, cap + PCI_EXP_LNKCAP, 4, &val); + eeh_ops->read_config(edev, cap + PCI_EXP_LNKCAP, 4, &val); if (!(val & PCI_EXP_LNKCAP_DLLLARC)) { eeh_edev_dbg(edev, "No link reporting capability (0x%08x) \n", val); msleep(1000); @@ -752,7 +717,7 @@ static void eeh_bridge_check_link(struct eeh_dev *edev) msleep(20); timeout += 20; - eeh_ops->read_config(pdn, cap + PCI_EXP_LNKSTA, 2, &val); + eeh_ops->read_config(edev, cap + PCI_EXP_LNKSTA, 2, &val); if (val & PCI_EXP_LNKSTA_DLLLA) break; } @@ -769,7 +734,6 @@ static void eeh_bridge_check_link(struct eeh_dev *edev) static void eeh_restore_bridge_bars(struct eeh_dev *edev) { - struct pci_dn *pdn = eeh_dev_to_pdn(edev); int i; /* @@ -777,20 +741,20 @@ static void eeh_restore_bridge_bars(struct eeh_dev *edev) * Bus numbers and windows: 0x18 - 0x30 */ for (i = 4; i < 13; i++) - eeh_ops->write_config(pdn, i*4, 4, edev->config_space[i]); + eeh_ops->write_config(edev, i*4, 4, edev->config_space[i]); /* Rom: 0x38 */ - eeh_ops->write_config(pdn, 14*4, 4, edev->config_space[14]); + eeh_ops->write_config(edev, 14*4, 4, edev->config_space[14]); /* Cache line & Latency timer: 0xC 0xD */ - eeh_ops->write_config(pdn, PCI_CACHE_LINE_SIZE, 1, + eeh_ops->write_config(edev, PCI_CACHE_LINE_SIZE, 1, SAVED_BYTE(PCI_CACHE_LINE_SIZE)); - eeh_ops->write_config(pdn, PCI_LATENCY_TIMER, 1, - SAVED_BYTE(PCI_LATENCY_TIMER)); + eeh_ops->write_config(edev, PCI_LATENCY_TIMER, 1, + SAVED_BYTE(PCI_LATENCY_TIMER)); /* Max latency, min grant, interrupt pin and line: 0x3C */ - eeh_ops->write_config(pdn, 15*4, 4, edev->config_space[15]); + eeh_ops->write_config(edev, 15*4, 4, edev->config_space[15]); /* PCI Command: 0x4 */ - eeh_ops->write_config(pdn, PCI_COMMAND, 4, edev->config_space[1] | + eeh_ops->write_config(edev, PCI_COMMAND, 4, edev->config_space[1] | PCI_COMMAND_MEMORY | 
PCI_COMMAND_MASTER); /* Check the PCIe link is ready */ @@ -799,28 +763,27 @@ static void eeh_restore_device_bars(struct eeh_dev *edev) { - struct pci_dn *pdn = eeh_dev_to_pdn(edev); int i; u32 cmd; for (i = 4; i < 10; i++) - eeh_ops->write_config(pdn, i*4, 4, edev->config_space[i]); + eeh_ops->write_config(edev, i*4, 4, edev->config_space[i]); /* 12 == Expansion ROM Address */ - eeh_ops->write_config(pdn, 12*4, 4, edev->config_space[12]); + eeh_ops->write_config(edev, 12*4, 4, edev->config_space[12]); - eeh_ops->write_config(pdn, PCI_CACHE_LINE_SIZE, 1, + eeh_ops->write_config(edev, PCI_CACHE_LINE_SIZE, 1, SAVED_BYTE(PCI_CACHE_LINE_SIZE)); - eeh_ops->write_config(pdn, PCI_LATENCY_TIMER, 1, + eeh_ops->write_config(edev, PCI_LATENCY_TIMER, 1, SAVED_BYTE(PCI_LATENCY_TIMER)); /* max latency, min grant, interrupt pin and line */ - eeh_ops->write_config(pdn, 15*4, 4, edev->config_space[15]); + eeh_ops->write_config(edev, 15*4, 4, edev->config_space[15]); /* * Restore PERR & SERR bits, some devices require it, * don't touch the other command bits */ - eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cmd); + eeh_ops->read_config(edev, PCI_COMMAND, 4, &cmd); if (edev->config_space[1] & PCI_COMMAND_PARITY) cmd |= PCI_COMMAND_PARITY; else cmd &= ~PCI_COMMAND_PARITY; if (edev->config_space[1] & PCI_COMMAND_SERR) cmd |= PCI_COMMAND_SERR; else cmd &= ~PCI_COMMAND_SERR; - eeh_ops->write_config(pdn, PCI_COMMAND, 4, cmd); + eeh_ops->write_config(edev, PCI_COMMAND, 4, cmd); } /** @@ -843,16 +806,14 @@ static void eeh_restore_device_bars(struct eeh_dev *edev) */ static void eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag) { - struct pci_dn *pdn = eeh_dev_to_pdn(edev); - /* Do special restore for bridges */ if (edev->mode & EEH_DEV_BRIDGE) eeh_restore_bridge_bars(edev); else eeh_restore_device_bars(edev); - if (eeh_ops->restore_config && pdn) - eeh_ops->restore_config(pdn); + if (eeh_ops->restore_config) + eeh_ops->restore_config(edev); } /** diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c index 4fb0f1e1017a..429620da73ba 100644 --- a/arch/powerpc/kernel/eeh_sysfs.c +++ b/arch/powerpc/kernel/eeh_sysfs.c @@ -99,7 +99,7 @@ static ssize_t eeh_notify_resume_store(struct device *dev, if (!edev || !edev->pe || !eeh_ops->notify_resume) return -ENODEV; - if (eeh_ops->notify_resume(pci_get_pdn(pdev))) + if (eeh_ops->notify_resume(edev)) return -EIO; return count; diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 217ebdf5b00b..f4d0af8e1136 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -36,6 +36,12 @@ #include "head_32.h" /* + * powerpc relies on return from interrupt/syscall being context synchronising + * (which rfi is) to support ARCH_HAS_MEMBARRIER_SYNC_CORE without additional + * synchronisation instructions. + */ + +/* * Align to 4k in order to ensure that all functions modifying srr0/srr1 * fit into one page in order to not encounter a TLB miss between the * modification of srr0/srr1 and the associated rfi. 
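The EEH hunks above consistently replace the pci_dn argument of the eeh_ops callbacks with the eeh_dev itself. As a rough sketch of where this lands (signatures inferred from the call sites in this diff, not copied from arch/powerpc/include/asm/eeh.h, which declares more members), the affected parts of the ops table end up looking like:

	/* u32 comes from <linux/types.h> in-kernel; forward declaration
	 * stands in for the full eeh_dev definition. */
	struct eeh_dev;

	struct eeh_ops {
		/* ... */
		int (*read_config)(struct eeh_dev *edev, int where, int size, u32 *val);
		int (*write_config)(struct eeh_dev *edev, int where, int size, u32 val);
		int (*restore_config)(struct eeh_dev *edev);	/* previously took struct pci_dn * */
		int (*notify_resume)(struct eeh_dev *edev);	/* previously took struct pci_dn * */
		/* ... */
	};

A caller then needs only the eeh_dev, e.g. eeh_ops->read_config(edev, PCI_VENDOR_ID, 4, &cfg) as in eeh_dump_dev_log() above, rather than first mapping the device to its pci_dn via eeh_dev_to_pdn().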
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 9d49338e0c85..33a42e42c56f 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -64,15 +64,173 @@ exception_marker: .section ".text" .align 7 +#ifdef CONFIG_PPC_BOOK3S +.macro system_call_vectored name trapnr + .globl system_call_vectored_\name +system_call_vectored_\name: +_ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +BEGIN_FTR_SECTION + extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */ + bne .Ltabort_syscall +END_FTR_SECTION_IFSET(CPU_FTR_TM) +#endif + INTERRUPT_TO_KERNEL + mr r10,r1 + ld r1,PACAKSAVE(r13) + std r10,0(r1) + std r11,_NIP(r1) + std r12,_MSR(r1) + std r0,GPR0(r1) + std r10,GPR1(r1) + std r2,GPR2(r1) + ld r2,PACATOC(r13) + mfcr r12 + li r11,0 + /* Can we avoid saving r3-r8 in common case? */ + std r3,GPR3(r1) + std r4,GPR4(r1) + std r5,GPR5(r1) + std r6,GPR6(r1) + std r7,GPR7(r1) + std r8,GPR8(r1) + /* Zero r9-r12, this should only be required when restoring all GPRs */ + std r11,GPR9(r1) + std r11,GPR10(r1) + std r11,GPR11(r1) + std r11,GPR12(r1) + std r9,GPR13(r1) + SAVE_NVGPRS(r1) + std r11,_XER(r1) + std r11,_LINK(r1) + std r11,_CTR(r1) + + li r11,\trapnr + std r11,_TRAP(r1) + std r12,_CCR(r1) + std r3,ORIG_GPR3(r1) + addi r10,r1,STACK_FRAME_OVERHEAD + ld r11,exception_marker@toc(r2) + std r11,-16(r10) /* "regshere" marker */ + + /* + * RECONCILE_IRQ_STATE without calling trace_hardirqs_off(), which + * would clobber syscall parameters. Also we always enter with IRQs + * enabled and nothing pending. system_call_exception() will call + * trace_hardirqs_off(). + * + * scv enters with MSR[EE]=1, so don't set PACA_IRQ_HARD_DIS. The + * entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED. + */ + + /* Calling convention has r9 = orig r0, r10 = regs */ + mr r9,r0 + bl system_call_exception + +.Lsyscall_vectored_\name\()_exit: + addi r4,r1,STACK_FRAME_OVERHEAD + li r5,1 /* scv */ + bl syscall_exit_prepare + + ld r2,_CCR(r1) + ld r4,_NIP(r1) + ld r5,_MSR(r1) + +BEGIN_FTR_SECTION + stdcx. r0,0,r1 /* to clear the reservation */ +END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) + +BEGIN_FTR_SECTION + HMT_MEDIUM_LOW +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + + cmpdi r3,0 + bne .Lsyscall_vectored_\name\()_restore_regs + + /* rfscv returns with LR->NIA and CTR->MSR */ + mtlr r4 + mtctr r5 + + /* Could zero these as per ABI, but we may consider a stricter ABI + * which preserves these if libc implementations can benefit, so + * restore them for now until further measurement is done. */ + ld r0,GPR0(r1) + ld r4,GPR4(r1) + ld r5,GPR5(r1) + ld r6,GPR6(r1) + ld r7,GPR7(r1) + ld r8,GPR8(r1) + /* Zero volatile regs that may contain sensitive kernel data */ + li r9,0 + li r10,0 + li r11,0 + li r12,0 + mtspr SPRN_XER,r0 + + /* + * We don't need to restore AMR on the way back to userspace for KUAP. + * The value of AMR only matters while we're in the kernel. + */ + mtcr r2 + ld r2,GPR2(r1) + ld r3,GPR3(r1) + ld r13,GPR13(r1) + ld r1,GPR1(r1) + RFSCV_TO_USER + b . 
/* prevent speculative execution */ + +.Lsyscall_vectored_\name\()_restore_regs: + li r3,0 + mtmsrd r3,1 + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r5 + + ld r3,_CTR(r1) + ld r4,_LINK(r1) + ld r5,_XER(r1) + + REST_NVGPRS(r1) + ld r0,GPR0(r1) + mtcr r2 + mtctr r3 + mtlr r4 + mtspr SPRN_XER,r5 + REST_10GPRS(2, r1) + REST_2GPRS(12, r1) + ld r1,GPR1(r1) + RFI_TO_USER +.endm + +system_call_vectored common 0x3000 +/* + * We instantiate another entry copy for the SIGILL variant, with TRAP=0x7ff0 + * which is tested by system_call_exception when r0 is -1 (as set by vector + * entry code). + */ +system_call_vectored sigill 0x7ff0 + + +/* + * Entered via kernel return set up by kernel/sstep.c, must match entry regs + */ + .globl system_call_vectored_emulate +system_call_vectored_emulate: +_ASM_NOKPROBE_SYMBOL(system_call_vectored_emulate) + li r10,IRQS_ALL_DISABLED + stb r10,PACAIRQSOFTMASK(r13) + b system_call_vectored_common +#endif + + .balign IFETCH_ALIGN_BYTES .globl system_call_common system_call_common: +_ASM_NOKPROBE_SYMBOL(system_call_common) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM BEGIN_FTR_SECTION extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */ bne .Ltabort_syscall END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif -_ASM_NOKPROBE_SYMBOL(system_call_common) mr r10,r1 ld r1,PACAKSAVE(r13) std r10,0(r1) @@ -138,6 +296,7 @@ END_BTB_FLUSH_SECTION .Lsyscall_exit: addi r4,r1,STACK_FRAME_OVERHEAD + li r5,0 /* !scv */ bl syscall_exit_prepare ld r2,_CCR(r1) @@ -224,21 +383,29 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) b . /* prevent speculative execution */ #endif +#ifdef CONFIG_PPC_BOOK3S +_GLOBAL(ret_from_fork_scv) + bl schedule_tail + REST_NVGPRS(r1) + li r3,0 /* fork() return value */ + b .Lsyscall_vectored_common_exit +#endif + _GLOBAL(ret_from_fork) bl schedule_tail REST_NVGPRS(r1) - li r3,0 + li r3,0 /* fork() return value */ b .Lsyscall_exit _GLOBAL(ret_from_kernel_thread) bl schedule_tail REST_NVGPRS(r1) - mtlr r14 + mtctr r14 mr r3,r15 #ifdef PPC64_ELF_ABI_v2 mr r12,r14 #endif - blrl + bctrl li r3,0 b .Lsyscall_exit @@ -259,10 +426,7 @@ _ASM_NOKPROBE_SYMBOL(save_nvgprs); #define FLUSH_COUNT_CACHE \ 1: nop; \ - patch_site 1b, patch__call_flush_count_cache - - -#define BCCTR_FLUSH .long 0x4c400420 + patch_site 1b, patch__call_flush_branch_caches .macro nops number .rept \number @@ -271,8 +435,8 @@ _ASM_NOKPROBE_SYMBOL(save_nvgprs); .endm .balign 32 -.global flush_count_cache -flush_count_cache: +.global flush_branch_caches +flush_branch_caches: /* Save LR into r9 */ mflr r9 @@ -294,7 +458,7 @@ flush_count_cache: li r9,0x7fff mtctr r9 - BCCTR_FLUSH + PPC_BCCTR_FLUSH 2: nop patch_site 2b patch__flush_count_cache_return @@ -303,7 +467,7 @@ flush_count_cache: .rept 278 .balign 32 - BCCTR_FLUSH + PPC_BCCTR_FLUSH nops 7 .endr @@ -357,7 +521,7 @@ _GLOBAL(_switch) * kernel/sched/core.c). * * Uncacheable stores in the case of involuntary preemption must - * be taken care of. The smp_mb__before_spin_lock() in __schedule() + * be taken care of. The smp_mb__after_spinlock() in __schedule() * is implemented as hwsync on powerpc, which orders MMIO too. So * long as there is an hwsync in the context switch path, it will * be executed on the source CPU after the task has performed diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 446e54c3f71e..f7d748b88705 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -508,8 +508,24 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) .macro __GEN_COMMON_BODY name .if IMASK + .if ! 
ISTACK + .error "No support for masked interrupt to use custom stack" + .endif + + /* If coming from user, skip soft-mask tests. */ + andi. r10,r12,MSR_PR + bne 2f + + /* Kernel code running below __end_interrupts is implicitly + * soft-masked */ + LOAD_HANDLER(r10, __end_interrupts) + cmpld r11,r10 + li r10,IMASK + blt- 1f + + /* Test the soft mask state against our interrupt's bit */ lbz r10,PACAIRQSOFTMASK(r13) - andi. r10,r10,IMASK +1: andi. r10,r10,IMASK /* Associate vector numbers with bits in paca->irq_happened */ .if IVEC == 0x500 || IVEC == 0xea0 li r10,PACA_IRQ_EE @@ -540,7 +556,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) .if ISTACK andi. r10,r12,MSR_PR /* See if coming from user */ - mr r10,r1 /* Save r1 */ +2: mr r10,r1 /* Save r1 */ subi r1,r1,INT_FRAME_SIZE /* alloc frame on kernel stack */ beq- 100f ld r1,PACAKSAVE(r13) /* kernel stack to use */ @@ -740,6 +756,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) * guarantee they will be delivered virtually. Some conditions (see the ISA) * cause exceptions to be delivered in real mode. * + * The scv instructions are a special case. They get a 0x3000 offset applied. + * scv exceptions have unique reentrancy properties, see below. + * * It's impossible to receive interrupts below 0x300 via AIL. * * KVM: None of the virtual exceptions are from the guest. Anything that @@ -749,8 +768,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) * We layout physical memory as follows: * 0x0000 - 0x00ff : Secondary processor spin code * 0x0100 - 0x18ff : Real mode pSeries interrupt vectors - * 0x1900 - 0x3fff : Real mode trampolines - * 0x4000 - 0x58ff : Relon (IR=1,DR=1) mode pSeries interrupt vectors + * 0x1900 - 0x2fff : Real mode trampolines + * 0x3000 - 0x58ff : Relon (IR=1,DR=1) mode pSeries interrupt vectors * 0x5900 - 0x6fff : Relon mode trampolines * 0x7000 - 0x7fff : FWNMI data area * 0x8000 - .... : Common interrupt handlers, remaining early @@ -761,8 +780,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) * vectors there. */ OPEN_FIXED_SECTION(real_vectors, 0x0100, 0x1900) -OPEN_FIXED_SECTION(real_trampolines, 0x1900, 0x4000) -OPEN_FIXED_SECTION(virt_vectors, 0x4000, 0x5900) +OPEN_FIXED_SECTION(real_trampolines, 0x1900, 0x3000) +OPEN_FIXED_SECTION(virt_vectors, 0x3000, 0x5900) OPEN_FIXED_SECTION(virt_trampolines, 0x5900, 0x7000) #ifdef CONFIG_PPC_POWERNV @@ -798,6 +817,77 @@ USE_FIXED_SECTION(real_vectors) .globl __start_interrupts __start_interrupts: +/** + * Interrupt 0x3000 - System Call Vectored Interrupt (syscall). + * This is a synchronous interrupt invoked with the "scv" instruction. The + * system call does not alter the HV bit, so it is directed to the OS. + * + * Handling: + * scv instructions enter the kernel without changing EE, RI, ME, or HV. + * In particular, this means we can take a maskable interrupt at any point + * in the scv handler, which is unlike any other interrupt. This is solved + * by treating the instruction addresses below __end_interrupts as being + * soft-masked. + * + * AIL-0 mode scv exceptions go to 0x17000-0x17fff, but we set AIL-3 and + * ensure scv is never executed with relocation off, which means AIL-0 + * should never happen. 
+ * + * Before leaving the below __end_interrupts text, at least one of the following + * must be true: + * - MSR[PR]=1 (i.e., return to userspace) + * - MSR_EE|MSR_RI is set (no reentrant exceptions) + * - Standard kernel environment is set up (stack, paca, etc) + * + * Call convention: + * + * syscall register convention is in Documentation/powerpc/syscall64-abi.rst + */ +EXC_VIRT_BEGIN(system_call_vectored, 0x3000, 0x1000) + /* SCV 0 */ + mr r9,r13 + GET_PACA(r13) + mflr r11 + mfctr r12 + li r10,IRQS_ALL_DISABLED + stb r10,PACAIRQSOFTMASK(r13) +#ifdef CONFIG_RELOCATABLE + b system_call_vectored_tramp +#else + b system_call_vectored_common +#endif + nop + + /* SCV 1 - 127 */ + .rept 127 + mr r9,r13 + GET_PACA(r13) + mflr r11 + mfctr r12 + li r10,IRQS_ALL_DISABLED + stb r10,PACAIRQSOFTMASK(r13) + li r0,-1 /* cause failure */ +#ifdef CONFIG_RELOCATABLE + b system_call_vectored_sigill_tramp +#else + b system_call_vectored_sigill +#endif + .endr +EXC_VIRT_END(system_call_vectored, 0x3000, 0x1000) + +#ifdef CONFIG_RELOCATABLE +TRAMP_VIRT_BEGIN(system_call_vectored_tramp) + __LOAD_HANDLER(r10, system_call_vectored_common) + mtctr r10 + bctr + +TRAMP_VIRT_BEGIN(system_call_vectored_sigill_tramp) + __LOAD_HANDLER(r10, system_call_vectored_sigill) + mtctr r10 + bctr +#endif + + /* No virt vectors corresponding with 0x0..0x100 */ EXC_VIRT_NONE(0x4000, 0x100) @@ -2838,7 +2928,8 @@ masked_interrupt: ld r10,PACA_EXGEN+EX_R10(r13) ld r11,PACA_EXGEN+EX_R11(r13) ld r12,PACA_EXGEN+EX_R12(r13) - /* returns to kernel where r13 must be set up, so don't restore it */ + ld r13,PACA_EXGEN+EX_R13(r13) + /* May return to masked low address where r13 is not set up */ .if \hsrr HRFI_TO_KERNEL .else @@ -2946,6 +3037,47 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback) GET_SCRATCH0(r13); hrfid +TRAMP_REAL_BEGIN(rfscv_flush_fallback) + /* system call volatile */ + mr r7,r13 + GET_PACA(r13); + mr r8,r1 + ld r1,PACAKSAVE(r13) + mfctr r9 + ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) + ld r11,PACA_L1D_FLUSH_SIZE(r13) + srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ + mtctr r11 + DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ + + /* order ld/st prior to dcbt stop all streams with flushing */ + sync + + /* + * The load addresses are at staggered offsets within cachelines, + * which suits some pipelines better (on others it should not + * hurt). + */ +1: + ld r11,(0x80 + 8)*0(r10) + ld r11,(0x80 + 8)*1(r10) + ld r11,(0x80 + 8)*2(r10) + ld r11,(0x80 + 8)*3(r10) + ld r11,(0x80 + 8)*4(r10) + ld r11,(0x80 + 8)*5(r10) + ld r11,(0x80 + 8)*6(r10) + ld r11,(0x80 + 8)*7(r10) + addi r10,r10,0x80*8 + bdnz 1b + + mtctr r9 + li r9,0 + li r10,0 + li r11,0 + mr r1,r8 + mr r13,r7 + RFSCV + USE_TEXT_SECTION() MASKED_INTERRUPT MASKED_INTERRUPT hsrr=1 @@ -2997,6 +3129,10 @@ EXC_COMMON_BEGIN(ppc64_runlatch_on_trampoline) USE_FIXED_SECTION(virt_trampolines) /* + * All code below __end_interrupts is treated as soft-masked. If + * any code runs here with MSR[EE]=1, it must then cope with pending + * soft interrupt being raised (i.e., by ensuring it is replayed). + * * The __end_interrupts marker must be past the out-of-line (OOL) * handlers, so that they are copied to real address 0x100 when running * a relocatable kernel. 
This ensures they can be reached from the short diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 78ab9a6ee6ac..10ebb4bf71ad 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -32,6 +32,14 @@ #include <asm/fadump-internal.h> #include <asm/setup.h> +/* + * The CPU who acquired the lock to trigger the fadump crash should + * wait for other CPUs to enter. + * + * The timeout is in milliseconds. + */ +#define CRASH_TIMEOUT 500 + static struct fw_dump fw_dump; static void __init fadump_reserve_crash_area(u64 base); @@ -39,7 +47,10 @@ static void __init fadump_reserve_crash_area(u64 base); struct kobject *fadump_kobj; #ifndef CONFIG_PRESERVE_FA_DUMP + +static atomic_t cpus_in_fadump; static DEFINE_MUTEX(fadump_mutex); + struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0, false }; #define RESERVED_RNGS_SZ 16384 /* 16K - 128 entries */ @@ -668,8 +679,11 @@ early_param("fadump_reserve_mem", early_fadump_reserve_mem); void crash_fadump(struct pt_regs *regs, const char *str) { + unsigned int msecs; struct fadump_crash_info_header *fdh = NULL; int old_cpu, this_cpu; + /* Do not include first CPU */ + unsigned int ncpus = num_online_cpus() - 1; if (!should_fadump_crash()) return; @@ -685,6 +699,8 @@ void crash_fadump(struct pt_regs *regs, const char *str) old_cpu = cmpxchg(&crashing_cpu, -1, this_cpu); if (old_cpu != -1) { + atomic_inc(&cpus_in_fadump); + /* * We can't loop here indefinitely. Wait as long as fadump * is in force. If we race with fadump un-registration this @@ -708,6 +724,16 @@ void crash_fadump(struct pt_regs *regs, const char *str) fdh->online_mask = *cpu_online_mask; + /* + * If we came in via system reset, wait a while for the secondary + * CPUs to enter. + */ + if (TRAP(&(fdh->regs)) == 0x100) { + msecs = CRASH_TIMEOUT; + while ((atomic_read(&cpus_in_fadump) < ncpus) && (--msecs > 0)) + mdelay(1); + } + fw_dump.ops->fadump_trigger(fdh, str); } diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c index cc4a5e3f51f1..fe48d319d490 100644 --- a/arch/powerpc/kernel/firmware.c +++ b/arch/powerpc/kernel/firmware.c @@ -11,8 +11,27 @@ #include <linux/export.h> #include <linux/cache.h> +#include <linux/of.h> #include <asm/firmware.h> +#ifdef CONFIG_PPC64 unsigned long powerpc_firmware_features __read_mostly; EXPORT_SYMBOL_GPL(powerpc_firmware_features); +#endif + +#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST) +bool is_kvm_guest(void) +{ + struct device_node *hyper_node; + + hyper_node = of_find_node_by_path("/hypervisor"); + if (!hyper_node) + return 0; + + if (!of_device_is_compatible(hyper_node, "linux,kvm")) + return 0; + + return 1; +} +#endif diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index cac22cb97a8c..4ae39db70044 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -107,9 +107,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) or r12,r12,r4 std r12,_MSR(r1) #endif - /* Don't care if r4 overflows, this is desired behaviour */ - lbz r4,THREAD_LOAD_FP(r5) - addi r4,r4,1 + li r4,1 stb r4,THREAD_LOAD_FP(r5) addi r10,r5,THREAD_FPSTATE lfd fr0,FPSTATE_FPSCR(r10) diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 705c042309d8..f3ab94d73936 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -474,7 +474,7 @@ InstructionTLBMiss: /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_IMISS #if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) - lis r1,PAGE_OFFSET@h /* check 
if kernel address */ + lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 #endif mfspr r2, SPRN_SPRG_PGDIR @@ -484,7 +484,7 @@ InstructionTLBMiss: li r1,_PAGE_PRESENT | _PAGE_EXEC #endif #if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) - bge- 112f + bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ #endif @@ -541,7 +541,7 @@ DataLoadTLBMiss: */ /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_DMISS - lis r1,PAGE_OFFSET@h /* check if kernel address */ + lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 mfspr r2, SPRN_SPRG_PGDIR #ifdef CONFIG_SWAP @@ -549,7 +549,7 @@ DataLoadTLBMiss: #else li r1, _PAGE_PRESENT #endif - bge- 112f + bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ 112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ @@ -621,7 +621,7 @@ DataStoreTLBMiss: */ /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_DMISS - lis r1,PAGE_OFFSET@h /* check if kernel address */ + lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 mfspr r2, SPRN_SPRG_PGDIR #ifdef CONFIG_SWAP @@ -629,7 +629,7 @@ DataStoreTLBMiss: #else li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT #endif - bge- 112f + bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ 112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ @@ -673,6 +673,10 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) #define altivec_assist_exception unknown_exception #endif +#ifndef CONFIG_TAU_INT +#define TAUException unknown_exception +#endif + EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_STD) EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_STD) EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD) diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 926bfa73586a..5b282d9965a5 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -620,7 +620,7 @@ start_here: ori r6, r6, swapper_pg_dir@l lis r5, abatron_pteptrs@h ori r5, r5, abatron_pteptrs@l - stw r5, 0xf0(r0) /* Must match your Abatron config file */ + stw r5, 0xf0(0) /* Must match your Abatron config file */ tophys(r5,r5) stw r6, 0(r5) diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 0000daf0e1da..1f4a1efa0074 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -418,8 +418,9 @@ static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw) if (dawr_enabled()) { max_len = DAWR_MAX_LEN; - /* DAWR region can't cross 512 bytes boundary */ - if (ALIGN(start_addr, SZ_512M) != ALIGN(end_addr - 1, SZ_512M)) + /* DAWR region can't cross 512 bytes boundary on p10 predecessors */ + if (!cpu_has_feature(CPU_FTR_ARCH_31) && + (ALIGN_DOWN(start_addr, SZ_512) != ALIGN_DOWN(end_addr - 1, SZ_512))) return -EINVAL; } else if (IS_ENABLED(CONFIG_PPC_8xx)) { /* 8xx can setup a range without limitation */ @@ -498,11 +499,11 @@ static bool dar_in_user_range(unsigned long dar, struct arch_hw_breakpoint *info return ((info->address <= dar) && (dar - info->address < info->len)); } -static bool dar_user_range_overlaps(unsigned long dar, int size, - struct arch_hw_breakpoint *info) +static bool ea_user_range_overlaps(unsigned long ea, int size, + struct 
arch_hw_breakpoint *info) { - return ((dar < info->address + info->len) && - (dar + size > info->address)); + return ((ea < info->address + info->len) && + (ea + size > info->address)); } static bool dar_in_hw_range(unsigned long dar, struct arch_hw_breakpoint *info) @@ -515,20 +516,22 @@ static bool dar_in_hw_range(unsigned long dar, struct arch_hw_breakpoint *info) return ((hw_start_addr <= dar) && (hw_end_addr > dar)); } -static bool dar_hw_range_overlaps(unsigned long dar, int size, - struct arch_hw_breakpoint *info) +static bool ea_hw_range_overlaps(unsigned long ea, int size, + struct arch_hw_breakpoint *info) { unsigned long hw_start_addr, hw_end_addr; hw_start_addr = ALIGN_DOWN(info->address, HW_BREAKPOINT_SIZE); hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE); - return ((dar < hw_end_addr) && (dar + size > hw_start_addr)); + return ((ea < hw_end_addr) && (ea + size > hw_start_addr)); } /* * If hw has multiple DAWR registers, we also need to check all * dawrx constraint bits to confirm this is _really_ a valid event. + * If type is UNKNOWN, but privilege level matches, consider it as + * a positive match. */ static bool check_dawrx_constraints(struct pt_regs *regs, int type, struct arch_hw_breakpoint *info) @@ -536,7 +539,12 @@ static bool check_dawrx_constraints(struct pt_regs *regs, int type, if (OP_IS_LOAD(type) && !(info->type & HW_BRK_TYPE_READ)) return false; - if (OP_IS_STORE(type) && !(info->type & HW_BRK_TYPE_WRITE)) + /* + * The Cache Management instructions other than dcbz never + * cause a match. i.e. if type is CACHEOP, the instruction + * is dcbz, and dcbz is treated as Store. + */ + if ((OP_IS_STORE(type) || type == CACHEOP) && !(info->type & HW_BRK_TYPE_WRITE)) return false; if (is_kernel_addr(regs->nip) && !(info->type & HW_BRK_TYPE_KERNEL)) @@ -553,7 +561,8 @@ static bool check_dawrx_constraints(struct pt_regs *regs, int type, * including extraneous exception. Otherwise return false. 
*/ static bool check_constraints(struct pt_regs *regs, struct ppc_inst instr, - int type, int size, struct arch_hw_breakpoint *info) + unsigned long ea, int type, int size, + struct arch_hw_breakpoint *info) { bool in_user_range = dar_in_user_range(regs->dar, info); bool dawrx_constraints; @@ -569,22 +578,27 @@ static bool check_constraints(struct pt_regs *regs, struct ppc_inst instr, } if (unlikely(ppc_inst_equal(instr, ppc_inst(0)))) { - if (in_user_range) - return true; + if (cpu_has_feature(CPU_FTR_ARCH_31) && + !dar_in_hw_range(regs->dar, info)) + return false; - if (dar_in_hw_range(regs->dar, info)) { - info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; - return true; - } - return false; + return true; } dawrx_constraints = check_dawrx_constraints(regs, type, info); - if (dar_user_range_overlaps(regs->dar, size, info)) + if (type == UNKNOWN) { + if (cpu_has_feature(CPU_FTR_ARCH_31) && + !dar_in_hw_range(regs->dar, info)) + return false; + + return dawrx_constraints; + } + + if (ea_user_range_overlaps(ea, size, info)) return dawrx_constraints; - if (dar_hw_range_overlaps(regs->dar, size, info)) { + if (ea_hw_range_overlaps(ea, size, info)) { if (dawrx_constraints) { info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; return true; @@ -593,8 +607,17 @@ static bool check_constraints(struct pt_regs *regs, struct ppc_inst instr, return false; } +static int cache_op_size(void) +{ +#ifdef __powerpc64__ + return ppc64_caches.l1d.block_size; +#else + return L1_CACHE_BYTES; +#endif +} + static void get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, - int *type, int *size, bool *larx_stcx) + int *type, int *size, unsigned long *ea) { struct instruction_op op; @@ -602,16 +625,23 @@ static void get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, return; analyse_instr(&op, regs, *instr); - - /* - * Set size = 8 if analyse_instr() fails. If it's a userspace - * watchpoint(valid or extraneous), we can notify user about it. - * If it's a kernel watchpoint, instruction emulation will fail - * in stepping_handler() and watchpoint will be disabled. - */ *type = GETTYPE(op.type); - *size = !(*type == UNKNOWN) ? 
GETSIZE(op.type) : 8; - *larx_stcx = (*type == LARX || *type == STCX); + *ea = op.ea; +#ifdef __powerpc64__ + if (!(regs->msr & MSR_64BIT)) + *ea &= 0xffffffffUL; +#endif + + *size = GETSIZE(op.type); + if (*type == CACHEOP) { + *size = cache_op_size(); + *ea &= ~(*size - 1); + } +} + +static bool is_larx_stcx_instr(int type) +{ + return type == LARX || type == STCX; } /* @@ -678,7 +708,7 @@ int hw_breakpoint_handler(struct die_args *args) struct ppc_inst instr = ppc_inst(0); int type = 0; int size = 0; - bool larx_stcx = false; + unsigned long ea; /* Disable breakpoints during exception handling */ hw_breakpoint_disable(); @@ -692,7 +722,7 @@ int hw_breakpoint_handler(struct die_args *args) rcu_read_lock(); if (!IS_ENABLED(CONFIG_PPC_8xx)) - get_instr_detail(regs, &instr, &type, &size, &larx_stcx); + get_instr_detail(regs, &instr, &type, &size, &ea); for (i = 0; i < nr_wp_slots(); i++) { bp[i] = __this_cpu_read(bp_per_reg[i]); @@ -702,7 +732,7 @@ int hw_breakpoint_handler(struct die_args *args) info[i] = counter_arch_bp(bp[i]); info[i]->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ; - if (check_constraints(regs, instr, type, size, info[i])) { + if (check_constraints(regs, instr, ea, type, size, info[i])) { if (!IS_ENABLED(CONFIG_PPC_8xx) && ppc_inst_equal(instr, ppc_inst(0))) { handler_error(bp[i], info[i]); @@ -744,7 +774,7 @@ int hw_breakpoint_handler(struct die_args *args) } if (!IS_ENABLED(CONFIG_PPC_8xx)) { - if (larx_stcx) { + if (is_larx_stcx_instr(type)) { for (i = 0; i < nr_wp_slots(); i++) { if (!hit[i]) continue; diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 05b1cc0e009e..bf21ebd36190 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -621,13 +621,14 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, "%10u ", per_cpu(irq_stat, j).mce_exceptions); seq_printf(p, " Machine check exceptions\n"); +#ifdef CONFIG_PPC_BOOK3S_64 if (cpu_has_feature(CPU_FTR_HVMODE)) { seq_printf(p, "%*s: ", prec, "HMI"); for_each_online_cpu(j) - seq_printf(p, "%10u ", - per_cpu(irq_stat, j).hmi_exceptions); + seq_printf(p, "%10u ", paca_ptrs[j]->hmi_irqs); seq_printf(p, " Hypervisor Maintenance Interrupts\n"); } +#endif seq_printf(p, "%*s: ", prec, "NMI"); for_each_online_cpu(j) @@ -665,7 +666,9 @@ u64 arch_irq_stat_cpu(unsigned int cpu) sum += per_cpu(irq_stat, cpu).mce_exceptions; sum += per_cpu(irq_stat, cpu).spurious_irqs; sum += per_cpu(irq_stat, cpu).timer_irqs_others; - sum += per_cpu(irq_stat, cpu).hmi_exceptions; +#ifdef CONFIG_PPC_BOOK3S_64 + sum += paca_ptrs[cpu]->hmi_irqs; +#endif sum += per_cpu(irq_stat, cpu).sreset_irqs; #ifdef CONFIG_PPC_WATCHDOG sum += per_cpu(irq_stat, cpu).soft_nmi_irqs; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 9cc792a3a6a9..6ab9b4d037c3 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -244,7 +244,7 @@ static int try_to_emulate(struct kprobe *p, struct pt_regs *regs) * So, we should never get here... but, its still * good to catch them, just in case... 
*/ - printk("Can't step on instruction %x\n", ppc_inst_val(insn)); + printk("Can't step on instruction %s\n", ppc_inst_as_str(insn)); BUG(); } else { /* diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index fd90c0eda229..ada59f6c4298 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -49,6 +49,20 @@ static struct irq_work mce_ue_event_irq_work = { DECLARE_WORK(mce_ue_event_work, machine_process_ue_event); +static BLOCKING_NOTIFIER_HEAD(mce_notifier_list); + +int mce_register_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&mce_notifier_list, nb); +} +EXPORT_SYMBOL_GPL(mce_register_notifier); + +int mce_unregister_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&mce_notifier_list, nb); +} +EXPORT_SYMBOL_GPL(mce_unregister_notifier); + static void mce_set_error_info(struct machine_check_event *mce, struct mce_error_info *mce_err) { @@ -278,6 +292,7 @@ static void machine_process_ue_event(struct work_struct *work) while (__this_cpu_read(mce_ue_count) > 0) { index = __this_cpu_read(mce_ue_count) - 1; evt = this_cpu_ptr(&mce_ue_event_queue[index]); + blocking_notifier_call_chain(&mce_notifier_list, 0, evt); #ifdef CONFIG_MEMORY_FAILURE /* * This should probably queued elsewhere, but @@ -370,6 +385,7 @@ void machine_check_print_event_info(struct machine_check_event *evt, static const char *mc_user_types[] = { "Indeterminate", "tlbie(l) invalid", + "scv invalid", }; static const char *mc_ra_types[] = { "Indeterminate", @@ -711,7 +727,7 @@ long hmi_exception_realmode(struct pt_regs *regs) { int ret; - __this_cpu_inc(irq_stat.hmi_exceptions); + local_paca->hmi_irqs++; ret = hmi_handle_debugtrig(regs); if (ret >= 0) diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index c3b522bff9b4..b7e173754a2e 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -243,6 +243,45 @@ static const struct mce_ierror_table mce_p9_ierror_table[] = { MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0, 0, 0, 0, 0, 0, 0 } }; +static const struct mce_ierror_table mce_p10_ierror_table[] = { +{ 0x00000000081c0000, 0x0000000000040000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000000081c0000, 0x0000000000080000, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000000081c0000, 0x00000000000c0000, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000000081c0000, 0x0000000000100000, true, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000000081c0000, 0x0000000000140000, true, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000000081c0000, 0x0000000000180000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000000081c0000, 0x00000000001c0000, true, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH_FOREIGN, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000000081c0000, 0x0000000008080000, true, + MCE_ERROR_TYPE_USER,MCE_USER_ERROR_SCV, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000000081c0000, 0x00000000080c0000, true, + 
MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000000081c0000, 0x0000000008100000, true, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000000081c0000, 0x0000000008140000, false, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_STORE, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_FATAL, false }, /* ASYNC is fatal */ +{ 0x00000000081c0000, 0x00000000081c0000, true, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0, 0, 0, 0, 0, 0, 0 } }; + struct mce_derror_table { unsigned long dsisr_value; bool dar_valid; /* dar is a valid indicator of faulting address */ @@ -361,6 +400,46 @@ static const struct mce_derror_table mce_p9_derror_table[] = { MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0, false, 0, 0, 0, 0, 0 } }; +static const struct mce_derror_table mce_p10_derror_table[] = { +{ 0x00008000, false, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00004000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000800, true, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000400, true, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000200, false, + MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000080, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */ + MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000100, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000040, true, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000020, false, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000010, false, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000008, false, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0, false, 0, 0, 0, 0, 0 } }; + static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr, uint64_t *phys_addr) { @@ -657,3 +736,8 @@ long __machine_check_early_realmode_p9(struct pt_regs *regs) return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table); } + +long __machine_check_early_realmode_p10(struct pt_regs *regs) +{ + return mce_handle_error(regs, mce_p10_derror_table, mce_p10_ierror_table); +} diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 1864605eca29..7bb46ad98207 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -413,20 +413,6 @@ _GLOBAL(kexec_sequence) li r0,0 std r0,16(r1) -BEGIN_FTR_SECTION - /* - * This is the best time to turn AMR/IAMR off. - * key 0 is used in radix for supervisor<->user - * protection, but on hash key 0 is reserved - * ideally we want to enter with a clean state.
- * NOTE, we rely on r0 being 0 from above. - */ - mtspr SPRN_IAMR,r0 -BEGIN_FTR_SECTION_NESTED(42) - mtspr SPRN_AMOR,r0 -END_FTR_SECTION_NESTED_IFSET(CPU_FTR_HVMODE, 42) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) - /* save regs for local vars on new stack. * yes, we won't go back, but ... */ diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index df649acb5631..a211b0253cdb 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -86,3 +86,14 @@ int module_finalize(const Elf_Ehdr *hdr, return 0; } + +#ifdef MODULES_VADDR +void *module_alloc(unsigned long size) +{ + BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR); + + return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL, + PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, + __builtin_return_address(0)); +} +#endif diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index 71a3f97dc988..f89376ff633e 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -62,8 +62,8 @@ static int of_pci_phb_probe(struct platform_device *dev) /* Init pci_dn data structures */ pci_devs_phb_init_dynamic(phb); - /* Create EEH PEs for the PHB */ - eeh_dev_phb_init_dynamic(phb); + /* Create EEH PE for the PHB */ + eeh_phb_pe_create(phb); /* Scan the bus */ pcibios_scan_phb(phb); diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 74da65aacbc9..0ad15768d762 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -57,8 +57,8 @@ static void *__init alloc_paca_data(unsigned long size, unsigned long align, #define LPPACA_SIZE 0x400 -static void *__init alloc_shared_lppaca(unsigned long size, unsigned long align, - unsigned long limit, int cpu) +static void *__init alloc_shared_lppaca(unsigned long size, unsigned long limit, + int cpu) { size_t shared_lppaca_total_size = PAGE_ALIGN(nr_cpu_ids * LPPACA_SIZE); static unsigned long shared_lppaca_size; @@ -68,6 +68,13 @@ static void *__init alloc_shared_lppaca(unsigned long size, unsigned long align, if (!shared_lppaca) { memblock_set_bottom_up(true); + /* + * See Documentation/powerpc/ultravisor.rst for more details. + * + * UV/HV data sharing is in PAGE_SIZE granularity. In order to + * minimize the number of pages shared, align the allocation to + * PAGE_SIZE. 
+ */ shared_lppaca = memblock_alloc_try_nid(shared_lppaca_total_size, PAGE_SIZE, MEMBLOCK_LOW_LIMIT, @@ -122,7 +129,7 @@ static struct lppaca * __init new_lppaca(int cpu, unsigned long limit) return NULL; if (is_secure_guest()) - lp = alloc_shared_lppaca(LPPACA_SIZE, 0x400, limit, cpu); + lp = alloc_shared_lppaca(LPPACA_SIZE, limit, cpu); else lp = alloc_paca_data(LPPACA_SIZE, 0x400, limit, cpu); diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 4e654df55969..e99b7c547d7e 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -124,9 +124,28 @@ struct pci_dn *pci_get_pdn(struct pci_dev *pdev) return NULL; } +#ifdef CONFIG_EEH +static struct eeh_dev *eeh_dev_init(struct pci_dn *pdn) +{ + struct eeh_dev *edev; + + /* Allocate EEH device */ + edev = kzalloc(sizeof(*edev), GFP_KERNEL); + if (!edev) + return NULL; + + /* Associate EEH device with OF node */ + pdn->edev = edev; + edev->pdn = pdn; + edev->bdfn = (pdn->busno << 8) | pdn->devfn; + edev->controller = pdn->phb; + + return edev; +} +#endif /* CONFIG_EEH */ + #ifdef CONFIG_PCI_IOV static struct pci_dn *add_one_sriov_vf_pdn(struct pci_dn *parent, - int vf_index, int busno, int devfn) { struct pci_dn *pdn; @@ -143,7 +162,6 @@ static struct pci_dn *add_one_sriov_vf_pdn(struct pci_dn *parent, pdn->parent = parent; pdn->busno = busno; pdn->devfn = devfn; - pdn->vf_index = vf_index; pdn->pe_number = IODA_INVALID_PE; INIT_LIST_HEAD(&pdn->child_list); INIT_LIST_HEAD(&pdn->list); @@ -174,7 +192,7 @@ struct pci_dn *add_sriov_vf_pdns(struct pci_dev *pdev) for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) { struct eeh_dev *edev __maybe_unused; - pdn = add_one_sriov_vf_pdn(parent, i, + pdn = add_one_sriov_vf_pdn(parent, pci_iov_virtfn_bus(pdev, i), pci_iov_virtfn_devfn(pdev, i)); if (!pdn) { @@ -187,7 +205,10 @@ struct pci_dn *add_sriov_vf_pdns(struct pci_dev *pdev) /* Create the EEH device for the VF */ edev = eeh_dev_init(pdn); BUG_ON(!edev); + + /* FIXME: these should probably be populated by the EEH probe */ edev->physfn = pdev; + edev->vf_index = i; #endif /* CONFIG_EEH */ } return pci_get_pdn(pdev); @@ -242,7 +263,7 @@ void remove_sriov_vf_pdns(struct pci_dev *pdev) * have a configured PE. 
*/ if (edev->pe) - eeh_rmv_from_parent_pe(edev); + eeh_pe_tree_remove(edev); pdn->edev = NULL; kfree(edev); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 794b754deec2..016bd831908e 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -471,49 +471,58 @@ EXPORT_SYMBOL(giveup_all); #ifdef CONFIG_PPC_BOOK3S_64 #ifdef CONFIG_PPC_FPU -static int restore_fp(struct task_struct *tsk) +static bool should_restore_fp(void) { - if (tsk->thread.load_fp) { - load_fp_state(¤t->thread.fp_state); + if (current->thread.load_fp) { current->thread.load_fp++; - return 1; + return true; } - return 0; + return false; +} + +static void do_restore_fp(void) +{ + load_fp_state(¤t->thread.fp_state); } #else -static int restore_fp(struct task_struct *tsk) { return 0; } +static bool should_restore_fp(void) { return false; } +static void do_restore_fp(void) { } #endif /* CONFIG_PPC_FPU */ #ifdef CONFIG_ALTIVEC -#define loadvec(thr) ((thr).load_vec) -static int restore_altivec(struct task_struct *tsk) +static bool should_restore_altivec(void) { - if (cpu_has_feature(CPU_FTR_ALTIVEC) && (tsk->thread.load_vec)) { - load_vr_state(&tsk->thread.vr_state); - tsk->thread.used_vr = 1; - tsk->thread.load_vec++; - - return 1; + if (cpu_has_feature(CPU_FTR_ALTIVEC) && (current->thread.load_vec)) { + current->thread.load_vec++; + return true; } - return 0; + return false; +} + +static void do_restore_altivec(void) +{ + load_vr_state(¤t->thread.vr_state); + current->thread.used_vr = 1; } #else -#define loadvec(thr) 0 -static inline int restore_altivec(struct task_struct *tsk) { return 0; } +static bool should_restore_altivec(void) { return false; } +static void do_restore_altivec(void) { } #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX -static int restore_vsx(struct task_struct *tsk) +static bool should_restore_vsx(void) { - if (cpu_has_feature(CPU_FTR_VSX)) { - tsk->thread.used_vsr = 1; - return 1; - } - - return 0; + if (cpu_has_feature(CPU_FTR_VSX)) + return true; + return false; +} +static void do_restore_vsx(void) +{ + current->thread.used_vsr = 1; } #else -static inline int restore_vsx(struct task_struct *tsk) { return 0; } +static bool should_restore_vsx(void) { return false; } +static void do_restore_vsx(void) { } #endif /* CONFIG_VSX */ /* @@ -529,32 +538,42 @@ static inline int restore_vsx(struct task_struct *tsk) { return 0; } void notrace restore_math(struct pt_regs *regs) { unsigned long msr; - - if (!MSR_TM_ACTIVE(regs->msr) && - !current->thread.load_fp && !loadvec(current->thread)) - return; + unsigned long new_msr = 0; msr = regs->msr; - msr_check_and_set(msr_all_available); /* - * Only reload if the bit is not set in the user MSR, the bit BEING set - * indicates that the registers are hot + * new_msr tracks the facilities that are to be restored. Only reload + * if the bit is not set in the user MSR (if it is set, the registers + * are live for the user thread). 
*/ - if ((!(msr & MSR_FP)) && restore_fp(current)) - msr |= MSR_FP | current->thread.fpexc_mode; + if ((!(msr & MSR_FP)) && should_restore_fp()) + new_msr |= MSR_FP | current->thread.fpexc_mode; - if ((!(msr & MSR_VEC)) && restore_altivec(current)) - msr |= MSR_VEC; + if ((!(msr & MSR_VEC)) && should_restore_altivec()) + new_msr |= MSR_VEC; - if ((msr & (MSR_FP | MSR_VEC)) == (MSR_FP | MSR_VEC) && - restore_vsx(current)) { - msr |= MSR_VSX; + if ((!(msr & MSR_VSX)) && should_restore_vsx()) { + if (((msr | new_msr) & (MSR_FP | MSR_VEC)) == (MSR_FP | MSR_VEC)) + new_msr |= MSR_VSX; } - msr_check_and_clear(msr_all_available); + if (new_msr) { + msr_check_and_set(new_msr); + + if (new_msr & MSR_FP) + do_restore_fp(); + + if (new_msr & MSR_VEC) + do_restore_altivec(); + + if (new_msr & MSR_VSX) + do_restore_vsx(); - regs->msr = msr; + msr_check_and_clear(new_msr); + + regs->msr |= new_msr; + } } #endif @@ -1599,6 +1618,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, { struct pt_regs *childregs, *kregs; extern void ret_from_fork(void); + extern void ret_from_fork_scv(void); extern void ret_from_kernel_thread(void); void (*f)(void); unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE; @@ -1635,7 +1655,9 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, if (usp) childregs->gpr[1] = usp; p->thread.regs = childregs; - childregs->gpr[3] = 0; /* Result from fork() */ + /* 64s sets this in ret_from_fork */ + if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64)) + childregs->gpr[3] = 0; /* Result from fork() */ if (clone_flags & CLONE_SETTLS) { if (!is_32bit_task()) childregs->gpr[13] = tls; @@ -1643,7 +1665,10 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, childregs->gpr[2] = tls; } - f = ret_from_fork; + if (trap_is_scv(regs)) + f = ret_from_fork_scv; + else + f = ret_from_fork; } childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX); sp -= STACK_FRAME_OVERHEAD; diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 9cc49f265c86..d8a2fb87ba0c 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -163,7 +163,7 @@ static struct ibm_pa_feature { { .pabyte = 0, .pabit = 6, .cpu_features = CPU_FTR_NOEXECUTE }, { .pabyte = 1, .pabit = 2, .mmu_features = MMU_FTR_CI_LARGE_PAGE }, #ifdef CONFIG_PPC_RADIX_MMU - { .pabyte = 40, .pabit = 0, .mmu_features = MMU_FTR_TYPE_RADIX }, + { .pabyte = 40, .pabit = 0, .mmu_features = MMU_FTR_TYPE_RADIX | MMU_FTR_GTSE }, #endif { .pabyte = 1, .pabit = 1, .invert = 1, .cpu_features = CPU_FTR_NODSISRALIGN }, { .pabyte = 5, .pabit = 0, .cpu_features = CPU_FTR_REAL_LE, @@ -175,6 +175,8 @@ static struct ibm_pa_feature { */ { .pabyte = 22, .pabit = 0, .cpu_features = CPU_FTR_TM_COMP, .cpu_user_ftrs2 = PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_HTM_NOSC_COMP }, + + { .pabyte = 64, .pabit = 0, .cpu_features = CPU_FTR_DAWR1 }, }; static void __init scan_features(unsigned long node, const unsigned char *ftrs, @@ -468,8 +470,9 @@ static bool validate_mem_limit(u64 base, u64 *size) * This contains a list of memory blocks along with NUMA affinity * information. 
*/ -static void __init early_init_drmem_lmb(struct drmem_lmb *lmb, - const __be32 **usm) +static int __init early_init_drmem_lmb(struct drmem_lmb *lmb, + const __be32 **usm, + void *data) { u64 base, size; int is_kexec_kdump = 0, rngs; @@ -484,7 +487,7 @@ static void __init early_init_drmem_lmb(struct drmem_lmb *lmb, */ if ((lmb->flags & DRCONF_MEM_RESERVED) || !(lmb->flags & DRCONF_MEM_ASSIGNED)) - return; + return 0; if (*usm) is_kexec_kdump = 1; @@ -499,7 +502,7 @@ static void __init early_init_drmem_lmb(struct drmem_lmb *lmb, */ rngs = dt_mem_next_cell(dt_root_size_cells, usm); if (!rngs) /* there are no (base, size) duple */ - return; + return 0; } do { @@ -524,6 +527,8 @@ static void __init early_init_drmem_lmb(struct drmem_lmb *lmb, if (lmb->flags & DRCONF_MEM_HOTREMOVABLE) memblock_mark_hotplug(base, size); } while (--rngs); + + return 0; } #endif /* CONFIG_PPC_PSERIES */ @@ -534,7 +539,7 @@ static int __init early_init_dt_scan_memory_ppc(unsigned long node, #ifdef CONFIG_PPC_PSERIES if (depth == 1 && strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) { - walk_drmem_lmbs_early(node, early_init_drmem_lmb); + walk_drmem_lmbs_early(node, NULL, early_init_drmem_lmb); return 0; } #endif @@ -815,6 +820,11 @@ void __init early_init_devtree(void *params) /* Now try to figure out if we are running on LPAR and so on */ pseries_probe_fw_features(); + /* + * Initialize pkey features and default AMR/IAMR values + */ + pkey_early_init_devtree(); + #ifdef CONFIG_PPC_PS3 /* Identify PS3 firmware */ if (of_flat_dt_is_compatible(of_get_flat_dt_root(), "sony,ps3")) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 90c604d00b7d..ae7ec9903191 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -169,6 +169,7 @@ static unsigned long __prombss prom_tce_alloc_end; #ifdef CONFIG_PPC_PSERIES static bool __prombss prom_radix_disable; +static bool __prombss prom_radix_gtse_disable; static bool __prombss prom_xive_disable; #endif @@ -823,6 +824,12 @@ static void __init early_cmdline_parse(void) if (prom_radix_disable) prom_debug("Radix disabled from cmdline\n"); + opt = prom_strstr(prom_cmd_line, "radix_hcall_invalidate=on"); + if (opt) { + prom_radix_gtse_disable = true; + prom_debug("Radix GTSE disabled from cmdline\n"); + } + opt = prom_strstr(prom_cmd_line, "xive=off"); if (opt) { prom_xive_disable = true; @@ -1285,10 +1292,8 @@ static void __init prom_parse_platform_support(u8 index, u8 val, prom_parse_mmu_model(val & OV5_FEAT(OV5_MMU_SUPPORT), support); break; case OV5_INDX(OV5_RADIX_GTSE): /* Radix Extensions */ - if (val & OV5_FEAT(OV5_RADIX_GTSE)) { - prom_debug("Radix - GTSE supported\n"); - support->radix_gtse = true; - } + if (val & OV5_FEAT(OV5_RADIX_GTSE)) + support->radix_gtse = !prom_radix_gtse_disable; break; case OV5_INDX(OV5_XIVE_SUPPORT): /* Interrupt mode */ prom_parse_xive_model(val & OV5_FEAT(OV5_XIVE_SUPPORT), @@ -1336,12 +1341,15 @@ static void __init prom_check_platform_support(void) } } - if (supported.radix_mmu && supported.radix_gtse && - IS_ENABLED(CONFIG_PPC_RADIX_MMU)) { - /* Radix preferred - but we require GTSE for now */ - prom_debug("Asking for radix with GTSE\n"); + if (supported.radix_mmu && IS_ENABLED(CONFIG_PPC_RADIX_MMU)) { + /* Radix preferred - Check if GTSE is also supported */ + prom_debug("Asking for radix\n"); ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_RADIX); - ibm_architecture_vec.vec5.radix_ext = OV5_FEAT(OV5_RADIX_GTSE); + if (supported.radix_gtse) + 
ibm_architecture_vec.vec5.radix_ext = + OV5_FEAT(OV5_RADIX_GTSE); + else + prom_debug("Radix GTSE isn't supported\n"); } else if (supported.hash_mmu) { /* Default to hash mmu (if we can) */ prom_debug("Asking for hash\n"); @@ -3262,7 +3270,7 @@ static int enter_secure_mode(unsigned long kbase, unsigned long fdt) /* * Call the Ultravisor to transfer us to secure memory if we have an ESM blob. */ -static void setup_secure_guest(unsigned long kbase, unsigned long fdt) +static void __init setup_secure_guest(unsigned long kbase, unsigned long fdt) { int ret; @@ -3292,7 +3300,7 @@ static void setup_secure_guest(unsigned long kbase, unsigned long fdt) } } #else -static void setup_secure_guest(unsigned long kbase, unsigned long fdt) +static void __init setup_secure_guest(unsigned long kbase, unsigned long fdt) { } #endif /* CONFIG_PPC_SVM */ diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index 13208a9a02ca..19823a250aa0 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -470,13 +470,15 @@ static int pkey_active(struct task_struct *target, const struct user_regset *reg static int pkey_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { + int ret; + BUILD_BUG_ON(TSO(amr) + sizeof(unsigned long) != TSO(iamr)); - BUILD_BUG_ON(TSO(iamr) + sizeof(unsigned long) != TSO(uamor)); if (!arch_pkeys_enabled()) return -ENODEV; - return membuf_write(&to, &target->thread.amr, ELF_NPKEY * sizeof(unsigned long)); + membuf_write(&to, &target->thread.amr, 2 * sizeof(unsigned long)); + return membuf_store(&to, default_uamor); } static int pkey_set(struct task_struct *target, const struct user_regset *regset, @@ -498,9 +500,17 @@ static int pkey_set(struct task_struct *target, const struct user_regset *regset if (ret) return ret; - /* UAMOR determines which bits of the AMR can be set from userspace. */ - target->thread.amr = (new_amr & target->thread.uamor) | - (target->thread.amr & ~target->thread.uamor); + /* + * UAMOR determines which bits of the AMR can be set from userspace. + * UAMOR value 0b11 indicates that the AMR value can be modified + * from userspace. If the kernel is using a specific key, we avoid + * userspace modifying the AMR value for that key by masking them + * via UAMOR 0b00. + * + * Pick the AMR values for the keys that kernel is using. This + * will be indicated by the ~default_uamor bits. + */ + target->thread.amr = (new_amr & default_uamor) | (target->thread.amr & ~default_uamor); return 0; } diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index a09eba03f180..806d554ce357 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -843,96 +843,6 @@ static void rtas_percpu_suspend_me(void *info) __rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1); } -enum rtas_cpu_state { - DOWN, - UP, -}; - -#ifndef CONFIG_SMP -static int rtas_cpu_state_change_mask(enum rtas_cpu_state state, - cpumask_var_t cpus) -{ - if (!cpumask_empty(cpus)) { - cpumask_clear(cpus); - return -EINVAL; - } else - return 0; -} -#else -/* On return cpumask will be altered to indicate CPUs changed. - * CPUs with states changed will be set in the mask, - * CPUs with status unchanged will be unset in the mask. 
*/ -static int rtas_cpu_state_change_mask(enum rtas_cpu_state state, - cpumask_var_t cpus) -{ - int cpu; - int cpuret = 0; - int ret = 0; - - if (cpumask_empty(cpus)) - return 0; - - for_each_cpu(cpu, cpus) { - struct device *dev = get_cpu_device(cpu); - - switch (state) { - case DOWN: - cpuret = device_offline(dev); - break; - case UP: - cpuret = device_online(dev); - break; - } - if (cpuret < 0) { - pr_debug("%s: cpu_%s for cpu#%d returned %d.\n", - __func__, - ((state == UP) ? "up" : "down"), - cpu, cpuret); - if (!ret) - ret = cpuret; - if (state == UP) { - /* clear bits for unchanged cpus, return */ - cpumask_shift_right(cpus, cpus, cpu); - cpumask_shift_left(cpus, cpus, cpu); - break; - } else { - /* clear bit for unchanged cpu, continue */ - cpumask_clear_cpu(cpu, cpus); - } - } - cond_resched(); - } - - return ret; -} -#endif - -int rtas_online_cpus_mask(cpumask_var_t cpus) -{ - int ret; - - ret = rtas_cpu_state_change_mask(UP, cpus); - - if (ret) { - cpumask_var_t tmp_mask; - - if (!alloc_cpumask_var(&tmp_mask, GFP_KERNEL)) - return ret; - - /* Use tmp_mask to preserve cpus mask from first failure */ - cpumask_copy(tmp_mask, cpus); - rtas_offline_cpus_mask(tmp_mask); - free_cpumask_var(tmp_mask); - } - - return ret; -} - -int rtas_offline_cpus_mask(cpumask_var_t cpus) -{ - return rtas_cpu_state_change_mask(DOWN, cpus); -} - int rtas_ibm_suspend_me(u64 handle) { long state; @@ -940,8 +850,6 @@ int rtas_ibm_suspend_me(u64 handle) unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; struct rtas_suspend_me_data data; DECLARE_COMPLETION_ONSTACK(done); - cpumask_var_t offline_mask; - int cpuret; if (!rtas_service_present("ibm,suspend-me")) return -ENOSYS; @@ -962,9 +870,6 @@ int rtas_ibm_suspend_me(u64 handle) return -EIO; } - if (!alloc_cpumask_var(&offline_mask, GFP_KERNEL)) - return -ENOMEM; - atomic_set(&data.working, 0); atomic_set(&data.done, 0); atomic_set(&data.error, 0); @@ -973,24 +878,8 @@ int rtas_ibm_suspend_me(u64 handle) lock_device_hotplug(); - /* All present CPUs must be online */ - cpumask_andnot(offline_mask, cpu_present_mask, cpu_online_mask); - cpuret = rtas_online_cpus_mask(offline_mask); - if (cpuret) { - pr_err("%s: Could not bring present CPUs online.\n", __func__); - atomic_set(&data.error, cpuret); - goto out; - } - cpu_hotplug_disable(); - /* Check if we raced with a CPU-Offline Operation */ - if (!cpumask_equal(cpu_present_mask, cpu_online_mask)) { - pr_info("%s: Raced against a concurrent CPU-Offline\n", __func__); - atomic_set(&data.error, -EAGAIN); - goto out_hotplug_enable; - } - /* Call function on all CPUs. One of us will make the * rtas call */ @@ -1001,18 +890,11 @@ int rtas_ibm_suspend_me(u64 handle) if (atomic_read(&data.error) != 0) printk(KERN_ERR "Error doing global join\n"); -out_hotplug_enable: - cpu_hotplug_enable(); - /* Take down CPUs not online prior to suspend */ - cpuret = rtas_offline_cpus_mask(offline_mask); - if (cpuret) - pr_warn("%s: Could not restore CPUs to offline state.\n", - __func__); + cpu_hotplug_enable(); -out: unlock_device_hotplug(); - free_cpumask_var(offline_mask); + return atomic_read(&data.error); } diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 89b798f8f656..8561dfb33f24 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -273,37 +273,15 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal) } } -#ifdef CONFIG_PPC_PSERIES -static void handle_prrn_event(s32 scope) -{ - /* - * For PRRN, we must pass the negative of the scope value in - * the RTAS event. 
- */ - pseries_devicetree_update(-scope); - numa_update_cpu_topology(false); -} - static void handle_rtas_event(const struct rtas_error_log *log) { - if (rtas_error_type(log) != RTAS_TYPE_PRRN || !prrn_is_enabled()) + if (!machine_is(pseries)) return; - /* For PRRN Events the extended log length is used to denote - * the scope for calling rtas update-nodes. - */ - handle_prrn_event(rtas_error_extended_log_length(log)); + if (rtas_error_type(log) == RTAS_TYPE_PRRN) + pr_info_ratelimited("Platform resource reassignment ignored.\n"); } -#else - -static void handle_rtas_event(const struct rtas_error_log *log) -{ - return; -} - -#endif - static int rtas_log_open(struct inode * inode, struct file * file) { return 0; diff --git a/arch/powerpc/kernel/secure_boot.c b/arch/powerpc/kernel/secure_boot.c index 4b982324d368..f9af305d9579 100644 --- a/arch/powerpc/kernel/secure_boot.c +++ b/arch/powerpc/kernel/secure_boot.c @@ -23,12 +23,19 @@ bool is_ppc_secureboot_enabled(void) { struct device_node *node; bool enabled = false; + u32 secureboot; node = get_ppc_fw_sb_node(); enabled = of_property_read_bool(node, "os-secureboot-enforcing"); - of_node_put(node); + if (enabled) + goto out; + + if (!of_property_read_u32(of_root, "ibm,secure-boot", &secureboot)) + enabled = (secureboot > 1); + +out: pr_info("Secure boot mode %s\n", enabled ? "enabled" : "disabled"); return enabled; @@ -38,12 +45,19 @@ bool is_ppc_trustedboot_enabled(void) { struct device_node *node; bool enabled = false; + u32 trustedboot; node = get_ppc_fw_sb_node(); enabled = of_property_read_bool(node, "trusted-enabled"); - of_node_put(node); + if (enabled) + goto out; + + if (!of_property_read_u32(of_root, "ibm,trusted-boot", &trustedboot)) + enabled = (trustedboot > 0); + +out: pr_info("Trusted boot mode %s\n", enabled ? 
"enabled" : "disabled"); return enabled; diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index d86701ce116b..c9876aab3142 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -21,13 +21,13 @@ u64 powerpc_security_features __read_mostly = SEC_FTR_DEFAULT; -enum count_cache_flush_type { - COUNT_CACHE_FLUSH_NONE = 0x1, - COUNT_CACHE_FLUSH_SW = 0x2, - COUNT_CACHE_FLUSH_HW = 0x4, +enum branch_cache_flush_type { + BRANCH_CACHE_FLUSH_NONE = 0x1, + BRANCH_CACHE_FLUSH_SW = 0x2, + BRANCH_CACHE_FLUSH_HW = 0x4, }; -static enum count_cache_flush_type count_cache_flush_type = COUNT_CACHE_FLUSH_NONE; -static bool link_stack_flush_enabled; +static enum branch_cache_flush_type count_cache_flush_type = BRANCH_CACHE_FLUSH_NONE; +static enum branch_cache_flush_type link_stack_flush_type = BRANCH_CACHE_FLUSH_NONE; bool barrier_nospec_enabled; static bool no_nospec; @@ -219,24 +219,25 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c if (ccd) seq_buf_printf(&s, "Indirect branch cache disabled"); - if (link_stack_flush_enabled) - seq_buf_printf(&s, ", Software link stack flush"); - - } else if (count_cache_flush_type != COUNT_CACHE_FLUSH_NONE) { + } else if (count_cache_flush_type != BRANCH_CACHE_FLUSH_NONE) { seq_buf_printf(&s, "Mitigation: Software count cache flush"); - if (count_cache_flush_type == COUNT_CACHE_FLUSH_HW) + if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW) seq_buf_printf(&s, " (hardware accelerated)"); - if (link_stack_flush_enabled) - seq_buf_printf(&s, ", Software link stack flush"); - } else if (btb_flush_enabled) { seq_buf_printf(&s, "Mitigation: Branch predictor state flush"); } else { seq_buf_printf(&s, "Vulnerable"); } + if (bcs || ccd || count_cache_flush_type != BRANCH_CACHE_FLUSH_NONE) { + if (link_stack_flush_type != BRANCH_CACHE_FLUSH_NONE) + seq_buf_printf(&s, ", Software link stack flush"); + if (link_stack_flush_type == BRANCH_CACHE_FLUSH_HW) + seq_buf_printf(&s, " (hardware accelerated)"); + } + seq_buf_printf(&s, "\n"); return s.len; @@ -427,61 +428,79 @@ static __init int stf_barrier_debugfs_init(void) device_initcall(stf_barrier_debugfs_init); #endif /* CONFIG_DEBUG_FS */ -static void no_count_cache_flush(void) +static void update_branch_cache_flush(void) { - count_cache_flush_type = COUNT_CACHE_FLUSH_NONE; - pr_info("count-cache-flush: software flush disabled.\n"); -} - -static void toggle_count_cache_flush(bool enable) -{ - if (!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE) && - !security_ftr_enabled(SEC_FTR_FLUSH_LINK_STACK)) - enable = false; - - if (!enable) { - patch_instruction_site(&patch__call_flush_count_cache, - ppc_inst(PPC_INST_NOP)); #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + // This controls the branch from guest_exit_cont to kvm_flush_link_stack + if (link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) { patch_instruction_site(&patch__call_kvm_flush_link_stack, ppc_inst(PPC_INST_NOP)); -#endif - pr_info("link-stack-flush: software flush disabled.\n"); - link_stack_flush_enabled = false; - no_count_cache_flush(); - return; + } else { + // Could use HW flush, but that could also flush count cache + patch_branch_site(&patch__call_kvm_flush_link_stack, + (u64)&kvm_flush_link_stack, BRANCH_SET_LINK); } - - // This enables the branch from _switch to flush_count_cache - patch_branch_site(&patch__call_flush_count_cache, - (u64)&flush_count_cache, BRANCH_SET_LINK); - -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - // This enables the branch from guest_exit_cont to kvm_flush_link_stack 
- patch_branch_site(&patch__call_kvm_flush_link_stack, - (u64)&kvm_flush_link_stack, BRANCH_SET_LINK); #endif - pr_info("link-stack-flush: software flush enabled.\n"); - link_stack_flush_enabled = true; + // This controls the branch from _switch to flush_branch_caches + if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE && + link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) { + patch_instruction_site(&patch__call_flush_branch_caches, + ppc_inst(PPC_INST_NOP)); + } else if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW && + link_stack_flush_type == BRANCH_CACHE_FLUSH_HW) { + patch_instruction_site(&patch__call_flush_branch_caches, + ppc_inst(PPC_INST_BCCTR_FLUSH)); + } else { + patch_branch_site(&patch__call_flush_branch_caches, + (u64)&flush_branch_caches, BRANCH_SET_LINK); + + // If we just need to flush the link stack, early return + if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE) { + patch_instruction_site(&patch__flush_link_stack_return, + ppc_inst(PPC_INST_BLR)); + + // If we have flush instruction, early return + } else if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW) { + patch_instruction_site(&patch__flush_count_cache_return, + ppc_inst(PPC_INST_BLR)); + } + } +} - // If we just need to flush the link stack, patch an early return - if (!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) { - patch_instruction_site(&patch__flush_link_stack_return, - ppc_inst(PPC_INST_BLR)); - no_count_cache_flush(); - return; +static void toggle_branch_cache_flush(bool enable) +{ + if (!enable || !security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) { + if (count_cache_flush_type != BRANCH_CACHE_FLUSH_NONE) + count_cache_flush_type = BRANCH_CACHE_FLUSH_NONE; + + pr_info("count-cache-flush: flush disabled.\n"); + } else { + if (security_ftr_enabled(SEC_FTR_BCCTR_FLUSH_ASSIST)) { + count_cache_flush_type = BRANCH_CACHE_FLUSH_HW; + pr_info("count-cache-flush: hardware flush enabled.\n"); + } else { + count_cache_flush_type = BRANCH_CACHE_FLUSH_SW; + pr_info("count-cache-flush: software flush enabled.\n"); + } } - if (!security_ftr_enabled(SEC_FTR_BCCTR_FLUSH_ASSIST)) { - count_cache_flush_type = COUNT_CACHE_FLUSH_SW; - pr_info("count-cache-flush: full software flush sequence enabled.\n"); - return; + if (!enable || !security_ftr_enabled(SEC_FTR_FLUSH_LINK_STACK)) { + if (link_stack_flush_type != BRANCH_CACHE_FLUSH_NONE) + link_stack_flush_type = BRANCH_CACHE_FLUSH_NONE; + + pr_info("link-stack-flush: flush disabled.\n"); + } else { + if (security_ftr_enabled(SEC_FTR_BCCTR_LINK_FLUSH_ASSIST)) { + link_stack_flush_type = BRANCH_CACHE_FLUSH_HW; + pr_info("link-stack-flush: hardware flush enabled.\n"); + } else { + link_stack_flush_type = BRANCH_CACHE_FLUSH_SW; + pr_info("link-stack-flush: software flush enabled.\n"); + } } - patch_instruction_site(&patch__flush_count_cache_return, ppc_inst(PPC_INST_BLR)); - count_cache_flush_type = COUNT_CACHE_FLUSH_HW; - pr_info("count-cache-flush: hardware assisted flush sequence enabled\n"); + update_branch_cache_flush(); } void setup_count_cache_flush(void) @@ -505,7 +524,7 @@ void setup_count_cache_flush(void) security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) security_ftr_set(SEC_FTR_FLUSH_LINK_STACK); - toggle_count_cache_flush(enable); + toggle_branch_cache_flush(enable); } #ifdef CONFIG_DEBUG_FS @@ -520,14 +539,14 @@ static int count_cache_flush_set(void *data, u64 val) else return -EINVAL; - toggle_count_cache_flush(enable); + toggle_branch_cache_flush(enable); return 0; } static int count_cache_flush_get(void *data, u64 *val) { - if 
(count_cache_flush_type == COUNT_CACHE_FLUSH_NONE) + if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE) *val = 0; else *val = 1; diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 9d3faac53295..b198b0ff25bc 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -928,6 +928,9 @@ void __init setup_arch(char **cmdline_p) /* Reserve large chunks of memory for use by CMA for KVM. */ kvm_cma_reserve(); + /* Reserve large chunks of memory for us by CMA for hugetlb */ + gigantic_hugetlb_cma_reserve(); + klp_init_thread_info(&init_task); init_mm.start_code = (unsigned long)_stext; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 0ba1ed77dc68..6be430107c6f 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -196,7 +196,10 @@ static void __init configure_exceptions(void) /* Under a PAPR hypervisor, we need hypercalls */ if (firmware_has_feature(FW_FEATURE_SET_MODE)) { /* Enable AIL if possible */ - pseries_enable_reloc_on_exc(); + if (!pseries_enable_reloc_on_exc()) { + init_task.thread.fscr &= ~FSCR_SCV; + cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_SCV; + } /* * Tell the hypervisor that we want our exceptions to diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index b4143b6ff093..d15a98c758b8 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -205,8 +205,14 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka, return; /* error signalled ? */ - if (!(regs->ccr & 0x10000000)) + if (trap_is_scv(regs)) { + /* 32-bit compat mode sign extend? */ + if (!IS_ERR_VALUE(ret)) + return; + ret = -ret; + } else if (!(regs->ccr & 0x10000000)) { return; + } switch (ret) { case ERESTART_RESTARTBLOCK: @@ -239,9 +245,14 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka, regs->nip -= 4; regs->result = 0; } else { - regs->result = -EINTR; - regs->gpr[3] = EINTR; - regs->ccr |= 0x10000000; + if (trap_is_scv(regs)) { + regs->result = -EINTR; + regs->gpr[3] = -EINTR; + } else { + regs->result = -EINTR; + regs->gpr[3] = EINTR; + regs->ccr |= 0x10000000; + } } } diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 1415c16ab628..96950f189b5a 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -102,22 +102,18 @@ static inline int save_general_regs(struct pt_regs *regs, struct mcontext __user *frame) { elf_greg_t64 *gregs = (elf_greg_t64 *)regs; - int i; - /* Force usr to alway see softe as 1 (interrupts enabled) */ - elf_greg_t64 softe = 0x1; + int val, i; WARN_ON(!FULL_REGS(regs)); for (i = 0; i <= PT_RESULT; i ++) { - if (i == 14 && !FULL_REGS(regs)) - i = 32; - if ( i == PT_SOFTE) { - if(__put_user((unsigned int)softe, &frame->mc_gregs[i])) - return -EFAULT; - else - continue; - } - if (__put_user((unsigned int)gregs[i], &frame->mc_gregs[i])) + /* Force usr to alway see softe as 1 (interrupts enabled) */ + if (i == PT_SOFTE) + val = 1; + else + val = gregs[i]; + + if (__put_user(val, &frame->mc_gregs[i])) return -EFAULT; } return 0; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 55e5f76554da..bfc939360bad 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -21,6 +21,7 @@ #include <linux/ptrace.h> #include <linux/ratelimit.h> #include <linux/syscalls.h> +#include <linux/pagemap.h> #include <asm/sigcontext.h> #include 
<asm/ucontext.h> @@ -39,8 +40,8 @@ #define GP_REGS_SIZE min(sizeof(elf_gregset_t), sizeof(struct pt_regs)) #define FP_REGS_SIZE sizeof(elf_fpregset_t) -#define TRAMP_TRACEBACK 3 -#define TRAMP_SIZE 6 +#define TRAMP_TRACEBACK 4 +#define TRAMP_SIZE 7 /* * When we have signals to deliver, we set up on the user stack, @@ -600,13 +601,15 @@ static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp) int i; long err = 0; + /* bctrl # call the handler */ + err |= __put_user(PPC_INST_BCTRL, &tramp[0]); /* addi r1, r1, __SIGNAL_FRAMESIZE # Pop the dummy stackframe */ err |= __put_user(PPC_INST_ADDI | __PPC_RT(R1) | __PPC_RA(R1) | - (__SIGNAL_FRAMESIZE & 0xffff), &tramp[0]); + (__SIGNAL_FRAMESIZE & 0xffff), &tramp[1]); /* li r0, __NR_[rt_]sigreturn| */ - err |= __put_user(PPC_INST_ADDI | (syscall & 0xffff), &tramp[1]); + err |= __put_user(PPC_INST_ADDI | (syscall & 0xffff), &tramp[2]); /* sc */ - err |= __put_user(PPC_INST_SC, &tramp[2]); + err |= __put_user(PPC_INST_SC, &tramp[3]); /* Minimal traceback info */ for (i=TRAMP_TRACEBACK; i < TRAMP_SIZE ;i++) @@ -632,7 +635,6 @@ static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp) SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, struct ucontext __user *, new_ctx, long, ctx_size) { - unsigned char tmp; sigset_t set; unsigned long new_msr = 0; int ctx_has_vsx_region = 0; @@ -667,9 +669,8 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, } if (new_ctx == NULL) return 0; - if (!access_ok(new_ctx, ctx_size) - || __get_user(tmp, (u8 __user *) new_ctx) - || __get_user(tmp, (u8 __user *) new_ctx + ctx_size - 1)) + if (!access_ok(new_ctx, ctx_size) || + fault_in_pages_readable((u8 __user *)new_ctx, ctx_size)) return -EFAULT; /* @@ -864,12 +865,12 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, /* Set up to return from userspace. */ if (vdso64_rt_sigtramp && tsk->mm->context.vdso_base) { - regs->link = tsk->mm->context.vdso_base + vdso64_rt_sigtramp; + regs->nip = tsk->mm->context.vdso_base + vdso64_rt_sigtramp; } else { err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]); if (err) goto badframe; - regs->link = (unsigned long) &frame->tramp[0]; + regs->nip = (unsigned long) &frame->tramp[0]; } /* Allocate a dummy caller frame for the signal handler. */ @@ -878,8 +879,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, /* Set up "regs" so we "return" to the signal handler. */ if (is_elf2_task()) { - regs->nip = (unsigned long) ksig->ka.sa.sa_handler; - regs->gpr[12] = regs->nip; + regs->ctr = (unsigned long) ksig->ka.sa.sa_handler; + regs->gpr[12] = regs->ctr; } else { /* Handler is *really* a pointer to the function descriptor for * the signal routine. 
The first entry in the function @@ -889,7 +890,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, func_descr_t __user *funct_desc_ptr = (func_descr_t __user *) ksig->ka.sa.sa_handler; - err |= get_user(regs->nip, &funct_desc_ptr->entry); + err |= get_user(regs->ctr, &funct_desc_ptr->entry); err |= get_user(regs->gpr[2], &funct_desc_ptr->toc); } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 73199470c265..8261999c7d52 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -59,6 +59,7 @@ #include <asm/asm-prototypes.h> #include <asm/cpu_has_feature.h> #include <asm/ftrace.h> +#include <asm/kup.h> #ifdef DEBUG #include <asm/udbg.h> diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c index 79edba3ab312..8e50818aa50b 100644 --- a/arch/powerpc/kernel/syscall_64.c +++ b/arch/powerpc/kernel/syscall_64.c @@ -60,6 +60,11 @@ notrace long system_call_exception(long r3, long r4, long r5, local_irq_enable(); if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) { + if (unlikely(regs->trap == 0x7ff0)) { + /* Unsupported scv vector */ + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + return regs->gpr[3]; + } /* * We use the return value of do_syscall_trace_enter() as the * syscall number. If the syscall was rejected for any reason @@ -78,6 +83,11 @@ notrace long system_call_exception(long r3, long r4, long r5, r8 = regs->gpr[8]; } else if (unlikely(r0 >= NR_syscalls)) { + if (unlikely(regs->trap == 0x7ff0)) { + /* Unsupported scv vector */ + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + return regs->gpr[3]; + } return -ENOSYS; } @@ -105,16 +115,20 @@ notrace long system_call_exception(long r3, long r4, long r5, * local irqs must be disabled. Returns false if the caller must re-enable * them, check for new work, and try again. */ -static notrace inline bool prep_irq_for_enabled_exit(void) +static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri) { /* This must be done with RI=1 because tracing may touch vmaps */ trace_hardirqs_on(); /* This pattern matches prep_irq_for_idle */ - __hard_EE_RI_disable(); + if (clear_ri) + __hard_EE_RI_disable(); + else + __hard_irq_disable(); if (unlikely(lazy_irq_pending_nocheck())) { /* Took an interrupt, may have more exit work to do. */ - __hard_RI_enable(); + if (clear_ri) + __hard_RI_enable(); trace_hardirqs_off(); local_paca->irq_happened |= PACA_IRQ_HARD_DIS; @@ -136,7 +150,8 @@ static notrace inline bool prep_irq_for_enabled_exit(void) * because RI=0 and soft mask state is "unreconciled", so it is marked notrace. */ notrace unsigned long syscall_exit_prepare(unsigned long r3, - struct pt_regs *regs) + struct pt_regs *regs, + long scv) { unsigned long *ti_flagsp = ¤t_thread_info()->flags; unsigned long ti_flags; @@ -151,7 +166,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, ti_flags = *ti_flagsp; - if (unlikely(r3 >= (unsigned long)-MAX_ERRNO)) { + if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && !scv) { if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) { r3 = -r3; regs->ccr |= 0x10000000; /* Set SO bit in CR */ @@ -206,12 +221,20 @@ again: else if (cpu_has_feature(CPU_FTR_ALTIVEC)) mathflags |= MSR_VEC; + /* + * If userspace MSR has all available FP bits set, + * then they are live and no need to restore. If not, + * it means the regs were given up and restore_math + * may decide to restore them (to avoid taking an FP + * fault). 
+ */ if ((regs->msr & mathflags) != mathflags) restore_math(regs); } } - if (unlikely(!prep_irq_for_enabled_exit())) { + /* scv need not set RI=0 because SRRs are not used */ + if (unlikely(!prep_irq_for_enabled_exit(!scv))) { local_irq_enable(); goto again; } @@ -277,12 +300,13 @@ again: else if (cpu_has_feature(CPU_FTR_ALTIVEC)) mathflags |= MSR_VEC; + /* See above restore_math comment */ if ((regs->msr & mathflags) != mathflags) restore_math(regs); } } - if (unlikely(!prep_irq_for_enabled_exit())) { + if (unlikely(!prep_irq_for_enabled_exit(true))) { local_irq_enable(); local_irq_disable(); goto again; @@ -345,7 +369,7 @@ again: } } - if (unlikely(!prep_irq_for_enabled_exit())) { + if (unlikely(!prep_irq_for_enabled_exit(true))) { /* * Can't local_irq_restore to replay if we were in * interrupt context. Must replay directly. diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 571b3259697e..46b4ebc33db7 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -622,8 +622,10 @@ SYSFS_PMCSETUP(pmc7, SPRN_PMC7); SYSFS_PMCSETUP(pmc8, SPRN_PMC8); SYSFS_PMCSETUP(mmcra, SPRN_MMCRA); +SYSFS_PMCSETUP(mmcr3, SPRN_MMCR3); static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra); +static DEVICE_ATTR(mmcr3, 0600, show_mmcr3, store_mmcr3); #endif /* HAS_PPC_PMC56 */ @@ -886,6 +888,9 @@ static int register_cpu_online(unsigned int cpu) #ifdef CONFIG_PMU_SYSFS if (cpu_has_feature(CPU_FTR_MMCRA)) device_create_file(s, &dev_attr_mmcra); + + if (cpu_has_feature(CPU_FTR_ARCH_31)) + device_create_file(s, &dev_attr_mmcr3); #endif /* CONFIG_PMU_SYSFS */ if (cpu_has_feature(CPU_FTR_PURR)) { @@ -980,6 +985,9 @@ static int unregister_cpu_online(unsigned int cpu) #ifdef CONFIG_PMU_SYSFS if (cpu_has_feature(CPU_FTR_MMCRA)) device_remove_file(s, &dev_attr_mmcra); + + if (cpu_has_feature(CPU_FTR_ARCH_31)) + device_remove_file(s, &dev_attr_mmcr3); #endif /* CONFIG_PMU_SYSFS */ if (cpu_has_feature(CPU_FTR_PURR)) { diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index c1fede6ec934..42761ebec9f7 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -73,8 +73,8 @@ ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new) /* Make sure it is what we expect it to be */ if (!ppc_inst_equal(replaced, old)) { - pr_err("%p: replaced (%#x) != old (%#x)", - (void *)ip, ppc_inst_val(replaced), ppc_inst_val(old)); + pr_err("%p: replaced (%s) != old (%s)", + (void *)ip, ppc_inst_as_str(replaced), ppc_inst_as_str(old)); return -EINVAL; } @@ -137,7 +137,7 @@ __ftrace_make_nop(struct module *mod, /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %x\n", ppc_inst_val(op)); + pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op)); return -EINVAL; } @@ -172,8 +172,8 @@ __ftrace_make_nop(struct module *mod, /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */ if (!ppc_inst_equal(op, ppc_inst(PPC_INST_MFLR)) && !ppc_inst_equal(op, ppc_inst(PPC_INST_STD_LR))) { - pr_err("Unexpected instruction %08x around bl _mcount\n", - ppc_inst_val(op)); + pr_err("Unexpected instruction %s around bl _mcount\n", + ppc_inst_as_str(op)); return -EINVAL; } #else @@ -203,7 +203,7 @@ __ftrace_make_nop(struct module *mod, } if (!ppc_inst_equal(op, ppc_inst(PPC_INST_LD_TOC))) { - pr_err("Expected %08x found %08x\n", PPC_INST_LD_TOC, ppc_inst_val(op)); + pr_err("Expected %08x found %s\n", PPC_INST_LD_TOC, ppc_inst_as_str(op)); return -EINVAL; } #endif /* 
@@ -231,7 +231,7 @@ __ftrace_make_nop(struct module *mod,
 
 	/* Make sure that this is still a 24bit jump */
 	if (!is_bl_op(op)) {
-		pr_err("Not expected bl: opcode is %x\n", ppc_inst_val(op));
+		pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op));
 		return -EINVAL;
 	}
 
@@ -406,7 +406,7 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr)
 
 	/* Make sure that this is still a 24bit jump */
 	if (!is_bl_op(op)) {
-		pr_err("Not expected bl: opcode is %x\n", ppc_inst_val(op));
+		pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op));
 		return -EINVAL;
 	}
 
@@ -533,8 +533,8 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 		return -EFAULT;
 
 	if (!expected_nop_sequence(ip, op[0], op[1])) {
-		pr_err("Unexpected call sequence at %p: %x %x\n",
-		       ip, ppc_inst_val(op[0]), ppc_inst_val(op[1]));
+		pr_err("Unexpected call sequence at %p: %s %s\n",
+		       ip, ppc_inst_as_str(op[0]), ppc_inst_as_str(op[1]));
 		return -EINVAL;
 	}
 
@@ -597,7 +597,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 
 	/* It should be pointing to a nop */
 	if (!ppc_inst_equal(op, ppc_inst(PPC_INST_NOP))) {
-		pr_err("Expected NOP but have %x\n", ppc_inst_val(op));
+		pr_err("Expected NOP but have %s\n", ppc_inst_as_str(op));
 		return -EINVAL;
 	}
 
@@ -654,7 +654,7 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)
 	}
 
 	if (!ppc_inst_equal(op, ppc_inst(PPC_INST_NOP))) {
-		pr_err("Unexpected call sequence at %p: %x\n", ip, ppc_inst_val(op));
+		pr_err("Unexpected call sequence at %p: %s\n", ip, ppc_inst_as_str(op));
 		return -EINVAL;
 	}
 
@@ -733,7 +733,7 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
 
 	/* Make sure that this is still a 24bit jump */
 	if (!is_bl_op(op)) {
-		pr_err("Not expected bl: opcode is %x\n", ppc_inst_val(op));
+		pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op));
 		return -EINVAL;
 	}
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 97413a385720..d1ebe152f210 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -2060,14 +2060,6 @@ void DebugException(struct pt_regs *regs, unsigned long debug_status)
 NOKPROBE_SYMBOL(DebugException);
 #endif /* CONFIG_PPC_ADV_DEBUG_REGS */
 
-#if !defined(CONFIG_TAU_INT)
-void TAUException(struct pt_regs *regs)
-{
-	printk("TAU trap at PC: %lx, MSR: %lx, vector=%lx %s\n",
-	       regs->nip, regs->msr, regs->trap, print_tainted());
-}
-#endif /* CONFIG_INT_TAU */
-
 #ifdef CONFIG_ALTIVEC
 void altivec_assist_exception(struct pt_regs *regs)
 {
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index e0f4ba45b6cc..8dad44262e75 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -677,7 +677,7 @@ int vdso_getcpu_init(void)
 	node = cpu_to_node(cpu);
 	WARN_ON_ONCE(node > 0xffff);
 
-	val = (cpu & 0xfff) | ((node & 0xffff) << 16);
+	val = (cpu & 0xffff) | ((node & 0xffff) << 16);
 	mtspr(SPRN_SPRG_VDSO_WRITE, val);
 	get_paca()->sprg_vdso = val;
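
The vdso.c change is a straight bug fix: the CPU number was masked to 12 bits while the layout reserves a 16-bit CPU field, so any CPU number of 4096 or above aliased to a lower CPU. A standalone illustration of the packing (local names, not kernel code):

	#include <assert.h>
	#include <stdint.h>

	/* Mirrors the SPRG3 layout the vDSO getcpu fast path reads:
	 * bits 0-15 hold the CPU number, bits 16-31 the NUMA node. */
	static uint32_t pack_cpu_node(uint32_t cpu, uint32_t node)
	{
		return (cpu & 0xffff) | ((node & 0xffff) << 16);
	}

	int main(void)
	{
		/* Old 0xfff mask: CPU 4097 wrapped to 1, so getcpu() lied. */
		assert(((4097 & 0xfff) | ((0u & 0xffff) << 16)) == 1);

		/* New 0xffff mask keeps the full 16-bit CPU number. */
		assert((pack_cpu_node(4097, 0) & 0xffff) == 4097);
		return 0;
	}
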
diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
index e147bbdc12cd..87ab1152d5ce 100644
--- a/arch/powerpc/kernel/vdso32/Makefile
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -50,7 +50,7 @@ $(obj-vdso32): %.o: %.S FORCE
 
 # actual build commands
 quiet_cmd_vdso32ld = VDSO32L $@
-      cmd_vdso32ld = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^)
+      cmd_vdso32ld = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ $(call cc-ldoption, -Wl$(comma)--orphan-handling=warn) -Wl,-T$(filter %.lds,$^) $(filter %.o,$^)
 quiet_cmd_vdso32as = VDSO32A $@
       cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) -c -o $@ $<
diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S
index 5206c2eb2a1d..4c985467a668 100644
--- a/arch/powerpc/kernel/vdso32/vdso32.lds.S
+++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S
@@ -111,6 +111,7 @@ SECTIONS
 		*(.note.GNU-stack)
 		*(.data .data.* .gnu.linkonce.d.* .sdata*)
 		*(.bss .sbss .dynbss .dynsbss)
+		*(.glink .iplt .plt .rela*)
 	}
 }
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
index 32ebb3522ea1..38c317f25141 100644
--- a/arch/powerpc/kernel/vdso64/Makefile
+++ b/arch/powerpc/kernel/vdso64/Makefile
@@ -34,7 +34,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
 
 # actual build commands
 quiet_cmd_vdso64ld = VDSO64L $@
-      cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^)
+      cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) $(call cc-ldoption, -Wl$(comma)--orphan-handling=warn)
 
 # install commands for the unstripped file
 quiet_cmd_vdso_install = INSTALL $@
diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S
index 526f5ba2593e..cab14324242b 100644
--- a/arch/powerpc/kernel/vdso64/cacheflush.S
+++ b/arch/powerpc/kernel/vdso64/cacheflush.S
@@ -8,6 +8,7 @@
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
 #include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
 #include <asm/asm-offsets.h>
 
 	.text
@@ -24,14 +25,12 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
   .cfi_startproc
 	mflr	r12
   .cfi_register lr,r12
-	mr	r11,r3
-	bl	V_LOCAL_FUNC(__get_datapage)
+	get_datapage	r10, r0
 	mtlr	r12
-	mr	r10,r3
 
 	lwz	r7,CFG_DCACHE_BLOCKSZ(r10)
 	addi	r5,r7,-1
-	andc	r6,r11,r5		/* round low to line bdy */
+	andc	r6,r3,r5		/* round low to line bdy */
 	subf	r8,r6,r4		/* compute length */
 	add	r8,r8,r5		/* ensure we get enough */
 	lwz	r9,CFG_DCACHE_LOGBLOCKSZ(r10)
@@ -48,7 +47,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
 
 	lwz	r7,CFG_ICACHE_BLOCKSZ(r10)
 	addi	r5,r7,-1
-	andc	r6,r11,r5		/* round low to line bdy */
+	andc	r6,r3,r5		/* round low to line bdy */
 	subf	r8,r6,r4		/* compute length */
 	add	r8,r8,r5
 	lwz	r9,CFG_ICACHE_LOGBLOCKSZ(r10)
diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S
index dc84f5ae3802..067247d3efb9 100644
--- a/arch/powerpc/kernel/vdso64/datapage.S
+++ b/arch/powerpc/kernel/vdso64/datapage.S
@@ -10,35 +10,13 @@
 #include <asm/asm-offsets.h>
 #include <asm/unistd.h>
 #include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
 
 	.text
 .global	__kernel_datapage_offset;
 __kernel_datapage_offset:
 	.long	0
 
-V_FUNCTION_BEGIN(__get_datapage)
-  .cfi_startproc
-	/* We don't want that exposed or overridable as we want other objects
-	 * to be able to bl directly to here
-	 */
-	.protected __get_datapage
-	.hidden __get_datapage
-
-	mflr	r0
-  .cfi_register lr,r0
-
-	bcl	20,31,data_page_branch
-data_page_branch:
-	mflr	r3
-	mtlr	r0
-	addi	r3, r3, __kernel_datapage_offset-data_page_branch
-	lwz	r0,0(r3)
-  .cfi_restore lr
-	add	r3,r0,r3
-	blr
-  .cfi_endproc
-V_FUNCTION_END(__get_datapage)
-
 /*
  * void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
  *
@@ -53,7 +31,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
 	mflr	r12
   .cfi_register lr,r12
 	mr	r4,r3
-	bl	V_LOCAL_FUNC(__get_datapage)
+	get_datapage	r3, r0
 	mtlr	r12
 	addi	r3,r3,CFG_SYSCALL_MAP64
 	cmpldi	cr0,r4,0
@@ -75,7 +53,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
   .cfi_startproc
 	mflr	r12
   .cfi_register lr,r12
-	bl	V_LOCAL_FUNC(__get_datapage)
+	get_datapage	r3, r0
 	ld	r3,CFG_TB_TICKS_PER_SEC(r3)
 	mtlr	r12
 	crclr	cr0*4+so
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
index 1c9a04703250..20f8be40c653 100644
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -9,6 +9,7 @@
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
 #include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
 #include <asm/asm-offsets.h>
 #include <asm/unistd.h>
 
@@ -26,7 +27,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
 
 	mr	r11,r3			/* r11 holds tv */
 	mr	r10,r4			/* r10 holds tz */
-	bl	V_LOCAL_FUNC(__get_datapage)	/* get data page */
+	get_datapage	r3, r0
 	cmpldi	r11,0			/* check if tv is NULL */
 	beq	2f
 	lis	r7,1000000@ha		/* load up USEC_PER_SEC */
@@ -71,7 +72,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
 	mflr	r12			/* r12 saves lr */
   .cfi_register lr,r12
 	mr	r11,r4			/* r11 saves tp */
-	bl	V_LOCAL_FUNC(__get_datapage)	/* get data page */
+	get_datapage	r3, r0
 	lis	r7,NSEC_PER_SEC@h	/* want nanoseconds */
 	ori	r7,r7,NSEC_PER_SEC@l
 	beq	cr5,70f
@@ -188,7 +189,7 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
 
 	mflr	r12
   .cfi_register lr,r12
-	bl	V_LOCAL_FUNC(__get_datapage)
+	get_datapage	r3, r0
 	lwz	r5, CLOCK_HRTIMER_RES(r3)
 	mtlr	r12
 	li	r3,0
@@ -221,7 +222,7 @@ V_FUNCTION_BEGIN(__kernel_time)
   .cfi_register lr,r12
 
 	mr	r11,r3			/* r11 holds t */
-	bl	V_LOCAL_FUNC(__get_datapage)
+	get_datapage	r3, r0
 
 	ld	r4,STAMP_XTIME_SEC(r3)
diff --git a/arch/powerpc/kernel/vdso64/sigtramp.S b/arch/powerpc/kernel/vdso64/sigtramp.S
index a8cc0409d7d2..bbf68cd01088 100644
--- a/arch/powerpc/kernel/vdso64/sigtramp.S
+++ b/arch/powerpc/kernel/vdso64/sigtramp.S
@@ -6,6 +6,7 @@
  * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp.
  * Copyright (C) 2004 Alan Modra (amodra@au.ibm.com)), IBM Corp.
  */
+#include <asm/cache.h>		/* IFETCH_ALIGN_BYTES */
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
 #include <asm/unistd.h>
@@ -14,21 +15,17 @@
 
 	.text
 
-/* The nop here is a hack.  The dwarf2 unwind routines subtract 1 from
-   the return address to get an address in the middle of the presumed
-   call instruction.  Since we don't have a call here, we artificially
-   extend the range covered by the unwind info by padding before the
-   real start.  */
-	nop
 	.balign 8
+	.balign IFETCH_ALIGN_BYTES
 V_FUNCTION_BEGIN(__kernel_sigtramp_rt64)
-.Lsigrt_start = . - 4
+.Lsigrt_start:
+	bctrl	/* call the handler */
 	addi	r1, r1, __SIGNAL_FRAMESIZE
 	li	r0,__NR_rt_sigreturn
 	sc
 .Lsigrt_end:
 V_FUNCTION_END(__kernel_sigtramp_rt64)
 
-/* The ".balign 8" above and the following zeros mimic the old stack
+/* The .balign 8 above and the following zeros mimic the old stack
    trampoline layout.  The last magic value is the ucontext pointer,
    chosen in such a way that older libgcc unwind code returns a zero
    for a sigcontext pointer.  */
diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S
index 256fb9720298..4e3a8d4ee614 100644
--- a/arch/powerpc/kernel/vdso64/vdso64.lds.S
+++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S
@@ -30,7 +30,7 @@ SECTIONS
 	. = ALIGN(16);
 	.text : {
 		*(.text .stub .text.* .gnu.linkonce.t.* __ftr_alt_*)
-		*(.sfpr .glink)
+		*(.sfpr)
 	} :text
 	PROVIDE(__etext = .);
 	PROVIDE(_etext = .);
@@ -111,6 +111,7 @@ SECTIONS
 		*(.branch_lt)
 		*(.data .data.* .gnu.linkonce.d.* .sdata*)
 		*(.bss .sbss .dynbss .dynsbss)
+		*(.glink .iplt .plt .rela*)
 	}
 }
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index efc5b52f95d2..801dc28fdcca 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -76,9 +76,7 @@ _GLOBAL(load_up_altivec)
 	oris	r12,r12,MSR_VEC@h
 	std	r12,_MSR(r1)
 #endif
-	/* Don't care if r4 overflows, this is desired behaviour */
-	lbz	r4,THREAD_LOAD_VEC(r5)
-	addi	r4,r4,1
+	li	r4,1
 	stb	r4,THREAD_LOAD_VEC(r5)
 	addi	r6,r5,THREAD_VRSTATE
 	li	r4,1
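
The final vector.S hunk replaces a load/increment/store of THREAD_LOAD_VEC with a plain store of 1: the byte had been treated as a wraparound-tolerant counter, but load_up_altivec only needs to record that vector state was loaded, so the read-modify-write is wasted work. A toy model of the before/after store semantics (illustrative names only, not the kernel's thread_struct):

	/* Stand-in for the one thread_struct byte the hunk touches. */
	struct thread_model { unsigned char load_vec; };

	/* Before: lbz/addi/stb — an 8-bit counter, wrap explicitly OK. */
	static void mark_vec_loaded_old(struct thread_model *t)
	{
		t->load_vec++;
	}

	/* After: li/stb — just set the flag, one instruction and no load. */
	static void mark_vec_loaded_new(struct thread_model *t)
	{
		t->load_vec = 1;
	}
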