diff options
author | Ingo Molnar <mingo@kernel.org> | 2024-04-09 09:48:09 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2024-04-09 09:48:09 +0200 |
commit | d1eec383a8abe348b5ce72df9e96bff3d9039134 (patch) | |
tree | 6ea912eb2d4f2bee8301a04ebf82283805218ebf /arch/x86/kernel | |
parent | x86/tsc: Make __use_tsc __ro_after_init (diff) | |
parent | Linux 6.9-rc3 (diff) | |
download | linux-d1eec383a8abe348b5ce72df9e96bff3d9039134.tar.xz linux-d1eec383a8abe348b5ce72df9e96bff3d9039134.zip |
Merge tag 'v6.9-rc3' into locking/core, to pick up fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/callthunks.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/amd.c | 38 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 9 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/core.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mshyperv.c | 93 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mtrr/generic.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/internal.h | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/scattered.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/topology.c | 11 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/topology_common.c | 12 | ||||
-rw-r--r-- | arch/x86/kernel/e820.c | 17 | ||||
-rw-r--r-- | arch/x86/kernel/eisa.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/fpu/xstate.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/fpu/xstate.h | 14 | ||||
-rw-r--r-- | arch/x86/kernel/head64.c | 18 | ||||
-rw-r--r-- | arch/x86/kernel/kprobes/core.c | 11 | ||||
-rw-r--r-- | arch/x86/kernel/kvm.c | 11 | ||||
-rw-r--r-- | arch/x86/kernel/mpparse.c | 10 | ||||
-rw-r--r-- | arch/x86/kernel/nmi.c | 24 | ||||
-rw-r--r-- | arch/x86/kernel/probe_roms.c | 10 | ||||
-rw-r--r-- | arch/x86/kernel/setup.c | 15 | ||||
-rw-r--r-- | arch/x86/kernel/sev.c | 37 | ||||
-rw-r--r-- | arch/x86/kernel/smpboot.c | 32 | ||||
-rw-r--r-- | arch/x86/kernel/x86_init.c | 2 |
24 files changed, 185 insertions, 201 deletions
diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index 30335182b6b0..e92ff0c11db8 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -314,7 +314,7 @@ static bool is_callthunk(void *addr) return !bcmp(pad, insn_buff, tmpl_size); } -int x86_call_depth_emit_accounting(u8 **pprog, void *func) +int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip) { unsigned int tmpl_size = SKL_TMPL_SIZE; u8 insn_buff[MAX_PATCH_LEN]; @@ -327,7 +327,7 @@ int x86_call_depth_emit_accounting(u8 **pprog, void *func) return 0; memcpy(insn_buff, skl_call_thunk_template, tmpl_size); - apply_relocation(insn_buff, tmpl_size, *pprog, + apply_relocation(insn_buff, tmpl_size, ip, skl_call_thunk_template, tmpl_size); memcpy(*pprog, insn_buff, tmpl_size); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 6d8677e80ddb..9bf17c9c29da 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -345,6 +345,28 @@ static void srat_detect_node(struct cpuinfo_x86 *c) #endif } +static void bsp_determine_snp(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_ARCH_HAS_CC_PLATFORM + cc_vendor = CC_VENDOR_AMD; + + if (cpu_has(c, X86_FEATURE_SEV_SNP)) { + /* + * RMP table entry format is not architectural and is defined by the + * per-processor PPR. Restrict SNP support on the known CPU models + * for which the RMP table entry format is currently defined for. + */ + if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && + c->x86 >= 0x19 && snp_probe_rmptable_info()) { + cc_platform_set(CC_ATTR_HOST_SEV_SNP); + } else { + setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); + cc_platform_clear(CC_ATTR_HOST_SEV_SNP); + } + } +#endif +} + static void bsp_init_amd(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { @@ -452,21 +474,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) break; } - if (cpu_has(c, X86_FEATURE_SEV_SNP)) { - /* - * RMP table entry format is not architectural and it can vary by processor - * and is defined by the per-processor PPR. Restrict SNP support on the - * known CPU model and family for which the RMP table entry format is - * currently defined for. - */ - if (!boot_cpu_has(X86_FEATURE_ZEN3) && - !boot_cpu_has(X86_FEATURE_ZEN4) && - !boot_cpu_has(X86_FEATURE_ZEN5)) - setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); - else if (!snp_probe_rmptable_info()) - setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); - } - + bsp_determine_snp(c); return; warn: diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ba8cf5e9ce56..5c1e6d6be267 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -2307,6 +2307,8 @@ void arch_smt_update(void) void __init arch_cpu_finalize_init(void) { + struct cpuinfo_x86 *c = this_cpu_ptr(&cpu_info); + identify_boot_cpu(); select_idle_routine(); @@ -2345,6 +2347,13 @@ void __init arch_cpu_finalize_init(void) fpu__init_system(); fpu__init_cpu(); + /* + * Ensure that access to the per CPU representation has the initial + * boot CPU configuration. + */ + *c = boot_cpu_data; + c->initialized = true; + alternative_instructions(); if (IS_ENABLED(CONFIG_X86_64)) { diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index b5cc557cfc37..84d41be6d06b 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -2500,12 +2500,14 @@ static ssize_t set_bank(struct device *s, struct device_attribute *attr, return -EINVAL; b = &per_cpu(mce_banks_array, s->id)[bank]; - if (!b->init) return -ENODEV; b->ctl = new; + + mutex_lock(&mce_sysfs_mutex); mce_restart(); + mutex_unlock(&mce_sysfs_mutex); return size; } diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 303fef824167..e0fd57a8ba84 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -45,70 +45,70 @@ bool hyperv_paravisor_present __ro_after_init; EXPORT_SYMBOL_GPL(hyperv_paravisor_present); #if IS_ENABLED(CONFIG_HYPERV) -static inline unsigned int hv_get_nested_reg(unsigned int reg) +static inline unsigned int hv_get_nested_msr(unsigned int reg) { - if (hv_is_sint_reg(reg)) - return reg - HV_REGISTER_SINT0 + HV_REGISTER_NESTED_SINT0; + if (hv_is_sint_msr(reg)) + return reg - HV_X64_MSR_SINT0 + HV_X64_MSR_NESTED_SINT0; switch (reg) { - case HV_REGISTER_SIMP: - return HV_REGISTER_NESTED_SIMP; - case HV_REGISTER_SIEFP: - return HV_REGISTER_NESTED_SIEFP; - case HV_REGISTER_SVERSION: - return HV_REGISTER_NESTED_SVERSION; - case HV_REGISTER_SCONTROL: - return HV_REGISTER_NESTED_SCONTROL; - case HV_REGISTER_EOM: - return HV_REGISTER_NESTED_EOM; + case HV_X64_MSR_SIMP: + return HV_X64_MSR_NESTED_SIMP; + case HV_X64_MSR_SIEFP: + return HV_X64_MSR_NESTED_SIEFP; + case HV_X64_MSR_SVERSION: + return HV_X64_MSR_NESTED_SVERSION; + case HV_X64_MSR_SCONTROL: + return HV_X64_MSR_NESTED_SCONTROL; + case HV_X64_MSR_EOM: + return HV_X64_MSR_NESTED_EOM; default: return reg; } } -u64 hv_get_non_nested_register(unsigned int reg) +u64 hv_get_non_nested_msr(unsigned int reg) { u64 value; - if (hv_is_synic_reg(reg) && ms_hyperv.paravisor_present) + if (hv_is_synic_msr(reg) && ms_hyperv.paravisor_present) hv_ivm_msr_read(reg, &value); else rdmsrl(reg, value); return value; } -EXPORT_SYMBOL_GPL(hv_get_non_nested_register); +EXPORT_SYMBOL_GPL(hv_get_non_nested_msr); -void hv_set_non_nested_register(unsigned int reg, u64 value) +void hv_set_non_nested_msr(unsigned int reg, u64 value) { - if (hv_is_synic_reg(reg) && ms_hyperv.paravisor_present) { + if (hv_is_synic_msr(reg) && ms_hyperv.paravisor_present) { hv_ivm_msr_write(reg, value); /* Write proxy bit via wrmsl instruction */ - if (hv_is_sint_reg(reg)) + if (hv_is_sint_msr(reg)) wrmsrl(reg, value | 1 << 20); } else { wrmsrl(reg, value); } } -EXPORT_SYMBOL_GPL(hv_set_non_nested_register); +EXPORT_SYMBOL_GPL(hv_set_non_nested_msr); -u64 hv_get_register(unsigned int reg) +u64 hv_get_msr(unsigned int reg) { if (hv_nested) - reg = hv_get_nested_reg(reg); + reg = hv_get_nested_msr(reg); - return hv_get_non_nested_register(reg); + return hv_get_non_nested_msr(reg); } -EXPORT_SYMBOL_GPL(hv_get_register); +EXPORT_SYMBOL_GPL(hv_get_msr); -void hv_set_register(unsigned int reg, u64 value) +void hv_set_msr(unsigned int reg, u64 value) { if (hv_nested) - reg = hv_get_nested_reg(reg); + reg = hv_get_nested_msr(reg); - hv_set_non_nested_register(reg, value); + hv_set_non_nested_msr(reg, value); } -EXPORT_SYMBOL_GPL(hv_set_register); +EXPORT_SYMBOL_GPL(hv_set_msr); static void (*vmbus_handler)(void); static void (*hv_stimer0_handler)(void); @@ -352,13 +352,24 @@ static void __init reduced_hw_init(void) x86_init.irqs.pre_vector_init = x86_init_noop; } +int hv_get_hypervisor_version(union hv_hypervisor_version_info *info) +{ + unsigned int hv_max_functions; + + hv_max_functions = cpuid_eax(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS); + if (hv_max_functions < HYPERV_CPUID_VERSION) { + pr_err("%s: Could not detect Hyper-V version\n", __func__); + return -ENODEV; + } + + cpuid(HYPERV_CPUID_VERSION, &info->eax, &info->ebx, &info->ecx, &info->edx); + + return 0; +} + static void __init ms_hyperv_init_platform(void) { int hv_max_functions_eax; - int hv_host_info_eax; - int hv_host_info_ebx; - int hv_host_info_ecx; - int hv_host_info_edx; #ifdef CONFIG_PARAVIRT pv_info.name = "Hyper-V"; @@ -409,21 +420,6 @@ static void __init ms_hyperv_init_platform(void) pr_info("Hyper-V: running on a nested hypervisor\n"); } - /* - * Extract host information. - */ - if (hv_max_functions_eax >= HYPERV_CPUID_VERSION) { - hv_host_info_eax = cpuid_eax(HYPERV_CPUID_VERSION); - hv_host_info_ebx = cpuid_ebx(HYPERV_CPUID_VERSION); - hv_host_info_ecx = cpuid_ecx(HYPERV_CPUID_VERSION); - hv_host_info_edx = cpuid_edx(HYPERV_CPUID_VERSION); - - pr_info("Hyper-V: Host Build %d.%d.%d.%d-%d-%d\n", - hv_host_info_ebx >> 16, hv_host_info_ebx & 0xFFFF, - hv_host_info_eax, hv_host_info_edx & 0xFFFFFF, - hv_host_info_ecx, hv_host_info_edx >> 24); - } - if (ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS && ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) { x86_platform.calibrate_tsc = hv_get_tsc_khz; @@ -456,7 +452,7 @@ static void __init ms_hyperv_init_platform(void) /* To be supported: more work is required. */ ms_hyperv.features &= ~HV_MSR_REFERENCE_TSC_AVAILABLE; - /* HV_REGISTER_CRASH_CTL is unsupported. */ + /* HV_MSR_CRASH_CTL is unsupported. */ ms_hyperv.misc_features &= ~HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE; /* Don't trust Hyper-V's TLB-flushing hypercalls. */ @@ -648,6 +644,7 @@ const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = { .init.x2apic_available = ms_hyperv_x2apic_available, .init.msi_ext_dest_id = ms_hyperv_msi_ext_dest_id, .init.init_platform = ms_hyperv_init_platform, + .init.guest_late_init = ms_hyperv_late_init, #ifdef CONFIG_AMD_MEM_ENCRYPT .runtime.sev_es_hcall_prepare = hv_sev_es_hcall_prepare, .runtime.sev_es_hcall_finish = hv_sev_es_hcall_finish, diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 422a4ddc2ab7..7b29ebda024f 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -108,7 +108,7 @@ static inline void k8_check_syscfg_dram_mod_en(void) (boot_cpu_data.x86 >= 0x0f))) return; - if (cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return; rdmsr(MSR_AMD64_SYSCFG, lo, hi); diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index c99f26ebe7a6..1a8687f8073a 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -78,7 +78,8 @@ cpumask_any_housekeeping(const struct cpumask *mask, int exclude_cpu) else cpu = cpumask_any_but(mask, exclude_cpu); - if (!IS_ENABLED(CONFIG_NO_HZ_FULL)) + /* Only continue if tick_nohz_full_mask has been initialized. */ + if (!tick_nohz_full_enabled()) return cpu; /* If the CPU picked isn't marked nohz_full nothing more needs doing. */ diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 0dad49a09b7a..a515328d9d7d 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -49,6 +49,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, + { X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 }, { 0, 0, 0, 0, 0 } }; diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 3259b1d4fefe..aaca8d235dc2 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -415,6 +415,17 @@ void __init topology_init_possible_cpus(void) unsigned int total = assigned + disabled; u32 apicid, firstid; + /* + * If there was no APIC registered, then fake one so that the + * topology bitmap is populated. That ensures that the code below + * is valid and the various query interfaces can be used + * unconditionally. This does not affect the actual APIC code in + * any way because either the local APIC address has not been + * registered or the local APIC was disabled on the command line. + */ + if (topo_info.boot_cpu_apic_id == BAD_APICID) + topology_register_boot_apic(0); + if (!restrict_to_up()) { if (WARN_ON_ONCE(assigned > nr_cpu_ids)) { disabled += assigned - nr_cpu_ids; diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index a50ae8d63d1c..9a6069e7133c 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -140,7 +140,7 @@ static void parse_topology(struct topo_scan *tscan, bool early) } } -static void topo_set_ids(struct topo_scan *tscan) +static void topo_set_ids(struct topo_scan *tscan, bool early) { struct cpuinfo_x86 *c = tscan->c; u32 apicid = c->topo.apicid; @@ -148,8 +148,10 @@ static void topo_set_ids(struct topo_scan *tscan) c->topo.pkg_id = topo_shift_apicid(apicid, TOPO_PKG_DOMAIN); c->topo.die_id = topo_shift_apicid(apicid, TOPO_DIE_DOMAIN); - c->topo.logical_pkg_id = topology_get_logical_id(apicid, TOPO_PKG_DOMAIN); - c->topo.logical_die_id = topology_get_logical_id(apicid, TOPO_DIE_DOMAIN); + if (!early) { + c->topo.logical_pkg_id = topology_get_logical_id(apicid, TOPO_PKG_DOMAIN); + c->topo.logical_die_id = topology_get_logical_id(apicid, TOPO_DIE_DOMAIN); + } /* Package relative core ID */ c->topo.core_id = (apicid & topo_domain_mask(TOPO_PKG_DOMAIN)) >> @@ -187,7 +189,7 @@ void cpu_parse_topology(struct cpuinfo_x86 *c) tscan.dom_shifts[dom], x86_topo_system.dom_shifts[dom]); } - topo_set_ids(&tscan); + topo_set_ids(&tscan, false); } void __init cpu_init_topology(struct cpuinfo_x86 *c) @@ -208,7 +210,7 @@ void __init cpu_init_topology(struct cpuinfo_x86 *c) x86_topo_system.dom_size[dom] = 1U << sft; } - topo_set_ids(&tscan); + topo_set_ids(&tscan, true); /* * AMD systems have Nodes per package which cannot be mapped to diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index b66f540de054..6f1b379e3b38 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1016,17 +1016,6 @@ void __init e820__reserve_setup_data(void) e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); - /* - * SETUP_EFI, SETUP_IMA and SETUP_RNG_SEED are supplied by - * kexec and do not need to be reserved. - */ - if (data->type != SETUP_EFI && - data->type != SETUP_IMA && - data->type != SETUP_RNG_SEED) - e820__range_update_kexec(pa_data, - sizeof(*data) + data->len, - E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); - if (data->type == SETUP_INDIRECT) { len += data->len; early_memunmap(data, sizeof(*data)); @@ -1038,12 +1027,9 @@ void __init e820__reserve_setup_data(void) indirect = (struct setup_indirect *)data->data; - if (indirect->type != SETUP_INDIRECT) { + if (indirect->type != SETUP_INDIRECT) e820__range_update(indirect->addr, indirect->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); - e820__range_update_kexec(indirect->addr, indirect->len, - E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); - } } pa_data = pa_next; @@ -1051,7 +1037,6 @@ void __init e820__reserve_setup_data(void) } e820__update_table(e820_table); - e820__update_table(e820_table_kexec); pr_info("extended physical RAM map:\n"); e820__print_table("reserve setup_data"); diff --git a/arch/x86/kernel/eisa.c b/arch/x86/kernel/eisa.c index e963344b0449..53935b4d62e3 100644 --- a/arch/x86/kernel/eisa.c +++ b/arch/x86/kernel/eisa.c @@ -2,6 +2,7 @@ /* * EISA specific code */ +#include <linux/cc_platform.h> #include <linux/ioport.h> #include <linux/eisa.h> #include <linux/io.h> @@ -12,7 +13,7 @@ static __init int eisa_bus_probe(void) { void __iomem *p; - if (xen_pv_domain() && !xen_initial_domain()) + if ((xen_pv_domain() && !xen_initial_domain()) || cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) return 0; p = ioremap(0x0FFFD9, 4); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 117e74c44e75..33a214b1a4ce 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -178,10 +178,11 @@ void fpu__init_cpu_xstate(void) * Must happen after CR4 setup and before xsetbv() to allow KVM * lazy passthrough. Write independent of the dynamic state static * key as that does not work on the boot CPU. This also ensures - * that any stale state is wiped out from XFD. + * that any stale state is wiped out from XFD. Reset the per CPU + * xfd cache too. */ if (cpu_feature_enabled(X86_FEATURE_XFD)) - wrmsrl(MSR_IA32_XFD, init_fpstate.xfd); + xfd_set_state(init_fpstate.xfd); /* * XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 3518fb26d06b..19ca623ffa2a 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -148,20 +148,26 @@ static inline void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rs #endif #ifdef CONFIG_X86_64 +static inline void xfd_set_state(u64 xfd) +{ + wrmsrl(MSR_IA32_XFD, xfd); + __this_cpu_write(xfd_state, xfd); +} + static inline void xfd_update_state(struct fpstate *fpstate) { if (fpu_state_size_dynamic()) { u64 xfd = fpstate->xfd; - if (__this_cpu_read(xfd_state) != xfd) { - wrmsrl(MSR_IA32_XFD, xfd); - __this_cpu_write(xfd_state, xfd); - } + if (__this_cpu_read(xfd_state) != xfd) + xfd_set_state(xfd); } } extern int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu); #else +static inline void xfd_set_state(u64 xfd) { } + static inline void xfd_update_state(struct fpstate *fpstate) { } static inline int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu) { diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 212e8e06aeba..a817ed0724d1 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -81,6 +81,13 @@ static inline bool check_la57_support(void) if (!(native_read_cr4() & X86_CR4_LA57)) return false; + RIP_REL_REF(__pgtable_l5_enabled) = 1; + RIP_REL_REF(pgdir_shift) = 48; + RIP_REL_REF(ptrs_per_p4d) = 512; + RIP_REL_REF(page_offset_base) = __PAGE_OFFSET_BASE_L5; + RIP_REL_REF(vmalloc_base) = __VMALLOC_BASE_L5; + RIP_REL_REF(vmemmap_base) = __VMEMMAP_BASE_L5; + return true; } @@ -175,7 +182,7 @@ unsigned long __head __startup_64(unsigned long physaddr, p4d = (p4dval_t *)&RIP_REL_REF(level4_kernel_pgt); p4d[MAX_PTRS_PER_P4D - 1] += load_delta; - pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE_NOENC; + pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE; } RIP_REL_REF(level3_kernel_pgt)[PTRS_PER_PUD - 2].pud += load_delta; @@ -431,15 +438,6 @@ asmlinkage __visible void __init __noreturn x86_64_start_kernel(char * real_mode (__START_KERNEL & PGDIR_MASK))); BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); - if (check_la57_support()) { - __pgtable_l5_enabled = 1; - pgdir_shift = 48; - ptrs_per_p4d = 512; - page_offset_base = __PAGE_OFFSET_BASE_L5; - vmalloc_base = __VMALLOC_BASE_L5; - vmemmap_base = __VMEMMAP_BASE_L5; - } - cr4_init_shadow(); /* Kill off the identity-map trampoline */ diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 091b3ab76a18..d0e49bd7c6f3 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -373,7 +373,16 @@ out: kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset, bool *on_func_entry) { - if (is_endbr(*(u32 *)addr)) { + u32 insn; + + /* + * Since 'addr' is not guaranteed to be safe to access, use + * copy_from_kernel_nofault() to read the instruction: + */ + if (copy_from_kernel_nofault(&insn, (void *)addr, sizeof(u32))) + return NULL; + + if (is_endbr(insn)) { *on_func_entry = !offset || offset == 4; if (*on_func_entry) offset = 4; diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 31a48bae2392..263f8aed4e2c 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -65,6 +65,7 @@ static int __init parse_no_stealacc(char *arg) early_param("no-steal-acc", parse_no_stealacc); +static DEFINE_PER_CPU_READ_MOSTLY(bool, async_pf_enabled); static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible; static int has_steal_clock = 0; @@ -244,7 +245,7 @@ noinstr u32 kvm_read_and_reset_apf_flags(void) { u32 flags = 0; - if (__this_cpu_read(apf_reason.enabled)) { + if (__this_cpu_read(async_pf_enabled)) { flags = __this_cpu_read(apf_reason.flags); __this_cpu_write(apf_reason.flags, 0); } @@ -295,7 +296,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt) inc_irq_stat(irq_hv_callback_count); - if (__this_cpu_read(apf_reason.enabled)) { + if (__this_cpu_read(async_pf_enabled)) { token = __this_cpu_read(apf_reason.token); kvm_async_pf_task_wake(token); __this_cpu_write(apf_reason.token, 0); @@ -362,7 +363,7 @@ static void kvm_guest_cpu_init(void) wrmsrl(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR); wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); - __this_cpu_write(apf_reason.enabled, 1); + __this_cpu_write(async_pf_enabled, true); pr_debug("setup async PF for cpu %d\n", smp_processor_id()); } @@ -383,11 +384,11 @@ static void kvm_guest_cpu_init(void) static void kvm_pv_disable_apf(void) { - if (!__this_cpu_read(apf_reason.enabled)) + if (!__this_cpu_read(async_pf_enabled)) return; wrmsrl(MSR_KVM_ASYNC_PF_EN, 0); - __this_cpu_write(apf_reason.enabled, 0); + __this_cpu_write(async_pf_enabled, false); pr_debug("disable async PF for cpu %d\n", smp_processor_id()); } diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 1ccd30c8246f..e89171b0347a 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -197,12 +197,12 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) if (!smp_check_mpc(mpc, oem, str)) return 0; - /* Initialize the lapic mapping */ - if (!acpi_lapic) - register_lapic_address(mpc->lapic); - - if (early) + if (early) { + /* Initialize the lapic mapping */ + if (!acpi_lapic) + register_lapic_address(mpc->lapic); return 1; + } /* Now process the configuration blocks. */ while (count < mpc->length) { diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 9a5b372c706f..ed163c8c8604 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -580,7 +580,7 @@ EXPORT_SYMBOL_GPL(asm_exc_nmi_kvm_vmx); static char *nmi_check_stall_msg[] = { /* */ -/* +--------- nsp->idt_seq_snap & 0x1: CPU is in NMI handler. */ +/* +--------- nmi_seq & 0x1: CPU is currently in NMI handler. */ /* | +------ cpu_is_offline(cpu) */ /* | | +--- nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls): */ /* | | | NMI handler has been invoked. */ @@ -628,22 +628,26 @@ void nmi_backtrace_stall_check(const struct cpumask *btp) nmi_seq = READ_ONCE(nsp->idt_nmi_seq); if (nsp->idt_nmi_seq_snap + 1 == nmi_seq && (nmi_seq & 0x1)) { msgp = "CPU entered NMI handler function, but has not exited"; - } else if ((nsp->idt_nmi_seq_snap & 0x1) != (nmi_seq & 0x1)) { - msgp = "CPU is handling NMIs"; - } else { - idx = ((nsp->idt_seq_snap & 0x1) << 2) | + } else if (nsp->idt_nmi_seq_snap == nmi_seq || + nsp->idt_nmi_seq_snap + 1 == nmi_seq) { + idx = ((nmi_seq & 0x1) << 2) | (cpu_is_offline(cpu) << 1) | (nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls)); msgp = nmi_check_stall_msg[idx]; if (nsp->idt_ignored_snap != READ_ONCE(nsp->idt_ignored) && (idx & 0x1)) modp = ", but OK because ignore_nmis was set"; - if (nmi_seq & 0x1) - msghp = " (CPU currently in NMI handler function)"; - else if (nsp->idt_nmi_seq_snap + 1 == nmi_seq) + if (nsp->idt_nmi_seq_snap + 1 == nmi_seq) msghp = " (CPU exited one NMI handler function)"; + else if (nmi_seq & 0x1) + msghp = " (CPU currently in NMI handler function)"; + else + msghp = " (CPU was never in an NMI handler function)"; + } else { + msgp = "CPU is handling NMIs"; } - pr_alert("%s: CPU %d: %s%s%s, last activity: %lu jiffies ago.\n", - __func__, cpu, msgp, modp, msghp, j - READ_ONCE(nsp->recv_jiffies)); + pr_alert("%s: CPU %d: %s%s%s\n", __func__, cpu, msgp, modp, msghp); + pr_alert("%s: last activity: %lu jiffies ago.\n", + __func__, j - READ_ONCE(nsp->recv_jiffies)); } } diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c index 319fef37d9dc..cc2c34ba7228 100644 --- a/arch/x86/kernel/probe_roms.c +++ b/arch/x86/kernel/probe_roms.c @@ -203,16 +203,6 @@ void __init probe_roms(void) unsigned char c; int i; - /* - * The ROM memory range is not part of the e820 table and is therefore not - * pre-validated by BIOS. The kernel page table maps the ROM region as encrypted - * memory, and SNP requires encrypted memory to be validated before access. - * Do that here. - */ - snp_prep_memory(video_rom_resource.start, - ((system_rom_resource.end + 1) - video_rom_resource.start), - SNP_PAGE_STATE_PRIVATE); - /* video rom */ upper = adapter_rom_resources[0].start; for (start = video_rom_resource.start; start < upper; start += 2048) { diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 3e1e96efadfe..e125e059e2c4 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -9,7 +9,6 @@ #include <linux/console.h> #include <linux/crash_dump.h> #include <linux/dma-map-ops.h> -#include <linux/dmi.h> #include <linux/efi.h> #include <linux/ima.h> #include <linux/init_ohci1394_dma.h> @@ -36,6 +35,7 @@ #include <asm/bios_ebda.h> #include <asm/bugs.h> #include <asm/cacheinfo.h> +#include <asm/coco.h> #include <asm/cpu.h> #include <asm/efi.h> #include <asm/gart.h> @@ -902,7 +902,7 @@ void __init setup_arch(char **cmdline_p) efi_init(); reserve_ibft_region(); - dmi_setup(); + x86_init.resources.dmi_setup(); /* * VMware detection requires dmi to be available, so this @@ -992,6 +992,7 @@ void __init setup_arch(char **cmdline_p) * memory size. */ mem_encrypt_setup_arch(); + cc_random_init(); efi_fake_memmap(); efi_find_mirror(); @@ -1206,16 +1207,6 @@ void __init i386_reserve_resources(void) #endif /* CONFIG_X86_32 */ -#ifndef CONFIG_SMP -void __init smp_prepare_boot_cpu(void) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - - *c = boot_cpu_data; - c->initialized = true; -} -#endif - static struct notifier_block kernel_offset_notifier = { .notifier_call = dump_kernel_offset }; diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index b59b09c2f284..38ad066179d8 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -23,6 +23,7 @@ #include <linux/platform_device.h> #include <linux/io.h> #include <linux/psp-sev.h> +#include <linux/dmi.h> #include <uapi/linux/sev-guest.h> #include <asm/init.h> @@ -795,21 +796,6 @@ void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_SHARED); } -void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) -{ - unsigned long vaddr, npages; - - vaddr = (unsigned long)__va(paddr); - npages = PAGE_ALIGN(sz) >> PAGE_SHIFT; - - if (op == SNP_PAGE_STATE_PRIVATE) - early_snp_set_memory_private(vaddr, paddr, npages); - else if (op == SNP_PAGE_STATE_SHARED) - early_snp_set_memory_shared(vaddr, paddr, npages); - else - WARN(1, "invalid memory op %d\n", op); -} - static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr, unsigned long vaddr_end, int op) { @@ -2136,6 +2122,17 @@ void __head __noreturn snp_abort(void) sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); } +/* + * SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are + * enabled, as the alternative (fallback) logic for DMI probing in the legacy + * ROM region can cause a crash since this region is not pre-validated. + */ +void __init snp_dmi_setup(void) +{ + if (efi_enabled(EFI_CONFIG_TABLES)) + dmi_setup(); +} + static void dump_cpuid_table(void) { const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); @@ -2287,16 +2284,6 @@ static int __init snp_init_platform_device(void) } device_initcall(snp_init_platform_device); -void kdump_sev_callback(void) -{ - /* - * Do wbinvd() on remote CPUs when SNP is enabled in order to - * safely do SNP_SHUTDOWN on the local CPU. - */ - if (cpu_feature_enabled(X86_FEATURE_SEV_SNP)) - wbinvd(); -} - void sev_show_status(void) { int i; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fe355c89f6c1..76bb65045c64 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -313,14 +313,6 @@ static void notrace start_secondary(void *unused) cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } -static void __init smp_store_boot_cpu_info(void) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - - *c = boot_cpu_data; - c->initialized = true; -} - /* * The bootstrap kernel entry code has set these up. Save them for * a given CPU @@ -1039,29 +1031,15 @@ static __init void disable_smp(void) cpumask_set_cpu(0, topology_die_cpumask(0)); } -static void __init smp_cpu_index_default(void) -{ - int i; - struct cpuinfo_x86 *c; - - for_each_possible_cpu(i) { - c = &cpu_data(i); - /* mark all to hotplug */ - c->cpu_index = nr_cpu_ids; - } -} - void __init smp_prepare_cpus_common(void) { unsigned int i; - smp_cpu_index_default(); - - /* - * Setup boot CPU information - */ - smp_store_boot_cpu_info(); /* Final full version of the data */ - mb(); + /* Mark all except the boot CPU as hotpluggable */ + for_each_possible_cpu(i) { + if (i) + per_cpu(cpu_info.cpu_index, i) = nr_cpu_ids; + } for_each_possible_cpu(i) { zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index a42830dc151b..d5dc5a92635a 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -3,6 +3,7 @@ * * For licencing details see kernel-base/COPYING */ +#include <linux/dmi.h> #include <linux/init.h> #include <linux/ioport.h> #include <linux/export.h> @@ -66,6 +67,7 @@ struct x86_init_ops x86_init __initdata = { .probe_roms = probe_roms, .reserve_resources = reserve_standard_io_resources, .memory_setup = e820__memory_setup_default, + .dmi_setup = dmi_setup, }, .mpparse = { |