diff options
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/events/amd/uncore.c | 17 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm_emulate.h | 1 | ||||
-rw-r--r-- | arch/x86/kernel/apic/vector.c | 14 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/intel.c | 9 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/therm_throt.c | 9 | ||||
-rw-r--r-- | arch/x86/kvm/Kconfig | 2 | ||||
-rw-r--r-- | arch/x86/kvm/emulate.c | 1 | ||||
-rw-r--r-- | arch/x86/kvm/ioapic.c | 7 | ||||
-rw-r--r-- | arch/x86/kvm/svm.c | 3 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/nested.c | 5 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 16 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 8 | ||||
-rw-r--r-- | arch/x86/mm/fault.c | 26 | ||||
-rw-r--r-- | arch/x86/mm/ioremap.c | 21 | ||||
-rw-r--r-- | arch/x86/net/bpf_jit_comp32.c | 10 |
15 files changed, 109 insertions, 40 deletions
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index a6ea07f2aa84..4d867a752f0e 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -190,15 +190,12 @@ static int amd_uncore_event_init(struct perf_event *event) /* * NB and Last level cache counters (MSRs) are shared across all cores - * that share the same NB / Last level cache. Interrupts can be directed - * to a single target core, however, event counts generated by processes - * running on other cores cannot be masked out. So we do not support - * sampling and per-thread events. + * that share the same NB / Last level cache. On family 16h and below, + * Interrupts can be directed to a single target core, however, event + * counts generated by processes running on other cores cannot be masked + * out. So we do not support sampling and per-thread events via + * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts: */ - if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) - return -EINVAL; - - /* and we do not enable counter overflow interrupts */ hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB; hwc->idx = -1; @@ -306,7 +303,7 @@ static struct pmu amd_nb_pmu = { .start = amd_uncore_start, .stop = amd_uncore_stop, .read = amd_uncore_read, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, }; static struct pmu amd_llc_pmu = { @@ -317,7 +314,7 @@ static struct pmu amd_llc_pmu = { .start = amd_uncore_start, .stop = amd_uncore_stop, .read = amd_uncore_read, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, }; static struct amd_uncore *amd_uncore_alloc(unsigned int cpu) diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 2a8f2bd2e5cf..c06e8353efd3 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -360,7 +360,6 @@ struct x86_emulate_ctxt { u64 d; unsigned long _eip; struct operand memop; - /* Fields above regs are cleared together. */ unsigned long _regs[NR_VCPU_REGS]; struct operand *memopp; struct fetch_cache fetch; diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 2c5676b0a6e7..48293d15f1e1 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -838,13 +838,15 @@ static void free_moved_vector(struct apic_chip_data *apicd) bool managed = apicd->is_managed; /* - * This should never happen. Managed interrupts are not - * migrated except on CPU down, which does not involve the - * cleanup vector. But try to keep the accounting correct - * nevertheless. + * Managed interrupts are usually not migrated away + * from an online CPU, but CPU isolation 'managed_irq' + * can make that happen. + * 1) Activation does not take the isolation into account + * to keep the code simple + * 2) Migration away from an isolated CPU can happen when + * a non-isolated CPU which is in the calculated + * affinity mask comes online. */ - WARN_ON_ONCE(managed); - trace_vector_free_moved(apicd->irq, cpu, vector, managed); irq_matrix_free(vector_matrix, cpu, vector, managed); per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index 5627b1091b85..f996ffb887bc 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -493,17 +493,18 @@ static void intel_ppin_init(struct cpuinfo_x86 *c) return; if ((val & 3UL) == 1UL) { - /* PPIN available but disabled: */ + /* PPIN locked in disabled mode */ return; } - /* If PPIN is disabled, but not locked, try to enable: */ - if (!(val & 3UL)) { + /* If PPIN is disabled, try to enable */ + if (!(val & 2UL)) { wrmsrl_safe(MSR_PPIN_CTL, val | 2UL); rdmsrl_safe(MSR_PPIN_CTL, &val); } - if ((val & 3UL) == 2UL) + /* Is the enable bit set? */ + if (val & 2UL) set_cpu_cap(c, X86_FEATURE_INTEL_PPIN); } } diff --git a/arch/x86/kernel/cpu/mce/therm_throt.c b/arch/x86/kernel/cpu/mce/therm_throt.c index 58b4ee3cda77..f36dc0742085 100644 --- a/arch/x86/kernel/cpu/mce/therm_throt.c +++ b/arch/x86/kernel/cpu/mce/therm_throt.c @@ -486,9 +486,14 @@ static int thermal_throttle_offline(unsigned int cpu) { struct thermal_state *state = &per_cpu(thermal_state, cpu); struct device *dev = get_cpu_device(cpu); + u32 l; + + /* Mask the thermal vector before draining evtl. pending work */ + l = apic_read(APIC_LVTTHMR); + apic_write(APIC_LVTTHMR, l | APIC_LVT_MASKED); - cancel_delayed_work(&state->package_throttle.therm_work); - cancel_delayed_work(&state->core_throttle.therm_work); + cancel_delayed_work_sync(&state->package_throttle.therm_work); + cancel_delayed_work_sync(&state->core_throttle.therm_work); state->package_throttle.rate_control_active = false; state->core_throttle.rate_control_active = false; diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 1bb4927030af..9fea0757db92 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -68,7 +68,7 @@ config KVM_WERROR depends on (X86_64 && !KASAN) || !COMPILE_TEST depends on EXPERT help - Add -Werror to the build flags for (and only for) i915.ko. + Add -Werror to the build flags for KVM. If in doubt, say "N". diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index dd19fb3539e0..bc00642e5d3b 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -5173,6 +5173,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) ctxt->fetch.ptr = ctxt->fetch.data; ctxt->fetch.end = ctxt->fetch.data + insn_len; ctxt->opcode_len = 1; + ctxt->intercept = x86_intercept_none; if (insn_len > 0) memcpy(ctxt->fetch.data, insn, insn_len); else { diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 7668fed1ce65..750ff0b29404 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -378,12 +378,15 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) if (e->fields.delivery_mode == APIC_DM_FIXED) { struct kvm_lapic_irq irq; - irq.shorthand = APIC_DEST_NOSHORT; irq.vector = e->fields.vector; irq.delivery_mode = e->fields.delivery_mode << 8; - irq.dest_id = e->fields.dest_id; irq.dest_mode = kvm_lapic_irq_dest_mode(!!e->fields.dest_mode); + irq.level = false; + irq.trig_mode = e->fields.trig_mode; + irq.shorthand = APIC_DEST_NOSHORT; + irq.dest_id = e->fields.dest_id; + irq.msi_redir_hint = false; bitmap_zero(&vcpu_bitmap, 16); kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq, &vcpu_bitmap); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 24c0b2ba8fb9..91000501756e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -6312,7 +6312,8 @@ static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu, enum exit_fastpath_completion *exit_fastpath) { if (!is_guest_mode(vcpu) && - to_svm(vcpu)->vmcb->control.exit_code == EXIT_REASON_MSR_WRITE) + to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR && + to_svm(vcpu)->vmcb->control.exit_info_1) *exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu); } diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index e920d7834d73..9750e590c89d 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -224,7 +224,7 @@ static inline void nested_release_evmcs(struct kvm_vcpu *vcpu) return; kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map, true); - vmx->nested.hv_evmcs_vmptr = -1ull; + vmx->nested.hv_evmcs_vmptr = 0; vmx->nested.hv_evmcs = NULL; } @@ -1923,7 +1923,8 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu, if (!nested_enlightened_vmentry(vcpu, &evmcs_gpa)) return 1; - if (unlikely(evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) { + if (unlikely(!vmx->nested.hv_evmcs || + evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) { if (!vmx->nested.hv_evmcs) vmx->nested.current_vmptr = -1ull; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 40b1e6138cd5..26f8f31563e9 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2338,6 +2338,17 @@ static void hardware_disable(void) kvm_cpu_vmxoff(); } +/* + * There is no X86_FEATURE for SGX yet, but anyway we need to query CPUID + * directly instead of going through cpu_has(), to ensure KVM is trapping + * ENCLS whenever it's supported in hardware. It does not matter whether + * the host OS supports or has enabled SGX. + */ +static bool cpu_has_sgx(void) +{ + return cpuid_eax(0) >= 0x12 && (cpuid_eax(0x12) & BIT(0)); +} + static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, u32 msr, u32 *result) { @@ -2418,8 +2429,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | SECONDARY_EXEC_PT_USE_GPA | SECONDARY_EXEC_PT_CONCEAL_VMX | - SECONDARY_EXEC_ENABLE_VMFUNC | - SECONDARY_EXEC_ENCLS_EXITING; + SECONDARY_EXEC_ENABLE_VMFUNC; + if (cpu_has_sgx()) + opt2 |= SECONDARY_EXEC_ENCLS_EXITING; if (adjust_vmx_controls(min2, opt2, MSR_IA32_VMX_PROCBASED_CTLS2, &_cpu_based_2nd_exec_control) < 0) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5de200663f51..3156e25b0774 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7195,10 +7195,12 @@ static void kvm_timer_init(void) cpu = get_cpu(); policy = cpufreq_cpu_get(cpu); - if (policy && policy->cpuinfo.max_freq) - max_tsc_khz = policy->cpuinfo.max_freq; + if (policy) { + if (policy->cpuinfo.max_freq) + max_tsc_khz = policy->cpuinfo.max_freq; + cpufreq_cpu_put(policy); + } put_cpu(); - cpufreq_cpu_put(policy); #endif cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index fa4ea09593ab..629fdf13f846 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -190,7 +190,7 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) return pmd_k; } -void vmalloc_sync_all(void) +static void vmalloc_sync(void) { unsigned long address; @@ -217,6 +217,16 @@ void vmalloc_sync_all(void) } } +void vmalloc_sync_mappings(void) +{ + vmalloc_sync(); +} + +void vmalloc_sync_unmappings(void) +{ + vmalloc_sync(); +} + /* * 32-bit: * @@ -319,11 +329,23 @@ out: #else /* CONFIG_X86_64: */ -void vmalloc_sync_all(void) +void vmalloc_sync_mappings(void) { + /* + * 64-bit mappings might allocate new p4d/pud pages + * that need to be propagated to all tasks' PGDs. + */ sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END); } +void vmalloc_sync_unmappings(void) +{ + /* + * Unmappings never allocate or free p4d/pud pages. + * No work is required here. + */ +} + /* * 64-bit: * diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 44e4beb4239f..18c637c0dc6f 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -106,6 +106,22 @@ static unsigned int __ioremap_check_encrypted(struct resource *res) return 0; } +/* + * The EFI runtime services data area is not covered by walk_mem_res(), but must + * be mapped encrypted when SEV is active. + */ +static void __ioremap_check_other(resource_size_t addr, struct ioremap_desc *desc) +{ + if (!sev_active()) + return; + + if (!IS_ENABLED(CONFIG_EFI)) + return; + + if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA) + desc->flags |= IORES_MAP_ENCRYPTED; +} + static int __ioremap_collect_map_flags(struct resource *res, void *arg) { struct ioremap_desc *desc = arg; @@ -124,6 +140,9 @@ static int __ioremap_collect_map_flags(struct resource *res, void *arg) * To avoid multiple resource walks, this function walks resources marked as * IORESOURCE_MEM and IORESOURCE_BUSY and looking for system RAM and/or a * resource described not as IORES_DESC_NONE (e.g. IORES_DESC_ACPI_TABLES). + * + * After that, deal with misc other ranges in __ioremap_check_other() which do + * not fall into the above category. */ static void __ioremap_check_mem(resource_size_t addr, unsigned long size, struct ioremap_desc *desc) @@ -135,6 +154,8 @@ static void __ioremap_check_mem(resource_size_t addr, unsigned long size, memset(desc, 0, sizeof(struct ioremap_desc)); walk_mem_res(start, end, desc, __ioremap_collect_map_flags); + + __ioremap_check_other(addr, desc); } /* diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c index 393d251798c0..4d2a7a764602 100644 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c @@ -2039,10 +2039,12 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, } /* and dreg_lo,sreg_lo */ EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo)); - /* and dreg_hi,sreg_hi */ - EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi)); - /* or dreg_lo,dreg_hi */ - EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi)); + if (is_jmp64) { + /* and dreg_hi,sreg_hi */ + EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi)); + /* or dreg_lo,dreg_hi */ + EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi)); + } goto emit_cond_jmp; } case BPF_JMP | BPF_JSET | BPF_K: |