summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/boot/compressed/Makefile14
-rw-r--r--arch/x86/boot/compressed/head_32.S28
-rw-r--r--arch/x86/boot/compressed/head_64.S8
-rw-r--r--arch/x86/crypto/sha-mb/sha1_mb.c4
-rw-r--r--arch/x86/events/amd/core.c2
-rw-r--r--arch/x86/events/intel/core.c1
-rw-r--r--arch/x86/events/intel/lbr.c6
-rw-r--r--arch/x86/events/intel/pt.c75
-rw-r--r--arch/x86/events/intel/pt.h3
-rw-r--r--arch/x86/events/intel/rapl.c1
-rw-r--r--arch/x86/include/asm/hugetlb.h1
-rw-r--r--arch/x86/include/asm/kvm_host.h2
-rw-r--r--arch/x86/include/asm/msr-index.h8
-rw-r--r--arch/x86/include/asm/perf_event.h4
-rw-r--r--arch/x86/kernel/apic/vector.c3
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-genpool.c4
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c12
-rw-r--r--arch/x86/kernel/head_32.S6
-rw-r--r--arch/x86/kvm/cpuid.c1
-rw-r--r--arch/x86/kvm/hyperv.c5
-rw-r--r--arch/x86/kvm/lapic.c8
-rw-r--r--arch/x86/kvm/mmu.c12
-rw-r--r--arch/x86/kvm/mmu.h9
-rw-r--r--arch/x86/kvm/paging_tmpl.h2
-rw-r--r--arch/x86/kvm/vmx.c4
-rw-r--r--arch/x86/kvm/x86.c30
-rw-r--r--arch/x86/mm/setup_nx.c5
-rw-r--r--arch/x86/xen/apic.c12
-rw-r--r--arch/x86/xen/smp.c2
-rw-r--r--arch/x86/xen/spinlock.c6
30 files changed, 219 insertions, 59 deletions
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 6915ff2bd996..8774cb23064f 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -26,7 +26,7 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4
KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
-KBUILD_CFLAGS += -fno-strict-aliasing -fPIC
+KBUILD_CFLAGS += -fno-strict-aliasing $(call cc-option, -fPIE, -fPIC)
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
cflags-$(CONFIG_X86_32) := -march=i386
cflags-$(CONFIG_X86_64) := -mcmodel=small
@@ -40,6 +40,18 @@ GCOV_PROFILE := n
UBSAN_SANITIZE :=n
LDFLAGS := -m elf_$(UTS_MACHINE)
+ifeq ($(CONFIG_RELOCATABLE),y)
+# If kernel is relocatable, build compressed kernel as PIE.
+ifeq ($(CONFIG_X86_32),y)
+LDFLAGS += $(call ld-option, -pie) $(call ld-option, --no-dynamic-linker)
+else
+# To build 64-bit compressed kernel as PIE, we disable relocation
+# overflow check to avoid relocation overflow error with a new linker
+# command-line option, -z noreloc-overflow.
+LDFLAGS += $(shell $(LD) --help 2>&1 | grep -q "\-z noreloc-overflow" \
+ && echo "-z noreloc-overflow -pie --no-dynamic-linker")
+endif
+endif
LDFLAGS_vmlinux := -T
hostprogs-y := mkpiggy
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 8ef964ddc18e..0256064da8da 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -31,6 +31,34 @@
#include <asm/asm-offsets.h>
#include <asm/bootparam.h>
+/*
+ * The 32-bit x86 assembler in binutils 2.26 will generate R_386_GOT32X
+ * relocation to get the symbol address in PIC. When the compressed x86
+ * kernel isn't built as PIC, the linker optimizes R_386_GOT32X
+ * relocations to their fixed symbol addresses. However, when the
+ * compressed x86 kernel is loaded at a different address, it leads
+ * to the following load failure:
+ *
+ * Failed to allocate space for phdrs
+ *
+ * during the decompression stage.
+ *
+ * If the compressed x86 kernel is relocatable at run-time, it should be
+ * compiled with -fPIE, instead of -fPIC, if possible and should be built as
+ * Position Independent Executable (PIE) so that linker won't optimize
+ * R_386_GOT32X relocation to its fixed symbol address. Older
+ * linkers generate R_386_32 relocations against locally defined symbols,
+ * _bss, _ebss, _got and _egot, in PIE. It isn't wrong, just less
+ * optimal than R_386_RELATIVE. But the x86 kernel fails to properly handle
+ * R_386_32 relocations when relocating the kernel. To generate
+ * R_386_RELATIVE relocations, we mark _bss, _ebss, _got and _egot as
+ * hidden:
+ */
+ .hidden _bss
+ .hidden _ebss
+ .hidden _got
+ .hidden _egot
+
__HEAD
ENTRY(startup_32)
#ifdef CONFIG_EFI_STUB
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index b0c0d16ef58d..86558a199139 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -33,6 +33,14 @@
#include <asm/asm-offsets.h>
#include <asm/bootparam.h>
+/*
+ * Locally defined symbols should be marked hidden:
+ */
+ .hidden _bss
+ .hidden _ebss
+ .hidden _got
+ .hidden _egot
+
__HEAD
.code32
ENTRY(startup_32)
diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c
index a8a0224fa0f8..081255cea1ee 100644
--- a/arch/x86/crypto/sha-mb/sha1_mb.c
+++ b/arch/x86/crypto/sha-mb/sha1_mb.c
@@ -453,10 +453,10 @@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
req = cast_mcryptd_ctx_to_req(req_ctx);
if (irqs_disabled())
- rctx->complete(&req->base, ret);
+ req_ctx->complete(&req->base, ret);
else {
local_bh_disable();
- rctx->complete(&req->base, ret);
+ req_ctx->complete(&req->base, ret);
local_bh_enable();
}
}
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 86a9bec18dab..bd3e8421b57c 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -115,7 +115,7 @@ static __initconst const u64 amd_hw_cache_event_ids
/*
* AMD Performance Monitor K7 and later.
*/
-static const u64 amd_perfmon_event_map[] =
+static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 68fa55b4d42e..aff79884e17d 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3639,6 +3639,7 @@ __init int intel_pmu_init(void)
case 78: /* 14nm Skylake Mobile */
case 94: /* 14nm Skylake Desktop */
+ case 85: /* 14nm Skylake Server */
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 6c3b7c1780c9..1ca5d1e7d4f2 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -63,7 +63,7 @@ static enum {
#define LBR_PLM (LBR_KERNEL | LBR_USER)
-#define LBR_SEL_MASK 0x1ff /* valid bits in LBR_SELECT */
+#define LBR_SEL_MASK 0x3ff /* valid bits in LBR_SELECT */
#define LBR_NOT_SUPP -1 /* LBR filter not supported */
#define LBR_IGN 0 /* ignored */
@@ -610,8 +610,10 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
* The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
* in suppress mode. So LBR_SELECT should be set to
* (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK)
+ * But the 10th bit LBR_CALL_STACK does not operate
+ * in suppress mode.
*/
- reg->config = mask ^ x86_pmu.lbr_sel_mask;
+ reg->config = mask ^ (x86_pmu.lbr_sel_mask & ~LBR_CALL_STACK);
if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
(br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 6af7cf71d6b2..09a77dbc73c9 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -136,9 +136,21 @@ static int __init pt_pmu_hw_init(void)
struct dev_ext_attribute *de_attrs;
struct attribute **attrs;
size_t size;
+ u64 reg;
int ret;
long i;
+ if (boot_cpu_has(X86_FEATURE_VMX)) {
+ /*
+ * Intel SDM, 36.5 "Tracing post-VMXON" says that
+ * "IA32_VMX_MISC[bit 14]" being 1 means PT can trace
+ * post-VMXON.
+ */
+ rdmsrl(MSR_IA32_VMX_MISC, reg);
+ if (reg & BIT(14))
+ pt_pmu.vmx = true;
+ }
+
attrs = NULL;
for (i = 0; i < PT_CPUID_LEAVES; i++) {
@@ -269,20 +281,23 @@ static void pt_config(struct perf_event *event)
reg |= (event->attr.config & PT_CONFIG_MASK);
+ event->hw.config = reg;
wrmsrl(MSR_IA32_RTIT_CTL, reg);
}
-static void pt_config_start(bool start)
+static void pt_config_stop(struct perf_event *event)
{
- u64 ctl;
+ u64 ctl = READ_ONCE(event->hw.config);
+
+ /* may be already stopped by a PMI */
+ if (!(ctl & RTIT_CTL_TRACEEN))
+ return;
- rdmsrl(MSR_IA32_RTIT_CTL, ctl);
- if (start)
- ctl |= RTIT_CTL_TRACEEN;
- else
- ctl &= ~RTIT_CTL_TRACEEN;
+ ctl &= ~RTIT_CTL_TRACEEN;
wrmsrl(MSR_IA32_RTIT_CTL, ctl);
+ WRITE_ONCE(event->hw.config, ctl);
+
/*
* A wrmsr that disables trace generation serializes other PT
* registers and causes all data packets to be written to memory,
@@ -291,8 +306,7 @@ static void pt_config_start(bool start)
* The below WMB, separating data store and aux_head store matches
* the consumer's RMB that separates aux_head load and data load.
*/
- if (!start)
- wmb();
+ wmb();
}
static void pt_config_buffer(void *buf, unsigned int topa_idx,
@@ -942,11 +956,17 @@ void intel_pt_interrupt(void)
if (!ACCESS_ONCE(pt->handle_nmi))
return;
- pt_config_start(false);
+ /*
+ * If VMX is on and PT does not support it, don't touch anything.
+ */
+ if (READ_ONCE(pt->vmx_on))
+ return;
if (!event)
return;
+ pt_config_stop(event);
+
buf = perf_get_aux(&pt->handle);
if (!buf)
return;
@@ -983,6 +1003,35 @@ void intel_pt_interrupt(void)
}
}
+void intel_pt_handle_vmx(int on)
+{
+ struct pt *pt = this_cpu_ptr(&pt_ctx);
+ struct perf_event *event;
+ unsigned long flags;
+
+ /* PT plays nice with VMX, do nothing */
+ if (pt_pmu.vmx)
+ return;
+
+ /*
+ * VMXON will clear RTIT_CTL.TraceEn; we need to make
+ * sure to not try to set it while VMX is on. Disable
+ * interrupts to avoid racing with pmu callbacks;
+ * concurrent PMI should be handled fine.
+ */
+ local_irq_save(flags);
+ WRITE_ONCE(pt->vmx_on, on);
+
+ if (on) {
+ /* prevent pt_config_stop() from writing RTIT_CTL */
+ event = pt->handle.event;
+ if (event)
+ event->hw.config = 0;
+ }
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(intel_pt_handle_vmx);
+
/*
* PMU callbacks
*/
@@ -992,6 +1041,9 @@ static void pt_event_start(struct perf_event *event, int mode)
struct pt *pt = this_cpu_ptr(&pt_ctx);
struct pt_buffer *buf = perf_get_aux(&pt->handle);
+ if (READ_ONCE(pt->vmx_on))
+ return;
+
if (!buf || pt_buffer_is_full(buf, pt)) {
event->hw.state = PERF_HES_STOPPED;
return;
@@ -1014,7 +1066,8 @@ static void pt_event_stop(struct perf_event *event, int mode)
* see comment in intel_pt_interrupt().
*/
ACCESS_ONCE(pt->handle_nmi) = 0;
- pt_config_start(false);
+
+ pt_config_stop(event);
if (event->hw.state == PERF_HES_STOPPED)
return;
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index 336878a5d205..3abb5f5cccc8 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -65,6 +65,7 @@ enum pt_capabilities {
struct pt_pmu {
struct pmu pmu;
u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
+ bool vmx;
};
/**
@@ -107,10 +108,12 @@ struct pt_buffer {
* struct pt - per-cpu pt context
* @handle: perf output handle
* @handle_nmi: do handle PT PMI on this cpu, there's an active event
+ * @vmx_on: 1 if VMX is ON on this cpu
*/
struct pt {
struct perf_output_handle handle;
int handle_nmi;
+ int vmx_on;
};
#endif /* __INTEL_PT_H__ */
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 70c93f9b03ac..1705c9d75e44 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -718,6 +718,7 @@ static int __init rapl_pmu_init(void)
break;
case 60: /* Haswell */
case 69: /* Haswell-Celeron */
+ case 70: /* Haswell GT3e */
case 61: /* Broadwell */
case 71: /* Broadwell-H */
rapl_cntr_mask = RAPL_IDX_HSW;
diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h
index f8a29d2c97b0..e6a8613fbfb0 100644
--- a/arch/x86/include/asm/hugetlb.h
+++ b/arch/x86/include/asm/hugetlb.h
@@ -4,6 +4,7 @@
#include <asm/page.h>
#include <asm-generic/hugetlb.h>
+#define hugepages_supported() cpu_has_pse
static inline int is_hugepage_only_range(struct mm_struct *mm,
unsigned long addr,
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f62a9f37f79f..b7e394485a5f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -43,7 +43,7 @@
#define KVM_PIO_PAGE_OFFSET 1
#define KVM_COALESCED_MMIO_PAGE_OFFSET 2
-#define KVM_HALT_POLL_NS_DEFAULT 500000
+#define KVM_HALT_POLL_NS_DEFAULT 400000
#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 426e946ed0c0..5b3c9a55f51c 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -167,6 +167,14 @@
#define MSR_PKG_C9_RESIDENCY 0x00000631
#define MSR_PKG_C10_RESIDENCY 0x00000632
+/* Interrupt Response Limit */
+#define MSR_PKGC3_IRTL 0x0000060a
+#define MSR_PKGC6_IRTL 0x0000060b
+#define MSR_PKGC7_IRTL 0x0000060c
+#define MSR_PKGC8_IRTL 0x00000633
+#define MSR_PKGC9_IRTL 0x00000634
+#define MSR_PKGC10_IRTL 0x00000635
+
/* Run Time Average Power Limiting (RAPL) Interface */
#define MSR_RAPL_POWER_UNIT 0x00000606
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 5a2ed3ed2f26..f353061bba1d 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -285,6 +285,10 @@ static inline void perf_events_lapic_init(void) { }
static inline void perf_check_microcode(void) { }
#endif
+#ifdef CONFIG_CPU_SUP_INTEL
+ extern void intel_pt_handle_vmx(int on);
+#endif
+
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
extern void amd_pmu_enable_virt(void);
extern void amd_pmu_disable_virt(void);
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index ad59d70bcb1a..ef495511f019 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -256,7 +256,8 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
struct irq_desc *desc;
int cpu, vector;
- BUG_ON(!data->cfg.vector);
+ if (!data->cfg.vector)
+ return;
vector = data->cfg.vector;
for_each_cpu_and(cpu, data->domain, cpu_online_mask)
diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
index 0a850100c594..2658e2af74ec 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-genpool.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
@@ -29,7 +29,7 @@ static char gen_pool_buf[MCE_POOLSZ];
void mce_gen_pool_process(void)
{
struct llist_node *head;
- struct mce_evt_llist *node;
+ struct mce_evt_llist *node, *tmp;
struct mce *mce;
head = llist_del_all(&mce_event_llist);
@@ -37,7 +37,7 @@ void mce_gen_pool_process(void)
return;
head = llist_reverse_order(head);
- llist_for_each_entry(node, head, llnode) {
+ llist_for_each_entry_safe(node, tmp, head, llnode) {
mce = &node->mce;
atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node));
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 4e7c6933691c..10c11b4da31d 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -152,6 +152,11 @@ static struct clocksource hyperv_cs = {
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
+static unsigned char hv_get_nmi_reason(void)
+{
+ return 0;
+}
+
static void __init ms_hyperv_init_platform(void)
{
/*
@@ -191,6 +196,13 @@ static void __init ms_hyperv_init_platform(void)
machine_ops.crash_shutdown = hv_machine_crash_shutdown;
#endif
mark_tsc_unstable("running on Hyper-V");
+
+ /*
+ * Generation 2 instances don't support reading the NMI status from
+ * 0x61 port.
+ */
+ if (efi_enabled(EFI_BOOT))
+ x86_platform.get_nmi_reason = hv_get_nmi_reason;
}
const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 54cdbd2003fe..af1112980dd4 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -389,12 +389,6 @@ default_entry:
/* Make changes effective */
wrmsr
- /*
- * And make sure that all the mappings we set up have NX set from
- * the beginning.
- */
- orl $(1 << (_PAGE_BIT_NX - 32)), pa(__supported_pte_mask + 4)
-
enable_paging:
/*
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 8efb839948e5..bbbaa802d13e 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -534,6 +534,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
do_cpuid_1_ent(&entry[i], function, idx);
if (idx == 1) {
entry[i].eax &= kvm_cpuid_D_1_eax_x86_features;
+ cpuid_mask(&entry[i].eax, CPUID_D_1_EAX);
entry[i].ebx = 0;
if (entry[i].eax & (F(XSAVES)|F(XSAVEC)))
entry[i].ebx =
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 5ff3485acb60..01bd7b7a6866 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1116,6 +1116,11 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
break;
case HVCALL_POST_MESSAGE:
case HVCALL_SIGNAL_EVENT:
+ /* don't bother userspace if it has no way to handle it */
+ if (!vcpu_to_synic(vcpu)->active) {
+ res = HV_STATUS_INVALID_HYPERCALL_CODE;
+ break;
+ }
vcpu->run->exit_reason = KVM_EXIT_HYPERV;
vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
vcpu->run->hyperv.u.hcall.input = param;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 443d2a57ad3d..1a2da0e5a373 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1369,7 +1369,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
hrtimer_start(&apic->lapic_timer.timer,
ktime_add_ns(now, apic->lapic_timer.period),
- HRTIMER_MODE_ABS);
+ HRTIMER_MODE_ABS_PINNED);
apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
PRIx64 ", "
@@ -1402,7 +1402,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
expire = ktime_add_ns(now, ns);
expire = ktime_sub_ns(expire, lapic_timer_advance_ns);
hrtimer_start(&apic->lapic_timer.timer,
- expire, HRTIMER_MODE_ABS);
+ expire, HRTIMER_MODE_ABS_PINNED);
} else
apic_timer_expired(apic);
@@ -1868,7 +1868,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
apic->vcpu = vcpu;
hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
- HRTIMER_MODE_ABS);
+ HRTIMER_MODE_ABS_PINNED);
apic->lapic_timer.timer.function = apic_timer_fn;
/*
@@ -2003,7 +2003,7 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
timer = &vcpu->arch.apic->lapic_timer.timer;
if (hrtimer_cancel(timer))
- hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
+ hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
}
/*
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 70e95d097ef1..1ff4dbb73fb7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -557,8 +557,15 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte)
!is_writable_pte(new_spte))
ret = true;
- if (!shadow_accessed_mask)
+ if (!shadow_accessed_mask) {
+ /*
+ * We don't set page dirty when dropping non-writable spte.
+ * So do it now if the new spte is becoming non-writable.
+ */
+ if (ret)
+ kvm_set_pfn_dirty(spte_to_pfn(old_spte));
return ret;
+ }
/*
* Flush TLB when accessed/dirty bits are changed in the page tables,
@@ -605,7 +612,8 @@ static int mmu_spte_clear_track_bits(u64 *sptep)
if (!shadow_accessed_mask || old_spte & shadow_accessed_mask)
kvm_set_pfn_accessed(pfn);
- if (!shadow_dirty_mask || (old_spte & shadow_dirty_mask))
+ if (old_spte & (shadow_dirty_mask ? shadow_dirty_mask :
+ PT_WRITABLE_MASK))
kvm_set_pfn_dirty(pfn);
return 1;
}
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index b70df72e2b33..66b33b96a31b 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -173,10 +173,9 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
int index = (pfec >> 1) +
(smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
bool fault = (mmu->permissions[index] >> pte_access) & 1;
+ u32 errcode = PFERR_PRESENT_MASK;
WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK));
- pfec |= PFERR_PRESENT_MASK;
-
if (unlikely(mmu->pkru_mask)) {
u32 pkru_bits, offset;
@@ -189,15 +188,15 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
pkru_bits = (kvm_read_pkru(vcpu) >> (pte_pkey * 2)) & 3;
/* clear present bit, replace PFEC.RSVD with ACC_USER_MASK. */
- offset = pfec - 1 +
+ offset = (pfec & ~1) +
((pte_access & PT_USER_MASK) << (PFERR_RSVD_BIT - PT_USER_SHIFT));
pkru_bits &= mmu->pkru_mask >> offset;
- pfec |= -pkru_bits & PFERR_PK_MASK;
+ errcode |= -pkru_bits & PFERR_PK_MASK;
fault |= (pkru_bits != 0);
}
- return -(uint32_t)fault & pfec;
+ return -(u32)fault & errcode;
}
void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 1d971c7553c3..bc019f70e0b6 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -360,7 +360,7 @@ retry_walk:
goto error;
if (unlikely(is_rsvd_bits_set(mmu, pte, walker->level))) {
- errcode |= PFERR_RSVD_MASK | PFERR_PRESENT_MASK;
+ errcode = PFERR_RSVD_MASK | PFERR_PRESENT_MASK;
goto error;
}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ee1c8a93871c..133679d520af 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3103,6 +3103,8 @@ static __init int vmx_disabled_by_bios(void)
static void kvm_cpu_vmxon(u64 addr)
{
+ intel_pt_handle_vmx(1);
+
asm volatile (ASM_VMX_VMXON_RAX
: : "a"(&addr), "m"(addr)
: "memory", "cc");
@@ -3172,6 +3174,8 @@ static void vmclear_local_loaded_vmcss(void)
static void kvm_cpu_vmxoff(void)
{
asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
+
+ intel_pt_handle_vmx(0);
}
static void hardware_disable(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 742d0f7d3556..9b7798c7b210 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -700,7 +700,6 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512)
return 1;
}
- kvm_put_guest_xcr0(vcpu);
vcpu->arch.xcr0 = xcr0;
if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
@@ -6095,12 +6094,10 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
}
/* try to inject new event if pending */
- if (vcpu->arch.nmi_pending) {
- if (kvm_x86_ops->nmi_allowed(vcpu)) {
- --vcpu->arch.nmi_pending;
- vcpu->arch.nmi_injected = true;
- kvm_x86_ops->set_nmi(vcpu);
- }
+ if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
+ --vcpu->arch.nmi_pending;
+ vcpu->arch.nmi_injected = true;
+ kvm_x86_ops->set_nmi(vcpu);
} else if (kvm_cpu_has_injectable_intr(vcpu)) {
/*
* Because interrupts can be injected asynchronously, we are
@@ -6569,10 +6566,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (inject_pending_event(vcpu, req_int_win) != 0)
req_immediate_exit = true;
/* enable NMI/IRQ window open exits if needed */
- else if (vcpu->arch.nmi_pending)
- kvm_x86_ops->enable_nmi_window(vcpu);
- else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
- kvm_x86_ops->enable_irq_window(vcpu);
+ else {
+ if (vcpu->arch.nmi_pending)
+ kvm_x86_ops->enable_nmi_window(vcpu);
+ if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
+ kvm_x86_ops->enable_irq_window(vcpu);
+ }
if (kvm_lapic_enabled(vcpu)) {
update_cr8_intercept(vcpu);
@@ -6590,8 +6589,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_x86_ops->prepare_guest_switch(vcpu);
if (vcpu->fpu_active)
kvm_load_guest_fpu(vcpu);
- kvm_load_guest_xcr0(vcpu);
-
vcpu->mode = IN_GUEST_MODE;
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
@@ -6618,6 +6615,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto cancel_injection;
}
+ kvm_load_guest_xcr0(vcpu);
+
if (req_immediate_exit)
smp_send_reschedule(vcpu->cpu);
@@ -6667,6 +6666,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb();
+ kvm_put_guest_xcr0(vcpu);
+
/* Interrupt is enabled by handle_external_intr() */
kvm_x86_ops->handle_external_intr(vcpu);
@@ -7314,7 +7315,6 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
* and assume host would use all available bits.
* Guest xcr0 would be loaded later.
*/
- kvm_put_guest_xcr0(vcpu);
vcpu->guest_fpu_loaded = 1;
__kernel_fpu_begin();
__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state);
@@ -7323,8 +7323,6 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
{
- kvm_put_guest_xcr0(vcpu);
-
if (!vcpu->guest_fpu_loaded) {
vcpu->fpu_counter = 0;
return;
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
index 8bea84724a7d..f65a33f505b6 100644
--- a/arch/x86/mm/setup_nx.c
+++ b/arch/x86/mm/setup_nx.c
@@ -32,8 +32,9 @@ early_param("noexec", noexec_setup);
void x86_configure_nx(void)
{
- /* If disable_nx is set, clear NX on all new mappings going forward. */
- if (disable_nx)
+ if (boot_cpu_has(X86_FEATURE_NX) && !disable_nx)
+ __supported_pte_mask |= _PAGE_NX;
+ else
__supported_pte_mask &= ~_PAGE_NX;
}
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
index abf4901c917b..db52a7fafcc2 100644
--- a/arch/x86/xen/apic.c
+++ b/arch/x86/xen/apic.c
@@ -66,7 +66,7 @@ static u32 xen_apic_read(u32 reg)
ret = HYPERVISOR_platform_op(&op);
if (ret)
- return 0;
+ op.u.pcpu_info.apic_id = BAD_APICID;
return op.u.pcpu_info.apic_id << 24;
}
@@ -142,6 +142,14 @@ static void xen_silent_inquire(int apicid)
{
}
+static int xen_cpu_present_to_apicid(int cpu)
+{
+ if (cpu_present(cpu))
+ return xen_get_apic_id(xen_apic_read(APIC_ID));
+ else
+ return BAD_APICID;
+}
+
static struct apic xen_pv_apic = {
.name = "Xen PV",
.probe = xen_apic_probe_pv,
@@ -162,7 +170,7 @@ static struct apic xen_pv_apic = {
.ioapic_phys_id_map = default_ioapic_phys_id_map, /* Used on 32-bit */
.setup_apic_routing = NULL,
- .cpu_present_to_apicid = default_cpu_present_to_apicid,
+ .cpu_present_to_apicid = xen_cpu_present_to_apicid,
.apicid_to_cpu_present = physid_set_mask_of_physid, /* Used on 32-bit */
.check_phys_apicid_present = default_check_phys_apicid_present, /* smp_sanity_check needs it */
.phys_pkg_id = xen_phys_pkg_id, /* detect_ht */
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 3c6d17fd423a..719cf291dcdf 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -545,6 +545,8 @@ static void xen_play_dead(void) /* used only with HOTPLUG_CPU */
* data back is to call:
*/
tick_nohz_idle_enter();
+
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
#else /* !CONFIG_HOTPLUG_CPU */
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 9e2ba5c6e1dd..f42e78de1e10 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -27,6 +27,12 @@ static bool xen_pvspin = true;
static void xen_qlock_kick(int cpu)
{
+ int irq = per_cpu(lock_kicker_irq, cpu);
+
+ /* Don't kick if the target's kicker interrupt is not initialized. */
+ if (irq == -1)
+ return;
+
xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
}