summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/amd_nb.c2
-rw-r--r--arch/x86/kernel/apic/io_apic.c8
-rw-r--r--arch/x86/kernel/asm-offsets_64.c1
-rw-r--r--arch/x86/kernel/cpu/common.c2
-rw-r--r--arch/x86/kernel/cpu/microcode/amd_early.c33
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c10
-rw-r--r--arch/x86/kernel/cpu/microcode/core_early.c2
-rw-r--r--arch/x86/kernel/cpu/perf_event.h4
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd_ibs.c15
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c98
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_lbr.c25
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c57
-rw-r--r--arch/x86/kernel/cpu/proc.c10
-rw-r--r--arch/x86/kernel/dumpstack_64.c1
-rw-r--r--arch/x86/kernel/entry_64.S81
-rw-r--r--arch/x86/kernel/irq.c30
-rw-r--r--arch/x86/kernel/kprobes/core.c8
-rw-r--r--arch/x86/kernel/kprobes/opt.c4
-rw-r--r--arch/x86/kernel/ptrace.c2
-rw-r--r--arch/x86/kernel/setup.c2
-rw-r--r--arch/x86/kernel/smpboot.c15
-rw-r--r--arch/x86/kernel/traps.c159
-rw-r--r--arch/x86/kernel/uprobes.c2
-rw-r--r--arch/x86/kernel/x86_init.c10
24 files changed, 402 insertions, 179 deletions
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index f04dbb3069b8..5caed1dd7ccf 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -21,6 +21,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F3) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) },
{}
@@ -30,6 +31,7 @@ EXPORT_SYMBOL(amd_nb_misc_ids);
static const struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F4) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) },
{}
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 1183d545da1e..7ffe0a2b870f 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3158,7 +3158,7 @@ msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
- __write_msi_msg(data->msi_desc, &msg);
+ __pci_write_msi_msg(data->msi_desc, &msg);
return IRQ_SET_MASK_OK_NOCOPY;
}
@@ -3169,8 +3169,8 @@ msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
*/
static struct irq_chip msi_chip = {
.name = "PCI-MSI",
- .irq_unmask = unmask_msi_irq,
- .irq_mask = mask_msi_irq,
+ .irq_unmask = pci_msi_unmask_irq,
+ .irq_mask = pci_msi_mask_irq,
.irq_ack = ack_apic_edge,
.irq_set_affinity = msi_set_affinity,
.irq_retrigger = ioapic_retrigger_irq,
@@ -3196,7 +3196,7 @@ int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
* MSI message denotes a contiguous group of IRQs, written for 0th IRQ.
*/
if (!irq_offset)
- write_msi_msg(irq, &msg);
+ pci_write_msi_msg(irq, &msg);
setup_remapped_irq(irq, irq_cfg(irq), chip);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index e7c798b354fa..4f9359f36bb7 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -48,7 +48,6 @@ int main(void)
#define ENTRY(entry) OFFSET(pt_regs_ ## entry, pt_regs, entry)
ENTRY(bx);
- ENTRY(bx);
ENTRY(cx);
ENTRY(dx);
ENTRY(sp);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4b4f78c9ba19..cfa9b5b2c27a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -146,6 +146,8 @@ EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
static int __init x86_xsave_setup(char *s)
{
+ if (strlen(s))
+ return 0;
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
setup_clear_cpu_cap(X86_FEATURE_XSAVES);
diff --git a/arch/x86/kernel/cpu/microcode/amd_early.c b/arch/x86/kernel/cpu/microcode/amd_early.c
index 7aa1acc79789..06674473b0e6 100644
--- a/arch/x86/kernel/cpu/microcode/amd_early.c
+++ b/arch/x86/kernel/cpu/microcode/amd_early.c
@@ -108,12 +108,13 @@ static size_t compute_container_size(u8 *data, u32 total_size)
* load_microcode_amd() to save equivalent cpu table and microcode patches in
* kernel heap memory.
*/
-static void apply_ucode_in_initrd(void *ucode, size_t size)
+static void apply_ucode_in_initrd(void *ucode, size_t size, bool save_patch)
{
struct equiv_cpu_entry *eq;
size_t *cont_sz;
u32 *header;
u8 *data, **cont;
+ u8 (*patch)[PATCH_MAX_SIZE];
u16 eq_id = 0;
int offset, left;
u32 rev, eax, ebx, ecx, edx;
@@ -123,10 +124,12 @@ static void apply_ucode_in_initrd(void *ucode, size_t size)
new_rev = (u32 *)__pa_nodebug(&ucode_new_rev);
cont_sz = (size_t *)__pa_nodebug(&container_size);
cont = (u8 **)__pa_nodebug(&container);
+ patch = (u8 (*)[PATCH_MAX_SIZE])__pa_nodebug(&amd_ucode_patch);
#else
new_rev = &ucode_new_rev;
cont_sz = &container_size;
cont = &container;
+ patch = &amd_ucode_patch;
#endif
data = ucode;
@@ -213,9 +216,9 @@ static void apply_ucode_in_initrd(void *ucode, size_t size)
rev = mc->hdr.patch_id;
*new_rev = rev;
- /* save ucode patch */
- memcpy(amd_ucode_patch, mc,
- min_t(u32, header[1], PATCH_MAX_SIZE));
+ if (save_patch)
+ memcpy(patch, mc,
+ min_t(u32, header[1], PATCH_MAX_SIZE));
}
}
@@ -246,7 +249,7 @@ void __init load_ucode_amd_bsp(void)
*data = cp.data;
*size = cp.size;
- apply_ucode_in_initrd(cp.data, cp.size);
+ apply_ucode_in_initrd(cp.data, cp.size, true);
}
#ifdef CONFIG_X86_32
@@ -263,7 +266,7 @@ void load_ucode_amd_ap(void)
size_t *usize;
void **ucode;
- mc = (struct microcode_amd *)__pa(amd_ucode_patch);
+ mc = (struct microcode_amd *)__pa_nodebug(amd_ucode_patch);
if (mc->hdr.patch_id && mc->hdr.processor_rev_id) {
__apply_microcode_amd(mc);
return;
@@ -275,7 +278,7 @@ void load_ucode_amd_ap(void)
if (!*ucode || !*usize)
return;
- apply_ucode_in_initrd(*ucode, *usize);
+ apply_ucode_in_initrd(*ucode, *usize, false);
}
static void __init collect_cpu_sig_on_bsp(void *arg)
@@ -339,7 +342,7 @@ void load_ucode_amd_ap(void)
* AP has a different equivalence ID than BSP, looks like
* mixed-steppings silicon so go through the ucode blob anew.
*/
- apply_ucode_in_initrd(ucode_cpio.data, ucode_cpio.size);
+ apply_ucode_in_initrd(ucode_cpio.data, ucode_cpio.size, false);
}
}
#endif
@@ -347,7 +350,9 @@ void load_ucode_amd_ap(void)
int __init save_microcode_in_initrd_amd(void)
{
unsigned long cont;
+ int retval = 0;
enum ucode_state ret;
+ u8 *cont_va;
u32 eax;
if (!container)
@@ -355,13 +360,15 @@ int __init save_microcode_in_initrd_amd(void)
#ifdef CONFIG_X86_32
get_bsp_sig();
- cont = (unsigned long)container;
+ cont = (unsigned long)container;
+ cont_va = __va(container);
#else
/*
* We need the physical address of the container for both bitness since
* boot_params.hdr.ramdisk_image is a physical address.
*/
- cont = __pa(container);
+ cont = __pa(container);
+ cont_va = container;
#endif
/*
@@ -372,6 +379,8 @@ int __init save_microcode_in_initrd_amd(void)
if (relocated_ramdisk)
container = (u8 *)(__va(relocated_ramdisk) +
(cont - boot_params.hdr.ramdisk_image));
+ else
+ container = cont_va;
if (ucode_new_rev)
pr_info("microcode: updated early to new patch_level=0x%08x\n",
@@ -382,7 +391,7 @@ int __init save_microcode_in_initrd_amd(void)
ret = load_microcode_amd(eax, container, container_size);
if (ret != UCODE_OK)
- return -EINVAL;
+ retval = -EINVAL;
/*
* This will be freed any msec now, stash patches for the current
@@ -391,5 +400,5 @@ int __init save_microcode_in_initrd_amd(void)
container = NULL;
container_size = 0;
- return 0;
+ return retval;
}
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index dd9d6190b08d..08fe6e8a726e 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -465,6 +465,16 @@ static void mc_bp_resume(void)
if (uci->valid && uci->mc)
microcode_ops->apply_microcode(cpu);
+#ifdef CONFIG_X86_64
+ else if (!uci->mc)
+ /*
+ * We might resume and not have applied late microcode but still
+ * have a newer patch stashed from the early loader. We don't
+ * have it in uci->mc so we have to load it the same way we're
+ * applying patches early on the APs.
+ */
+ load_ucode_ap();
+#endif
}
static struct syscore_ops mc_syscore_ops = {
diff --git a/arch/x86/kernel/cpu/microcode/core_early.c b/arch/x86/kernel/cpu/microcode/core_early.c
index 5f28a64e71ea..2c017f242a78 100644
--- a/arch/x86/kernel/cpu/microcode/core_early.c
+++ b/arch/x86/kernel/cpu/microcode/core_early.c
@@ -124,7 +124,7 @@ void __init load_ucode_bsp(void)
static bool check_loader_disabled_ap(void)
{
#ifdef CONFIG_X86_32
- return __pa_nodebug(dis_ucode_ldr);
+ return *((bool *)__pa_nodebug(&dis_ucode_ldr));
#else
return dis_ucode_ldr;
#endif
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index fc5eb390b368..4e6cdb0ddc70 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -253,6 +253,10 @@ struct cpu_hw_events {
#define INTEL_UEVENT_CONSTRAINT(c, n) \
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
+/* Like UEVENT_CONSTRAINT, but match flags too */
+#define INTEL_FLAGS_UEVENT_CONSTRAINT(c, n) \
+ EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
+
#define INTEL_PLD_CONSTRAINT(c, n) \
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index cbb1be3ed9e4..a61f5c6911da 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -565,6 +565,21 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
perf_ibs->offset_max,
offset + 1);
} while (offset < offset_max);
+ if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+ /*
+ * Read IbsBrTarget and IbsOpData4 separately
+ * depending on their availability.
+ * Can't add to offset_max as they are staggered
+ */
+ if (ibs_caps & IBS_CAPS_BRNTRGT) {
+ rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++);
+ size++;
+ }
+ if (ibs_caps & IBS_CAPS_OPDATA4) {
+ rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++);
+ size++;
+ }
+ }
ibs_data.size = sizeof(u64) * size;
regs = *iregs;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 46211bcc813e..3c895d480cd7 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -552,18 +552,18 @@ int intel_pmu_drain_bts_buffer(void)
* PEBS
*/
struct event_constraint intel_core2_pebs_event_constraints[] = {
- INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
- INTEL_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
- INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
- INTEL_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */
- INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
EVENT_CONSTRAINT_END
};
struct event_constraint intel_atom_pebs_event_constraints[] = {
- INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
- INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
- INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
EVENT_CONSTRAINT_END
};
@@ -577,36 +577,36 @@ struct event_constraint intel_slm_pebs_event_constraints[] = {
struct event_constraint intel_nehalem_pebs_event_constraints[] = {
INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
- INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
- INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */
INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
- INTEL_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
- INTEL_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
- INTEL_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
- INTEL_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
EVENT_CONSTRAINT_END
};
struct event_constraint intel_westmere_pebs_event_constraints[] = {
INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
- INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
- INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
- INTEL_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
- INTEL_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
EVENT_CONSTRAINT_END
};
struct event_constraint intel_snb_pebs_event_constraints[] = {
- INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
@@ -617,7 +617,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
};
struct event_constraint intel_ivb_pebs_event_constraints[] = {
- INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
@@ -628,7 +628,7 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
};
struct event_constraint intel_hsw_pebs_event_constraints[] = {
- INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */
/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
@@ -724,6 +724,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
unsigned long ip = regs->ip;
int is_64bit = 0;
void *kaddr;
+ int size;
/*
* We don't need to fixup if the PEBS assist is fault like
@@ -758,11 +759,12 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
return 1;
}
+ size = ip - to;
if (!kernel_ip(ip)) {
- int size, bytes;
+ int bytes;
u8 *buf = this_cpu_read(insn_buffer);
- size = ip - to; /* Must fit our buffer, see above */
+ /* 'size' must fit our buffer, see above */
bytes = copy_from_user_nmi(buf, (void __user *)to, size);
if (bytes != 0)
return 0;
@@ -780,11 +782,20 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
#ifdef CONFIG_X86_64
is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
#endif
- insn_init(&insn, kaddr, is_64bit);
+ insn_init(&insn, kaddr, size, is_64bit);
insn_get_length(&insn);
+ /*
+ * Make sure there was not a problem decoding the
+ * instruction and getting the length. This is
+ * doubly important because we have an infinite
+ * loop if insn.length=0.
+ */
+ if (!insn.length)
+ break;
to += insn.length;
kaddr += insn.length;
+ size -= insn.length;
} while (to < ip);
if (to == ip) {
@@ -886,6 +897,29 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
regs.bp = pebs->bp;
regs.sp = pebs->sp;
+ if (sample_type & PERF_SAMPLE_REGS_INTR) {
+ regs.ax = pebs->ax;
+ regs.bx = pebs->bx;
+ regs.cx = pebs->cx;
+ regs.dx = pebs->dx;
+ regs.si = pebs->si;
+ regs.di = pebs->di;
+ regs.bp = pebs->bp;
+ regs.sp = pebs->sp;
+
+ regs.flags = pebs->flags;
+#ifndef CONFIG_X86_32
+ regs.r8 = pebs->r8;
+ regs.r9 = pebs->r9;
+ regs.r10 = pebs->r10;
+ regs.r11 = pebs->r11;
+ regs.r12 = pebs->r12;
+ regs.r13 = pebs->r13;
+ regs.r14 = pebs->r14;
+ regs.r15 = pebs->r15;
+#endif
+ }
+
if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
regs.ip = pebs->real_ip;
regs.flags |= PERF_EFLAGS_EXACT;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 45fa730a5283..58f1a94beaf0 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -465,7 +465,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
{
struct insn insn;
void *addr;
- int bytes, size = MAX_INSN_SIZE;
+ int bytes_read, bytes_left;
int ret = X86_BR_NONE;
int ext, to_plm, from_plm;
u8 buf[MAX_INSN_SIZE];
@@ -493,8 +493,10 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
return X86_BR_NONE;
/* may fail if text not present */
- bytes = copy_from_user_nmi(buf, (void __user *)from, size);
- if (bytes != 0)
+ bytes_left = copy_from_user_nmi(buf, (void __user *)from,
+ MAX_INSN_SIZE);
+ bytes_read = MAX_INSN_SIZE - bytes_left;
+ if (!bytes_read)
return X86_BR_NONE;
addr = buf;
@@ -505,10 +507,19 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
* Ensure we don't blindy read any address by validating it is
* a known text address.
*/
- if (kernel_text_address(from))
+ if (kernel_text_address(from)) {
addr = (void *)from;
- else
+ /*
+ * Assume we can get the maximum possible size
+ * when grabbing kernel data. This is not
+ * _strictly_ true since we could possibly be
+ * executing up next to a memory hole, but
+ * it is very unlikely to be a problem.
+ */
+ bytes_read = MAX_INSN_SIZE;
+ } else {
return X86_BR_NONE;
+ }
}
/*
@@ -518,8 +529,10 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
#ifdef CONFIG_X86_64
is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
#endif
- insn_init(&insn, addr, is64);
+ insn_init(&insn, addr, bytes_read, is64);
insn_get_opcode(&insn);
+ if (!insn.opcode.got)
+ return X86_BR_ABORT;
switch (insn.opcode.bytes[0]) {
case 0xf:
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
index adf138eac85c..745b158e9a65 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
@@ -449,7 +449,11 @@ static struct attribute *snbep_uncore_qpi_formats_attr[] = {
static struct uncore_event_desc snbep_uncore_imc_events[] = {
INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x03"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_read.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_read.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_write.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_write.unit, "MiB"),
{ /* end: all zeroes */ },
};
@@ -486,14 +490,17 @@ static struct attribute_group snbep_uncore_qpi_format_group = {
.attrs = snbep_uncore_qpi_formats_attr,
};
-#define SNBEP_UNCORE_MSR_OPS_COMMON_INIT() \
- .init_box = snbep_uncore_msr_init_box, \
+#define __SNBEP_UNCORE_MSR_OPS_COMMON_INIT() \
.disable_box = snbep_uncore_msr_disable_box, \
.enable_box = snbep_uncore_msr_enable_box, \
.disable_event = snbep_uncore_msr_disable_event, \
.enable_event = snbep_uncore_msr_enable_event, \
.read_counter = uncore_msr_read_counter
+#define SNBEP_UNCORE_MSR_OPS_COMMON_INIT() \
+ __SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), \
+ .init_box = snbep_uncore_msr_init_box \
+
static struct intel_uncore_ops snbep_uncore_msr_ops = {
SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
};
@@ -1919,6 +1926,30 @@ static struct intel_uncore_type hswep_uncore_cbox = {
.format_group = &hswep_uncore_cbox_format_group,
};
+/*
+ * Write SBOX Initialization register bit by bit to avoid spurious #GPs
+ */
+static void hswep_uncore_sbox_msr_init_box(struct intel_uncore_box *box)
+{
+ unsigned msr = uncore_msr_box_ctl(box);
+
+ if (msr) {
+ u64 init = SNBEP_PMON_BOX_CTL_INT;
+ u64 flags = 0;
+ int i;
+
+ for_each_set_bit(i, (unsigned long *)&init, 64) {
+ flags |= (1ULL << i);
+ wrmsrl(msr, flags);
+ }
+ }
+}
+
+static struct intel_uncore_ops hswep_uncore_sbox_msr_ops = {
+ __SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+ .init_box = hswep_uncore_sbox_msr_init_box
+};
+
static struct attribute *hswep_uncore_sbox_formats_attr[] = {
&format_attr_event.attr,
&format_attr_umask.attr,
@@ -1944,7 +1975,7 @@ static struct intel_uncore_type hswep_uncore_sbox = {
.event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK,
.box_ctl = HSWEP_S0_MSR_PMON_BOX_CTL,
.msr_offset = HSWEP_SBOX_MSR_OFFSET,
- .ops = &snbep_uncore_msr_ops,
+ .ops = &hswep_uncore_sbox_msr_ops,
.format_group = &hswep_uncore_sbox_format_group,
};
@@ -2009,7 +2040,11 @@ static struct intel_uncore_type hswep_uncore_ha = {
static struct uncore_event_desc hswep_uncore_imc_events[] = {
INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x00,umask=0x00"),
INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x03"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_read.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_read.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_write.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_write.unit, "MiB"),
{ /* end: all zeroes */ },
};
@@ -2025,13 +2060,27 @@ static struct intel_uncore_type hswep_uncore_imc = {
SNBEP_UNCORE_PCI_COMMON_INIT(),
};
+static unsigned hswep_uncore_irp_ctrs[] = {0xa0, 0xa8, 0xb0, 0xb8};
+
+static u64 hswep_uncore_irp_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ struct hw_perf_event *hwc = &event->hw;
+ u64 count = 0;
+
+ pci_read_config_dword(pdev, hswep_uncore_irp_ctrs[hwc->idx], (u32 *)&count);
+ pci_read_config_dword(pdev, hswep_uncore_irp_ctrs[hwc->idx] + 4, (u32 *)&count + 1);
+
+ return count;
+}
+
static struct intel_uncore_ops hswep_uncore_irp_ops = {
.init_box = snbep_uncore_pci_init_box,
.disable_box = snbep_uncore_pci_disable_box,
.enable_box = snbep_uncore_pci_enable_box,
.disable_event = ivbep_uncore_irp_disable_event,
.enable_event = ivbep_uncore_irp_enable_event,
- .read_counter = ivbep_uncore_irp_read_counter,
+ .read_counter = hswep_uncore_irp_read_counter,
};
static struct intel_uncore_type hswep_uncore_irp = {
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 5433658e598d..e7d8c7608471 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -72,7 +72,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
if (c->x86_mask || c->cpuid_level >= 0)
seq_printf(m, "stepping\t: %d\n", c->x86_mask);
else
- seq_printf(m, "stepping\t: unknown\n");
+ seq_puts(m, "stepping\t: unknown\n");
if (c->microcode)
seq_printf(m, "microcode\t: 0x%x\n", c->microcode);
@@ -92,12 +92,12 @@ static int show_cpuinfo(struct seq_file *m, void *v)
show_cpuinfo_core(m, c, cpu);
show_cpuinfo_misc(m, c);
- seq_printf(m, "flags\t\t:");
+ seq_puts(m, "flags\t\t:");
for (i = 0; i < 32*NCAPINTS; i++)
if (cpu_has(c, i) && x86_cap_flags[i] != NULL)
seq_printf(m, " %s", x86_cap_flags[i]);
- seq_printf(m, "\nbugs\t\t:");
+ seq_puts(m, "\nbugs\t\t:");
for (i = 0; i < 32*NBUGINTS; i++) {
unsigned int bug_bit = 32*NCAPINTS + i;
@@ -118,7 +118,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n",
c->x86_phys_bits, c->x86_virt_bits);
- seq_printf(m, "power management:");
+ seq_puts(m, "power management:");
for (i = 0; i < 32; i++) {
if (c->x86_power & (1 << i)) {
if (i < ARRAY_SIZE(x86_power_flags) &&
@@ -131,7 +131,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
}
}
- seq_printf(m, "\n\n");
+ seq_puts(m, "\n\n");
return 0;
}
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 1abcb50b48ae..ff86f19b5758 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -24,7 +24,6 @@ static char x86_stack_ids[][8] = {
[ DEBUG_STACK-1 ] = "#DB",
[ NMI_STACK-1 ] = "NMI",
[ DOUBLEFAULT_STACK-1 ] = "#DF",
- [ STACKFAULT_STACK-1 ] = "#SS",
[ MCE_STACK-1 ] = "#MC",
#if DEBUG_STKSZ > EXCEPTION_STKSZ
[ N_EXCEPTION_STACKS ...
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index df088bb03fb3..c0226ab54106 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -828,9 +828,15 @@ ENTRY(native_iret)
jnz native_irq_return_ldt
#endif
+.global native_irq_return_iret
native_irq_return_iret:
+ /*
+ * This may fault. Non-paranoid faults on return to userspace are
+ * handled by fixup_bad_iret. These include #SS, #GP, and #NP.
+ * Double-faults due to espfix64 are handled in do_double_fault.
+ * Other faults here are fatal.
+ */
iretq
- _ASM_EXTABLE(native_irq_return_iret, bad_iret)
#ifdef CONFIG_X86_ESPFIX64
native_irq_return_ldt:
@@ -858,25 +864,6 @@ native_irq_return_ldt:
jmp native_irq_return_iret
#endif
- .section .fixup,"ax"
-bad_iret:
- /*
- * The iret traps when the %cs or %ss being restored is bogus.
- * We've lost the original trap vector and error code.
- * #GPF is the most likely one to get for an invalid selector.
- * So pretend we completed the iret and took the #GPF in user mode.
- *
- * We are now running with the kernel GS after exception recovery.
- * But error_entry expects us to have user GS to match the user %cs,
- * so swap back.
- */
- pushq $0
-
- SWAPGS
- jmp general_protection
-
- .previous
-
/* edi: workmask, edx: work */
retint_careful:
CFI_RESTORE_STATE
@@ -922,37 +909,6 @@ ENTRY(retint_kernel)
CFI_ENDPROC
END(common_interrupt)
- /*
- * If IRET takes a fault on the espfix stack, then we
- * end up promoting it to a doublefault. In that case,
- * modify the stack to make it look like we just entered
- * the #GP handler from user space, similar to bad_iret.
- */
-#ifdef CONFIG_X86_ESPFIX64
- ALIGN
-__do_double_fault:
- XCPT_FRAME 1 RDI+8
- movq RSP(%rdi),%rax /* Trap on the espfix stack? */
- sarq $PGDIR_SHIFT,%rax
- cmpl $ESPFIX_PGD_ENTRY,%eax
- jne do_double_fault /* No, just deliver the fault */
- cmpl $__KERNEL_CS,CS(%rdi)
- jne do_double_fault
- movq RIP(%rdi),%rax
- cmpq $native_irq_return_iret,%rax
- jne do_double_fault /* This shouldn't happen... */
- movq PER_CPU_VAR(kernel_stack),%rax
- subq $(6*8-KERNEL_STACK_OFFSET),%rax /* Reset to original stack */
- movq %rax,RSP(%rdi)
- movq $0,(%rax) /* Missing (lost) #GP error code */
- movq $general_protection,RIP(%rdi)
- retq
- CFI_ENDPROC
-END(__do_double_fault)
-#else
-# define __do_double_fault do_double_fault
-#endif
-
/*
* APIC interrupts.
*/
@@ -1124,7 +1080,7 @@ idtentry overflow do_overflow has_error_code=0
idtentry bounds do_bounds has_error_code=0
idtentry invalid_op do_invalid_op has_error_code=0
idtentry device_not_available do_device_not_available has_error_code=0
-idtentry double_fault __do_double_fault has_error_code=1 paranoid=1
+idtentry double_fault do_double_fault has_error_code=1 paranoid=1
idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0
idtentry invalid_TSS do_invalid_TSS has_error_code=1
idtentry segment_not_present do_segment_not_present has_error_code=1
@@ -1289,7 +1245,7 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
-idtentry stack_segment do_stack_segment has_error_code=1 paranoid=1
+idtentry stack_segment do_stack_segment has_error_code=1
#ifdef CONFIG_XEN
idtentry xen_debug do_debug has_error_code=0
idtentry xen_int3 do_int3 has_error_code=0
@@ -1399,17 +1355,16 @@ error_sti:
/*
* There are two places in the kernel that can potentially fault with
- * usergs. Handle them here. The exception handlers after iret run with
- * kernel gs again, so don't set the user space flag. B stepping K8s
- * sometimes report an truncated RIP for IRET exceptions returning to
- * compat mode. Check for these here too.
+ * usergs. Handle them here. B stepping K8s sometimes report a
+ * truncated RIP for IRET exceptions returning to compat mode. Check
+ * for these here too.
*/
error_kernelspace:
CFI_REL_OFFSET rcx, RCX+8
incl %ebx
leaq native_irq_return_iret(%rip),%rcx
cmpq %rcx,RIP+8(%rsp)
- je error_swapgs
+ je error_bad_iret
movl %ecx,%eax /* zero extend */
cmpq %rax,RIP+8(%rsp)
je bstep_iret
@@ -1420,7 +1375,15 @@ error_kernelspace:
bstep_iret:
/* Fix truncated RIP */
movq %rcx,RIP+8(%rsp)
- jmp error_swapgs
+ /* fall through */
+
+error_bad_iret:
+ SWAPGS
+ mov %rsp,%rdi
+ call fixup_bad_iret
+ mov %rax,%rsp
+ decl %ebx /* Return to usergs */
+ jmp error_sti
CFI_ENDPROC
END(error_entry)
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 922d28581024..6307a0f0cf17 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -59,78 +59,78 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_printf(p, "%*s: ", prec, "NMI");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->__nmi_count);
- seq_printf(p, " Non-maskable interrupts\n");
+ seq_puts(p, " Non-maskable interrupts\n");
#ifdef CONFIG_X86_LOCAL_APIC
seq_printf(p, "%*s: ", prec, "LOC");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs);
- seq_printf(p, " Local timer interrupts\n");
+ seq_puts(p, " Local timer interrupts\n");
seq_printf(p, "%*s: ", prec, "SPU");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
- seq_printf(p, " Spurious interrupts\n");
+ seq_puts(p, " Spurious interrupts\n");
seq_printf(p, "%*s: ", prec, "PMI");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
- seq_printf(p, " Performance monitoring interrupts\n");
+ seq_puts(p, " Performance monitoring interrupts\n");
seq_printf(p, "%*s: ", prec, "IWI");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
- seq_printf(p, " IRQ work interrupts\n");
+ seq_puts(p, " IRQ work interrupts\n");
seq_printf(p, "%*s: ", prec, "RTR");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count);
- seq_printf(p, " APIC ICR read retries\n");
+ seq_puts(p, " APIC ICR read retries\n");
#endif
if (x86_platform_ipi_callback) {
seq_printf(p, "%*s: ", prec, "PLT");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis);
- seq_printf(p, " Platform interrupts\n");
+ seq_puts(p, " Platform interrupts\n");
}
#ifdef CONFIG_SMP
seq_printf(p, "%*s: ", prec, "RES");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count);
- seq_printf(p, " Rescheduling interrupts\n");
+ seq_puts(p, " Rescheduling interrupts\n");
seq_printf(p, "%*s: ", prec, "CAL");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_call_count -
irq_stats(j)->irq_tlb_count);
- seq_printf(p, " Function call interrupts\n");
+ seq_puts(p, " Function call interrupts\n");
seq_printf(p, "%*s: ", prec, "TLB");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
- seq_printf(p, " TLB shootdowns\n");
+ seq_puts(p, " TLB shootdowns\n");
#endif
#ifdef CONFIG_X86_THERMAL_VECTOR
seq_printf(p, "%*s: ", prec, "TRM");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
- seq_printf(p, " Thermal event interrupts\n");
+ seq_puts(p, " Thermal event interrupts\n");
#endif
#ifdef CONFIG_X86_MCE_THRESHOLD
seq_printf(p, "%*s: ", prec, "THR");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
- seq_printf(p, " Threshold APIC interrupts\n");
+ seq_puts(p, " Threshold APIC interrupts\n");
#endif
#ifdef CONFIG_X86_MCE
seq_printf(p, "%*s: ", prec, "MCE");
for_each_online_cpu(j)
seq_printf(p, "%10u ", per_cpu(mce_exception_count, j));
- seq_printf(p, " Machine check exceptions\n");
+ seq_puts(p, " Machine check exceptions\n");
seq_printf(p, "%*s: ", prec, "MCP");
for_each_online_cpu(j)
seq_printf(p, "%10u ", per_cpu(mce_poll_count, j));
- seq_printf(p, " Machine check polls\n");
+ seq_puts(p, " Machine check polls\n");
#endif
#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
seq_printf(p, "%*s: ", prec, "THR");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_hv_callback_count);
- seq_printf(p, " Hypervisor callback interrupts\n");
+ seq_puts(p, " Hypervisor callback interrupts\n");
#endif
seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
#if defined(CONFIG_X86_IO_APIC)
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 67e6d19ef1be..f7e3cd50ece0 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -285,7 +285,7 @@ static int can_probe(unsigned long paddr)
* normally used, we just go through if there is no kprobe.
*/
__addr = recover_probed_instruction(buf, addr);
- kernel_insn_init(&insn, (void *)__addr);
+ kernel_insn_init(&insn, (void *)__addr, MAX_INSN_SIZE);
insn_get_length(&insn);
/*
@@ -330,8 +330,10 @@ int __copy_instruction(u8 *dest, u8 *src)
{
struct insn insn;
kprobe_opcode_t buf[MAX_INSN_SIZE];
+ unsigned long recovered_insn =
+ recover_probed_instruction(buf, (unsigned long)src);
- kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, (unsigned long)src));
+ kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
insn_get_length(&insn);
/* Another subsystem puts a breakpoint, failed to recover */
if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
@@ -342,7 +344,7 @@ int __copy_instruction(u8 *dest, u8 *src)
if (insn_rip_relative(&insn)) {
s64 newdisp;
u8 *disp;
- kernel_insn_init(&insn, dest);
+ kernel_insn_init(&insn, dest, insn.length);
insn_get_displacement(&insn);
/*
* The copied instruction uses the %rip-relative addressing
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index f1314d0bcf0a..7c523bbf3dc8 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -251,13 +251,15 @@ static int can_optimize(unsigned long paddr)
/* Decode instructions */
addr = paddr - offset;
while (addr < paddr - offset + size) { /* Decode until function end */
+ unsigned long recovered_insn;
if (search_exception_tables(addr))
/*
* Since some fixup code will jumps into this function,
* we can't optimize kprobe in this function.
*/
return 0;
- kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, addr));
+ recovered_insn = recover_probed_instruction(buf, addr);
+ kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
insn_get_length(&insn);
/* Another subsystem puts a breakpoint */
if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 749b0e423419..e510618b2e91 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1484,7 +1484,7 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
*/
if (work & _TIF_NOHZ) {
user_exit();
- work &= ~TIF_NOHZ;
+ work &= ~_TIF_NOHZ;
}
#ifdef CONFIG_SECCOMP
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ab08aa2276fb..214245d6b996 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -960,6 +960,8 @@ void __init setup_arch(char **cmdline_p)
init_mm.end_data = (unsigned long) _edata;
init_mm.brk = _brk_end;
+ mpx_mm_init(&init_mm);
+
code_resource.start = __pa_symbol(_text);
code_resource.end = __pa_symbol(_etext)-1;
data_resource.start = __pa_symbol(_etext);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index a03ec604ceed..7a8f5845e8eb 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1303,10 +1303,14 @@ static void __ref remove_cpu_from_maps(int cpu)
numa_remove_cpu(cpu);
}
+static DEFINE_PER_CPU(struct completion, die_complete);
+
void cpu_disable_common(void)
{
int cpu = smp_processor_id();
+ init_completion(&per_cpu(die_complete, smp_processor_id()));
+
remove_siblinginfo(cpu);
/* It's now safe to remove this processor from the online map */
@@ -1316,8 +1320,6 @@ void cpu_disable_common(void)
fixup_irqs();
}
-static DEFINE_PER_CPU(struct completion, die_complete);
-
int native_cpu_disable(void)
{
int ret;
@@ -1327,16 +1329,21 @@ int native_cpu_disable(void)
return ret;
clear_local_APIC();
- init_completion(&per_cpu(die_complete, smp_processor_id()));
cpu_disable_common();
return 0;
}
+void cpu_die_common(unsigned int cpu)
+{
+ wait_for_completion_timeout(&per_cpu(die_complete, cpu), HZ);
+}
+
void native_cpu_die(unsigned int cpu)
{
/* We don't do anything here: idle task is faking death itself. */
- wait_for_completion_timeout(&per_cpu(die_complete, cpu), HZ);
+
+ cpu_die_common(cpu);
/* They ack this in play_dead() by setting CPU_DEAD */
if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 0d0e922fafc1..a9ae20579895 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -60,6 +60,7 @@
#include <asm/fixmap.h>
#include <asm/mach_traps.h>
#include <asm/alternative.h>
+#include <asm/mpx.h>
#ifdef CONFIG_X86_64
#include <asm/x86_init.h>
@@ -228,37 +229,44 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
DO_ERROR(X86_TRAP_DE, SIGFPE, "divide error", divide_error)
DO_ERROR(X86_TRAP_OF, SIGSEGV, "overflow", overflow)
-DO_ERROR(X86_TRAP_BR, SIGSEGV, "bounds", bounds)
DO_ERROR(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op)
DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun",coprocessor_segment_overrun)
DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS)
DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present)
-#ifdef CONFIG_X86_32
DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment)
-#endif
DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check)
#ifdef CONFIG_X86_64
/* Runs on IST stack */
-dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
-{
- enum ctx_state prev_state;
-
- prev_state = exception_enter();
- if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
- X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) {
- preempt_conditional_sti(regs);
- do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
- preempt_conditional_cli(regs);
- }
- exception_exit(prev_state);
-}
-
dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
{
static const char str[] = "double fault";
struct task_struct *tsk = current;
+#ifdef CONFIG_X86_ESPFIX64
+ extern unsigned char native_irq_return_iret[];
+
+ /*
+ * If IRET takes a non-IST fault on the espfix64 stack, then we
+ * end up promoting it to a doublefault. In that case, modify
+ * the stack to make it look like we just entered the #GP
+ * handler from user space, similar to bad_iret.
+ */
+ if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY &&
+ regs->cs == __KERNEL_CS &&
+ regs->ip == (unsigned long)native_irq_return_iret)
+ {
+ struct pt_regs *normal_regs = task_pt_regs(current);
+
+ /* Fake a #GP(0) from userspace. */
+ memmove(&normal_regs->ip, (void *)regs->sp, 5*8);
+ normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */
+ regs->ip = (unsigned long)general_protection;
+ regs->sp = (unsigned long)&normal_regs->orig_ax;
+ return;
+ }
+#endif
+
exception_enter();
/* Return not checked because double check cannot be ignored */
notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
@@ -278,6 +286,89 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
}
#endif
+dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
+{
+ struct task_struct *tsk = current;
+ struct xsave_struct *xsave_buf;
+ enum ctx_state prev_state;
+ struct bndcsr *bndcsr;
+ siginfo_t *info;
+
+ prev_state = exception_enter();
+ if (notify_die(DIE_TRAP, "bounds", regs, error_code,
+ X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP)
+ goto exit;
+ conditional_sti(regs);
+
+ if (!user_mode(regs))
+ die("bounds", regs, error_code);
+
+ if (!cpu_feature_enabled(X86_FEATURE_MPX)) {
+ /* The exception is not from Intel MPX */
+ goto exit_trap;
+ }
+
+ /*
+ * We need to look at BNDSTATUS to resolve this exception.
+ * It is not directly accessible, though, so we need to
+ * do an xsave and then pull it out of the xsave buffer.
+ */
+ fpu_save_init(&tsk->thread.fpu);
+ xsave_buf = &(tsk->thread.fpu.state->xsave);
+ bndcsr = get_xsave_addr(xsave_buf, XSTATE_BNDCSR);
+ if (!bndcsr)
+ goto exit_trap;
+
+ /*
+ * The error code field of the BNDSTATUS register communicates status
+ * information of a bound range exception #BR or operation involving
+ * bound directory.
+ */
+ switch (bndcsr->bndstatus & MPX_BNDSTA_ERROR_CODE) {
+ case 2: /* Bound directory has invalid entry. */
+ if (mpx_handle_bd_fault(xsave_buf))
+ goto exit_trap;
+ break; /* Success, it was handled */
+ case 1: /* Bound violation. */
+ info = mpx_generate_siginfo(regs, xsave_buf);
+ if (PTR_ERR(info)) {
+ /*
+ * We failed to decode the MPX instruction. Act as if
+ * the exception was not caused by MPX.
+ */
+ goto exit_trap;
+ }
+ /*
+ * Success, we decoded the instruction and retrieved
+ * an 'info' containing the address being accessed
+ * which caused the exception. This information
+ * allows and application to possibly handle the
+ * #BR exception itself.
+ */
+ do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, info);
+ kfree(info);
+ break;
+ case 0: /* No exception caused by Intel MPX operations. */
+ goto exit_trap;
+ default:
+ die("bounds", regs, error_code);
+ }
+
+exit:
+ exception_exit(prev_state);
+ return;
+exit_trap:
+ /*
+ * This path out is for all the cases where we could not
+ * handle the exception in some way (like allocating a
+ * table or telling userspace about it. We will also end
+ * up here if the kernel has MPX turned off at compile
+ * time..
+ */
+ do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, NULL);
+ exception_exit(prev_state);
+}
+
dotraplinkage void
do_general_protection(struct pt_regs *regs, long error_code)
{
@@ -379,7 +470,7 @@ NOKPROBE_SYMBOL(do_int3);
* for scheduling or signal handling. The actual stack switch is done in
* entry.S
*/
-asmlinkage __visible struct pt_regs *sync_regs(struct pt_regs *eregs)
+asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs)
{
struct pt_regs *regs = eregs;
/* Did already sync */
@@ -399,6 +490,36 @@ asmlinkage __visible struct pt_regs *sync_regs(struct pt_regs *eregs)
return regs;
}
NOKPROBE_SYMBOL(sync_regs);
+
+struct bad_iret_stack {
+ void *error_entry_ret;
+ struct pt_regs regs;
+};
+
+asmlinkage __visible notrace
+struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
+{
+ /*
+ * This is called from entry_64.S early in handling a fault
+ * caused by a bad iret to user mode. To handle the fault
+ * correctly, we want move our stack frame to task_pt_regs
+ * and we want to pretend that the exception came from the
+ * iret target.
+ */
+ struct bad_iret_stack *new_stack =
+ container_of(task_pt_regs(current),
+ struct bad_iret_stack, regs);
+
+ /* Copy the IRET target to the new stack. */
+ memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
+
+ /* Copy the remainder of the stack from the current stack. */
+ memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip));
+
+ BUG_ON(!user_mode_vm(&new_stack->regs));
+ return new_stack;
+}
+NOKPROBE_SYMBOL(fixup_bad_iret);
#endif
/*
@@ -778,7 +899,7 @@ void __init trap_init(void)
set_intr_gate(X86_TRAP_OLD_MF, coprocessor_segment_overrun);
set_intr_gate(X86_TRAP_TS, invalid_TSS);
set_intr_gate(X86_TRAP_NP, segment_not_present);
- set_intr_gate_ist(X86_TRAP_SS, &stack_segment, STACKFAULT_STACK);
+ set_intr_gate(X86_TRAP_SS, stack_segment);
set_intr_gate(X86_TRAP_GP, general_protection);
set_intr_gate(X86_TRAP_SPURIOUS, spurious_interrupt_bug);
set_intr_gate(X86_TRAP_MF, coprocessor_error);
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 5d1cbfe4ae58..8b96a947021f 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -219,7 +219,7 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool
{
u32 volatile *good_insns;
- insn_init(insn, auprobe->insn, x86_64);
+ insn_init(insn, auprobe->insn, sizeof(auprobe->insn), x86_64);
/* has the side-effect of processing the entire instruction */
insn_get_length(insn);
if (WARN_ON_ONCE(!insn_complete(insn)))
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index e48b674639cc..234b0722de53 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -116,8 +116,6 @@ struct x86_msi_ops x86_msi = {
.teardown_msi_irqs = default_teardown_msi_irqs,
.restore_msi_irqs = default_restore_msi_irqs,
.setup_hpet_msi = default_setup_hpet_msi,
- .msi_mask_irq = default_msi_mask_irq,
- .msix_mask_irq = default_msix_mask_irq,
};
/* MSI arch specific hooks */
@@ -140,14 +138,6 @@ void arch_restore_msi_irqs(struct pci_dev *dev)
{
x86_msi.restore_msi_irqs(dev);
}
-u32 arch_msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
-{
- return x86_msi.msi_mask_irq(desc, mask, flag);
-}
-u32 arch_msix_mask_irq(struct msi_desc *desc, u32 flag)
-{
- return x86_msi.msix_mask_irq(desc, flag);
-}
#endif
struct x86_io_apic_ops x86_io_apic_ops = {