diff options
Diffstat (limited to 'arch/x86')
73 files changed, 559 insertions, 323 deletions
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 536ccfcc01c6..34d9e15857c3 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -40,8 +40,8 @@ GCOV_PROFILE := n UBSAN_SANITIZE :=n LDFLAGS := -m elf_$(UTS_MACHINE) -ifeq ($(CONFIG_RELOCATABLE),y) -# If kernel is relocatable, build compressed kernel as PIE. +# Compressed kernel should be built as PIE since it may be loaded at any +# address by the bootloader. ifeq ($(CONFIG_X86_32),y) LDFLAGS += $(call ld-option, -pie) $(call ld-option, --no-dynamic-linker) else @@ -51,7 +51,6 @@ else LDFLAGS += $(shell $(LD) --help 2>&1 | grep -q "\-z noreloc-overflow" \ && echo "-z noreloc-overflow -pie --no-dynamic-linker") endif -endif LDFLAGS_vmlinux := -T hostprogs-y := mkpiggy diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c index 26240dde081e..4224ede43b4e 100644 --- a/arch/x86/boot/cpu.c +++ b/arch/x86/boot/cpu.c @@ -87,6 +87,12 @@ int validate_cpu(void) return -1; } + if (CONFIG_X86_MINIMUM_CPU_FAMILY <= 4 && !IS_ENABLED(CONFIG_M486) && + !has_eflag(X86_EFLAGS_ID)) { + printf("This kernel requires a CPU with the CPUID instruction. Build with CONFIG_M486=y to run on this CPU.\n"); + return -1; + } + if (err_flags) { puts("This kernel requires the following features " "not present on the CPU:\n"); diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 0ab5ee1c26af..aa8b0672f87a 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -888,7 +888,7 @@ static int helper_rfc4106_encrypt(struct aead_request *req) unsigned long auth_tag_len = crypto_aead_authsize(tfm); u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN))); struct scatter_walk src_sg_walk; - struct scatter_walk dst_sg_walk; + struct scatter_walk dst_sg_walk = {}; unsigned int i; /* Assuming we are supporting rfc4106 64-bit extended */ @@ -968,7 +968,7 @@ static int helper_rfc4106_decrypt(struct aead_request *req) u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN))); u8 authTag[16]; struct scatter_walk src_sg_walk; - struct scatter_walk dst_sg_walk; + struct scatter_walk dst_sg_walk = {}; unsigned int i; if (unlikely(req->assoclen != 16 && req->assoclen != 20)) diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile index 77f28ce9c646..9976fcecd17e 100644 --- a/arch/x86/entry/Makefile +++ b/arch/x86/entry/Makefile @@ -5,8 +5,8 @@ OBJECT_FILES_NON_STANDARD_entry_$(BITS).o := y OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y -CFLAGS_syscall_64.o += -Wno-override-init -CFLAGS_syscall_32.o += -Wno-override-init +CFLAGS_syscall_64.o += $(call cc-option,-Wno-override-init,) +CFLAGS_syscall_32.o += $(call cc-option,-Wno-override-init,) obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o obj-y += common.o diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index ff6ef7b30822..2b3618542544 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -389,5 +389,3 @@ 380 i386 pkey_mprotect sys_pkey_mprotect 381 i386 pkey_alloc sys_pkey_alloc 382 i386 pkey_free sys_pkey_free -#383 i386 pkey_get sys_pkey_get -#384 i386 pkey_set sys_pkey_set diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 2f024d02511d..e93ef0b38db8 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -338,8 +338,6 @@ 329 common pkey_mprotect sys_pkey_mprotect 330 common pkey_alloc sys_pkey_alloc 331 common pkey_free sys_pkey_free -#332 common pkey_get sys_pkey_get -#333 common pkey_set sys_pkey_set # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index f5f4b3fbbbc2..afb222b63cae 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -662,7 +662,13 @@ static int __init amd_core_pmu_init(void) pr_cont("Fam15h "); x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; break; - + case 0x17: + pr_cont("Fam17h "); + /* + * In family 17h, there are no event constraints in the PMC hardware. + * We fallback to using default amd_get_event_constraints. + */ + break; default: pr_err("core perfctr but no constraints; unknown hardware!\n"); return -ENODEV; diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index d31735f37ed7..6e395c996900 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -69,7 +69,7 @@ u64 x86_perf_event_update(struct perf_event *event) int shift = 64 - x86_pmu.cntval_bits; u64 prev_raw_count, new_raw_count; int idx = hwc->idx; - s64 delta; + u64 delta; if (idx == INTEL_PMC_IDX_FIXED_BTS) return 0; @@ -2352,7 +2352,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent frame.next_frame = 0; frame.return_address = 0; - if (!access_ok(VERIFY_READ, fp, 8)) + if (!valid_user_frame(fp, sizeof(frame))) break; bytes = __copy_from_user_nmi(&frame.next_frame, fp, 4); @@ -2362,9 +2362,6 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent if (bytes != 0) break; - if (!valid_user_frame(fp, sizeof(frame))) - break; - perf_callchain_store(entry, cs_base + frame.return_address); fp = compat_ptr(ss_base + frame.next_frame); } @@ -2413,7 +2410,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs frame.next_frame = NULL; frame.return_address = 0; - if (!access_ok(VERIFY_READ, fp, sizeof(*fp) * 2)) + if (!valid_user_frame(fp, sizeof(frame))) break; bytes = __copy_from_user_nmi(&frame.next_frame, fp, sizeof(*fp)); @@ -2423,9 +2420,6 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs if (bytes != 0) break; - if (!valid_user_frame(fp, sizeof(frame))) - break; - perf_callchain_store(entry, frame.return_address); fp = (void __user *)frame.next_frame; } diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index a3a9eb84b5cf..cb8522290e6a 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3607,10 +3607,14 @@ __init int intel_pmu_init(void) /* * Quirk: v2 perfmon does not report fixed-purpose events, so - * assume at least 3 events: + * assume at least 3 events, when not running in a hypervisor: */ - if (version > 1) - x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); + if (version > 1) { + int assume = 3 * !boot_cpu_has(X86_FEATURE_HYPERVISOR); + + x86_pmu.num_counters_fixed = + max((int)edx.split.num_counters_fixed, assume); + } if (boot_cpu_has(X86_FEATURE_PDCM)) { u64 capabilities; @@ -3898,6 +3902,7 @@ __init int intel_pmu_init(void) break; case INTEL_FAM6_XEON_PHI_KNL: + case INTEL_FAM6_XEON_PHI_KNM: memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, @@ -3912,7 +3917,7 @@ __init int intel_pmu_init(void) x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.flags |= PMU_FL_NO_HT_SHARING; - pr_cont("Knights Landing events, "); + pr_cont("Knights Landing/Mill events, "); break; case INTEL_FAM6_SKYLAKE_MOBILE: @@ -4029,7 +4034,7 @@ __init int intel_pmu_init(void) /* Support full width counters using alternative MSR range */ if (x86_pmu.intel_cap.full_width_write) { - x86_pmu.max_period = x86_pmu.cntval_mask; + x86_pmu.max_period = x86_pmu.cntval_mask >> 1; x86_pmu.perfctr = MSR_IA32_PMC0; pr_cont("full-width counters, "); } diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 3ca87b5a8677..da51e5a3e2ff 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -48,7 +48,8 @@ * Scope: Core * MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter * perf code: 0x02 - * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL + * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW + * SKL,KNL * Scope: Core * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter * perf code: 0x03 @@ -56,15 +57,16 @@ * Scope: Core * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. * perf code: 0x00 - * Available model: SNB,IVB,HSW,BDW,SKL + * Available model: SNB,IVB,HSW,BDW,SKL,KNL * Scope: Package (physical package) * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. * perf code: 0x01 - * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL + * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL * Scope: Package (physical package) * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter. * perf code: 0x02 - * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL + * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW + * SKL,KNL * Scope: Package (physical package) * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. * perf code: 0x03 @@ -118,6 +120,7 @@ struct cstate_model { /* Quirk flags */ #define SLM_PKG_C6_USE_C7_MSR (1UL << 0) +#define KNL_CORE_C6_MSR (1UL << 1) struct perf_cstate_msr { u64 msr; @@ -488,6 +491,18 @@ static const struct cstate_model slm_cstates __initconst = { .quirks = SLM_PKG_C6_USE_C7_MSR, }; + +static const struct cstate_model knl_cstates __initconst = { + .core_events = BIT(PERF_CSTATE_CORE_C6_RES), + + .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | + BIT(PERF_CSTATE_PKG_C3_RES) | + BIT(PERF_CSTATE_PKG_C6_RES), + .quirks = KNL_CORE_C6_MSR, +}; + + + #define X86_CSTATES_MODEL(model, states) \ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) } @@ -523,6 +538,9 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE, snb_cstates), X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates), + + X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNL, knl_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); @@ -558,6 +576,11 @@ static int __init cstate_probe(const struct cstate_model *cm) if (cm->quirks & SLM_PKG_C6_USE_C7_MSR) pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY; + /* KNL has different MSR for CORE C6 */ + if (cm->quirks & KNL_CORE_C6_MSR) + pkg_msr[PERF_CSTATE_CORE_C6_RES].msr = MSR_KNL_CORE_C6_RESIDENCY; + + has_cstate_core = cstate_probe_msr(cm->core_events, PERF_CSTATE_CORE_EVENT_MAX, core_msr, core_events_attrs); diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 0319311dbdbb..be202390bbd3 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1108,20 +1108,20 @@ static void setup_pebs_sample_data(struct perf_event *event, } /* - * We use the interrupt regs as a base because the PEBS record - * does not contain a full regs set, specifically it seems to - * lack segment descriptors, which get used by things like - * user_mode(). + * We use the interrupt regs as a base because the PEBS record does not + * contain a full regs set, specifically it seems to lack segment + * descriptors, which get used by things like user_mode(). * - * In the simple case fix up only the IP and BP,SP regs, for - * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly. - * A possible PERF_SAMPLE_REGS will have to transfer all regs. + * In the simple case fix up only the IP for PERF_SAMPLE_IP. + * + * We must however always use BP,SP from iregs for the unwinder to stay + * sane; the record BP,SP can point into thin air when the record is + * from a previous PMI context or an (I)RET happend between the record + * and PMI. */ *regs = *iregs; regs->flags = pebs->flags; set_linear_ip(regs, pebs->ip); - regs->bp = pebs->bp; - regs->sp = pebs->sp; if (sample_type & PERF_SAMPLE_REGS_INTR) { regs->ax = pebs->ax; @@ -1130,10 +1130,21 @@ static void setup_pebs_sample_data(struct perf_event *event, regs->dx = pebs->dx; regs->si = pebs->si; regs->di = pebs->di; - regs->bp = pebs->bp; - regs->sp = pebs->sp; - regs->flags = pebs->flags; + /* + * Per the above; only set BP,SP if we don't need callchains. + * + * XXX: does this make sense? + */ + if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) { + regs->bp = pebs->bp; + regs->sp = pebs->sp; + } + + /* + * Preserve PERF_EFLAGS_VM from set_linear_ip(). + */ + regs->flags = pebs->flags | (regs->flags & PERF_EFLAGS_VM); #ifndef CONFIG_X86_32 regs->r8 = pebs->r8; regs->r9 = pebs->r9; diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index fc6cf21c535e..81b321ace8e0 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -458,8 +458,8 @@ void intel_pmu_lbr_del(struct perf_event *event) if (!x86_pmu.lbr_nr) return; - if (branch_user_callstack(cpuc->br_sel) && event->ctx && - event->ctx->task_ctx_data) { + if (branch_user_callstack(cpuc->br_sel) && + event->ctx->task_ctx_data) { task_ctx = event->ctx->task_ctx_data; task_ctx->lbr_callstack_users--; } diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index b0f0e835a770..0a535cea8ff3 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -763,6 +763,7 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = { X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, knl_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init), diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index d9844cc74486..dbaaf7dc8373 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -319,9 +319,9 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, */ static int uncore_pmu_event_init(struct perf_event *event); -static bool is_uncore_event(struct perf_event *event) +static bool is_box_event(struct intel_uncore_box *box, struct perf_event *event) { - return event->pmu->event_init == uncore_pmu_event_init; + return &box->pmu->pmu == event->pmu; } static int @@ -340,7 +340,7 @@ uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, n = box->n_events; - if (is_uncore_event(leader)) { + if (is_box_event(box, leader)) { box->event_list[n] = leader; n++; } @@ -349,7 +349,7 @@ uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, return n; list_for_each_entry(event, &leader->sibling_list, group_entry) { - if (!is_uncore_event(event) || + if (!is_box_event(box, event) || event->state <= PERF_EVENT_STATE_OFF) continue; @@ -1349,6 +1349,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, bdx_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, bdx_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, knl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP,skl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, skx_uncore_init), diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 5f845eef9a4d..a3dcc12bef4a 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -8,8 +8,12 @@ #define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 #define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604 -#define PCI_DEVICE_ID_INTEL_SKL_IMC 0x191f -#define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x190c +#define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x1904 +#define PCI_DEVICE_ID_INTEL_SKL_Y_IMC 0x190c +#define PCI_DEVICE_ID_INTEL_SKL_HD_IMC 0x1900 +#define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910 +#define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f +#define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff @@ -486,24 +490,12 @@ static int snb_uncore_imc_event_add(struct perf_event *event, int flags) snb_uncore_imc_event_start(event, 0); - box->n_events++; - return 0; } static void snb_uncore_imc_event_del(struct perf_event *event, int flags) { - struct intel_uncore_box *box = uncore_event_to_box(event); - int i; - snb_uncore_imc_event_stop(event, PERF_EF_UPDATE); - - for (i = 0; i < box->n_events; i++) { - if (event == box->event_list[i]) { - --box->n_events; - break; - } - } } int snb_pci2phy_map_init(int devid) @@ -616,13 +608,29 @@ static const struct pci_device_id bdw_uncore_pci_ids[] = { static const struct pci_device_id skl_uncore_pci_ids[] = { { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC), + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_Y_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, { /* IMC */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_U_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_HD_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_HQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SD_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ }, }; @@ -666,8 +674,12 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core Processor */ IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */ IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver), /* 5th Gen Core U */ - IMC_DEV(SKL_IMC, &skl_uncore_pci_driver), /* 6th Gen Core */ + IMC_DEV(SKL_Y_IMC, &skl_uncore_pci_driver), /* 6th Gen Core Y */ IMC_DEV(SKL_U_IMC, &skl_uncore_pci_driver), /* 6th Gen Core U */ + IMC_DEV(SKL_HD_IMC, &skl_uncore_pci_driver), /* 6th Gen Core H Dual Core */ + IMC_DEV(SKL_HQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core H Quad Core */ + IMC_DEV(SKL_SD_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Dual Core */ + IMC_DEV(SKL_SQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Quad Core */ { /* end marker */ } }; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 5874d8de1f8d..a77ee026643d 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -113,7 +113,7 @@ struct debug_store { * Per register state. */ struct er_account { - raw_spinlock_t lock; /* per-core: protect structure */ + raw_spinlock_t lock; /* per-core: protect structure */ u64 config; /* extra MSR config */ u64 reg; /* extra MSR number */ atomic_t ref; /* reference count */ diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 03d269bed941..24118c0b4640 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -272,7 +272,6 @@ struct compat_shmid64_ds { /* * The type of struct elf_prstatus.pr_reg in compatible core dumps. */ -#ifdef CONFIG_X86_X32_ABI typedef struct user_regs_struct compat_elf_gregset_t; /* Full regset -- prstatus on x32, otherwise on ia32 */ @@ -281,10 +280,9 @@ typedef struct user_regs_struct compat_elf_gregset_t; do { *(int *) (((void *) &((S)->pr_reg)) + R) = (V); } \ while (0) +#ifdef CONFIG_X86_X32_ABI #define COMPAT_USE_64BIT_TIME \ (!!(task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT)) -#else -typedef struct user_regs_struct32 compat_elf_gregset_t; #endif /* diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 1188bc849ee3..a39629206864 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -194,6 +194,8 @@ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ +#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ +#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 9ae5ab80a497..34a46dc076d3 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -64,5 +64,6 @@ /* Xeon Phi */ #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */ +#define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */ #endif /* _ASM_X86_INTEL_FAMILY_H */ diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h index 5b6753d1f7f4..49da9f497b90 100644 --- a/arch/x86/include/asm/intel-mid.h +++ b/arch/x86/include/asm/intel-mid.h @@ -17,6 +17,7 @@ extern int intel_mid_pci_init(void); extern int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state); +extern pci_power_t intel_mid_pci_get_power_state(struct pci_dev *pdev); extern void intel_mid_pwr_power_off(void); diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index de25aad07853..d34bd370074b 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -351,4 +351,10 @@ extern void arch_phys_wc_del(int handle); #define arch_phys_wc_add arch_phys_wc_add #endif +#ifdef CONFIG_X86_PAT +extern int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size); +extern void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size); +#define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc +#endif + #endif /* _ASM_X86_IO_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4b20f7304b9c..bdde80731f49 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -948,7 +948,6 @@ struct kvm_x86_ops { int (*get_lpage_level)(void); bool (*rdtscp_supported)(void); bool (*invpcid_supported)(void); - void (*adjust_tsc_offset_guest)(struct kvm_vcpu *vcpu, s64 adjustment); void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); @@ -958,8 +957,6 @@ struct kvm_x86_ops { void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); - u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu, u64 host_tsc); - void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); int (*check_intercept)(struct kvm_vcpu *vcpu, diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 56f4c6676b29..78f3760ca1f2 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -88,7 +88,6 @@ #define MSR_IA32_RTIT_CTL 0x00000570 #define MSR_IA32_RTIT_STATUS 0x00000571 -#define MSR_IA32_RTIT_STATUS 0x00000571 #define MSR_IA32_RTIT_ADDR0_A 0x00000580 #define MSR_IA32_RTIT_ADDR0_B 0x00000581 #define MSR_IA32_RTIT_ADDR1_A 0x00000582 diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index 3d33a719f5c1..a34e0d4b957d 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -103,8 +103,10 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem) ({ \ long tmp; \ struct rw_semaphore* ret; \ + register void *__sp asm(_ASM_SP); \ + \ asm volatile("# beginning down_write\n\t" \ - LOCK_PREFIX " xadd %1,(%3)\n\t" \ + LOCK_PREFIX " xadd %1,(%4)\n\t" \ /* adds 0xffff0001, returns the old value */ \ " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" \ /* was the active mask 0 before? */\ @@ -112,7 +114,7 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem) " call " slow_path "\n" \ "1:\n" \ "# ending down_write" \ - : "+m" (sem->count), "=d" (tmp), "=a" (ret) \ + : "+m" (sem->count), "=d" (tmp), "=a" (ret), "+r" (__sp) \ : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS) \ : "memory", "cc"); \ ret; \ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 2aaca53c0974..ad6f5eb07a95 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -52,6 +52,15 @@ struct task_struct; #include <asm/cpufeature.h> #include <linux/atomic.h> +struct thread_info { + unsigned long flags; /* low level flags */ +}; + +#define INIT_THREAD_INFO(tsk) \ +{ \ + .flags = 0, \ +} + #define init_stack (init_thread_union.stack) #else /* !__ASSEMBLY__ */ diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 8a5abaa7d453..931ced8ca345 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -454,6 +454,7 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger, polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK; mp_override_legacy_irq(bus_irq, polarity, trigger, gsi); + acpi_penalize_sci_irq(bus_irq, trigger, polarity); /* * stash over-ride to indicate we've been here diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index aeef53ce93e1..35690a168cf7 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -815,9 +815,9 @@ static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode) l = li; } addr1 = (base << shift) + - f * (unsigned long)(1 << m_io); + f * (1ULL << m_io); addr2 = (base << shift) + - (l + 1) * (unsigned long)(1 << m_io); + (l + 1) * (1ULL << m_io); pr_info("UV: %s[%03d..%03d] NASID 0x%04x ADDR 0x%016lx - 0x%016lx\n", id, fi, li, lnasid, addr1, addr2); if (max_io < l) diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index c7364bd633e1..51287cd90bf6 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -1042,8 +1042,11 @@ static int apm_get_power_status(u_short *status, u_short *bat, u_short *life) if (apm_info.get_power_status_broken) return APM_32_UNSUPPORTED; - if (apm_bios_call(&call)) + if (apm_bios_call(&call)) { + if (!call.err) + return APM_NO_ERROR; return call.err; + } *status = call.ebx; *bat = call.ecx; if (apm_info.get_power_status_swabinminutes) { diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index b81fe2d63e15..1e81a37c034e 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -347,7 +347,6 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c) #ifdef CONFIG_SMP unsigned bits; int cpu = smp_processor_id(); - unsigned int socket_id, core_complex_id; bits = c->x86_coreid_bits; /* Low order bits define the core id (index of core in socket) */ @@ -365,10 +364,7 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c) if (c->x86 != 0x17 || !cpuid_edx(0x80000006)) return; - socket_id = (c->apicid >> bits) - 1; - core_complex_id = (c->apicid & ((1 << bits) - 1)) >> 3; - - per_cpu(cpu_llc_id, cpu) = (socket_id << 3) | core_complex_id; + per_cpu(cpu_llc_id, cpu) = c->apicid >> 3; #endif } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9bd910a7dd0a..cc9e980c68ec 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -979,6 +979,35 @@ static void x86_init_cache_qos(struct cpuinfo_x86 *c) } /* + * The physical to logical package id mapping is initialized from the + * acpi/mptables information. Make sure that CPUID actually agrees with + * that. + */ +static void sanitize_package_id(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_SMP + unsigned int pkg, apicid, cpu = smp_processor_id(); + + apicid = apic->cpu_present_to_apicid(cpu); + pkg = apicid >> boot_cpu_data.x86_coreid_bits; + + if (apicid != c->initial_apicid) { + pr_err(FW_BUG "CPU%u: APIC id mismatch. Firmware: %x CPUID: %x\n", + cpu, apicid, c->initial_apicid); + c->initial_apicid = apicid; + } + if (pkg != c->phys_proc_id) { + pr_err(FW_BUG "CPU%u: Using firmware package id %u instead of %u\n", + cpu, pkg, c->phys_proc_id); + c->phys_proc_id = pkg; + } + c->logical_proc_id = topology_phys_to_logical_pkg(pkg); +#else + c->logical_proc_id = 0; +#endif +} + +/* * This does the hard work of actually picking apart the CPU stuff... */ static void identify_cpu(struct cpuinfo_x86 *c) @@ -1103,8 +1132,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_NUMA numa_add_cpu(smp_processor_id()); #endif - /* The boot/hotplug time assigment got cleared, restore it */ - c->logical_proc_id = topology_phys_to_logical_pkg(c->phys_proc_id); + sanitize_package_id(c); } /* diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 620ab06bcf45..017bda12caae 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -429,7 +429,7 @@ int __init save_microcode_in_initrd_amd(void) * We need the physical address of the container for both bitness since * boot_params.hdr.ramdisk_image is a physical address. */ - cont = __pa(container); + cont = __pa_nodebug(container); cont_va = container; #endif diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 8cb57df9398d..1db8dc490b66 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -32,6 +32,8 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_INTEL_PT, CR_EBX,25, 0x00000007, 0 }, + { X86_FEATURE_AVX512_4VNNIW, CR_EDX, 2, 0x00000007, 0 }, + { X86_FEATURE_AVX512_4FMAPS, CR_EDX, 3, 0x00000007, 0 }, { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 }, diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 81160578b91a..5130985b758b 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -27,6 +27,7 @@ #include <asm/div64.h> #include <asm/x86_init.h> #include <asm/hypervisor.h> +#include <asm/timer.h> #include <asm/apic.h> #define CPUID_VMWARE_INFO_LEAF 0x40000000 @@ -94,6 +95,10 @@ static void __init vmware_platform_setup(void) } else { pr_warn("Failed to get TSC freq from the hypervisor\n"); } + +#ifdef CONFIG_X86_IO_APIC + no_timer_check = 1; +#endif } /* diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 9b7cf5c28f5f..85f854b98a9d 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -112,7 +112,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, for (; stack < stack_info.end; stack++) { unsigned long real_addr; int reliable = 0; - unsigned long addr = *stack; + unsigned long addr = READ_ONCE_NOCHECK(*stack); unsigned long *ret_addr_p = unwind_get_return_address_ptr(&state); diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index b85fe5f91c3f..90e8dde3ec26 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -350,7 +350,7 @@ int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, * continue building up new bios map based on this * information */ - if (current_type != last_type) { + if (current_type != last_type || current_type == E820_PRAM) { if (last_type != 0) { new_bios[new_bios_entry].size = change_point[chgidx]->addr - last_addr; diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 47004010ad5d..ebb4e95fbd74 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -521,14 +521,14 @@ void fpu__clear(struct fpu *fpu) { WARN_ON_FPU(fpu != ¤t->thread.fpu); /* Almost certainly an anomaly */ - if (!use_eager_fpu() || !static_cpu_has(X86_FEATURE_FPU)) { - /* FPU state will be reallocated lazily at the first use. */ - fpu__drop(fpu); - } else { - if (!fpu->fpstate_active) { - fpu__activate_curr(fpu); - user_fpu_begin(); - } + fpu__drop(fpu); + + /* + * Make sure fpstate is cleared and initialized. + */ + if (static_cpu_has(X86_FEATURE_FPU)) { + fpu__activate_curr(fpu); + user_fpu_begin(); copy_init_fpstate_to_fpregs(); } } diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 124aa5c593f8..095ef7ddd6ae 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -74,6 +74,8 @@ void fpu__xstate_clear_all_cpu_caps(void) setup_clear_cpu_cap(X86_FEATURE_MPX); setup_clear_cpu_cap(X86_FEATURE_XGETBV1); setup_clear_cpu_cap(X86_FEATURE_PKU); + setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW); + setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS); } /* diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index b6b2f0264af3..2dabea46f039 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -665,14 +665,17 @@ __PAGE_ALIGNED_BSS initial_pg_pmd: .fill 1024*KPMDS,4,0 #else -ENTRY(initial_page_table) +.globl initial_page_table +initial_page_table: .fill 1024,4,0 #endif initial_pg_fixmap: .fill 1024,4,0 -ENTRY(empty_zero_page) +.globl empty_zero_page +empty_zero_page: .fill 4096,1,0 -ENTRY(swapper_pg_dir) +.globl swapper_pg_dir +swapper_pg_dir: .fill 1024,4,0 EXPORT_SYMBOL(empty_zero_page) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 28cee019209c..d9d8d16b69db 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -50,6 +50,7 @@ #include <linux/kallsyms.h> #include <linux/ftrace.h> #include <linux/frame.h> +#include <linux/kasan.h> #include <asm/text-patching.h> #include <asm/cacheflush.h> @@ -1057,9 +1058,10 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) * tailcall optimization. So, to be absolutely safe * we also save and restore enough stack bytes to cover * the argument area. + * Use __memcpy() to avoid KASAN stack out-of-bounds reports as we copy + * raw stack chunk with redzones: */ - memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, - MIN_STACK_SIZE(addr)); + __memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, MIN_STACK_SIZE(addr)); regs->flags &= ~X86_EFLAGS_IF; trace_hardirqs_off(); regs->ip = (unsigned long)(jp->entry); @@ -1080,6 +1082,9 @@ void jprobe_return(void) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + /* Unpoison stack redzones in the frames we are going to jump over. */ + kasan_unpoison_stack_above_sp_to(kcb->jprobe_saved_sp); + asm volatile ( #ifdef CONFIG_X86_64 " xchg %%rbx,%%rsp \n" @@ -1118,7 +1123,7 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) /* It's OK to start function graph tracing again */ unpause_graph_tracing(); *regs = kcb->jprobe_saved_regs; - memcpy(saved_sp, kcb->jprobes_stack, MIN_STACK_SIZE(saved_sp)); + __memcpy(saved_sp, kcb->jprobes_stack, MIN_STACK_SIZE(saved_sp)); preempt_enable_no_resched(); return 1; } diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index efe73aacf966..7b0d3da52fb4 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -18,8 +18,10 @@ #ifdef CC_USING_FENTRY # define function_hook __fentry__ +EXPORT_SYMBOL(__fentry__) #else # define function_hook mcount +EXPORT_SYMBOL(mcount) #endif /* All cases save the original rbp (8 bytes) */ @@ -295,7 +297,6 @@ trace: jmp fgraph_trace END(function_hook) #endif /* CONFIG_DYNAMIC_FTRACE */ -EXPORT_SYMBOL(function_hook) #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 51402a7e4ca6..0bee04d41bed 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -625,8 +625,6 @@ static void amd_disable_seq_and_redirect_scrub(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3, amd_disable_seq_and_redirect_scrub); -#endif - #if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) #include <linux/jump_label.h> #include <asm/string_64.h> @@ -657,3 +655,4 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, quirk_intel_brickland_xeon_ras_cap); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2083, quirk_intel_purley_xeon_ras_cap); #endif +#endif diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index bbfbca5fea0c..9c337b0e8ba7 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1221,11 +1221,16 @@ void __init setup_arch(char **cmdline_p) */ get_smp_config(); + /* + * Systems w/o ACPI and mptables might not have it mapped the local + * APIC yet, but prefill_possible_map() might need to access it. + */ + init_apic_mappings(); + prefill_possible_map(); init_cpu_to_node(); - init_apic_mappings(); io_apic_init_mappings(); kvm_guest_init(); diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index 40df33753bae..ec1f756f9dc9 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -105,9 +105,6 @@ void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact) /* Don't let flags to be set from userspace */ act->sa.sa_flags &= ~(SA_IA32_ABI | SA_X32_ABI); - if (user_64bit_mode(current_pt_regs())) - return; - if (in_ia32_syscall()) act->sa.sa_flags |= SA_IA32_ABI; if (in_x32_syscall()) diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 68f8cc222f25..c00cb64bc0a1 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -261,8 +261,10 @@ static inline void __smp_reschedule_interrupt(void) __visible void smp_reschedule_interrupt(struct pt_regs *regs) { + irq_enter(); ack_APIC_irq(); __smp_reschedule_interrupt(); + irq_exit(); /* * KVM uses this interrupt to force a cpu out of guest mode */ diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 951f093a96fe..42f5eb7b4f6c 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1409,15 +1409,17 @@ __init void prefill_possible_map(void) /* No boot processor was found in mptable or ACPI MADT */ if (!num_processors) { - int apicid = boot_cpu_physical_apicid; - int cpu = hard_smp_processor_id(); + if (boot_cpu_has(X86_FEATURE_APIC)) { + int apicid = boot_cpu_physical_apicid; + int cpu = hard_smp_processor_id(); - pr_warn("Boot CPU (id %d) not listed by BIOS\n", cpu); + pr_warn("Boot CPU (id %d) not listed by BIOS\n", cpu); - /* Make sure boot cpu is enumerated */ - if (apic->cpu_present_to_apicid(0) == BAD_APICID && - apic->apic_id_valid(apicid)) - generic_processor_info(apicid, boot_cpu_apic_version); + /* Make sure boot cpu is enumerated */ + if (apic->cpu_present_to_apicid(0) == BAD_APICID && + apic->apic_id_valid(apicid)) + generic_processor_info(apicid, boot_cpu_apic_version); + } if (!num_processors) num_processors = 1; diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index c9a073866ca7..a23ce84a3f6c 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -57,7 +57,8 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs) unsigned char opcode[15]; unsigned long addr = convert_ip_to_linear(child, regs); - copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0); + copied = access_process_vm(child, addr, opcode, sizeof(opcode), + FOLL_FORCE); for (i = 0; i < copied; i++) { switch (opcode[i]) { /* popf and iret */ diff --git a/arch/x86/kernel/sysfb_simplefb.c b/arch/x86/kernel/sysfb_simplefb.c index 764a29f84de7..85195d447a92 100644 --- a/arch/x86/kernel/sysfb_simplefb.c +++ b/arch/x86/kernel/sysfb_simplefb.c @@ -66,13 +66,36 @@ __init int create_simplefb(const struct screen_info *si, { struct platform_device *pd; struct resource res; - unsigned long len; + u64 base, size; + u32 length; - /* don't use lfb_size as it may contain the whole VMEM instead of only - * the part that is occupied by the framebuffer */ - len = mode->height * mode->stride; - len = PAGE_ALIGN(len); - if (len > (u64)si->lfb_size << 16) { + /* + * If the 64BIT_BASE capability is set, ext_lfb_base will contain the + * upper half of the base address. Assemble the address, then make sure + * it is valid and we can actually access it. + */ + base = si->lfb_base; + if (si->capabilities & VIDEO_CAPABILITY_64BIT_BASE) + base |= (u64)si->ext_lfb_base << 32; + if (!base || (u64)(resource_size_t)base != base) { + printk(KERN_DEBUG "sysfb: inaccessible VRAM base\n"); + return -EINVAL; + } + + /* + * Don't use lfb_size as IORESOURCE size, since it may contain the + * entire VMEM, and thus require huge mappings. Use just the part we + * need, that is, the part where the framebuffer is located. But verify + * that it does not exceed the advertised VMEM. + * Note that in case of VBE, the lfb_size is shifted by 16 bits for + * historical reasons. + */ + size = si->lfb_size; + if (si->orig_video_isVGA == VIDEO_TYPE_VLFB) + size <<= 16; + length = mode->height * mode->stride; + length = PAGE_ALIGN(length); + if (length > size) { printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n"); return -EINVAL; } @@ -81,8 +104,8 @@ __init int create_simplefb(const struct screen_info *si, memset(&res, 0, sizeof(res)); res.flags = IORESOURCE_MEM | IORESOURCE_BUSY; res.name = simplefb_resname; - res.start = si->lfb_base; - res.end = si->lfb_base + len - 1; + res.start = base; + res.end = res.start + length - 1; if (res.end <= res.start) return -EINVAL; diff --git a/arch/x86/kernel/unwind_guess.c b/arch/x86/kernel/unwind_guess.c index 9298993dc8b7..22881ddcbb9f 100644 --- a/arch/x86/kernel/unwind_guess.c +++ b/arch/x86/kernel/unwind_guess.c @@ -7,11 +7,15 @@ unsigned long unwind_get_return_address(struct unwind_state *state) { + unsigned long addr; + if (unwind_done(state)) return 0; + addr = READ_ONCE_NOCHECK(*state->sp); + return ftrace_graph_ret_addr(state->task, &state->graph_idx, - *state->sp, state->sp); + addr, state->sp); } EXPORT_SYMBOL_GPL(unwind_get_return_address); @@ -23,9 +27,12 @@ bool unwind_next_frame(struct unwind_state *state) return false; do { - for (state->sp++; state->sp < info->end; state->sp++) - if (__kernel_text_address(*state->sp)) + for (state->sp++; state->sp < info->end; state->sp++) { + unsigned long addr = READ_ONCE_NOCHECK(*state->sp); + + if (__kernel_text_address(addr)) return true; + } state->sp = info->next_sp; @@ -47,7 +54,14 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, get_stack_info(first_frame, state->task, &state->stack_info, &state->stack_mask); - if (!__kernel_text_address(*first_frame)) + /* + * The caller can provide the address of the first frame directly + * (first_frame) or indirectly (regs->sp) to indicate which stack frame + * to start unwinding at. Skip ahead until we reach it. + */ + if (!unwind_done(state) && + (!on_stack(&state->stack_info, first_frame, sizeof(long)) || + !__kernel_text_address(*first_frame))) unwind_next_frame(state); } EXPORT_SYMBOL_GPL(__unwind_start); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 4e95d3eb2955..a3ce9d260d68 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2105,16 +2105,10 @@ static int em_iret(struct x86_emulate_ctxt *ctxt) static int em_jmp_far(struct x86_emulate_ctxt *ctxt) { int rc; - unsigned short sel, old_sel; - struct desc_struct old_desc, new_desc; - const struct x86_emulate_ops *ops = ctxt->ops; + unsigned short sel; + struct desc_struct new_desc; u8 cpl = ctxt->ops->cpl(ctxt); - /* Assignment of RIP may only fail in 64-bit mode */ - if (ctxt->mode == X86EMUL_MODE_PROT64) - ops->get_segment(ctxt, &old_sel, &old_desc, NULL, - VCPU_SREG_CS); - memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2); rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, @@ -2124,12 +2118,10 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt) return rc; rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc); - if (rc != X86EMUL_CONTINUE) { - WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64); - /* assigning eip failed; restore the old cs */ - ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS); - return rc; - } + /* Error handling is not implemented. */ + if (rc != X86EMUL_CONTINUE) + return X86EMUL_UNHANDLEABLE; + return rc; } @@ -2189,14 +2181,8 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) { int rc; unsigned long eip, cs; - u16 old_cs; int cpl = ctxt->ops->cpl(ctxt); - struct desc_struct old_desc, new_desc; - const struct x86_emulate_ops *ops = ctxt->ops; - - if (ctxt->mode == X86EMUL_MODE_PROT64) - ops->get_segment(ctxt, &old_cs, &old_desc, NULL, - VCPU_SREG_CS); + struct desc_struct new_desc; rc = emulate_pop(ctxt, &eip, ctxt->op_bytes); if (rc != X86EMUL_CONTINUE) @@ -2213,10 +2199,10 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; rc = assign_eip_far(ctxt, eip, &new_desc); - if (rc != X86EMUL_CONTINUE) { - WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64); - ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS); - } + /* Error handling is not implemented. */ + if (rc != X86EMUL_CONTINUE) + return X86EMUL_UNHANDLEABLE; + return rc; } @@ -5045,7 +5031,7 @@ done_prefixes: /* Decode and fetch the destination operand: register or memory. */ rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask); - if (ctxt->rip_relative) + if (ctxt->rip_relative && likely(ctxt->memopp)) ctxt->memopp->addr.mem.ea = address_mask(ctxt, ctxt->memopp->addr.mem.ea + ctxt->_eip); diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index c7220ba94aa7..6e219e5c07d2 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -94,7 +94,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic) { ioapic->rtc_status.pending_eoi = 0; - bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPUS); + bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID); } static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic); @@ -594,7 +594,7 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic) ioapic->irr = 0; ioapic->irr_delivered = 0; ioapic->id = 0; - memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS); + memset(ioapic->irq_eoi, 0x00, sizeof(ioapic->irq_eoi)); rtc_irq_eoi_tracking_reset(ioapic); } diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index 7d2692a49657..1cc6e54436db 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -42,13 +42,13 @@ struct kvm_vcpu; struct dest_map { /* vcpu bitmap where IRQ has been sent */ - DECLARE_BITMAP(map, KVM_MAX_VCPUS); + DECLARE_BITMAP(map, KVM_MAX_VCPU_ID); /* * Vector sent to a given vcpu, only valid when * the vcpu's bit in map is set */ - u8 vectors[KVM_MAX_VCPUS]; + u8 vectors[KVM_MAX_VCPU_ID]; }; diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 25810b144b58..6c0191615f23 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -41,6 +41,15 @@ static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, bool line_status) { struct kvm_pic *pic = pic_irqchip(kvm); + + /* + * XXX: rejecting pic routes when pic isn't in use would be better, + * but the default routing table is installed while kvm->arch.vpic is + * NULL and KVM_CREATE_IRQCHIP can race with KVM_IRQ_LINE. + */ + if (!pic) + return -1; + return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level); } @@ -49,6 +58,10 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, bool line_status) { struct kvm_ioapic *ioapic = kvm->arch.vioapic; + + if (!ioapic) + return -1; + return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level, line_status); } @@ -156,6 +169,16 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, } +static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, + bool line_status) +{ + if (!level) + return -1; + + return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint); +} + int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, int level, bool line_status) @@ -163,18 +186,26 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, struct kvm_lapic_irq irq; int r; - if (unlikely(e->type != KVM_IRQ_ROUTING_MSI)) - return -EWOULDBLOCK; + switch (e->type) { + case KVM_IRQ_ROUTING_HV_SINT: + return kvm_hv_set_sint(e, kvm, irq_source_id, level, + line_status); - if (kvm_msi_route_invalid(kvm, e)) - return -EINVAL; + case KVM_IRQ_ROUTING_MSI: + if (kvm_msi_route_invalid(kvm, e)) + return -EINVAL; - kvm_set_msi_irq(kvm, e, &irq); + kvm_set_msi_irq(kvm, e, &irq); - if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL)) - return r; - else - return -EWOULDBLOCK; + if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL)) + return r; + break; + + default: + break; + } + + return -EWOULDBLOCK; } int kvm_request_irq_source_id(struct kvm *kvm) @@ -254,16 +285,6 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, srcu_read_unlock(&kvm->irq_srcu, idx); } -static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int irq_source_id, int level, - bool line_status) -{ - if (!level) - return -1; - - return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint); -} - int kvm_set_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) @@ -423,18 +444,6 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, srcu_read_unlock(&kvm->irq_srcu, idx); } -int kvm_arch_set_irq(struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm, - int irq_source_id, int level, bool line_status) -{ - switch (irq->type) { - case KVM_IRQ_ROUTING_HV_SINT: - return kvm_hv_set_sint(irq, kvm, irq_source_id, level, - line_status); - default: - return -EWOULDBLOCK; - } -} - void kvm_arch_irq_routing_update(struct kvm *kvm) { kvm_hv_irq_routing_update(kvm); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 23b99f305382..6f69340f9fa3 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -138,7 +138,7 @@ static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map, *mask = dest_id & 0xff; return true; case KVM_APIC_MODE_XAPIC_CLUSTER: - *cluster = map->xapic_cluster_map[dest_id >> 4]; + *cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf]; *mask = dest_id & 0xf; return true; default: diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f8157a36ab09..8ca1eca5038d 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1138,21 +1138,6 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) mark_dirty(svm->vmcb, VMCB_INTERCEPTS); } -static void svm_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment) -{ - struct vcpu_svm *svm = to_svm(vcpu); - - svm->vmcb->control.tsc_offset += adjustment; - if (is_guest_mode(vcpu)) - svm->nested.hsave->control.tsc_offset += adjustment; - else - trace_kvm_write_tsc_offset(vcpu->vcpu_id, - svm->vmcb->control.tsc_offset - adjustment, - svm->vmcb->control.tsc_offset); - - mark_dirty(svm->vmcb, VMCB_INTERCEPTS); -} - static void avic_init_vmcb(struct vcpu_svm *svm) { struct vmcb *vmcb = svm->vmcb; @@ -3449,12 +3434,6 @@ static int cr8_write_interception(struct vcpu_svm *svm) return 0; } -static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) -{ - struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu)); - return vmcb->control.tsc_offset + host_tsc; -} - static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { struct vcpu_svm *svm = to_svm(vcpu); @@ -5422,8 +5401,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .has_wbinvd_exit = svm_has_wbinvd_exit, .write_tsc_offset = svm_write_tsc_offset, - .adjust_tsc_offset_guest = svm_adjust_tsc_offset_guest, - .read_l1_tsc = svm_read_l1_tsc, .set_tdp_cr3 = set_tdp_cr3, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index cf1b16dbc98a..5382b82462fc 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -187,6 +187,7 @@ struct vmcs { */ struct loaded_vmcs { struct vmcs *vmcs; + struct vmcs *shadow_vmcs; int cpu; int launched; struct list_head loaded_vmcss_on_cpu_link; @@ -411,7 +412,6 @@ struct nested_vmx { * memory during VMXOFF, VMCLEAR, VMPTRLD. */ struct vmcs12 *cached_vmcs12; - struct vmcs *current_shadow_vmcs; /* * Indicates if the shadow vmcs must be updated with the * data hold by vmcs12 @@ -421,7 +421,6 @@ struct nested_vmx { /* vmcs02_list cache of VMCSs recently used to run L2 guests */ struct list_head vmcs02_pool; int vmcs02_num; - u64 vmcs01_tsc_offset; bool change_vmcs01_virtual_x2apic_mode; /* L2 must run next, and mustn't decide to exit to L1. */ bool nested_run_pending; @@ -1419,6 +1418,8 @@ static void vmcs_clear(struct vmcs *vmcs) static inline void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs) { vmcs_clear(loaded_vmcs->vmcs); + if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched) + vmcs_clear(loaded_vmcs->shadow_vmcs); loaded_vmcs->cpu = -1; loaded_vmcs->launched = 0; } @@ -2605,20 +2606,6 @@ static u64 guest_read_tsc(struct kvm_vcpu *vcpu) } /* - * Like guest_read_tsc, but always returns L1's notion of the timestamp - * counter, even if a nested guest (L2) is currently running. - */ -static u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) -{ - u64 tsc_offset; - - tsc_offset = is_guest_mode(vcpu) ? - to_vmx(vcpu)->nested.vmcs01_tsc_offset : - vmcs_read64(TSC_OFFSET); - return host_tsc + tsc_offset; -} - -/* * writes 'offset' into guest's timestamp counter offset register */ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) @@ -2631,7 +2618,6 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) * to the newly set TSC to get L2's TSC. */ struct vmcs12 *vmcs12; - to_vmx(vcpu)->nested.vmcs01_tsc_offset = offset; /* recalculate vmcs02.TSC_OFFSET: */ vmcs12 = get_vmcs12(vcpu); vmcs_write64(TSC_OFFSET, offset + @@ -2644,19 +2630,6 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) } } -static void vmx_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment) -{ - u64 offset = vmcs_read64(TSC_OFFSET); - - vmcs_write64(TSC_OFFSET, offset + adjustment); - if (is_guest_mode(vcpu)) { - /* Even when running L2, the adjustment needs to apply to L1 */ - to_vmx(vcpu)->nested.vmcs01_tsc_offset += adjustment; - } else - trace_kvm_write_tsc_offset(vcpu->vcpu_id, offset, - offset + adjustment); -} - static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best = kvm_find_cpuid_entry(vcpu, 1, 0); @@ -3562,6 +3535,7 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) loaded_vmcs_clear(loaded_vmcs); free_vmcs(loaded_vmcs->vmcs); loaded_vmcs->vmcs = NULL; + WARN_ON(loaded_vmcs->shadow_vmcs != NULL); } static void free_kvm_area(void) @@ -6696,6 +6670,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) if (!item) return NULL; item->vmcs02.vmcs = alloc_vmcs(); + item->vmcs02.shadow_vmcs = NULL; if (!item->vmcs02.vmcs) { kfree(item); return NULL; @@ -7072,7 +7047,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu) shadow_vmcs->revision_id |= (1u << 31); /* init shadow vmcs */ vmcs_clear(shadow_vmcs); - vmx->nested.current_shadow_vmcs = shadow_vmcs; + vmx->vmcs01.shadow_vmcs = shadow_vmcs; } INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); @@ -7174,8 +7149,11 @@ static void free_nested(struct vcpu_vmx *vmx) free_page((unsigned long)vmx->nested.msr_bitmap); vmx->nested.msr_bitmap = NULL; } - if (enable_shadow_vmcs) - free_vmcs(vmx->nested.current_shadow_vmcs); + if (enable_shadow_vmcs) { + vmcs_clear(vmx->vmcs01.shadow_vmcs); + free_vmcs(vmx->vmcs01.shadow_vmcs); + vmx->vmcs01.shadow_vmcs = NULL; + } kfree(vmx->nested.cached_vmcs12); /* Unpin physical memory we referred to in current vmcs02 */ if (vmx->nested.apic_access_page) { @@ -7352,7 +7330,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) int i; unsigned long field; u64 field_value; - struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs; + struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs; const unsigned long *fields = shadow_read_write_fields; const int num_fields = max_shadow_read_write_fields; @@ -7401,7 +7379,7 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) int i, q; unsigned long field; u64 field_value = 0; - struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs; + struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs; vmcs_load(shadow_vmcs); @@ -7591,7 +7569,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS); vmcs_write64(VMCS_LINK_POINTER, - __pa(vmx->nested.current_shadow_vmcs)); + __pa(vmx->vmcs01.shadow_vmcs)); vmx->nested.sync_shadow_vmcs = true; } } @@ -7659,7 +7637,7 @@ static int handle_invept(struct kvm_vcpu *vcpu) types = (vmx->nested.nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6; - if (!(types & (1UL << type))) { + if (type >= 32 || !(types & (1 << type))) { nested_vmx_failValid(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); skip_emulated_instruction(vcpu); @@ -7722,7 +7700,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) types = (vmx->nested.nested_vmx_vpid_caps >> 8) & 0x7; - if (!(types & (1UL << type))) { + if (type >= 32 || !(types & (1 << type))) { nested_vmx_failValid(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); skip_emulated_instruction(vcpu); @@ -9156,6 +9134,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) vmx->loaded_vmcs = &vmx->vmcs01; vmx->loaded_vmcs->vmcs = alloc_vmcs(); + vmx->loaded_vmcs->shadow_vmcs = NULL; if (!vmx->loaded_vmcs->vmcs) goto free_msrs; if (!vmm_exclusive) @@ -10061,9 +10040,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) vmcs_write64(TSC_OFFSET, - vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset); + vcpu->arch.tsc_offset + vmcs12->tsc_offset); else - vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset); + vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset); if (kvm_has_tsc_control) decache_tsc_multiplier(vmx); @@ -10293,8 +10272,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) enter_guest_mode(vcpu); - vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); - if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); @@ -10818,7 +10795,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, load_vmcs12_host_state(vcpu, vmcs12); /* Update any VMCS fields that might have changed while L2 ran */ - vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset); + vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset); if (vmx->hv_deadline_tsc == -1) vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL, PIN_BASED_VMX_PREEMPTION_TIMER); @@ -11339,8 +11316,6 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, .write_tsc_offset = vmx_write_tsc_offset, - .adjust_tsc_offset_guest = vmx_adjust_tsc_offset_guest, - .read_l1_tsc = vmx_read_l1_tsc, .set_tdp_cr3 = vmx_set_cr3, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6c633de84dd7..04c5d96b1d67 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -210,7 +210,18 @@ static void kvm_on_user_return(struct user_return_notifier *urn) struct kvm_shared_msrs *locals = container_of(urn, struct kvm_shared_msrs, urn); struct kvm_shared_msr_values *values; + unsigned long flags; + /* + * Disabling irqs at this point since the following code could be + * interrupted and executed through kvm_arch_hardware_disable() + */ + local_irq_save(flags); + if (locals->registered) { + locals->registered = false; + user_return_notifier_unregister(urn); + } + local_irq_restore(flags); for (slot = 0; slot < shared_msrs_global.nr; ++slot) { values = &locals->values[slot]; if (values->host != values->curr) { @@ -218,8 +229,6 @@ static void kvm_on_user_return(struct user_return_notifier *urn) values->curr = values->host; } } - locals->registered = false; - user_return_notifier_unregister(urn); } static void shared_msr_update(unsigned slot, u32 msr) @@ -1409,7 +1418,7 @@ static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) { - return kvm_x86_ops->read_l1_tsc(vcpu, kvm_scale_tsc(vcpu, host_tsc)); + return vcpu->arch.tsc_offset + kvm_scale_tsc(vcpu, host_tsc); } EXPORT_SYMBOL_GPL(kvm_read_l1_tsc); @@ -1547,7 +1556,7 @@ EXPORT_SYMBOL_GPL(kvm_write_tsc); static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment) { - kvm_x86_ops->adjust_tsc_offset_guest(vcpu, adjustment); + kvm_vcpu_write_tsc_offset(vcpu, vcpu->arch.tsc_offset + adjustment); } static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment) @@ -1555,7 +1564,7 @@ static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment) if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio) WARN_ON(adjustment < 0); adjustment = kvm_scale_tsc(vcpu, (u64) adjustment); - kvm_x86_ops->adjust_tsc_offset_guest(vcpu, adjustment); + adjust_tsc_offset_guest(vcpu, adjustment); } #ifdef CONFIG_X86_64 @@ -1724,18 +1733,23 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) static u64 __get_kvmclock_ns(struct kvm *kvm) { - struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, 0); struct kvm_arch *ka = &kvm->arch; - s64 ns; + struct pvclock_vcpu_time_info hv_clock; - if (vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT) { - u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc()); - ns = __pvclock_read_cycles(&vcpu->arch.hv_clock, tsc); - } else { - ns = ktime_get_boot_ns() + ka->kvmclock_offset; + spin_lock(&ka->pvclock_gtod_sync_lock); + if (!ka->use_master_clock) { + spin_unlock(&ka->pvclock_gtod_sync_lock); + return ktime_get_boot_ns() + ka->kvmclock_offset; } - return ns; + hv_clock.tsc_timestamp = ka->master_cycle_now; + hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset; + spin_unlock(&ka->pvclock_gtod_sync_lock); + + kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL, + &hv_clock.tsc_shift, + &hv_clock.tsc_to_system_mul); + return __pvclock_read_cycles(&hv_clock, rdtsc()); } u64 get_kvmclock_ns(struct kvm *kvm) @@ -2262,7 +2276,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* Drop writes to this legacy MSR -- see rdmsr * counterpart for further detail. */ - vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data); + vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data); break; case MSR_AMD64_OSVW_ID_LENGTH: if (!guest_cpuid_has_osvw(vcpu)) @@ -2280,11 +2294,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (kvm_pmu_is_valid_msr(vcpu, msr)) return kvm_pmu_set_msr(vcpu, msr_info); if (!ignore_msrs) { - vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", + vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data 0x%llx\n", msr, data); return 1; } else { - vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", + vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data); break; } @@ -2596,7 +2610,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_PIT_STATE2: case KVM_CAP_SET_IDENTITY_MAP_ADDR: case KVM_CAP_XEN_HVM: - case KVM_CAP_ADJUST_CLOCK: case KVM_CAP_VCPU_EVENTS: case KVM_CAP_HYPERV: case KVM_CAP_HYPERV_VAPIC: @@ -2623,6 +2636,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #endif r = 1; break; + case KVM_CAP_ADJUST_CLOCK: + r = KVM_CLOCK_TSC_STABLE; + break; case KVM_CAP_X86_SMM: /* SMBASE is usually relocated above 1M on modern chipsets, * and SMM handlers might indeed rely on 4G segment limits, @@ -3415,6 +3431,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, }; case KVM_SET_VAPIC_ADDR: { struct kvm_vapic_addr va; + int idx; r = -EINVAL; if (!lapic_in_kernel(vcpu)) @@ -3422,7 +3439,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&va, argp, sizeof va)) goto out; + idx = srcu_read_lock(&vcpu->kvm->srcu); r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr); + srcu_read_unlock(&vcpu->kvm->srcu, idx); break; } case KVM_X86_SETUP_MCE: { @@ -4103,9 +4122,11 @@ long kvm_arch_vm_ioctl(struct file *filp, struct kvm_clock_data user_ns; u64 now_ns; - now_ns = get_kvmclock_ns(kvm); + local_irq_disable(); + now_ns = __get_kvmclock_ns(kvm); user_ns.clock = now_ns; - user_ns.flags = 0; + user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0; + local_irq_enable(); memset(&user_ns.pad, 0, sizeof(user_ns.pad)); r = -EFAULT; @@ -5733,13 +5754,13 @@ static int kvmclock_cpu_online(unsigned int cpu) static void kvm_timer_init(void) { - int cpu; - max_tsc_khz = tsc_khz; if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { #ifdef CONFIG_CPU_FREQ struct cpufreq_policy policy; + int cpu; + memset(&policy, 0, sizeof(policy)); cpu = get_cpu(); cpufreq_get_policy(&policy, cpu); @@ -7410,10 +7431,12 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { + void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask; + kvmclock_reset(vcpu); - free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); kvm_x86_ops->vcpu_free(vcpu); + free_cpumask_var(wbinvd_dirty_mask); } struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 79ae939970d3..fcd06f7526de 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -135,7 +135,12 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr) if (early_recursion_flag > 2) goto halt_loop; - if (regs->cs != __KERNEL_CS) + /* + * Old CPUs leave the high bits of CS on the stack + * undefined. I'm not sure which CPUs do this, but at least + * the 486 DX works this way. + */ + if ((regs->cs & 0xFFFF) != __KERNEL_CS) goto fail; /* diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index b8b6a60b32cf..0d4fb3ebbbac 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -435,7 +435,7 @@ slow_irqon: ret = get_user_pages_unlocked(start, (end - start) >> PAGE_SHIFT, - write, 0, pages); + pages, write ? FOLL_WRITE : 0); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index ddd2661c4502..887e57182716 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -104,10 +104,10 @@ void __init kernel_randomize_memory(void) * consistent with the vaddr_start/vaddr_end variables. */ BUILD_BUG_ON(vaddr_start >= vaddr_end); - BUILD_BUG_ON(config_enabled(CONFIG_X86_ESPFIX64) && + BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) && vaddr_end >= EFI_VA_START); - BUILD_BUG_ON((config_enabled(CONFIG_X86_ESPFIX64) || - config_enabled(CONFIG_EFI)) && + BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) || + IS_ENABLED(CONFIG_EFI)) && vaddr_end >= __START_KERNEL_map); BUILD_BUG_ON(vaddr_end > __START_KERNEL_map); diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 80476878eb4c..e4f800999b32 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -544,10 +544,9 @@ static int mpx_resolve_fault(long __user *addr, int write) { long gup_ret; int nr_pages = 1; - int force = 0; - gup_ret = get_user_pages((unsigned long)addr, nr_pages, write, - force, NULL, NULL); + gup_ret = get_user_pages((unsigned long)addr, nr_pages, + write ? FOLL_WRITE : 0, NULL, NULL); /* * get_user_pages() returns number of pages gotten. * 0 means we failed to fault in and get anything, diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 170cc4ff057b..83e701f160a9 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -730,6 +730,20 @@ void io_free_memtype(resource_size_t start, resource_size_t end) free_memtype(start, end); } +int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size) +{ + enum page_cache_mode type = _PAGE_CACHE_MODE_WC; + + return io_reserve_memtype(start, start + size, &type); +} +EXPORT_SYMBOL(arch_io_reserve_memtype_wc); + +void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size) +{ + io_free_memtype(start, start + size); +} +EXPORT_SYMBOL(arch_io_free_memtype_wc); + pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index bf99aa7005eb..936a488d6cf6 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -861,7 +861,7 @@ static void __init __efi_enter_virtual_mode(void) int count = 0, pg_shift = 0; void *new_memmap = NULL; efi_status_t status; - phys_addr_t pa; + unsigned long pa; efi.systab = NULL; diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 58b0f801f66f..319148bd4b05 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -31,6 +31,7 @@ #include <linux/io.h> #include <linux/reboot.h> #include <linux/slab.h> +#include <linux/ucs2_string.h> #include <asm/setup.h> #include <asm/page.h> @@ -211,6 +212,35 @@ void efi_sync_low_kernel_mappings(void) memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); } +/* + * Wrapper for slow_virt_to_phys() that handles NULL addresses. + */ +static inline phys_addr_t +virt_to_phys_or_null_size(void *va, unsigned long size) +{ + bool bad_size; + + if (!va) + return 0; + + if (virt_addr_valid(va)) + return virt_to_phys(va); + + /* + * A fully aligned variable on the stack is guaranteed not to + * cross a page bounary. Try to catch strings on the stack by + * checking that 'size' is a power of two. + */ + bad_size = size > PAGE_SIZE || !is_power_of_2(size); + + WARN_ON(!IS_ALIGNED((unsigned long)va, size) || bad_size); + + return slow_virt_to_phys(va); +} + +#define virt_to_phys_or_null(addr) \ + virt_to_phys_or_null_size((addr), sizeof(*(addr))) + int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) { unsigned long pfn, text; @@ -494,8 +524,8 @@ static efi_status_t efi_thunk_get_time(efi_time_t *tm, efi_time_cap_t *tc) spin_lock(&rtc_lock); - phys_tm = virt_to_phys(tm); - phys_tc = virt_to_phys(tc); + phys_tm = virt_to_phys_or_null(tm); + phys_tc = virt_to_phys_or_null(tc); status = efi_thunk(get_time, phys_tm, phys_tc); @@ -511,7 +541,7 @@ static efi_status_t efi_thunk_set_time(efi_time_t *tm) spin_lock(&rtc_lock); - phys_tm = virt_to_phys(tm); + phys_tm = virt_to_phys_or_null(tm); status = efi_thunk(set_time, phys_tm); @@ -529,9 +559,9 @@ efi_thunk_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending, spin_lock(&rtc_lock); - phys_enabled = virt_to_phys(enabled); - phys_pending = virt_to_phys(pending); - phys_tm = virt_to_phys(tm); + phys_enabled = virt_to_phys_or_null(enabled); + phys_pending = virt_to_phys_or_null(pending); + phys_tm = virt_to_phys_or_null(tm); status = efi_thunk(get_wakeup_time, phys_enabled, phys_pending, phys_tm); @@ -549,7 +579,7 @@ efi_thunk_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) spin_lock(&rtc_lock); - phys_tm = virt_to_phys(tm); + phys_tm = virt_to_phys_or_null(tm); status = efi_thunk(set_wakeup_time, enabled, phys_tm); @@ -558,6 +588,10 @@ efi_thunk_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) return status; } +static unsigned long efi_name_size(efi_char16_t *name) +{ + return ucs2_strsize(name, EFI_VAR_NAME_LEN) + 1; +} static efi_status_t efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor, @@ -567,11 +601,11 @@ efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor, u32 phys_name, phys_vendor, phys_attr; u32 phys_data_size, phys_data; - phys_data_size = virt_to_phys(data_size); - phys_vendor = virt_to_phys(vendor); - phys_name = virt_to_phys(name); - phys_attr = virt_to_phys(attr); - phys_data = virt_to_phys(data); + phys_data_size = virt_to_phys_or_null(data_size); + phys_vendor = virt_to_phys_or_null(vendor); + phys_name = virt_to_phys_or_null_size(name, efi_name_size(name)); + phys_attr = virt_to_phys_or_null(attr); + phys_data = virt_to_phys_or_null_size(data, *data_size); status = efi_thunk(get_variable, phys_name, phys_vendor, phys_attr, phys_data_size, phys_data); @@ -586,9 +620,9 @@ efi_thunk_set_variable(efi_char16_t *name, efi_guid_t *vendor, u32 phys_name, phys_vendor, phys_data; efi_status_t status; - phys_name = virt_to_phys(name); - phys_vendor = virt_to_phys(vendor); - phys_data = virt_to_phys(data); + phys_name = virt_to_phys_or_null_size(name, efi_name_size(name)); + phys_vendor = virt_to_phys_or_null(vendor); + phys_data = virt_to_phys_or_null_size(data, data_size); /* If data_size is > sizeof(u32) we've got problems */ status = efi_thunk(set_variable, phys_name, phys_vendor, @@ -605,9 +639,9 @@ efi_thunk_get_next_variable(unsigned long *name_size, efi_status_t status; u32 phys_name_size, phys_name, phys_vendor; - phys_name_size = virt_to_phys(name_size); - phys_vendor = virt_to_phys(vendor); - phys_name = virt_to_phys(name); + phys_name_size = virt_to_phys_or_null(name_size); + phys_vendor = virt_to_phys_or_null(vendor); + phys_name = virt_to_phys_or_null_size(name, *name_size); status = efi_thunk(get_next_variable, phys_name_size, phys_name, phys_vendor); @@ -621,7 +655,7 @@ efi_thunk_get_next_high_mono_count(u32 *count) efi_status_t status; u32 phys_count; - phys_count = virt_to_phys(count); + phys_count = virt_to_phys_or_null(count); status = efi_thunk(get_next_high_mono_count, phys_count); return status; @@ -633,7 +667,7 @@ efi_thunk_reset_system(int reset_type, efi_status_t status, { u32 phys_data; - phys_data = virt_to_phys(data); + phys_data = virt_to_phys_or_null_size(data, data_size); efi_thunk(reset_system, reset_type, status, data_size, phys_data); } @@ -661,9 +695,9 @@ efi_thunk_query_variable_info(u32 attr, u64 *storage_space, if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) return EFI_UNSUPPORTED; - phys_storage = virt_to_phys(storage_space); - phys_remaining = virt_to_phys(remaining_space); - phys_max = virt_to_phys(max_variable_size); + phys_storage = virt_to_phys_or_null(storage_space); + phys_remaining = virt_to_phys_or_null(remaining_space); + phys_max = virt_to_phys_or_null(max_variable_size); status = efi_thunk(query_variable_info, attr, phys_storage, phys_remaining, phys_max); diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile index 429d08be7848..dd6cfa4ad3ac 100644 --- a/arch/x86/platform/intel-mid/device_libs/Makefile +++ b/arch/x86/platform/intel-mid/device_libs/Makefile @@ -28,4 +28,4 @@ obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_pcal9555a.o obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_tca6416.o # MISC Devices obj-$(subst m,y,$(CONFIG_KEYBOARD_GPIO)) += platform_gpio_keys.o -obj-$(subst m,y,$(CONFIG_INTEL_MID_WATCHDOG)) += platform_wdt.o +obj-$(subst m,y,$(CONFIG_INTEL_MID_WATCHDOG)) += platform_mrfld_wdt.o diff --git a/arch/x86/platform/intel-mid/device_libs/platform_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c index de734134bc8d..3f1f1c77d090 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_wdt.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c @@ -1,5 +1,5 @@ /* - * platform_wdt.c: Watchdog platform library file + * Intel Merrifield watchdog platform device library file * * (C) Copyright 2014 Intel Corporation * Author: David Cohen <david.a.cohen@linux.intel.com> @@ -14,7 +14,9 @@ #include <linux/interrupt.h> #include <linux/platform_device.h> #include <linux/platform_data/intel-mid_wdt.h> + #include <asm/intel-mid.h> +#include <asm/intel_scu_ipc.h> #include <asm/io_apic.h> #define TANGIER_EXT_TIMER0_MSI 15 @@ -50,14 +52,34 @@ static struct intel_mid_wdt_pdata tangier_pdata = { .probe = tangier_probe, }; -static int __init register_mid_wdt(void) +static int wdt_scu_status_change(struct notifier_block *nb, + unsigned long code, void *data) { - if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER) { - wdt_dev.dev.platform_data = &tangier_pdata; - return platform_device_register(&wdt_dev); + if (code == SCU_DOWN) { + platform_device_unregister(&wdt_dev); + return 0; } - return -ENODEV; + return platform_device_register(&wdt_dev); } +static struct notifier_block wdt_scu_notifier = { + .notifier_call = wdt_scu_status_change, +}; + +static int __init register_mid_wdt(void) +{ + if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER) + return -ENODEV; + + wdt_dev.dev.platform_data = &tangier_pdata; + + /* + * We need to be sure that the SCU IPC is ready before watchdog device + * can be registered: + */ + intel_scu_notifier_add(&wdt_scu_notifier); + + return 0; +} rootfs_initcall(register_mid_wdt); diff --git a/arch/x86/platform/intel-mid/pwr.c b/arch/x86/platform/intel-mid/pwr.c index 5d3b45ad1c03..67375dda451c 100644 --- a/arch/x86/platform/intel-mid/pwr.c +++ b/arch/x86/platform/intel-mid/pwr.c @@ -272,6 +272,25 @@ int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state) } EXPORT_SYMBOL_GPL(intel_mid_pci_set_power_state); +pci_power_t intel_mid_pci_get_power_state(struct pci_dev *pdev) +{ + struct mid_pwr *pwr = midpwr; + int id, reg, bit; + u32 power; + + if (!pwr || !pwr->available) + return PCI_UNKNOWN; + + id = intel_mid_pwr_get_lss_id(pdev); + if (id < 0) + return PCI_UNKNOWN; + + reg = (id * LSS_PWS_BITS) / 32; + bit = (id * LSS_PWS_BITS) % 32; + power = mid_pwr_get_state(pwr, reg); + return (__force pci_power_t)((power >> bit) & 3); +} + void intel_mid_pwr_power_off(void) { struct mid_pwr *pwr = midpwr; diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c index 55130846ac87..c0533fbc39e3 100644 --- a/arch/x86/platform/olpc/olpc-xo15-sci.c +++ b/arch/x86/platform/olpc/olpc-xo15-sci.c @@ -196,6 +196,7 @@ static int xo15_sci_remove(struct acpi_device *device) return 0; } +#ifdef CONFIG_PM_SLEEP static int xo15_sci_resume(struct device *dev) { /* Enable all EC events */ @@ -207,6 +208,7 @@ static int xo15_sci_resume(struct device *dev) return 0; } +#endif static SIMPLE_DEV_PM_OPS(xo15_sci_pm, NULL, xo15_sci_resume); diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c index b4d5e95fe4df..4a6a5a26c582 100644 --- a/arch/x86/platform/uv/bios_uv.c +++ b/arch/x86/platform/uv/bios_uv.c @@ -40,7 +40,15 @@ s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) */ return BIOS_STATUS_UNIMPLEMENTED; - ret = efi_call_virt_pointer(tab, function, (u64)which, a1, a2, a3, a4, a5); + /* + * If EFI_OLD_MEMMAP is set, we need to fall back to using our old EFI + * callback method, which uses efi_call() directly, with the kernel page tables: + */ + if (unlikely(test_bit(EFI_OLD_MEMMAP, &efi.flags))) + ret = efi_call((void *)__va(tab->function), (u64)which, a1, a2, a3, a4, a5); + else + ret = efi_call_virt_pointer(tab, function, (u64)which, a1, a2, a3, a4, a5); + return ret; } EXPORT_SYMBOL_GPL(uv_bios_call); diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index ac58c1616408..555b9fa0ad43 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -16,6 +16,7 @@ KCOV_INSTRUMENT := n KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -c -MD -Os -mcmodel=large KBUILD_CFLAGS += -m$(BITS) +KBUILD_CFLAGS += $(call cc-option,-fno-PIE) $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE $(call if_changed,ld) diff --git a/arch/x86/tools/relocs.h b/arch/x86/tools/relocs.h index f59590645b68..1d23bf953a4a 100644 --- a/arch/x86/tools/relocs.h +++ b/arch/x86/tools/relocs.h @@ -16,7 +16,7 @@ #include <regex.h> #include <tools/le_byteshift.h> -void die(char *fmt, ...); +void die(char *fmt, ...) __attribute__((noreturn)); #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c index 5766ead6fdb9..60a5a5a85505 100644 --- a/arch/x86/um/ptrace_32.c +++ b/arch/x86/um/ptrace_32.c @@ -36,7 +36,8 @@ int is_syscall(unsigned long addr) * slow, but that doesn't matter, since it will be called only * in case of singlestepping, if copy_from_user failed. */ - n = access_process_vm(current, addr, &instr, sizeof(instr), 0); + n = access_process_vm(current, addr, &instr, sizeof(instr), + FOLL_FORCE); if (n != sizeof(instr)) { printk(KERN_ERR "is_syscall : failed to read " "instruction from 0x%lx\n", addr); diff --git a/arch/x86/um/ptrace_64.c b/arch/x86/um/ptrace_64.c index 0b5c184dd5b3..e30202b1716e 100644 --- a/arch/x86/um/ptrace_64.c +++ b/arch/x86/um/ptrace_64.c @@ -212,7 +212,8 @@ int is_syscall(unsigned long addr) * slow, but that doesn't matter, since it will be called only * in case of singlestepping, if copy_from_user failed. */ - n = access_process_vm(current, addr, &instr, sizeof(instr), 0); + n = access_process_vm(current, addr, &instr, sizeof(instr), + FOLL_FORCE); if (n != sizeof(instr)) { printk("is_syscall : failed to read instruction from " "0x%lx\n", addr); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index c0fdd57da7aa..bdd855685403 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1837,6 +1837,7 @@ static void __init init_hvm_pv_info(void) xen_domain_type = XEN_HVM_DOMAIN; } +#endif static int xen_cpu_up_prepare(unsigned int cpu) { @@ -1887,6 +1888,7 @@ static int xen_cpu_up_online(unsigned int cpu) return 0; } +#ifdef CONFIG_XEN_PVHVM #ifdef CONFIG_KEXEC_CORE static void xen_hvm_shutdown(void) { |