Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/cpu/aperfmperf.c          | 43
-rw-r--r--  arch/x86/kernel/cpu/mcheck/therm_throt.c  |  2
-rw-r--r--  arch/x86/kernel/cpu/microcode/core.c      |  4
-rw-r--r--  arch/x86/kernel/cpu/mtrr/main.c           | 18
-rw-r--r--  arch/x86/kernel/head64.c                  |  7
-rw-r--r--  arch/x86/kernel/hpet.c                    | 27
-rw-r--r--  arch/x86/kernel/irq.c                     | 19
-rw-r--r--  arch/x86/kernel/irqinit.c                 |  2
-rw-r--r--  arch/x86/kernel/kprobes/core.c            | 10
-rw-r--r--  arch/x86/kernel/ksysfs.c                  |  4
-rw-r--r--  arch/x86/kernel/kvm.c                     |  6
-rw-r--r--  arch/x86/kernel/reboot.c                  |  6
-rw-r--r--  arch/x86/kernel/smpboot.c                 | 30
13 files changed, 118 insertions(+), 60 deletions(-)
diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c
index d869c8671e36..0ee83321a313 100644
--- a/arch/x86/kernel/cpu/aperfmperf.c
+++ b/arch/x86/kernel/cpu/aperfmperf.c
@@ -8,20 +8,25 @@
  * This file is licensed under GPLv2.
  */
 
-#include <linux/jiffies.h>
+#include <linux/delay.h>
+#include <linux/ktime.h>
 #include <linux/math64.h>
 #include <linux/percpu.h>
 #include <linux/smp.h>
 
 struct aperfmperf_sample {
 	unsigned int	khz;
-	unsigned long	jiffies;
+	ktime_t	time;
 	u64	aperf;
 	u64	mperf;
 };
 
 static DEFINE_PER_CPU(struct aperfmperf_sample, samples);
 
+#define APERFMPERF_CACHE_THRESHOLD_MS	10
+#define APERFMPERF_REFRESH_DELAY_MS	20
+#define APERFMPERF_STALE_THRESHOLD_MS	1000
+
 /*
  * aperfmperf_snapshot_khz()
  * On the current CPU, snapshot APERF, MPERF, and jiffies
@@ -33,13 +38,18 @@ static void aperfmperf_snapshot_khz(void *dummy)
 	u64 aperf, aperf_delta;
 	u64 mperf, mperf_delta;
 	struct aperfmperf_sample *s = this_cpu_ptr(&samples);
+	ktime_t now = ktime_get();
+	s64 time_delta = ktime_ms_delta(now, s->time);
+	unsigned long flags;
 
-	/* Don't bother re-computing within 10 ms */
-	if (time_before(jiffies, s->jiffies + HZ/100))
+	/* Don't bother re-computing within the cache threshold time. */
+	if (time_delta < APERFMPERF_CACHE_THRESHOLD_MS)
 		return;
 
+	local_irq_save(flags);
 	rdmsrl(MSR_IA32_APERF, aperf);
 	rdmsrl(MSR_IA32_MPERF, mperf);
+	local_irq_restore(flags);
 
 	aperf_delta = aperf - s->aperf;
 	mperf_delta = mperf - s->mperf;
@@ -51,22 +61,21 @@ static void aperfmperf_snapshot_khz(void *dummy)
 	if (mperf_delta == 0)
 		return;
 
-	/*
-	 * if (cpu_khz * aperf_delta) fits into ULLONG_MAX, then
-	 *	khz = (cpu_khz * aperf_delta) / mperf_delta
-	 */
-	if (div64_u64(ULLONG_MAX, cpu_khz) > aperf_delta)
-		s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta);
-	else	/* khz = aperf_delta / (mperf_delta / cpu_khz) */
-		s->khz = div64_u64(aperf_delta,
-				   div64_u64(mperf_delta, cpu_khz));
-	s->jiffies = jiffies;
+	s->time = now;
 	s->aperf = aperf;
 	s->mperf = mperf;
+
+	/* If the previous iteration was too long ago, discard it. */
+	if (time_delta > APERFMPERF_STALE_THRESHOLD_MS)
+		s->khz = 0;
+	else
+		s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta);
 }
 
 unsigned int arch_freq_get_on_cpu(int cpu)
 {
+	unsigned int khz;
+
 	if (!cpu_khz)
 		return 0;
@@ -74,6 +83,12 @@ unsigned int arch_freq_get_on_cpu(int cpu)
 		return 0;
 
 	smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1);
+	khz = per_cpu(samples.khz, cpu);
+	if (khz)
+		return khz;
+
+	msleep(APERFMPERF_REFRESH_DELAY_MS);
+	smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1);
 
 	return per_cpu(samples.khz, cpu);
 }
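The core of the aperfmperf change is the simplified frequency estimate: effective kHz is cpu_khz scaled by the ratio of the APERF delta to the MPERF delta, and a sample older than one second is discarded as meaningless. Below is a minimal standalone userspace sketch of that arithmetic; the counter values and base frequency are made up for illustration, standing in for the kernel's rdmsrl() reads of MSR_IA32_APERF/MPERF.

/* Standalone sketch of the APERF/MPERF frequency estimate above. */
#include <stdint.h>
#include <stdio.h>

#define STALE_THRESHOLD_MS 1000

struct sample {
	uint64_t aperf;   /* actual cycles while not halted */
	uint64_t mperf;   /* reference (TSC-rate) cycles while not halted */
	int64_t  time_ms; /* timestamp of the snapshot */
};

/* Effective kHz over the interval between two snapshots. */
static unsigned int effective_khz(const struct sample *prev,
				  const struct sample *cur,
				  uint64_t base_khz)
{
	uint64_t aperf_delta = cur->aperf - prev->aperf;
	uint64_t mperf_delta = cur->mperf - prev->mperf;

	if (mperf_delta == 0)
		return 0;
	/* A sample that is too old says nothing about the current state. */
	if (cur->time_ms - prev->time_ms > STALE_THRESHOLD_MS)
		return 0;
	/* freq = base * (actual cycles / reference cycles) */
	return (unsigned int)(base_khz * aperf_delta / mperf_delta);
}

int main(void)
{
	struct sample prev = { .aperf = 1000000, .mperf = 2000000, .time_ms = 0 };
	struct sample cur  = { .aperf = 4000000, .mperf = 4000000, .time_ms = 20 };

	/* CPU ran at 1.5x its 2.4 GHz base over the window -> 3600000 kHz. */
	printf("%u kHz\n", effective_khz(&prev, &cur, 2400000));
	return 0;
}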
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index d7cc190ae457..f7370abd33c6 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -122,7 +122,7 @@ static struct attribute *thermal_throttle_attrs[] = {
 	NULL
 };
 
-static struct attribute_group thermal_attr_group = {
+static const struct attribute_group thermal_attr_group = {
 	.attrs	= thermal_throttle_attrs,
 	.name	= "thermal_throttle"
 };
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 9cb98ee103db..86e8f0b2537b 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -561,7 +561,7 @@ static struct attribute *mc_default_attrs[] = {
 	NULL
 };
 
-static struct attribute_group mc_attr_group = {
+static const struct attribute_group mc_attr_group = {
 	.attrs	= mc_default_attrs,
 	.name	= "microcode",
 };
@@ -707,7 +707,7 @@ static struct attribute *cpu_root_microcode_attrs[] = {
 	NULL
 };
 
-static struct attribute_group cpu_root_microcode_group = {
+static const struct attribute_group cpu_root_microcode_group = {
 	.name  = "microcode",
 	.attrs = cpu_root_microcode_attrs,
 };
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index c5bb63be4ba1..40d5a8a75212 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -237,6 +237,18 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ
 	stop_machine(mtrr_rendezvous_handler, &data, cpu_online_mask);
 }
 
+static void set_mtrr_cpuslocked(unsigned int reg, unsigned long base,
+				unsigned long size, mtrr_type type)
+{
+	struct set_mtrr_data data = { .smp_reg = reg,
+				      .smp_base = base,
+				      .smp_size = size,
+				      .smp_type = type
+				    };
+
+	stop_machine_cpuslocked(mtrr_rendezvous_handler, &data, cpu_online_mask);
+}
+
 static void set_mtrr_from_inactive_cpu(unsigned int reg, unsigned long base,
 				       unsigned long size, mtrr_type type)
 {
@@ -370,7 +382,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
 	/* Search for an empty MTRR */
 	i = mtrr_if->get_free_region(base, size, replace);
 	if (i >= 0) {
-		set_mtrr(i, base, size, type);
+		set_mtrr_cpuslocked(i, base, size, type);
 		if (likely(replace < 0)) {
 			mtrr_usage_table[i] = 1;
 		} else {
@@ -378,7 +390,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
 			if (increment)
 				mtrr_usage_table[i]++;
 			if (unlikely(replace != i)) {
-				set_mtrr(replace, 0, 0, 0);
+				set_mtrr_cpuslocked(replace, 0, 0, 0);
 				mtrr_usage_table[replace] = 0;
 			}
 		}
@@ -506,7 +518,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
 		goto out;
 	}
 	if (--mtrr_usage_table[reg] < 1)
-		set_mtrr(reg, 0, 0, 0);
+		set_mtrr_cpuslocked(reg, 0, 0, 0);
 	error = reg;
  out:
 	mutex_unlock(&mtrr_mutex);
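The mtrr change follows a common kernel pattern: set_mtrr() implicitly takes the CPU hotplug lock via stop_machine(), so callers that already hold that lock need a set_mtrr_cpuslocked() variant built on stop_machine_cpuslocked() to avoid recursive acquisition. A generic pthread-based sketch of the locked/unlocked-variant pattern follows; the names are illustrative only, not kernel APIs.

/* Sketch: provide a *_locked variant for callers that hold the lock. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t hotplug_lock = PTHREAD_MUTEX_INITIALIZER;
static int shared_state;

/* Does the real work; the caller must hold hotplug_lock. */
static void update_locked(int val)
{
	shared_state = val;
}

/* Convenience wrapper for callers that do not hold the lock. */
static void update(int val)
{
	pthread_mutex_lock(&hotplug_lock);
	update_locked(val);
	pthread_mutex_unlock(&hotplug_lock);
}

int main(void)
{
	update(1);			/* normal path takes the lock itself */

	pthread_mutex_lock(&hotplug_lock);
	update_locked(2);		/* already-locked path must not re-lock */
	pthread_mutex_unlock(&hotplug_lock);

	printf("%d\n", shared_state);
	return 0;
}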
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 46c3c73e7f43..9ba79543d9ee 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -53,6 +53,7 @@ void __head __startup_64(unsigned long physaddr)
 	pudval_t *pud;
 	pmdval_t *pmd, pmd_entry;
 	int i;
+	unsigned int *next_pgt_ptr;
 
 	/* Is the address too large? */
 	if (physaddr >> MAX_PHYSMEM_BITS)
@@ -91,9 +92,9 @@ void __head __startup_64(unsigned long physaddr)
 	 * creates a bunch of nonsense entries but that is fine --
 	 * it avoids problems around wraparound.
 	 */
-
-	pud = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
-	pmd = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
+	next_pgt_ptr = fixup_pointer(&next_early_pgt, physaddr);
+	pud = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr);
+	pmd = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr);
 
 	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
 		p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 16f82a3aaec7..8ce4212e2b8d 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -345,21 +345,10 @@ static int hpet_shutdown(struct clock_event_device *evt, int timer)
 	return 0;
 }
 
-static int hpet_resume(struct clock_event_device *evt, int timer)
-{
-	if (!timer) {
-		hpet_enable_legacy_int();
-	} else {
-		struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
-
-		irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq));
-		irq_domain_activate_irq(irq_get_irq_data(hdev->irq));
-		disable_hardirq(hdev->irq);
-		irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
-		enable_irq(hdev->irq);
-	}
+static int hpet_resume(struct clock_event_device *evt)
+{
+	hpet_enable_legacy_int();
 	hpet_print_config();
-
 	return 0;
 }
 
@@ -417,7 +406,7 @@ static int hpet_legacy_set_periodic(struct clock_event_device *evt)
 
 static int hpet_legacy_resume(struct clock_event_device *evt)
 {
-	return hpet_resume(evt, 0);
+	return hpet_resume(evt);
 }
 
 static int hpet_legacy_next_event(unsigned long delta,
@@ -510,8 +499,14 @@ static int hpet_msi_set_periodic(struct clock_event_device *evt)
 static int hpet_msi_resume(struct clock_event_device *evt)
 {
 	struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
+	struct irq_data *data = irq_get_irq_data(hdev->irq);
+	struct msi_msg msg;
 
-	return hpet_resume(evt, hdev->num);
+	/* Restore the MSI msg and unmask the interrupt */
+	irq_chip_compose_msi_msg(data, &msg);
+	hpet_msi_write(hdev, &msg);
+	hpet_msi_unmask(data);
+	return 0;
 }
 
 static int hpet_msi_next_event(unsigned long delta,
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 4aa03c5a14c9..4ed0aba8dbc8 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -155,6 +155,12 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 		seq_printf(p, "%10u ",
 			   irq_stats(j)->kvm_posted_intr_ipis);
 	seq_puts(p, "  Posted-interrupt notification event\n");
 
+	seq_printf(p, "%*s: ", prec, "NPI");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ",
+			   irq_stats(j)->kvm_posted_intr_nested_ipis);
+	seq_puts(p, "  Nested posted-interrupt event\n");
+
 	seq_printf(p, "%*s: ", prec, "PIW");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ",
@@ -313,6 +319,19 @@ __visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs)
 	exiting_irq();
 	set_irq_regs(old_regs);
 }
+
+/*
+ * Handler for POSTED_INTERRUPT_NESTED_VECTOR.
+ */
+__visible void smp_kvm_posted_intr_nested_ipi(struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+
+	entering_ack_irq();
+	inc_irq_stat(kvm_posted_intr_nested_ipis);
+	exiting_irq();
+	set_irq_regs(old_regs);
+}
 #endif
 
 __visible void __irq_entry smp_trace_x86_platform_ipi(struct pt_regs *regs)
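The head64.c fix matters because __startup_64() runs before the kernel's final page tables and relocations exist: every global, including the next_early_pgt index being incremented, must be reached through fixup_pointer(). A standalone sketch of the idea follows, with the load offset simulated as a plain delta (the kernel's fixup_pointer() derives it from the physical load address).

/* Sketch of the fixup_pointer() idea from the head64.c hunk. */
#include <stdint.h>
#include <stdio.h>

/* Rebase a link-time pointer by the load offset. */
static void *fixup_pointer(void *ptr, intptr_t delta)
{
	return (void *)((char *)ptr + delta);
}

static unsigned int next_early_pgt;	/* stand-in for the kernel global */

int main(void)
{
	intptr_t delta = 0;	/* zero here; non-zero when loaded elsewhere */

	/*
	 * The bug fixed above: next_early_pgt++ touched the global through
	 * its unadjusted link-time address. The fix rebases a pointer to it
	 * first and increments through that pointer.
	 */
	unsigned int *next_pgt_ptr = fixup_pointer(&next_early_pgt, delta);

	unsigned int pud_idx = (*next_pgt_ptr)++;
	unsigned int pmd_idx = (*next_pgt_ptr)++;

	printf("pud slot %u, pmd slot %u\n", pud_idx, pmd_idx);
	return 0;
}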
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 7468c6987547..c7fd18526c3e 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -150,6 +150,8 @@ static void __init apic_intr_init(void)
 	alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi);
 	/* IPI for KVM to deliver interrupt to wake up tasks */
 	alloc_intr_gate(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi);
+	/* IPI for KVM to deliver nested posted interrupt */
+	alloc_intr_gate(POSTED_INTR_NESTED_VECTOR, kvm_posted_intr_nested_ipi);
 #endif
 
 	/* IPI vectors for APIC spurious and error interrupts */
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 6b877807598b..f0153714ddac 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -457,6 +457,8 @@ static int arch_copy_kprobe(struct kprobe *p)
 
 int arch_prepare_kprobe(struct kprobe *p)
 {
+	int ret;
+
 	if (alternatives_text_reserved(p->addr, p->addr))
 		return -EINVAL;
 
@@ -467,7 +469,13 @@ int arch_prepare_kprobe(struct kprobe *p)
 	if (!p->ainsn.insn)
 		return -ENOMEM;
 
-	return arch_copy_kprobe(p);
+	ret = arch_copy_kprobe(p);
+	if (ret) {
+		free_insn_slot(p->ainsn.insn, 0);
+		p->ainsn.insn = NULL;
+	}
+
+	return ret;
 }
 
 void arch_arm_kprobe(struct kprobe *p)
diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
index 4afc67f5facc..06e1ff5562c0 100644
--- a/arch/x86/kernel/ksysfs.c
+++ b/arch/x86/kernel/ksysfs.c
@@ -55,7 +55,7 @@ static struct bin_attribute *boot_params_data_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group boot_params_attr_group = {
+static const struct attribute_group boot_params_attr_group = {
 	.attrs = boot_params_version_attrs,
 	.bin_attrs = boot_params_data_attrs,
 };
@@ -202,7 +202,7 @@ static struct bin_attribute *setup_data_data_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group setup_data_attr_group = {
+static const struct attribute_group setup_data_attr_group = {
 	.attrs = setup_data_type_attrs,
 	.bin_attrs = setup_data_data_attrs,
 };
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 71c17a5be983..d04e30e3c0ff 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -151,6 +151,8 @@ void kvm_async_pf_task_wait(u32 token)
 		if (hlist_unhashed(&n.link))
 			break;
 
+		rcu_irq_exit();
+
 		if (!n.halted) {
 			local_irq_enable();
 			schedule();
@@ -159,11 +161,11 @@ void kvm_async_pf_task_wait(u32 token)
 			/*
 			 * We cannot reschedule. So halt.
 			 */
-			rcu_irq_exit();
 			native_safe_halt();
 			local_irq_disable();
-			rcu_irq_enter();
 		}
+
+		rcu_irq_enter();
 	}
 	if (!n.halted)
 		finish_swait(&n.wq, &wait);
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 67393fc88353..a56bf6051f4e 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -471,12 +471,12 @@ static int __init reboot_init(void)
 
 	/*
 	 * The DMI quirks table takes precedence. If no quirks entry
-	 * matches and the ACPI Hardware Reduced bit is set, force EFI
-	 * reboot.
+	 * matches and the ACPI Hardware Reduced bit is set and EFI
+	 * runtime services are enabled, force EFI reboot.
 	 */
 	rv = dmi_check_system(reboot_dmi_table);
 
-	if (!rv && efi_reboot_required())
+	if (!rv && efi_reboot_required() && !efi_runtime_disabled())
 		reboot_type = BOOT_EFI;
 
 	return 0;
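The kprobes hunk adds the standard unwind-on-failure idiom: if arch_copy_kprobe() fails after the instruction slot was already allocated, the slot is freed and the pointer cleared so later teardown cannot free it twice. A compact sketch of the idiom follows; prepare_payload() and the probe structure are made-up stand-ins for arch_copy_kprobe() and struct kprobe.

/* Sketch: release earlier allocations when a later step fails. */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct probe {
	char *slot;
};

static int prepare_payload(struct probe *p)
{
	(void)p;
	return -EINVAL;	/* pretend the instruction copy was rejected */
}

static int prepare_probe(struct probe *p)
{
	int ret;

	p->slot = malloc(64);	/* analogue of get_insn_slot() */
	if (!p->slot)
		return -ENOMEM;

	ret = prepare_payload(p);
	if (ret) {
		/* Undo the allocation; leave no dangling pointer behind. */
		free(p->slot);
		p->slot = NULL;
	}
	return ret;
}

int main(void)
{
	struct probe p = { 0 };

	printf("prepare_probe: %d, slot %p\n", prepare_probe(&p), (void *)p.slot);
	return 0;
}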
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index b474c8de7fba..54b9e89d4d6b 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -971,7 +971,8 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle)
  * Returns zero if CPU booted OK, else error code from
  * ->wakeup_secondary_cpu.
  */
-static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
+static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
+		       int *cpu0_nmi_registered)
 {
 	volatile u32 *trampoline_status =
 		(volatile u32 *) __va(real_mode_header->trampoline_status);
@@ -979,7 +980,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 	unsigned long start_ip = real_mode_header->trampoline_start;
 
 	unsigned long boot_error = 0;
-	int cpu0_nmi_registered = 0;
 	unsigned long timeout;
 
 	idle->thread.sp = (unsigned long)task_pt_regs(idle);
@@ -1035,7 +1035,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 		boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
 	else
 		boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid,
-						     &cpu0_nmi_registered);
+						     cpu0_nmi_registered);
 
 	if (!boot_error) {
 		/*
@@ -1080,12 +1080,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 		 */
 		smpboot_restore_warm_reset_vector();
 	}
-	/*
-	 * Clean up the nmi handler. Do this after the callin and callout sync
-	 * to avoid impact of possible long unregister time.
-	 */
-	if (cpu0_nmi_registered)
-		unregister_nmi_handler(NMI_LOCAL, "wake_cpu0");
 
 	return boot_error;
 }
@@ -1093,8 +1087,9 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
 	int apicid = apic->cpu_present_to_apicid(cpu);
+	int cpu0_nmi_registered = 0;
 	unsigned long flags;
-	int err;
+	int err, ret = 0;
 
 	WARN_ON(irqs_disabled());
 
@@ -1131,10 +1126,11 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
 
 	common_cpu_up(cpu, tidle);
 
-	err = do_boot_cpu(apicid, cpu, tidle);
+	err = do_boot_cpu(apicid, cpu, tidle, &cpu0_nmi_registered);
 	if (err) {
 		pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
-		return -EIO;
+		ret = -EIO;
+		goto unreg_nmi;
 	}
 
 	/*
@@ -1150,7 +1146,15 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
 		touch_nmi_watchdog();
 	}
 
-	return 0;
+unreg_nmi:
+	/*
+	 * Clean up the nmi handler. Do this after the callin and callout sync
+	 * to avoid impact of possible long unregister time.
+	 */
+	if (cpu0_nmi_registered)
+		unregister_nmi_handler(NMI_LOCAL, "wake_cpu0");
+
+	return ret;
 }
 
 /**
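The smpboot change converts an early return that leaked the CPU0 NMI handler into a single exit path: on failure, native_cpu_up() now jumps to the unreg_nmi label that the success path also falls through. A sketch of that control-flow pattern follows; the helpers and return codes here are made up, and only the goto-based cleanup structure mirrors the patch.

/* Sketch: one cleanup label shared by the success and failure paths. */
#include <errno.h>
#include <stdio.h>

static int nmi_registered;

static int boot_secondary(int cpu, int *registered)
{
	*registered = 1;		/* pretend a handler was registered */
	return (cpu == 0) ? -1 : 0;	/* fail for CPU 0 in this demo */
}

static int cpu_up(int cpu)
{
	int err, ret = 0;

	err = boot_secondary(cpu, &nmi_registered);
	if (err) {
		ret = -EIO;
		goto unreg_nmi;		/* cleanup still runs on failure */
	}

	/* ... wait for the CPU to come online ... */

unreg_nmi:
	if (nmi_registered)
		nmi_registered = 0;	/* unregister_nmi_handler() analogue */

	return ret;
}

int main(void)
{
	printf("cpu_up(1) = %d\n", cpu_up(1));
	printf("cpu_up(0) = %d\n", cpu_up(0));
	return 0;
}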