diff options
Diffstat (limited to 'arch/x86/xen')
-rw-r--r-- | arch/x86/xen/enlighten.c | 105 | ||||
-rw-r--r-- | arch/x86/xen/irq.c | 8 | ||||
-rw-r--r-- | arch/x86/xen/mmu.c | 28 | ||||
-rw-r--r-- | arch/x86/xen/multicalls.h | 2 | ||||
-rw-r--r-- | arch/x86/xen/setup.c | 3 | ||||
-rw-r--r-- | arch/x86/xen/smp.c | 15 | ||||
-rw-r--r-- | arch/x86/xen/spinlock.c | 27 |
7 files changed, 154 insertions, 34 deletions
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 12eb07bfb267..b132ade26f77 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -62,6 +62,15 @@ #include <asm/reboot.h> #include <asm/stackprotector.h> #include <asm/hypervisor.h> +#include <asm/mwait.h> + +#ifdef CONFIG_ACPI +#include <linux/acpi.h> +#include <asm/acpi.h> +#include <acpi/pdc_intel.h> +#include <acpi/processor.h> +#include <xen/interface/platform.h> +#endif #include "xen-ops.h" #include "mmu.h" @@ -200,13 +209,17 @@ static void __init xen_banner(void) static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0; static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0; +static __read_mostly unsigned int cpuid_leaf1_ecx_set_mask; +static __read_mostly unsigned int cpuid_leaf5_ecx_val; +static __read_mostly unsigned int cpuid_leaf5_edx_val; + static void xen_cpuid(unsigned int *ax, unsigned int *bx, unsigned int *cx, unsigned int *dx) { unsigned maskebx = ~0; unsigned maskecx = ~0; unsigned maskedx = ~0; - + unsigned setecx = 0; /* * Mask out inconvenient features, to try and disable as many * unsupported kernel subsystems as possible. @@ -214,9 +227,18 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, switch (*ax) { case 1: maskecx = cpuid_leaf1_ecx_mask; + setecx = cpuid_leaf1_ecx_set_mask; maskedx = cpuid_leaf1_edx_mask; break; + case CPUID_MWAIT_LEAF: + /* Synthesize the values.. */ + *ax = 0; + *bx = 0; + *cx = cpuid_leaf5_ecx_val; + *dx = cpuid_leaf5_edx_val; + return; + case 0xb: /* Suppress extended topology stuff */ maskebx = 0; @@ -232,9 +254,75 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, *bx &= maskebx; *cx &= maskecx; + *cx |= setecx; *dx &= maskedx; + } +static bool __init xen_check_mwait(void) +{ +#ifdef CONFIG_ACPI + struct xen_platform_op op = { + .cmd = XENPF_set_processor_pminfo, + .u.set_pminfo.id = -1, + .u.set_pminfo.type = XEN_PM_PDC, + }; + uint32_t buf[3]; + unsigned int ax, bx, cx, dx; + unsigned int mwait_mask; + + /* We need to determine whether it is OK to expose the MWAIT + * capability to the kernel to harvest deeper than C3 states from ACPI + * _CST using the processor_harvest_xen.c module. For this to work, we + * need to gather the MWAIT_LEAF values (which the cstate.c code + * checks against). The hypervisor won't expose the MWAIT flag because + * it would break backwards compatibility; so we will find out directly + * from the hardware and hypercall. + */ + if (!xen_initial_domain()) + return false; + + ax = 1; + cx = 0; + + native_cpuid(&ax, &bx, &cx, &dx); + + mwait_mask = (1 << (X86_FEATURE_EST % 32)) | + (1 << (X86_FEATURE_MWAIT % 32)); + + if ((cx & mwait_mask) != mwait_mask) + return false; + + /* We need to emulate the MWAIT_LEAF and for that we need both + * ecx and edx. The hypercall provides only partial information. + */ + + ax = CPUID_MWAIT_LEAF; + bx = 0; + cx = 0; + dx = 0; + + native_cpuid(&ax, &bx, &cx, &dx); + + /* Ask the Hypervisor whether to clear ACPI_PDC_C_C2C3_FFH. If so, + * don't expose MWAIT_LEAF and let ACPI pick the IOPORT version of C3. + */ + buf[0] = ACPI_PDC_REVISION_ID; + buf[1] = 1; + buf[2] = (ACPI_PDC_C_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_SWSMP); + + set_xen_guest_handle(op.u.set_pminfo.pdc, buf); + + if ((HYPERVISOR_dom0_op(&op) == 0) && + (buf[2] & (ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH))) { + cpuid_leaf5_ecx_val = cx; + cpuid_leaf5_edx_val = dx; + } + return true; +#else + return false; +#endif +} static void __init xen_init_cpuid_mask(void) { unsigned int ax, bx, cx, dx; @@ -261,6 +349,9 @@ static void __init xen_init_cpuid_mask(void) /* Xen will set CR4.OSXSAVE if supported and not disabled by force */ if ((cx & xsave_mask) != xsave_mask) cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */ + + if (xen_check_mwait()) + cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32)); } static void xen_set_debugreg(int reg, unsigned long val) @@ -777,11 +868,11 @@ static DEFINE_PER_CPU(unsigned long, xen_cr0_value); static unsigned long xen_read_cr0(void) { - unsigned long cr0 = percpu_read(xen_cr0_value); + unsigned long cr0 = this_cpu_read(xen_cr0_value); if (unlikely(cr0 == 0)) { cr0 = native_read_cr0(); - percpu_write(xen_cr0_value, cr0); + this_cpu_write(xen_cr0_value, cr0); } return cr0; @@ -791,7 +882,7 @@ static void xen_write_cr0(unsigned long cr0) { struct multicall_space mcs; - percpu_write(xen_cr0_value, cr0); + this_cpu_write(xen_cr0_value, cr0); /* Only pay attention to cr0.TS; everything else is ignored. */ @@ -1141,7 +1232,9 @@ asmlinkage void __init xen_start_kernel(void) /* Prevent unwanted bits from being set in PTEs. */ __supported_pte_mask &= ~_PAGE_GLOBAL; +#if 0 if (!xen_initial_domain()) +#endif __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); __supported_pte_mask |= _PAGE_IOMAP; @@ -1204,10 +1297,6 @@ asmlinkage void __init xen_start_kernel(void) pgd = (pgd_t *)xen_start_info->pt_base; - if (!xen_initial_domain()) - __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); - - __supported_pte_mask |= _PAGE_IOMAP; /* Don't do the full vcpu_info placement stuff until we have a possible map and a non-dummy shared_info. */ per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 8bbb465b6f0a..157337657971 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -26,7 +26,7 @@ static unsigned long xen_save_fl(void) struct vcpu_info *vcpu; unsigned long flags; - vcpu = percpu_read(xen_vcpu); + vcpu = this_cpu_read(xen_vcpu); /* flag has opposite sense of mask */ flags = !vcpu->evtchn_upcall_mask; @@ -50,7 +50,7 @@ static void xen_restore_fl(unsigned long flags) make sure we're don't switch CPUs between getting the vcpu pointer and updating the mask. */ preempt_disable(); - vcpu = percpu_read(xen_vcpu); + vcpu = this_cpu_read(xen_vcpu); vcpu->evtchn_upcall_mask = flags; preempt_enable_no_resched(); @@ -72,7 +72,7 @@ static void xen_irq_disable(void) make sure we're don't switch CPUs between getting the vcpu pointer and updating the mask. */ preempt_disable(); - percpu_read(xen_vcpu)->evtchn_upcall_mask = 1; + this_cpu_read(xen_vcpu)->evtchn_upcall_mask = 1; preempt_enable_no_resched(); } PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable); @@ -86,7 +86,7 @@ static void xen_irq_enable(void) the caller is confused and is trying to re-enable interrupts on an indeterminate processor. */ - vcpu = percpu_read(xen_vcpu); + vcpu = this_cpu_read(xen_vcpu); vcpu->evtchn_upcall_mask = 0; /* Doesn't matter if we get preempted here, because any diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 58a0e46c404d..988828b479ed 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -415,13 +415,13 @@ static pteval_t iomap_pte(pteval_t val) static pteval_t xen_pte_val(pte_t pte) { pteval_t pteval = pte.pte; - +#if 0 /* If this is a WC pte, convert back from Xen WC to Linux WC */ if ((pteval & (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)) == _PAGE_PAT) { WARN_ON(!pat_enabled); pteval = (pteval & ~_PAGE_PAT) | _PAGE_PWT; } - +#endif if (xen_initial_domain() && (pteval & _PAGE_IOMAP)) return pteval; @@ -463,7 +463,7 @@ void xen_set_pat(u64 pat) static pte_t xen_make_pte(pteval_t pte) { phys_addr_t addr = (pte & PTE_PFN_MASK); - +#if 0 /* If Linux is trying to set a WC pte, then map to the Xen WC. * If _PAGE_PAT is set, then it probably means it is really * _PAGE_PSE, so avoid fiddling with the PAT mapping and hope @@ -476,7 +476,7 @@ static pte_t xen_make_pte(pteval_t pte) if ((pte & (_PAGE_PCD | _PAGE_PWT)) == _PAGE_PWT) pte = (pte & ~(_PAGE_PCD | _PAGE_PWT)) | _PAGE_PAT; } - +#endif /* * Unprivileged domains are allowed to do IOMAPpings for * PCI passthrough, but not map ISA space. The ISA @@ -1071,14 +1071,14 @@ static void drop_other_mm_ref(void *info) struct mm_struct *mm = info; struct mm_struct *active_mm; - active_mm = percpu_read(cpu_tlbstate.active_mm); + active_mm = this_cpu_read(cpu_tlbstate.active_mm); - if (active_mm == mm && percpu_read(cpu_tlbstate.state) != TLBSTATE_OK) + if (active_mm == mm && this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) leave_mm(smp_processor_id()); /* If this cpu still has a stale cr3 reference, then make sure it has been flushed. */ - if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) + if (this_cpu_read(xen_current_cr3) == __pa(mm->pgd)) load_cr3(swapper_pg_dir); } @@ -1185,17 +1185,17 @@ static void __init xen_pagetable_setup_done(pgd_t *base) static void xen_write_cr2(unsigned long cr2) { - percpu_read(xen_vcpu)->arch.cr2 = cr2; + this_cpu_read(xen_vcpu)->arch.cr2 = cr2; } static unsigned long xen_read_cr2(void) { - return percpu_read(xen_vcpu)->arch.cr2; + return this_cpu_read(xen_vcpu)->arch.cr2; } unsigned long xen_read_cr2_direct(void) { - return percpu_read(xen_vcpu_info.arch.cr2); + return this_cpu_read(xen_vcpu_info.arch.cr2); } static void xen_flush_tlb(void) @@ -1278,12 +1278,12 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, static unsigned long xen_read_cr3(void) { - return percpu_read(xen_cr3); + return this_cpu_read(xen_cr3); } static void set_current_cr3(void *v) { - percpu_write(xen_current_cr3, (unsigned long)v); + this_cpu_write(xen_current_cr3, (unsigned long)v); } static void __xen_write_cr3(bool kernel, unsigned long cr3) @@ -1306,7 +1306,7 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3) xen_extend_mmuext_op(&op); if (kernel) { - percpu_write(xen_cr3, cr3); + this_cpu_write(xen_cr3, cr3); /* Update xen_current_cr3 once the batch has actually been submitted. */ @@ -1322,7 +1322,7 @@ static void xen_write_cr3(unsigned long cr3) /* Update while interrupts are disabled, so its atomic with respect to ipis */ - percpu_write(xen_cr3, cr3); + this_cpu_write(xen_cr3, cr3); __xen_write_cr3(true, cr3); diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h index dee79b78a90f..9c2e74f9096c 100644 --- a/arch/x86/xen/multicalls.h +++ b/arch/x86/xen/multicalls.h @@ -47,7 +47,7 @@ static inline void xen_mc_issue(unsigned mode) xen_mc_flush(); /* restore flags saved in xen_mc_batch */ - local_irq_restore(percpu_read(xen_mc_irq_flags)); + local_irq_restore(this_cpu_read(xen_mc_irq_flags)); } /* Set up a callback to be called when the current batch is flushed */ diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index e03c63692176..1ba8dff26753 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -10,6 +10,7 @@ #include <linux/pm.h> #include <linux/memblock.h> #include <linux/cpuidle.h> +#include <linux/cpufreq.h> #include <asm/elf.h> #include <asm/vdso.h> @@ -420,7 +421,7 @@ void __init xen_arch_setup(void) boot_cpu_data.hlt_works_ok = 1; #endif disable_cpuidle(); - boot_option_idle_override = IDLE_HALT; + disable_cpufreq(); WARN_ON(set_pm_idle_to_default()); fiddle_vdso(); } diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 041d4fe9dfe4..02900e8ce26c 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -75,8 +75,14 @@ static void __cpuinit cpu_bringup(void) xen_setup_cpu_clockevents(); + notify_cpu_starting(cpu); + + ipi_call_lock(); set_cpu_online(cpu, true); - percpu_write(cpu_state, CPU_ONLINE); + ipi_call_unlock(); + + this_cpu_write(cpu_state, CPU_ONLINE); + wmb(); /* We can take interrupts now: we're officially "up". */ @@ -409,6 +415,13 @@ static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */ play_dead_common(); HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); cpu_bringup(); + /* + * Balance out the preempt calls - as we are running in cpu_idle + * loop which has been called at bootup from cpu_bringup_and_idle. + * The cpucpu_bringup_and_idle called cpu_bringup which made a + * preempt_disable() So this preempt_enable will balance it out. + */ + preempt_enable(); } #else /* !CONFIG_HOTPLUG_CPU */ diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index cc9b1e182fcf..d69cc6c3f808 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -116,9 +116,26 @@ static inline void spin_time_accum_blocked(u64 start) } #endif /* CONFIG_XEN_DEBUG_FS */ +/* + * Size struct xen_spinlock so it's the same as arch_spinlock_t. + */ +#if NR_CPUS < 256 +typedef u8 xen_spinners_t; +# define inc_spinners(xl) \ + asm(LOCK_PREFIX " incb %0" : "+m" ((xl)->spinners) : : "memory"); +# define dec_spinners(xl) \ + asm(LOCK_PREFIX " decb %0" : "+m" ((xl)->spinners) : : "memory"); +#else +typedef u16 xen_spinners_t; +# define inc_spinners(xl) \ + asm(LOCK_PREFIX " incw %0" : "+m" ((xl)->spinners) : : "memory"); +# define dec_spinners(xl) \ + asm(LOCK_PREFIX " decw %0" : "+m" ((xl)->spinners) : : "memory"); +#endif + struct xen_spinlock { unsigned char lock; /* 0 -> free; 1 -> locked */ - unsigned short spinners; /* count of waiting cpus */ + xen_spinners_t spinners; /* count of waiting cpus */ }; static int xen_spin_is_locked(struct arch_spinlock *lock) @@ -164,8 +181,7 @@ static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl) wmb(); /* set lock of interest before count */ - asm(LOCK_PREFIX " incw %0" - : "+m" (xl->spinners) : : "memory"); + inc_spinners(xl); return prev; } @@ -176,8 +192,7 @@ static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl) */ static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev) { - asm(LOCK_PREFIX " decw %0" - : "+m" (xl->spinners) : : "memory"); + dec_spinners(xl); wmb(); /* decrement count before restoring lock */ __this_cpu_write(lock_spinners, prev); } @@ -373,6 +388,8 @@ void xen_uninit_lock_cpu(int cpu) void __init xen_init_spinlocks(void) { + BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t)); + pv_lock_ops.spin_is_locked = xen_spin_is_locked; pv_lock_ops.spin_is_contended = xen_spin_is_contended; pv_lock_ops.spin_lock = xen_spin_lock; |