diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-23 03:59:19 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-23 04:16:14 +0100 |
commit | d4c6fa73fe984e504d52f3d6bba291fd76fe49f7 (patch) | |
tree | 47842ddebb2a48cc1513b36fba18835678e2b94e /arch/x86/xen | |
parent | Merge tag 'stable/for-linus-3.4' of git://git.kernel.org/pub/scm/linux/kernel... (diff) | |
parent | xen kconfig: relax INPUT_XEN_KBDDEV_FRONTEND deps (diff) | |
download | linux-d4c6fa73fe984e504d52f3d6bba291fd76fe49f7.tar.xz linux-d4c6fa73fe984e504d52f3d6bba291fd76fe49f7.zip |
Merge tag 'stable/for-linus-3.4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen
Pull xen updates from Konrad Rzeszutek Wilk:
"which has three neat features:
- PV multiconsole support, so that there can be hvc1, hvc2, etc; This
can be used in HVM and in PV mode.
- P-state and C-state power management driver that uploads said power
management data to the hypervisor. It also inhibits cpufreq
scaling drivers to load so that only the hypervisor can make power
management decisions - fixing a weird perf bug.
There is one thing in the Kconfig that you won't like: "default y
if (X86_ACPI_CPUFREQ = y || X86_POWERNOW_K8 = y)" (note, that it
all depends on CONFIG_XEN which depends on CONFIG_PARAVIRT which by
default is off). I've a fix to convert that boolean expression
into "default m" which I am going to post after the cpufreq git
pull - as the two patches to make this work depend on a fix in Dave
Jones's tree.
- Function Level Reset (FLR) support in the Xen PCI backend.
Fixes:
- Kconfig dependencies for Xen PV keyboard and video
- Compile warnings and constify fixes
- Change over to use percpu_xxx instead of this_cpu_xxx"
Fix up trivial conflicts in drivers/tty/hvc/hvc_xen.c due to changes to
a removed commit.
* tag 'stable/for-linus-3.4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
xen kconfig: relax INPUT_XEN_KBDDEV_FRONTEND deps
xen/acpi-processor: C and P-state driver that uploads said data to hypervisor.
xen: constify all instances of "struct attribute_group"
xen/xenbus: ignore console/0
hvc_xen: introduce HVC_XEN_FRONTEND
hvc_xen: implement multiconsole support
hvc_xen: support PV on HVM consoles
xenbus: don't free other end details too early
xen/enlighten: Expose MWAIT and MWAIT_LEAF if hypervisor OKs it.
xen/setup/pm/acpi: Remove the call to boot_option_idle_override.
xenbus: address compiler warnings
xen: use this_cpu_xxx replace percpu_xxx funcs
xen/pciback: Support pci_reset_function, aka FLR or D3 support.
pci: Introduce __pci_reset_function_locked to be used when holding device_lock.
xen: Utilize the restore_msi_irqs hook.
Diffstat (limited to 'arch/x86/xen')
-rw-r--r-- | arch/x86/xen/enlighten.c | 99 | ||||
-rw-r--r-- | arch/x86/xen/irq.c | 8 | ||||
-rw-r--r-- | arch/x86/xen/mmu.c | 20 | ||||
-rw-r--r-- | arch/x86/xen/multicalls.h | 2 | ||||
-rw-r--r-- | arch/x86/xen/setup.c | 1 | ||||
-rw-r--r-- | arch/x86/xen/smp.c | 2 |
6 files changed, 111 insertions, 21 deletions
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 4172af8ceeb3..b132ade26f77 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -62,6 +62,15 @@ #include <asm/reboot.h> #include <asm/stackprotector.h> #include <asm/hypervisor.h> +#include <asm/mwait.h> + +#ifdef CONFIG_ACPI +#include <linux/acpi.h> +#include <asm/acpi.h> +#include <acpi/pdc_intel.h> +#include <acpi/processor.h> +#include <xen/interface/platform.h> +#endif #include "xen-ops.h" #include "mmu.h" @@ -200,13 +209,17 @@ static void __init xen_banner(void) static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0; static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0; +static __read_mostly unsigned int cpuid_leaf1_ecx_set_mask; +static __read_mostly unsigned int cpuid_leaf5_ecx_val; +static __read_mostly unsigned int cpuid_leaf5_edx_val; + static void xen_cpuid(unsigned int *ax, unsigned int *bx, unsigned int *cx, unsigned int *dx) { unsigned maskebx = ~0; unsigned maskecx = ~0; unsigned maskedx = ~0; - + unsigned setecx = 0; /* * Mask out inconvenient features, to try and disable as many * unsupported kernel subsystems as possible. @@ -214,9 +227,18 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, switch (*ax) { case 1: maskecx = cpuid_leaf1_ecx_mask; + setecx = cpuid_leaf1_ecx_set_mask; maskedx = cpuid_leaf1_edx_mask; break; + case CPUID_MWAIT_LEAF: + /* Synthesize the values.. */ + *ax = 0; + *bx = 0; + *cx = cpuid_leaf5_ecx_val; + *dx = cpuid_leaf5_edx_val; + return; + case 0xb: /* Suppress extended topology stuff */ maskebx = 0; @@ -232,9 +254,75 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, *bx &= maskebx; *cx &= maskecx; + *cx |= setecx; *dx &= maskedx; + } +static bool __init xen_check_mwait(void) +{ +#ifdef CONFIG_ACPI + struct xen_platform_op op = { + .cmd = XENPF_set_processor_pminfo, + .u.set_pminfo.id = -1, + .u.set_pminfo.type = XEN_PM_PDC, + }; + uint32_t buf[3]; + unsigned int ax, bx, cx, dx; + unsigned int mwait_mask; + + /* We need to determine whether it is OK to expose the MWAIT + * capability to the kernel to harvest deeper than C3 states from ACPI + * _CST using the processor_harvest_xen.c module. For this to work, we + * need to gather the MWAIT_LEAF values (which the cstate.c code + * checks against). The hypervisor won't expose the MWAIT flag because + * it would break backwards compatibility; so we will find out directly + * from the hardware and hypercall. + */ + if (!xen_initial_domain()) + return false; + + ax = 1; + cx = 0; + + native_cpuid(&ax, &bx, &cx, &dx); + + mwait_mask = (1 << (X86_FEATURE_EST % 32)) | + (1 << (X86_FEATURE_MWAIT % 32)); + + if ((cx & mwait_mask) != mwait_mask) + return false; + + /* We need to emulate the MWAIT_LEAF and for that we need both + * ecx and edx. The hypercall provides only partial information. + */ + + ax = CPUID_MWAIT_LEAF; + bx = 0; + cx = 0; + dx = 0; + + native_cpuid(&ax, &bx, &cx, &dx); + + /* Ask the Hypervisor whether to clear ACPI_PDC_C_C2C3_FFH. If so, + * don't expose MWAIT_LEAF and let ACPI pick the IOPORT version of C3. + */ + buf[0] = ACPI_PDC_REVISION_ID; + buf[1] = 1; + buf[2] = (ACPI_PDC_C_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_SWSMP); + + set_xen_guest_handle(op.u.set_pminfo.pdc, buf); + + if ((HYPERVISOR_dom0_op(&op) == 0) && + (buf[2] & (ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH))) { + cpuid_leaf5_ecx_val = cx; + cpuid_leaf5_edx_val = dx; + } + return true; +#else + return false; +#endif +} static void __init xen_init_cpuid_mask(void) { unsigned int ax, bx, cx, dx; @@ -261,6 +349,9 @@ static void __init xen_init_cpuid_mask(void) /* Xen will set CR4.OSXSAVE if supported and not disabled by force */ if ((cx & xsave_mask) != xsave_mask) cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */ + + if (xen_check_mwait()) + cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32)); } static void xen_set_debugreg(int reg, unsigned long val) @@ -777,11 +868,11 @@ static DEFINE_PER_CPU(unsigned long, xen_cr0_value); static unsigned long xen_read_cr0(void) { - unsigned long cr0 = percpu_read(xen_cr0_value); + unsigned long cr0 = this_cpu_read(xen_cr0_value); if (unlikely(cr0 == 0)) { cr0 = native_read_cr0(); - percpu_write(xen_cr0_value, cr0); + this_cpu_write(xen_cr0_value, cr0); } return cr0; @@ -791,7 +882,7 @@ static void xen_write_cr0(unsigned long cr0) { struct multicall_space mcs; - percpu_write(xen_cr0_value, cr0); + this_cpu_write(xen_cr0_value, cr0); /* Only pay attention to cr0.TS; everything else is ignored. */ diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 8bbb465b6f0a..157337657971 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -26,7 +26,7 @@ static unsigned long xen_save_fl(void) struct vcpu_info *vcpu; unsigned long flags; - vcpu = percpu_read(xen_vcpu); + vcpu = this_cpu_read(xen_vcpu); /* flag has opposite sense of mask */ flags = !vcpu->evtchn_upcall_mask; @@ -50,7 +50,7 @@ static void xen_restore_fl(unsigned long flags) make sure we're don't switch CPUs between getting the vcpu pointer and updating the mask. */ preempt_disable(); - vcpu = percpu_read(xen_vcpu); + vcpu = this_cpu_read(xen_vcpu); vcpu->evtchn_upcall_mask = flags; preempt_enable_no_resched(); @@ -72,7 +72,7 @@ static void xen_irq_disable(void) make sure we're don't switch CPUs between getting the vcpu pointer and updating the mask. */ preempt_disable(); - percpu_read(xen_vcpu)->evtchn_upcall_mask = 1; + this_cpu_read(xen_vcpu)->evtchn_upcall_mask = 1; preempt_enable_no_resched(); } PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable); @@ -86,7 +86,7 @@ static void xen_irq_enable(void) the caller is confused and is trying to re-enable interrupts on an indeterminate processor. */ - vcpu = percpu_read(xen_vcpu); + vcpu = this_cpu_read(xen_vcpu); vcpu->evtchn_upcall_mask = 0; /* Doesn't matter if we get preempted here, because any diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 95c1cf60c669..988828b479ed 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1071,14 +1071,14 @@ static void drop_other_mm_ref(void *info) struct mm_struct *mm = info; struct mm_struct *active_mm; - active_mm = percpu_read(cpu_tlbstate.active_mm); + active_mm = this_cpu_read(cpu_tlbstate.active_mm); - if (active_mm == mm && percpu_read(cpu_tlbstate.state) != TLBSTATE_OK) + if (active_mm == mm && this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) leave_mm(smp_processor_id()); /* If this cpu still has a stale cr3 reference, then make sure it has been flushed. */ - if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) + if (this_cpu_read(xen_current_cr3) == __pa(mm->pgd)) load_cr3(swapper_pg_dir); } @@ -1185,17 +1185,17 @@ static void __init xen_pagetable_setup_done(pgd_t *base) static void xen_write_cr2(unsigned long cr2) { - percpu_read(xen_vcpu)->arch.cr2 = cr2; + this_cpu_read(xen_vcpu)->arch.cr2 = cr2; } static unsigned long xen_read_cr2(void) { - return percpu_read(xen_vcpu)->arch.cr2; + return this_cpu_read(xen_vcpu)->arch.cr2; } unsigned long xen_read_cr2_direct(void) { - return percpu_read(xen_vcpu_info.arch.cr2); + return this_cpu_read(xen_vcpu_info.arch.cr2); } static void xen_flush_tlb(void) @@ -1278,12 +1278,12 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, static unsigned long xen_read_cr3(void) { - return percpu_read(xen_cr3); + return this_cpu_read(xen_cr3); } static void set_current_cr3(void *v) { - percpu_write(xen_current_cr3, (unsigned long)v); + this_cpu_write(xen_current_cr3, (unsigned long)v); } static void __xen_write_cr3(bool kernel, unsigned long cr3) @@ -1306,7 +1306,7 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3) xen_extend_mmuext_op(&op); if (kernel) { - percpu_write(xen_cr3, cr3); + this_cpu_write(xen_cr3, cr3); /* Update xen_current_cr3 once the batch has actually been submitted. */ @@ -1322,7 +1322,7 @@ static void xen_write_cr3(unsigned long cr3) /* Update while interrupts are disabled, so its atomic with respect to ipis */ - percpu_write(xen_cr3, cr3); + this_cpu_write(xen_cr3, cr3); __xen_write_cr3(true, cr3); diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h index dee79b78a90f..9c2e74f9096c 100644 --- a/arch/x86/xen/multicalls.h +++ b/arch/x86/xen/multicalls.h @@ -47,7 +47,7 @@ static inline void xen_mc_issue(unsigned mode) xen_mc_flush(); /* restore flags saved in xen_mc_batch */ - local_irq_restore(percpu_read(xen_mc_irq_flags)); + local_irq_restore(this_cpu_read(xen_mc_irq_flags)); } /* Set up a callback to be called when the current batch is flushed */ diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index e03c63692176..12366238d07d 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -420,7 +420,6 @@ void __init xen_arch_setup(void) boot_cpu_data.hlt_works_ok = 1; #endif disable_cpuidle(); - boot_option_idle_override = IDLE_HALT; WARN_ON(set_pm_idle_to_default()); fiddle_vdso(); } diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 501d4e0244ba..315d8fa0c8fb 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -76,7 +76,7 @@ static void __cpuinit cpu_bringup(void) xen_setup_cpu_clockevents(); set_cpu_online(cpu, true); - percpu_write(cpu_state, CPU_ONLINE); + this_cpu_write(cpu_state, CPU_ONLINE); wmb(); /* We can take interrupts now: we're officially "up". */ |