From 6c2681c863b24360098d1ba60f2af060a13a0561 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 17 Jul 2015 06:51:34 +0200 Subject: xen: add explicit memblock_reserve() calls for special pages Some special pages containing interfaces to xen are being reserved implicitly only today. The memblock_reserve() call to reserve them is meant to reserve the p2m list supplied by xen. It is just reserving not only the p2m list itself, but some more pages up to the start of the xen built page tables. To be able to move the p2m list to another pfn range, which is needed for support of huge RAM, this memblock_reserve() must be split up to cover all affected reserved pages explicitly. The affected pages are: - start_info page - xenstore ring (might be missing, mfn is 0 in this case) - console ring (not for initial domain) Signed-off-by: Juergen Gross Signed-off-by: David Vrabel --- arch/x86/xen/enlighten.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/xen/enlighten.c') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 11d6fb4e8483..373dbc9810d1 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1610,7 +1610,9 @@ asmlinkage __visible void __init xen_start_kernel(void) early_boot_irqs_disabled = true; xen_raw_console_write("mapping kernel into physical memory\n"); - xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages); + xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, + xen_start_info->nr_pages); + xen_reserve_special_pages(); /* * Modify the cache mode translation tables to match Xen's PAT -- cgit v1.2.3 From 65d0cf0be79feebeb19e7626fd3ed41ae73f642d Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Mon, 10 Aug 2015 16:34:34 -0400 Subject: xen/PMU: Initialization code for Xen PMU Map shared data structure that will hold CPU registers, VPMU context, V/PCPU IDs of the CPU interrupted by PMU interrupt. Hypervisor fills this information in its handler and passes it to the guest for further processing. Set up PMU VIRQ. Now that perf infrastructure will assume that PMU is available on a PV guest we need to be careful and make sure that accesses via RDPMC instruction don't cause fatal traps by the hypervisor. Provide a nop RDPMC handler. For the same reason avoid issuing a warning on a write to APIC's LVTPC. Both of these will be made functional in later patches. Signed-off-by: Boris Ostrovsky Reviewed-by: David Vrabel Signed-off-by: David Vrabel --- arch/x86/include/asm/xen/interface.h | 123 +++++++++++++++++++++++++ arch/x86/xen/Makefile | 2 +- arch/x86/xen/apic.c | 3 + arch/x86/xen/enlighten.c | 12 ++- arch/x86/xen/pmu.c | 170 +++++++++++++++++++++++++++++++++++ arch/x86/xen/pmu.h | 11 +++ arch/x86/xen/smp.c | 29 +++++- arch/x86/xen/suspend.c | 23 +++-- include/xen/interface/xen.h | 1 + include/xen/interface/xenpmu.h | 33 +++++++ 10 files changed, 398 insertions(+), 9 deletions(-) create mode 100644 arch/x86/xen/pmu.c create mode 100644 arch/x86/xen/pmu.h (limited to 'arch/x86/xen/enlighten.c') diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index 3b88eeacdbda..62ca03ef5c65 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h @@ -250,6 +250,129 @@ struct vcpu_guest_context { #endif }; DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context); + +/* AMD PMU registers and structures */ +struct xen_pmu_amd_ctxt { + /* + * Offsets to counter and control MSRs (relative to xen_pmu_arch.c.amd). + * For PV(H) guests these fields are RO. + */ + uint32_t counters; + uint32_t ctrls; + + /* Counter MSRs */ +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + uint64_t regs[]; +#elif defined(__GNUC__) + uint64_t regs[0]; +#endif +}; + +/* Intel PMU registers and structures */ +struct xen_pmu_cntr_pair { + uint64_t counter; + uint64_t control; +}; + +struct xen_pmu_intel_ctxt { + /* + * Offsets to fixed and architectural counter MSRs (relative to + * xen_pmu_arch.c.intel). + * For PV(H) guests these fields are RO. + */ + uint32_t fixed_counters; + uint32_t arch_counters; + + /* PMU registers */ + uint64_t global_ctrl; + uint64_t global_ovf_ctrl; + uint64_t global_status; + uint64_t fixed_ctrl; + uint64_t ds_area; + uint64_t pebs_enable; + uint64_t debugctl; + + /* Fixed and architectural counter MSRs */ +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + uint64_t regs[]; +#elif defined(__GNUC__) + uint64_t regs[0]; +#endif +}; + +/* Sampled domain's registers */ +struct xen_pmu_regs { + uint64_t ip; + uint64_t sp; + uint64_t flags; + uint16_t cs; + uint16_t ss; + uint8_t cpl; + uint8_t pad[3]; +}; + +/* PMU flags */ +#define PMU_CACHED (1<<0) /* PMU MSRs are cached in the context */ +#define PMU_SAMPLE_USER (1<<1) /* Sample is from user or kernel mode */ +#define PMU_SAMPLE_REAL (1<<2) /* Sample is from realmode */ +#define PMU_SAMPLE_PV (1<<3) /* Sample from a PV guest */ + +/* + * Architecture-specific information describing state of the processor at + * the time of PMU interrupt. + * Fields of this structure marked as RW for guest should only be written by + * the guest when PMU_CACHED bit in pmu_flags is set (which is done by the + * hypervisor during PMU interrupt). Hypervisor will read updated data in + * XENPMU_flush hypercall and clear PMU_CACHED bit. + */ +struct xen_pmu_arch { + union { + /* + * Processor's registers at the time of interrupt. + * WO for hypervisor, RO for guests. + */ + struct xen_pmu_regs regs; + /* + * Padding for adding new registers to xen_pmu_regs in + * the future + */ +#define XENPMU_REGS_PAD_SZ 64 + uint8_t pad[XENPMU_REGS_PAD_SZ]; + } r; + + /* WO for hypervisor, RO for guest */ + uint64_t pmu_flags; + + /* + * APIC LVTPC register. + * RW for both hypervisor and guest. + * Only APIC_LVT_MASKED bit is loaded by the hypervisor into hardware + * during XENPMU_flush or XENPMU_lvtpc_set. + */ + union { + uint32_t lapic_lvtpc; + uint64_t pad; + } l; + + /* + * Vendor-specific PMU registers. + * RW for both hypervisor and guest (see exceptions above). + * Guest's updates to this field are verified and then loaded by the + * hypervisor into hardware during XENPMU_flush + */ + union { + struct xen_pmu_amd_ctxt amd; + struct xen_pmu_intel_ctxt intel; + + /* + * Padding for contexts (fixed parts only, does not include + * MSR banks that are specified by offsets) + */ +#define XENPMU_CTXT_PAD_SZ 128 + uint8_t pad[XENPMU_CTXT_PAD_SZ]; + } c; +}; + #endif /* !__ASSEMBLY__ */ /* diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 4b6e29ac0968..e47e52787d32 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -13,7 +13,7 @@ CFLAGS_mmu.o := $(nostackp) obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ time.o xen-asm.o xen-asm_$(BITS).o \ grant-table.o suspend.o platform-pci-unplug.o \ - p2m.o apic.o + p2m.o apic.o pmu.o obj-$(CONFIG_EVENT_TRACING) += trace.o diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index 70e060ad879a..d03ebfa89b9f 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -72,6 +72,9 @@ static u32 xen_apic_read(u32 reg) static void xen_apic_write(u32 reg, u32 val) { + if (reg == APIC_LVTPC) + return; + /* Warn to see if there's any stray references */ WARN(1,"register: %x, value: %x\n", reg, val); } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 373dbc9810d1..19072f91a8e2 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -84,6 +84,7 @@ #include "mmu.h" #include "smp.h" #include "multicalls.h" +#include "pmu.h" EXPORT_SYMBOL_GPL(hypercall_page); @@ -1082,6 +1083,11 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) return ret; } +unsigned long long xen_read_pmc(int counter) +{ + return 0; +} + void xen_setup_shared_info(void) { if (!xen_feature(XENFEAT_auto_translated_physmap)) { @@ -1216,7 +1222,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .write_msr = xen_write_msr_safe, .read_tsc = native_read_tsc, - .read_pmc = native_read_pmc, + .read_pmc = xen_read_pmc, .read_tscp = native_read_tscp, @@ -1267,6 +1273,10 @@ static const struct pv_apic_ops xen_apic_ops __initconst = { static void xen_reboot(int reason) { struct sched_shutdown r = { .reason = reason }; + int cpu; + + for_each_online_cpu(cpu) + xen_pmu_finish(cpu); if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r)) BUG(); diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c new file mode 100644 index 000000000000..1d1ae1b874ea --- /dev/null +++ b/arch/x86/xen/pmu.c @@ -0,0 +1,170 @@ +#include +#include + +#include +#include +#include +#include +#include + +#include "xen-ops.h" +#include "pmu.h" + +/* x86_pmu.handle_irq definition */ +#include "../kernel/cpu/perf_event.h" + + +/* Shared page between hypervisor and domain */ +static DEFINE_PER_CPU(struct xen_pmu_data *, xenpmu_shared); +#define get_xenpmu_data() per_cpu(xenpmu_shared, smp_processor_id()) + +/* perf callbacks */ +static int xen_is_in_guest(void) +{ + const struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); + + if (!xenpmu_data) { + pr_warn_once("%s: pmudata not initialized\n", __func__); + return 0; + } + + if (!xen_initial_domain() || (xenpmu_data->domain_id >= DOMID_SELF)) + return 0; + + return 1; +} + +static int xen_is_user_mode(void) +{ + const struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); + + if (!xenpmu_data) { + pr_warn_once("%s: pmudata not initialized\n", __func__); + return 0; + } + + if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_PV) + return (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_USER); + else + return !!(xenpmu_data->pmu.r.regs.cpl & 3); +} + +static unsigned long xen_get_guest_ip(void) +{ + const struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); + + if (!xenpmu_data) { + pr_warn_once("%s: pmudata not initialized\n", __func__); + return 0; + } + + return xenpmu_data->pmu.r.regs.ip; +} + +static struct perf_guest_info_callbacks xen_guest_cbs = { + .is_in_guest = xen_is_in_guest, + .is_user_mode = xen_is_user_mode, + .get_guest_ip = xen_get_guest_ip, +}; + +/* Convert registers from Xen's format to Linux' */ +static void xen_convert_regs(const struct xen_pmu_regs *xen_regs, + struct pt_regs *regs, uint64_t pmu_flags) +{ + regs->ip = xen_regs->ip; + regs->cs = xen_regs->cs; + regs->sp = xen_regs->sp; + + if (pmu_flags & PMU_SAMPLE_PV) { + if (pmu_flags & PMU_SAMPLE_USER) + regs->cs |= 3; + else + regs->cs &= ~3; + } else { + if (xen_regs->cpl) + regs->cs |= 3; + else + regs->cs &= ~3; + } +} + +irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id) +{ + int ret = IRQ_NONE; + struct pt_regs regs; + const struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); + + if (!xenpmu_data) { + pr_warn_once("%s: pmudata not initialized\n", __func__); + return ret; + } + + xen_convert_regs(&xenpmu_data->pmu.r.regs, ®s, + xenpmu_data->pmu.pmu_flags); + if (x86_pmu.handle_irq(®s)) + ret = IRQ_HANDLED; + + return ret; +} + +bool is_xen_pmu(int cpu) +{ + return (per_cpu(xenpmu_shared, cpu) != NULL); +} + +void xen_pmu_init(int cpu) +{ + int err; + struct xen_pmu_params xp; + unsigned long pfn; + struct xen_pmu_data *xenpmu_data; + + BUILD_BUG_ON(sizeof(struct xen_pmu_data) > PAGE_SIZE); + + if (xen_hvm_domain()) + return; + + xenpmu_data = (struct xen_pmu_data *)get_zeroed_page(GFP_KERNEL); + if (!xenpmu_data) { + pr_err("VPMU init: No memory\n"); + return; + } + pfn = virt_to_pfn(xenpmu_data); + + xp.val = pfn_to_mfn(pfn); + xp.vcpu = cpu; + xp.version.maj = XENPMU_VER_MAJ; + xp.version.min = XENPMU_VER_MIN; + err = HYPERVISOR_xenpmu_op(XENPMU_init, &xp); + if (err) + goto fail; + + per_cpu(xenpmu_shared, cpu) = xenpmu_data; + + if (cpu == 0) + perf_register_guest_info_callbacks(&xen_guest_cbs); + + return; + +fail: + pr_warn_once("Could not initialize VPMU for cpu %d, error %d\n", + cpu, err); + free_pages((unsigned long)xenpmu_data, 0); +} + +void xen_pmu_finish(int cpu) +{ + struct xen_pmu_params xp; + + if (xen_hvm_domain()) + return; + + xp.vcpu = cpu; + xp.version.maj = XENPMU_VER_MAJ; + xp.version.min = XENPMU_VER_MIN; + + (void)HYPERVISOR_xenpmu_op(XENPMU_finish, &xp); + + free_pages((unsigned long)per_cpu(xenpmu_shared, cpu), 0); + per_cpu(xenpmu_shared, cpu) = NULL; +} diff --git a/arch/x86/xen/pmu.h b/arch/x86/xen/pmu.h new file mode 100644 index 000000000000..a76d2cf83581 --- /dev/null +++ b/arch/x86/xen/pmu.h @@ -0,0 +1,11 @@ +#ifndef __XEN_PMU_H +#define __XEN_PMU_H + +#include + +irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id); +void xen_pmu_init(int cpu); +void xen_pmu_finish(int cpu); +bool is_xen_pmu(int cpu); + +#endif /* __XEN_PMU_H */ diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 86484384492e..2a9ff7342791 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -38,6 +39,7 @@ #include "xen-ops.h" #include "mmu.h" #include "smp.h" +#include "pmu.h" cpumask_var_t xen_cpu_initialized_map; @@ -50,6 +52,7 @@ static DEFINE_PER_CPU(struct xen_common_irq, xen_callfunc_irq) = { .irq = -1 }; static DEFINE_PER_CPU(struct xen_common_irq, xen_callfuncsingle_irq) = { .irq = -1 }; static DEFINE_PER_CPU(struct xen_common_irq, xen_irq_work) = { .irq = -1 }; static DEFINE_PER_CPU(struct xen_common_irq, xen_debug_irq) = { .irq = -1 }; +static DEFINE_PER_CPU(struct xen_common_irq, xen_pmu_irq) = { .irq = -1 }; static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id); static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id); @@ -148,11 +151,18 @@ static void xen_smp_intr_free(unsigned int cpu) kfree(per_cpu(xen_irq_work, cpu).name); per_cpu(xen_irq_work, cpu).name = NULL; } + + if (per_cpu(xen_pmu_irq, cpu).irq >= 0) { + unbind_from_irqhandler(per_cpu(xen_pmu_irq, cpu).irq, NULL); + per_cpu(xen_pmu_irq, cpu).irq = -1; + kfree(per_cpu(xen_pmu_irq, cpu).name); + per_cpu(xen_pmu_irq, cpu).name = NULL; + } }; static int xen_smp_intr_init(unsigned int cpu) { int rc; - char *resched_name, *callfunc_name, *debug_name; + char *resched_name, *callfunc_name, *debug_name, *pmu_name; resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu); rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR, @@ -218,6 +228,18 @@ static int xen_smp_intr_init(unsigned int cpu) per_cpu(xen_irq_work, cpu).irq = rc; per_cpu(xen_irq_work, cpu).name = callfunc_name; + if (is_xen_pmu(cpu)) { + pmu_name = kasprintf(GFP_KERNEL, "pmu%d", cpu); + rc = bind_virq_to_irqhandler(VIRQ_XENPMU, cpu, + xen_pmu_irq_handler, + IRQF_PERCPU|IRQF_NOBALANCING, + pmu_name, NULL); + if (rc < 0) + goto fail; + per_cpu(xen_pmu_irq, cpu).irq = rc; + per_cpu(xen_pmu_irq, cpu).name = pmu_name; + } + return 0; fail: @@ -335,6 +357,8 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus) } set_cpu_sibling_map(0); + xen_pmu_init(0); + if (xen_smp_intr_init(0)) BUG(); @@ -462,6 +486,8 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle) if (rc) return rc; + xen_pmu_init(cpu); + rc = xen_smp_intr_init(cpu); if (rc) return rc; @@ -503,6 +529,7 @@ static void xen_cpu_die(unsigned int cpu) xen_smp_intr_free(cpu); xen_uninit_lock_cpu(cpu); xen_teardown_timer(cpu); + xen_pmu_finish(cpu); } } diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 53b4c0811f4f..feddabdab448 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -11,6 +11,7 @@ #include "xen-ops.h" #include "mmu.h" +#include "pmu.h" static void xen_pv_pre_suspend(void) { @@ -67,16 +68,26 @@ static void xen_pv_post_suspend(int suspend_cancelled) void xen_arch_pre_suspend(void) { - if (xen_pv_domain()) - xen_pv_pre_suspend(); + int cpu; + + for_each_online_cpu(cpu) + xen_pmu_finish(cpu); + + if (xen_pv_domain()) + xen_pv_pre_suspend(); } void xen_arch_post_suspend(int cancelled) { - if (xen_pv_domain()) - xen_pv_post_suspend(cancelled); - else - xen_hvm_post_suspend(cancelled); + int cpu; + + if (xen_pv_domain()) + xen_pv_post_suspend(cancelled); + else + xen_hvm_post_suspend(cancelled); + + for_each_online_cpu(cpu) + xen_pmu_init(cpu); } static void xen_vcpu_notify_restore(void *data) diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index e9d4501d1f5e..167071c290b3 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h @@ -113,6 +113,7 @@ #define VIRQ_MEM_EVENT 10 /* G. (DOM0) A memory event has occured */ #define VIRQ_XC_RESERVED 11 /* G. Reserved for XenClient */ #define VIRQ_ENOMEM 12 /* G. (DOM0) Low on heap memory */ +#define VIRQ_XENPMU 13 /* PMC interrupt */ /* Architecture-specific VIRQ definitions. */ #define VIRQ_ARCH_0 16 diff --git a/include/xen/interface/xenpmu.h b/include/xen/interface/xenpmu.h index eac1b498b89f..ca42301949b5 100644 --- a/include/xen/interface/xenpmu.h +++ b/include/xen/interface/xenpmu.h @@ -56,4 +56,37 @@ struct xen_pmu_params { */ #define XENPMU_FEATURE_INTEL_BTS 1 +/* + * Shared PMU data between hypervisor and PV(H) domains. + * + * The hypervisor fills out this structure during PMU interrupt and sends an + * interrupt to appropriate VCPU. + * Architecture-independent fields of xen_pmu_data are WO for the hypervisor + * and RO for the guest but some fields in xen_pmu_arch can be writable + * by both the hypervisor and the guest (see arch-$arch/pmu.h). + */ +struct xen_pmu_data { + /* Interrupted VCPU */ + uint32_t vcpu_id; + + /* + * Physical processor on which the interrupt occurred. On non-privileged + * guests set to vcpu_id; + */ + uint32_t pcpu_id; + + /* + * Domain that was interrupted. On non-privileged guests set to + * DOMID_SELF. + * On privileged guests can be DOMID_SELF, DOMID_XEN, or, when in + * XENPMU_MODE_ALL mode, domain ID of another domain. + */ + domid_t domain_id; + + uint8_t pad[6]; + + /* Architecture-specific information */ + struct xen_pmu_arch pmu; +}; + #endif /* __XEN_PUBLIC_XENPMU_H__ */ -- cgit v1.2.3 From 6b08cd6328c58a2ae190c5ee03a2ffcab5ef828e Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Mon, 10 Aug 2015 16:34:36 -0400 Subject: xen/PMU: Intercept PMU-related MSR and APIC accesses Provide interfaces for recognizing accesses to PMU-related MSRs and LVTPC APIC and process these accesses in Xen PMU code. (The interrupt handler performs XENPMU_flush right away in the beginning since no PMU emulation is available. It will be added with a later patch). Signed-off-by: Boris Ostrovsky Reviewed-by: David Vrabel Signed-off-by: David Vrabel --- arch/x86/xen/apic.c | 5 ++- arch/x86/xen/enlighten.c | 11 +++-- arch/x86/xen/pmu.c | 95 +++++++++++++++++++++++++++++++++++++++++- arch/x86/xen/pmu.h | 4 ++ include/xen/interface/xenpmu.h | 2 + 5 files changed, 109 insertions(+), 8 deletions(-) (limited to 'arch/x86/xen/enlighten.c') diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index d03ebfa89b9f..acda713ab5be 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -7,6 +7,7 @@ #include #include #include "xen-ops.h" +#include "pmu.h" #include "smp.h" static unsigned int xen_io_apic_read(unsigned apic, unsigned reg) @@ -72,8 +73,10 @@ static u32 xen_apic_read(u32 reg) static void xen_apic_write(u32 reg, u32 val) { - if (reg == APIC_LVTPC) + if (reg == APIC_LVTPC) { + (void)pmu_apic_update(reg); return; + } /* Warn to see if there's any stray references */ WARN(1,"register: %x, value: %x\n", reg, val); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 19072f91a8e2..fdaba49f6759 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1031,6 +1031,9 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err) { u64 val; + if (pmu_msr_read(msr, &val, err)) + return val; + val = native_read_msr_safe(msr, err); switch (msr) { case MSR_IA32_APICBASE: @@ -1077,17 +1080,13 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) Xen console noise. */ default: - ret = native_write_msr_safe(msr, low, high); + if (!pmu_msr_write(msr, low, high, &ret)) + ret = native_write_msr_safe(msr, low, high); } return ret; } -unsigned long long xen_read_pmc(int counter) -{ - return 0; -} - void xen_setup_shared_info(void) { if (!xen_feature(XENFEAT_auto_translated_physmap)) { diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c index a4a6e4f04f37..f92b908e005f 100644 --- a/arch/x86/xen/pmu.c +++ b/arch/x86/xen/pmu.c @@ -51,6 +51,8 @@ static __read_mostly int amd_num_counters; /* Alias registers (0x4c1) for full-width writes to PMCs */ #define MSR_PMC_ALIAS_MASK (~(MSR_IA32_PERFCTR0 ^ MSR_IA32_PMC0)) +#define INTEL_PMC_TYPE_SHIFT 30 + static __read_mostly int intel_num_arch_counters, intel_num_fixed_counters; @@ -167,6 +169,91 @@ static int is_intel_pmu_msr(u32 msr_index, int *type, int *index) } } +bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err) +{ + + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { + if (is_amd_pmu_msr(msr)) { + *val = native_read_msr_safe(msr, err); + return true; + } + } else { + int type, index; + + if (is_intel_pmu_msr(msr, &type, &index)) { + *val = native_read_msr_safe(msr, err); + return true; + } + } + + return false; +} + +bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err) +{ + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { + if (is_amd_pmu_msr(msr)) { + *err = native_write_msr_safe(msr, low, high); + return true; + } + } else { + int type, index; + + if (is_intel_pmu_msr(msr, &type, &index)) { + *err = native_write_msr_safe(msr, low, high); + return true; + } + } + + return false; +} + +static unsigned long long xen_amd_read_pmc(int counter) +{ + uint32_t msr; + int err; + + msr = amd_counters_base + (counter * amd_msr_step); + return native_read_msr_safe(msr, &err); +} + +static unsigned long long xen_intel_read_pmc(int counter) +{ + int err; + uint32_t msr; + + if (counter & (1<pmu.l.lapic_lvtpc = val; + ret = HYPERVISOR_xenpmu_op(XENPMU_lvtpc_set, NULL); + + return ret; +} + /* perf callbacks */ static int xen_is_in_guest(void) { @@ -239,7 +326,7 @@ static void xen_convert_regs(const struct xen_pmu_regs *xen_regs, irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id) { - int ret = IRQ_NONE; + int err, ret = IRQ_NONE; struct pt_regs regs; const struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); @@ -248,6 +335,12 @@ irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id) return ret; } + err = HYPERVISOR_xenpmu_op(XENPMU_flush, NULL); + if (err) { + pr_warn_once("%s: failed hypercall, err: %d\n", __func__, err); + return ret; + } + xen_convert_regs(&xenpmu_data->pmu.r.regs, ®s, xenpmu_data->pmu.pmu_flags); if (x86_pmu.handle_irq(®s)) diff --git a/arch/x86/xen/pmu.h b/arch/x86/xen/pmu.h index a76d2cf83581..af5f0ad94078 100644 --- a/arch/x86/xen/pmu.h +++ b/arch/x86/xen/pmu.h @@ -7,5 +7,9 @@ irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id); void xen_pmu_init(int cpu); void xen_pmu_finish(int cpu); bool is_xen_pmu(int cpu); +bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err); +bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err); +int pmu_apic_update(uint32_t reg); +unsigned long long xen_read_pmc(int counter); #endif /* __XEN_PMU_H */ diff --git a/include/xen/interface/xenpmu.h b/include/xen/interface/xenpmu.h index ca42301949b5..139efc91bceb 100644 --- a/include/xen/interface/xenpmu.h +++ b/include/xen/interface/xenpmu.h @@ -20,6 +20,8 @@ #define XENPMU_feature_set 3 #define XENPMU_init 4 #define XENPMU_finish 5 +#define XENPMU_lvtpc_set 6 +#define XENPMU_flush 7 /* ` } */ -- cgit v1.2.3 From 3375d8284dfb7866f261ec008d15d30999ff273b Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Mon, 10 Aug 2015 16:34:38 -0400 Subject: xen/x86: Don't try to set PCE bit in CR4 Since VPMU code emulates RDPMC instruction with RDMSR and because hypervisor does not emulate it there is no reason to try setting CR4's PCE bit (and the hypervisor will warn on seeing it set). Signed-off-by: Boris Ostrovsky Signed-off-by: David Vrabel --- arch/x86/xen/enlighten.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/xen/enlighten.c') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index fdaba49f6759..25309c168311 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1011,8 +1011,7 @@ static void xen_write_cr0(unsigned long cr0) static void xen_write_cr4(unsigned long cr4) { - cr4 &= ~X86_CR4_PGE; - cr4 &= ~X86_CR4_PSE; + cr4 &= ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PCE); native_write_cr4(cr4); } -- cgit v1.2.3