summary refs log tree commit diff stats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r-- arch/x86/Kconfig 4
-rw-r--r-- arch/x86/Makefile 4
-rw-r--r-- arch/x86/boot/Makefile 2
-rw-r--r-- arch/x86/include/asm/mce.h 8
-rw-r--r-- arch/x86/include/asm/olpc.h 19
-rw-r--r-- arch/x86/include/asm/perf_event.h 13
-rw-r--r-- arch/x86/include/asm/spinlock.h 3
-rw-r--r-- arch/x86/include/asm/unistd.h 1
-rw-r--r-- arch/x86/kernel/acpi/sleep.c 4
-rw-r--r-- arch/x86/kernel/acpi/sleep.h 2
-rw-r--r-- arch/x86/kernel/acpi/wakeup_32.S 4
-rw-r--r-- arch/x86/kernel/acpi/wakeup_64.S 4
-rw-r--r-- arch/x86/kernel/alternative.c 4
-rw-r--r-- arch/x86/kernel/apic/io_apic.c 14
-rw-r--r-- arch/x86/kernel/cpu/common.c 2
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce-severity.c 7
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce.c 43
-rw-r--r-- arch/x86/kernel/cpu/perf_event.c 89
-rw-r--r-- arch/x86/kernel/cpu/perf_event.h 22
-rw-r--r-- arch/x86/kernel/cpu/perf_event_amd_ibs.c 4
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel.c 102
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel_ds.c 7
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel_uncore.c 1333
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel_uncore.h 235
-rw-r--r-- arch/x86/kernel/e820.c 2
-rw-r--r-- arch/x86/kernel/irq.c 3
-rw-r--r-- arch/x86/kernel/kdebugfs.c 6
-rw-r--r-- arch/x86/kernel/microcode_amd.c 7
-rw-r--r-- arch/x86/kvm/emulate.c 30
-rw-r--r-- arch/x86/kvm/i8259.c 17
-rw-r--r-- arch/x86/kvm/mmu.c 13
-rw-r--r-- arch/x86/kvm/vmx.c 20
-rw-r--r-- arch/x86/kvm/x86.c 9
-rw-r--r-- arch/x86/mm/hugetlbpage.c 21
-rw-r--r-- arch/x86/mm/pageattr.c 10
-rw-r--r-- arch/x86/mm/srat.c 15
-rw-r--r-- arch/x86/platform/efi/efi.c 30
-rw-r--r-- arch/x86/platform/olpc/olpc-xo1-pm.c 16
-rw-r--r-- arch/x86/platform/olpc/olpc-xo1-sci.c 1
-rw-r--r-- arch/x86/platform/olpc/olpc-xo15-sci.c 1
-rw-r--r-- arch/x86/platform/olpc/olpc.c 190
-rw-r--r-- arch/x86/realmode/rm/Makefile 2
-rw-r--r-- arch/x86/syscalls/syscall_64.tbl 8
-rw-r--r-- arch/x86/um/asm/ptrace.h 6
44 files changed, 1911 insertions, 426 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c70684f859e1..8ec3a1aa4abd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -70,6 +70,7 @@ config X86
select HAVE_ARCH_JUMP_LABEL
select HAVE_TEXT_POKE_SMP
select HAVE_GENERIC_HARDIRQS
+ select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select SPARSE_IRQ
select GENERIC_FIND_FIRST_BIT
select GENERIC_IRQ_PROBE
@@ -84,6 +85,7 @@ config X86
select GENERIC_IOMAP
select DCACHE_WORD_ACCESS
select GENERIC_SMP_IDLE_THREAD
+ select ARCH_WANT_IPC_PARSE_VERSION if X86_32
select HAVE_ARCH_SECCOMP_FILTER
select BUILDTIME_EXTABLE_SORT
select GENERIC_CMOS_UPDATE
@@ -1525,7 +1527,7 @@ config SECCOMP
If unsure, say Y. Only embedded should say N here.
config CC_STACKPROTECTOR
- bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)"
+ bool "Enable -fstack-protector buffer overflow detection"
---help---
This option turns on the -fstack-protector GCC feature. This
feature puts, at the beginning of functions, a canary value on
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index b0c5276861ec..682e9c210baa 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -27,6 +27,10 @@ ifeq ($(CONFIG_X86_32),y)
KBUILD_CFLAGS += -msoft-float -mregparm=3 -freg-struct-return
+ # Never want PIC in a 32-bit kernel, prevent breakage with GCC built
+ # with nonstandard options
+ KBUILD_CFLAGS += -fno-pic
+
# prevent gcc from keeping the stack 16 byte aligned
KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2)
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 5a747dd884db..f7535bedc33f 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -57,7 +57,7 @@ KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \
-Wall -Wstrict-prototypes \
-march=i386 -mregparm=3 \
-include $(srctree)/$(src)/code16gcc.h \
- -fno-strict-aliasing -fomit-frame-pointer \
+ -fno-strict-aliasing -fomit-frame-pointer -fno-pic \
$(call cc-option, -ffreestanding) \
$(call cc-option, -fno-toplevel-reorder,\
$(call cc-option, -fno-unit-at-a-time)) \
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 441520e4174f..a3ac52b29cbf 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -33,6 +33,14 @@
#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */
#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
#define MCI_STATUS_AR (1ULL<<55) /* Action required */
+#define MCACOD 0xffff /* MCA Error Code */
+
+/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */
+#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */
+#define MCACOD_SCRUBMSK 0xfff0
+#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */
+#define MCACOD_DATA 0x0134 /* Data Load */
+#define MCACOD_INSTR 0x0150 /* Instruction Fetch */
/* MCi_MISC register defines */
#define MCI_MISC_ADDR_LSB(m) ((m) & 0x3f)
diff --git a/arch/x86/include/asm/olpc.h b/arch/x86/include/asm/olpc.h
index 87bdbca72f94..72f9adf6eca4 100644
--- a/arch/x86/include/asm/olpc.h
+++ b/arch/x86/include/asm/olpc.h
@@ -100,25 +100,6 @@ extern void olpc_xo1_pm_wakeup_clear(u16 value);
extern int pci_olpc_init(void);
-/* EC related functions */
-
-extern int olpc_ec_cmd(unsigned char cmd, unsigned char *inbuf, size_t inlen,
- unsigned char *outbuf, size_t outlen);
-
-/* EC commands */
-
-#define EC_FIRMWARE_REV 0x08
-#define EC_WRITE_SCI_MASK 0x1b
-#define EC_WAKE_UP_WLAN 0x24
-#define EC_WLAN_LEAVE_RESET 0x25
-#define EC_READ_EB_MODE 0x2a
-#define EC_SET_SCI_INHIBIT 0x32
-#define EC_SET_SCI_INHIBIT_RELEASE 0x34
-#define EC_WLAN_ENTER_RESET 0x35
-#define EC_WRITE_EXT_SCI_MASK 0x38
-#define EC_SCI_QUERY 0x84
-#define EC_EXT_SCI_QUERY 0x85
-
/* SCI source values */
#define EC_SCI_SRC_EMPTY 0x00
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index c78f14a0df00..cb4e43bce98a 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -196,11 +196,16 @@ static inline u32 get_ibs_caps(void) { return 0; }
extern void perf_events_lapic_init(void);
/*
- * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups.
- * This flag is otherwise unused and ABI specified to be 0, so nobody should
- * care what we do with it.
+ * Abuse bits {3,5} of the cpu eflags register. These flags are otherwise
+ * unused and ABI specified to be 0, so nobody should care what we do with
+ * them.
+ *
+ * EXACT - the IP points to the exact instruction that triggered the
+ * event (HW bugs exempt).
+ * VM - original X86_VM_MASK; see set_linear_ip().
*/
#define PERF_EFLAGS_EXACT (1UL << 3)
+#define PERF_EFLAGS_VM (1UL << 5)
struct pt_regs;
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
@@ -234,7 +239,7 @@ extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
extern void perf_check_microcode(void);
#else
-static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
+static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
{
*nr = 0;
return NULL;
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index b315a33867f2..33692eaabab5 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -12,8 +12,7 @@
* Simple spin lock operations. There are two variants, one clears IRQ's
* on the local processor, one does not.
*
- * These are fair FIFO ticket locks, which are currently limited to 256
- * CPUs.
+ * These are fair FIFO ticket locks, which support up to 2^16 CPUs.
*
* (the type definitions are in asm/spinlock_types.h)
*/
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 4437001d8e3d..0d9776e9e2dc 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -15,7 +15,6 @@
# ifdef CONFIG_X86_32
# include <asm/unistd_32.h>
-# define __ARCH_WANT_IPC_PARSE_VERSION
# define __ARCH_WANT_STAT64
# define __ARCH_WANT_SYS_IPC
# define __ARCH_WANT_SYS_OLD_MMAP
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 95bf99de9058..1b8e5a03d942 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -25,10 +25,6 @@ unsigned long acpi_realmode_flags;
static char temp_stack[4096];
#endif
-asmlinkage void acpi_enter_s3(void)
-{
- acpi_enter_sleep_state(3, wake_sleep_flags);
-}
/**
* acpi_suspend_lowlevel - save kernel state
*
diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h
index 5653a5791ec9..67f59f8c6956 100644
--- a/arch/x86/kernel/acpi/sleep.h
+++ b/arch/x86/kernel/acpi/sleep.h
@@ -2,7 +2,6 @@
* Variables and functions used by the code in sleep.c
*/
-#include <linux/linkage.h>
#include <asm/realmode.h>
extern unsigned long saved_video_mode;
@@ -11,7 +10,6 @@ extern long saved_magic;
extern int wakeup_pmode_return;
extern u8 wake_sleep_flags;
-extern asmlinkage void acpi_enter_s3(void);
extern unsigned long acpi_copy_wakeup_routine(unsigned long);
extern void wakeup_long64(void);
diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S
index 72610839f03b..13ab720573e3 100644
--- a/arch/x86/kernel/acpi/wakeup_32.S
+++ b/arch/x86/kernel/acpi/wakeup_32.S
@@ -74,7 +74,9 @@ restore_registers:
ENTRY(do_suspend_lowlevel)
call save_processor_state
call save_registers
- call acpi_enter_s3
+ pushl $3
+ call acpi_enter_sleep_state
+ addl $4, %esp
# In case of S3 failure, we'll emerge here. Jump
# to ret_point to recover
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index 014d1d28c397..8ea5164cbd04 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -71,7 +71,9 @@ ENTRY(do_suspend_lowlevel)
movq %rsi, saved_rsi
addq $8, %rsp
- call acpi_enter_s3
+ movl $3, %edi
+ xorl %eax, %eax
+ call acpi_enter_sleep_state
/* in case something went wrong, restore the machine status and go on */
jmp resume_point
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 931280ff8299..ced4534baed5 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -165,7 +165,7 @@ static const unsigned char * const k7_nops[ASM_NOP_MAX+2] =
#endif
#ifdef P6_NOP1
-static const unsigned char __initconst_or_module p6nops[] =
+static const unsigned char p6nops[] =
{
P6_NOP1,
P6_NOP2,
@@ -224,7 +224,7 @@ void __init arch_init_ideal_nops(void)
ideal_nops = intel_nops;
#endif
}
-
+ break;
default:
#ifdef CONFIG_X86_64
ideal_nops = k8_nops;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 406eee784684..c265593ec2cd 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1204,7 +1204,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
BUG_ON(!cfg->vector);
vector = cfg->vector;
- for_each_cpu(cpu, cfg->domain)
+ for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
per_cpu(vector_irq, cpu)[vector] = -1;
cfg->vector = 0;
@@ -1212,7 +1212,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
if (likely(!cfg->move_in_progress))
return;
- for_each_cpu(cpu, cfg->old_domain) {
+ for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
vector++) {
if (per_cpu(vector_irq, cpu)[vector] != irq)
@@ -1356,6 +1356,16 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
if (!IO_APIC_IRQ(irq))
return;
+ /*
+ * For legacy irqs, cfg->domain starts with cpu 0. Now that IO-APIC
+ * can handle this irq and the apic driver is finalized at this point,
+ * update the cfg->domain.
+ */
+ if (irq < legacy_pic->nr_legacy_irqs &&
+ cpumask_equal(cfg->domain, cpumask_of(0)))
+ apic->vector_allocation_domain(0, cfg->domain,
+ apic->target_cpus());
+
if (assign_irq_vector(irq, cfg, apic->target_cpus()))
return;
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 46d8786d655e..a5fbc3c5fccc 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -144,6 +144,8 @@ static int __init x86_xsave_setup(char *s)
{
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
+ setup_clear_cpu_cap(X86_FEATURE_AVX);
+ setup_clear_cpu_cap(X86_FEATURE_AVX2);
return 1;
}
__setup("noxsave", x86_xsave_setup);
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 413c2ced887c..13017626f9a8 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -55,13 +55,6 @@ static struct severity {
#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
#define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV)
-#define MCACOD 0xffff
-/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */
-#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */
-#define MCACOD_SCRUBMSK 0xfff0
-#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */
-#define MCACOD_DATA 0x0134 /* Data Load */
-#define MCACOD_INSTR 0x0150 /* Instruction Fetch */
MCESEV(
NO, "Invalid",
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 5e095f873e3e..292d0258311c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -103,6 +103,8 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
static DEFINE_PER_CPU(struct work_struct, mce_work);
+static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
+
/*
* CPU/chipset specific EDAC code can register a notifier call here to print
* MCE errors in a human-readable form.
@@ -650,14 +652,18 @@ EXPORT_SYMBOL_GPL(machine_check_poll);
* Do a quick check if any of the events requires a panic.
* This decides if we keep the events around or clear them.
*/
-static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp)
+static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
+ struct pt_regs *regs)
{
int i, ret = 0;
for (i = 0; i < banks; i++) {
m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
- if (m->status & MCI_STATUS_VAL)
+ if (m->status & MCI_STATUS_VAL) {
__set_bit(i, validp);
+ if (quirk_no_way_out)
+ quirk_no_way_out(i, m, regs);
+ }
if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY)
ret = 1;
}
@@ -1040,7 +1046,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
*final = m;
memset(valid_banks, 0, sizeof(valid_banks));
- no_way_out = mce_no_way_out(&m, &msg, valid_banks);
+ no_way_out = mce_no_way_out(&m, &msg, valid_banks, regs);
barrier();
@@ -1418,6 +1424,34 @@ static void __mcheck_cpu_init_generic(void)
}
}
+/*
+ * During IFU recovery Sandy Bridge -EP4S processors set the RIPV and
+ * EIPV bits in MCG_STATUS to zero on the affected logical processor (SDM
+ * Vol 3B Table 15-20). But this confuses both the code that determines
+ * whether the machine check occurred in kernel or user mode, and also
+ * the severity assessment code. Pretend that EIPV was set, and take the
+ * ip/cs values from the pt_regs that mce_gather_info() ignored earlier.
+ *
+ * @bank: index of the MCA bank whose status is held in @m; only bank 0 is
+ *        ever patched up.
+ * @m:    record being assessed; mcgstatus, ip and cs are rewritten when the
+ *        signature below matches and are left untouched otherwise.
+ * @regs: register state that supplies the replacement ip/cs.
+ */
+static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
+{
+ if (bank != 0)
+ return;
+ /* Nothing to fix up if either RIPV or EIPV is already set. */
+ if ((m->mcgstatus & (MCG_STATUS_EIPV|MCG_STATUS_RIPV)) != 0)
+ return;
+ /*
+  * Require UC, EN, MISCV, ADDRV, S and AR set, OVER and PCC clear, and
+  * an MCA error code of exactly MCACOD_INSTR (instruction fetch).
+  */
+ if ((m->status & (MCI_STATUS_OVER|MCI_STATUS_UC|
+ MCI_STATUS_EN|MCI_STATUS_MISCV|MCI_STATUS_ADDRV|
+ MCI_STATUS_PCC|MCI_STATUS_S|MCI_STATUS_AR|
+ MCACOD)) !=
+ (MCI_STATUS_UC|MCI_STATUS_EN|
+ MCI_STATUS_MISCV|MCI_STATUS_ADDRV|MCI_STATUS_S|
+ MCI_STATUS_AR|MCACOD_INSTR))
+ return;
+
+ /* Signature matched: fake EIPV and fill in ip/cs from the trap frame. */
+ m->mcgstatus |= MCG_STATUS_EIPV;
+ m->ip = regs->ip;
+ m->cs = regs->cs;
+}
+
/* Add per CPU specific workarounds here */
static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
{
@@ -1515,6 +1549,9 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
*/
if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0)
mce_bootlog = 0;
+
+ if (c->x86 == 6 && c->x86_model == 45)
+ quirk_no_way_out = quirk_sandybridge_ifu;
}
if (monarch_timeout < 0)
monarch_timeout = 0;
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 29557aa06dda..915b876edd1e 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -32,6 +32,8 @@
#include <asm/smp.h>
#include <asm/alternative.h>
#include <asm/timer.h>
+#include <asm/desc.h>
+#include <asm/ldt.h>
#include "perf_event.h"
@@ -1738,6 +1740,29 @@ valid_user_frame(const void __user *fp, unsigned long size)
return (__range_not_ok(fp, size, TASK_SIZE) == 0);
}
+/*
+ * Return the base address stored in the descriptor named by @segment.
+ *
+ * @segment is a segment selector: bits 3 and up are the descriptor index,
+ * and the SEGMENT_TI_MASK bit chooses between the LDT of the current task's
+ * active mm and this CPU's GDT.
+ *
+ * Returns 0 when the index does not fall inside the selected table. Callers
+ * add the result to an offset, so 0 means "no adjustment".
+ */
+static unsigned long get_segment_base(unsigned int segment)
+{
+	struct desc_struct *desc;
+	int idx = segment >> 3;
+
+	/*
+	 * Descriptor indices are zero-based, so an index equal to the table
+	 * size is already one past the end; every limit check must use >=.
+	 */
+	if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) {
+		if (idx >= LDT_ENTRIES)
+			return 0;
+
+		/* NOTE(review): assumes context.size counts LDT entries - confirm. */
+		if (idx >= current->active_mm->context.size)
+			return 0;
+
+		desc = current->active_mm->context.ldt;
+	} else {
+		if (idx >= GDT_ENTRIES)
+			return 0;
+
+		desc = __this_cpu_ptr(&gdt_page.gdt[0]);
+	}
+
+	return get_desc_base(desc + idx);
+}
+
#ifdef CONFIG_COMPAT
#include <asm/compat.h>
@@ -1746,13 +1771,17 @@ static inline int
perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
/* 32-bit process in 64-bit kernel. */
+ unsigned long ss_base, cs_base;
struct stack_frame_ia32 frame;
const void __user *fp;
if (!test_thread_flag(TIF_IA32))
return 0;
- fp = compat_ptr(regs->bp);
+ cs_base = get_segment_base(regs->cs);
+ ss_base = get_segment_base(regs->ss);
+
+ fp = compat_ptr(ss_base + regs->bp);
while (entry->nr < PERF_MAX_STACK_DEPTH) {
unsigned long bytes;
frame.next_frame = 0;
@@ -1765,8 +1794,8 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
if (!valid_user_frame(fp, sizeof(frame)))
break;
- perf_callchain_store(entry, frame.return_address);
- fp = compat_ptr(frame.next_frame);
+ perf_callchain_store(entry, cs_base + frame.return_address);
+ fp = compat_ptr(ss_base + frame.next_frame);
}
return 1;
}
@@ -1789,6 +1818,12 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
return;
}
+ /*
+ * We don't know what to do with VM86 stacks.. ignore them for now.
+ */
+ if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM))
+ return;
+
fp = (void __user *)regs->bp;
perf_callchain_store(entry, regs->ip);
@@ -1816,16 +1851,50 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
}
}
-unsigned long perf_instruction_pointer(struct pt_regs *regs)
+/*
+ * Deal with code segment offsets for the various execution modes:
+ *
+ * VM86 - the good olde 16 bit days, where the linear address is
+ * 20 bits and we use regs->ip + 0x10 * regs->cs.
+ *
+ * IA32 - Where we need to look at GDT/LDT segment descriptor tables
+ * to figure out what the 32bit base address is.
+ *
+ * X32 - has TIF_X32 set, but is running in x86_64
+ *
+ * X86_64 - CS,DS,SS,ES are all zero based.
+ */
+static unsigned long code_segment_base(struct pt_regs *regs)
{
- unsigned long ip;
+ /*
+ * If we are in VM86 mode, add the segment offset to convert to a
+ * linear address.
+ */
+ if (regs->flags & X86_VM_MASK)
+ return 0x10 * regs->cs;
+
+ /*
+ * For IA32 we look at the GDT/LDT segment base to convert the
+ * effective IP to a linear address.
+ */
+#ifdef CONFIG_X86_32
+ if (user_mode(regs) && regs->cs != __USER_CS)
+ return get_segment_base(regs->cs);
+#else
+ if (test_thread_flag(TIF_IA32)) {
+ if (user_mode(regs) && regs->cs != __USER32_CS)
+ return get_segment_base(regs->cs);
+ }
+#endif
+ return 0;
+}
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
- ip = perf_guest_cbs->get_guest_ip();
- else
- ip = instruction_pointer(regs);
+ return perf_guest_cbs->get_guest_ip();
- return ip;
+ return regs->ip + code_segment_base(regs);
}
unsigned long perf_misc_flags(struct pt_regs *regs)
@@ -1838,7 +1907,7 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
else
misc |= PERF_RECORD_MISC_GUEST_KERNEL;
} else {
- if (!kernel_ip(regs->ip))
+ if (user_mode(regs))
misc |= PERF_RECORD_MISC_USER;
else
misc |= PERF_RECORD_MISC_KERNEL;
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index a15df4be151f..6605a81ba339 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -374,7 +374,7 @@ struct x86_pmu {
/*
* Intel DebugStore bits
*/
- int bts :1,
+ unsigned int bts :1,
bts_active :1,
pebs :1,
pebs_active :1,
@@ -516,6 +516,26 @@ static inline bool kernel_ip(unsigned long ip)
#endif
}
+/*
+ * Not all PMUs provide the right context information to place the reported IP
+ * into full context. Specifically segment registers are typically not
+ * supplied.
+ *
+ * Assuming the address is a linear address (it is for IBS), we fake the CS and
+ * vm86 mode using the known zero-based code segment and 'fix up' the registers
+ * to reflect this.
+ *
+ * Intel PEBS/LBR appear to typically provide the effective address, nothing
+ * much we can do about that but pray and treat it like a linear address.
+ *
+ * @regs: register snapshot to rewrite; cs, flags and ip are modified.
+ * @ip:   address to store as the new instruction pointer.
+ */
+static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
+{
+ /* Choose the kernel or user code segment from where @ip points. */
+ regs->cs = kernel_ip(ip) ? __KERNEL_CS : __USER_CS;
+ /*
+  * If X86_VM_MASK is set, the XOR clears it and toggles PERF_EFLAGS_VM,
+  * so with PERF_EFLAGS_VM initially clear the original VM state is
+  * remembered in that bit.
+  */
+ if (regs->flags & X86_VM_MASK)
+ regs->flags ^= (PERF_EFLAGS_VM | X86_VM_MASK);
+ regs->ip = ip;
+}
+
#ifdef CONFIG_CPU_SUP_AMD
int amd_pmu_init(void);
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index da9bcdcd9856..7bfb5bec8630 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -13,6 +13,8 @@
#include <asm/apic.h>
+#include "perf_event.h"
+
static u32 ibs_caps;
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
@@ -536,7 +538,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
regs.flags &= ~PERF_EFLAGS_EXACT;
} else {
- instruction_pointer_set(&regs, ibs_data.regs[1]);
+ set_linear_ip(&regs, ibs_data.regs[1]);
regs.flags |= PERF_EFLAGS_EXACT;
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 7a8b9d0abcaa..7f2739e03e79 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -138,6 +138,84 @@ static u64 intel_pmu_event_map(int hw_event)
return intel_perfmon_event_map[hw_event];
}
+/*
+ * Bit definitions combined into the values stored in
+ * snb_hw_cache_extra_regs[].
+ * NOTE(review): presumably these mirror the Sandy Bridge offcore-response
+ * register layout - confirm against the SDM before changing any bit.
+ */
+#define SNB_DMND_DATA_RD (1ULL << 0)
+#define SNB_DMND_RFO (1ULL << 1)
+#define SNB_DMND_IFETCH (1ULL << 2)
+#define SNB_DMND_WB (1ULL << 3)
+#define SNB_PF_DATA_RD (1ULL << 4)
+#define SNB_PF_RFO (1ULL << 5)
+#define SNB_PF_IFETCH (1ULL << 6)
+#define SNB_LLC_DATA_RD (1ULL << 7)
+#define SNB_LLC_RFO (1ULL << 8)
+#define SNB_LLC_IFETCH (1ULL << 9)
+#define SNB_BUS_LOCKS (1ULL << 10)
+#define SNB_STRM_ST (1ULL << 11)
+#define SNB_OTHER (1ULL << 15)
+#define SNB_RESP_ANY (1ULL << 16)
+#define SNB_NO_SUPP (1ULL << 17)
+#define SNB_LLC_HITM (1ULL << 18)
+#define SNB_LLC_HITE (1ULL << 19)
+#define SNB_LLC_HITS (1ULL << 20)
+#define SNB_LLC_HITF (1ULL << 21)
+#define SNB_LOCAL (1ULL << 22)
+#define SNB_REMOTE (0xffULL << 23)
+#define SNB_SNP_NONE (1ULL << 31)
+#define SNB_SNP_NOT_NEEDED (1ULL << 32)
+#define SNB_SNP_MISS (1ULL << 33)
+#define SNB_NO_FWD (1ULL << 34)
+#define SNB_SNP_FWD (1ULL << 35)
+#define SNB_HITM (1ULL << 36)
+#define SNB_NON_DRAM (1ULL << 37)
+
+/* Request-type groups used for the READ/WRITE/PREFETCH table rows. */
+#define SNB_DMND_READ (SNB_DMND_DATA_RD|SNB_LLC_DATA_RD)
+#define SNB_DMND_WRITE (SNB_DMND_RFO|SNB_LLC_RFO)
+#define SNB_DMND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO)
+
+/* Union of all the individual snoop bits (bits 31-36). */
+#define SNB_SNP_ANY (SNB_SNP_NONE|SNB_SNP_NOT_NEEDED| \
+ SNB_SNP_MISS|SNB_NO_FWD|SNB_SNP_FWD| \
+ SNB_HITM)
+
+/* DRAM_REMOTE is DRAM_ANY without the SNB_LOCAL bit. */
+#define SNB_DRAM_ANY (SNB_LOCAL|SNB_REMOTE|SNB_SNP_ANY)
+#define SNB_DRAM_REMOTE (SNB_REMOTE|SNB_SNP_ANY)
+
+/* Response selections used for the ACCESS and MISS columns of the LL rows. */
+#define SNB_L3_ACCESS SNB_RESP_ANY
+#define SNB_L3_MISS (SNB_DRAM_ANY|SNB_NON_DRAM)
+
+/*
+ * Extra-register values indexed by [cache type][operation][result].
+ * Only the LL and NODE cache types are filled in; every entry not listed is
+ * zero-initialized. intel_pmu_init() copies this table into
+ * hw_cache_extra_regs.
+ */
+static __initconst const u64 snb_hw_cache_extra_regs
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_L3_ACCESS,
+ [ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_L3_MISS,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_L3_ACCESS,
+ [ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_L3_MISS,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_L3_ACCESS,
+ [ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_L3_MISS,
+ },
+ },
+ /* NODE: "access" selects any DRAM response, "miss" only remote DRAM. */
+ [ C(NODE) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_DRAM_ANY,
+ [ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_DRAM_REMOTE,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_DRAM_ANY,
+ [ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_DRAM_REMOTE,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_DRAM_ANY,
+ [ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_DRAM_REMOTE,
+ },
+ },
+};
+
static __initconst const u64 snb_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -235,16 +313,16 @@ static __initconst const u64 snb_hw_cache_event_ids
},
[ C(NODE) ] = {
[ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ [ C(RESULT_MISS) ] = 0x01b7,
},
[ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ [ C(RESULT_MISS) ] = 0x01b7,
},
[ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ [ C(RESULT_MISS) ] = 0x01b7,
},
},
@@ -1444,8 +1522,16 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
+ /*
+ * If PMU counter has PEBS enabled it is not enough to disable counter
+ * on a guest entry since PEBS memory write can overshoot guest entry
+ * and corrupt guest memory. Disabling PEBS solves the problem.
+ */
+ arr[1].msr = MSR_IA32_PEBS_ENABLE;
+ arr[1].host = cpuc->pebs_enabled;
+ arr[1].guest = 0;
- *nr = 1;
+ *nr = 2;
return arr;
}
@@ -1964,6 +2050,8 @@ __init int intel_pmu_init(void)
case 58: /* IvyBridge */
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
+ sizeof(hw_cache_extra_regs));
intel_pmu_lbr_init_snb();
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 629ae0b7ad90..e38d97bf4259 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -499,7 +499,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
* We sampled a branch insn, rewind using the LBR stack
*/
if (ip == to) {
- regs->ip = from;
+ set_linear_ip(regs, from);
return 1;
}
@@ -529,7 +529,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
} while (to < ip);
if (to == ip) {
- regs->ip = old_to;
+ set_linear_ip(regs, old_to);
return 1;
}
@@ -569,7 +569,8 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
* A possible PERF_SAMPLE_REGS will have to transfer all regs.
*/
regs = *iregs;
- regs.ip = pebs->ip;
+ regs.flags = pebs->flags;
+ set_linear_ip(&regs, pebs->ip);
regs.bp = pebs->bp;
regs.sp = pebs->sp;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 19faffc60886..0a5571080e74 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -18,6 +18,7 @@ static struct event_constraint constraint_empty =
EVENT_CONSTRAINT(0, 0, 0);
DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
+DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21");
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19");
@@ -33,10 +34,81 @@ DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4");
DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17");
DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22");
DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31");
-DEFINE_UNCORE_FORMAT_ATTR(filter_brand0, filter_brand0, "config1:0-7");
-DEFINE_UNCORE_FORMAT_ATTR(filter_brand1, filter_brand1, "config1:8-15");
-DEFINE_UNCORE_FORMAT_ATTR(filter_brand2, filter_brand2, "config1:16-23");
-DEFINE_UNCORE_FORMAT_ATTR(filter_brand3, filter_brand3, "config1:24-31");
+DEFINE_UNCORE_FORMAT_ATTR(filter_band0, filter_band0, "config1:0-7");
+DEFINE_UNCORE_FORMAT_ATTR(filter_band1, filter_band1, "config1:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(filter_band2, filter_band2, "config1:16-23");
+DEFINE_UNCORE_FORMAT_ATTR(filter_band3, filter_band3, "config1:24-31");
+
+/*
+ * Read and return the current value of @event's counter from the MSR whose
+ * address is event->hw.event_base.
+ *
+ * @box is not used here.
+ * NOTE(review): presumably kept so the signature matches the per-box
+ * read_counter callbacks - confirm.
+ */
+static u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+{
+ u64 count;
+
+ rdmsrl(event->hw.event_base, count);
+
+ return count;
+}
+
+/*
+ * generic get constraint function for shared match/mask registers.
+ *
+ * Returns NULL when the event needs no extra register, when a real
+ * (non-fake) box already holds the allocation for it, or when the shared
+ * register was acquired. Returns &constraint_empty when the shared register
+ * is in use with a different config1/config2 pair.
+ */
+static struct event_constraint *
+uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct intel_uncore_extra_reg *er;
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+ unsigned long flags;
+ bool ok = false;
+
+ /*
+ * reg->alloc can be set due to existing state, so for fake box we
+ * need to ignore this, otherwise we might fail to allocate proper
+ * fake state for this extra reg constraint.
+ */
+ if (reg1->idx == EXTRA_REG_NONE ||
+ (!uncore_box_is_fake(box) && reg1->alloc))
+ return NULL;
+
+ er = &box->shared_regs[reg1->idx];
+ raw_spin_lock_irqsave(&er->lock, flags);
+ /*
+  * The register can be taken if nobody holds a reference, or if the
+  * current holder programmed exactly the same two config values.
+  */
+ if (!atomic_read(&er->ref) ||
+ (er->config1 == reg1->config && er->config2 == reg2->config)) {
+ atomic_inc(&er->ref);
+ er->config1 = reg1->config;
+ er->config2 = reg2->config;
+ ok = true;
+ }
+ raw_spin_unlock_irqrestore(&er->lock, flags);
+
+ if (ok) {
+ /* Only real boxes record the allocation in the event itself. */
+ if (!uncore_box_is_fake(box))
+ reg1->alloc = 1;
+ return NULL;
+ }
+
+ return &constraint_empty;
+}
+
+/*
+ * Drop the shared extra-register reference recorded in @event: decrement
+ * the register's reference count in @box and clear the event's alloc flag.
+ * Does nothing for fake boxes or for events with no allocation recorded.
+ */
+static void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct intel_uncore_extra_reg *er;
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+
+ /*
+ * Only put constraint if extra reg was actually allocated. Also
+ * takes care of events that do not use an extra shared reg.
+ *
+ * Also, if this is a fake box we shouldn't touch any event state
+ * (reg->alloc) and we don't care about leaving inconsistent box
+ * state either since it will be thrown out.
+ */
+ if (uncore_box_is_fake(box) || !reg1->alloc)
+ return;
+
+ er = &box->shared_regs[reg1->idx];
+ atomic_dec(&er->ref);
+ reg1->alloc = 0;
+}
/* Sandy Bridge-EP uncore support */
static struct intel_uncore_type snbep_uncore_cbox;
@@ -64,18 +136,15 @@ static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box)
pci_write_config_dword(pdev, box_ctl, config);
}
-static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box,
- struct perf_event *event)
+static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event)
{
struct pci_dev *pdev = box->pci_dev;
struct hw_perf_event *hwc = &event->hw;
- pci_write_config_dword(pdev, hwc->config_base, hwc->config |
- SNBEP_PMON_CTL_EN);
+ pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
}
-static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box,
- struct perf_event *event)
+static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box, struct perf_event *event)
{
struct pci_dev *pdev = box->pci_dev;
struct hw_perf_event *hwc = &event->hw;
@@ -83,8 +152,7 @@ static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box,
pci_write_config_dword(pdev, hwc->config_base, hwc->config);
}
-static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box,
- struct perf_event *event)
+static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, struct perf_event *event)
{
struct pci_dev *pdev = box->pci_dev;
struct hw_perf_event *hwc = &event->hw;
@@ -92,14 +160,15 @@ static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box,
pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count);
pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1);
+
return count;
}
static void snbep_uncore_pci_init_box(struct intel_uncore_box *box)
{
struct pci_dev *pdev = box->pci_dev;
- pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL,
- SNBEP_PMON_BOX_CTL_INT);
+
+ pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, SNBEP_PMON_BOX_CTL_INT);
}
static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box)
@@ -112,7 +181,6 @@ static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box)
rdmsrl(msr, config);
config |= SNBEP_PMON_BOX_CTL_FRZ;
wrmsrl(msr, config);
- return;
}
}
@@ -126,12 +194,10 @@ static void snbep_uncore_msr_enable_box(struct intel_uncore_box *box)
rdmsrl(msr, config);
config &= ~SNBEP_PMON_BOX_CTL_FRZ;
wrmsrl(msr, config);
- return;
}
}
-static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box,
- struct perf_event *event)
+static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
@@ -150,68 +216,15 @@ static void snbep_uncore_msr_disable_event(struct intel_uncore_box *box,
wrmsrl(hwc->config_base, hwc->config);
}
-static u64 snbep_uncore_msr_read_counter(struct intel_uncore_box *box,
- struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- u64 count;
-
- rdmsrl(hwc->event_base, count);
- return count;
-}
-
static void snbep_uncore_msr_init_box(struct intel_uncore_box *box)
{
unsigned msr = uncore_msr_box_ctl(box);
+
if (msr)
wrmsrl(msr, SNBEP_PMON_BOX_CTL_INT);
}
-static struct event_constraint *
-snbep_uncore_get_constraint(struct intel_uncore_box *box,
- struct perf_event *event)
-{
- struct intel_uncore_extra_reg *er;
- struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
- unsigned long flags;
- bool ok = false;
-
- if (reg1->idx == EXTRA_REG_NONE || (box->phys_id >= 0 && reg1->alloc))
- return NULL;
-
- er = &box->shared_regs[reg1->idx];
- raw_spin_lock_irqsave(&er->lock, flags);
- if (!atomic_read(&er->ref) || er->config1 == reg1->config) {
- atomic_inc(&er->ref);
- er->config1 = reg1->config;
- ok = true;
- }
- raw_spin_unlock_irqrestore(&er->lock, flags);
-
- if (ok) {
- if (box->phys_id >= 0)
- reg1->alloc = 1;
- return NULL;
- }
- return &constraint_empty;
-}
-
-static void snbep_uncore_put_constraint(struct intel_uncore_box *box,
- struct perf_event *event)
-{
- struct intel_uncore_extra_reg *er;
- struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-
- if (box->phys_id < 0 || !reg1->alloc)
- return;
-
- er = &box->shared_regs[reg1->idx];
- atomic_dec(&er->ref);
- reg1->alloc = 0;
-}
-
-static int snbep_uncore_hw_config(struct intel_uncore_box *box,
- struct perf_event *event)
+static int snbep_uncore_hw_config(struct intel_uncore_box *box, struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
@@ -221,14 +234,16 @@ static int snbep_uncore_hw_config(struct intel_uncore_box *box,
SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx;
reg1->config = event->attr.config1 &
SNBEP_CB0_MSR_PMON_BOX_FILTER_MASK;
- } else if (box->pmu->type == &snbep_uncore_pcu) {
- reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER;
- reg1->config = event->attr.config1 &
- SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK;
} else {
- return 0;
+ if (box->pmu->type == &snbep_uncore_pcu) {
+ reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER;
+ reg1->config = event->attr.config1 & SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK;
+ } else {
+ return 0;
+ }
}
reg1->idx = 0;
+
return 0;
}
@@ -272,10 +287,19 @@ static struct attribute *snbep_uncore_pcu_formats_attr[] = {
&format_attr_thresh5.attr,
&format_attr_occ_invert.attr,
&format_attr_occ_edge.attr,
- &format_attr_filter_brand0.attr,
- &format_attr_filter_brand1.attr,
- &format_attr_filter_brand2.attr,
- &format_attr_filter_brand3.attr,
+ &format_attr_filter_band0.attr,
+ &format_attr_filter_band1.attr,
+ &format_attr_filter_band2.attr,
+ &format_attr_filter_band3.attr,
+ NULL,
+};
+
+static struct attribute *snbep_uncore_qpi_formats_attr[] = {
+ &format_attr_event_ext.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
NULL,
};
@@ -314,15 +338,20 @@ static struct attribute_group snbep_uncore_pcu_format_group = {
.attrs = snbep_uncore_pcu_formats_attr,
};
+static struct attribute_group snbep_uncore_qpi_format_group = {
+ .name = "format",
+ .attrs = snbep_uncore_qpi_formats_attr,
+};
+
static struct intel_uncore_ops snbep_uncore_msr_ops = {
.init_box = snbep_uncore_msr_init_box,
.disable_box = snbep_uncore_msr_disable_box,
.enable_box = snbep_uncore_msr_enable_box,
.disable_event = snbep_uncore_msr_disable_event,
.enable_event = snbep_uncore_msr_enable_event,
- .read_counter = snbep_uncore_msr_read_counter,
- .get_constraint = snbep_uncore_get_constraint,
- .put_constraint = snbep_uncore_put_constraint,
+ .read_counter = uncore_msr_read_counter,
+ .get_constraint = uncore_get_constraint,
+ .put_constraint = uncore_put_constraint,
.hw_config = snbep_uncore_hw_config,
};
@@ -485,8 +514,13 @@ static struct intel_uncore_type snbep_uncore_qpi = {
.num_counters = 4,
.num_boxes = 2,
.perf_ctr_bits = 48,
+ .perf_ctr = SNBEP_PCI_PMON_CTR0,
+ .event_ctl = SNBEP_PCI_PMON_CTL0,
+ .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
+ .ops = &snbep_uncore_pci_ops,
.event_descs = snbep_uncore_qpi_events,
- SNBEP_UNCORE_PCI_COMMON_INIT(),
+ .format_group = &snbep_uncore_qpi_format_group,
};
@@ -603,10 +637,8 @@ static void snbep_pci2phy_map_init(void)
}
/* end of Sandy Bridge-EP uncore support */
-
/* Sandy Bridge uncore support */
-static void snb_uncore_msr_enable_event(struct intel_uncore_box *box,
- struct perf_event *event)
+static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
@@ -616,20 +648,11 @@ static void snb_uncore_msr_enable_event(struct intel_uncore_box *box,
wrmsrl(hwc->config_base, SNB_UNC_CTL_EN);
}
-static void snb_uncore_msr_disable_event(struct intel_uncore_box *box,
- struct perf_event *event)
+static void snb_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event)
{
wrmsrl(event->hw.config_base, 0);
}
-static u64 snb_uncore_msr_read_counter(struct intel_uncore_box *box,
- struct perf_event *event)
-{
- u64 count;
- rdmsrl(event->hw.event_base, count);
- return count;
-}
-
static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
{
if (box->pmu->pmu_idx == 0) {
@@ -648,15 +671,15 @@ static struct attribute *snb_uncore_formats_attr[] = {
};
static struct attribute_group snb_uncore_format_group = {
- .name = "format",
- .attrs = snb_uncore_formats_attr,
+ .name = "format",
+ .attrs = snb_uncore_formats_attr,
};
static struct intel_uncore_ops snb_uncore_msr_ops = {
.init_box = snb_uncore_msr_init_box,
.disable_event = snb_uncore_msr_disable_event,
.enable_event = snb_uncore_msr_enable_event,
- .read_counter = snb_uncore_msr_read_counter,
+ .read_counter = uncore_msr_read_counter,
};
static struct event_constraint snb_uncore_cbox_constraints[] = {
@@ -697,12 +720,10 @@ static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box)
static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box)
{
- wrmsrl(NHM_UNC_PERF_GLOBAL_CTL,
- NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC);
+ wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC);
}
-static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box,
- struct perf_event *event)
+static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
@@ -744,7 +765,7 @@ static struct intel_uncore_ops nhm_uncore_msr_ops = {
.enable_box = nhm_uncore_msr_enable_box,
.disable_event = snb_uncore_msr_disable_event,
.enable_event = nhm_uncore_msr_enable_event,
- .read_counter = snb_uncore_msr_read_counter,
+ .read_counter = uncore_msr_read_counter,
};
static struct intel_uncore_type nhm_uncore = {
@@ -769,8 +790,1049 @@ static struct intel_uncore_type *nhm_msr_uncores[] = {
};
/* end of Nehalem uncore support */
-static void uncore_assign_hw_event(struct intel_uncore_box *box,
- struct perf_event *event, int idx)
+/* Nehalem-EX uncore support */
+/*
+ * Extract the i-th n-bit wide field of x (field 0 is the least
+ * significant one); the result is cast back to the type of x.
+ */
+#define __BITS_VALUE(x, i, n) ((typeof(x))(((x) >> ((i) * (n))) & \
+ ((1ULL << (n)) - 1)))
+
+DEFINE_UNCORE_FORMAT_ATTR(event5, event, "config:1-5");
+DEFINE_UNCORE_FORMAT_ATTR(counter, counter, "config:6-7");
+DEFINE_UNCORE_FORMAT_ATTR(match, match, "config1:0-63");
+DEFINE_UNCORE_FORMAT_ATTR(mask, mask, "config2:0-63");
+
+/*
+ * init_box callback: write NHMEX_U_PMON_GLOBAL_EN_ALL to the
+ * NHMEX_U_MSR_PMON_GLOBAL_CTL MSR. @box itself is not consulted.
+ */
+static void nhmex_uncore_msr_init_box(struct intel_uncore_box *box)
+{
+ wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, NHMEX_U_PMON_GLOBAL_EN_ALL);
+}
+
+/*
+ * disable_box callback: clear the per-counter bits (one low-order bit per
+ * counter of the box) in the box control MSR, plus the fixed-counter
+ * enable bit when the box has a fixed control register. Does nothing if
+ * uncore_msr_box_ctl() returns 0.
+ */
+static void nhmex_uncore_msr_disable_box(struct intel_uncore_box *box)
+{
+ unsigned msr = uncore_msr_box_ctl(box);
+ u64 config;
+
+ if (msr) {
+ /* read-modify-write so the other bits of the MSR are preserved */
+ rdmsrl(msr, config);
+ config &= ~((1ULL << uncore_num_counters(box)) - 1);
+ /* WBox has a fixed counter */
+ if (uncore_msr_fixed_ctl(box))
+ config &= ~NHMEX_W_PMON_GLOBAL_FIXED_EN;
+ wrmsrl(msr, config);
+ }
+}
+
+/*
+ * enable_box callback: set the per-counter bits (one low-order bit per
+ * counter of the box) in the box control MSR, plus the fixed-counter
+ * enable bit when the box has a fixed control register. Does nothing if
+ * uncore_msr_box_ctl() returns 0.
+ */
+static void nhmex_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+ unsigned msr = uncore_msr_box_ctl(box);
+ u64 config;
+
+ if (msr) {
+ /* read-modify-write so the other bits of the MSR are preserved */
+ rdmsrl(msr, config);
+ config |= (1ULL << uncore_num_counters(box)) - 1;
+ /* WBox has a fixed counter */
+ if (uncore_msr_fixed_ctl(box))
+ config |= NHMEX_W_PMON_GLOBAL_FIXED_EN;
+ wrmsrl(msr, config);
+ }
+}
+
+/* disable_event callback: write 0 to the event's control MSR. */
+static void nhmex_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ wrmsrl(event->hw.config_base, 0);
+}
+
+/*
+ * enable_event callback shared by the boxes that use nhmex_uncore_ops.
+ * The position of the enable bit depends on the counter and box type:
+ * - fixed counter: only NHMEX_PMON_CTL_EN_BIT0 is written;
+ * - box types whose raw event mask contains bit 0: bit 0 belongs to the
+ * event encoding, so NHMEX_PMON_CTL_EN_BIT22 is used as enable bit;
+ * - all others: NHMEX_PMON_CTL_EN_BIT0 is ORed into the event config.
+ */
+static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (hwc->idx >= UNCORE_PMC_IDX_FIXED)
+ wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0);
+ else if (box->pmu->type->event_mask & NHMEX_PMON_CTL_EN_BIT0)
+ wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22);
+ else
+ wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0);
+}
+
+/*
+ * Designated initializers common to every Nehalem-EX ops table. The
+ * .enable_event hook (and any hw_config/constraint hooks) is left for
+ * each table to fill in.
+ */
+#define NHMEX_UNCORE_OPS_COMMON_INIT() \
+ .init_box = nhmex_uncore_msr_init_box, \
+ .disable_box = nhmex_uncore_msr_disable_box, \
+ .enable_box = nhmex_uncore_msr_enable_box, \
+ .disable_event = nhmex_uncore_msr_disable_event, \
+ .read_counter = uncore_msr_read_counter
+
+/* Default ops: the common callbacks plus the generic enable_event. */
+static struct intel_uncore_ops nhmex_uncore_ops = {
+ NHMEX_UNCORE_OPS_COMMON_INIT(),
+ .enable_event = nhmex_uncore_msr_enable_event,
+};
+
+static struct attribute *nhmex_uncore_ubox_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_edge.attr,
+ NULL,
+};
+
+static struct attribute_group nhmex_uncore_ubox_format_group = {
+ .name = "format",
+ .attrs = nhmex_uncore_ubox_formats_attr,
+};
+
+static struct intel_uncore_type nhmex_uncore_ubox = {
+ .name = "ubox",
+ .num_counters = 1,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .event_ctl = NHMEX_U_MSR_PMON_EV_SEL,
+ .perf_ctr = NHMEX_U_MSR_PMON_CTR,
+ .event_mask = NHMEX_U_PMON_RAW_EVENT_MASK,
+ .box_ctl = NHMEX_U_MSR_PMON_GLOBAL_CTL,
+ .ops = &nhmex_uncore_ops,
+ .format_group = &nhmex_uncore_ubox_format_group
+};
+
+static struct attribute *nhmex_uncore_cbox_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ NULL,
+};
+
+static struct attribute_group nhmex_uncore_cbox_format_group = {
+ .name = "format",
+ .attrs = nhmex_uncore_cbox_formats_attr,
+};
+
+/* msr offset for each instance of cbox */
+static unsigned nhmex_cbox_msr_offsets[] = {
+ 0x0, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x240, 0x2c0,
+};
+
+static struct intel_uncore_type nhmex_uncore_cbox = {
+ .name = "cbox",
+ .num_counters = 6,
+ .num_boxes = 10,
+ .perf_ctr_bits = 48,
+ .event_ctl = NHMEX_C0_MSR_PMON_EV_SEL0,
+ .perf_ctr = NHMEX_C0_MSR_PMON_CTR0,
+ .event_mask = NHMEX_PMON_RAW_EVENT_MASK,
+ .box_ctl = NHMEX_C0_MSR_PMON_GLOBAL_CTL,
+ .msr_offsets = nhmex_cbox_msr_offsets,
+ .pair_ctr_ctl = 1,
+ .ops = &nhmex_uncore_ops,
+ .format_group = &nhmex_uncore_cbox_format_group
+};
+
+static struct uncore_event_desc nhmex_uncore_wbox_events[] = {
+ INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0"),
+ { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_type nhmex_uncore_wbox = {
+ .name = "wbox",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .event_ctl = NHMEX_W_MSR_PMON_CNT0,
+ .perf_ctr = NHMEX_W_MSR_PMON_EVT_SEL0,
+ .fixed_ctr = NHMEX_W_MSR_PMON_FIXED_CTR,
+ .fixed_ctl = NHMEX_W_MSR_PMON_FIXED_CTL,
+ .event_mask = NHMEX_PMON_RAW_EVENT_MASK,
+ .box_ctl = NHMEX_W_MSR_GLOBAL_CTL,
+ .pair_ctr_ctl = 1,
+ .event_descs = nhmex_uncore_wbox_events,
+ .ops = &nhmex_uncore_ops,
+ .format_group = &nhmex_uncore_cbox_format_group
+};
+
+/*
+ * hw_config callback for the B-box. Decodes the counter and event select
+ * fields from hwc->config; for events that use the match/mask registers
+ * it records the match MSR of this box instance in reg1->reg, selects
+ * shared register 0 and stashes attr.config1/config2 as the match/mask
+ * values in reg1->config/reg2->config. Always returns 0.
+ */
+static int nhmex_bbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+ int ctr, ev_sel;
+
+ ctr = (hwc->config & NHMEX_B_PMON_CTR_MASK) >>
+ NHMEX_B_PMON_CTR_SHIFT;
+ ev_sel = (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK) >>
+ NHMEX_B_PMON_CTL_EV_SEL_SHIFT;
+
+ /* events that do not use the match/mask registers */
+ if ((ctr == 0 && ev_sel > 0x3) || (ctr == 1 && ev_sel > 0x6) ||
+ (ctr == 2 && ev_sel != 0x4) || ctr == 3)
+ return 0;
+
+ /* pick the match MSR of B-box 0 or B-box 1 */
+ if (box->pmu->pmu_idx == 0)
+ reg1->reg = NHMEX_B0_MSR_MATCH;
+ else
+ reg1->reg = NHMEX_B1_MSR_MATCH;
+ reg1->idx = 0;
+ reg1->config = event->attr.config1;
+ reg2->config = event->attr.config2;
+ return 0;
+}
+
+/*
+ * enable_event callback for the B-box. If the event uses the extra
+ * register, program reg1->config into reg1->reg and reg2->config into
+ * the following MSR (presumably the mask register paired with the match
+ * register -- confirm against the MSR layout). Then write the control
+ * MSR with only the event select bits of hwc->config plus the enable bit.
+ */
+static void nhmex_bbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
+ if (reg1->idx != EXTRA_REG_NONE) {
+ wrmsrl(reg1->reg, reg1->config);
+ wrmsrl(reg1->reg + 1, reg2->config);
+ }
+ wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 |
+ (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK));
+}
+
+/*
+ * The Bbox has 4 counters, but each counter monitors different events.
+ * Use bits 6-7 in the event config to select counter.
+ */
+static struct event_constraint nhmex_uncore_bbox_constraints[] = {
+ EVENT_CONSTRAINT(0 , 1, 0xc0),
+ EVENT_CONSTRAINT(0x40, 2, 0xc0),
+ EVENT_CONSTRAINT(0x80, 4, 0xc0),
+ EVENT_CONSTRAINT(0xc0, 8, 0xc0),
+ EVENT_CONSTRAINT_END,
+};
+
+static struct attribute *nhmex_uncore_bbox_formats_attr[] = {
+ &format_attr_event5.attr,
+ &format_attr_counter.attr,
+ &format_attr_match.attr,
+ &format_attr_mask.attr,
+ NULL,
+};
+
+static struct attribute_group nhmex_uncore_bbox_format_group = {
+ .name = "format",
+ .attrs = nhmex_uncore_bbox_formats_attr,
+};
+
+static struct intel_uncore_ops nhmex_uncore_bbox_ops = {
+ NHMEX_UNCORE_OPS_COMMON_INIT(),
+ .enable_event = nhmex_bbox_msr_enable_event,
+ .hw_config = nhmex_bbox_hw_config,
+ .get_constraint = uncore_get_constraint,
+ .put_constraint = uncore_put_constraint,
+};
+
+static struct intel_uncore_type nhmex_uncore_bbox = {
+ .name = "bbox",
+ .num_counters = 4,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ .event_ctl = NHMEX_B0_MSR_PMON_CTL0,
+ .perf_ctr = NHMEX_B0_MSR_PMON_CTR0,
+ .event_mask = NHMEX_B_PMON_RAW_EVENT_MASK,
+ .box_ctl = NHMEX_B0_MSR_PMON_GLOBAL_CTL,
+ .msr_offset = NHMEX_B_MSR_OFFSET,
+ .pair_ctr_ctl = 1,
+ .num_shared_regs = 1,
+ .constraints = nhmex_uncore_bbox_constraints,
+ .ops = &nhmex_uncore_bbox_ops,
+ .format_group = &nhmex_uncore_bbox_format_group
+};
+
+/*
+ * hw_config callback for the S-box. For the TO_R_PROG_EV event it records
+ * the MM_CFG MSR of this box instance in reg1->reg, selects shared
+ * register 0 and stashes attr.config1/config2 in reg1->config and
+ * reg2->config. Other events are left untouched. Always returns 0.
+ */
+static int nhmex_sbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
+ /* only TO_R_PROG_EV event uses the match/mask register */
+ if ((hwc->config & NHMEX_PMON_CTL_EV_SEL_MASK) !=
+ NHMEX_S_EVENT_TO_R_PROG_EV)
+ return 0;
+
+ /* pick the MM_CFG MSR of S-box 0 or S-box 1 */
+ if (box->pmu->pmu_idx == 0)
+ reg1->reg = NHMEX_S0_MSR_MM_CFG;
+ else
+ reg1->reg = NHMEX_S1_MSR_MM_CFG;
+ reg1->idx = 0;
+ reg1->config = event->attr.config1;
+ reg2->config = event->attr.config2;
+ return 0;
+}
+
+/*
+ * enable_event callback for the S-box. When the event uses the extra
+ * register, the MM_CFG MSR is first written with 0, then the two MSRs
+ * following it are loaded with reg1->config and reg2->config, and only
+ * then is NHMEX_S_PMON_MM_CFG_EN written to MM_CFG. Finally the counter
+ * control MSR is written with the event config plus enable bit 22.
+ */
+static void nhmex_sbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
+ if (reg1->idx != EXTRA_REG_NONE) {
+ /* presumably keeps matching off while match/mask change -- confirm */
+ wrmsrl(reg1->reg, 0);
+ wrmsrl(reg1->reg + 1, reg1->config);
+ wrmsrl(reg1->reg + 2, reg2->config);
+ wrmsrl(reg1->reg, NHMEX_S_PMON_MM_CFG_EN);
+ }
+ wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22);
+}
+
+static struct attribute *nhmex_uncore_sbox_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ &format_attr_match.attr,
+ &format_attr_mask.attr,
+ NULL,
+};
+
+static struct attribute_group nhmex_uncore_sbox_format_group = {
+ .name = "format",
+ .attrs = nhmex_uncore_sbox_formats_attr,
+};
+
+static struct intel_uncore_ops nhmex_uncore_sbox_ops = {
+ NHMEX_UNCORE_OPS_COMMON_INIT(),
+ .enable_event = nhmex_sbox_msr_enable_event,
+ .hw_config = nhmex_sbox_hw_config,
+ .get_constraint = uncore_get_constraint,
+ .put_constraint = uncore_put_constraint,
+};
+
+static struct intel_uncore_type nhmex_uncore_sbox = {
+ .name = "sbox",
+ .num_counters = 4,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ .event_ctl = NHMEX_S0_MSR_PMON_CTL0,
+ .perf_ctr = NHMEX_S0_MSR_PMON_CTR0,
+ .event_mask = NHMEX_PMON_RAW_EVENT_MASK,
+ .box_ctl = NHMEX_S0_MSR_PMON_GLOBAL_CTL,
+ .msr_offset = NHMEX_S_MSR_OFFSET,
+ .pair_ctr_ctl = 1,
+ .num_shared_regs = 1,
+ .ops = &nhmex_uncore_sbox_ops,
+ .format_group = &nhmex_uncore_sbox_format_group
+};
+
+/*
+ * Indices into box->shared_regs[] for the M-box extra registers.
+ * EXTRA_REG_NHMEX_M_ZDP_CTL_FVC must stay last: the M-box code treats any
+ * index >= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC as "ZDP_CTL_FVC plus a field
+ * number (0-3)".
+ */
+enum {
+ EXTRA_REG_NHMEX_M_FILTER,
+ EXTRA_REG_NHMEX_M_DSP,
+ EXTRA_REG_NHMEX_M_ISS,
+ EXTRA_REG_NHMEX_M_MAP,
+ EXTRA_REG_NHMEX_M_MSC_THR,
+ EXTRA_REG_NHMEX_M_PGT,
+ EXTRA_REG_NHMEX_M_PLD,
+ EXTRA_REG_NHMEX_M_ZDP_CTL_FVC,
+};
+
+/*
+ * Table scanned by nhmex_mbox_hw_config() to find which extra registers
+ * an M-box event needs. The scan stops at the first entry whose msr field
+ * is 0 (EVENT_EXTRA_END). An event may match more than one entry.
+ */
+static struct extra_reg nhmex_uncore_mbox_extra_regs[] = {
+ MBOX_INC_SEL_EXTAR_REG(0x0, DSP),
+ MBOX_INC_SEL_EXTAR_REG(0x4, MSC_THR),
+ MBOX_INC_SEL_EXTAR_REG(0x5, MSC_THR),
+ MBOX_INC_SEL_EXTAR_REG(0x9, ISS),
+ /* event 0xa uses two extra registers */
+ MBOX_INC_SEL_EXTAR_REG(0xa, ISS),
+ MBOX_INC_SEL_EXTAR_REG(0xa, PLD),
+ MBOX_INC_SEL_EXTAR_REG(0xb, PLD),
+ /* events 0xd ~ 0x10 use the same extra register */
+ MBOX_INC_SEL_EXTAR_REG(0xd, ZDP_CTL_FVC),
+ MBOX_INC_SEL_EXTAR_REG(0xe, ZDP_CTL_FVC),
+ MBOX_INC_SEL_EXTAR_REG(0xf, ZDP_CTL_FVC),
+ MBOX_INC_SEL_EXTAR_REG(0x10, ZDP_CTL_FVC),
+ MBOX_INC_SEL_EXTAR_REG(0x16, PGT),
+ MBOX_SET_FLAG_SEL_EXTRA_REG(0x0, DSP),
+ MBOX_SET_FLAG_SEL_EXTRA_REG(0x1, ISS),
+ MBOX_SET_FLAG_SEL_EXTRA_REG(0x5, PGT),
+ MBOX_SET_FLAG_SEL_EXTRA_REG(0x6, MAP),
+ EVENT_EXTRA_END
+};
+
+/*
+ * Nehalem-EX or Westmere-EX? When true the M-box code uses the NHMEX_*
+ * ZDP_CTL_FVC masks, otherwise the WSMEX_* ones.
+ */
+bool uncore_nhmex;
+
+/*
+ * Try to take a reference on M-box shared register @idx for a user that
+ * wants it programmed with @config.
+ *
+ * For idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC the register is granted if it
+ * is unused or already holds exactly @config.
+ *
+ * For idx >= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC, (idx - ZDP_CTL_FVC) selects
+ * one of four per-event fields of the ZDP_CTL_FVC register. Its refcount
+ * is packed: byte n of er->ref counts the users of field n. The request
+ * is granted if the register is entirely unused, or if @config agrees
+ * with the current value on the shared bits and, when the requested field
+ * already has users, on that field as well.
+ *
+ * Returns true if the reference was taken, false otherwise.
+ */
+static bool nhmex_mbox_get_shared_reg(struct intel_uncore_box *box, int idx, u64 config)
+{
+ struct intel_uncore_extra_reg *er;
+ unsigned long flags;
+ bool ret = false;
+ u64 mask;
+
+ if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
+ er = &box->shared_regs[idx];
+ raw_spin_lock_irqsave(&er->lock, flags);
+ if (!atomic_read(&er->ref) || er->config == config) {
+ atomic_inc(&er->ref);
+ er->config = config;
+ ret = true;
+ }
+ raw_spin_unlock_irqrestore(&er->lock, flags);
+
+ return ret;
+ }
+ /*
+ * The ZDP_CTL_FVC MSR has 4 fields which are used to control
+ * events 0xd ~ 0x10. Besides these 4 fields, there are additional
+ * fields which are shared.
+ */
+ idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+ if (WARN_ON_ONCE(idx >= 4))
+ return false;
+
+ /* mask of the shared fields */
+ if (uncore_nhmex)
+ mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK;
+ else
+ mask = WSMEX_M_PMON_ZDP_CTL_FVC_MASK;
+ er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
+
+ raw_spin_lock_irqsave(&er->lock, flags);
+ /* add mask of the non-shared field if it's in use */
+ if (__BITS_VALUE(atomic_read(&er->ref), idx, 8)) {
+ if (uncore_nhmex)
+ mask |= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+ else
+ mask |= WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+ }
+
+ if (!atomic_read(&er->ref) || !((er->config ^ config) & mask)) {
+ /* bump the per-field count held in byte idx of the refcount */
+ atomic_add(1 << (idx * 8), &er->ref);
+ /* install the shared bits and this field; leave other fields alone */
+ if (uncore_nhmex)
+ mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK |
+ NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+ else
+ mask = WSMEX_M_PMON_ZDP_CTL_FVC_MASK |
+ WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+ er->config &= ~mask;
+ er->config |= (config & mask);
+ ret = true;
+ }
+ raw_spin_unlock_irqrestore(&er->lock, flags);
+
+ return ret;
+}
+
+/*
+ * Drop one reference on M-box shared register @idx. For indices at or
+ * above EXTRA_REG_NHMEX_M_ZDP_CTL_FVC the reference is removed from the
+ * per-field count kept in byte (idx - ZDP_CTL_FVC) of the packed
+ * ZDP_CTL_FVC refcount.
+ */
+static void nhmex_mbox_put_shared_reg(struct intel_uncore_box *box, int idx)
+{
+ struct intel_uncore_extra_reg *er;
+
+ if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
+ er = &box->shared_regs[idx];
+ atomic_dec(&er->ref);
+ return;
+ }
+
+ idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+ er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
+ atomic_sub(1 << (idx * 8), &er->ref);
+}
+
+/*
+ * Re-target a ZDP_CTL_FVC event from the field currently encoded in the
+ * low byte of reg1->idx to the field selected by @new_idx.
+ *
+ * The per-event control bits are isolated from reg1->config, moved by 3
+ * bits per field of distance, and the shared control bits are added back
+ * using the mask of the running model (Nehalem-EX or Westmere-EX).
+ *
+ * If @modify is true the event itself is updated: the inc_sel field of
+ * hwc->config is adjusted by the field distance, reg1->config is replaced
+ * and reg1->idx becomes @new_idx with all higher bytes set to 0xff.
+ * If @modify is false the event is left untouched.
+ *
+ * Returns the extra register config for @new_idx.
+ */
+u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modify)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ int idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8);
+ u64 config = reg1->config;
+
+ /* get the non-shared control bits and shift them */
+ idx = orig_idx - EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+ if (uncore_nhmex)
+ config &= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+ else
+ config &= WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+ if (new_idx > orig_idx) {
+ idx = new_idx - orig_idx;
+ config <<= 3 * idx;
+ } else {
+ idx = orig_idx - new_idx;
+ config >>= 3 * idx;
+ }
+
+ /*
+ * Add the shared control bits back, using only the mask of the
+ * running model: ORing in the Nehalem-EX mask unconditionally
+ * would copy unshifted bits into the Westmere-EX config.
+ */
+ if (uncore_nhmex)
+ config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config;
+ else
+ config |= WSMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config;
+ if (modify) {
+ /* adjust the main event selector */
+ if (new_idx > orig_idx)
+ hwc->config += idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT;
+ else
+ hwc->config -= idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT;
+ reg1->config = config;
+ /* upper index bytes become 0xff, i.e. no second extra register */
+ reg1->idx = ~0xff | new_idx;
+ }
+ return config;
+}
+
+/*
+ * get_constraint callback for the M-box: try to acquire every shared
+ * register @event needs (up to two extra registers encoded in the two low
+ * bytes of reg1->idx, plus the address filter described by reg2).
+ *
+ * Returns NULL when everything was acquired (no scheduling constraint)
+ * and &constraint_empty when a register is held with a conflicting
+ * configuration. On failure, references taken here are dropped again.
+ */
+static struct event_constraint *
+nhmex_mbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+ int i, idx[2], alloc = 0;
+ u64 config1 = reg1->config;
+
+ idx[0] = __BITS_VALUE(reg1->idx, 0, 8);
+ idx[1] = __BITS_VALUE(reg1->idx, 1, 8);
+again:
+ for (i = 0; i < 2; i++) {
+ /* on a real box, skip registers this event already holds */
+ if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i)))
+ idx[i] = 0xff;
+
+ /* 0xff marks an unused slot */
+ if (idx[i] == 0xff)
+ continue;
+
+ /* register i takes the i-th 32-bit half of config1 */
+ if (!nhmex_mbox_get_shared_reg(box, idx[i],
+ __BITS_VALUE(config1, i, 32)))
+ goto fail;
+ alloc |= (0x1 << i);
+ }
+
+ /* for the match/mask registers */
+ if (reg2->idx != EXTRA_REG_NONE &&
+ (uncore_box_is_fake(box) || !reg2->alloc) &&
+ !nhmex_mbox_get_shared_reg(box, reg2->idx, reg2->config))
+ goto fail;
+
+ /*
+ * If it's a fake box -- as per validate_{group,event}() we
+ * shouldn't touch event state and we can avoid doing so
+ * since both will only call get_event_constraints() once
+ * on each event, this avoids the need for reg->alloc.
+ */
+ if (!uncore_box_is_fake(box)) {
+ /* commit an alternative ZDP_CTL_FVC field chosen by the retry path */
+ if (idx[0] != 0xff && idx[0] != __BITS_VALUE(reg1->idx, 0, 8))
+ nhmex_mbox_alter_er(event, idx[0], true);
+ reg1->alloc |= alloc;
+ if (reg2->idx != EXTRA_REG_NONE)
+ reg2->alloc = 1;
+ }
+ return NULL;
+fail:
+ if (idx[0] != 0xff && !(alloc & 0x1) &&
+ idx[0] >= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
+ /*
+ * Events 0xd ~ 0x10 are functionally identical, but are
+ * controlled by different fields in the ZDP_CTL_FVC
+ * register. If we failed to take one field, try the
+ * remaining 3 choices.
+ */
+ BUG_ON(__BITS_VALUE(reg1->idx, 1, 8) != 0xff);
+ idx[0] -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+ idx[0] = (idx[0] + 1) % 4;
+ idx[0] += EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+ /* stop once the rotation is back at the event's own field */
+ if (idx[0] != __BITS_VALUE(reg1->idx, 0, 8)) {
+ config1 = nhmex_mbox_alter_er(event, idx[0], false);
+ goto again;
+ }
+ }
+
+ /* roll back the references taken in this call */
+ if (alloc & 0x1)
+ nhmex_mbox_put_shared_reg(box, idx[0]);
+ if (alloc & 0x2)
+ nhmex_mbox_put_shared_reg(box, idx[1]);
+ return &constraint_empty;
+}
+
+/*
+ * put_constraint callback for the M-box: release every shared register
+ * recorded as allocated in reg1->alloc (bit 0 and bit 1, one per extra
+ * register) and reg2->alloc, then clear those flags. Nothing is done for
+ * a fake box.
+ */
+static void nhmex_mbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+
+ if (uncore_box_is_fake(box))
+ return;
+
+ if (reg1->alloc & 0x1)
+ nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 0, 8));
+ if (reg1->alloc & 0x2)
+ nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 1, 8));
+ reg1->alloc = 0;
+
+ if (reg2->alloc) {
+ nhmex_mbox_put_shared_reg(box, reg2->idx);
+ reg2->alloc = 0;
+ }
+}
+
+/*
+ * Map an extra_reg table entry to a shared register index. Entries below
+ * EXTRA_REG_NHMEX_M_ZDP_CTL_FVC map to er->idx directly. ZDP_CTL_FVC
+ * entries additionally encode the field number, computed as the entry's
+ * event value shifted down by NHMEX_M_PMON_CTL_INC_SEL_SHIFT minus 0xd
+ * (presumably inc_sel 0xd ~ 0x10 giving fields 0 ~ 3 -- confirm against
+ * the MBOX_INC_SEL_EXTAR_REG definition).
+ */
+static int nhmex_mbox_extra_reg_idx(struct extra_reg *er)
+{
+ if (er->idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC)
+ return er->idx;
+ return er->idx + (er->event >> NHMEX_M_PMON_CTL_INC_SEL_SHIFT) - 0xd;
+}
+
+/*
+ * hw_config callback for the M-box. Scans nhmex_uncore_mbox_extra_regs
+ * for entries matching the event and packs the result into reg1:
+ * byte n of reg1->idx holds the shared register index and 16-bit word n
+ * of reg1->reg holds the MSR address of the n-th extra register, while
+ * reg1->config carries attr.config1 unchanged. When both slots end up
+ * used, reg2 is set up for the address match/mask filter.
+ *
+ * Returns 0 on success, or -EINVAL if attr.config1 has bits outside a
+ * matching entry's valid_mask or an internal sanity check trips.
+ */
+static int nhmex_mbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct intel_uncore_type *type = box->pmu->type;
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+ struct extra_reg *er;
+ unsigned msr;
+ int reg_idx = 0;
+ /*
+ * The mbox events may require 2 extra MSRs at the most. But only
+ * the lower 32 bits in these MSRs are significant, so we can use
+ * config1 to pass two MSRs' config.
+ */
+ for (er = nhmex_uncore_mbox_extra_regs; er->msr; er++) {
+ if (er->event != (event->hw.config & er->config_mask))
+ continue;
+ if (event->attr.config1 & ~er->valid_mask)
+ return -EINVAL;
+
+ /* MSR address for this box instance; must fit the packed fields */
+ msr = er->msr + type->msr_offset * box->pmu->pmu_idx;
+ if (WARN_ON_ONCE(msr >= 0xffff || er->idx >= 0xff))
+ return -EINVAL;
+
+ /* always use the 32~63 bits to pass the PLD config */
+ if (er->idx == EXTRA_REG_NHMEX_M_PLD)
+ reg_idx = 1;
+ else if (WARN_ON_ONCE(reg_idx > 0))
+ return -EINVAL;
+
+ /* replace slot reg_idx of the packed index and MSR fields */
+ reg1->idx &= ~(0xff << (reg_idx * 8));
+ reg1->reg &= ~(0xffff << (reg_idx * 16));
+ reg1->idx |= nhmex_mbox_extra_reg_idx(er) << (reg_idx * 8);
+ reg1->reg |= msr << (reg_idx * 16);
+ reg1->config = event->attr.config1;
+ reg_idx++;
+ }
+ /*
+ * The mbox only provides ability to perform address matching
+ * for the PLD events.
+ */
+ if (reg_idx == 2) {
+ reg2->idx = EXTRA_REG_NHMEX_M_FILTER;
+ /* ~0ULL tells the enable path to leave the filter disabled */
+ if (event->attr.config2 & NHMEX_M_PMON_MM_CFG_EN)
+ reg2->config = event->attr.config2;
+ else
+ reg2->config = ~0ULL;
+ if (box->pmu->pmu_idx == 0)
+ reg2->reg = NHMEX_M0_MSR_PMU_MM_CFG;
+ else
+ reg2->reg = NHMEX_M1_MSR_PMU_MM_CFG;
+ }
+ return 0;
+}
+
+/*
+ * Return the config value currently recorded for M-box shared register
+ * @idx. Indices below EXTRA_REG_NHMEX_M_ZDP_CTL_FVC are read directly;
+ * any index at or above it reads the single ZDP_CTL_FVC entry under its
+ * lock.
+ */
+static u64 nhmex_mbox_shared_reg_config(struct intel_uncore_box *box, int idx)
+{
+ struct intel_uncore_extra_reg *er;
+ unsigned long flags;
+ u64 config;
+
+ if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC)
+ return box->shared_regs[idx].config;
+
+ er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
+ raw_spin_lock_irqsave(&er->lock, flags);
+ config = er->config;
+ raw_spin_unlock_irqrestore(&er->lock, flags);
+ return config;
+}
+
+/*
+ * enable_event callback for the M-box. Programs up to two extra MSRs
+ * (addresses unpacked from reg1->reg, slot by slot) with the config
+ * recorded for the corresponding shared register, sets up the address
+ * match/mask filter if the event has one, and finally writes the counter
+ * control MSR with the event config plus enable bit 0.
+ */
+static void nhmex_mbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+ int idx;
+
+ /* an index byte of 0xff means the slot is unused */
+ idx = __BITS_VALUE(reg1->idx, 0, 8);
+ if (idx != 0xff)
+ wrmsrl(__BITS_VALUE(reg1->reg, 0, 16),
+ nhmex_mbox_shared_reg_config(box, idx));
+ idx = __BITS_VALUE(reg1->idx, 1, 8);
+ if (idx != 0xff)
+ wrmsrl(__BITS_VALUE(reg1->reg, 1, 16),
+ nhmex_mbox_shared_reg_config(box, idx));
+
+ if (reg2->idx != EXTRA_REG_NONE) {
+ wrmsrl(reg2->reg, 0);
+ /* ~0ULL was stored by hw_config when no filter was requested */
+ if (reg2->config != ~0ULL) {
+ wrmsrl(reg2->reg + 1,
+ reg2->config & NHMEX_M_PMON_ADDR_MATCH_MASK);
+ wrmsrl(reg2->reg + 2, NHMEX_M_PMON_ADDR_MASK_MASK &
+ (reg2->config >> NHMEX_M_PMON_ADDR_MASK_SHIFT));
+ wrmsrl(reg2->reg, NHMEX_M_PMON_MM_CFG_EN);
+ }
+ }
+
+ wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0);
+}
+
+DEFINE_UNCORE_FORMAT_ATTR(count_mode, count_mode, "config:2-3");
+DEFINE_UNCORE_FORMAT_ATTR(storage_mode, storage_mode, "config:4-5");
+DEFINE_UNCORE_FORMAT_ATTR(wrap_mode, wrap_mode, "config:6");
+DEFINE_UNCORE_FORMAT_ATTR(flag_mode, flag_mode, "config:7");
+DEFINE_UNCORE_FORMAT_ATTR(inc_sel, inc_sel, "config:9-13");
+DEFINE_UNCORE_FORMAT_ATTR(set_flag_sel, set_flag_sel, "config:19-21");
+DEFINE_UNCORE_FORMAT_ATTR(filter_cfg_en, filter_cfg_en, "config2:63");
+DEFINE_UNCORE_FORMAT_ATTR(filter_match, filter_match, "config2:0-33");
+DEFINE_UNCORE_FORMAT_ATTR(filter_mask, filter_mask, "config2:34-61");
+DEFINE_UNCORE_FORMAT_ATTR(dsp, dsp, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(thr, thr, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(fvc, fvc, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(pgt, pgt, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(map, map, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(iss, iss, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(pld, pld, "config1:32-63");
+
+static struct attribute *nhmex_uncore_mbox_formats_attr[] = {
+ &format_attr_count_mode.attr,
+ &format_attr_storage_mode.attr,
+ &format_attr_wrap_mode.attr,
+ &format_attr_flag_mode.attr,
+ &format_attr_inc_sel.attr,
+ &format_attr_set_flag_sel.attr,
+ &format_attr_filter_cfg_en.attr,
+ &format_attr_filter_match.attr,
+ &format_attr_filter_mask.attr,
+ &format_attr_dsp.attr,
+ &format_attr_thr.attr,
+ &format_attr_fvc.attr,
+ &format_attr_pgt.attr,
+ &format_attr_map.attr,
+ &format_attr_iss.attr,
+ &format_attr_pld.attr,
+ NULL,
+};
+
+static struct attribute_group nhmex_uncore_mbox_format_group = {
+ .name = "format",
+ .attrs = nhmex_uncore_mbox_formats_attr,
+};
+
+static struct uncore_event_desc nhmex_uncore_mbox_events[] = {
+ INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x2800"),
+ INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x2820"),
+ { /* end: all zeroes */ },
+};
+
+static struct uncore_event_desc wsmex_uncore_mbox_events[] = {
+ INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x5000"),
+ INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x5040"),
+ { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_ops nhmex_uncore_mbox_ops = {
+ NHMEX_UNCORE_OPS_COMMON_INIT(),
+ .enable_event = nhmex_mbox_msr_enable_event,
+ .hw_config = nhmex_mbox_hw_config,
+ .get_constraint = nhmex_mbox_get_constraint,
+ .put_constraint = nhmex_mbox_put_constraint,
+};
+
+static struct intel_uncore_type nhmex_uncore_mbox = {
+ .name = "mbox",
+ .num_counters = 6,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ .event_ctl = NHMEX_M0_MSR_PMU_CTL0,
+ .perf_ctr = NHMEX_M0_MSR_PMU_CNT0,
+ .event_mask = NHMEX_M_PMON_RAW_EVENT_MASK,
+ .box_ctl = NHMEX_M0_MSR_GLOBAL_CTL,
+ .msr_offset = NHMEX_M_MSR_OFFSET,
+ .pair_ctr_ctl = 1,
+ .num_shared_regs = 8,
+ .event_descs = nhmex_uncore_mbox_events,
+ .ops = &nhmex_uncore_mbox_ops,
+ .format_group = &nhmex_uncore_mbox_format_group,
+};
+
+/*
+ * Switch @event to the other event of its pair: an odd extra register
+ * index moves down by one and an even index moves up by one, with the
+ * event select field of hwc->config adjusted in step. If the new index
+ * is the 3rd or 4th event of its set (idx % 6 == 2 or 3), the extra
+ * register config is moved between the 0~7 and 8~15 bit fields.
+ *
+ * @box is not consulted; it is kept so the signature stays unchanged.
+ */
+void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+
+ /* adjust the main event selector and extra register index */
+ if (reg1->idx % 2) {
+ reg1->idx--;
+ hwc->config -= 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
+ } else {
+ reg1->idx++;
+ hwc->config += 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
+ }
+
+ /* adjust extra register config */
+ switch (reg1->idx % 6) {
+ case 2:
+ /* shift the 8~15 bits to the 0~7 bits */
+ reg1->config >>= 8;
+ break;
+ case 3:
+ /* shift the 0~7 bits to the 8~15 bits */
+ reg1->config <<= 8;
+ break;
+ }
+}
+
+/*
+ * Each rbox has 4 event sets, which monitor QPI ports 0~3 or 4~7.
+ * An event set consists of 6 events; the 3rd and 4th events in
+ * an event set use the same extra register, so an event set uses
+ * 5 extra registers.
+ */
+static struct event_constraint *
+nhmex_rbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+ struct intel_uncore_extra_reg *er;
+ unsigned long flags;
+ int idx, er_idx;
+ u64 config1;
+ bool ok = false;
+
+ if (!uncore_box_is_fake(box) && reg1->alloc)
+ return NULL;
+
+ idx = reg1->idx % 6;
+ config1 = reg1->config;
+again:
+ er_idx = idx;
+ /* the 3rd and 4th events use the same extra register */
+ if (er_idx > 2)
+ er_idx--;
+ er_idx += (reg1->idx / 6) * 5;
+
+ er = &box->shared_regs[er_idx];
+ raw_spin_lock_irqsave(&er->lock, flags);
+ if (idx < 2) {
+ if (!atomic_read(&er->ref) || er->config == reg1->config) {
+ atomic_inc(&er->ref);
+ er->config = reg1->config;
+ ok = true;
+ }
+ } else if (idx == 2 || idx == 3) {
+ /*
+ * these two events use different fields in a extra register,
+ * the 0~7 bits and the 8~15 bits respectively.
+ */
+ u64 mask = 0xff << ((idx - 2) * 8);
+ if (!__BITS_VALUE(atomic_read(&er->ref), idx - 2, 8) ||
+ !((er->config ^ config1) & mask)) {
+ atomic_add(1 << ((idx - 2) * 8), &er->ref);
+ er->config &= ~mask;
+ er->config |= config1 & mask;
+ ok = true;
+ }
+ } else {
+ if (!atomic_read(&er->ref) ||
+ (er->config == (hwc->config >> 32) &&
+ er->config1 == reg1->config &&
+ er->config2 == reg2->config)) {
+ atomic_inc(&er->ref);
+ er->config = (hwc->config >> 32);
+ er->config1 = reg1->config;
+ er->config2 = reg2->config;
+ ok = true;
+ }
+ }
+ raw_spin_unlock_irqrestore(&er->lock, flags);
+
+ if (!ok) {
+ /*
+ * The Rbox events are always in pairs. The paired
+ * events are functional identical, but use different
+ * extra registers. If we failed to take an extra
+ * register, try the alternative.
+ */
+ if (idx % 2)
+ idx--;
+ else
+ idx++;
+ if (idx != reg1->idx % 6) {
+ if (idx == 2)
+ config1 >>= 8;
+ else if (idx == 3)
+ config1 <<= 8;
+ goto again;
+ }
+ } else {
+ if (!uncore_box_is_fake(box)) {
+ if (idx != reg1->idx % 6)
+ nhmex_rbox_alter_er(box, event);
+ reg1->alloc = 1;
+ }
+ return NULL;
+ }
+ return &constraint_empty;
+}
+
+/*
+ * Drop the reference @event holds on its rbox extra register.
+ *
+ * Nothing is done for fake boxes or for events that never took a
+ * register. The 3rd and 4th events of a set share one register and
+ * each own an 8-bit reference field in it; all other events use the
+ * plain reference count.
+ */
+static void nhmex_rbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+	struct intel_uncore_extra_reg *er;
+	int set, slot, reg;
+
+	if (uncore_box_is_fake(box) || !reg1->alloc)
+		return;
+
+	/* 6 events per set, but only 5 extra registers (slots 2 and 3 share) */
+	set = reg1->idx / 6;
+	slot = reg1->idx % 6;
+	reg = (slot > 2 ? slot - 1 : slot) + set * 5;
+	er = &box->shared_regs[reg];
+
+	switch (slot) {
+	case 2:
+	case 3:
+		atomic_sub(1 << ((slot - 2) * 8), &er->ref);
+		break;
+	default:
+		atomic_dec(&er->ref);
+		break;
+	}
+
+	reg1->alloc = 0;
+}
+
+/*
+ * Validate the rbox event selector and record the extra register setup.
+ *
+ * The selector taken from hw.config must be below 0x18, otherwise
+ * -EINVAL is returned. attr.config1 always becomes the first extra
+ * register config. For the 5th and 6th event of a set the upper 32 bits
+ * of attr.config are merged into hw.config and attr.config2 becomes the
+ * second extra register config.
+ *
+ * Returns 0 on success.
+ */
+static int nhmex_rbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int ev_sel;
+
+	ev_sel = (hwc->config & NHMEX_R_PMON_CTL_EV_SEL_MASK) >>
+		 NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
+	if (ev_sel >= 0x18)
+		return -EINVAL;
+
+	hwc->extra_reg.idx = ev_sel;
+	hwc->extra_reg.config = event->attr.config1;
+
+	/* only the last two events of a set take the additional values */
+	if (ev_sel % 6 >= 4) {
+		hwc->config |= event->attr.config & (~0ULL << 32);
+		hwc->branch_reg.config = event->attr.config2;
+	}
+
+	return 0;
+}
+
+/*
+ * Return the current config value of shared register @idx of @box.
+ * The value is read while holding the register's lock.
+ */
+static u64 nhmex_rbox_shared_reg_config(struct intel_uncore_box *box, int idx)
+{
+ struct intel_uncore_extra_reg *er;
+ unsigned long flags;
+ u64 config;
+
+ er = &box->shared_regs[idx];
+
+ raw_spin_lock_irqsave(&er->lock, flags);
+ config = er->config;
+ raw_spin_unlock_irqrestore(&er->lock, flags);
+
+ return config;
+}
+
+/*
+ * Program the extra register(s) of an rbox event and then enable the
+ * event in its control register.
+ */
+static void nhmex_rbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+ int idx, port;
+
+ idx = reg1->idx;
+ /* 6 events per port; each box index covers its own group of 4 ports */
+ port = idx / 6 + box->pmu->pmu_idx * 4;
+
+ switch (idx % 6) {
+ case 0:
+ wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG0(port), reg1->config);
+ break;
+ case 1:
+ wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG1(port), reg1->config);
+ break;
+ case 2:
+ case 3:
+ /*
+ * both events share one register: write the combined value
+ * kept in the shared register, not this event's own config
+ */
+ wrmsrl(NHMEX_R_MSR_PORTN_QLX_CFG(port),
+ nhmex_rbox_shared_reg_config(box, 2 + (idx / 6) * 5));
+ break;
+ case 4:
+ /* upper half of hwc->config, then the two extra register configs */
+ wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port),
+ hwc->config >> 32);
+ wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(port), reg1->config);
+ wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MASK(port), reg2->config);
+ break;
+ case 5:
+ /* same as case 4, using the SET2 registers */
+ wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(port),
+ hwc->config >> 32);
+ wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(port), reg1->config);
+ wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MASK(port), reg2->config);
+ break;
+ };
+
+ /* finally write the event selector together with the enable bit */
+ wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 |
+ (hwc->config & NHMEX_R_PMON_CTL_EV_SEL_MASK));
+}
+
+/*
+ * Format fields for the rbox extra values: which bits of config, config1
+ * and config2 carry each field. qlx_cfg and iperf_cfg both live in
+ * config1, xbr_match also uses config1 (all 64 bits).
+ */
+DEFINE_UNCORE_FORMAT_ATTR(xbr_mm_cfg, xbr_mm_cfg, "config:32-63");
+DEFINE_UNCORE_FORMAT_ATTR(xbr_match, xbr_match, "config1:0-63");
+DEFINE_UNCORE_FORMAT_ATTR(xbr_mask, xbr_mask, "config2:0-63");
+DEFINE_UNCORE_FORMAT_ATTR(qlx_cfg, qlx_cfg, "config1:0-15");
+DEFINE_UNCORE_FORMAT_ATTR(iperf_cfg, iperf_cfg, "config1:0-31");
+
+/* NULL-terminated list of the format attributes offered by the rbox PMU */
+static struct attribute *nhmex_uncore_rbox_formats_attr[] = {
+ &format_attr_event5.attr,
+ &format_attr_xbr_mm_cfg.attr,
+ &format_attr_xbr_match.attr,
+ &format_attr_xbr_mask.attr,
+ &format_attr_qlx_cfg.attr,
+ &format_attr_iperf_cfg.attr,
+ NULL,
+};
+
+/* attribute group named "format" that wraps the rbox format attributes */
+static struct attribute_group nhmex_uncore_rbox_format_group = {
+ .name = "format",
+ .attrs = nhmex_uncore_rbox_formats_attr,
+};
+
+/*
+ * Predefined rbox event aliases. The qpi0 aliases use event 0x0, the qpi1
+ * aliases event 0x6 (the first event of the next 6-event set); the
+ * iperf_cfg value selects what is counted.
+ *
+ * NOTE(review): "filt" and "date" look like misspellings of "flit" and
+ * "data". The alias names are presumably visible to user space, so they
+ * are left unchanged here - confirm before renaming.
+ */
+static struct uncore_event_desc nhmex_uncore_rbox_events[] = {
+ INTEL_UNCORE_EVENT_DESC(qpi0_flit_send, "event=0x0,iperf_cfg=0x80000000"),
+ INTEL_UNCORE_EVENT_DESC(qpi1_filt_send, "event=0x6,iperf_cfg=0x80000000"),
+ INTEL_UNCORE_EVENT_DESC(qpi0_idle_filt, "event=0x0,iperf_cfg=0x40000000"),
+ INTEL_UNCORE_EVENT_DESC(qpi1_idle_filt, "event=0x6,iperf_cfg=0x40000000"),
+ INTEL_UNCORE_EVENT_DESC(qpi0_date_response, "event=0x0,iperf_cfg=0xc4"),
+ INTEL_UNCORE_EVENT_DESC(qpi1_date_response, "event=0x6,iperf_cfg=0xc4"),
+ { /* end: all zeroes */ },
+};
+
+/*
+ * Callbacks for the rbox PMU: the common NHM-EX operations plus the
+ * rbox-specific event enable, hardware config and constraint get/put hooks.
+ */
+static struct intel_uncore_ops nhmex_uncore_rbox_ops = {
+ NHMEX_UNCORE_OPS_COMMON_INIT(),
+ .enable_event = nhmex_rbox_msr_enable_event,
+ .hw_config = nhmex_rbox_hw_config,
+ .get_constraint = nhmex_rbox_get_constraint,
+ .put_constraint = nhmex_rbox_put_constraint,
+};
+
+/*
+ * Description of the rbox uncore PMU type: 2 boxes with 8 counters of
+ * 48 bits each. num_shared_regs is 20 because every box has 4 event sets
+ * that use 5 extra registers each.
+ *
+ * pair_ctr_ctl is set, so uncore_msr_event_ctl()/uncore_msr_perf_ctr()
+ * step through the control/counter MSRs with a stride of 2 per index.
+ */
+static struct intel_uncore_type nhmex_uncore_rbox = {
+ .name = "rbox",
+ .num_counters = 8,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ .event_ctl = NHMEX_R_MSR_PMON_CTL0,
+ .perf_ctr = NHMEX_R_MSR_PMON_CNT0,
+ .event_mask = NHMEX_R_PMON_RAW_EVENT_MASK,
+ .box_ctl = NHMEX_R_MSR_GLOBAL_CTL,
+ .msr_offset = NHMEX_R_MSR_OFFSET,
+ .pair_ctr_ctl = 1,
+ .num_shared_regs = 20,
+ .event_descs = nhmex_uncore_rbox_events,
+ .ops = &nhmex_uncore_rbox_ops,
+ .format_group = &nhmex_uncore_rbox_format_group
+};
+
+/*
+ * NULL-terminated list of all MSR based uncore PMU types of Nehalem-EX.
+ * uncore_cpu_init() selects it as msr_uncores for CPU models 46 and 47.
+ */
+static struct intel_uncore_type *nhmex_msr_uncores[] = {
+ &nhmex_uncore_ubox,
+ &nhmex_uncore_cbox,
+ &nhmex_uncore_bbox,
+ &nhmex_uncore_sbox,
+ &nhmex_uncore_mbox,
+ &nhmex_uncore_rbox,
+ &nhmex_uncore_wbox,
+ NULL,
+};
+/* end of Nehalem-EX uncore support */
+
+static void uncore_assign_hw_event(struct intel_uncore_box *box, struct perf_event *event, int idx)
{
struct hw_perf_event *hwc = &event->hw;
@@ -787,8 +1849,7 @@ static void uncore_assign_hw_event(struct intel_uncore_box *box,
hwc->event_base = uncore_perf_ctr(box, hwc->idx);
}
-static void uncore_perf_event_update(struct intel_uncore_box *box,
- struct perf_event *event)
+static void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
{
u64 prev_count, new_count, delta;
int shift;
@@ -858,14 +1919,12 @@ static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
box->hrtimer.function = uncore_pmu_hrtimer;
}
-struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
- int cpu)
+struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cpu)
{
struct intel_uncore_box *box;
int i, size;
- size = sizeof(*box) + type->num_shared_regs *
- sizeof(struct intel_uncore_extra_reg);
+ size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg);
box = kmalloc_node(size, GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu));
if (!box)
@@ -915,12 +1974,11 @@ static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
* perf core schedules event on the basis of cpu, uncore events are
* collected by one of the cpus inside a physical package.
*/
- return uncore_pmu_to_box(uncore_event_to_pmu(event),
- smp_processor_id());
+ return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id());
}
-static int uncore_collect_events(struct intel_uncore_box *box,
- struct perf_event *leader, bool dogrp)
+static int
+uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp)
{
struct perf_event *event;
int n, max_count;
@@ -952,8 +2010,7 @@ static int uncore_collect_events(struct intel_uncore_box *box,
}
static struct event_constraint *
-uncore_get_event_constraint(struct intel_uncore_box *box,
- struct perf_event *event)
+uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
struct intel_uncore_type *type = box->pmu->type;
struct event_constraint *c;
@@ -977,15 +2034,13 @@ uncore_get_event_constraint(struct intel_uncore_box *box,
return &type->unconstrainted;
}
-static void uncore_put_event_constraint(struct intel_uncore_box *box,
- struct perf_event *event)
+static void uncore_put_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
if (box->pmu->type->ops->put_constraint)
box->pmu->type->ops->put_constraint(box, event);
}
-static int uncore_assign_events(struct intel_uncore_box *box,
- int assign[], int n)
+static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
{
unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
struct event_constraint *c, *constraints[UNCORE_PMC_IDX_MAX];
@@ -1256,6 +2311,7 @@ int uncore_pmu_event_init(struct perf_event *event)
event->hw.idx = -1;
event->hw.last_tag = ~0ULL;
event->hw.extra_reg.idx = EXTRA_REG_NONE;
+ event->hw.branch_reg.idx = EXTRA_REG_NONE;
if (event->attr.config == UNCORE_FIXED_EVENT) {
/* no fixed counter */
@@ -1326,7 +2382,7 @@ static void __init uncore_type_exit(struct intel_uncore_type *type)
type->attr_groups[1] = NULL;
}
-static void uncore_types_exit(struct intel_uncore_type **types)
+static void __init uncore_types_exit(struct intel_uncore_type **types)
{
int i;
for (i = 0; types[i]; i++)
@@ -1407,8 +2463,7 @@ static bool pcidrv_registered;
/*
* add a pci uncore device
*/
-static int __devinit uncore_pci_add(struct intel_uncore_type *type,
- struct pci_dev *pdev)
+static int __devinit uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev)
{
struct intel_uncore_pmu *pmu;
struct intel_uncore_box *box;
@@ -1485,6 +2540,7 @@ static int __devinit uncore_pci_probe(struct pci_dev *pdev,
struct intel_uncore_type *type;
type = (struct intel_uncore_type *)id->driver_data;
+
return uncore_pci_add(type, pdev);
}
@@ -1612,8 +2668,8 @@ static int __cpuinit uncore_cpu_prepare(int cpu, int phys_id)
return 0;
}
-static void __cpuinit uncore_change_context(struct intel_uncore_type **uncores,
- int old_cpu, int new_cpu)
+static void __cpuinit
+uncore_change_context(struct intel_uncore_type **uncores, int old_cpu, int new_cpu)
{
struct intel_uncore_type *type;
struct intel_uncore_pmu *pmu;
@@ -1694,8 +2750,8 @@ static void __cpuinit uncore_event_init_cpu(int cpu)
uncore_change_context(pci_uncores, -1, cpu);
}
-static int __cpuinit uncore_cpu_notifier(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static int
+ __cpuinit uncore_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
{
unsigned int cpu = (long)hcpu;
@@ -1732,12 +2788,12 @@ static int __cpuinit uncore_cpu_notifier(struct notifier_block *self,
}
static struct notifier_block uncore_cpu_nb __cpuinitdata = {
- .notifier_call = uncore_cpu_notifier,
+ .notifier_call = uncore_cpu_notifier,
/*
* to migrate uncore events, our notifier should be executed
* before perf core's notifier.
*/
- .priority = CPU_PRI_PERF + 1,
+ .priority = CPU_PRI_PERF + 1,
};
static void __init uncore_cpu_setup(void *dummy)
@@ -1767,6 +2823,15 @@ static int __init uncore_cpu_init(void)
snbep_uncore_cbox.num_boxes = max_cores;
msr_uncores = snbep_msr_uncores;
break;
+ case 46: /* Nehalem-EX */
+ uncore_nhmex = true;
+ /* fall through: the setup below is shared with Westmere-EX */
+ case 47: /* Westmere-EX aka. Xeon E7 */
+ /* Westmere-EX uses its own mbox event alias table */
+ if (!uncore_nhmex)
+ nhmex_uncore_mbox.event_descs = wsmex_uncore_mbox_events;
+ /* never expose more cboxes than max_cores */
+ if (nhmex_uncore_cbox.num_boxes > max_cores)
+ nhmex_uncore_cbox.num_boxes = max_cores;
+ msr_uncores = nhmex_msr_uncores;
+ break;
default:
return 0;
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index b13e9ea81def..5b81c1856aac 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -5,9 +5,7 @@
#include "perf_event.h"
#define UNCORE_PMU_NAME_LEN 32
-#define UNCORE_BOX_HASH_SIZE 8
-
-#define UNCORE_PMU_HRTIMER_INTERVAL (60 * NSEC_PER_SEC)
+#define UNCORE_PMU_HRTIMER_INTERVAL (60LL * NSEC_PER_SEC)
#define UNCORE_FIXED_EVENT 0xff
#define UNCORE_PMC_IDX_MAX_GENERIC 8
@@ -115,6 +113,10 @@
SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
+#define SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK \
+ (SNBEP_PMON_RAW_EVENT_MASK | \
+ SNBEP_PMON_CTL_EV_SEL_EXT)
+
/* SNB-EP pci control register */
#define SNBEP_PCI_PMON_BOX_CTL 0xf4
#define SNBEP_PCI_PMON_CTL0 0xd8
@@ -158,6 +160,188 @@
#define SNBEP_PCU_MSR_CORE_C3_CTR 0x3fc
#define SNBEP_PCU_MSR_CORE_C6_CTR 0x3fd
+/* NHM-EX event control */
+#define NHMEX_PMON_CTL_EV_SEL_MASK 0x000000ff
+#define NHMEX_PMON_CTL_UMASK_MASK 0x0000ff00
+#define NHMEX_PMON_CTL_EN_BIT0 (1 << 0)
+#define NHMEX_PMON_CTL_EDGE_DET (1 << 18)
+#define NHMEX_PMON_CTL_PMI_EN (1 << 20)
+#define NHMEX_PMON_CTL_EN_BIT22 (1 << 22)
+#define NHMEX_PMON_CTL_INVERT (1 << 23)
+#define NHMEX_PMON_CTL_TRESH_MASK 0xff000000
+#define NHMEX_PMON_RAW_EVENT_MASK (NHMEX_PMON_CTL_EV_SEL_MASK | \
+ NHMEX_PMON_CTL_UMASK_MASK | \
+ NHMEX_PMON_CTL_EDGE_DET | \
+ NHMEX_PMON_CTL_INVERT | \
+ NHMEX_PMON_CTL_TRESH_MASK)
+
+/* NHM-EX Ubox */
+#define NHMEX_U_MSR_PMON_GLOBAL_CTL 0xc00
+#define NHMEX_U_MSR_PMON_CTR 0xc11
+#define NHMEX_U_MSR_PMON_EV_SEL 0xc10
+
+/* bit definitions for the Ubox global PMON control value */
+#define NHMEX_U_PMON_GLOBAL_EN			(1 << 0)
+#define NHMEX_U_PMON_GLOBAL_PMI_CORE_SEL	0x0000001e
+#define NHMEX_U_PMON_GLOBAL_EN_ALL		(1 << 28)
+#define NHMEX_U_PMON_GLOBAL_RST_ALL		(1 << 29)
+/*
+ * Unsigned on purpose: shifting 1 into the sign bit of an int is undefined,
+ * and a negative int would sign-extend when widened to a 64-bit MSR value.
+ */
+#define NHMEX_U_PMON_GLOBAL_FRZ_ALL		(1U << 31)
+
+#define NHMEX_U_PMON_RAW_EVENT_MASK \
+ (NHMEX_PMON_CTL_EV_SEL_MASK | \
+ NHMEX_PMON_CTL_EDGE_DET)
+
+/* NHM-EX Cbox */
+#define NHMEX_C0_MSR_PMON_GLOBAL_CTL 0xd00
+#define NHMEX_C0_MSR_PMON_CTR0 0xd11
+#define NHMEX_C0_MSR_PMON_EV_SEL0 0xd10
+#define NHMEX_C_MSR_OFFSET 0x20
+
+/* NHM-EX Bbox */
+#define NHMEX_B0_MSR_PMON_GLOBAL_CTL 0xc20
+#define NHMEX_B0_MSR_PMON_CTR0 0xc31
+#define NHMEX_B0_MSR_PMON_CTL0 0xc30
+#define NHMEX_B_MSR_OFFSET 0x40
+#define NHMEX_B0_MSR_MATCH 0xe45
+#define NHMEX_B0_MSR_MASK 0xe46
+#define NHMEX_B1_MSR_MATCH 0xe4d
+#define NHMEX_B1_MSR_MASK 0xe4e
+
+#define NHMEX_B_PMON_CTL_EN (1 << 0)
+#define NHMEX_B_PMON_CTL_EV_SEL_SHIFT 1
+#define NHMEX_B_PMON_CTL_EV_SEL_MASK \
+ (0x1f << NHMEX_B_PMON_CTL_EV_SEL_SHIFT)
+#define NHMEX_B_PMON_CTR_SHIFT 6
+#define NHMEX_B_PMON_CTR_MASK \
+ (0x3 << NHMEX_B_PMON_CTR_SHIFT)
+#define NHMEX_B_PMON_RAW_EVENT_MASK \
+ (NHMEX_B_PMON_CTL_EV_SEL_MASK | \
+ NHMEX_B_PMON_CTR_MASK)
+
+/* NHM-EX Sbox */
+#define NHMEX_S0_MSR_PMON_GLOBAL_CTL 0xc40
+#define NHMEX_S0_MSR_PMON_CTR0 0xc51
+#define NHMEX_S0_MSR_PMON_CTL0 0xc50
+#define NHMEX_S_MSR_OFFSET 0x80
+#define NHMEX_S0_MSR_MM_CFG 0xe48
+#define NHMEX_S0_MSR_MATCH 0xe49
+#define NHMEX_S0_MSR_MASK 0xe4a
+#define NHMEX_S1_MSR_MM_CFG 0xe58
+#define NHMEX_S1_MSR_MATCH 0xe59
+#define NHMEX_S1_MSR_MASK 0xe5a
+
+#define NHMEX_S_PMON_MM_CFG_EN (0x1ULL << 63)
+#define NHMEX_S_EVENT_TO_R_PROG_EV 0
+
+/* NHM-EX Mbox */
+#define NHMEX_M0_MSR_GLOBAL_CTL 0xca0
+#define NHMEX_M0_MSR_PMU_DSP 0xca5
+#define NHMEX_M0_MSR_PMU_ISS 0xca6
+#define NHMEX_M0_MSR_PMU_MAP 0xca7
+#define NHMEX_M0_MSR_PMU_MSC_THR 0xca8
+#define NHMEX_M0_MSR_PMU_PGT 0xca9
+#define NHMEX_M0_MSR_PMU_PLD 0xcaa
+#define NHMEX_M0_MSR_PMU_ZDP_CTL_FVC 0xcab
+#define NHMEX_M0_MSR_PMU_CTL0 0xcb0
+#define NHMEX_M0_MSR_PMU_CNT0 0xcb1
+#define NHMEX_M_MSR_OFFSET 0x40
+#define NHMEX_M0_MSR_PMU_MM_CFG 0xe54
+#define NHMEX_M1_MSR_PMU_MM_CFG 0xe5c
+
+#define NHMEX_M_PMON_MM_CFG_EN (1ULL << 63)
+#define NHMEX_M_PMON_ADDR_MATCH_MASK 0x3ffffffffULL
+#define NHMEX_M_PMON_ADDR_MASK_MASK 0x7ffffffULL
+#define NHMEX_M_PMON_ADDR_MASK_SHIFT 34
+
+#define NHMEX_M_PMON_CTL_EN (1 << 0)
+#define NHMEX_M_PMON_CTL_PMI_EN (1 << 1)
+#define NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT 2
+#define NHMEX_M_PMON_CTL_COUNT_MODE_MASK \
+ (0x3 << NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT)
+#define NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT 4
+#define NHMEX_M_PMON_CTL_STORAGE_MODE_MASK \
+ (0x3 << NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT)
+#define NHMEX_M_PMON_CTL_WRAP_MODE (1 << 6)
+#define NHMEX_M_PMON_CTL_FLAG_MODE (1 << 7)
+#define NHMEX_M_PMON_CTL_INC_SEL_SHIFT 9
+#define NHMEX_M_PMON_CTL_INC_SEL_MASK \
+ (0x1f << NHMEX_M_PMON_CTL_INC_SEL_SHIFT)
+#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT 19
+#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK \
+ (0x7 << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT)
+#define NHMEX_M_PMON_RAW_EVENT_MASK \
+ (NHMEX_M_PMON_CTL_COUNT_MODE_MASK | \
+ NHMEX_M_PMON_CTL_STORAGE_MODE_MASK | \
+ NHMEX_M_PMON_CTL_WRAP_MODE | \
+ NHMEX_M_PMON_CTL_FLAG_MODE | \
+ NHMEX_M_PMON_CTL_INC_SEL_MASK | \
+ NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK)
+
+#define NHMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 11) - 1) | (1 << 23))
+#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7 << (11 + 3 * (n)))
+
+#define WSMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 12) - 1) | (1 << 24))
+#define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7 << (12 + 3 * (n)))
+
+/*
+ * Use bits 9~13 to select the event if the 7th bit is not set,
+ * otherwise use bits 19~21 to select the event.
+ */
+#define MBOX_INC_SEL(x) ((x) << NHMEX_M_PMON_CTL_INC_SEL_SHIFT)
+#define MBOX_SET_FLAG_SEL(x) (((x) << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT) | \
+ NHMEX_M_PMON_CTL_FLAG_MODE)
+#define MBOX_INC_SEL_MASK (NHMEX_M_PMON_CTL_INC_SEL_MASK | \
+ NHMEX_M_PMON_CTL_FLAG_MODE)
+#define MBOX_SET_FLAG_SEL_MASK (NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK | \
+ NHMEX_M_PMON_CTL_FLAG_MODE)
+#define MBOX_INC_SEL_EXTAR_REG(c, r) \
+ EVENT_EXTRA_REG(MBOX_INC_SEL(c), NHMEX_M0_MSR_PMU_##r, \
+ MBOX_INC_SEL_MASK, (u64)-1, NHMEX_M_##r)
+#define MBOX_SET_FLAG_SEL_EXTRA_REG(c, r) \
+ EVENT_EXTRA_REG(MBOX_SET_FLAG_SEL(c), NHMEX_M0_MSR_PMU_##r, \
+ MBOX_SET_FLAG_SEL_MASK, \
+ (u64)-1, NHMEX_M_##r)
+
+/* NHM-EX Rbox */
+#define NHMEX_R_MSR_GLOBAL_CTL 0xe00
+#define NHMEX_R_MSR_PMON_CTL0 0xe10
+#define NHMEX_R_MSR_PMON_CNT0 0xe11
+#define NHMEX_R_MSR_OFFSET 0x20
+
+#define NHMEX_R_MSR_PORTN_QLX_CFG(n) \
+ ((n) < 4 ? (0xe0c + (n)) : (0xe2c + (n) - 4))
+#define NHMEX_R_MSR_PORTN_IPERF_CFG0(n) (0xe04 + (n))
+#define NHMEX_R_MSR_PORTN_IPERF_CFG1(n) (0xe24 + (n))
+#define NHMEX_R_MSR_PORTN_XBR_OFFSET(n) \
+ (((n) < 4 ? 0 : 0x10) + (n) * 4)
+#define NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) \
+ (0xe60 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n))
+#define NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(n) \
+ (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 1)
+#define NHMEX_R_MSR_PORTN_XBR_SET1_MASK(n) \
+ (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 2)
+#define NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) \
+ (0xe70 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n))
+#define NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(n) \
+ (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 1)
+#define NHMEX_R_MSR_PORTN_XBR_SET2_MASK(n) \
+ (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 2)
+
+#define NHMEX_R_PMON_CTL_EN (1 << 0)
+#define NHMEX_R_PMON_CTL_EV_SEL_SHIFT 1
+#define NHMEX_R_PMON_CTL_EV_SEL_MASK \
+ (0x1f << NHMEX_R_PMON_CTL_EV_SEL_SHIFT)
+#define NHMEX_R_PMON_CTL_PMI_EN (1 << 6)
+#define NHMEX_R_PMON_RAW_EVENT_MASK NHMEX_R_PMON_CTL_EV_SEL_MASK
+
+/* NHM-EX Wbox */
+#define NHMEX_W_MSR_GLOBAL_CTL 0xc80
+#define NHMEX_W_MSR_PMON_CNT0 0xc90
+#define NHMEX_W_MSR_PMON_EVT_SEL0 0xc91
+#define NHMEX_W_MSR_PMON_FIXED_CTR 0x394
+#define NHMEX_W_MSR_PMON_FIXED_CTL 0x395
+
+#define NHMEX_W_PMON_GLOBAL_FIXED_EN (1ULL << 31)
+
struct intel_uncore_ops;
struct intel_uncore_pmu;
struct intel_uncore_box;
@@ -178,6 +362,8 @@ struct intel_uncore_type {
unsigned msr_offset;
unsigned num_shared_regs:8;
unsigned single_fixed:1;
+ unsigned pair_ctr_ctl:1;
+ unsigned *msr_offsets;
struct event_constraint unconstrainted;
struct event_constraint *constraints;
struct intel_uncore_pmu *pmus;
@@ -213,7 +399,7 @@ struct intel_uncore_pmu {
struct intel_uncore_extra_reg {
raw_spinlock_t lock;
- u64 config1;
+ u64 config, config1, config2;
atomic_t ref;
};
@@ -295,43 +481,47 @@ unsigned uncore_pci_perf_ctr(struct intel_uncore_box *box, int idx)
return idx * 8 + box->pmu->type->perf_ctr;
}
-static inline
-unsigned uncore_msr_box_ctl(struct intel_uncore_box *box)
+/*
+ * MSR offset of @box relative to the first box of its type.
+ *
+ * A type may provide an explicit per-box offset table (msr_offsets);
+ * otherwise the boxes are spaced evenly by msr_offset.
+ */
+static inline unsigned uncore_msr_box_offset(struct intel_uncore_box *box)
+{
+	struct intel_uncore_type *type = box->pmu->type;
+
+	if (type->msr_offsets)
+		return type->msr_offsets[box->pmu->pmu_idx];
+
+	return type->msr_offset * box->pmu->pmu_idx;
+}
+
+static inline unsigned uncore_msr_box_ctl(struct intel_uncore_box *box)
{
if (!box->pmu->type->box_ctl)
return 0;
- return box->pmu->type->box_ctl +
- box->pmu->type->msr_offset * box->pmu->pmu_idx;
+ return box->pmu->type->box_ctl + uncore_msr_box_offset(box);
}
-static inline
-unsigned uncore_msr_fixed_ctl(struct intel_uncore_box *box)
+static inline unsigned uncore_msr_fixed_ctl(struct intel_uncore_box *box)
{
if (!box->pmu->type->fixed_ctl)
return 0;
- return box->pmu->type->fixed_ctl +
- box->pmu->type->msr_offset * box->pmu->pmu_idx;
+ return box->pmu->type->fixed_ctl + uncore_msr_box_offset(box);
}
-static inline
-unsigned uncore_msr_fixed_ctr(struct intel_uncore_box *box)
+static inline unsigned uncore_msr_fixed_ctr(struct intel_uncore_box *box)
{
- return box->pmu->type->fixed_ctr +
- box->pmu->type->msr_offset * box->pmu->pmu_idx;
+ return box->pmu->type->fixed_ctr + uncore_msr_box_offset(box);
}
static inline
unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx)
{
- return idx + box->pmu->type->event_ctl +
- box->pmu->type->msr_offset * box->pmu->pmu_idx;
+ return box->pmu->type->event_ctl +
+ (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
+ uncore_msr_box_offset(box);
}
static inline
unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx)
{
- return idx + box->pmu->type->perf_ctr +
- box->pmu->type->msr_offset * box->pmu->pmu_idx;
+ return box->pmu->type->perf_ctr +
+ (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
+ uncore_msr_box_offset(box);
}
static inline
@@ -422,3 +612,8 @@ static inline void uncore_box_init(struct intel_uncore_box *box)
box->pmu->type->ops->init_box(box);
}
}
+
+/* A box is treated as fake when its phys_id is negative. */
+static inline bool uncore_box_is_fake(struct intel_uncore_box *box)
+{
+	return box->phys_id < 0;
+}
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 41857970517f..ed858e9e9a74 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -944,7 +944,7 @@ void __init e820_reserve_resources(void)
for (i = 0; i < e820_saved.nr_map; i++) {
struct e820entry *entry = &e820_saved.map[i];
firmware_map_add_early(entry->addr,
- entry->addr + entry->size - 1,
+ entry->addr + entry->size,
e820_type_to_string(entry->type));
}
}
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 1f5f1d5d2a02..d44f7829968e 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -270,7 +270,7 @@ void fixup_irqs(void)
if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
break_affinity = 1;
- affinity = cpu_all_mask;
+ affinity = cpu_online_mask;
}
chip = irq_data_get_irq_chip(data);
@@ -328,6 +328,7 @@ void fixup_irqs(void)
chip->irq_retrigger(data);
raw_spin_unlock(&desc->lock);
}
+ __this_cpu_write(vector_irq[vector], -1);
}
}
#endif
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index 1d5d31ea686b..dc1404bf8e4b 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -107,7 +107,7 @@ static int __init create_setup_data_nodes(struct dentry *parent)
{
struct setup_data_node *node;
struct setup_data *data;
- int error = -ENOMEM;
+ int error;
struct dentry *d;
struct page *pg;
u64 pa_data;
@@ -121,8 +121,10 @@ static int __init create_setup_data_nodes(struct dentry *parent)
while (pa_data) {
node = kmalloc(sizeof(*node), GFP_KERNEL);
- if (!node)
+ if (!node) {
+ error = -ENOMEM;
goto err_dir;
+ }
pg = pfn_to_page((pa_data+sizeof(*data)-1) >> PAGE_SHIFT);
if (PageHighMem(pg)) {
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 8a2ce8fd41c0..82746f942cd8 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -143,11 +143,12 @@ static int get_matching_microcode(int cpu, const u8 *ucode_ptr,
unsigned int *current_size)
{
struct microcode_header_amd *mc_hdr;
- unsigned int actual_size;
+ unsigned int actual_size, patch_size;
u16 equiv_cpu_id;
/* size of the current patch we're staring at */
- *current_size = *(u32 *)(ucode_ptr + 4) + SECTION_HDR_SIZE;
+ patch_size = *(u32 *)(ucode_ptr + 4);
+ *current_size = patch_size + SECTION_HDR_SIZE;
equiv_cpu_id = find_equiv_id();
if (!equiv_cpu_id)
@@ -174,7 +175,7 @@ static int get_matching_microcode(int cpu, const u8 *ucode_ptr,
/*
* now that the header looks sane, verify its size
*/
- actual_size = verify_ucode_size(cpu, *current_size, leftover_size);
+ actual_size = verify_ucode_size(cpu, patch_size, leftover_size);
if (!actual_size)
return 0;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 97d9a9914ba8..a3b57a27be88 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -475,13 +475,26 @@ register_address(struct x86_emulate_ctxt *ctxt, unsigned long reg)
return address_mask(ctxt, reg);
}
+/*
+ * Add @inc to *@reg and store the sum back through assign_masked()
+ * with @mask.
+ */
+static void masked_increment(ulong *reg, ulong mask, int inc)
+{
+	ulong sum = *reg + inc;
+
+	assign_masked(reg, sum, mask);
+}
+
static inline void
register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, int inc)
{
+ ulong mask;
+
if (ctxt->ad_bytes == sizeof(unsigned long))
- *reg += inc;
+ mask = ~0UL;
else
- *reg = (*reg & ~ad_mask(ctxt)) | ((*reg + inc) & ad_mask(ctxt));
+ mask = ad_mask(ctxt);
+ masked_increment(reg, mask, inc);
+}
+
+/* Adjust the guest RSP by @inc, using the stack mask instead of the address mask. */
+static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
+{
+	ulong *rsp = &ctxt->regs[VCPU_REGS_RSP];
+
+	masked_increment(rsp, stack_mask(ctxt), inc);
+}
static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
@@ -1522,8 +1535,8 @@ static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes)
{
struct segmented_address addr;
- register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], -bytes);
- addr.ea = register_address(ctxt, ctxt->regs[VCPU_REGS_RSP]);
+ rsp_increment(ctxt, -bytes);
+ addr.ea = ctxt->regs[VCPU_REGS_RSP] & stack_mask(ctxt);
addr.seg = VCPU_SREG_SS;
return segmented_write(ctxt, addr, data, bytes);
@@ -1542,13 +1555,13 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt,
int rc;
struct segmented_address addr;
- addr.ea = register_address(ctxt, ctxt->regs[VCPU_REGS_RSP]);
+ addr.ea = ctxt->regs[VCPU_REGS_RSP] & stack_mask(ctxt);
addr.seg = VCPU_SREG_SS;
rc = segmented_read(ctxt, addr, dest, len);
if (rc != X86EMUL_CONTINUE)
return rc;
- register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], len);
+ rsp_increment(ctxt, len);
return rc;
}
@@ -1688,8 +1701,7 @@ static int em_popa(struct x86_emulate_ctxt *ctxt)
while (reg >= VCPU_REGS_RAX) {
if (reg == VCPU_REGS_RSP) {
- register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP],
- ctxt->op_bytes);
+ rsp_increment(ctxt, ctxt->op_bytes);
--reg;
}
@@ -2825,7 +2837,7 @@ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
if (rc != X86EMUL_CONTINUE)
return rc;
- register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], ctxt->src.val);
+ rsp_increment(ctxt, ctxt->src.val);
return X86EMUL_CONTINUE;
}
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 1df8fb9e1d5d..e498b18f010c 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -316,6 +316,11 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
addr &= 1;
if (addr == 0) {
if (val & 0x10) {
+ u8 edge_irr = s->irr & ~s->elcr;
+ int i;
+ /* must start out false: it is only set when a vcpu accepts PIC interrupts */
+ bool found = false;
+ struct kvm_vcpu *vcpu;
+
s->init4 = val & 1;
s->last_irr = 0;
s->irr &= s->elcr;
@@ -333,6 +338,18 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
if (val & 0x08)
pr_pic_unimpl(
"level sensitive irq not supported");
+
+ kvm_for_each_vcpu(i, vcpu, s->pics_state->kvm)
+ if (kvm_apic_accept_pic_intr(vcpu)) {
+ found = true;
+ break;
+ }
+
+
+ if (found)
+ for (irq = 0; irq < PIC_NUM_PINS/2; irq++)
+ if (edge_irr & (1 << irq))
+ pic_clear_isr(s, irq);
} else if (val & 0x08) {
if (val & 0x04)
s->poll = 1;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 01ca00423938..7fbd0d273ea8 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4113,16 +4113,21 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
LIST_HEAD(invalid_list);
/*
+ * Never scan more than sc->nr_to_scan VM instances.
+ * Will not hit this condition practically since we do not try
+ * to shrink more than one VM and it is very unlikely to see
+ * !n_used_mmu_pages so many times.
+ */
+ if (!nr_to_scan--)
+ break;
+ /*
* n_used_mmu_pages is accessed without holding kvm->mmu_lock
* here. We may skip a VM instance errorneosly, but we do not
* want to shrink a VM that only started to populate its MMU
* anyway.
*/
- if (kvm->arch.n_used_mmu_pages > 0) {
- if (!nr_to_scan--)
- break;
+ if (!kvm->arch.n_used_mmu_pages)
continue;
- }
idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c39b60707e02..c00f03de1b79 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1488,13 +1488,6 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
loadsegment(ds, vmx->host_state.ds_sel);
loadsegment(es, vmx->host_state.es_sel);
}
-#else
- /*
- * The sysexit path does not restore ds/es, so we must set them to
- * a reasonable value ourselves.
- */
- loadsegment(ds, __USER_DS);
- loadsegment(es, __USER_DS);
#endif
reload_tss();
#ifdef CONFIG_X86_64
@@ -6370,6 +6363,19 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
#endif
);
+#ifndef CONFIG_X86_64
+ /*
+ * The sysexit path does not restore ds/es, so we must set them to
+ * a reasonable value ourselves.
+ *
+ * We can't defer this to vmx_load_host_state() since that function
+ * may be executed in interrupt context, which saves and restore segments
+ * around it, nullifying its effect.
+ */
+ loadsegment(ds, __USER_DS);
+ loadsegment(es, __USER_DS);
+#endif
+
vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
| (1 << VCPU_EXREG_RFLAGS)
| (1 << VCPU_EXREG_CPL)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 59b59508ff07..148ed666e311 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -806,7 +806,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
* kvm-specific. Those are put in the beginning of the list.
*/
-#define KVM_SAVE_MSRS_BEGIN 9
+#define KVM_SAVE_MSRS_BEGIN 10
static u32 msrs_to_save[] = {
MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
@@ -925,6 +925,10 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
*/
getboottime(&boot);
+ if (kvm->arch.kvmclock_offset) {
+ struct timespec ts = ns_to_timespec(kvm->arch.kvmclock_offset);
+ boot = timespec_sub(boot, ts);
+ }
wc.sec = boot.tv_sec;
wc.nsec = boot.tv_nsec;
wc.version = version;
@@ -1996,6 +2000,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case MSR_KVM_STEAL_TIME:
data = vcpu->arch.st.msr_val;
break;
+ case MSR_KVM_PV_EOI_EN:
+ data = vcpu->arch.pv_eoi.msr_val;
+ break;
case MSR_IA32_P5_MC_ADDR:
case MSR_IA32_P5_MC_TYPE:
case MSR_IA32_MCG_CAP:
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index f6679a7fb8ca..b91e48512425 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -56,9 +56,16 @@ static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
}
/*
- * search for a shareable pmd page for hugetlb.
+ * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
+ * and returns the corresponding pte. While this is not necessary for the
+ * !shared pmd case because we can allocate the pmd later as well, it makes the
+ * code much cleaner. pmd allocation is essential for the shared case because
+ * pud has to be populated inside the same i_mmap_mutex section - otherwise
+ * racing tasks could either miss the sharing (see huge_pte_offset) or select a
+ * bad pmd for sharing.
*/
-static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
+static pte_t *
+huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
{
struct vm_area_struct *vma = find_vma(mm, addr);
struct address_space *mapping = vma->vm_file->f_mapping;
@@ -68,9 +75,10 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
struct vm_area_struct *svma;
unsigned long saddr;
pte_t *spte = NULL;
+ pte_t *pte;
if (!vma_shareable(vma, addr))
- return;
+ return (pte_t *)pmd_alloc(mm, pud, addr);
mutex_lock(&mapping->i_mmap_mutex);
vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) {
@@ -97,7 +105,9 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
put_page(virt_to_page(spte));
spin_unlock(&mm->page_table_lock);
out:
+ pte = (pte_t *)pmd_alloc(mm, pud, addr);
mutex_unlock(&mapping->i_mmap_mutex);
+ return pte;
}
/*
@@ -142,8 +152,9 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
} else {
BUG_ON(sz != PMD_SIZE);
if (pud_none(*pud))
- huge_pmd_share(mm, addr, pud);
- pte = (pte_t *) pmd_alloc(mm, pud, addr);
+ pte = huge_pmd_share(mm, addr, pud);
+ else
+ pte = (pte_t *)pmd_alloc(mm, pud, addr);
}
}
BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 931930a96160..a718e0d23503 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -919,13 +919,11 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
/*
* On success we use clflush, when the CPU supports it to
- * avoid the wbindv. If the CPU does not support it, in the
- * error case, and during early boot (for EFI) we fall back
- * to cpa_flush_all (which uses wbinvd):
+ * avoid the wbinvd. If the CPU does not support it and in the
+ * error case we fall back to cpa_flush_all (which uses
+ * wbinvd):
*/
- if (early_boot_irqs_disabled)
- __cpa_flush_all((void *)(long)cache);
- else if (!ret && cpu_has_clflush) {
+ if (!ret && cpu_has_clflush) {
if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
cpa_flush_array(addr, numpages, cache,
cpa.flags, pages);
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index 4599c3e8bcb6..4ddf497ca65b 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -142,23 +142,23 @@ static inline int save_add_info(void) {return 0;}
#endif
/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
-void __init
+int __init
acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
{
u64 start, end;
int node, pxm;
if (srat_disabled())
- return;
+ return -1;
if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
bad_srat();
- return;
+ return -1;
}
if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
- return;
+ return -1;
if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info())
- return;
+ return -1;
start = ma->base_address;
end = start + ma->length;
pxm = ma->proximity_domain;
@@ -168,12 +168,12 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
if (node < 0) {
printk(KERN_ERR "SRAT: Too many proximity domains.\n");
bad_srat();
- return;
+ return -1;
}
if (numa_add_memblk(node, start, end) < 0) {
bad_srat();
- return;
+ return -1;
}
node_set(node, numa_nodes_parsed);
@@ -181,6 +181,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n",
node, pxm,
(unsigned long long) start, (unsigned long long) end - 1);
+ return 0;
}
void __init acpi_numa_arch_fixup(void) {}
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 2dc29f51e75a..92660edaa1e7 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -234,7 +234,22 @@ static efi_status_t __init phys_efi_set_virtual_address_map(
return status;
}
-static int efi_set_rtc_mmss(unsigned long nowtime)
+static efi_status_t __init phys_efi_get_time(efi_time_t *tm,
+ efi_time_cap_t *tc)
+{
+ unsigned long flags;
+ efi_status_t status;
+
+ spin_lock_irqsave(&rtc_lock, flags);
+ efi_call_phys_prelog();
+ status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm),
+ virt_to_phys(tc));
+ efi_call_phys_epilog();
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ return status;
+}
+
+int efi_set_rtc_mmss(unsigned long nowtime)
{
int real_seconds, real_minutes;
efi_status_t status;
@@ -263,7 +278,7 @@ static int efi_set_rtc_mmss(unsigned long nowtime)
return 0;
}
-static unsigned long efi_get_time(void)
+unsigned long efi_get_time(void)
{
efi_status_t status;
efi_time_t eft;
@@ -606,13 +621,18 @@ static int __init efi_runtime_init(void)
}
/*
* We will only need *early* access to the following
- * EFI runtime service before set_virtual_address_map
+ * two EFI runtime services before set_virtual_address_map
* is invoked.
*/
+ efi_phys.get_time = (efi_get_time_t *)runtime->get_time;
efi_phys.set_virtual_address_map =
(efi_set_virtual_address_map_t *)
runtime->set_virtual_address_map;
-
+ /*
+ * Make sure efi_get_time() can be called before entering
+ * virtual mode.
+ */
+ efi.get_time = phys_efi_get_time;
early_iounmap(runtime, sizeof(efi_runtime_services_t));
return 0;
@@ -700,10 +720,12 @@ void __init efi_init(void)
efi_enabled = 0;
return;
}
+#ifdef CONFIG_X86_32
if (efi_native) {
x86_platform.get_wallclock = efi_get_time;
x86_platform.set_wallclock = efi_set_rtc_mmss;
}
+#endif
#if EFI_DEBUG
print_efi_memmap();
diff --git a/arch/x86/platform/olpc/olpc-xo1-pm.c b/arch/x86/platform/olpc/olpc-xo1-pm.c
index 0ce8616c88ae..d75582d1aa55 100644
--- a/arch/x86/platform/olpc/olpc-xo1-pm.c
+++ b/arch/x86/platform/olpc/olpc-xo1-pm.c
@@ -18,6 +18,7 @@
#include <linux/pm.h>
#include <linux/mfd/core.h>
#include <linux/suspend.h>
+#include <linux/olpc-ec.h>
#include <asm/io.h>
#include <asm/olpc.h>
@@ -51,16 +52,11 @@ EXPORT_SYMBOL_GPL(olpc_xo1_pm_wakeup_clear);
static int xo1_power_state_enter(suspend_state_t pm_state)
{
unsigned long saved_sci_mask;
- int r;
/* Only STR is supported */
if (pm_state != PM_SUSPEND_MEM)
return -EINVAL;
- r = olpc_ec_cmd(EC_SET_SCI_INHIBIT, NULL, 0, NULL, 0);
- if (r)
- return r;
-
/*
* Save SCI mask (this gets lost since PM1_EN is used as a mask for
* wakeup events, which is not necessarily the same event set)
@@ -76,16 +72,6 @@ static int xo1_power_state_enter(suspend_state_t pm_state)
/* Restore SCI mask (using dword access to CS5536_PM1_EN) */
outl(saved_sci_mask, acpi_base + CS5536_PM1_STS);
- /* Tell the EC to stop inhibiting SCIs */
- olpc_ec_cmd(EC_SET_SCI_INHIBIT_RELEASE, NULL, 0, NULL, 0);
-
- /*
- * Tell the wireless module to restart USB communication.
- * Must be done twice.
- */
- olpc_ec_cmd(EC_WAKE_UP_WLAN, NULL, 0, NULL, 0);
- olpc_ec_cmd(EC_WAKE_UP_WLAN, NULL, 0, NULL, 0);
-
return 0;
}
diff --git a/arch/x86/platform/olpc/olpc-xo1-sci.c b/arch/x86/platform/olpc/olpc-xo1-sci.c
index 04b8c73659c5..63d4aa40956e 100644
--- a/arch/x86/platform/olpc/olpc-xo1-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo1-sci.c
@@ -23,6 +23,7 @@
#include <linux/power_supply.h>
#include <linux/suspend.h>
#include <linux/workqueue.h>
+#include <linux/olpc-ec.h>
#include <asm/io.h>
#include <asm/msr.h>
diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c
index 599be499fdf7..2fdca25905ae 100644
--- a/arch/x86/platform/olpc/olpc-xo15-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo15-sci.c
@@ -13,6 +13,7 @@
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/power_supply.h>
+#include <linux/olpc-ec.h>
#include <acpi/acpi_bus.h>
#include <acpi/acpi_drivers.h>
diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c
index a4bee53c2e54..27376081ddec 100644
--- a/arch/x86/platform/olpc/olpc.c
+++ b/arch/x86/platform/olpc/olpc.c
@@ -14,14 +14,13 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/delay.h>
-#include <linux/spinlock.h>
#include <linux/io.h>
#include <linux/string.h>
#include <linux/platform_device.h>
#include <linux/of.h>
#include <linux/syscore_ops.h>
-#include <linux/debugfs.h>
#include <linux/mutex.h>
+#include <linux/olpc-ec.h>
#include <asm/geode.h>
#include <asm/setup.h>
@@ -31,17 +30,6 @@
struct olpc_platform_t olpc_platform_info;
EXPORT_SYMBOL_GPL(olpc_platform_info);
-static DEFINE_SPINLOCK(ec_lock);
-
-/* debugfs interface to EC commands */
-#define EC_MAX_CMD_ARGS (5 + 1) /* cmd byte + 5 args */
-#define EC_MAX_CMD_REPLY (8)
-
-static struct dentry *ec_debugfs_dir;
-static DEFINE_MUTEX(ec_debugfs_cmd_lock);
-static unsigned char ec_debugfs_resp[EC_MAX_CMD_REPLY];
-static unsigned int ec_debugfs_resp_bytes;
-
/* EC event mask to be applied during suspend (defining wakeup sources). */
static u16 ec_wakeup_mask;
@@ -125,16 +113,13 @@ static int __wait_on_obf(unsigned int line, unsigned int port, int desired)
* <http://wiki.laptop.org/go/Ec_specification>. Unfortunately, while
* OpenFirmware's source is available, the EC's is not.
*/
-int olpc_ec_cmd(unsigned char cmd, unsigned char *inbuf, size_t inlen,
- unsigned char *outbuf, size_t outlen)
+static int olpc_xo1_ec_cmd(u8 cmd, u8 *inbuf, size_t inlen, u8 *outbuf,
+ size_t outlen, void *arg)
{
- unsigned long flags;
int ret = -EIO;
int i;
int restarts = 0;
- spin_lock_irqsave(&ec_lock, flags);
-
/* Clear OBF */
for (i = 0; i < 10 && (obf_status(0x6c) == 1); i++)
inb(0x68);
@@ -198,10 +183,8 @@ restart:
ret = 0;
err:
- spin_unlock_irqrestore(&ec_lock, flags);
return ret;
}
-EXPORT_SYMBOL_GPL(olpc_ec_cmd);
void olpc_ec_wakeup_set(u16 value)
{
@@ -280,96 +263,6 @@ int olpc_ec_sci_query(u16 *sci_value)
}
EXPORT_SYMBOL_GPL(olpc_ec_sci_query);
-static ssize_t ec_debugfs_cmd_write(struct file *file, const char __user *buf,
- size_t size, loff_t *ppos)
-{
- int i, m;
- unsigned char ec_cmd[EC_MAX_CMD_ARGS];
- unsigned int ec_cmd_int[EC_MAX_CMD_ARGS];
- char cmdbuf[64];
- int ec_cmd_bytes;
-
- mutex_lock(&ec_debugfs_cmd_lock);
-
- size = simple_write_to_buffer(cmdbuf, sizeof(cmdbuf), ppos, buf, size);
-
- m = sscanf(cmdbuf, "%x:%u %x %x %x %x %x", &ec_cmd_int[0],
- &ec_debugfs_resp_bytes,
- &ec_cmd_int[1], &ec_cmd_int[2], &ec_cmd_int[3],
- &ec_cmd_int[4], &ec_cmd_int[5]);
- if (m < 2 || ec_debugfs_resp_bytes > EC_MAX_CMD_REPLY) {
- /* reset to prevent overflow on read */
- ec_debugfs_resp_bytes = 0;
-
- printk(KERN_DEBUG "olpc-ec: bad ec cmd: "
- "cmd:response-count [arg1 [arg2 ...]]\n");
- size = -EINVAL;
- goto out;
- }
-
- /* convert scanf'd ints to char */
- ec_cmd_bytes = m - 2;
- for (i = 0; i <= ec_cmd_bytes; i++)
- ec_cmd[i] = ec_cmd_int[i];
-
- printk(KERN_DEBUG "olpc-ec: debugfs cmd 0x%02x with %d args "
- "%02x %02x %02x %02x %02x, want %d returns\n",
- ec_cmd[0], ec_cmd_bytes, ec_cmd[1], ec_cmd[2], ec_cmd[3],
- ec_cmd[4], ec_cmd[5], ec_debugfs_resp_bytes);
-
- olpc_ec_cmd(ec_cmd[0], (ec_cmd_bytes == 0) ? NULL : &ec_cmd[1],
- ec_cmd_bytes, ec_debugfs_resp, ec_debugfs_resp_bytes);
-
- printk(KERN_DEBUG "olpc-ec: response "
- "%02x %02x %02x %02x %02x %02x %02x %02x (%d bytes expected)\n",
- ec_debugfs_resp[0], ec_debugfs_resp[1], ec_debugfs_resp[2],
- ec_debugfs_resp[3], ec_debugfs_resp[4], ec_debugfs_resp[5],
- ec_debugfs_resp[6], ec_debugfs_resp[7], ec_debugfs_resp_bytes);
-
-out:
- mutex_unlock(&ec_debugfs_cmd_lock);
- return size;
-}
-
-static ssize_t ec_debugfs_cmd_read(struct file *file, char __user *buf,
- size_t size, loff_t *ppos)
-{
- unsigned int i, r;
- char *rp;
- char respbuf[64];
-
- mutex_lock(&ec_debugfs_cmd_lock);
- rp = respbuf;
- rp += sprintf(rp, "%02x", ec_debugfs_resp[0]);
- for (i = 1; i < ec_debugfs_resp_bytes; i++)
- rp += sprintf(rp, ", %02x", ec_debugfs_resp[i]);
- mutex_unlock(&ec_debugfs_cmd_lock);
- rp += sprintf(rp, "\n");
-
- r = rp - respbuf;
- return simple_read_from_buffer(buf, size, ppos, respbuf, r);
-}
-
-static const struct file_operations ec_debugfs_genops = {
- .write = ec_debugfs_cmd_write,
- .read = ec_debugfs_cmd_read,
-};
-
-static void setup_debugfs(void)
-{
- ec_debugfs_dir = debugfs_create_dir("olpc-ec", 0);
- if (ec_debugfs_dir == ERR_PTR(-ENODEV))
- return;
-
- debugfs_create_file("cmd", 0600, ec_debugfs_dir, NULL,
- &ec_debugfs_genops);
-}
-
-static int olpc_ec_suspend(void)
-{
- return olpc_ec_mask_write(ec_wakeup_mask);
-}
-
static bool __init check_ofw_architecture(struct device_node *root)
{
const char *olpc_arch;
@@ -424,8 +317,59 @@ static int __init add_xo1_platform_devices(void)
return 0;
}
-static struct syscore_ops olpc_syscore_ops = {
- .suspend = olpc_ec_suspend,
+static int olpc_xo1_ec_probe(struct platform_device *pdev)
+{
+ /* get the EC revision */
+ olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0,
+ (unsigned char *) &olpc_platform_info.ecver, 1);
+
+ /* EC version 0x5f adds support for wide SCI mask */
+ if (olpc_platform_info.ecver >= 0x5f)
+ olpc_platform_info.flags |= OLPC_F_EC_WIDE_SCI;
+
+ pr_info("OLPC board revision %s%X (EC=%x)\n",
+ ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "",
+ olpc_platform_info.boardrev >> 4,
+ olpc_platform_info.ecver);
+
+ return 0;
+}
+static int olpc_xo1_ec_suspend(struct platform_device *pdev)
+{
+ olpc_ec_mask_write(ec_wakeup_mask);
+
+ /*
+ * Squelch SCIs while suspended. This is a fix for
+ * <http://dev.laptop.org/ticket/1835>.
+ */
+ return olpc_ec_cmd(EC_SET_SCI_INHIBIT, NULL, 0, NULL, 0);
+}
+
+static int olpc_xo1_ec_resume(struct platform_device *pdev)
+{
+ /* Tell the EC to stop inhibiting SCIs */
+ olpc_ec_cmd(EC_SET_SCI_INHIBIT_RELEASE, NULL, 0, NULL, 0);
+
+ /*
+ * Tell the wireless module to restart USB communication.
+ * Must be done twice.
+ */
+ olpc_ec_cmd(EC_WAKE_UP_WLAN, NULL, 0, NULL, 0);
+ olpc_ec_cmd(EC_WAKE_UP_WLAN, NULL, 0, NULL, 0);
+
+ return 0;
+}
+
+static struct olpc_ec_driver ec_xo1_driver = {
+ .probe = olpc_xo1_ec_probe,
+ .suspend = olpc_xo1_ec_suspend,
+ .resume = olpc_xo1_ec_resume,
+ .ec_cmd = olpc_xo1_ec_cmd,
+};
+
+static struct olpc_ec_driver ec_xo1_5_driver = {
+ .probe = olpc_xo1_ec_probe,
+ .ec_cmd = olpc_xo1_ec_cmd,
};
static int __init olpc_init(void)
@@ -435,16 +379,17 @@ static int __init olpc_init(void)
if (!olpc_ofw_present() || !platform_detect())
return 0;
- spin_lock_init(&ec_lock);
+ /* register the XO-1 and 1.5-specific EC handler */
+ if (olpc_platform_info.boardrev < olpc_board_pre(0xd0)) /* XO-1 */
+ olpc_ec_driver_register(&ec_xo1_driver, NULL);
+ else
+ olpc_ec_driver_register(&ec_xo1_5_driver, NULL);
+ platform_device_register_simple("olpc-ec", -1, NULL, 0);
/* assume B1 and above models always have a DCON */
if (olpc_board_at_least(olpc_board(0xb1)))
olpc_platform_info.flags |= OLPC_F_DCON;
- /* get the EC revision */
- olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0,
- (unsigned char *) &olpc_platform_info.ecver, 1);
-
#ifdef CONFIG_PCI_OLPC
/* If the VSA exists let it emulate PCI, if not emulate in kernel.
* XO-1 only. */
@@ -452,14 +397,6 @@ static int __init olpc_init(void)
!cs5535_has_vsa2())
x86_init.pci.arch_init = pci_olpc_init;
#endif
- /* EC version 0x5f adds support for wide SCI mask */
- if (olpc_platform_info.ecver >= 0x5f)
- olpc_platform_info.flags |= OLPC_F_EC_WIDE_SCI;
-
- printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n",
- ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "",
- olpc_platform_info.boardrev >> 4,
- olpc_platform_info.ecver);
if (olpc_platform_info.boardrev < olpc_board_pre(0xd0)) { /* XO-1 */
r = add_xo1_platform_devices();
@@ -467,9 +404,6 @@ static int __init olpc_init(void)
return r;
}
- register_syscore_ops(&olpc_syscore_ops);
- setup_debugfs();
-
return 0;
}
diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile
index b2d534cab25f..88692871823f 100644
--- a/arch/x86/realmode/rm/Makefile
+++ b/arch/x86/realmode/rm/Makefile
@@ -72,7 +72,7 @@ KBUILD_CFLAGS := $(LINUXINCLUDE) -m32 -g -Os -D_SETUP -D__KERNEL__ -D_WAKEUP \
-Wall -Wstrict-prototypes \
-march=i386 -mregparm=3 \
-include $(srctree)/$(src)/../../boot/code16gcc.h \
- -fno-strict-aliasing -fomit-frame-pointer \
+ -fno-strict-aliasing -fomit-frame-pointer -fno-pic \
$(call cc-option, -ffreestanding) \
$(call cc-option, -fno-toplevel-reorder,\
$(call cc-option, -fno-unit-at-a-time)) \
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 51171aeff0dc..a582bfed95bb 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -60,8 +60,8 @@
51 common getsockname sys_getsockname
52 common getpeername sys_getpeername
53 common socketpair sys_socketpair
-54 common setsockopt sys_setsockopt
-55 common getsockopt sys_getsockopt
+54 64 setsockopt sys_setsockopt
+55 64 getsockopt sys_getsockopt
56 common clone stub_clone
57 common fork stub_fork
58 common vfork stub_vfork
@@ -318,7 +318,7 @@
309 common getcpu sys_getcpu
310 64 process_vm_readv sys_process_vm_readv
311 64 process_vm_writev sys_process_vm_writev
-312 64 kcmp sys_kcmp
+312 common kcmp sys_kcmp
#
# x32-specific system call numbers start at 512 to avoid cache impact
@@ -353,3 +353,5 @@
538 x32 sendmmsg compat_sys_sendmmsg
539 x32 process_vm_readv compat_sys_process_vm_readv
540 x32 process_vm_writev compat_sys_process_vm_writev
+541 x32 setsockopt compat_sys_setsockopt
+542 x32 getsockopt compat_sys_getsockopt
diff --git a/arch/x86/um/asm/ptrace.h b/arch/x86/um/asm/ptrace.h
index 950dfb7b8417..e72cd0df5ba3 100644
--- a/arch/x86/um/asm/ptrace.h
+++ b/arch/x86/um/asm/ptrace.h
@@ -30,10 +30,10 @@
#define profile_pc(regs) PT_REGS_IP(regs)
#define UPT_RESTART_SYSCALL(r) (UPT_IP(r) -= 2)
-#define UPT_SET_SYSCALL_RETURN(r, res) (UPT_AX(r) = (res))
+#define PT_REGS_SET_SYSCALL_RETURN(r, res) (PT_REGS_AX(r) = (res))
-static inline long regs_return_value(struct uml_pt_regs *regs)
+static inline long regs_return_value(struct pt_regs *regs)
{
- return UPT_AX(regs);
+ return PT_REGS_AX(regs);
}
#endif /* __UM_X86_PTRACE_H */