diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-20 20:29:32 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-20 20:29:32 +0200 |
commit | 12e24f34cb0d55efd08c18b2112507d4bf498008 (patch) | |
tree | 83b07be17b8ef45f42360a3b9159b3aaae3fbad4 /arch | |
parent | Merge branch 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/ke... (diff) | |
parent | perfcounter: Handle some IO return values (diff) | |
download | linux-12e24f34cb0d55efd08c18b2112507d4bf498008.tar.xz linux-12e24f34cb0d55efd08c18b2112507d4bf498008.zip |
Merge branch 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (49 commits)
perfcounter: Handle some IO return values
perf_counter: Push perf_sample_data through the swcounter code
perf_counter tools: Define and use our own u64, s64 etc. definitions
perf_counter: Close race in perf_lock_task_context()
perf_counter, x86: Improve interactions with fast-gup
perf_counter: Simplify and fix task migration counting
perf_counter tools: Add a data file header
perf_counter: Update userspace callchain sampling uses
perf_counter: Make callchain samples extensible
perf report: Filter to parent set by default
perf_counter tools: Handle lost events
perf_counter: Add event overlow handling
fs: Provide empty .set_page_dirty() aop for anon inodes
perf_counter: tools: Makefile tweaks for 64-bit powerpc
perf_counter: powerpc: Add processor back-end for MPC7450 family
perf_counter: powerpc: Make powerpc perf_counter code safe for 32-bit kernels
perf_counter: powerpc: Change how processor-specific back-ends get selected
perf_counter: powerpc: Use unsigned long for register and constraint values
perf_counter: powerpc: Enable use of software counters on 32-bit powerpc
perf_counter tools: Add and use isprint()
...
Diffstat (limited to 'arch')
-rw-r--r-- | arch/powerpc/Kconfig | 1 | ||||
-rw-r--r-- | arch/powerpc/include/asm/hw_irq.h | 6 | ||||
-rw-r--r-- | arch/powerpc/include/asm/perf_counter.h | 52 | ||||
-rw-r--r-- | arch/powerpc/kernel/Makefile | 8 | ||||
-rw-r--r-- | arch/powerpc/kernel/mpc7450-pmu.c | 417 | ||||
-rw-r--r-- | arch/powerpc/kernel/perf_counter.c | 257 | ||||
-rw-r--r-- | arch/powerpc/kernel/power4-pmu.c | 89 | ||||
-rw-r--r-- | arch/powerpc/kernel/power5+-pmu.c | 95 | ||||
-rw-r--r-- | arch/powerpc/kernel/power5-pmu.c | 98 | ||||
-rw-r--r-- | arch/powerpc/kernel/power6-pmu.c | 72 | ||||
-rw-r--r-- | arch/powerpc/kernel/power7-pmu.c | 61 | ||||
-rw-r--r-- | arch/powerpc/kernel/ppc970-pmu.c | 63 | ||||
-rw-r--r-- | arch/powerpc/kernel/time.c | 25 | ||||
-rw-r--r-- | arch/powerpc/platforms/Kconfig.cputype | 12 | ||||
-rw-r--r-- | arch/x86/include/asm/perf_counter.h | 5 | ||||
-rw-r--r-- | arch/x86/include/asm/pgtable_32.h | 8 | ||||
-rw-r--r-- | arch/x86/include/asm/uaccess.h | 7 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_counter.c | 138 | ||||
-rw-r--r-- | arch/x86/mm/gup.c | 58 |
19 files changed, 1075 insertions, 397 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 9fb344d5a86a..bf6cedfa05db 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -126,6 +126,7 @@ config PPC select HAVE_OPROFILE select HAVE_SYSCALL_WRAPPERS if PPC64 select GENERIC_ATOMIC64 if PPC32 + select HAVE_PERF_COUNTERS config EARLY_PRINTK bool diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index b7f8f4a87cc0..867ab8ed69b3 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -131,6 +131,8 @@ static inline int irqs_disabled_flags(unsigned long flags) struct irq_chip; #ifdef CONFIG_PERF_COUNTERS + +#ifdef CONFIG_PPC64 static inline unsigned long test_perf_counter_pending(void) { unsigned long x; @@ -154,15 +156,15 @@ static inline void clear_perf_counter_pending(void) "r" (0), "i" (offsetof(struct paca_struct, perf_counter_pending))); } +#endif /* CONFIG_PPC64 */ -#else +#else /* CONFIG_PERF_COUNTERS */ static inline unsigned long test_perf_counter_pending(void) { return 0; } -static inline void set_perf_counter_pending(void) {} static inline void clear_perf_counter_pending(void) {} #endif /* CONFIG_PERF_COUNTERS */ diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h index cc7c887705b8..8ccd4e155768 100644 --- a/arch/powerpc/include/asm/perf_counter.h +++ b/arch/powerpc/include/asm/perf_counter.h @@ -10,6 +10,8 @@ */ #include <linux/types.h> +#include <asm/hw_irq.h> + #define MAX_HWCOUNTERS 8 #define MAX_EVENT_ALTERNATIVES 8 #define MAX_LIMITED_HWCOUNTERS 2 @@ -19,27 +21,27 @@ * describe the PMU on a particular POWER-family CPU. */ struct power_pmu { - int n_counter; - int max_alternatives; - u64 add_fields; - u64 test_adder; - int (*compute_mmcr)(u64 events[], int n_ev, - unsigned int hwc[], u64 mmcr[]); - int (*get_constraint)(u64 event, u64 *mskp, u64 *valp); - int (*get_alternatives)(u64 event, unsigned int flags, - u64 alt[]); - void (*disable_pmc)(unsigned int pmc, u64 mmcr[]); - int (*limited_pmc_event)(u64 event); - u32 flags; - int n_generic; - int *generic_events; - int (*cache_events)[PERF_COUNT_HW_CACHE_MAX] + const char *name; + int n_counter; + int max_alternatives; + unsigned long add_fields; + unsigned long test_adder; + int (*compute_mmcr)(u64 events[], int n_ev, + unsigned int hwc[], unsigned long mmcr[]); + int (*get_constraint)(u64 event, unsigned long *mskp, + unsigned long *valp); + int (*get_alternatives)(u64 event, unsigned int flags, + u64 alt[]); + void (*disable_pmc)(unsigned int pmc, unsigned long mmcr[]); + int (*limited_pmc_event)(u64 event); + u32 flags; + int n_generic; + int *generic_events; + int (*cache_events)[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX]; }; -extern struct power_pmu *ppmu; - /* * Values for power_pmu.flags */ @@ -53,15 +55,23 @@ extern struct power_pmu *ppmu; #define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */ #define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */ +extern int register_power_pmu(struct power_pmu *); + struct pt_regs; extern unsigned long perf_misc_flags(struct pt_regs *regs); -#define perf_misc_flags(regs) perf_misc_flags(regs) - extern unsigned long perf_instruction_pointer(struct pt_regs *regs); /* - * The power_pmu.get_constraint function returns a 64-bit value and - * a 64-bit mask that express the constraints between this event and + * Only override the default definitions in include/linux/perf_counter.h + * if we have hardware PMU support. + */ +#ifdef CONFIG_PPC_PERF_CTRS +#define perf_misc_flags(regs) perf_misc_flags(regs) +#endif + +/* + * The power_pmu.get_constraint function returns a 32/64-bit value and + * a 32/64-bit mask that express the constraints between this event and * other events. * * The value and mask are divided up into (non-overlapping) bitfields diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 6a4fb29a0618..b73396b93905 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -97,9 +97,10 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o -obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o power4-pmu.o ppc970-pmu.o \ - power5-pmu.o power5+-pmu.o power6-pmu.o \ - power7-pmu.o +obj-$(CONFIG_PPC_PERF_CTRS) += perf_counter.o +obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \ + power5+-pmu.o power6-pmu.o power7-pmu.o +obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o @@ -108,6 +109,7 @@ obj-y += iomap.o endif obj-$(CONFIG_PPC64) += $(obj64-y) +obj-$(CONFIG_PPC32) += $(obj32-y) ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),) obj-y += ppc_save_regs.o diff --git a/arch/powerpc/kernel/mpc7450-pmu.c b/arch/powerpc/kernel/mpc7450-pmu.c new file mode 100644 index 000000000000..75ff47fed7bf --- /dev/null +++ b/arch/powerpc/kernel/mpc7450-pmu.c @@ -0,0 +1,417 @@ +/* + * Performance counter support for MPC7450-family processors. + * + * Copyright 2008-2009 Paul Mackerras, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/string.h> +#include <linux/perf_counter.h> +#include <linux/string.h> +#include <asm/reg.h> +#include <asm/cputable.h> + +#define N_COUNTER 6 /* Number of hardware counters */ +#define MAX_ALT 3 /* Maximum number of event alternative codes */ + +/* + * Bits in event code for MPC7450 family + */ +#define PM_THRMULT_MSKS 0x40000 +#define PM_THRESH_SH 12 +#define PM_THRESH_MSK 0x3f +#define PM_PMC_SH 8 +#define PM_PMC_MSK 7 +#define PM_PMCSEL_MSK 0x7f + +/* + * Classify events according to how specific their PMC requirements are. + * Result is: + * 0: can go on any PMC + * 1: can go on PMCs 1-4 + * 2: can go on PMCs 1,2,4 + * 3: can go on PMCs 1 or 2 + * 4: can only go on one PMC + * -1: event code is invalid + */ +#define N_CLASSES 5 + +static int mpc7450_classify_event(u32 event) +{ + int pmc; + + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc) { + if (pmc > N_COUNTER) + return -1; + return 4; + } + event &= PM_PMCSEL_MSK; + if (event <= 1) + return 0; + if (event <= 7) + return 1; + if (event <= 13) + return 2; + if (event <= 22) + return 3; + return -1; +} + +/* + * Events using threshold and possible threshold scale: + * code scale? name + * 11e N PM_INSTQ_EXCEED_CYC + * 11f N PM_ALTV_IQ_EXCEED_CYC + * 128 Y PM_DTLB_SEARCH_EXCEED_CYC + * 12b Y PM_LD_MISS_EXCEED_L1_CYC + * 220 N PM_CQ_EXCEED_CYC + * 30c N PM_GPR_RB_EXCEED_CYC + * 30d ? PM_FPR_IQ_EXCEED_CYC ? + * 311 Y PM_ITLB_SEARCH_EXCEED + * 410 N PM_GPR_IQ_EXCEED_CYC + */ + +/* + * Return use of threshold and threshold scale bits: + * 0 = uses neither, 1 = uses threshold, 2 = uses both + */ +static int mpc7450_threshold_use(u32 event) +{ + int pmc, sel; + + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + sel = event & PM_PMCSEL_MSK; + switch (pmc) { + case 1: + if (sel == 0x1e || sel == 0x1f) + return 1; + if (sel == 0x28 || sel == 0x2b) + return 2; + break; + case 2: + if (sel == 0x20) + return 1; + break; + case 3: + if (sel == 0xc || sel == 0xd) + return 1; + if (sel == 0x11) + return 2; + break; + case 4: + if (sel == 0x10) + return 1; + break; + } + return 0; +} + +/* + * Layout of constraint bits: + * 33222222222211111111110000000000 + * 10987654321098765432109876543210 + * |< >< > < > < ><><><><><><> + * TS TV G4 G3 G2P6P5P4P3P2P1 + * + * P1 - P6 + * 0 - 11: Count of events needing PMC1 .. PMC6 + * + * G2 + * 12 - 14: Count of events needing PMC1 or PMC2 + * + * G3 + * 16 - 18: Count of events needing PMC1, PMC2 or PMC4 + * + * G4 + * 20 - 23: Count of events needing PMC1, PMC2, PMC3 or PMC4 + * + * TV + * 24 - 29: Threshold value requested + * + * TS + * 30: Threshold scale value requested + */ + +static u32 pmcbits[N_COUNTER][2] = { + { 0x00844002, 0x00111001 }, /* PMC1 mask, value: P1,G2,G3,G4 */ + { 0x00844008, 0x00111004 }, /* PMC2: P2,G2,G3,G4 */ + { 0x00800020, 0x00100010 }, /* PMC3: P3,G4 */ + { 0x00840080, 0x00110040 }, /* PMC4: P4,G3,G4 */ + { 0x00000200, 0x00000100 }, /* PMC5: P5 */ + { 0x00000800, 0x00000400 } /* PMC6: P6 */ +}; + +static u32 classbits[N_CLASSES - 1][2] = { + { 0x00000000, 0x00000000 }, /* class 0: no constraint */ + { 0x00800000, 0x00100000 }, /* class 1: G4 */ + { 0x00040000, 0x00010000 }, /* class 2: G3 */ + { 0x00004000, 0x00001000 }, /* class 3: G2 */ +}; + +static int mpc7450_get_constraint(u64 event, unsigned long *maskp, + unsigned long *valp) +{ + int pmc, class; + u32 mask, value; + int thresh, tuse; + + class = mpc7450_classify_event(event); + if (class < 0) + return -1; + if (class == 4) { + pmc = ((unsigned int)event >> PM_PMC_SH) & PM_PMC_MSK; + mask = pmcbits[pmc - 1][0]; + value = pmcbits[pmc - 1][1]; + } else { + mask = classbits[class][0]; + value = classbits[class][1]; + } + + tuse = mpc7450_threshold_use(event); + if (tuse) { + thresh = ((unsigned int)event >> PM_THRESH_SH) & PM_THRESH_MSK; + mask |= 0x3f << 24; + value |= thresh << 24; + if (tuse == 2) { + mask |= 0x40000000; + if ((unsigned int)event & PM_THRMULT_MSKS) + value |= 0x40000000; + } + } + + *maskp = mask; + *valp = value; + return 0; +} + +static const unsigned int event_alternatives[][MAX_ALT] = { + { 0x217, 0x317 }, /* PM_L1_DCACHE_MISS */ + { 0x418, 0x50f, 0x60f }, /* PM_SNOOP_RETRY */ + { 0x502, 0x602 }, /* PM_L2_HIT */ + { 0x503, 0x603 }, /* PM_L3_HIT */ + { 0x504, 0x604 }, /* PM_L2_ICACHE_MISS */ + { 0x505, 0x605 }, /* PM_L3_ICACHE_MISS */ + { 0x506, 0x606 }, /* PM_L2_DCACHE_MISS */ + { 0x507, 0x607 }, /* PM_L3_DCACHE_MISS */ + { 0x50a, 0x623 }, /* PM_LD_HIT_L3 */ + { 0x50b, 0x624 }, /* PM_ST_HIT_L3 */ + { 0x50d, 0x60d }, /* PM_L2_TOUCH_HIT */ + { 0x50e, 0x60e }, /* PM_L3_TOUCH_HIT */ + { 0x512, 0x612 }, /* PM_INT_LOCAL */ + { 0x513, 0x61d }, /* PM_L2_MISS */ + { 0x514, 0x61e }, /* PM_L3_MISS */ +}; + +/* + * Scan the alternatives table for a match and return the + * index into the alternatives table if found, else -1. + */ +static int find_alternative(u32 event) +{ + int i, j; + + for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { + if (event < event_alternatives[i][0]) + break; + for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) + if (event == event_alternatives[i][j]) + return i; + } + return -1; +} + +static int mpc7450_get_alternatives(u64 event, unsigned int flags, u64 alt[]) +{ + int i, j, nalt = 1; + u32 ae; + + alt[0] = event; + nalt = 1; + i = find_alternative((u32)event); + if (i >= 0) { + for (j = 0; j < MAX_ALT; ++j) { + ae = event_alternatives[i][j]; + if (ae && ae != (u32)event) + alt[nalt++] = ae; + } + } + return nalt; +} + +/* + * Bitmaps of which PMCs each class can use for classes 0 - 3. + * Bit i is set if PMC i+1 is usable. + */ +static const u8 classmap[N_CLASSES] = { + 0x3f, 0x0f, 0x0b, 0x03, 0 +}; + +/* Bit position and width of each PMCSEL field */ +static const int pmcsel_shift[N_COUNTER] = { + 6, 0, 27, 22, 17, 11 +}; +static const u32 pmcsel_mask[N_COUNTER] = { + 0x7f, 0x3f, 0x1f, 0x1f, 0x1f, 0x3f +}; + +/* + * Compute MMCR0/1/2 values for a set of events. + */ +static int mpc7450_compute_mmcr(u64 event[], int n_ev, + unsigned int hwc[], unsigned long mmcr[]) +{ + u8 event_index[N_CLASSES][N_COUNTER]; + int n_classevent[N_CLASSES]; + int i, j, class, tuse; + u32 pmc_inuse = 0, pmc_avail; + u32 mmcr0 = 0, mmcr1 = 0, mmcr2 = 0; + u32 ev, pmc, thresh; + + if (n_ev > N_COUNTER) + return -1; + + /* First pass: count usage in each class */ + for (i = 0; i < N_CLASSES; ++i) + n_classevent[i] = 0; + for (i = 0; i < n_ev; ++i) { + class = mpc7450_classify_event(event[i]); + if (class < 0) + return -1; + j = n_classevent[class]++; + event_index[class][j] = i; + } + + /* Second pass: allocate PMCs from most specific event to least */ + for (class = N_CLASSES - 1; class >= 0; --class) { + for (i = 0; i < n_classevent[class]; ++i) { + ev = event[event_index[class][i]]; + if (class == 4) { + pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc_inuse & (1 << (pmc - 1))) + return -1; + } else { + /* Find a suitable PMC */ + pmc_avail = classmap[class] & ~pmc_inuse; + if (!pmc_avail) + return -1; + pmc = ffs(pmc_avail); + } + pmc_inuse |= 1 << (pmc - 1); + + tuse = mpc7450_threshold_use(ev); + if (tuse) { + thresh = (ev >> PM_THRESH_SH) & PM_THRESH_MSK; + mmcr0 |= thresh << 16; + if (tuse == 2 && (ev & PM_THRMULT_MSKS)) + mmcr2 = 0x80000000; + } + ev &= pmcsel_mask[pmc - 1]; + ev <<= pmcsel_shift[pmc - 1]; + if (pmc <= 2) + mmcr0 |= ev; + else + mmcr1 |= ev; + hwc[event_index[class][i]] = pmc - 1; + } + } + + if (pmc_inuse & 1) + mmcr0 |= MMCR0_PMC1CE; + if (pmc_inuse & 0x3e) + mmcr0 |= MMCR0_PMCnCE; + + /* Return MMCRx values */ + mmcr[0] = mmcr0; + mmcr[1] = mmcr1; + mmcr[2] = mmcr2; + return 0; +} + +/* + * Disable counting by a PMC. + * Note that the pmc argument is 0-based here, not 1-based. + */ +static void mpc7450_disable_pmc(unsigned int pmc, unsigned long mmcr[]) +{ + if (pmc <= 1) + mmcr[0] &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]); + else + mmcr[1] &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]); +} + +static int mpc7450_generic_events[] = { + [PERF_COUNT_HW_CPU_CYCLES] = 1, + [PERF_COUNT_HW_INSTRUCTIONS] = 2, + [PERF_COUNT_HW_CACHE_MISSES] = 0x217, /* PM_L1_DCACHE_MISS */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x122, /* PM_BR_CMPL */ + [PERF_COUNT_HW_BRANCH_MISSES] = 0x41c, /* PM_BR_MPRED */ +}; + +#define C(x) PERF_COUNT_HW_CACHE_##x + +/* + * Table of generalized cache-related events. + * 0 means not supported, -1 means nonsensical, other values + * are event codes. + */ +static int mpc7450_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { + [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0x225 }, + [C(OP_WRITE)] = { 0, 0x227 }, + [C(OP_PREFETCH)] = { 0, 0 }, + }, + [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0x129, 0x115 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { 0x634, 0 }, + }, + [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0 }, + [C(OP_WRITE)] = { 0, 0 }, + [C(OP_PREFETCH)] = { 0, 0 }, + }, + [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0x312 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, + [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0x223 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, + [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0x122, 0x41c }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, +}; + +struct power_pmu mpc7450_pmu = { + .name = "MPC7450 family", + .n_counter = N_COUNTER, + .max_alternatives = MAX_ALT, + .add_fields = 0x00111555ul, + .test_adder = 0x00301000ul, + .compute_mmcr = mpc7450_compute_mmcr, + .get_constraint = mpc7450_get_constraint, + .get_alternatives = mpc7450_get_alternatives, + .disable_pmc = mpc7450_disable_pmc, + .n_generic = ARRAY_SIZE(mpc7450_generic_events), + .generic_events = mpc7450_generic_events, + .cache_events = &mpc7450_cache_events, +}; + +static int init_mpc7450_pmu(void) +{ + if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/7450")) + return -ENODEV; + + return register_power_pmu(&mpc7450_pmu); +} + +arch_initcall(init_mpc7450_pmu); diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index bb202388170e..809fdf94b95f 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -29,7 +29,7 @@ struct cpu_hw_counters { struct perf_counter *counter[MAX_HWCOUNTERS]; u64 events[MAX_HWCOUNTERS]; unsigned int flags[MAX_HWCOUNTERS]; - u64 mmcr[3]; + unsigned long mmcr[3]; struct perf_counter *limited_counter[MAX_LIMITED_HWCOUNTERS]; u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS]; }; @@ -46,6 +46,115 @@ struct power_pmu *ppmu; */ static unsigned int freeze_counters_kernel = MMCR0_FCS; +/* + * 32-bit doesn't have MMCRA but does have an MMCR2, + * and a few other names are different. + */ +#ifdef CONFIG_PPC32 + +#define MMCR0_FCHV 0 +#define MMCR0_PMCjCE MMCR0_PMCnCE + +#define SPRN_MMCRA SPRN_MMCR2 +#define MMCRA_SAMPLE_ENABLE 0 + +static inline unsigned long perf_ip_adjust(struct pt_regs *regs) +{ + return 0; +} +static inline void perf_set_pmu_inuse(int inuse) { } +static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { } +static inline u32 perf_get_misc_flags(struct pt_regs *regs) +{ + return 0; +} +static inline void perf_read_regs(struct pt_regs *regs) { } +static inline int perf_intr_is_nmi(struct pt_regs *regs) +{ + return 0; +} + +#endif /* CONFIG_PPC32 */ + +/* + * Things that are specific to 64-bit implementations. + */ +#ifdef CONFIG_PPC64 + +static inline unsigned long perf_ip_adjust(struct pt_regs *regs) +{ + unsigned long mmcra = regs->dsisr; + + if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) { + unsigned long slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT; + if (slot > 1) + return 4 * (slot - 1); + } + return 0; +} + +static inline void perf_set_pmu_inuse(int inuse) +{ + get_lppaca()->pmcregs_in_use = inuse; +} + +/* + * The user wants a data address recorded. + * If we're not doing instruction sampling, give them the SDAR + * (sampled data address). If we are doing instruction sampling, then + * only give them the SDAR if it corresponds to the instruction + * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC + * bit in MMCRA. + */ +static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) +{ + unsigned long mmcra = regs->dsisr; + unsigned long sdsync = (ppmu->flags & PPMU_ALT_SIPR) ? + POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC; + + if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync)) + *addrp = mfspr(SPRN_SDAR); +} + +static inline u32 perf_get_misc_flags(struct pt_regs *regs) +{ + unsigned long mmcra = regs->dsisr; + + if (TRAP(regs) != 0xf00) + return 0; /* not a PMU interrupt */ + + if (ppmu->flags & PPMU_ALT_SIPR) { + if (mmcra & POWER6_MMCRA_SIHV) + return PERF_EVENT_MISC_HYPERVISOR; + return (mmcra & POWER6_MMCRA_SIPR) ? + PERF_EVENT_MISC_USER : PERF_EVENT_MISC_KERNEL; + } + if (mmcra & MMCRA_SIHV) + return PERF_EVENT_MISC_HYPERVISOR; + return (mmcra & MMCRA_SIPR) ? PERF_EVENT_MISC_USER : + PERF_EVENT_MISC_KERNEL; +} + +/* + * Overload regs->dsisr to store MMCRA so we only need to read it once + * on each interrupt. + */ +static inline void perf_read_regs(struct pt_regs *regs) +{ + regs->dsisr = mfspr(SPRN_MMCRA); +} + +/* + * If interrupts were soft-disabled when a PMU interrupt occurs, treat + * it as an NMI. + */ +static inline int perf_intr_is_nmi(struct pt_regs *regs) +{ + return !regs->softe; +} + +#endif /* CONFIG_PPC64 */ + static void perf_counter_interrupt(struct pt_regs *regs); void perf_counter_print_debug(void) @@ -78,12 +187,14 @@ static unsigned long read_pmc(int idx) case 6: val = mfspr(SPRN_PMC6); break; +#ifdef CONFIG_PPC64 case 7: val = mfspr(SPRN_PMC7); break; case 8: val = mfspr(SPRN_PMC8); break; +#endif /* CONFIG_PPC64 */ default: printk(KERN_ERR "oops trying to read PMC%d\n", idx); val = 0; @@ -115,12 +226,14 @@ static void write_pmc(int idx, unsigned long val) case 6: mtspr(SPRN_PMC6, val); break; +#ifdef CONFIG_PPC64 case 7: mtspr(SPRN_PMC7, val); break; case 8: mtspr(SPRN_PMC8, val); break; +#endif /* CONFIG_PPC64 */ default: printk(KERN_ERR "oops trying to write PMC%d\n", idx); } @@ -135,15 +248,15 @@ static void write_pmc(int idx, unsigned long val) static int power_check_constraints(u64 event[], unsigned int cflags[], int n_ev) { - u64 mask, value, nv; + unsigned long mask, value, nv; u64 alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; - u64 amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; - u64 avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; - u64 smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS]; + unsigned long amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; + unsigned long avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; + unsigned long smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS]; int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS]; int i, j; - u64 addf = ppmu->add_fields; - u64 tadd = ppmu->test_adder; + unsigned long addf = ppmu->add_fields; + unsigned long tadd = ppmu->test_adder; if (n_ev > ppmu->n_counter) return -1; @@ -283,7 +396,7 @@ static int check_excludes(struct perf_counter **ctrs, unsigned int cflags[], static void power_pmu_read(struct perf_counter *counter) { - long val, delta, prev; + s64 val, delta, prev; if (!counter->hw.idx) return; @@ -403,14 +516,12 @@ static void write_mmcr0(struct cpu_hw_counters *cpuhw, unsigned long mmcr0) void hw_perf_disable(void) { struct cpu_hw_counters *cpuhw; - unsigned long ret; unsigned long flags; local_irq_save(flags); cpuhw = &__get_cpu_var(cpu_hw_counters); - ret = cpuhw->disabled; - if (!ret) { + if (!cpuhw->disabled) { cpuhw->disabled = 1; cpuhw->n_added = 0; @@ -479,7 +590,7 @@ void hw_perf_enable(void) mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); if (cpuhw->n_counters == 0) - get_lppaca()->pmcregs_in_use = 0; + perf_set_pmu_inuse(0); goto out_enable; } @@ -512,7 +623,7 @@ void hw_perf_enable(void) * bit set and set the hardware counters to their initial values. * Then unfreeze the counters. */ - get_lppaca()->pmcregs_in_use = 1; + perf_set_pmu_inuse(1); mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) @@ -913,6 +1024,8 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) case PERF_TYPE_RAW: ev = counter->attr.config; break; + default: + return ERR_PTR(-EINVAL); } counter->hw.config_base = ev; counter->hw.idx = 0; @@ -1007,13 +1120,12 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) * things if requested. Note that interrupts are hard-disabled * here so there is no possibility of being interrupted. */ -static void record_and_restart(struct perf_counter *counter, long val, +static void record_and_restart(struct perf_counter *counter, unsigned long val, struct pt_regs *regs, int nmi) { u64 period = counter->hw.sample_period; s64 prev, delta, left; int record = 0; - u64 addr, mmcra, sdsync; /* we don't have to worry about interrupts here */ prev = atomic64_read(&counter->hw.prev_count); @@ -1033,8 +1145,8 @@ static void record_and_restart(struct perf_counter *counter, long val, left = period; record = 1; } - if (left < 0x80000000L) - val = 0x80000000L - left; + if (left < 0x80000000LL) + val = 0x80000000LL - left; } /* @@ -1047,22 +1159,9 @@ static void record_and_restart(struct perf_counter *counter, long val, .period = counter->hw.last_period, }; - if (counter->attr.sample_type & PERF_SAMPLE_ADDR) { - /* - * The user wants a data address recorded. - * If we're not doing instruction sampling, - * give them the SDAR (sampled data address). - * If we are doing instruction sampling, then only - * give them the SDAR if it corresponds to the - * instruction pointed to by SIAR; this is indicated - * by the [POWER6_]MMCRA_SDSYNC bit in MMCRA. - */ - mmcra = regs->dsisr; - sdsync = (ppmu->flags & PPMU_ALT_SIPR) ? - POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC; - if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync)) - data.addr = mfspr(SPRN_SDAR); - } + if (counter->attr.sample_type & PERF_SAMPLE_ADDR) + perf_get_data_addr(regs, &data.addr); + if (perf_counter_overflow(counter, nmi, &data)) { /* * Interrupts are coming too fast - throttle them @@ -1088,25 +1187,12 @@ static void record_and_restart(struct perf_counter *counter, long val, */ unsigned long perf_misc_flags(struct pt_regs *regs) { - unsigned long mmcra; - - if (TRAP(regs) != 0xf00) { - /* not a PMU interrupt */ - return user_mode(regs) ? PERF_EVENT_MISC_USER : - PERF_EVENT_MISC_KERNEL; - } + u32 flags = perf_get_misc_flags(regs); - mmcra = regs->dsisr; - if (ppmu->flags & PPMU_ALT_SIPR) { - if (mmcra & POWER6_MMCRA_SIHV) - return PERF_EVENT_MISC_HYPERVISOR; - return (mmcra & POWER6_MMCRA_SIPR) ? PERF_EVENT_MISC_USER : - PERF_EVENT_MISC_KERNEL; - } - if (mmcra & MMCRA_SIHV) - return PERF_EVENT_MISC_HYPERVISOR; - return (mmcra & MMCRA_SIPR) ? PERF_EVENT_MISC_USER : - PERF_EVENT_MISC_KERNEL; + if (flags) + return flags; + return user_mode(regs) ? PERF_EVENT_MISC_USER : + PERF_EVENT_MISC_KERNEL; } /* @@ -1115,20 +1201,12 @@ unsigned long perf_misc_flags(struct pt_regs *regs) */ unsigned long perf_instruction_pointer(struct pt_regs *regs) { - unsigned long mmcra; unsigned long ip; - unsigned long slot; if (TRAP(regs) != 0xf00) return regs->nip; /* not a PMU interrupt */ - ip = mfspr(SPRN_SIAR); - mmcra = regs->dsisr; - if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) { - slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT; - if (slot > 1) - ip += 4 * (slot - 1); - } + ip = mfspr(SPRN_SIAR) + perf_ip_adjust(regs); return ip; } @@ -1140,7 +1218,7 @@ static void perf_counter_interrupt(struct pt_regs *regs) int i; struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters); struct perf_counter *counter; - long val; + unsigned long val; int found = 0; int nmi; @@ -1148,16 +1226,9 @@ static void perf_counter_interrupt(struct pt_regs *regs) freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5), mfspr(SPRN_PMC6)); - /* - * Overload regs->dsisr to store MMCRA so we only need to read it once. - */ - regs->dsisr = mfspr(SPRN_MMCRA); + perf_read_regs(regs); - /* - * If interrupts were soft-disabled when this PMU interrupt - * occurred, treat it as an NMI. - */ - nmi = !regs->softe; + nmi = perf_intr_is_nmi(regs); if (nmi) nmi_enter(); else @@ -1214,50 +1285,22 @@ void hw_perf_counter_setup(int cpu) cpuhw->mmcr[0] = MMCR0_FC; } -extern struct power_pmu power4_pmu; -extern struct power_pmu ppc970_pmu; -extern struct power_pmu power5_pmu; -extern struct power_pmu power5p_pmu; -extern struct power_pmu power6_pmu; -extern struct power_pmu power7_pmu; - -static int init_perf_counters(void) +int register_power_pmu(struct power_pmu *pmu) { - unsigned long pvr; - - /* XXX should get this from cputable */ - pvr = mfspr(SPRN_PVR); - switch (PVR_VER(pvr)) { - case PV_POWER4: - case PV_POWER4p: - ppmu = &power4_pmu; - break; - case PV_970: - case PV_970FX: - case PV_970MP: - ppmu = &ppc970_pmu; - break; - case PV_POWER5: - ppmu = &power5_pmu; - break; - case PV_POWER5p: - ppmu = &power5p_pmu; - break; - case 0x3e: - ppmu = &power6_pmu; - break; - case 0x3f: - ppmu = &power7_pmu; - break; - } + if (ppmu) + return -EBUSY; /* something's already registered */ + + ppmu = pmu; + pr_info("%s performance monitor hardware support registered\n", + pmu->name); +#ifdef MSR_HV /* * Use FCHV to ignore kernel events if MSR.HV is set. */ if (mfmsr() & MSR_HV) freeze_counters_kernel = MMCR0_FCHV; +#endif /* CONFIG_PPC64 */ return 0; } - -arch_initcall(init_perf_counters); diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c index 07bd308a5fa7..db90b0c5c27b 100644 --- a/arch/powerpc/kernel/power4-pmu.c +++ b/arch/powerpc/kernel/power4-pmu.c @@ -10,7 +10,9 @@ */ #include <linux/kernel.h> #include <linux/perf_counter.h> +#include <linux/string.h> #include <asm/reg.h> +#include <asm/cputable.h> /* * Bits in event code for POWER4 @@ -179,22 +181,22 @@ static short mmcr1_adder_bits[8] = { */ static struct unitinfo { - u64 value, mask; - int unit; - int lowerbit; + unsigned long value, mask; + int unit; + int lowerbit; } p4_unitinfo[16] = { - [PM_FPU] = { 0x44000000000000ull, 0x88000000000000ull, PM_FPU, 0 }, - [PM_ISU1] = { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 }, + [PM_FPU] = { 0x44000000000000ul, 0x88000000000000ul, PM_FPU, 0 }, + [PM_ISU1] = { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 }, [PM_ISU1_ALT] = - { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 }, - [PM_IFU] = { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 }, + { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 }, + [PM_IFU] = { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 }, [PM_IFU_ALT] = - { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 }, - [PM_IDU0] = { 0x10100000000000ull, 0x80840000000000ull, PM_IDU0, 1 }, - [PM_ISU2] = { 0x10140000000000ull, 0x80840000000000ull, PM_ISU2, 0 }, - [PM_LSU0] = { 0x01400000000000ull, 0x08800000000000ull, PM_LSU0, 0 }, - [PM_LSU1] = { 0x00000000000000ull, 0x00010000000000ull, PM_LSU1, 40 }, - [PM_GPS] = { 0x00000000000000ull, 0x00000000000000ull, PM_GPS, 0 } + { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 }, + [PM_IDU0] = { 0x10100000000000ul, 0x80840000000000ul, PM_IDU0, 1 }, + [PM_ISU2] = { 0x10140000000000ul, 0x80840000000000ul, PM_ISU2, 0 }, + [PM_LSU0] = { 0x01400000000000ul, 0x08800000000000ul, PM_LSU0, 0 }, + [PM_LSU1] = { 0x00000000000000ul, 0x00010000000000ul, PM_LSU1, 40 }, + [PM_GPS] = { 0x00000000000000ul, 0x00000000000000ul, PM_GPS, 0 } }; static unsigned char direct_marked_event[8] = { @@ -249,10 +251,11 @@ static int p4_marked_instr_event(u64 event) return (mask >> (byte * 8 + bit)) & 1; } -static int p4_get_constraint(u64 event, u64 *maskp, u64 *valp) +static int p4_get_constraint(u64 event, unsigned long *maskp, + unsigned long *valp) { int pmc, byte, unit, lower, sh; - u64 mask = 0, value = 0; + unsigned long mask = 0, value = 0; int grp = -1; pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; @@ -282,14 +285,14 @@ static int p4_get_constraint(u64 event, u64 *maskp, u64 *valp) value |= p4_unitinfo[unit].value; sh = p4_unitinfo[unit].lowerbit; if (sh > 1) - value |= (u64)lower << sh; + value |= (unsigned long)lower << sh; else if (lower != sh) return -1; unit = p4_unitinfo[unit].unit; /* Set byte lane select field */ mask |= 0xfULL << (28 - 4 * byte); - value |= (u64)unit << (28 - 4 * byte); + value |= (unsigned long)unit << (28 - 4 * byte); } if (grp == 0) { /* increment PMC1/2/5/6 field */ @@ -353,9 +356,9 @@ static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[]) } static int p4_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], u64 mmcr[]) + unsigned int hwc[], unsigned long mmcr[]) { - u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0; + unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0; unsigned int pmc, unit, byte, psel, lower; unsigned int ttm, grp; unsigned int pmc_inuse = 0; @@ -429,9 +432,11 @@ static int p4_compute_mmcr(u64 event[], int n_ev, return -1; /* Set TTMxSEL fields. Note, units 1-3 => TTM0SEL codes 0-2 */ - mmcr1 |= (u64)(unituse[3] * 2 + unituse[2]) << MMCR1_TTM0SEL_SH; - mmcr1 |= (u64)(unituse[7] * 3 + unituse[6] * 2) << MMCR1_TTM1SEL_SH; - mmcr1 |= (u64)unituse[9] << MMCR1_TTM2SEL_SH; + mmcr1 |= (unsigned long)(unituse[3] * 2 + unituse[2]) + << MMCR1_TTM0SEL_SH; + mmcr1 |= (unsigned long)(unituse[7] * 3 + unituse[6] * 2) + << MMCR1_TTM1SEL_SH; + mmcr1 |= (unsigned long)unituse[9] << MMCR1_TTM2SEL_SH; /* Set TTCxSEL fields. */ if (unitlower & 0xe) @@ -456,7 +461,8 @@ static int p4_compute_mmcr(u64 event[], int n_ev, ttm = unit - 1; /* 2->1, 3->2 */ else ttm = unit >> 2; - mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2*byte); + mmcr1 |= (unsigned long)ttm + << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); } } @@ -519,7 +525,7 @@ static int p4_compute_mmcr(u64 event[], int n_ev, return 0; } -static void p4_disable_pmc(unsigned int pmc, u64 mmcr[]) +static void p4_disable_pmc(unsigned int pmc, unsigned long mmcr[]) { /* * Setting the PMCxSEL field to 0 disables PMC x. @@ -583,16 +589,27 @@ static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { }, }; -struct power_pmu power4_pmu = { - .n_counter = 8, - .max_alternatives = 5, - .add_fields = 0x0000001100005555ull, - .test_adder = 0x0011083300000000ull, - .compute_mmcr = p4_compute_mmcr, - .get_constraint = p4_get_constraint, - .get_alternatives = p4_get_alternatives, - .disable_pmc = p4_disable_pmc, - .n_generic = ARRAY_SIZE(p4_generic_events), - .generic_events = p4_generic_events, - .cache_events = &power4_cache_events, +static struct power_pmu power4_pmu = { + .name = "POWER4/4+", + .n_counter = 8, + .max_alternatives = 5, + .add_fields = 0x0000001100005555ul, + .test_adder = 0x0011083300000000ul, + .compute_mmcr = p4_compute_mmcr, + .get_constraint = p4_get_constraint, + .get_alternatives = p4_get_alternatives, + .disable_pmc = p4_disable_pmc, + .n_generic = ARRAY_SIZE(p4_generic_events), + .generic_events = p4_generic_events, + .cache_events = &power4_cache_events, }; + +static int init_power4_pmu(void) +{ + if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power4")) + return -ENODEV; + + return register_power_pmu(&power4_pmu); +} + +arch_initcall(init_power4_pmu); diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c index 41e5d2d958d4..f4adca8e98a4 100644 --- a/arch/powerpc/kernel/power5+-pmu.c +++ b/arch/powerpc/kernel/power5+-pmu.c @@ -10,7 +10,9 @@ */ #include <linux/kernel.h> #include <linux/perf_counter.h> +#include <linux/string.h> #include <asm/reg.h> +#include <asm/cputable.h> /* * Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3) @@ -126,20 +128,21 @@ static const int grsel_shift[8] = { }; /* Masks and values for using events from the various units */ -static u64 unit_cons[PM_LASTUNIT+1][2] = { - [PM_FPU] = { 0x3200000000ull, 0x0100000000ull }, - [PM_ISU0] = { 0x0200000000ull, 0x0080000000ull }, - [PM_ISU1] = { 0x3200000000ull, 0x3100000000ull }, - [PM_IFU] = { 0x3200000000ull, 0x2100000000ull }, - [PM_IDU] = { 0x0e00000000ull, 0x0040000000ull }, - [PM_GRS] = { 0x0e00000000ull, 0x0c40000000ull }, +static unsigned long unit_cons[PM_LASTUNIT+1][2] = { + [PM_FPU] = { 0x3200000000ul, 0x0100000000ul }, + [PM_ISU0] = { 0x0200000000ul, 0x0080000000ul }, + [PM_ISU1] = { 0x3200000000ul, 0x3100000000ul }, + [PM_IFU] = { 0x3200000000ul, 0x2100000000ul }, + [PM_IDU] = { 0x0e00000000ul, 0x0040000000ul }, + [PM_GRS] = { 0x0e00000000ul, 0x0c40000000ul }, }; -static int power5p_get_constraint(u64 event, u64 *maskp, u64 *valp) +static int power5p_get_constraint(u64 event, unsigned long *maskp, + unsigned long *valp) { int pmc, byte, unit, sh; int bit, fmask; - u64 mask = 0, value = 0; + unsigned long mask = 0, value = 0; pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; if (pmc) { @@ -171,17 +174,18 @@ static int power5p_get_constraint(u64 event, u64 *maskp, u64 *valp) bit = event & 7; fmask = (bit == 6)? 7: 3; sh = grsel_shift[bit]; - mask |= (u64)fmask << sh; - value |= (u64)((event >> PM_GRS_SH) & fmask) << sh; + mask |= (unsigned long)fmask << sh; + value |= (unsigned long)((event >> PM_GRS_SH) & fmask) + << sh; } /* Set byte lane select field */ - mask |= 0xfULL << (24 - 4 * byte); - value |= (u64)unit << (24 - 4 * byte); + mask |= 0xfUL << (24 - 4 * byte); + value |= (unsigned long)unit << (24 - 4 * byte); } if (pmc < 5) { /* need a counter from PMC1-4 set */ - mask |= 0x8000000000000ull; - value |= 0x1000000000000ull; + mask |= 0x8000000000000ul; + value |= 0x1000000000000ul; } *maskp = mask; *valp = value; @@ -452,10 +456,10 @@ static int power5p_marked_instr_event(u64 event) } static int power5p_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], u64 mmcr[]) + unsigned int hwc[], unsigned long mmcr[]) { - u64 mmcr1 = 0; - u64 mmcra = 0; + unsigned long mmcr1 = 0; + unsigned long mmcra = 0; unsigned int pmc, unit, byte, psel; unsigned int ttm; int i, isbus, bit, grsel; @@ -517,7 +521,7 @@ static int power5p_compute_mmcr(u64 event[], int n_ev, continue; if (ttmuse++) return -1; - mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH; + mmcr1 |= (unsigned long)i << MMCR1_TTM0SEL_SH; } ttmuse = 0; for (; i <= PM_GRS; ++i) { @@ -525,7 +529,7 @@ static int power5p_compute_mmcr(u64 event[], int n_ev, continue; if (ttmuse++) return -1; - mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH; + mmcr1 |= (unsigned long)(i & 3) << MMCR1_TTM1SEL_SH; } if (ttmuse > 1) return -1; @@ -540,10 +544,11 @@ static int power5p_compute_mmcr(u64 event[], int n_ev, unit = PM_ISU0_ALT; } else if (unit == PM_LSU1 + 1) { /* select lower word of LSU1 for this byte */ - mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte); + mmcr1 |= 1ul << (MMCR1_TTM3SEL_SH + 3 - byte); } ttm = unit >> 2; - mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); + mmcr1 |= (unsigned long)ttm + << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); } /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ @@ -568,7 +573,7 @@ static int power5p_compute_mmcr(u64 event[], int n_ev, if (isbus && (byte & 2) && (psel == 8 || psel == 0x10 || psel == 0x28)) /* add events on higher-numbered bus */ - mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc); + mmcr1 |= 1ul << (MMCR1_PMC1_ADDER_SEL_SH - pmc); } else { /* Instructions or run cycles on PMC5/6 */ --pmc; @@ -576,7 +581,7 @@ static int power5p_compute_mmcr(u64 event[], int n_ev, if (isbus && unit == PM_GRS) { bit = psel & 7; grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK; - mmcr1 |= (u64)grsel << grsel_shift[bit]; + mmcr1 |= (unsigned long)grsel << grsel_shift[bit]; } if (power5p_marked_instr_event(event[i])) mmcra |= MMCRA_SAMPLE_ENABLE; @@ -599,7 +604,7 @@ static int power5p_compute_mmcr(u64 event[], int n_ev, return 0; } -static void power5p_disable_pmc(unsigned int pmc, u64 mmcr[]) +static void power5p_disable_pmc(unsigned int pmc, unsigned long mmcr[]) { if (pmc <= 3) mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); @@ -654,18 +659,30 @@ static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { }, }; -struct power_pmu power5p_pmu = { - .n_counter = 6, - .max_alternatives = MAX_ALT, - .add_fields = 0x7000000000055ull, - .test_adder = 0x3000040000000ull, - .compute_mmcr = power5p_compute_mmcr, - .get_constraint = power5p_get_constraint, - .get_alternatives = power5p_get_alternatives, - .disable_pmc = power5p_disable_pmc, - .limited_pmc_event = power5p_limited_pmc_event, - .flags = PPMU_LIMITED_PMC5_6, - .n_generic = ARRAY_SIZE(power5p_generic_events), - .generic_events = power5p_generic_events, - .cache_events = &power5p_cache_events, +static struct power_pmu power5p_pmu = { + .name = "POWER5+/++", + .n_counter = 6, + .max_alternatives = MAX_ALT, + .add_fields = 0x7000000000055ul, + .test_adder = 0x3000040000000ul, + .compute_mmcr = power5p_compute_mmcr, + .get_constraint = power5p_get_constraint, + .get_alternatives = power5p_get_alternatives, + .disable_pmc = power5p_disable_pmc, + .limited_pmc_event = power5p_limited_pmc_event, + .flags = PPMU_LIMITED_PMC5_6, + .n_generic = ARRAY_SIZE(power5p_generic_events), + .generic_events = power5p_generic_events, + .cache_events = &power5p_cache_events, }; + +static int init_power5p_pmu(void) +{ + if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5+") + && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5++")) + return -ENODEV; + + return register_power_pmu(&power5p_pmu); +} + +arch_initcall(init_power5p_pmu); diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c index 05600b66221a..29b2c6c0e83a 100644 --- a/arch/powerpc/kernel/power5-pmu.c +++ b/arch/powerpc/kernel/power5-pmu.c @@ -10,7 +10,9 @@ */ #include <linux/kernel.h> #include <linux/perf_counter.h> +#include <linux/string.h> #include <asm/reg.h> +#include <asm/cputable.h> /* * Bits in event code for POWER5 (not POWER5++) @@ -130,20 +132,21 @@ static const int grsel_shift[8] = { }; /* Masks and values for using events from the various units */ -static u64 unit_cons[PM_LASTUNIT+1][2] = { - [PM_FPU] = { 0xc0002000000000ull, 0x00001000000000ull }, - [PM_ISU0] = { 0x00002000000000ull, 0x00000800000000ull }, - [PM_ISU1] = { 0xc0002000000000ull, 0xc0001000000000ull }, - [PM_IFU] = { 0xc0002000000000ull, 0x80001000000000ull }, - [PM_IDU] = { 0x30002000000000ull, 0x00000400000000ull }, - [PM_GRS] = { 0x30002000000000ull, 0x30000400000000ull }, +static unsigned long unit_cons[PM_LASTUNIT+1][2] = { + [PM_FPU] = { 0xc0002000000000ul, 0x00001000000000ul }, + [PM_ISU0] = { 0x00002000000000ul, 0x00000800000000ul }, + [PM_ISU1] = { 0xc0002000000000ul, 0xc0001000000000ul }, + [PM_IFU] = { 0xc0002000000000ul, 0x80001000000000ul }, + [PM_IDU] = { 0x30002000000000ul, 0x00000400000000ul }, + [PM_GRS] = { 0x30002000000000ul, 0x30000400000000ul }, }; -static int power5_get_constraint(u64 event, u64 *maskp, u64 *valp) +static int power5_get_constraint(u64 event, unsigned long *maskp, + unsigned long *valp) { int pmc, byte, unit, sh; int bit, fmask; - u64 mask = 0, value = 0; + unsigned long mask = 0, value = 0; int grp = -1; pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; @@ -178,8 +181,9 @@ static int power5_get_constraint(u64 event, u64 *maskp, u64 *valp) bit = event & 7; fmask = (bit == 6)? 7: 3; sh = grsel_shift[bit]; - mask |= (u64)fmask << sh; - value |= (u64)((event >> PM_GRS_SH) & fmask) << sh; + mask |= (unsigned long)fmask << sh; + value |= (unsigned long)((event >> PM_GRS_SH) & fmask) + << sh; } /* * Bus events on bytes 0 and 2 can be counted @@ -188,22 +192,22 @@ static int power5_get_constraint(u64 event, u64 *maskp, u64 *valp) if (!pmc) grp = byte & 1; /* Set byte lane select field */ - mask |= 0xfULL << (24 - 4 * byte); - value |= (u64)unit << (24 - 4 * byte); + mask |= 0xfUL << (24 - 4 * byte); + value |= (unsigned long)unit << (24 - 4 * byte); } if (grp == 0) { /* increment PMC1/2 field */ - mask |= 0x200000000ull; - value |= 0x080000000ull; + mask |= 0x200000000ul; + value |= 0x080000000ul; } else if (grp == 1) { /* increment PMC3/4 field */ - mask |= 0x40000000ull; - value |= 0x10000000ull; + mask |= 0x40000000ul; + value |= 0x10000000ul; } if (pmc < 5) { /* need a counter from PMC1-4 set */ - mask |= 0x8000000000000ull; - value |= 0x1000000000000ull; + mask |= 0x8000000000000ul; + value |= 0x1000000000000ul; } *maskp = mask; *valp = value; @@ -383,10 +387,10 @@ static int power5_marked_instr_event(u64 event) } static int power5_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], u64 mmcr[]) + unsigned int hwc[], unsigned long mmcr[]) { - u64 mmcr1 = 0; - u64 mmcra = 0; + unsigned long mmcr1 = 0; + unsigned long mmcra = 0; unsigned int pmc, unit, byte, psel; unsigned int ttm, grp; int i, isbus, bit, grsel; @@ -457,7 +461,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev, continue; if (ttmuse++) return -1; - mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH; + mmcr1 |= (unsigned long)i << MMCR1_TTM0SEL_SH; } ttmuse = 0; for (; i <= PM_GRS; ++i) { @@ -465,7 +469,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev, continue; if (ttmuse++) return -1; - mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH; + mmcr1 |= (unsigned long)(i & 3) << MMCR1_TTM1SEL_SH; } if (ttmuse > 1) return -1; @@ -480,10 +484,11 @@ static int power5_compute_mmcr(u64 event[], int n_ev, unit = PM_ISU0_ALT; } else if (unit == PM_LSU1 + 1) { /* select lower word of LSU1 for this byte */ - mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte); + mmcr1 |= 1ul << (MMCR1_TTM3SEL_SH + 3 - byte); } ttm = unit >> 2; - mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); + mmcr1 |= (unsigned long)ttm + << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); } /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ @@ -513,7 +518,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev, --pmc; if ((psel == 8 || psel == 0x10) && isbus && (byte & 2)) /* add events on higher-numbered bus */ - mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc); + mmcr1 |= 1ul << (MMCR1_PMC1_ADDER_SEL_SH - pmc); } else { /* Instructions or run cycles on PMC5/6 */ --pmc; @@ -521,7 +526,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev, if (isbus && unit == PM_GRS) { bit = psel & 7; grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK; - mmcr1 |= (u64)grsel << grsel_shift[bit]; + mmcr1 |= (unsigned long)grsel << grsel_shift[bit]; } if (power5_marked_instr_event(event[i])) mmcra |= MMCRA_SAMPLE_ENABLE; @@ -541,7 +546,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev, return 0; } -static void power5_disable_pmc(unsigned int pmc, u64 mmcr[]) +static void power5_disable_pmc(unsigned int pmc, unsigned long mmcr[]) { if (pmc <= 3) mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); @@ -596,16 +601,27 @@ static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { }, }; -struct power_pmu power5_pmu = { - .n_counter = 6, - .max_alternatives = MAX_ALT, - .add_fields = 0x7000090000555ull, - .test_adder = 0x3000490000000ull, - .compute_mmcr = power5_compute_mmcr, - .get_constraint = power5_get_constraint, - .get_alternatives = power5_get_alternatives, - .disable_pmc = power5_disable_pmc, - .n_generic = ARRAY_SIZE(power5_generic_events), - .generic_events = power5_generic_events, - .cache_events = &power5_cache_events, +static struct power_pmu power5_pmu = { + .name = "POWER5", + .n_counter = 6, + .max_alternatives = MAX_ALT, + .add_fields = 0x7000090000555ul, + .test_adder = 0x3000490000000ul, + .compute_mmcr = power5_compute_mmcr, + .get_constraint = power5_get_constraint, + .get_alternatives = power5_get_alternatives, + .disable_pmc = power5_disable_pmc, + .n_generic = ARRAY_SIZE(power5_generic_events), + .generic_events = power5_generic_events, + .cache_events = &power5_cache_events, }; + +static int init_power5_pmu(void) +{ + if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5")) + return -ENODEV; + + return register_power_pmu(&power5_pmu); +} + +arch_initcall(init_power5_pmu); diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c index 46f74bebcfd9..09ae5bf5bda7 100644 --- a/arch/powerpc/kernel/power6-pmu.c +++ b/arch/powerpc/kernel/power6-pmu.c @@ -10,7 +10,9 @@ */ #include <linux/kernel.h> #include <linux/perf_counter.h> +#include <linux/string.h> #include <asm/reg.h> +#include <asm/cputable.h> /* * Bits in event code for POWER6 @@ -41,9 +43,9 @@ #define MMCR1_NESTSEL_SH 45 #define MMCR1_NESTSEL_MSK 0x7 #define MMCR1_NESTSEL(m) (((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK) -#define MMCR1_PMC1_LLA ((u64)1 << 44) -#define MMCR1_PMC1_LLA_VALUE ((u64)1 << 39) -#define MMCR1_PMC1_ADDR_SEL ((u64)1 << 35) +#define MMCR1_PMC1_LLA (1ul << 44) +#define MMCR1_PMC1_LLA_VALUE (1ul << 39) +#define MMCR1_PMC1_ADDR_SEL (1ul << 35) #define MMCR1_PMC1SEL_SH 24 #define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) #define MMCR1_PMCSEL_MSK 0xff @@ -173,10 +175,10 @@ static int power6_marked_instr_event(u64 event) * Assign PMC numbers and compute MMCR1 value for a set of events */ static int p6_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], u64 mmcr[]) + unsigned int hwc[], unsigned long mmcr[]) { - u64 mmcr1 = 0; - u64 mmcra = 0; + unsigned long mmcr1 = 0; + unsigned long mmcra = 0; int i; unsigned int pmc, ev, b, u, s, psel; unsigned int ttmset = 0; @@ -215,7 +217,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev, /* check for conflict on this byte of event bus */ if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u) return -1; - mmcr1 |= (u64)u << MMCR1_TTMSEL_SH(b); + mmcr1 |= (unsigned long)u << MMCR1_TTMSEL_SH(b); ttmset |= 1 << b; if (u == 5) { /* Nest events have a further mux */ @@ -224,7 +226,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev, MMCR1_NESTSEL(mmcr1) != s) return -1; ttmset |= 0x10; - mmcr1 |= (u64)s << MMCR1_NESTSEL_SH; + mmcr1 |= (unsigned long)s << MMCR1_NESTSEL_SH; } if (0x30 <= psel && psel <= 0x3d) { /* these need the PMCx_ADDR_SEL bits */ @@ -243,7 +245,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev, if (power6_marked_instr_event(event[i])) mmcra |= MMCRA_SAMPLE_ENABLE; if (pmc < 4) - mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc); + mmcr1 |= (unsigned long)psel << MMCR1_PMCSEL_SH(pmc); } mmcr[0] = 0; if (pmc_inuse & 1) @@ -265,10 +267,11 @@ static int p6_compute_mmcr(u64 event[], int n_ev, * 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3 * 32-34 select field: nest (subunit) event selector */ -static int p6_get_constraint(u64 event, u64 *maskp, u64 *valp) +static int p6_get_constraint(u64 event, unsigned long *maskp, + unsigned long *valp) { int pmc, byte, sh, subunit; - u64 mask = 0, value = 0; + unsigned long mask = 0, value = 0; pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; if (pmc) { @@ -282,11 +285,11 @@ static int p6_get_constraint(u64 event, u64 *maskp, u64 *valp) byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; sh = byte * 4 + (16 - PM_UNIT_SH); mask |= PM_UNIT_MSKS << sh; - value |= (u64)(event & PM_UNIT_MSKS) << sh; + value |= (unsigned long)(event & PM_UNIT_MSKS) << sh; if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) { subunit = (event >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK; - mask |= (u64)PM_SUBUNIT_MSK << 32; - value |= (u64)subunit << 32; + mask |= (unsigned long)PM_SUBUNIT_MSK << 32; + value |= (unsigned long)subunit << 32; } } if (pmc <= 4) { @@ -458,7 +461,7 @@ static int p6_get_alternatives(u64 event, unsigned int flags, u64 alt[]) return nalt; } -static void p6_disable_pmc(unsigned int pmc, u64 mmcr[]) +static void p6_disable_pmc(unsigned int pmc, unsigned long mmcr[]) { /* Set PMCxSEL to 0 to disable PMCx */ if (pmc <= 3) @@ -515,18 +518,29 @@ static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { }, }; -struct power_pmu power6_pmu = { - .n_counter = 6, - .max_alternatives = MAX_ALT, - .add_fields = 0x1555, - .test_adder = 0x3000, - .compute_mmcr = p6_compute_mmcr, - .get_constraint = p6_get_constraint, - .get_alternatives = p6_get_alternatives, - .disable_pmc = p6_disable_pmc, - .limited_pmc_event = p6_limited_pmc_event, - .flags = PPMU_LIMITED_PMC5_6 | PPMU_ALT_SIPR, - .n_generic = ARRAY_SIZE(power6_generic_events), - .generic_events = power6_generic_events, - .cache_events = &power6_cache_events, +static struct power_pmu power6_pmu = { + .name = "POWER6", + .n_counter = 6, + .max_alternatives = MAX_ALT, + .add_fields = 0x1555, + .test_adder = 0x3000, + .compute_mmcr = p6_compute_mmcr, + .get_constraint = p6_get_constraint, + .get_alternatives = p6_get_alternatives, + .disable_pmc = p6_disable_pmc, + .limited_pmc_event = p6_limited_pmc_event, + .flags = PPMU_LIMITED_PMC5_6 | PPMU_ALT_SIPR, + .n_generic = ARRAY_SIZE(power6_generic_events), + .generic_events = power6_generic_events, + .cache_events = &power6_cache_events, }; + +static int init_power6_pmu(void) +{ + if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power6")) + return -ENODEV; + + return register_power_pmu(&power6_pmu); +} + +arch_initcall(init_power6_pmu); diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c index b72e7a19d054..5d755ef7ac8f 100644 --- a/arch/powerpc/kernel/power7-pmu.c +++ b/arch/powerpc/kernel/power7-pmu.c @@ -10,7 +10,9 @@ */ #include <linux/kernel.h> #include <linux/perf_counter.h> +#include <linux/string.h> #include <asm/reg.h> +#include <asm/cputable.h> /* * Bits in event code for POWER7 @@ -71,10 +73,11 @@ * 0-9: Count of events needing PMC1..PMC5 */ -static int power7_get_constraint(u64 event, u64 *maskp, u64 *valp) +static int power7_get_constraint(u64 event, unsigned long *maskp, + unsigned long *valp) { int pmc, sh; - u64 mask = 0, value = 0; + unsigned long mask = 0, value = 0; pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; if (pmc) { @@ -224,10 +227,10 @@ static int power7_marked_instr_event(u64 event) } static int power7_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], u64 mmcr[]) + unsigned int hwc[], unsigned long mmcr[]) { - u64 mmcr1 = 0; - u64 mmcra = 0; + unsigned long mmcr1 = 0; + unsigned long mmcra = 0; unsigned int pmc, unit, combine, l2sel, psel; unsigned int pmc_inuse = 0; int i; @@ -265,11 +268,14 @@ static int power7_compute_mmcr(u64 event[], int n_ev, --pmc; } if (pmc <= 3) { - mmcr1 |= (u64) unit << (MMCR1_TTM0SEL_SH - 4 * pmc); - mmcr1 |= (u64) combine << (MMCR1_PMC1_COMBINE_SH - pmc); + mmcr1 |= (unsigned long) unit + << (MMCR1_TTM0SEL_SH - 4 * pmc); + mmcr1 |= (unsigned long) combine + << (MMCR1_PMC1_COMBINE_SH - pmc); mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc); if (unit == 6) /* L2 events */ - mmcr1 |= (u64) l2sel << MMCR1_L2SEL_SH; + mmcr1 |= (unsigned long) l2sel + << MMCR1_L2SEL_SH; } if (power7_marked_instr_event(event[i])) mmcra |= MMCRA_SAMPLE_ENABLE; @@ -287,10 +293,10 @@ static int power7_compute_mmcr(u64 event[], int n_ev, return 0; } -static void power7_disable_pmc(unsigned int pmc, u64 mmcr[]) +static void power7_disable_pmc(unsigned int pmc, unsigned long mmcr[]) { if (pmc <= 3) - mmcr[1] &= ~(0xffULL << MMCR1_PMCSEL_SH(pmc)); + mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); } static int power7_generic_events[] = { @@ -342,16 +348,27 @@ static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { }, }; -struct power_pmu power7_pmu = { - .n_counter = 6, - .max_alternatives = MAX_ALT + 1, - .add_fields = 0x1555ull, - .test_adder = 0x3000ull, - .compute_mmcr = power7_compute_mmcr, - .get_constraint = power7_get_constraint, - .get_alternatives = power7_get_alternatives, - .disable_pmc = power7_disable_pmc, - .n_generic = ARRAY_SIZE(power7_generic_events), - .generic_events = power7_generic_events, - .cache_events = &power7_cache_events, +static struct power_pmu power7_pmu = { + .name = "POWER7", + .n_counter = 6, + .max_alternatives = MAX_ALT + 1, + .add_fields = 0x1555ul, + .test_adder = 0x3000ul, + .compute_mmcr = power7_compute_mmcr, + .get_constraint = power7_get_constraint, + .get_alternatives = power7_get_alternatives, + .disable_pmc = power7_disable_pmc, + .n_generic = ARRAY_SIZE(power7_generic_events), + .generic_events = power7_generic_events, + .cache_events = &power7_cache_events, }; + +static int init_power7_pmu(void) +{ + if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power7")) + return -ENODEV; + + return register_power_pmu(&power7_pmu); +} + +arch_initcall(init_power7_pmu); diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index ba0a357a89f4..6637c87fe70e 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -10,7 +10,9 @@ */ #include <linux/string.h> #include <linux/perf_counter.h> +#include <linux/string.h> #include <asm/reg.h> +#include <asm/cputable.h> /* * Bits in event code for PPC970 @@ -183,7 +185,7 @@ static int p970_marked_instr_event(u64 event) } /* Masks and values for using events from the various units */ -static u64 unit_cons[PM_LASTUNIT+1][2] = { +static unsigned long unit_cons[PM_LASTUNIT+1][2] = { [PM_FPU] = { 0xc80000000000ull, 0x040000000000ull }, [PM_VPU] = { 0xc80000000000ull, 0xc40000000000ull }, [PM_ISU] = { 0x080000000000ull, 0x020000000000ull }, @@ -192,10 +194,11 @@ static u64 unit_cons[PM_LASTUNIT+1][2] = { [PM_STS] = { 0x380000000000ull, 0x310000000000ull }, }; -static int p970_get_constraint(u64 event, u64 *maskp, u64 *valp) +static int p970_get_constraint(u64 event, unsigned long *maskp, + unsigned long *valp) { int pmc, byte, unit, sh, spcsel; - u64 mask = 0, value = 0; + unsigned long mask = 0, value = 0; int grp = -1; pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; @@ -222,7 +225,7 @@ static int p970_get_constraint(u64 event, u64 *maskp, u64 *valp) grp = byte & 1; /* Set byte lane select field */ mask |= 0xfULL << (28 - 4 * byte); - value |= (u64)unit << (28 - 4 * byte); + value |= (unsigned long)unit << (28 - 4 * byte); } if (grp == 0) { /* increment PMC1/2/5/6 field */ @@ -236,7 +239,7 @@ static int p970_get_constraint(u64 event, u64 *maskp, u64 *valp) spcsel = (event >> PM_SPCSEL_SH) & PM_SPCSEL_MSK; if (spcsel) { mask |= 3ull << 48; - value |= (u64)spcsel << 48; + value |= (unsigned long)spcsel << 48; } *maskp = mask; *valp = value; @@ -257,9 +260,9 @@ static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[]) } static int p970_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], u64 mmcr[]) + unsigned int hwc[], unsigned long mmcr[]) { - u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0; + unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0; unsigned int pmc, unit, byte, psel; unsigned int ttm, grp; unsigned int pmc_inuse = 0; @@ -320,7 +323,7 @@ static int p970_compute_mmcr(u64 event[], int n_ev, continue; ttm = unitmap[i]; ++ttmuse[(ttm >> 2) & 1]; - mmcr1 |= (u64)(ttm & ~4) << MMCR1_TTM1SEL_SH; + mmcr1 |= (unsigned long)(ttm & ~4) << MMCR1_TTM1SEL_SH; } /* Check only one unit per TTMx */ if (ttmuse[0] > 1 || ttmuse[1] > 1) @@ -340,7 +343,8 @@ static int p970_compute_mmcr(u64 event[], int n_ev, if (unit == PM_LSU1L && byte >= 2) mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte); } - mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); + mmcr1 |= (unsigned long)ttm + << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); } /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ @@ -386,7 +390,8 @@ static int p970_compute_mmcr(u64 event[], int n_ev, for (pmc = 0; pmc < 2; ++pmc) mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc); for (; pmc < 8; ++pmc) - mmcr1 |= (u64)pmcsel[pmc] << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)); + mmcr1 |= (unsigned long)pmcsel[pmc] + << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)); if (pmc_inuse & 1) mmcr0 |= MMCR0_PMC1CE; if (pmc_inuse & 0xfe) @@ -401,7 +406,7 @@ static int p970_compute_mmcr(u64 event[], int n_ev, return 0; } -static void p970_disable_pmc(unsigned int pmc, u64 mmcr[]) +static void p970_disable_pmc(unsigned int pmc, unsigned long mmcr[]) { int shift, i; @@ -467,16 +472,28 @@ static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { }, }; -struct power_pmu ppc970_pmu = { - .n_counter = 8, - .max_alternatives = 2, - .add_fields = 0x001100005555ull, - .test_adder = 0x013300000000ull, - .compute_mmcr = p970_compute_mmcr, - .get_constraint = p970_get_constraint, - .get_alternatives = p970_get_alternatives, - .disable_pmc = p970_disable_pmc, - .n_generic = ARRAY_SIZE(ppc970_generic_events), - .generic_events = ppc970_generic_events, - .cache_events = &ppc970_cache_events, +static struct power_pmu ppc970_pmu = { + .name = "PPC970/FX/MP", + .n_counter = 8, + .max_alternatives = 2, + .add_fields = 0x001100005555ull, + .test_adder = 0x013300000000ull, + .compute_mmcr = p970_compute_mmcr, + .get_constraint = p970_get_constraint, + .get_alternatives = p970_get_alternatives, + .disable_pmc = p970_disable_pmc, + .n_generic = ARRAY_SIZE(ppc970_generic_events), + .generic_events = ppc970_generic_events, + .cache_events = &ppc970_cache_events, }; + +static int init_ppc970_pmu(void) +{ + if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970") + && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970MP")) + return -ENODEV; + + return register_power_pmu(&ppc970_pmu); +} + +arch_initcall(init_ppc970_pmu); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 15391c2ab013..eae4511ceeac 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -53,6 +53,7 @@ #include <linux/posix-timers.h> #include <linux/irq.h> #include <linux/delay.h> +#include <linux/perf_counter.h> #include <asm/io.h> #include <asm/processor.h> @@ -525,6 +526,26 @@ void __init iSeries_time_init_early(void) } #endif /* CONFIG_PPC_ISERIES */ +#if defined(CONFIG_PERF_COUNTERS) && defined(CONFIG_PPC32) +DEFINE_PER_CPU(u8, perf_counter_pending); + +void set_perf_counter_pending(void) +{ + get_cpu_var(perf_counter_pending) = 1; + set_dec(1); + put_cpu_var(perf_counter_pending); +} + +#define test_perf_counter_pending() __get_cpu_var(perf_counter_pending) +#define clear_perf_counter_pending() __get_cpu_var(perf_counter_pending) = 0 + +#else /* CONFIG_PERF_COUNTERS && CONFIG_PPC32 */ + +#define test_perf_counter_pending() 0 +#define clear_perf_counter_pending() + +#endif /* CONFIG_PERF_COUNTERS && CONFIG_PPC32 */ + /* * For iSeries shared processors, we have to let the hypervisor * set the hardware decrementer. We set a virtual decrementer @@ -551,6 +572,10 @@ void timer_interrupt(struct pt_regs * regs) set_dec(DECREMENTER_MAX); #ifdef CONFIG_PPC32 + if (test_perf_counter_pending()) { + clear_perf_counter_pending(); + perf_counter_do_pending(); + } if (atomic_read(&ppc_n_lost_interrupts) != 0) do_IRQ(regs); #endif diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index c4192542b809..61187bec7506 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -1,7 +1,7 @@ config PPC64 bool "64-bit kernel" default n - select HAVE_PERF_COUNTERS + select PPC_HAVE_PMU_SUPPORT help This option selects whether a 32-bit or a 64-bit kernel will be built. @@ -78,6 +78,7 @@ config POWER4_ONLY config 6xx def_bool y depends on PPC32 && PPC_BOOK3S + select PPC_HAVE_PMU_SUPPORT config POWER3 bool @@ -246,6 +247,15 @@ config VIRT_CPU_ACCOUNTING If in doubt, say Y here. +config PPC_HAVE_PMU_SUPPORT + bool + +config PPC_PERF_CTRS + def_bool y + depends on PERF_COUNTERS && PPC_HAVE_PMU_SUPPORT + help + This enables the powerpc-specific perf_counter back-end. + config SMP depends on PPC_STD_MMU || FSL_BOOKE bool "Symmetric multi-processing support" diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index 876ed97147b3..5fb33e160ea0 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -84,11 +84,6 @@ union cpuid10_edx { #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) -extern void set_perf_counter_pending(void); - -#define clear_perf_counter_pending() do { } while (0) -#define test_perf_counter_pending() (0) - #ifdef CONFIG_PERF_COUNTERS extern void init_hw_perf_counters(void); extern void perf_counters_lapic_init(void); diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 31bd120cf2a2..01fd9461d323 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -49,13 +49,17 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); #endif #if defined(CONFIG_HIGHPTE) +#define __KM_PTE \ + (in_nmi() ? KM_NMI_PTE : \ + in_irq() ? KM_IRQ_PTE : \ + KM_PTE0) #define pte_offset_map(dir, address) \ - ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) + \ + ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), __KM_PTE) + \ pte_index((address))) #define pte_offset_map_nested(dir, address) \ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + \ pte_index((address))) -#define pte_unmap(pte) kunmap_atomic((pte), KM_PTE0) +#define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE) #define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1) #else #define pte_offset_map(dir, address) \ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index b685ece89d5c..512ee87062c2 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -25,7 +25,12 @@ #define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) #define KERNEL_DS MAKE_MM_SEG(-1UL) -#define USER_DS MAKE_MM_SEG(PAGE_OFFSET) + +#ifdef CONFIG_X86_32 +# define USER_DS MAKE_MM_SEG(PAGE_OFFSET) +#else +# define USER_DS MAKE_MM_SEG(__VIRTUAL_MASK) +#endif #define get_ds() (KERNEL_DS) #define get_fs() (current_thread_info()->addr_limit) diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 275bc142cd5d..76dfef23f789 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -19,6 +19,7 @@ #include <linux/kdebug.h> #include <linux/sched.h> #include <linux/uaccess.h> +#include <linux/highmem.h> #include <asm/apic.h> #include <asm/stacktrace.h> @@ -389,23 +390,23 @@ static u64 intel_pmu_raw_event(u64 event) return event & CORE_EVNTSEL_MASK; } -static const u64 amd_0f_hw_cache_event_ids +static const u64 amd_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { [ C(L1D) ] = { [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, + [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ + [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ }, [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_ACCESS) ] = 0x0042, /* Data Cache Refills from L2 */ [ C(RESULT_MISS) ] = 0, }, [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, + [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */ + [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */ }, }, [ C(L1I ) ] = { @@ -418,17 +419,17 @@ static const u64 amd_0f_hw_cache_event_ids [ C(RESULT_MISS) ] = -1, }, [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */ [ C(RESULT_MISS) ] = 0, }, }, [ C(LL ) ] = { [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, + [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */ + [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */ }, [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */ [ C(RESULT_MISS) ] = 0, }, [ C(OP_PREFETCH) ] = { @@ -438,8 +439,8 @@ static const u64 amd_0f_hw_cache_event_ids }, [ C(DTLB) ] = { [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, + [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ + [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */ }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = 0, @@ -1223,6 +1224,8 @@ again: if (!intel_pmu_save_and_restart(counter)) continue; + data.period = counter->hw.last_period; + if (perf_counter_overflow(counter, 1, &data)) intel_pmu_disable_counter(&counter->hw, bit); } @@ -1459,18 +1462,16 @@ static int intel_pmu_init(void) static int amd_pmu_init(void) { + /* Performance-monitoring supported from K7 and later: */ + if (boot_cpu_data.x86 < 6) + return -ENODEV; + x86_pmu = amd_pmu; - switch (boot_cpu_data.x86) { - case 0x0f: - case 0x10: - case 0x11: - memcpy(hw_cache_event_ids, amd_0f_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); + /* Events are common for all AMDs */ + memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); - pr_cont("AMD Family 0f/10/11 events, "); - break; - } return 0; } @@ -1554,9 +1555,9 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) */ static inline -void callchain_store(struct perf_callchain_entry *entry, unsigned long ip) +void callchain_store(struct perf_callchain_entry *entry, u64 ip) { - if (entry->nr < MAX_STACK_DEPTH) + if (entry->nr < PERF_MAX_STACK_DEPTH) entry->ip[entry->nr++] = ip; } @@ -1577,8 +1578,8 @@ static void backtrace_warning(void *data, char *msg) static int backtrace_stack(void *data, char *name) { - /* Don't bother with IRQ stacks for now */ - return -1; + /* Process all stacks: */ + return 0; } static void backtrace_address(void *data, unsigned long addr, int reliable) @@ -1596,47 +1597,59 @@ static const struct stacktrace_ops backtrace_ops = { .address = backtrace_address, }; +#include "../dumpstack.h" + static void perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) { - unsigned long bp; - char *stack; - int nr = entry->nr; + callchain_store(entry, PERF_CONTEXT_KERNEL); + callchain_store(entry, regs->ip); - callchain_store(entry, instruction_pointer(regs)); + dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); +} - stack = ((char *)regs + sizeof(struct pt_regs)); -#ifdef CONFIG_FRAME_POINTER - bp = frame_pointer(regs); -#else - bp = 0; -#endif +/* + * best effort, GUP based copy_from_user() that assumes IRQ or NMI context + */ +static unsigned long +copy_from_user_nmi(void *to, const void __user *from, unsigned long n) +{ + unsigned long offset, addr = (unsigned long)from; + int type = in_nmi() ? KM_NMI : KM_IRQ0; + unsigned long size, len = 0; + struct page *page; + void *map; + int ret; - dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry); + do { + ret = __get_user_pages_fast(addr, 1, 0, &page); + if (!ret) + break; - entry->kernel = entry->nr - nr; -} + offset = addr & (PAGE_SIZE - 1); + size = min(PAGE_SIZE - offset, n - len); + map = kmap_atomic(page, type); + memcpy(to, map+offset, size); + kunmap_atomic(map, type); + put_page(page); -struct stack_frame { - const void __user *next_fp; - unsigned long return_address; -}; + len += size; + to += size; + addr += size; + + } while (len < n); + + return len; +} static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) { - int ret; + unsigned long bytes; - if (!access_ok(VERIFY_READ, fp, sizeof(*frame))) - return 0; + bytes = copy_from_user_nmi(frame, fp, sizeof(*frame)); - ret = 1; - pagefault_disable(); - if (__copy_from_user_inatomic(frame, fp, sizeof(*frame))) - ret = 0; - pagefault_enable(); - - return ret; + return bytes == sizeof(*frame); } static void @@ -1644,28 +1657,28 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) { struct stack_frame frame; const void __user *fp; - int nr = entry->nr; - regs = (struct pt_regs *)current->thread.sp0 - 1; - fp = (void __user *)regs->bp; + if (!user_mode(regs)) + regs = task_pt_regs(current); + fp = (void __user *)regs->bp; + + callchain_store(entry, PERF_CONTEXT_USER); callchain_store(entry, regs->ip); - while (entry->nr < MAX_STACK_DEPTH) { - frame.next_fp = NULL; + while (entry->nr < PERF_MAX_STACK_DEPTH) { + frame.next_frame = NULL; frame.return_address = 0; if (!copy_stack_frame(fp, &frame)) break; - if ((unsigned long)fp < user_stack_pointer(regs)) + if ((unsigned long)fp < regs->sp) break; callchain_store(entry, frame.return_address); - fp = frame.next_fp; + fp = frame.next_frame; } - - entry->user = entry->nr - nr; } static void @@ -1701,9 +1714,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) entry = &__get_cpu_var(irq_entry); entry->nr = 0; - entry->hv = 0; - entry->kernel = 0; - entry->user = 0; perf_do_callchain(regs, entry); diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index f97480941269..71da1bca13cb 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -14,7 +14,7 @@ static inline pte_t gup_get_pte(pte_t *ptep) { #ifndef CONFIG_X86_PAE - return *ptep; + return ACCESS_ONCE(*ptep); #else /* * With get_user_pages_fast, we walk down the pagetables without taking @@ -219,6 +219,62 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, return 1; } +/* + * Like get_user_pages_fast() except its IRQ-safe in that it won't fall + * back to the regular GUP. + */ +int __get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages) +{ + struct mm_struct *mm = current->mm; + unsigned long addr, len, end; + unsigned long next; + unsigned long flags; + pgd_t *pgdp; + int nr = 0; + + start &= PAGE_MASK; + addr = start; + len = (unsigned long) nr_pages << PAGE_SHIFT; + end = start + len; + if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, + (void __user *)start, len))) + return 0; + + /* + * XXX: batch / limit 'nr', to avoid large irq off latency + * needs some instrumenting to determine the common sizes used by + * important workloads (eg. DB2), and whether limiting the batch size + * will decrease performance. + * + * It seems like we're in the clear for the moment. Direct-IO is + * the main guy that batches up lots of get_user_pages, and even + * they are limited to 64-at-a-time which is not so many. + */ + /* + * This doesn't prevent pagetable teardown, but does prevent + * the pagetables and pages from being freed on x86. + * + * So long as we atomically load page table pointers versus teardown + * (which we do on x86, with the above PAE exception), we can follow the + * address down to the the page and take a ref on it. + */ + local_irq_save(flags); + pgdp = pgd_offset(mm, addr); + do { + pgd_t pgd = *pgdp; + + next = pgd_addr_end(addr, end); + if (pgd_none(pgd)) + break; + if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) + break; + } while (pgdp++, addr = next, addr != end); + local_irq_restore(flags); + + return nr; +} + /** * get_user_pages_fast() - pin user pages in memory * @start: starting user address |