From b0f82b81fe6bbcf78d478071f33e44554726bc81 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 20 May 2010 07:47:21 +0200 Subject: perf: Drop the skip argument from perf_arch_fetch_regs_caller Drop this argument now that we always want to rewind only to the state of the first caller. It means frame pointers are not necessary anymore to reliably get the source of an event. But this also means we need this helper to be a macro now, as an inline function is not an option since we need to know when to provide a default implentation. Signed-off-by: Frederic Weisbecker Signed-off-by: Paul Mackerras Cc: David Miller Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo --- arch/sparc/include/asm/perf_event.h | 8 ++++++++ arch/sparc/kernel/helpers.S | 6 +++--- 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'arch/sparc') diff --git a/arch/sparc/include/asm/perf_event.h b/arch/sparc/include/asm/perf_event.h index 7e2669894ce8..74c4e0cd889c 100644 --- a/arch/sparc/include/asm/perf_event.h +++ b/arch/sparc/include/asm/perf_event.h @@ -6,7 +6,15 @@ extern void set_perf_event_pending(void); #define PERF_EVENT_INDEX_OFFSET 0 #ifdef CONFIG_PERF_EVENTS +#include + extern void init_hw_perf_events(void); + +extern void +__perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip); + +#define perf_arch_fetch_caller_regs(pt_regs, ip) \ + __perf_arch_fetch_caller_regs(pt_regs, ip, 1); #else static inline void init_hw_perf_events(void) { } #endif diff --git a/arch/sparc/kernel/helpers.S b/arch/sparc/kernel/helpers.S index 92090cc9e829..682fee06a16b 100644 --- a/arch/sparc/kernel/helpers.S +++ b/arch/sparc/kernel/helpers.S @@ -47,9 +47,9 @@ stack_trace_flush: .size stack_trace_flush,.-stack_trace_flush #ifdef CONFIG_PERF_EVENTS - .globl perf_arch_fetch_caller_regs - .type perf_arch_fetch_caller_regs,#function -perf_arch_fetch_caller_regs: + .globl __perf_arch_fetch_caller_regs + .type __perf_arch_fetch_caller_regs,#function +__perf_arch_fetch_caller_regs: /* We always read the %pstate into %o5 since we will use * that to construct a fake %tstate to store into the regs. */ -- cgit v1.2.3 From 8d2cacbbb8deadfae78aa16e4e1ee619bdd7019e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 25 May 2010 17:49:05 +0200 Subject: perf: Cleanup {start,commit,cancel}_txn details Clarify some of the transactional group scheduling API details and change it so that a successfull ->commit_txn also closes the transaction. Signed-off-by: Peter Zijlstra Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Paul Mackerras Cc: Mike Galbraith Cc: Steven Rostedt LKML-Reference: <1274803086.5882.1752.camel@twins> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_event.c | 7 ++++--- arch/sparc/kernel/perf_event.c | 7 ++++--- arch/x86/kernel/cpu/perf_event.c | 14 +++++--------- include/linux/perf_event.h | 27 ++++++++++++++++++++++----- kernel/perf_event.c | 9 +-------- 5 files changed, 36 insertions(+), 28 deletions(-) (limited to 'arch/sparc') diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 43b83c35cf54..ac2a8c2554d9 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -754,7 +754,7 @@ static int power_pmu_enable(struct perf_event *event) * skip the schedulability test here, it will be peformed * at commit time(->commit_txn) as a whole */ - if (cpuhw->group_flag & PERF_EVENT_TXN_STARTED) + if (cpuhw->group_flag & PERF_EVENT_TXN) goto nocheck; if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1)) @@ -858,7 +858,7 @@ void power_pmu_start_txn(const struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); - cpuhw->group_flag |= PERF_EVENT_TXN_STARTED; + cpuhw->group_flag |= PERF_EVENT_TXN; cpuhw->n_txn_start = cpuhw->n_events; } @@ -871,7 +871,7 @@ void power_pmu_cancel_txn(const struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); - cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED; + cpuhw->group_flag &= ~PERF_EVENT_TXN; } /* @@ -897,6 +897,7 @@ int power_pmu_commit_txn(const struct pmu *pmu) for (i = cpuhw->n_txn_start; i < n; ++i) cpuhw->event[i]->hw.config = cpuhw->events[i]; + cpuhw->group_flag &= ~PERF_EVENT_TXN; return 0; } diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 0ec92c8861dd..beeb92fa3acd 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1005,7 +1005,7 @@ static int sparc_pmu_enable(struct perf_event *event) * skip the schedulability test here, it will be peformed * at commit time(->commit_txn) as a whole */ - if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) + if (cpuc->group_flag & PERF_EVENT_TXN) goto nocheck; if (check_excludes(cpuc->event, n0, 1)) @@ -1102,7 +1102,7 @@ static void sparc_pmu_start_txn(const struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); - cpuhw->group_flag |= PERF_EVENT_TXN_STARTED; + cpuhw->group_flag |= PERF_EVENT_TXN; } /* @@ -1114,7 +1114,7 @@ static void sparc_pmu_cancel_txn(const struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); - cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED; + cpuhw->group_flag &= ~PERF_EVENT_TXN; } /* @@ -1137,6 +1137,7 @@ static int sparc_pmu_commit_txn(const struct pmu *pmu) if (sparc_check_constraints(cpuc->event, cpuc->events, n)) return -EAGAIN; + cpuc->group_flag &= ~PERF_EVENT_TXN; return 0; } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 5db5b7d65a18..af04c6fa59cb 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -969,7 +969,7 @@ static int x86_pmu_enable(struct perf_event *event) * skip the schedulability test here, it will be peformed * at commit time(->commit_txn) as a whole */ - if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) + if (cpuc->group_flag & PERF_EVENT_TXN) goto out; ret = x86_pmu.schedule_events(cpuc, n, assign); @@ -1096,7 +1096,7 @@ static void x86_pmu_disable(struct perf_event *event) * The events never got scheduled and ->cancel_txn will truncate * the event_list. */ - if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) + if (cpuc->group_flag & PERF_EVENT_TXN) return; x86_pmu_stop(event); @@ -1388,7 +1388,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - cpuc->group_flag |= PERF_EVENT_TXN_STARTED; + cpuc->group_flag |= PERF_EVENT_TXN; cpuc->n_txn = 0; } @@ -1401,7 +1401,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED; + cpuc->group_flag &= ~PERF_EVENT_TXN; /* * Truncate the collected events. */ @@ -1435,11 +1435,7 @@ static int x86_pmu_commit_txn(const struct pmu *pmu) */ memcpy(cpuc->assign, assign, n*sizeof(int)); - /* - * Clear out the txn count so that ->cancel_txn() which gets - * run after ->commit_txn() doesn't undo things. - */ - cpuc->n_txn = 0; + cpuc->group_flag &= ~PERF_EVENT_TXN; return 0; } diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 36efad90cd43..f1b6ba0770e0 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -549,7 +549,10 @@ struct hw_perf_event { struct perf_event; -#define PERF_EVENT_TXN_STARTED 1 +/* + * Common implementation detail of pmu::{start,commit,cancel}_txn + */ +#define PERF_EVENT_TXN 0x1 /** * struct pmu - generic performance monitoring unit @@ -563,14 +566,28 @@ struct pmu { void (*unthrottle) (struct perf_event *event); /* - * group events scheduling is treated as a transaction, - * add group events as a whole and perform one schedulability test. - * If test fails, roll back the whole group + * Group events scheduling is treated as a transaction, add group + * events as a whole and perform one schedulability test. If the test + * fails, roll back the whole group */ + /* + * Start the transaction, after this ->enable() doesn't need + * to do schedulability tests. + */ void (*start_txn) (const struct pmu *pmu); - void (*cancel_txn) (const struct pmu *pmu); + /* + * If ->start_txn() disabled the ->enable() schedulability test + * then ->commit_txn() is required to perform one. On success + * the transaction is closed. On error the transaction is kept + * open until ->cancel_txn() is called. + */ int (*commit_txn) (const struct pmu *pmu); + /* + * Will cancel the transaction, assumes ->disable() is called for + * each successfull ->enable() during the transaction. + */ + void (*cancel_txn) (const struct pmu *pmu); }; /** diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 227ed9c8ec34..6f60920772b3 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -675,7 +675,6 @@ group_sched_in(struct perf_event *group_event, struct perf_event *event, *partial_group = NULL; const struct pmu *pmu = group_event->pmu; bool txn = false; - int ret; if (group_event->state == PERF_EVENT_STATE_OFF) return 0; @@ -703,15 +702,9 @@ group_sched_in(struct perf_event *group_event, } } - if (!txn) + if (!txn || !pmu->commit_txn(pmu)) return 0; - ret = pmu->commit_txn(pmu); - if (!ret) { - pmu->cancel_txn(pmu); - return 0; - } - group_error: /* * Groups can be scheduled in as one unit only, so undo any -- cgit v1.2.3 From 1996bda2a42480c275656233e631ee0966574be4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 21 May 2010 14:05:13 +0200 Subject: arch: Implement local64_t On 64bit, local_t is of size long, and thus we make local64_t an alias. On 32bit, we fall back to atomic64_t. (architecture can provide optimized 32-bit version) (This new facility is to be used by perf events optimizations.) Signed-off-by: Peter Zijlstra Cc: linux-arch@vger.kernel.org Cc: Andrew Morton Cc: Linus Torvalds LKML-Reference: Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/local64.h | 1 + arch/arm/include/asm/local64.h | 1 + arch/avr32/include/asm/local64.h | 1 + arch/blackfin/include/asm/local64.h | 1 + arch/cris/include/asm/local64.h | 1 + arch/frv/include/asm/local64.h | 1 + arch/frv/kernel/local64.h | 1 + arch/h8300/include/asm/local64.h | 1 + arch/ia64/include/asm/local64.h | 1 + arch/m32r/include/asm/local64.h | 1 + arch/m68k/include/asm/local64.h | 1 + arch/microblaze/include/asm/local64.h | 1 + arch/mips/include/asm/local64.h | 1 + arch/mn10300/include/asm/local64.h | 1 + arch/parisc/include/asm/local64.h | 1 + arch/powerpc/include/asm/local64.h | 1 + arch/s390/include/asm/local64.h | 1 + arch/score/include/asm/local64.h | 1 + arch/sh/include/asm/local64.h | 1 + arch/sparc/include/asm/local64.h | 1 + arch/x86/include/asm/local64.h | 1 + arch/xtensa/include/asm/local64.h | 1 + include/asm-generic/local64.h | 96 +++++++++++++++++++++++++++++++++++ 23 files changed, 118 insertions(+) create mode 100644 arch/alpha/include/asm/local64.h create mode 100644 arch/arm/include/asm/local64.h create mode 100644 arch/avr32/include/asm/local64.h create mode 100644 arch/blackfin/include/asm/local64.h create mode 100644 arch/cris/include/asm/local64.h create mode 100644 arch/frv/include/asm/local64.h create mode 100644 arch/frv/kernel/local64.h create mode 100644 arch/h8300/include/asm/local64.h create mode 100644 arch/ia64/include/asm/local64.h create mode 100644 arch/m32r/include/asm/local64.h create mode 100644 arch/m68k/include/asm/local64.h create mode 100644 arch/microblaze/include/asm/local64.h create mode 100644 arch/mips/include/asm/local64.h create mode 100644 arch/mn10300/include/asm/local64.h create mode 100644 arch/parisc/include/asm/local64.h create mode 100644 arch/powerpc/include/asm/local64.h create mode 100644 arch/s390/include/asm/local64.h create mode 100644 arch/score/include/asm/local64.h create mode 100644 arch/sh/include/asm/local64.h create mode 100644 arch/sparc/include/asm/local64.h create mode 100644 arch/x86/include/asm/local64.h create mode 100644 arch/xtensa/include/asm/local64.h create mode 100644 include/asm-generic/local64.h (limited to 'arch/sparc') diff --git a/arch/alpha/include/asm/local64.h b/arch/alpha/include/asm/local64.h new file mode 100644 index 000000000000..36c93b5cc239 --- /dev/null +++ b/arch/alpha/include/asm/local64.h @@ -0,0 +1 @@ +#include diff --git a/arch/arm/include/asm/local64.h b/arch/arm/include/asm/local64.h new file mode 100644 index 000000000000..36c93b5cc239 --- /dev/null +++ b/arch/arm/include/asm/local64.h @@ -0,0 +1 @@ +#include diff --git a/arch/avr32/include/asm/local64.h b/arch/avr32/include/asm/local64.h new file mode 100644 index 000000000000..36c93b5cc239 --- /dev/null +++ b/arch/avr32/include/asm/local64.h @@ -0,0 +1 @@ +#include diff --git a/arch/blackfin/include/asm/local64.h b/arch/blackfin/include/asm/local64.h new file mode 100644 index 000000000000..36c93b5cc239 --- /dev/null +++ b/arch/blackfin/include/asm/local64.h @@ -0,0 +1 @@ +#include diff --git a/arch/cris/include/asm/local64.h b/arch/cris/include/asm/local64.h new file mode 100644 index 000000000000..36c93b5cc239 --- /dev/null +++ b/arch/cris/include/asm/local64.h @@ -0,0 +1 @@ +#include diff --git a/arch/frv/include/asm/local64.h b/arch/frv/include/asm/local64.h new file mode 100644 index 000000000000..36c93b5cc239 --- /dev/null +++ b/arch/frv/include/asm/local64.h @@ -0,0 +1 @@ +#include diff --git a/arch/frv/kernel/local64.h b/arch/frv/kernel/local64.h new file mode 100644 index 000000000000..36c93b5cc239 --- /dev/null +++ b/arch/frv/kernel/local64.h @@ -0,0 +1 @@ +#include diff --git a/arch/h8300/include/asm/local64.h b/arch/h8300/include/asm/local64.h new file mode 100644 index 000000000000..36c93b5cc239 --- /dev/null +++ b/arch/h8300/include/asm/local64.h @@ -0,0 +1 @@ +#include diff --git a/arch/ia64/include/asm/local64.h b/arch/ia64/include/asm/local64.h new file mode 100644 index 000000000000..36c93b5cc239 --- /dev/null +++ b/arch/ia64/include/asm/local64.h @@ -0,0 +1 @@ +#include diff --git a/arch/m32r/include/asm/local64.h b/arch/m32r/include/asm/local64.h new file mode 100644 index 000000000000..36c93b5cc239 --- /dev/null +++ b/arch/m32r/include/asm/local64.h @@ -0,0 +1 @@ +#include diff --git a/arch/m68k/include/asm/local64.h b/arch/m68k/include/asm/local64.h new file mode 100644 index 000000000000..36c93b5cc239 --- /dev/null +++ b/arch/m68k/include/asm/local64.h @@ -0,0 +1 @@ +#include diff --git a/arch/microblaze/include/asm/local64.h b/arch/microblaze/include/asm/local64.h new file mode 100644 index 000000000000..36c93b5cc239 --- /dev/null +++ b/arch/microblaze/include/asm/local64.h @@ -0,0 +1 @@ +#include diff --git a/arch/mips/include/asm/local64.h b/arch/mips/include/asm/local64.h new file mode 100644 index 000000000000..36c93b5cc239 --- /dev/null +++ b/arch/mips/include/asm/local64.h @@ -0,0 +1 @@ +#include diff --git a/arch/mn10300/include/asm/local64.h b/arch/mn10300/include/asm/local64.h new file mode 100644 index 000000000000..36c93b5cc239 --- /dev/null +++ b/arch/mn10300/include/asm/local64.h @@ -0,0 +1 @@ +#include diff --git a/arch/parisc/include/asm/local64.h b/arch/parisc/include/asm/local64.h new file mode 100644 index 000000000000..36c93b5cc239 --- /dev/null +++ b/arch/parisc/include/asm/local64.h @@ -0,0 +1 @@ +#include diff --git a/arch/powerpc/include/asm/local64.h b/arch/powerpc/include/asm/local64.h new file mode 100644 index 000000000000..36c93b5cc239 --- /dev/null +++ b/arch/powerpc/include/asm/local64.h @@ -0,0 +1 @@ +#include diff --git a/arch/s390/include/asm/local64.h b/arch/s390/include/asm/local64.h new file mode 100644 index 000000000000..36c93b5cc239 --- /dev/null +++ b/arch/s390/include/asm/local64.h @@ -0,0 +1 @@ +#include diff --git a/arch/score/include/asm/local64.h b/arch/score/include/asm/local64.h new file mode 100644 index 000000000000..36c93b5cc239 --- /dev/null +++ b/arch/score/include/asm/local64.h @@ -0,0 +1 @@ +#include diff --git a/arch/sh/include/asm/local64.h b/arch/sh/include/asm/local64.h new file mode 100644 index 000000000000..36c93b5cc239 --- /dev/null +++ b/arch/sh/include/asm/local64.h @@ -0,0 +1 @@ +#include diff --git a/arch/sparc/include/asm/local64.h b/arch/sparc/include/asm/local64.h new file mode 100644 index 000000000000..36c93b5cc239 --- /dev/null +++ b/arch/sparc/include/asm/local64.h @@ -0,0 +1 @@ +#include diff --git a/arch/x86/include/asm/local64.h b/arch/x86/include/asm/local64.h new file mode 100644 index 000000000000..36c93b5cc239 --- /dev/null +++ b/arch/x86/include/asm/local64.h @@ -0,0 +1 @@ +#include diff --git a/arch/xtensa/include/asm/local64.h b/arch/xtensa/include/asm/local64.h new file mode 100644 index 000000000000..36c93b5cc239 --- /dev/null +++ b/arch/xtensa/include/asm/local64.h @@ -0,0 +1 @@ +#include diff --git a/include/asm-generic/local64.h b/include/asm-generic/local64.h new file mode 100644 index 000000000000..02ac760c1a8b --- /dev/null +++ b/include/asm-generic/local64.h @@ -0,0 +1,96 @@ +#ifndef _ASM_GENERIC_LOCAL64_H +#define _ASM_GENERIC_LOCAL64_H + +#include +#include + +/* + * A signed long type for operations which are atomic for a single CPU. + * Usually used in combination with per-cpu variables. + * + * This is the default implementation, which uses atomic64_t. Which is + * rather pointless. The whole point behind local64_t is that some processors + * can perform atomic adds and subtracts in a manner which is atomic wrt IRQs + * running on this CPU. local64_t allows exploitation of such capabilities. + */ + +/* Implement in terms of atomics. */ + +#if BITS_PER_LONG == 64 + +#include + +typedef struct { + local_t a; +} local64_t; + +#define LOCAL64_INIT(i) { LOCAL_INIT(i) } + +#define local64_read(l) local_read(&(l)->a) +#define local64_set(l,i) local_set((&(l)->a),(i)) +#define local64_inc(l) local_inc(&(l)->a) +#define local64_dec(l) local_dec(&(l)->a) +#define local64_add(i,l) local_add((i),(&(l)->a)) +#define local64_sub(i,l) local_sub((i),(&(l)->a)) + +#define local64_sub_and_test(i, l) local_sub_and_test((i), (&(l)->a)) +#define local64_dec_and_test(l) local_dec_and_test(&(l)->a) +#define local64_inc_and_test(l) local_inc_and_test(&(l)->a) +#define local64_add_negative(i, l) local_add_negative((i), (&(l)->a)) +#define local64_add_return(i, l) local_add_return((i), (&(l)->a)) +#define local64_sub_return(i, l) local_sub_return((i), (&(l)->a)) +#define local64_inc_return(l) local_inc_return(&(l)->a) + +#define local64_cmpxchg(l, o, n) local_cmpxchg((&(l)->a), (o), (n)) +#define local64_xchg(l, n) local_xchg((&(l)->a), (n)) +#define local64_add_unless(l, _a, u) local_add_unless((&(l)->a), (_a), (u)) +#define local64_inc_not_zero(l) local_inc_not_zero(&(l)->a) + +/* Non-atomic variants, ie. preemption disabled and won't be touched + * in interrupt, etc. Some archs can optimize this case well. */ +#define __local64_inc(l) local64_set((l), local64_read(l) + 1) +#define __local64_dec(l) local64_set((l), local64_read(l) - 1) +#define __local64_add(i,l) local64_set((l), local64_read(l) + (i)) +#define __local64_sub(i,l) local64_set((l), local64_read(l) - (i)) + +#else /* BITS_PER_LONG != 64 */ + +#include + +/* Don't use typedef: don't want them to be mixed with atomic_t's. */ +typedef struct { + atomic64_t a; +} local64_t; + +#define LOCAL64_INIT(i) { ATOMIC_LONG_INIT(i) } + +#define local64_read(l) atomic64_read(&(l)->a) +#define local64_set(l,i) atomic64_set((&(l)->a),(i)) +#define local64_inc(l) atomic64_inc(&(l)->a) +#define local64_dec(l) atomic64_dec(&(l)->a) +#define local64_add(i,l) atomic64_add((i),(&(l)->a)) +#define local64_sub(i,l) atomic64_sub((i),(&(l)->a)) + +#define local64_sub_and_test(i, l) atomic64_sub_and_test((i), (&(l)->a)) +#define local64_dec_and_test(l) atomic64_dec_and_test(&(l)->a) +#define local64_inc_and_test(l) atomic64_inc_and_test(&(l)->a) +#define local64_add_negative(i, l) atomic64_add_negative((i), (&(l)->a)) +#define local64_add_return(i, l) atomic64_add_return((i), (&(l)->a)) +#define local64_sub_return(i, l) atomic64_sub_return((i), (&(l)->a)) +#define local64_inc_return(l) atomic64_inc_return(&(l)->a) + +#define local64_cmpxchg(l, o, n) atomic64_cmpxchg((&(l)->a), (o), (n)) +#define local64_xchg(l, n) atomic64_xchg((&(l)->a), (n)) +#define local64_add_unless(l, _a, u) atomic64_add_unless((&(l)->a), (_a), (u)) +#define local64_inc_not_zero(l) atomic64_inc_not_zero(&(l)->a) + +/* Non-atomic variants, ie. preemption disabled and won't be touched + * in interrupt, etc. Some archs can optimize this case well. */ +#define __local64_inc(l) local64_set((l), local64_read(l) + 1) +#define __local64_dec(l) local64_set((l), local64_read(l) - 1) +#define __local64_add(i,l) local64_set((l), local64_read(l) + (i)) +#define __local64_sub(i,l) local64_set((l), local64_read(l) - (i)) + +#endif /* BITS_PER_LONG != 64 */ + +#endif /* _ASM_GENERIC_LOCAL64_H */ -- cgit v1.2.3 From e78505958cf123048fb48cb56b79cebb8edd15fb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 21 May 2010 14:43:08 +0200 Subject: perf: Convert perf_event to local_t Since now all modification to event->count (and ->prev_count and ->period_left) are local to a cpu, change then to local64_t so we avoid the LOCK'ed ops. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- arch/arm/kernel/perf_event.c | 18 ++++++++--------- arch/powerpc/kernel/perf_event.c | 34 ++++++++++++++++---------------- arch/sh/kernel/perf_event.c | 6 +++--- arch/sparc/kernel/perf_event.c | 18 ++++++++--------- arch/x86/kernel/cpu/perf_event.c | 18 ++++++++--------- include/linux/perf_event.h | 7 ++++--- kernel/perf_event.c | 42 ++++++++++++++++++++-------------------- 7 files changed, 72 insertions(+), 71 deletions(-) (limited to 'arch/sparc') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index c45768614c8a..5b7cfafc0720 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -164,20 +164,20 @@ armpmu_event_set_period(struct perf_event *event, struct hw_perf_event *hwc, int idx) { - s64 left = atomic64_read(&hwc->period_left); + s64 left = local64_read(&hwc->period_left); s64 period = hwc->sample_period; int ret = 0; if (unlikely(left <= -period)) { left = period; - atomic64_set(&hwc->period_left, left); + local64_set(&hwc->period_left, left); hwc->last_period = period; ret = 1; } if (unlikely(left <= 0)) { left += period; - atomic64_set(&hwc->period_left, left); + local64_set(&hwc->period_left, left); hwc->last_period = period; ret = 1; } @@ -185,7 +185,7 @@ armpmu_event_set_period(struct perf_event *event, if (left > (s64)armpmu->max_period) left = armpmu->max_period; - atomic64_set(&hwc->prev_count, (u64)-left); + local64_set(&hwc->prev_count, (u64)-left); armpmu->write_counter(idx, (u64)(-left) & 0xffffffff); @@ -204,18 +204,18 @@ armpmu_event_update(struct perf_event *event, s64 delta; again: - prev_raw_count = atomic64_read(&hwc->prev_count); + prev_raw_count = local64_read(&hwc->prev_count); new_raw_count = armpmu->read_counter(idx); - if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, new_raw_count) != prev_raw_count) goto again; delta = (new_raw_count << shift) - (prev_raw_count << shift); delta >>= shift; - atomic64_add(delta, &event->count); - atomic64_sub(delta, &hwc->period_left); + local64_add(delta, &event->count); + local64_sub(delta, &hwc->period_left); return new_raw_count; } @@ -478,7 +478,7 @@ __hw_perf_event_init(struct perf_event *event) if (!hwc->sample_period) { hwc->sample_period = armpmu->max_period; hwc->last_period = hwc->sample_period; - atomic64_set(&hwc->period_left, hwc->sample_period); + local64_set(&hwc->period_left, hwc->sample_period); } err = 0; diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index ac2a8c2554d9..af1d9a7c65d1 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -410,15 +410,15 @@ static void power_pmu_read(struct perf_event *event) * Therefore we treat them like NMIs. */ do { - prev = atomic64_read(&event->hw.prev_count); + prev = local64_read(&event->hw.prev_count); barrier(); val = read_pmc(event->hw.idx); - } while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev); + } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev); /* The counters are only 32 bits wide */ delta = (val - prev) & 0xfffffffful; - atomic64_add(delta, &event->count); - atomic64_sub(delta, &event->hw.period_left); + local64_add(delta, &event->count); + local64_sub(delta, &event->hw.period_left); } /* @@ -444,10 +444,10 @@ static void freeze_limited_counters(struct cpu_hw_events *cpuhw, if (!event->hw.idx) continue; val = (event->hw.idx == 5) ? pmc5 : pmc6; - prev = atomic64_read(&event->hw.prev_count); + prev = local64_read(&event->hw.prev_count); event->hw.idx = 0; delta = (val - prev) & 0xfffffffful; - atomic64_add(delta, &event->count); + local64_add(delta, &event->count); } } @@ -462,7 +462,7 @@ static void thaw_limited_counters(struct cpu_hw_events *cpuhw, event = cpuhw->limited_counter[i]; event->hw.idx = cpuhw->limited_hwidx[i]; val = (event->hw.idx == 5) ? pmc5 : pmc6; - atomic64_set(&event->hw.prev_count, val); + local64_set(&event->hw.prev_count, val); perf_event_update_userpage(event); } } @@ -666,11 +666,11 @@ void hw_perf_enable(void) } val = 0; if (event->hw.sample_period) { - left = atomic64_read(&event->hw.period_left); + left = local64_read(&event->hw.period_left); if (left < 0x80000000L) val = 0x80000000L - left; } - atomic64_set(&event->hw.prev_count, val); + local64_set(&event->hw.prev_count, val); event->hw.idx = idx; write_pmc(idx, val); perf_event_update_userpage(event); @@ -842,8 +842,8 @@ static void power_pmu_unthrottle(struct perf_event *event) if (left < 0x80000000L) val = 0x80000000L - left; write_pmc(event->hw.idx, val); - atomic64_set(&event->hw.prev_count, val); - atomic64_set(&event->hw.period_left, left); + local64_set(&event->hw.prev_count, val); + local64_set(&event->hw.period_left, left); perf_event_update_userpage(event); perf_enable(); local_irq_restore(flags); @@ -1109,7 +1109,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) event->hw.config = events[n]; event->hw.event_base = cflags[n]; event->hw.last_period = event->hw.sample_period; - atomic64_set(&event->hw.period_left, event->hw.last_period); + local64_set(&event->hw.period_left, event->hw.last_period); /* * See if we need to reserve the PMU. @@ -1147,16 +1147,16 @@ static void record_and_restart(struct perf_event *event, unsigned long val, int record = 0; /* we don't have to worry about interrupts here */ - prev = atomic64_read(&event->hw.prev_count); + prev = local64_read(&event->hw.prev_count); delta = (val - prev) & 0xfffffffful; - atomic64_add(delta, &event->count); + local64_add(delta, &event->count); /* * See if the total period for this event has expired, * and update for the next period. */ val = 0; - left = atomic64_read(&event->hw.period_left) - delta; + left = local64_read(&event->hw.period_left) - delta; if (period) { if (left <= 0) { left += period; @@ -1194,8 +1194,8 @@ static void record_and_restart(struct perf_event *event, unsigned long val, } write_pmc(event->hw.idx, val); - atomic64_set(&event->hw.prev_count, val); - atomic64_set(&event->hw.period_left, left); + local64_set(&event->hw.prev_count, val); + local64_set(&event->hw.period_left, left); perf_event_update_userpage(event); } diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c index 81b6de41ae5d..7a3dc3567258 100644 --- a/arch/sh/kernel/perf_event.c +++ b/arch/sh/kernel/perf_event.c @@ -185,10 +185,10 @@ static void sh_perf_event_update(struct perf_event *event, * this is the simplest approach for maintaining consistency. */ again: - prev_raw_count = atomic64_read(&hwc->prev_count); + prev_raw_count = local64_read(&hwc->prev_count); new_raw_count = sh_pmu->read(idx); - if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, new_raw_count) != prev_raw_count) goto again; @@ -203,7 +203,7 @@ again: delta = (new_raw_count << shift) - (prev_raw_count << shift); delta >>= shift; - atomic64_add(delta, &event->count); + local64_add(delta, &event->count); } static void sh_pmu_disable(struct perf_event *event) diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index beeb92fa3acd..8a6660da8e08 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -572,18 +572,18 @@ static u64 sparc_perf_event_update(struct perf_event *event, s64 delta; again: - prev_raw_count = atomic64_read(&hwc->prev_count); + prev_raw_count = local64_read(&hwc->prev_count); new_raw_count = read_pmc(idx); - if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, new_raw_count) != prev_raw_count) goto again; delta = (new_raw_count << shift) - (prev_raw_count << shift); delta >>= shift; - atomic64_add(delta, &event->count); - atomic64_sub(delta, &hwc->period_left); + local64_add(delta, &event->count); + local64_sub(delta, &hwc->period_left); return new_raw_count; } @@ -591,27 +591,27 @@ again: static int sparc_perf_event_set_period(struct perf_event *event, struct hw_perf_event *hwc, int idx) { - s64 left = atomic64_read(&hwc->period_left); + s64 left = local64_read(&hwc->period_left); s64 period = hwc->sample_period; int ret = 0; if (unlikely(left <= -period)) { left = period; - atomic64_set(&hwc->period_left, left); + local64_set(&hwc->period_left, left); hwc->last_period = period; ret = 1; } if (unlikely(left <= 0)) { left += period; - atomic64_set(&hwc->period_left, left); + local64_set(&hwc->period_left, left); hwc->last_period = period; ret = 1; } if (left > MAX_PERIOD) left = MAX_PERIOD; - atomic64_set(&hwc->prev_count, (u64)-left); + local64_set(&hwc->prev_count, (u64)-left); write_pmc(idx, (u64)(-left) & 0xffffffff); @@ -1087,7 +1087,7 @@ static int __hw_perf_event_init(struct perf_event *event) if (!hwc->sample_period) { hwc->sample_period = MAX_PERIOD; hwc->last_period = hwc->sample_period; - atomic64_set(&hwc->period_left, hwc->sample_period); + local64_set(&hwc->period_left, hwc->sample_period); } return 0; diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 79e199843db6..2d0d29069275 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -296,10 +296,10 @@ x86_perf_event_update(struct perf_event *event) * count to the generic event atomically: */ again: - prev_raw_count = atomic64_read(&hwc->prev_count); + prev_raw_count = local64_read(&hwc->prev_count); rdmsrl(hwc->event_base + idx, new_raw_count); - if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, new_raw_count) != prev_raw_count) goto again; @@ -314,8 +314,8 @@ again: delta = (new_raw_count << shift) - (prev_raw_count << shift); delta >>= shift; - atomic64_add(delta, &event->count); - atomic64_sub(delta, &hwc->period_left); + local64_add(delta, &event->count); + local64_sub(delta, &hwc->period_left); return new_raw_count; } @@ -439,7 +439,7 @@ static int x86_setup_perfctr(struct perf_event *event) if (!hwc->sample_period) { hwc->sample_period = x86_pmu.max_period; hwc->last_period = hwc->sample_period; - atomic64_set(&hwc->period_left, hwc->sample_period); + local64_set(&hwc->period_left, hwc->sample_period); } else { /* * If we have a PMU initialized but no APIC @@ -886,7 +886,7 @@ static int x86_perf_event_set_period(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - s64 left = atomic64_read(&hwc->period_left); + s64 left = local64_read(&hwc->period_left); s64 period = hwc->sample_period; int ret = 0, idx = hwc->idx; @@ -898,14 +898,14 @@ x86_perf_event_set_period(struct perf_event *event) */ if (unlikely(left <= -period)) { left = period; - atomic64_set(&hwc->period_left, left); + local64_set(&hwc->period_left, left); hwc->last_period = period; ret = 1; } if (unlikely(left <= 0)) { left += period; - atomic64_set(&hwc->period_left, left); + local64_set(&hwc->period_left, left); hwc->last_period = period; ret = 1; } @@ -924,7 +924,7 @@ x86_perf_event_set_period(struct perf_event *event) * The hw event starts counting from this event offset, * mark it to be able to extra future deltas: */ - atomic64_set(&hwc->prev_count, (u64)-left); + local64_set(&hwc->prev_count, (u64)-left); wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f34dab9b275e..7342979f95f2 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -487,6 +487,7 @@ struct perf_guest_info_callbacks { #include #include #include +#include #define PERF_MAX_STACK_DEPTH 255 @@ -536,10 +537,10 @@ struct hw_perf_event { struct arch_hw_breakpoint info; #endif }; - atomic64_t prev_count; + local64_t prev_count; u64 sample_period; u64 last_period; - atomic64_t period_left; + local64_t period_left; u64 interrupts; u64 freq_time_stamp; @@ -670,7 +671,7 @@ struct perf_event { enum perf_event_active_state state; unsigned int attach_state; - atomic64_t count; + local64_t count; atomic64_t child_count; /* diff --git a/kernel/perf_event.c b/kernel/perf_event.c index a395fda2d94c..97c73018592e 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1148,9 +1148,9 @@ static void __perf_event_sync_stat(struct perf_event *event, * In order to keep per-task stats reliable we need to flip the event * values when we flip the contexts. */ - value = atomic64_read(&next_event->count); - value = atomic64_xchg(&event->count, value); - atomic64_set(&next_event->count, value); + value = local64_read(&next_event->count); + value = local64_xchg(&event->count, value); + local64_set(&next_event->count, value); swap(event->total_time_enabled, next_event->total_time_enabled); swap(event->total_time_running, next_event->total_time_running); @@ -1540,10 +1540,10 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) hwc->sample_period = sample_period; - if (atomic64_read(&hwc->period_left) > 8*sample_period) { + if (local64_read(&hwc->period_left) > 8*sample_period) { perf_disable(); perf_event_stop(event); - atomic64_set(&hwc->period_left, 0); + local64_set(&hwc->period_left, 0); perf_event_start(event); perf_enable(); } @@ -1584,7 +1584,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) perf_disable(); event->pmu->read(event); - now = atomic64_read(&event->count); + now = local64_read(&event->count); delta = now - hwc->freq_count_stamp; hwc->freq_count_stamp = now; @@ -1738,7 +1738,7 @@ static void __perf_event_read(void *info) static inline u64 perf_event_count(struct perf_event *event) { - return atomic64_read(&event->count) + atomic64_read(&event->child_count); + return local64_read(&event->count) + atomic64_read(&event->child_count); } static u64 perf_event_read(struct perf_event *event) @@ -2141,7 +2141,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait) static void perf_event_reset(struct perf_event *event) { (void)perf_event_read(event); - atomic64_set(&event->count, 0); + local64_set(&event->count, 0); perf_event_update_userpage(event); } @@ -2359,7 +2359,7 @@ void perf_event_update_userpage(struct perf_event *event) userpg->index = perf_event_index(event); userpg->offset = perf_event_count(event); if (event->state == PERF_EVENT_STATE_ACTIVE) - userpg->offset -= atomic64_read(&event->hw.prev_count); + userpg->offset -= local64_read(&event->hw.prev_count); userpg->time_enabled = event->total_time_enabled + atomic64_read(&event->child_total_time_enabled); @@ -4035,14 +4035,14 @@ static u64 perf_swevent_set_period(struct perf_event *event) hwc->last_period = hwc->sample_period; again: - old = val = atomic64_read(&hwc->period_left); + old = val = local64_read(&hwc->period_left); if (val < 0) return 0; nr = div64_u64(period + val, period); offset = nr * period; val -= offset; - if (atomic64_cmpxchg(&hwc->period_left, old, val) != old) + if (local64_cmpxchg(&hwc->period_left, old, val) != old) goto again; return nr; @@ -4081,7 +4081,7 @@ static void perf_swevent_add(struct perf_event *event, u64 nr, { struct hw_perf_event *hwc = &event->hw; - atomic64_add(nr, &event->count); + local64_add(nr, &event->count); if (!regs) return; @@ -4092,7 +4092,7 @@ static void perf_swevent_add(struct perf_event *event, u64 nr, if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) return perf_swevent_overflow(event, 1, nmi, data, regs); - if (atomic64_add_negative(nr, &hwc->period_left)) + if (local64_add_negative(nr, &hwc->period_left)) return; perf_swevent_overflow(event, 0, nmi, data, regs); @@ -4383,8 +4383,8 @@ static void cpu_clock_perf_event_update(struct perf_event *event) u64 now; now = cpu_clock(cpu); - prev = atomic64_xchg(&event->hw.prev_count, now); - atomic64_add(now - prev, &event->count); + prev = local64_xchg(&event->hw.prev_count, now); + local64_add(now - prev, &event->count); } static int cpu_clock_perf_event_enable(struct perf_event *event) @@ -4392,7 +4392,7 @@ static int cpu_clock_perf_event_enable(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; int cpu = raw_smp_processor_id(); - atomic64_set(&hwc->prev_count, cpu_clock(cpu)); + local64_set(&hwc->prev_count, cpu_clock(cpu)); perf_swevent_start_hrtimer(event); return 0; @@ -4424,9 +4424,9 @@ static void task_clock_perf_event_update(struct perf_event *event, u64 now) u64 prev; s64 delta; - prev = atomic64_xchg(&event->hw.prev_count, now); + prev = local64_xchg(&event->hw.prev_count, now); delta = now - prev; - atomic64_add(delta, &event->count); + local64_add(delta, &event->count); } static int task_clock_perf_event_enable(struct perf_event *event) @@ -4436,7 +4436,7 @@ static int task_clock_perf_event_enable(struct perf_event *event) now = event->ctx->time; - atomic64_set(&hwc->prev_count, now); + local64_set(&hwc->prev_count, now); perf_swevent_start_hrtimer(event); @@ -4879,7 +4879,7 @@ perf_event_alloc(struct perf_event_attr *attr, hwc->sample_period = 1; hwc->last_period = hwc->sample_period; - atomic64_set(&hwc->period_left, hwc->sample_period); + local64_set(&hwc->period_left, hwc->sample_period); /* * we currently do not support PERF_FORMAT_GROUP on inherited events @@ -5313,7 +5313,7 @@ inherit_event(struct perf_event *parent_event, hwc->sample_period = sample_period; hwc->last_period = sample_period; - atomic64_set(&hwc->period_left, sample_period); + local64_set(&hwc->period_left, sample_period); } child_event->overflow_handler = parent_event->overflow_handler; -- cgit v1.2.3