From 2ce4da2efcaca0dcbfed7a1f24177f18e75e0e89 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 26 Sep 2009 20:42:10 -0700 Subject: sparc: Support HW cache events. First supported chip for HW cache events is Ultra-IIIi. Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_event.c | 145 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 139 insertions(+), 6 deletions(-) (limited to 'arch/sparc/kernel') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 2d6a1b10c81d..48375f694673 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -68,8 +68,19 @@ struct perf_event_map { #define PIC_LOWER 0x02 }; +#define C(x) PERF_COUNT_HW_CACHE_##x + +#define CACHE_OP_UNSUPPORTED 0xfffe +#define CACHE_OP_NONSENSE 0xffff + +typedef struct perf_event_map cache_map_t + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; + struct sparc_pmu { const struct perf_event_map *(*event_map)(int); + const cache_map_t *cache_map; int max_events; int upper_shift; int lower_shift; @@ -92,8 +103,96 @@ static const struct perf_event_map *ultra3i_event_map(int event_id) return &ultra3i_perfmon_event_map[event_id]; } +static const cache_map_t ultra3i_cache_map = { +[C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, }, + [C(RESULT_MISS)] = { 0x09, PIC_UPPER, }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { 0x0a, PIC_LOWER }, + [C(RESULT_MISS)] = { 0x0a, PIC_UPPER }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, }, + [C(RESULT_MISS)] = { 0x09, PIC_UPPER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE }, + [ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER, }, + [C(RESULT_MISS)] = { 0x0c, PIC_UPPER, }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER }, + [C(RESULT_MISS)] = { 0x0c, PIC_UPPER }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x12, PIC_UPPER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x11, PIC_UPPER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +}; + static const struct sparc_pmu ultra3i_pmu = { .event_map = ultra3i_event_map, + .cache_map = &ultra3i_cache_map, .max_events = ARRAY_SIZE(ultra3i_perfmon_event_map), .upper_shift = 11, .lower_shift = 4, @@ -375,6 +474,37 @@ void perf_event_release_pmc(void) } } +static const struct perf_event_map *sparc_map_cache_event(u64 config) +{ + unsigned int cache_type, cache_op, cache_result; + const struct perf_event_map *pmap; + + if (!sparc_pmu->cache_map) + return ERR_PTR(-ENOENT); + + cache_type = (config >> 0) & 0xff; + if (cache_type >= PERF_COUNT_HW_CACHE_MAX) + return ERR_PTR(-EINVAL); + + cache_op = (config >> 8) & 0xff; + if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) + return ERR_PTR(-EINVAL); + + cache_result = (config >> 16) & 0xff; + if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return ERR_PTR(-EINVAL); + + pmap = &((*sparc_pmu->cache_map)[cache_type][cache_op][cache_result]); + + if (pmap->encoding == CACHE_OP_UNSUPPORTED) + return ERR_PTR(-ENOENT); + + if (pmap->encoding == CACHE_OP_NONSENSE) + return ERR_PTR(-EINVAL); + + return pmap; +} + static void hw_perf_event_destroy(struct perf_event *event) { perf_event_release_pmc(); @@ -390,12 +520,17 @@ static int __hw_perf_event_init(struct perf_event *event) if (atomic_read(&nmi_active) < 0) return -ENODEV; - if (attr->type != PERF_TYPE_HARDWARE) + if (attr->type == PERF_TYPE_HARDWARE) { + if (attr->config >= sparc_pmu->max_events) + return -EINVAL; + pmap = sparc_pmu->event_map(attr->config); + } else if (attr->type == PERF_TYPE_HW_CACHE) { + pmap = sparc_map_cache_event(attr->config); + if (IS_ERR(pmap)) + return PTR_ERR(pmap); + } else return -EOPNOTSUPP; - if (attr->config >= sparc_pmu->max_events) - return -EINVAL; - perf_event_grab_pmc(); event->destroy = hw_perf_event_destroy; @@ -417,8 +552,6 @@ static int __hw_perf_event_init(struct perf_event *event) atomic64_set(&hwc->period_left, hwc->sample_period); } - pmap = sparc_pmu->event_map(attr->config); - enc = pmap->encoding; if (pmap->pic_mask & PIC_UPPER) { hwc->idx = PIC_UPPER_INDEX; -- cgit v1.2.3 From 28e8f9bead060aafc630a4256d23e2a55fb8b97d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 26 Sep 2009 20:54:22 -0700 Subject: sparc: Support all ultra3 and ultra4 derivatives. For the generic events we support, all of these chips have the same encodings as ultra3i. Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_event.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'arch/sparc/kernel') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 48375f694673..8abdc4d1baa5 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -91,19 +91,19 @@ struct sparc_pmu { int lower_nop; }; -static const struct perf_event_map ultra3i_perfmon_event_map[] = { +static const struct perf_event_map ultra3_perfmon_event_map[] = { [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER }, [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER }, [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER }, [PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER }, }; -static const struct perf_event_map *ultra3i_event_map(int event_id) +static const struct perf_event_map *ultra3_event_map(int event_id) { - return &ultra3i_perfmon_event_map[event_id]; + return &ultra3_perfmon_event_map[event_id]; } -static const cache_map_t ultra3i_cache_map = { +static const cache_map_t ultra3_cache_map = { [C(L1D)] = { [C(OP_READ)] = { [C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, }, @@ -190,10 +190,10 @@ static const cache_map_t ultra3i_cache_map = { }, }; -static const struct sparc_pmu ultra3i_pmu = { - .event_map = ultra3i_event_map, - .cache_map = &ultra3i_cache_map, - .max_events = ARRAY_SIZE(ultra3i_perfmon_event_map), +static const struct sparc_pmu ultra3_pmu = { + .event_map = ultra3_event_map, + .cache_map = &ultra3_cache_map, + .max_events = ARRAY_SIZE(ultra3_perfmon_event_map), .upper_shift = 11, .lower_shift = 4, .event_mask = 0x3f, @@ -658,8 +658,11 @@ static __read_mostly struct notifier_block perf_event_nmi_notifier = { static bool __init supported_pmu(void) { - if (!strcmp(sparc_pmu_type, "ultra3i")) { - sparc_pmu = &ultra3i_pmu; + if (!strcmp(sparc_pmu_type, "ultra3") || + !strcmp(sparc_pmu_type, "ultra3+") || + !strcmp(sparc_pmu_type, "ultra3i") || + !strcmp(sparc_pmu_type, "ultra4+")) { + sparc_pmu = &ultra3_pmu; return true; } if (!strcmp(sparc_pmu_type, "niagara2")) { -- cgit v1.2.3 From d0b86480f5b33f4a86d7c106706d6e0dcd1935ce Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 26 Sep 2009 21:04:16 -0700 Subject: sparc: Add Niagara2 HW cache event support. Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_event.c | 88 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) (limited to 'arch/sparc/kernel') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 8abdc4d1baa5..6f01e04cc323 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -215,8 +215,96 @@ static const struct perf_event_map *niagara2_event_map(int event_id) return &niagara2_perfmon_event_map[event_id]; } +static const cache_map_t niagara2_cache_map = { +[C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, }, + [C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, }, + [C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x02ff, PIC_UPPER | PIC_LOWER, }, + [C(RESULT_MISS)] = { 0x0301, PIC_UPPER | PIC_LOWER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE }, + [ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, }, + [C(RESULT_MISS)] = { 0x0330, PIC_UPPER | PIC_LOWER, }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, }, + [C(RESULT_MISS)] = { 0x0320, PIC_UPPER | PIC_LOWER, }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x0b08, PIC_UPPER | PIC_LOWER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0xb04, PIC_UPPER | PIC_LOWER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +}; + static const struct sparc_pmu niagara2_pmu = { .event_map = niagara2_event_map, + .cache_map = &niagara2_cache_map, .max_events = ARRAY_SIZE(niagara2_perfmon_event_map), .upper_shift = 19, .lower_shift = 6, -- cgit v1.2.3 From 7eebda60d57a0862a410f45122c73b8bbe6e260c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 26 Sep 2009 21:23:41 -0700 Subject: sparc: Niagara1 perf event support. This chip is extremely limited, and many of the events supported are approximations at best. Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_event.c | 119 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) (limited to 'arch/sparc/kernel') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 6f01e04cc323..9541b456c3ee 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -201,6 +201,121 @@ static const struct sparc_pmu ultra3_pmu = { .lower_nop = 0x14, }; +/* Niagara1 is very limited. The upper PIC is hard-locked to count + * only instructions, so it is free running which creates all kinds of + * problems. Some hardware designs make one wonder if the creastor + * even looked at how this stuff gets used by software. + */ +static const struct perf_event_map niagara1_perfmon_event_map[] = { + [PERF_COUNT_HW_CPU_CYCLES] = { 0x00, PIC_UPPER }, + [PERF_COUNT_HW_INSTRUCTIONS] = { 0x00, PIC_UPPER }, + [PERF_COUNT_HW_CACHE_REFERENCES] = { 0, PIC_NONE }, + [PERF_COUNT_HW_CACHE_MISSES] = { 0x03, PIC_LOWER }, +}; + +static const struct perf_event_map *niagara1_event_map(int event_id) +{ + return &niagara1_perfmon_event_map[event_id]; +} + +static const cache_map_t niagara1_cache_map = { +[C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x03, PIC_LOWER, }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x03, PIC_LOWER, }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x00, PIC_UPPER }, + [C(RESULT_MISS)] = { 0x02, PIC_LOWER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE }, + [ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x07, PIC_LOWER, }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x07, PIC_LOWER, }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x05, PIC_LOWER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x04, PIC_LOWER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +}; + +static const struct sparc_pmu niagara1_pmu = { + .event_map = niagara1_event_map, + .cache_map = &niagara1_cache_map, + .max_events = ARRAY_SIZE(niagara1_perfmon_event_map), + .upper_shift = 0, + .lower_shift = 4, + .event_mask = 0x7, + .upper_nop = 0x0, + .lower_nop = 0x0, +}; + static const struct perf_event_map niagara2_perfmon_event_map[] = { [PERF_COUNT_HW_CPU_CYCLES] = { 0x02ff, PIC_UPPER | PIC_LOWER }, [PERF_COUNT_HW_INSTRUCTIONS] = { 0x02ff, PIC_UPPER | PIC_LOWER }, @@ -753,6 +868,10 @@ static bool __init supported_pmu(void) sparc_pmu = &ultra3_pmu; return true; } + if (!strcmp(sparc_pmu_type, "niagara")) { + sparc_pmu = &niagara1_pmu; + return true; + } if (!strcmp(sparc_pmu_type, "niagara2")) { sparc_pmu = &niagara2_pmu; return true; -- cgit v1.2.3 From 01552f765cae873d0ea3cca1e64e41dfd62659e6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 27 Sep 2009 20:43:07 -0700 Subject: sparc64: Add initial perf event conflict resolution and checks. Cribbed from powerpc code, as usual. :-) Currently it is only used to validate that all counters have the same user/kernel/hv attributes. Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_event.c | 82 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 77 insertions(+), 5 deletions(-) (limited to 'arch/sparc/kernel') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 9541b456c3ee..919952498155 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -713,12 +713,66 @@ static void hw_perf_event_destroy(struct perf_event *event) perf_event_release_pmc(); } +static int check_excludes(struct perf_event **evts, int n_prev, int n_new) +{ + int eu = 0, ek = 0, eh = 0; + struct perf_event *event; + int i, n, first; + + n = n_prev + n_new; + if (n <= 1) + return 0; + + first = 1; + for (i = 0; i < n; i++) { + event = evts[i]; + if (first) { + eu = event->attr.exclude_user; + ek = event->attr.exclude_kernel; + eh = event->attr.exclude_hv; + first = 0; + } else if (event->attr.exclude_user != eu || + event->attr.exclude_kernel != ek || + event->attr.exclude_hv != eh) { + return -EAGAIN; + } + } + + return 0; +} + +static int collect_events(struct perf_event *group, int max_count, + struct perf_event *evts[], u64 *events) +{ + struct perf_event *event; + int n = 0; + + if (!is_software_event(group)) { + if (n >= max_count) + return -1; + evts[n] = group; + events[n++] = group->hw.config; + } + list_for_each_entry(event, &group->sibling_list, group_entry) { + if (!is_software_event(event) && + event->state != PERF_EVENT_STATE_OFF) { + if (n >= max_count) + return -1; + evts[n] = event; + events[n++] = event->hw.config; + } + } + return n; +} + static int __hw_perf_event_init(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; + struct perf_event *evts[MAX_HWEVENTS]; struct hw_perf_event *hwc = &event->hw; const struct perf_event_map *pmap; - u64 enc; + u64 enc, events[MAX_HWEVENTS]; + int n; if (atomic_read(&nmi_active) < 0) return -ENODEV; @@ -734,9 +788,6 @@ static int __hw_perf_event_init(struct perf_event *event) } else return -EOPNOTSUPP; - perf_event_grab_pmc(); - event->destroy = hw_perf_event_destroy; - /* We save the enable bits in the config_base. So to * turn off sampling just write 'config', and to enable * things write 'config | config_base'. @@ -749,13 +800,34 @@ static int __hw_perf_event_init(struct perf_event *event) if (!attr->exclude_hv) hwc->config_base |= sparc_pmu->hv_bit; + enc = pmap->encoding; + + n = 0; + if (event->group_leader != event) { + n = collect_events(event->group_leader, + perf_max_events - 1, + evts, events); + if (n < 0) + return -EINVAL; + } + events[n] = enc; + evts[n] = event; + + if (check_excludes(evts, n, 1)) + return -EINVAL; + + /* Try to do all error checking before this point, as unwinding + * state after grabbing the PMC is difficult. + */ + perf_event_grab_pmc(); + event->destroy = hw_perf_event_destroy; + if (!hwc->sample_period) { hwc->sample_period = MAX_PERIOD; hwc->last_period = hwc->sample_period; atomic64_set(&hwc->period_left, hwc->sample_period); } - enc = pmap->encoding; if (pmap->pic_mask & PIC_UPPER) { hwc->idx = PIC_UPPER_INDEX; enc <<= sparc_pmu->upper_shift; -- cgit v1.2.3 From 1b6b9d62475bdea00b366cb80b0fadba446f176d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 28 Sep 2009 14:39:58 -0700 Subject: sparc64: Increase vmalloc size to fix percpu regressions. Since we now use the embedding percpu allocator we have to make the vmalloc area at least as large as the stretch can be between nodes. Besides some minor asm adjustments, this turned out to be pretty trivial. Signed-off-by: David S. Miller --- arch/sparc/include/asm/pgtable_64.h | 4 ++-- arch/sparc/kernel/ktlb.S | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/sparc/kernel') diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 0ff92fa22064..f3cb790fa2ae 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -41,8 +41,8 @@ #define LOW_OBP_ADDRESS _AC(0x00000000f0000000,UL) #define HI_OBP_ADDRESS _AC(0x0000000100000000,UL) #define VMALLOC_START _AC(0x0000000100000000,UL) -#define VMALLOC_END _AC(0x0000000200000000,UL) -#define VMEMMAP_BASE _AC(0x0000000200000000,UL) +#define VMALLOC_END _AC(0x0000010000000000,UL) +#define VMEMMAP_BASE _AC(0x0000010000000000,UL) #define vmemmap ((struct page *)VMEMMAP_BASE) diff --git a/arch/sparc/kernel/ktlb.S b/arch/sparc/kernel/ktlb.S index 3ea6e8cde8c5..1d361477d7d6 100644 --- a/arch/sparc/kernel/ktlb.S +++ b/arch/sparc/kernel/ktlb.S @@ -280,8 +280,8 @@ kvmap_dtlb_nonlinear: #ifdef CONFIG_SPARSEMEM_VMEMMAP /* Do not use the TSB for vmemmap. */ - mov (VMEMMAP_BASE >> 24), %g5 - sllx %g5, 24, %g5 + mov (VMEMMAP_BASE >> 40), %g5 + sllx %g5, 40, %g5 cmp %g4,%g5 bgeu,pn %xcc, kvmap_vmemmap nop @@ -293,8 +293,8 @@ kvmap_dtlb_tsbmiss: sethi %hi(MODULES_VADDR), %g5 cmp %g4, %g5 blu,pn %xcc, kvmap_dtlb_longpath - mov (VMALLOC_END >> 24), %g5 - sllx %g5, 24, %g5 + mov (VMALLOC_END >> 40), %g5 + sllx %g5, 40, %g5 cmp %g4, %g5 bgeu,pn %xcc, kvmap_dtlb_longpath nop -- cgit v1.2.3 From a70c691376c7c7f94af41395848066f59501fffd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 24 Sep 2009 18:18:55 +0900 Subject: sparc64: implement page mapping percpu first chunk allocator Implement page mapping percpu first chunk allocator as a fallback to the embedding allocator. The next patch will make the embedding allocator check distances between units to determine whether it fits within the vmalloc area so that this fallback can be used on such cases. sparc64 currently has relatively small vmalloc area which makes it impossible to create any dynamic chunks on certain configurations leading to percpu allocation failures. This and the next patch should allow those configurations to keep working until proper solution is found. While at it, mark pcpu_cpu_distance() with __init. Signed-off-by: Tejun Heo Acked-by: David S. Miller --- arch/sparc/Kconfig | 3 +++ arch/sparc/kernel/smp_64.c | 53 ++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 47 insertions(+), 9 deletions(-) (limited to 'arch/sparc/kernel') diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 97fca4695e0b..ac45aab741a5 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -102,6 +102,9 @@ config HAVE_SETUP_PER_CPU_AREA config NEED_PER_CPU_EMBED_FIRST_CHUNK def_bool y if SPARC64 +config NEED_PER_CPU_PAGE_FIRST_CHUNK + def_bool y if SPARC64 + config GENERIC_HARDIRQS_NO__DO_IRQ bool def_bool y if SPARC64 diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index ff68373ce6d6..aa36223497b9 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1420,7 +1420,7 @@ static void __init pcpu_free_bootmem(void *ptr, size_t size) free_bootmem(__pa(ptr), size); } -static int pcpu_cpu_distance(unsigned int from, unsigned int to) +static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) { if (cpu_to_node(from) == cpu_to_node(to)) return LOCAL_DISTANCE; @@ -1428,18 +1428,53 @@ static int pcpu_cpu_distance(unsigned int from, unsigned int to) return REMOTE_DISTANCE; } +static void __init pcpu_populate_pte(unsigned long addr) +{ + pgd_t *pgd = pgd_offset_k(addr); + pud_t *pud; + pmd_t *pmd; + + pud = pud_offset(pgd, addr); + if (pud_none(*pud)) { + pmd_t *new; + + new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); + pud_populate(&init_mm, pud, new); + } + + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd)) { + pte_t *new; + + new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); + pmd_populate_kernel(&init_mm, pmd, new); + } +} + void __init setup_per_cpu_areas(void) { unsigned long delta; unsigned int cpu; - int rc; - - rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, - PERCPU_DYNAMIC_RESERVE, 4 << 20, - pcpu_cpu_distance, pcpu_alloc_bootmem, - pcpu_free_bootmem); - if (rc) - panic("failed to initialize first chunk (%d)", rc); + int rc = -EINVAL; + + if (pcpu_chosen_fc != PCPU_FC_PAGE) { + rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, + PERCPU_DYNAMIC_RESERVE, 4 << 20, + pcpu_cpu_distance, + pcpu_alloc_bootmem, + pcpu_free_bootmem); + if (rc) + pr_warning("PERCPU: %s allocator failed (%d), " + "falling back to page size\n", + pcpu_fc_names[pcpu_chosen_fc], rc); + } + if (rc < 0) + rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, + pcpu_alloc_bootmem, + pcpu_free_bootmem, + pcpu_populate_pte); + if (rc < 0) + panic("cannot initialize percpu area (err=%d)", rc); delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) -- cgit v1.2.3 From a72a8a5f2ea32074e98803d4b15d0e093c5a9e4d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 28 Sep 2009 17:35:20 -0700 Subject: sparc64: Add a basic conflict engine in preparation for multi-counter support. The hardware counter ->event_base state records and encoding of the "struct perf_event_map" entry used for the event. We use this to make sure that when we have more than 1 event, both can be scheduled into the hardware at the same time. As usual, structure of code is largely cribbed from powerpc. Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_event.c | 69 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 64 insertions(+), 5 deletions(-) (limited to 'arch/sparc/kernel') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 919952498155..2b7743466ae4 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -68,6 +68,17 @@ struct perf_event_map { #define PIC_LOWER 0x02 }; +static unsigned long perf_event_encode(const struct perf_event_map *pmap) +{ + return ((unsigned long) pmap->encoding << 16) | pmap->pic_mask; +} + +static void perf_event_decode(unsigned long val, u16 *enc, u8 *msk) +{ + *msk = val & 0xff; + *enc = val >> 16; +} + #define C(x) PERF_COUNT_HW_CACHE_##x #define CACHE_OP_UNSUPPORTED 0xfffe @@ -713,6 +724,48 @@ static void hw_perf_event_destroy(struct perf_event *event) perf_event_release_pmc(); } +/* Make sure all events can be scheduled into the hardware at + * the same time. This is simplified by the fact that we only + * need to support 2 simultaneous HW events. + */ +static int sparc_check_constraints(unsigned long *events, int n_ev) +{ + if (n_ev <= perf_max_events) { + u8 msk1, msk2; + u16 dummy; + + if (n_ev == 1) + return 0; + BUG_ON(n_ev != 2); + perf_event_decode(events[0], &dummy, &msk1); + perf_event_decode(events[1], &dummy, &msk2); + + /* If both events can go on any counter, OK. */ + if (msk1 == (PIC_UPPER | PIC_LOWER) && + msk2 == (PIC_UPPER | PIC_LOWER)) + return 0; + + /* If one event is limited to a specific counter, + * and the other can go on both, OK. + */ + if ((msk1 == PIC_UPPER || msk1 == PIC_LOWER) && + msk2 == (PIC_UPPER | PIC_LOWER)) + return 0; + if ((msk2 == PIC_UPPER || msk2 == PIC_LOWER) && + msk1 == (PIC_UPPER | PIC_LOWER)) + return 0; + + /* If the events are fixed to different counters, OK. */ + if ((msk1 == PIC_UPPER && msk2 == PIC_LOWER) || + (msk1 == PIC_LOWER && msk2 == PIC_UPPER)) + return 0; + + /* Otherwise, there is a conflict. */ + } + + return -1; +} + static int check_excludes(struct perf_event **evts, int n_prev, int n_new) { int eu = 0, ek = 0, eh = 0; @@ -742,7 +795,7 @@ static int check_excludes(struct perf_event **evts, int n_prev, int n_new) } static int collect_events(struct perf_event *group, int max_count, - struct perf_event *evts[], u64 *events) + struct perf_event *evts[], unsigned long *events) { struct perf_event *event; int n = 0; @@ -751,7 +804,7 @@ static int collect_events(struct perf_event *group, int max_count, if (n >= max_count) return -1; evts[n] = group; - events[n++] = group->hw.config; + events[n++] = group->hw.event_base; } list_for_each_entry(event, &group->sibling_list, group_entry) { if (!is_software_event(event) && @@ -759,7 +812,7 @@ static int collect_events(struct perf_event *group, int max_count, if (n >= max_count) return -1; evts[n] = event; - events[n++] = event->hw.config; + events[n++] = event->hw.event_base; } } return n; @@ -770,8 +823,9 @@ static int __hw_perf_event_init(struct perf_event *event) struct perf_event_attr *attr = &event->attr; struct perf_event *evts[MAX_HWEVENTS]; struct hw_perf_event *hwc = &event->hw; + unsigned long events[MAX_HWEVENTS]; const struct perf_event_map *pmap; - u64 enc, events[MAX_HWEVENTS]; + u64 enc; int n; if (atomic_read(&nmi_active) < 0) @@ -800,6 +854,8 @@ static int __hw_perf_event_init(struct perf_event *event) if (!attr->exclude_hv) hwc->config_base |= sparc_pmu->hv_bit; + hwc->event_base = perf_event_encode(pmap); + enc = pmap->encoding; n = 0; @@ -810,12 +866,15 @@ static int __hw_perf_event_init(struct perf_event *event) if (n < 0) return -EINVAL; } - events[n] = enc; + events[n] = hwc->event_base; evts[n] = event; if (check_excludes(evts, n, 1)) return -EINVAL; + if (sparc_check_constraints(events, n + 1)) + return -EINVAL; + /* Try to do all error checking before this point, as unwinding * state after grabbing the PMC is difficult. */ -- cgit v1.2.3 From d29862f03575cdfa8819f78b0f3f78eec3b44629 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 28 Sep 2009 17:37:12 -0700 Subject: sparc64: Minor coding style fixups in perf code. These got introduced during the counter --> event tree-wide renaming. Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_event.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'arch/sparc/kernel') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 2b7743466ae4..03b041c4d95c 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -464,8 +464,7 @@ static u64 nop_for_index(int idx) sparc_pmu->lower_nop, idx); } -static inline void sparc_pmu_enable_event(struct hw_perf_event *hwc, - int idx) +static inline void sparc_pmu_enable_event(struct hw_perf_event *hwc, int idx) { u64 val, mask = mask_for_index(idx); @@ -473,8 +472,7 @@ static inline void sparc_pmu_enable_event(struct hw_perf_event *hwc, pcr_ops->write((val & ~mask) | hwc->config); } -static inline void sparc_pmu_disable_event(struct hw_perf_event *hwc, - int idx) +static inline void sparc_pmu_disable_event(struct hw_perf_event *hwc, int idx) { u64 mask = mask_for_index(idx); u64 nop = nop_for_index(idx); @@ -555,7 +553,7 @@ static void write_pmc(int idx, u64 val) } static int sparc_perf_event_set_period(struct perf_event *event, - struct hw_perf_event *hwc, int idx) + struct hw_perf_event *hwc, int idx) { s64 left = atomic64_read(&hwc->period_left); s64 period = hwc->sample_period; @@ -607,7 +605,7 @@ static int sparc_pmu_enable(struct perf_event *event) } static u64 sparc_perf_event_update(struct perf_event *event, - struct hw_perf_event *hwc, int idx) + struct hw_perf_event *hwc, int idx) { int shift = 64 - 32; u64 prev_raw_count, new_raw_count; @@ -939,7 +937,7 @@ void perf_event_print_debug(void) } static int __kprobes perf_event_nmi_handler(struct notifier_block *self, - unsigned long cmd, void *__args) + unsigned long cmd, void *__args) { struct die_args *args = __args; struct perf_sample_data data; -- cgit v1.2.3 From 6e804251d119bbd5522d76bdb0f48f5c9a7abf51 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 29 Sep 2009 15:10:23 -0700 Subject: sparc64: Fix comment typo in perf_event.c Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/sparc/kernel') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 03b041c4d95c..32fc974bf8b5 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -214,7 +214,7 @@ static const struct sparc_pmu ultra3_pmu = { /* Niagara1 is very limited. The upper PIC is hard-locked to count * only instructions, so it is free running which creates all kinds of - * problems. Some hardware designs make one wonder if the creastor + * problems. Some hardware designs make one wonder if the creator * even looked at how this stuff gets used by software. */ static const struct perf_event_map niagara1_perfmon_event_map[] = { -- cgit v1.2.3 From d17513889a8b754c5872b6b46e6f7822338a0b79 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 29 Sep 2009 21:27:06 -0700 Subject: sparc64: Cache per-cpu %pcr register value in perf code. Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_event.c | 59 ++++++++++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 17 deletions(-) (limited to 'arch/sparc/kernel') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 32fc974bf8b5..04db92743896 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -56,7 +56,8 @@ struct cpu_hw_events { struct perf_event *events[MAX_HWEVENTS]; unsigned long used_mask[BITS_TO_LONGS(MAX_HWEVENTS)]; unsigned long active_mask[BITS_TO_LONGS(MAX_HWEVENTS)]; - int enabled; + u64 pcr; + int enabled; }; DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; @@ -464,21 +465,30 @@ static u64 nop_for_index(int idx) sparc_pmu->lower_nop, idx); } -static inline void sparc_pmu_enable_event(struct hw_perf_event *hwc, int idx) +static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) { u64 val, mask = mask_for_index(idx); - val = pcr_ops->read(); - pcr_ops->write((val & ~mask) | hwc->config); + val = cpuc->pcr; + val &= ~mask; + val |= hwc->config; + cpuc->pcr = val; + + pcr_ops->write(cpuc->pcr); } -static inline void sparc_pmu_disable_event(struct hw_perf_event *hwc, int idx) +static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) { u64 mask = mask_for_index(idx); u64 nop = nop_for_index(idx); - u64 val = pcr_ops->read(); + u64 val; - pcr_ops->write((val & ~mask) | nop); + val = cpuc->pcr; + val &= ~mask; + val |= nop; + cpuc->pcr = val; + + pcr_ops->write(cpuc->pcr); } void hw_perf_enable(void) @@ -493,7 +503,7 @@ void hw_perf_enable(void) cpuc->enabled = 1; barrier(); - val = pcr_ops->read(); + val = cpuc->pcr; for (i = 0; i < MAX_HWEVENTS; i++) { struct perf_event *cp = cpuc->events[i]; @@ -505,7 +515,9 @@ void hw_perf_enable(void) val |= hwc->config_base; } - pcr_ops->write(val); + cpuc->pcr = val; + + pcr_ops->write(cpuc->pcr); } void hw_perf_disable(void) @@ -518,10 +530,12 @@ void hw_perf_disable(void) cpuc->enabled = 0; - val = pcr_ops->read(); + val = cpuc->pcr; val &= ~(PCR_UTRACE | PCR_STRACE | sparc_pmu->hv_bit | sparc_pmu->irq_bit); - pcr_ops->write(val); + cpuc->pcr = val; + + pcr_ops->write(cpuc->pcr); } static u32 read_pmc(int idx) @@ -593,13 +607,13 @@ static int sparc_pmu_enable(struct perf_event *event) if (test_and_set_bit(idx, cpuc->used_mask)) return -EAGAIN; - sparc_pmu_disable_event(hwc, idx); + sparc_pmu_disable_event(cpuc, hwc, idx); cpuc->events[idx] = event; set_bit(idx, cpuc->active_mask); sparc_perf_event_set_period(event, hwc, idx); - sparc_pmu_enable_event(hwc, idx); + sparc_pmu_enable_event(cpuc, hwc, idx); perf_event_update_userpage(event); return 0; } @@ -635,7 +649,7 @@ static void sparc_pmu_disable(struct perf_event *event) int idx = hwc->idx; clear_bit(idx, cpuc->active_mask); - sparc_pmu_disable_event(hwc, idx); + sparc_pmu_disable_event(cpuc, hwc, idx); barrier(); @@ -649,18 +663,29 @@ static void sparc_pmu_disable(struct perf_event *event) static void sparc_pmu_read(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; + sparc_perf_event_update(event, hwc, hwc->idx); } static void sparc_pmu_unthrottle(struct perf_event *event) { + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - sparc_pmu_enable_event(hwc, hwc->idx); + + sparc_pmu_enable_event(cpuc, hwc, hwc->idx); } static atomic_t active_events = ATOMIC_INIT(0); static DEFINE_MUTEX(pmc_grab_mutex); +static void perf_stop_nmi_watchdog(void *unused) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + stop_nmi_watchdog(NULL); + cpuc->pcr = pcr_ops->read(); +} + void perf_event_grab_pmc(void) { if (atomic_inc_not_zero(&active_events)) @@ -669,7 +694,7 @@ void perf_event_grab_pmc(void) mutex_lock(&pmc_grab_mutex); if (atomic_read(&active_events) == 0) { if (atomic_read(&nmi_active) > 0) { - on_each_cpu(stop_nmi_watchdog, NULL, 1); + on_each_cpu(perf_stop_nmi_watchdog, NULL, 1); BUG_ON(atomic_read(&nmi_active) != 0); } atomic_inc(&active_events); @@ -978,7 +1003,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, continue; if (perf_event_overflow(event, 1, &data, regs)) - sparc_pmu_disable_event(hwc, idx); + sparc_pmu_disable_event(cpuc, hwc, idx); } return NOTIFY_STOP; -- cgit v1.2.3 From de23cf3c42618998a7165364f987267ac9b298f0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 9 Oct 2009 00:42:40 -0700 Subject: sparc64: Fix niagara2 perf IRQ bits. Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/sparc/kernel') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 04db92743896..fa5936e1c3b9 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -437,7 +437,7 @@ static const struct sparc_pmu niagara2_pmu = { .lower_shift = 6, .event_mask = 0xfff, .hv_bit = 0x8, - .irq_bit = 0x03, + .irq_bit = 0x30, .upper_nop = 0x220, .lower_nop = 0x220, }; -- cgit v1.2.3 From c58543c869606532c2382f027d6466f4672ea756 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 13 Oct 2009 00:49:09 -0700 Subject: sparc64: Set IRQF_DISABLED on LDC channel IRQs. With lots of virtual devices it's easy to generate a lot of events and chew up the kernel IRQ stack. Reported-by: hyl Signed-off-by: David S. Miller --- arch/sparc/kernel/ldc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/sparc/kernel') diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c index adf5f273868a..cb3c72c45aab 100644 --- a/arch/sparc/kernel/ldc.c +++ b/arch/sparc/kernel/ldc.c @@ -1242,13 +1242,13 @@ int ldc_bind(struct ldc_channel *lp, const char *name) snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name); err = request_irq(lp->cfg.rx_irq, ldc_rx, - IRQF_SAMPLE_RANDOM | IRQF_SHARED, + IRQF_SAMPLE_RANDOM | IRQF_DISABLED | IRQF_SHARED, lp->rx_irq_name, lp); if (err) return err; err = request_irq(lp->cfg.tx_irq, ldc_tx, - IRQF_SAMPLE_RANDOM | IRQF_SHARED, + IRQF_SAMPLE_RANDOM | IRQF_DISABLED | IRQF_SHARED, lp->tx_irq_name, lp); if (err) { free_irq(lp->cfg.rx_irq, lp); -- cgit v1.2.3 From 88b938e63e68fd35e603421f722be0f35dde1016 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Sun, 8 Nov 2009 00:26:56 -0800 Subject: sparc64: replace parentheses in pmul() `>>' has a higher precedence than `?' so src2 evaluated to either 16 or 0 dependent on the bits set in rs2. Signed-off-by: Roel Kluin Signed-off-by: David S. Miller --- arch/sparc/kernel/visemul.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/sparc/kernel') diff --git a/arch/sparc/kernel/visemul.c b/arch/sparc/kernel/visemul.c index b956fd71c131..d231cbd5c526 100644 --- a/arch/sparc/kernel/visemul.c +++ b/arch/sparc/kernel/visemul.c @@ -617,7 +617,7 @@ static void pmul(struct pt_regs *regs, unsigned int insn, unsigned int opf) rs2 = fps_regval(f, RS2(insn)); rd_val = 0; - src2 = (rs2 >> (opf == FMUL8x16AU_OPF) ? 16 : 0); + src2 = rs2 >> (opf == FMUL8x16AU_OPF ? 16 : 0); for (byte = 0; byte < 4; byte++) { u16 src1 = (rs1 >> (byte * 8)) & 0x00ff; u32 prod = src1 * src2; -- cgit v1.2.3 From 1c9d80ddc60f8ac26344ec3db9830e5f8016c16d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 8 Nov 2009 17:41:20 -0800 Subject: sparc: Move of_set_property_mutex acquisition outside of devtree_lock grab. Otherwise we try to sleep with preemption disabled, etc. Noticed by Thomas Gleixner. Signed-off-by: David S. Miller --- arch/sparc/kernel/prom_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/sparc/kernel') diff --git a/arch/sparc/kernel/prom_common.c b/arch/sparc/kernel/prom_common.c index 138910c67206..d80a65d9e893 100644 --- a/arch/sparc/kernel/prom_common.c +++ b/arch/sparc/kernel/prom_common.c @@ -79,6 +79,7 @@ int of_set_property(struct device_node *dp, const char *name, void *val, int len err = -ENODEV; + mutex_lock(&of_set_property_mutex); write_lock(&devtree_lock); prevp = &dp->properties; while (*prevp) { @@ -88,9 +89,7 @@ int of_set_property(struct device_node *dp, const char *name, void *val, int len void *old_val = prop->value; int ret; - mutex_lock(&of_set_property_mutex); ret = prom_setprop(dp->node, name, val, len); - mutex_unlock(&of_set_property_mutex); err = -EINVAL; if (ret >= 0) { @@ -109,6 +108,7 @@ int of_set_property(struct device_node *dp, const char *name, void *val, int len prevp = &(*prevp)->next; } write_unlock(&devtree_lock); + mutex_unlock(&of_set_property_mutex); /* XXX Upate procfs if necessary... */ -- cgit v1.2.3