Diffstat (limited to 'arch')
-rw-r--r--  arch/powerpc/include/asm/hw_irq.h        |   1
-rw-r--r--  arch/powerpc/include/asm/perf_counter.h  |   2
-rw-r--r--  arch/powerpc/kernel/perf_counter.c       |   4
-rw-r--r--  arch/x86/include/asm/perf_counter.h      |   5
-rw-r--r--  arch/x86/include/asm/pgtable_32.h        |   8
-rw-r--r--  arch/x86/kernel/cpu/perf_counter.c       | 115
-rw-r--r--  arch/x86/mm/gup.c                        |  56
7 files changed, 137 insertions, 54 deletions
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index b7f8f4a87cc0..10a642df014e 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -162,7 +162,6 @@ static inline unsigned long test_perf_counter_pending(void)
 	return 0;
 }
 
-static inline void set_perf_counter_pending(void) {}
 static inline void clear_perf_counter_pending(void) {}
 
 #endif /* CONFIG_PERF_COUNTERS */
diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h
index cc7c887705b8..b398a84edced 100644
--- a/arch/powerpc/include/asm/perf_counter.h
+++ b/arch/powerpc/include/asm/perf_counter.h
@@ -10,6 +10,8 @@
  */
 #include <linux/types.h>
 
+#include <asm/hw_irq.h>
+
 #define MAX_HWCOUNTERS		8
 #define MAX_EVENT_ALTERNATIVES	8
 #define MAX_LIMITED_HWCOUNTERS	2
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index bb202388170e..e6dc1850191c 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -913,6 +913,8 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 	case PERF_TYPE_RAW:
 		ev = counter->attr.config;
 		break;
+	default:
+		return ERR_PTR(-EINVAL);
 	}
 	counter->hw.config_base = ev;
 	counter->hw.idx = 0;
@@ -1013,7 +1015,7 @@ static void record_and_restart(struct perf_counter *counter, long val,
 	u64 period = counter->hw.sample_period;
 	s64 prev, delta, left;
 	int record = 0;
-	u64 addr, mmcra, sdsync;
+	u64 mmcra, sdsync;
 
 	/* we don't have to worry about interrupts here */
 	prev = atomic64_read(&counter->hw.prev_count);
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h
index 876ed97147b3..5fb33e160ea0 100644
--- a/arch/x86/include/asm/perf_counter.h
+++ b/arch/x86/include/asm/perf_counter.h
@@ -84,11 +84,6 @@ union cpuid10_edx {
 #define MSR_ARCH_PERFMON_FIXED_CTR2		0x30b
 #define X86_PMC_IDX_FIXED_BUS_CYCLES		(X86_PMC_IDX_FIXED + 2)
 
-extern void set_perf_counter_pending(void);
-
-#define clear_perf_counter_pending()	do { } while (0)
-#define test_perf_counter_pending()	(0)
-
 #ifdef CONFIG_PERF_COUNTERS
 extern void init_hw_perf_counters(void);
 extern void perf_counters_lapic_init(void);
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 31bd120cf2a2..01fd9461d323 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -49,13 +49,17 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t);
 #endif
 
 #if defined(CONFIG_HIGHPTE)
+#define __KM_PTE			\
+	(in_nmi() ? KM_NMI_PTE :	\
+	 in_irq() ? KM_IRQ_PTE :	\
+	 KM_PTE0)
 #define pte_offset_map(dir, address)					\
-	((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) +		\
+	((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), __KM_PTE) +		\
 	 pte_index((address)))
 #define pte_offset_map_nested(dir, address)				\
 	((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) +		\
 	 pte_index((address)))
-#define pte_unmap(pte) kunmap_atomic((pte), KM_PTE0)
+#define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE)
 #define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1)
 #else
 #define pte_offset_map(dir, address)					\
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 275bc142cd5d..e8c68a5091df 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -19,6 +19,7 @@
 #include <linux/kdebug.h>
 #include <linux/sched.h>
 #include <linux/uaccess.h>
+#include <linux/highmem.h>
 
 #include <asm/apic.h>
 #include <asm/stacktrace.h>
@@ -389,23 +390,23 @@ static u64 intel_pmu_raw_event(u64 event)
 	return event & CORE_EVNTSEL_MASK;
 }
 
-static const u64 amd_0f_hw_cache_event_ids
+static const u64 amd_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
 {
  [ C(L1D) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
+		[ C(RESULT_MISS)   ] = 0x0041, /* Data Cache Misses          */
 	},
 	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x0042, /* Data Cache Refills from L2 */
 		[ C(RESULT_MISS)   ] = 0,
 	},
 	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts  */
+		[ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
 	},
 },
 [ C(L1I ) ] = {
@@ -418,17 +419,17 @@ static const u64 amd_0f_hw_cache_event_ids
 		[ C(RESULT_MISS)   ] = -1,
 	},
 	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
 		[ C(RESULT_MISS)   ] = 0,
 	},
 },
 [ C(LL ) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
+		[ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses : IC+DC     */
 	},
 	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback           */
 		[ C(RESULT_MISS)   ] = 0,
 	},
 	[ C(OP_PREFETCH) ] = {
@@ -438,8 +439,8 @@ static const u64 amd_0f_hw_cache_event_ids
 },
 [ C(DTLB) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses      */
+		[ C(RESULT_MISS)   ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */
 	},
 	[ C(OP_WRITE) ] = {
 		[ C(RESULT_ACCESS) ] = 0,
@@ -1459,18 +1460,16 @@ static int intel_pmu_init(void)
 
 static int amd_pmu_init(void)
 {
+	/* Performance-monitoring supported from K7 and later: */
+	if (boot_cpu_data.x86 < 6)
+		return -ENODEV;
+
 	x86_pmu = amd_pmu;
 
-	switch (boot_cpu_data.x86) {
-	case 0x0f:
-	case 0x10:
-	case 0x11:
-		memcpy(hw_cache_event_ids, amd_0f_hw_cache_event_ids,
-		       sizeof(hw_cache_event_ids));
+	/* Events are common for all AMDs */
+	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
+	       sizeof(hw_cache_event_ids));
 
-		pr_cont("AMD Family 0f/10/11 events, ");
-		break;
-	}
 	return 0;
 }
 
@@ -1577,8 +1576,8 @@ static void backtrace_warning(void *data, char *msg)
 
 static int backtrace_stack(void *data, char *name)
 {
-	/* Don't bother with IRQ stacks for now */
-	return -1;
+	/* Process all stacks: */
+	return 0;
 }
 
 static void backtrace_address(void *data, unsigned long addr, int reliable)
@@ -1596,6 +1595,8 @@ static const struct stacktrace_ops backtrace_ops = {
 	.address		= backtrace_address,
 };
 
+#include "../dumpstack.h"
+
 static void
 perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
 {
@@ -1603,40 +1604,62 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
 	char *stack;
 	int nr = entry->nr;
 
-	callchain_store(entry, instruction_pointer(regs));
+	callchain_store(entry, regs->ip);
 
 	stack = ((char *)regs + sizeof(struct pt_regs));
 #ifdef CONFIG_FRAME_POINTER
-	bp = frame_pointer(regs);
+	get_bp(bp);
 #else
 	bp = 0;
 #endif
 
-	dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry);
+	dump_trace(NULL, regs, (void *)&stack, bp, &backtrace_ops, entry);
 
 	entry->kernel = entry->nr - nr;
 }
 
+/*
+ * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
+ */
+static unsigned long
+copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
+{
+	unsigned long offset, addr = (unsigned long)from;
+	int type = in_nmi() ? KM_NMI : KM_IRQ0;
+	unsigned long size, len = 0;
+	struct page *page;
+	void *map;
+	int ret;
 
-struct stack_frame {
-	const void __user *next_fp;
-	unsigned long return_address;
-};
+	do {
+		ret = __get_user_pages_fast(addr, 1, 0, &page);
+		if (!ret)
+			break;
+
+		offset = addr & (PAGE_SIZE - 1);
+		size = min(PAGE_SIZE - offset, n - len);
+
+		map = kmap_atomic(page, type);
+		memcpy(to, map+offset, size);
+		kunmap_atomic(map, type);
+		put_page(page);
+
+		len  += size;
+		to   += size;
+		addr += size;
+
+	} while (len < n);
+
+	return len;
+}
 
 static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
 {
-	int ret;
+	unsigned long bytes;
 
-	if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
-		return 0;
-
-	ret = 1;
-	pagefault_disable();
-	if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
-		ret = 0;
-	pagefault_enable();
+	bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));
 
-	return ret;
+	return bytes == sizeof(*frame);
 }
 
 static void
@@ -1646,23 +1669,25 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
 	const void __user *fp;
 	int nr = entry->nr;
 
-	regs = (struct pt_regs *)current->thread.sp0 - 1;
-	fp   = (void __user *)regs->bp;
+	if (!user_mode(regs))
+		regs = task_pt_regs(current);
+
+	fp   = (void __user *)regs->bp;
 
 	callchain_store(entry, regs->ip);
 
 	while (entry->nr < MAX_STACK_DEPTH) {
-		frame.next_fp        = NULL;
+		frame.next_frame     = NULL;
 		frame.return_address = 0;
 
 		if (!copy_stack_frame(fp, &frame))
 			break;
 
-		if ((unsigned long)fp < user_stack_pointer(regs))
+		if ((unsigned long)fp < regs->sp)
 			break;
 
 		callchain_store(entry, frame.return_address);
-		fp = frame.next_fp;
+		fp = frame.next_frame;
 	}
 
 	entry->user = entry->nr - nr;
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 6340cef6798a..697d5727c119 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -219,6 +219,62 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
 	return 1;
 }
 
+/*
+ * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
+ * back to the regular GUP.
+ */
+int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
+			  struct page **pages)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long addr, len, end;
+	unsigned long next;
+	unsigned long flags;
+	pgd_t *pgdp;
+	int nr = 0;
+
+	start &= PAGE_MASK;
+	addr = start;
+	len = (unsigned long) nr_pages << PAGE_SHIFT;
+	end = start + len;
+	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
+					(void __user *)start, len)))
+		return 0;
+
+	/*
+	 * XXX: batch / limit 'nr', to avoid large irq off latency
+	 * needs some instrumenting to determine the common sizes used by
+	 * important workloads (eg. DB2), and whether limiting the batch size
+	 * will decrease performance.
+	 *
+	 * It seems like we're in the clear for the moment. Direct-IO is
+	 * the main guy that batches up lots of get_user_pages, and even
+	 * they are limited to 64-at-a-time which is not so many.
+	 */
+	/*
+	 * This doesn't prevent pagetable teardown, but does prevent
+	 * the pagetables and pages from being freed on x86.
+	 *
+	 * So long as we atomically load page table pointers versus teardown
+	 * (which we do on x86, with the above PAE exception), we can follow the
+	 * address down to the the page and take a ref on it.
+	 */
+	local_irq_save(flags);
+	pgdp = pgd_offset(mm, addr);
+	do {
+		pgd_t pgd = *pgdp;
+
+		next = pgd_addr_end(addr, end);
+		if (pgd_none(pgd))
+			break;
+		if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+			break;
+	} while (pgdp++, addr = next, addr != end);
+	local_irq_restore(flags);
+
+	return nr;
+}
+
 /**
  * get_user_pages_fast() - pin user pages in memory
  * @start:	starting user address
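
Editorial note (not part of the commit): the sketch below illustrates the calling convention of the new __get_user_pages_fast() helper the way copy_from_user_nmi() above uses it: pin a page without sleeping, map it with a context-specific kmap slot, copy, then drop the reference. The function peek_user_word() is hypothetical; only __get_user_pages_fast(), kmap_atomic()/kunmap_atomic() with a km_type argument (the 2009-era signature used in this diff), put_page() and in_nmi() come from the patch or the contemporary kernel API.

/*
 * Hypothetical usage sketch: read one word of user memory from IRQ/NMI
 * context without sleeping, mirroring the copy_from_user_nmi() pattern.
 * Assumes the word does not cross a page boundary and that <linux/mm.h>,
 * <linux/highmem.h> and <linux/hardirq.h> are included.
 */
static int peek_user_word(const void __user *uaddr, unsigned long *val)
{
	unsigned long addr = (unsigned long)uaddr;
	unsigned long offset = addr & (PAGE_SIZE - 1);
	int type = in_nmi() ? KM_NMI : KM_IRQ0;
	struct page *page;
	void *map;

	/* Pins at most one page; never falls back to the sleeping slow path. */
	if (__get_user_pages_fast(addr, 1, 0, &page) != 1)
		return -EFAULT;

	map = kmap_atomic(page, type);
	*val = *(unsigned long *)(map + offset);
	kunmap_atomic(map, type);
	put_page(page);

	return 0;
}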