From 87e9b2024659c614a876ce359a57e98a47b5ef37 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 17 Apr 2010 18:11:59 +0200 Subject: hw-breakpoints: Check disabled breakpoints again We stopped checking disabled breakpoints because we weren't allowing breakpoints on NULL addresses. And gdb tends to set NULL addresses on inactive breakpoints. But refusing NULL addresses was actually a regression that has been fixed now. There is no reason anymore to not validate inactive breakpoint settings. Signed-off-by: Frederic Weisbecker Acked-by: Paul Mundt Cc: Will Deacon Cc: Mahesh Salgaonkar Cc: K. Prasad Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Jason Wessel Cc: Ingo Molnar --- kernel/hw_breakpoint.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'kernel/hw_breakpoint.c') diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 03808ed342a6..9ed9ae3a48b3 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -316,17 +316,7 @@ int register_perf_hw_breakpoint(struct perf_event *bp) if (ret) return ret; - /* - * Ptrace breakpoints can be temporary perf events only - * meant to reserve a slot. In this case, it is created disabled and - * we don't want to check the params right now (as we put a null addr) - * But perf tools create events as disabled and we want to check - * the params for them. - * This is a quick hack that will be removed soon, once we remove - * the tmp breakpoints from ptrace - */ - if (!bp->attr.disabled || !bp->overflow_handler) - ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task); + ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task); /* if arch_validate_hwbkpt_settings() fails then release bp slot */ if (ret) -- cgit v1.2.3 From b2812d031dea86926e9c10f7714af33ac2f6b43d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 18 Apr 2010 18:11:53 +0200 Subject: hw-breakpoints: Change/Enforce some breakpoints policies The current policies of breakpoints in x86 and SH are the following: - task bound breakpoints can only break on userspace addresses - cpu wide breakpoints can only break on kernel addresses The former rule prevents ptrace breakpoints to be set to trigger on kernel addresses, which is good. But as a side effect, we can't breakpoint on kernel addresses for task bound breakpoints. The latter rule simply makes no sense, there is no reason why we can't set breakpoints on userspace while performing cpu bound profiles. We want the following new policies: - task bound breakpoint can set userspace address breakpoints, with no particular privilege required. - task bound breakpoints can set kernelspace address breakpoints but must be privileged to do that. - cpu bound breakpoints can do what they want as they are privileged already. To implement these new policies, this patch checks if we are dealing with a kernel address breakpoint, if so and if the exclude_kernel parameter is set, we tell the user that the breakpoint is invalid, which makes a good generic ptrace protection. If we don't have exclude_kernel, ensure the user has the right privileges as kernel breakpoints are quite sensitive (risk of trap recursion attacks and global performance impacts). [ Paul Mundt: keep addr space check for sh signal delivery and fix double function declaration] Signed-off-by: Frederic Weisbecker Cc: Will Deacon Cc: Mahesh Salgaonkar Cc: K. 
Prasad Cc: Paul Mundt Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Jason Wessel Cc: Ingo Molnar Signed-off-by: Paul Mundt --- arch/sh/include/asm/hw_breakpoint.h | 5 ++--- arch/sh/kernel/hw_breakpoint.c | 34 ++++++------------------------ arch/x86/include/asm/hw_breakpoint.h | 5 ++--- arch/x86/kernel/hw_breakpoint.c | 41 ++++++------------------------------ kernel/hw_breakpoint.c | 26 +++++++++++++++++++++-- 5 files changed, 41 insertions(+), 70 deletions(-) (limited to 'kernel/hw_breakpoint.c') diff --git a/arch/sh/include/asm/hw_breakpoint.h b/arch/sh/include/asm/hw_breakpoint.h index 965dd780d51b..382bad937dcc 100644 --- a/arch/sh/include/asm/hw_breakpoint.h +++ b/arch/sh/include/asm/hw_breakpoint.h @@ -47,9 +47,8 @@ struct pmu; #define HBP_NUM 2 /* arch/sh/kernel/hw_breakpoint.c */ -extern int arch_check_va_in_userspace(unsigned long va, u16 hbp_len); -extern int arch_validate_hwbkpt_settings(struct perf_event *bp, - struct task_struct *tsk); +extern int arch_check_bp_in_kernelspace(struct perf_event *bp); +extern int arch_validate_hwbkpt_settings(struct perf_event *bp); extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, unsigned long val, void *data); diff --git a/arch/sh/kernel/hw_breakpoint.c b/arch/sh/kernel/hw_breakpoint.c index 675eea7785d9..1f2cf6229862 100644 --- a/arch/sh/kernel/hw_breakpoint.c +++ b/arch/sh/kernel/hw_breakpoint.c @@ -119,26 +119,17 @@ static int get_hbp_len(u16 hbp_len) return len_in_bytes; } -/* - * Check for virtual address in user space. - */ -int arch_check_va_in_userspace(unsigned long va, u16 hbp_len) -{ - unsigned int len; - - len = get_hbp_len(hbp_len); - - return (va <= TASK_SIZE - len); -} - /* * Check for virtual address in kernel space. */ -static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) +int arch_check_bp_in_kernelspace(struct perf_event *bp) { unsigned int len; + unsigned long va; + struct arch_hw_breakpoint *info = counter_arch_bp(bp); - len = get_hbp_len(hbp_len); + va = info->address; + len = get_hbp_len(info->len); return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); } @@ -226,8 +217,7 @@ static int arch_build_bp_info(struct perf_event *bp) /* * Validate the arch-specific HW Breakpoint register settings */ -int arch_validate_hwbkpt_settings(struct perf_event *bp, - struct task_struct *tsk) +int arch_validate_hwbkpt_settings(struct perf_event *bp) { struct arch_hw_breakpoint *info = counter_arch_bp(bp); unsigned int align; @@ -270,15 +260,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp, if (info->address & align) return -EINVAL; - /* Check that the virtual address is in the proper range */ - if (tsk) { - if (!arch_check_va_in_userspace(info->address, info->len)) - return -EFAULT; - } else { - if (!arch_check_va_in_kernelspace(info->address, info->len)) - return -EFAULT; - } - return 0; } @@ -363,8 +344,7 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) perf_bp_event(bp, args->regs); /* Deliver the signal to userspace */ - if (arch_check_va_in_userspace(bp->attr.bp_addr, - bp->attr.bp_len)) { + if (!arch_check_bp_in_kernelspace(bp)) { siginfo_t info; info.si_signo = args->signr; diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index 2a1bd8f4f23a..c77a5a6fab9d 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -44,9 +44,8 @@ struct arch_hw_breakpoint { struct perf_event; struct pmu; -extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len); -extern int 
arch_validate_hwbkpt_settings(struct perf_event *bp, - struct task_struct *tsk); +extern int arch_check_bp_in_kernelspace(struct perf_event *bp); +extern int arch_validate_hwbkpt_settings(struct perf_event *bp); extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, unsigned long val, void *data); diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index d6cc065f519f..a8f1b803d2fd 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -188,26 +188,17 @@ static int get_hbp_len(u8 hbp_len) return len_in_bytes; } -/* - * Check for virtual address in user space. - */ -int arch_check_va_in_userspace(unsigned long va, u8 hbp_len) -{ - unsigned int len; - - len = get_hbp_len(hbp_len); - - return (va <= TASK_SIZE - len); -} - /* * Check for virtual address in kernel space. */ -static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) +int arch_check_bp_in_kernelspace(struct perf_event *bp) { unsigned int len; + unsigned long va; + struct arch_hw_breakpoint *info = counter_arch_bp(bp); - len = get_hbp_len(hbp_len); + va = info->address; + len = get_hbp_len(info->len); return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); } @@ -300,8 +291,7 @@ static int arch_build_bp_info(struct perf_event *bp) /* * Validate the arch-specific HW Breakpoint register settings */ -int arch_validate_hwbkpt_settings(struct perf_event *bp, - struct task_struct *tsk) +int arch_validate_hwbkpt_settings(struct perf_event *bp) { struct arch_hw_breakpoint *info = counter_arch_bp(bp); unsigned int align; @@ -314,16 +304,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp, ret = -EINVAL; - if (info->type == X86_BREAKPOINT_EXECUTE) - /* - * Ptrace-refactoring code - * For now, we'll allow instruction breakpoint only for user-space - * addresses - */ - if ((!arch_check_va_in_userspace(info->address, info->len)) && - info->len != X86_BREAKPOINT_EXECUTE) - return ret; - switch (info->len) { case X86_BREAKPOINT_LEN_1: align = 0; @@ -350,15 +330,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp, if (info->address & align) return -EINVAL; - /* Check that the virtual address is in the proper range */ - if (tsk) { - if (!arch_check_va_in_userspace(info->address, info->len)) - return -EFAULT; - } else { - if (!arch_check_va_in_kernelspace(info->address, info->len)) - return -EFAULT; - } - return 0; } diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 9ed9ae3a48b3..89e8a050c43a 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -308,6 +308,28 @@ int dbg_release_bp_slot(struct perf_event *bp) return 0; } +static int validate_hw_breakpoint(struct perf_event *bp) +{ + int ret; + + ret = arch_validate_hwbkpt_settings(bp); + if (ret) + return ret; + + if (arch_check_bp_in_kernelspace(bp)) { + if (bp->attr.exclude_kernel) + return -EINVAL; + /* + * Don't let unprivileged users set a breakpoint in the trap + * path to avoid trap recursion attacks. 
+ */ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + } + + return 0; +} + int register_perf_hw_breakpoint(struct perf_event *bp) { int ret; @@ -316,7 +338,7 @@ int register_perf_hw_breakpoint(struct perf_event *bp) if (ret) return ret; - ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task); + ret = validate_hw_breakpoint(bp); /* if arch_validate_hwbkpt_settings() fails then release bp slot */ if (ret) @@ -363,7 +385,7 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att if (attr->disabled) goto end; - err = arch_validate_hwbkpt_settings(bp, bp->ctx->task); + err = validate_hw_breakpoint(bp); if (!err) perf_event_enable(bp); -- cgit v1.2.3 From 0102752e4c9e0655b39734550d4c35327954f7f9 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 11 Apr 2010 18:55:56 +0200 Subject: hw-breakpoints: Separate constraint space for data and instruction breakpoints There are two outstanding fashions for archs to implement hardware breakpoints. The first is to separate breakpoint address pattern definition space between data and instruction breakpoints. We then have typically distinct instruction address breakpoint registers and data address breakpoint registers, delivered with separate control registers for data and instruction breakpoints as well. This is the case of PowerPc and ARM for example. The second consists in having merged breakpoint address space definition between data and instruction breakpoint. Address registers can host either instruction or data address and the access mode for the breakpoint is defined in a control register. This is the case of x86 and Super H. This patch adds a new CONFIG_HAVE_MIXED_BREAKPOINTS_REGS config that archs can select if they belong to the second case. Those will have their slot allocation merged for instructions and data breakpoints. The others will have a separate slot tracking between data and instruction breakpoints. Signed-off-by: Frederic Weisbecker Acked-by: Paul Mundt Cc: Will Deacon Cc: Mahesh Salgaonkar Cc: K. Prasad Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Ingo Molnar --- arch/Kconfig | 11 ++++++ arch/sh/Kconfig | 1 + arch/x86/Kconfig | 1 + include/linux/hw_breakpoint.h | 9 +++-- kernel/hw_breakpoint.c | 86 +++++++++++++++++++++++++++++-------------- 5 files changed, 78 insertions(+), 30 deletions(-) (limited to 'kernel/hw_breakpoint.c') diff --git a/arch/Kconfig b/arch/Kconfig index f06010fb4838..acda512da2e2 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -137,6 +137,17 @@ config HAVE_HW_BREAKPOINT bool depends on PERF_EVENTS +config HAVE_MIXED_BREAKPOINTS_REGS + bool + depends on HAVE_HW_BREAKPOINT + help + Depending on the arch implementation of hardware breakpoints, + some of them have separate registers for data and instruction + breakpoints addresses, others have mixed registers to store + them but define the access type in a control register. + Select this option if your arch implements breakpoints under the + latter fashion. 
+ config HAVE_USER_RETURN_NOTIFIER bool diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 8d90564c2bcf..e6d8ab5cfa9d 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -44,6 +44,7 @@ config SUPERH32 select HAVE_FUNCTION_GRAPH_TRACER select HAVE_ARCH_KGDB select HAVE_HW_BREAKPOINT + select HAVE_MIXED_BREAKPOINTS_REGS select PERF_EVENTS if HAVE_HW_BREAKPOINT select ARCH_HIBERNATION_POSSIBLE if MMU diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 97a95dfd1181..01177dcbe261 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -53,6 +53,7 @@ config X86 select HAVE_KERNEL_LZMA select HAVE_KERNEL_LZO select HAVE_HW_BREAKPOINT + select HAVE_MIXED_BREAKPOINTS_REGS select PERF_EVENTS select ANON_INODES select HAVE_ARCH_KMEMCHECK diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index a0aa5a9cfb0e..7e8899093098 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -9,9 +9,12 @@ enum { }; enum { - HW_BREAKPOINT_R = 1, - HW_BREAKPOINT_W = 2, - HW_BREAKPOINT_X = 4, + HW_BREAKPOINT_EMPTY = 0, + HW_BREAKPOINT_R = 1, + HW_BREAKPOINT_W = 2, + HW_BREAKPOINT_RW = HW_BREAKPOINT_R | HW_BREAKPOINT_W, + HW_BREAKPOINT_X = 4, + HW_BREAKPOINT_INVALID = HW_BREAKPOINT_RW | HW_BREAKPOINT_X, }; #ifdef __KERNEL__ diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 89e8a050c43a..8ead1345e33b 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -45,18 +45,28 @@ #include +enum bp_type_idx { + TYPE_INST = 0, +#ifdef CONFIG_HAVE_MIXED_BREAKPOINTS_REGS + TYPE_DATA = 0, +#else + TYPE_DATA = 1, +#endif + TYPE_MAX +}; + /* * Constraints data */ /* Number of pinned cpu breakpoints in a cpu */ -static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned); +static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned[TYPE_MAX]); /* Number of pinned task breakpoints in a cpu */ -static DEFINE_PER_CPU(unsigned int, nr_task_bp_pinned[HBP_NUM]); +static DEFINE_PER_CPU(unsigned int, nr_task_bp_pinned[TYPE_MAX][HBP_NUM]); /* Number of non-pinned cpu/task breakpoints in a cpu */ -static DEFINE_PER_CPU(unsigned int, nr_bp_flexible); +static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]); /* Gather the number of total pinned and un-pinned bp in a cpuset */ struct bp_busy_slots { @@ -67,14 +77,22 @@ struct bp_busy_slots { /* Serialize accesses to the above constraints */ static DEFINE_MUTEX(nr_bp_mutex); +static inline enum bp_type_idx find_slot_idx(struct perf_event *bp) +{ + if (bp->attr.bp_type & HW_BREAKPOINT_RW) + return TYPE_DATA; + + return TYPE_INST; +} + /* * Report the maximum number of pinned breakpoints a task * have in this cpu */ -static unsigned int max_task_bp_pinned(int cpu) +static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type) { int i; - unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned, cpu); + unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu); for (i = HBP_NUM -1; i >= 0; i--) { if (tsk_pinned[i] > 0) @@ -84,7 +102,7 @@ static unsigned int max_task_bp_pinned(int cpu) return 0; } -static int task_bp_pinned(struct task_struct *tsk) +static int task_bp_pinned(struct task_struct *tsk, enum bp_type_idx type) { struct perf_event_context *ctx = tsk->perf_event_ctxp; struct list_head *list; @@ -105,7 +123,8 @@ static int task_bp_pinned(struct task_struct *tsk) */ list_for_each_entry(bp, list, event_entry) { if (bp->attr.type == PERF_TYPE_BREAKPOINT) - count++; + if (find_slot_idx(bp) == type) + count++; } raw_spin_unlock_irqrestore(&ctx->lock, flags); @@ -118,18 +137,19 @@ static int task_bp_pinned(struct 
task_struct *tsk) * a given cpu (cpu > -1) or in all of them (cpu = -1). */ static void -fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp) +fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp, + enum bp_type_idx type) { int cpu = bp->cpu; struct task_struct *tsk = bp->ctx->task; if (cpu >= 0) { - slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu); + slots->pinned = per_cpu(nr_cpu_bp_pinned[type], cpu); if (!tsk) - slots->pinned += max_task_bp_pinned(cpu); + slots->pinned += max_task_bp_pinned(cpu, type); else - slots->pinned += task_bp_pinned(tsk); - slots->flexible = per_cpu(nr_bp_flexible, cpu); + slots->pinned += task_bp_pinned(tsk, type); + slots->flexible = per_cpu(nr_bp_flexible[type], cpu); return; } @@ -137,16 +157,16 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp) for_each_online_cpu(cpu) { unsigned int nr; - nr = per_cpu(nr_cpu_bp_pinned, cpu); + nr = per_cpu(nr_cpu_bp_pinned[type], cpu); if (!tsk) - nr += max_task_bp_pinned(cpu); + nr += max_task_bp_pinned(cpu, type); else - nr += task_bp_pinned(tsk); + nr += task_bp_pinned(tsk, type); if (nr > slots->pinned) slots->pinned = nr; - nr = per_cpu(nr_bp_flexible, cpu); + nr = per_cpu(nr_bp_flexible[type], cpu); if (nr > slots->flexible) slots->flexible = nr; @@ -156,14 +176,15 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp) /* * Add a pinned breakpoint for the given task in our constraint table */ -static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable) +static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable, + enum bp_type_idx type) { unsigned int *tsk_pinned; int count = 0; - count = task_bp_pinned(tsk); + count = task_bp_pinned(tsk, type); - tsk_pinned = per_cpu(nr_task_bp_pinned, cpu); + tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu); if (enable) { tsk_pinned[count]++; if (count > 0) @@ -178,7 +199,8 @@ static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable) /* * Add/remove the given breakpoint in our constraint table */ -static void toggle_bp_slot(struct perf_event *bp, bool enable) +static void +toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type) { int cpu = bp->cpu; struct task_struct *tsk = bp->ctx->task; @@ -186,20 +208,20 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable) /* Pinned counter task profiling */ if (tsk) { if (cpu >= 0) { - toggle_bp_task_slot(tsk, cpu, enable); + toggle_bp_task_slot(tsk, cpu, enable, type); return; } for_each_online_cpu(cpu) - toggle_bp_task_slot(tsk, cpu, enable); + toggle_bp_task_slot(tsk, cpu, enable, type); return; } /* Pinned counter cpu profiling */ if (enable) - per_cpu(nr_cpu_bp_pinned, bp->cpu)++; + per_cpu(nr_cpu_bp_pinned[type], bp->cpu)++; else - per_cpu(nr_cpu_bp_pinned, bp->cpu)--; + per_cpu(nr_cpu_bp_pinned[type], bp->cpu)--; } /* @@ -246,14 +268,21 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable) static int __reserve_bp_slot(struct perf_event *bp) { struct bp_busy_slots slots = {0}; + enum bp_type_idx type; - fetch_bp_busy_slots(&slots, bp); + /* Basic checks */ + if (bp->attr.bp_type == HW_BREAKPOINT_EMPTY || + bp->attr.bp_type == HW_BREAKPOINT_INVALID) + return -EINVAL; + + type = find_slot_idx(bp); + fetch_bp_busy_slots(&slots, bp, type); /* Flexible counters need to keep at least one slot */ if (slots.pinned + (!!slots.flexible) == HBP_NUM) return -ENOSPC; - toggle_bp_slot(bp, true); + toggle_bp_slot(bp, true, type); return 0; } @@ -273,7 +302,10 @@ int 
reserve_bp_slot(struct perf_event *bp) static void __release_bp_slot(struct perf_event *bp) { - toggle_bp_slot(bp, false); + enum bp_type_idx type; + + type = find_slot_idx(bp); + toggle_bp_slot(bp, false, type); } void release_bp_slot(struct perf_event *bp) -- cgit v1.2.3 From f93a20541134fa767e8dc4eb32e956d30b9f6b92 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 13 Apr 2010 00:32:30 +0200 Subject: hw-breakpoints: Handle breakpoint weight in allocation constraints Depending on their nature and on what an arch supports, breakpoints may consume more than one address register. For example a simple absolute address match usually only requires one address register. But an address range match may consume two registers. Currently our slot allocation constraints, that tend to reflect the limited arch's resources, always consider that a breakpoint consumes one slot. Then provide a way for archs to tell us the weight of a breakpoint through a new hw_breakpoint_weight() helper. This weight will be computed against the generic allocation constraints instead of a constant value. Signed-off-by: Frederic Weisbecker Acked-by: Paul Mundt Cc: Will Deacon Cc: Mahesh Salgaonkar Cc: K. Prasad Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Ingo Molnar --- kernel/hw_breakpoint.c | 63 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 18 deletions(-) (limited to 'kernel/hw_breakpoint.c') diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 8ead1345e33b..974498b858fc 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -77,6 +77,11 @@ struct bp_busy_slots { /* Serialize accesses to the above constraints */ static DEFINE_MUTEX(nr_bp_mutex); +__weak int hw_breakpoint_weight(struct perf_event *bp) +{ + return 1; +} + static inline enum bp_type_idx find_slot_idx(struct perf_event *bp) { if (bp->attr.bp_type & HW_BREAKPOINT_RW) @@ -124,7 +129,7 @@ static int task_bp_pinned(struct task_struct *tsk, enum bp_type_idx type) list_for_each_entry(bp, list, event_entry) { if (bp->attr.type == PERF_TYPE_BREAKPOINT) if (find_slot_idx(bp) == type) - count++; + count += hw_breakpoint_weight(bp); } raw_spin_unlock_irqrestore(&ctx->lock, flags); @@ -173,26 +178,41 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp, } } +/* + * For now, continue to consider flexible as pinned, until we can + * ensure no flexible event can ever be scheduled before a pinned event + * in a same cpu. 
+ */ +static void +fetch_this_slot(struct bp_busy_slots *slots, int weight) +{ + slots->pinned += weight; +} + /* * Add a pinned breakpoint for the given task in our constraint table */ static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable, - enum bp_type_idx type) + enum bp_type_idx type, int weight) { unsigned int *tsk_pinned; - int count = 0; + int old_count = 0; + int old_idx = 0; + int idx = 0; - count = task_bp_pinned(tsk, type); + old_count = task_bp_pinned(tsk, type); + old_idx = old_count - 1; + idx = old_idx + weight; tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu); if (enable) { - tsk_pinned[count]++; - if (count > 0) - tsk_pinned[count-1]--; + tsk_pinned[idx]++; + if (old_count > 0) + tsk_pinned[old_idx]--; } else { - tsk_pinned[count]--; - if (count > 0) - tsk_pinned[count-1]++; + tsk_pinned[idx]--; + if (old_count > 0) + tsk_pinned[old_idx]++; } } @@ -200,7 +220,8 @@ static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable, * Add/remove the given breakpoint in our constraint table */ static void -toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type) +toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type, + int weight) { int cpu = bp->cpu; struct task_struct *tsk = bp->ctx->task; @@ -208,20 +229,20 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type) /* Pinned counter task profiling */ if (tsk) { if (cpu >= 0) { - toggle_bp_task_slot(tsk, cpu, enable, type); + toggle_bp_task_slot(tsk, cpu, enable, type, weight); return; } for_each_online_cpu(cpu) - toggle_bp_task_slot(tsk, cpu, enable, type); + toggle_bp_task_slot(tsk, cpu, enable, type, weight); return; } /* Pinned counter cpu profiling */ if (enable) - per_cpu(nr_cpu_bp_pinned[type], bp->cpu)++; + per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight; else - per_cpu(nr_cpu_bp_pinned[type], bp->cpu)--; + per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight; } /* @@ -269,6 +290,7 @@ static int __reserve_bp_slot(struct perf_event *bp) { struct bp_busy_slots slots = {0}; enum bp_type_idx type; + int weight; /* Basic checks */ if (bp->attr.bp_type == HW_BREAKPOINT_EMPTY || @@ -276,13 +298,16 @@ static int __reserve_bp_slot(struct perf_event *bp) return -EINVAL; type = find_slot_idx(bp); + weight = hw_breakpoint_weight(bp); + fetch_bp_busy_slots(&slots, bp, type); + fetch_this_slot(&slots, weight); /* Flexible counters need to keep at least one slot */ - if (slots.pinned + (!!slots.flexible) == HBP_NUM) + if (slots.pinned + (!!slots.flexible) > HBP_NUM) return -ENOSPC; - toggle_bp_slot(bp, true, type); + toggle_bp_slot(bp, true, type, weight); return 0; } @@ -303,9 +328,11 @@ int reserve_bp_slot(struct perf_event *bp) static void __release_bp_slot(struct perf_event *bp) { enum bp_type_idx type; + int weight; type = find_slot_idx(bp); - toggle_bp_slot(bp, false, type); + weight = hw_breakpoint_weight(bp); + toggle_bp_slot(bp, false, type, weight); } void release_bp_slot(struct perf_event *bp) -- cgit v1.2.3 From feef47d0cb530e8419dfa0b48141b538b89b1b1a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 23 Apr 2010 05:59:55 +0200 Subject: hw-breakpoints: Get the number of available registers on boot dynamically The breakpoint generic layer assumes that archs always know in advance the static number of address registers available to host breakpoints through the HBP_NUM macro. However this is not true for every archs. 
For example Arm needs to get this information dynamically to handle the compatiblity between different versions. To solve this, this patch proposes to drop the static HBP_NUM macro and let the arch provide the number of available slots through a new hw_breakpoint_slots() function. For archs that have CONFIG_HAVE_MIXED_BREAKPOINTS_REGS selected, it will be called once as the number of registers fits for instruction and data breakpoints together. For the others it will be called first to get the number of instruction breakpoint registers and another time to get the data breakpoint registers, the targeted type is given as a parameter of hw_breakpoint_slots(). Reported-by: Will Deacon Signed-off-by: Frederic Weisbecker Acked-by: Paul Mundt Cc: Mahesh Salgaonkar Cc: K. Prasad Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Jason Wessel Cc: Ingo Molnar --- arch/sh/include/asm/hw_breakpoint.h | 5 ++++ arch/x86/include/asm/hw_breakpoint.h | 5 ++++ include/linux/hw_breakpoint.h | 10 +++++++ kernel/hw_breakpoint.c | 53 ++++++++++++++++++++++++++++-------- kernel/trace/trace_ksym.c | 26 +++++------------- 5 files changed, 68 insertions(+), 31 deletions(-) (limited to 'kernel/hw_breakpoint.c') diff --git a/arch/sh/include/asm/hw_breakpoint.h b/arch/sh/include/asm/hw_breakpoint.h index 382bad937dcc..e14cad96798f 100644 --- a/arch/sh/include/asm/hw_breakpoint.h +++ b/arch/sh/include/asm/hw_breakpoint.h @@ -46,6 +46,11 @@ struct pmu; /* Maximum number of UBC channels */ #define HBP_NUM 2 +static inline int hw_breakpoint_slots(int type) +{ + return HBP_NUM; +} + /* arch/sh/kernel/hw_breakpoint.c */ extern int arch_check_bp_in_kernelspace(struct perf_event *bp); extern int arch_validate_hwbkpt_settings(struct perf_event *bp); diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index c77a5a6fab9d..942255310e6a 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -41,6 +41,11 @@ struct arch_hw_breakpoint { /* Total number of available HW breakpoint registers */ #define HBP_NUM 4 +static inline int hw_breakpoint_slots(int type) +{ + return HBP_NUM; +} + struct perf_event; struct pmu; diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 7e8899093098..a2d6ea49ec56 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -17,6 +17,16 @@ enum { HW_BREAKPOINT_INVALID = HW_BREAKPOINT_RW | HW_BREAKPOINT_X, }; +enum bp_type_idx { + TYPE_INST = 0, +#ifdef CONFIG_HAVE_MIXED_BREAKPOINTS_REGS + TYPE_DATA = 0, +#else + TYPE_DATA = 1, +#endif + TYPE_MAX +}; + #ifdef __KERNEL__ #include diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 974498b858fc..684b710cbb91 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -40,20 +40,12 @@ #include #include #include +#include #include #include #include -enum bp_type_idx { - TYPE_INST = 0, -#ifdef CONFIG_HAVE_MIXED_BREAKPOINTS_REGS - TYPE_DATA = 0, -#else - TYPE_DATA = 1, -#endif - TYPE_MAX -}; /* * Constraints data @@ -63,11 +55,15 @@ enum bp_type_idx { static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned[TYPE_MAX]); /* Number of pinned task breakpoints in a cpu */ -static DEFINE_PER_CPU(unsigned int, nr_task_bp_pinned[TYPE_MAX][HBP_NUM]); +static DEFINE_PER_CPU(unsigned int, *nr_task_bp_pinned[TYPE_MAX]); /* Number of non-pinned cpu/task breakpoints in a cpu */ static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]); +static int nr_slots[TYPE_MAX]; + +static int constraints_initialized; + /* Gather the number of 
total pinned and un-pinned bp in a cpuset */ struct bp_busy_slots { unsigned int pinned; @@ -99,7 +95,7 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type) int i; unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu); - for (i = HBP_NUM -1; i >= 0; i--) { + for (i = nr_slots[type] - 1; i >= 0; i--) { if (tsk_pinned[i] > 0) return i + 1; } @@ -292,6 +288,10 @@ static int __reserve_bp_slot(struct perf_event *bp) enum bp_type_idx type; int weight; + /* We couldn't initialize breakpoint constraints on boot */ + if (!constraints_initialized) + return -ENOMEM; + /* Basic checks */ if (bp->attr.bp_type == HW_BREAKPOINT_EMPTY || bp->attr.bp_type == HW_BREAKPOINT_INVALID) @@ -304,7 +304,7 @@ static int __reserve_bp_slot(struct perf_event *bp) fetch_this_slot(&slots, weight); /* Flexible counters need to keep at least one slot */ - if (slots.pinned + (!!slots.flexible) > HBP_NUM) + if (slots.pinned + (!!slots.flexible) > nr_slots[type]) return -ENOSPC; toggle_bp_slot(bp, true, type, weight); @@ -551,7 +551,36 @@ static struct notifier_block hw_breakpoint_exceptions_nb = { static int __init init_hw_breakpoint(void) { + unsigned int **task_bp_pinned; + int cpu, err_cpu; + int i; + + for (i = 0; i < TYPE_MAX; i++) + nr_slots[i] = hw_breakpoint_slots(i); + + for_each_possible_cpu(cpu) { + for (i = 0; i < TYPE_MAX; i++) { + task_bp_pinned = &per_cpu(nr_task_bp_pinned[i], cpu); + *task_bp_pinned = kzalloc(sizeof(int) * nr_slots[i], + GFP_KERNEL); + if (!*task_bp_pinned) + goto err_alloc; + } + } + + constraints_initialized = 1; + return register_die_notifier(&hw_breakpoint_exceptions_nb); + + err_alloc: + for_each_possible_cpu(err_cpu) { + if (err_cpu == cpu) + break; + for (i = 0; i < TYPE_MAX; i++) + kfree(per_cpu(nr_task_bp_pinned[i], cpu)); + } + + return -ENOMEM; } core_initcall(init_hw_breakpoint); diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index d59cd6879477..8eaf00749b65 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -34,12 +34,6 @@ #include -/* - * For now, let us restrict the no. of symbols traced simultaneously to number - * of available hardware breakpoint registers. - */ -#define KSYM_TRACER_MAX HBP_NUM - #define KSYM_TRACER_OP_LEN 3 /* rw- */ struct trace_ksym { @@ -53,7 +47,6 @@ struct trace_ksym { static struct trace_array *ksym_trace_array; -static unsigned int ksym_filter_entry_count; static unsigned int ksym_tracing_enabled; static HLIST_HEAD(ksym_filter_head); @@ -181,13 +174,6 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) struct trace_ksym *entry; int ret = -ENOMEM; - if (ksym_filter_entry_count >= KSYM_TRACER_MAX) { - printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No" - " new requests for tracing can be accepted now.\n", - KSYM_TRACER_MAX); - return -ENOSPC; - } - entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL); if (!entry) return -ENOMEM; @@ -203,13 +189,17 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) if (IS_ERR(entry->ksym_hbp)) { ret = PTR_ERR(entry->ksym_hbp); - printk(KERN_INFO "ksym_tracer request failed. Try again" - " later!!\n"); + if (ret == -ENOSPC) { + printk(KERN_ERR "ksym_tracer: Maximum limit reached." + " No new requests for tracing can be accepted now.\n"); + } else { + printk(KERN_INFO "ksym_tracer request failed. 
Try again" + " later!!\n"); + } goto err; } hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head); - ksym_filter_entry_count++; return 0; @@ -265,7 +255,6 @@ static void __ksym_trace_reset(void) hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head, ksym_hlist) { unregister_wide_hw_breakpoint(entry->ksym_hbp); - ksym_filter_entry_count--; hlist_del_rcu(&(entry->ksym_hlist)); synchronize_rcu(); kfree(entry); @@ -338,7 +327,6 @@ static ssize_t ksym_trace_filter_write(struct file *file, goto out_unlock; } /* Error or "symbol:---" case: drop it */ - ksym_filter_entry_count--; hlist_del_rcu(&(entry->ksym_hlist)); synchronize_rcu(); kfree(entry); -- cgit v1.2.3 From 777d0411cd1e384115985dac5ccd42031e3eee2b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 3 May 2010 15:39:45 +0200 Subject: hw_breakpoints: Fix percpu build failure Fix this build error: kernel/hw_breakpoint.c:58:1: error: pasting "__pcpu_scope_" and "*" does not give a valid preprocessing token It happens if CONFIG_DEBUG_FORCE_WEAK_PER_CPU, because we concatenate someting with the name and we have the "*" in the name. Cc: Frederic Weisbecker Cc: Will Deacon Cc: Paul Mundt Cc: Mahesh Salgaonkar Cc: K. Prasad Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Jason Wessel LKML-Reference: <20100503133942.GA5497@nowhere> Signed-off-by: Ingo Molnar --- kernel/hw_breakpoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/hw_breakpoint.c') diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 684b710cbb91..7a56b22e0602 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -55,7 +55,7 @@ static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned[TYPE_MAX]); /* Number of pinned task breakpoints in a cpu */ -static DEFINE_PER_CPU(unsigned int, *nr_task_bp_pinned[TYPE_MAX]); +static DEFINE_PER_CPU(unsigned int *, nr_task_bp_pinned[TYPE_MAX]); /* Number of non-pinned cpu/task breakpoints in a cpu */ static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]); -- cgit v1.2.3