From 195a84d91e92ee3fe571a2086a6db7e17bf5bc7c Mon Sep 17 00:00:00 2001 From: "zhangwei(Jovi)" Date: Fri, 14 Jun 2013 10:10:38 +0800 Subject: tracing/kprobes: Remove unnecessary checking of trace_probe_is_enabled Since tp->flags assignment was moved into function enable_trace_probe(), there is no need to use trace_probe_is_enabled to check flags in the same function. Remove the unnecessary checking. Link: http://lkml.kernel.org/r/51BA7B9E.3040807@huawei.com Acked-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Oleg Nesterov Cc: Srikar Dronamraju Signed-off-by: zhangwei(Jovi) Signed-off-by: Steven Rostedt --- kernel/trace/trace_kprobe.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel/trace/trace_kprobe.c') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 9f46e98ba8f2..f2374172ba7b 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -240,8 +240,7 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) } else tp->flags |= TP_FLAG_PROFILE; - if (trace_probe_is_enabled(tp) && trace_probe_is_registered(tp) && - !trace_probe_has_gone(tp)) { + if (trace_probe_is_registered(tp) && !trace_probe_has_gone(tp)) { if (trace_probe_is_return(tp)) ret = enable_kretprobe(&tp->rp); else -- cgit v1.2.3 From 288e984e622336bab8bc3dfdf2f190816362d9a1 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 20 Jun 2013 19:38:06 +0200 Subject: tracing/kprobes: Avoid perf_trace_buf_*() if ->perf_events is empty perf_trace_buf_prepare() + perf_trace_buf_submit() make no sense if this task/CPU has no active counters. Change kprobe_perf_func() and kretprobe_perf_func() to check call->perf_events beforehand and return if this list is empty. For example, "perf record -e some_probe -p1". Only /sbin/init will report, all other threads which hit the same probe will do perf_trace_buf_prepare/perf_trace_buf_submit just to realize that nobody wants perf_swevent_event(). Link: http://lkml.kernel.org/r/20130620173806.GA13151@redhat.com Acked-by: Masami Hiramatsu Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- kernel/trace/trace_kprobe.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'kernel/trace/trace_kprobe.c') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index f2374172ba7b..c35bebe53ffe 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1156,6 +1156,10 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs) int size, __size, dsize; int rctx; + head = this_cpu_ptr(call->perf_events); + if (hlist_empty(head)) + return; + dsize = __get_data_size(tp, regs); __size = sizeof(*entry) + tp->size + dsize; size = ALIGN(__size + sizeof(u32), sizeof(u64)); @@ -1171,8 +1175,6 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs) entry->ip = (unsigned long)tp->rp.kp.addr; memset(&entry[1], 0, dsize); store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); - - head = this_cpu_ptr(call->perf_events); perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head, NULL); } @@ -1188,6 +1190,10 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri, int size, __size, dsize; int rctx; + head = this_cpu_ptr(call->perf_events); + if (hlist_empty(head)) + return; + dsize = __get_data_size(tp, regs); __size = sizeof(*entry) + tp->size + dsize; size = ALIGN(__size + sizeof(u32), sizeof(u64)); @@ -1203,8 +1209,6 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri, entry->func = (unsigned long)tp->rp.kp.addr; entry->ret_ip = (unsigned long)ri->ret_addr; store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); - - head = this_cpu_ptr(call->perf_events); perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head, NULL); } -- cgit v1.2.3 From 3fe3d6193e7cd7b4dd2bde10772f048bdefea4ee Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 20 Jun 2013 19:38:09 +0200 Subject: tracing/kprobes: Kill probe_enable_lock enable_trace_probe() and disable_trace_probe() should not worry about serialization, the caller (perf_trace_init or __ftrace_set_clr_event) holds event_mutex. They are also called by kprobe_trace_self_tests_init(), but this __init function can't race with itself or trace_events.c And note that this code depended on event_mutex even before 41a7dd420c which introduced probe_enable_lock. In fact it assumes that the caller kprobe_register() can never race with itself. Otherwise, say, tp->flags manipulations are racy. Link: http://lkml.kernel.org/r/20130620173809.GA13158@redhat.com Acked-by: Masami Hiramatsu Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- kernel/trace/trace_kprobe.c | 43 ++++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 23 deletions(-) (limited to 'kernel/trace/trace_kprobe.c') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index c35bebe53ffe..282f86cfd304 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -183,16 +183,15 @@ static struct trace_probe *find_trace_probe(const char *event, return NULL; } +/* + * This and enable_trace_probe/disable_trace_probe rely on event_mutex + * held by the caller, __ftrace_set_clr_event(). + */ static int trace_probe_nr_files(struct trace_probe *tp) { - struct ftrace_event_file **file; + struct ftrace_event_file **file = rcu_dereference_raw(tp->files); int ret = 0; - /* - * Since all tp->files updater is protected by probe_enable_lock, - * we don't need to lock an rcu_read_lock. - */ - file = rcu_dereference_raw(tp->files); if (file) while (*(file++)) ret++; @@ -200,8 +199,6 @@ static int trace_probe_nr_files(struct trace_probe *tp) return ret; } -static DEFINE_MUTEX(probe_enable_lock); - /* * Enable trace_probe * if the file is NULL, enable "perf" handler, or enable "trace" handler. @@ -211,8 +208,6 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) { int ret = 0; - mutex_lock(&probe_enable_lock); - if (file) { struct ftrace_event_file **new, **old; int n = trace_probe_nr_files(tp); @@ -223,7 +218,7 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) GFP_KERNEL); if (!new) { ret = -ENOMEM; - goto out_unlock; + goto out; } memcpy(new, old, n * sizeof(struct ftrace_event_file *)); new[n] = file; @@ -246,10 +241,7 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) else ret = enable_kprobe(&tp->rp.kp); } - - out_unlock: - mutex_unlock(&probe_enable_lock); - + out: return ret; } @@ -282,8 +274,6 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) { int ret = 0; - mutex_lock(&probe_enable_lock); - if (file) { struct ftrace_event_file **new, **old; int n = trace_probe_nr_files(tp); @@ -292,7 +282,7 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) old = rcu_dereference_raw(tp->files); if (n == 0 || trace_probe_file_index(tp, file) < 0) { ret = -EINVAL; - goto out_unlock; + goto out; } if (n == 1) { /* Remove the last file */ @@ -303,7 +293,7 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) GFP_KERNEL); if (!new) { ret = -ENOMEM; - goto out_unlock; + goto out; } /* This copy & check loop copies the NULL stopper too */ @@ -326,10 +316,7 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) else disable_kprobe(&tp->rp.kp); } - - out_unlock: - mutex_unlock(&probe_enable_lock); - + out: return ret; } @@ -1214,6 +1201,12 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri, } #endif /* CONFIG_PERF_EVENTS */ +/* + * called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex. + * + * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe + * lockless, but we can't race with this __init function. + */ static __kprobes int kprobe_register(struct ftrace_event_call *event, enum trace_reg type, void *data) @@ -1379,6 +1372,10 @@ find_trace_probe_file(struct trace_probe *tp, struct trace_array *tr) return NULL; } +/* + * Nobody but us can call enable_trace_probe/disable_trace_probe at this + * stage, we can do this lockless. + */ static __init int kprobe_trace_self_tests_init(void) { int ret, warn = 0; -- cgit v1.2.3 From b04d52e368e2cf526abb2bab61f304eaea126af2 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 20 Jun 2013 19:38:14 +0200 Subject: tracing/kprobes: Turn trace_probe->files into list_head I think that "ftrace_event_file *trace_probe[]" complicates the code for no reason, turn it into list_head to simplify the code. enable_trace_probe() no longer needs synchronize_sched(). This needs the extra sizeof(list_head) memory for every attached ftrace_event_file, hopefully not a problem in this case. Link: http://lkml.kernel.org/r/20130620173814.GA13165@redhat.com Acked-by: Masami Hiramatsu Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- kernel/trace/trace_kprobe.c | 138 ++++++++++++-------------------------------- 1 file changed, 37 insertions(+), 101 deletions(-) (limited to 'kernel/trace/trace_kprobe.c') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 282f86cfd304..405b5b0f903e 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -35,12 +35,17 @@ struct trace_probe { const char *symbol; /* symbol name */ struct ftrace_event_class class; struct ftrace_event_call call; - struct ftrace_event_file * __rcu *files; + struct list_head files; ssize_t size; /* trace entry size */ unsigned int nr_args; struct probe_arg args[]; }; +struct event_file_link { + struct ftrace_event_file *file; + struct list_head list; +}; + #define SIZEOF_TRACE_PROBE(n) \ (offsetof(struct trace_probe, args) + \ (sizeof(struct probe_arg) * (n))) @@ -150,6 +155,7 @@ static struct trace_probe *alloc_trace_probe(const char *group, goto error; INIT_LIST_HEAD(&tp->list); + INIT_LIST_HEAD(&tp->files); return tp; error: kfree(tp->call.name); @@ -183,22 +189,6 @@ static struct trace_probe *find_trace_probe(const char *event, return NULL; } -/* - * This and enable_trace_probe/disable_trace_probe rely on event_mutex - * held by the caller, __ftrace_set_clr_event(). - */ -static int trace_probe_nr_files(struct trace_probe *tp) -{ - struct ftrace_event_file **file = rcu_dereference_raw(tp->files); - int ret = 0; - - if (file) - while (*(file++)) - ret++; - - return ret; -} - /* * Enable trace_probe * if the file is NULL, enable "perf" handler, or enable "trace" handler. @@ -209,29 +199,18 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) int ret = 0; if (file) { - struct ftrace_event_file **new, **old; - int n = trace_probe_nr_files(tp); - - old = rcu_dereference_raw(tp->files); - /* 1 is for new one and 1 is for stopper */ - new = kzalloc((n + 2) * sizeof(struct ftrace_event_file *), - GFP_KERNEL); - if (!new) { + struct event_file_link *link; + + link = kmalloc(sizeof(*link), GFP_KERNEL); + if (!link) { ret = -ENOMEM; goto out; } - memcpy(new, old, n * sizeof(struct ftrace_event_file *)); - new[n] = file; - /* The last one keeps a NULL */ - rcu_assign_pointer(tp->files, new); - tp->flags |= TP_FLAG_TRACE; + link->file = file; + list_add_tail_rcu(&link->list, &tp->files); - if (old) { - /* Make sure the probe is done with old files */ - synchronize_sched(); - kfree(old); - } + tp->flags |= TP_FLAG_TRACE; } else tp->flags |= TP_FLAG_PROFILE; @@ -245,24 +224,16 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) return ret; } -static int -trace_probe_file_index(struct trace_probe *tp, struct ftrace_event_file *file) +static struct event_file_link * +find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file) { - struct ftrace_event_file **files; - int i; + struct event_file_link *link; - /* - * Since all tp->files updater is protected by probe_enable_lock, - * we don't need to lock an rcu_read_lock. - */ - files = rcu_dereference_raw(tp->files); - if (files) { - for (i = 0; files[i]; i++) - if (files[i] == file) - return i; - } + list_for_each_entry(link, &tp->files, list) + if (link->file == file) + return link; - return -1; + return NULL; } /* @@ -275,38 +246,23 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) int ret = 0; if (file) { - struct ftrace_event_file **new, **old; - int n = trace_probe_nr_files(tp); - int i, j; + struct event_file_link *link; - old = rcu_dereference_raw(tp->files); - if (n == 0 || trace_probe_file_index(tp, file) < 0) { + link = find_event_file_link(tp, file); + if (!link) { ret = -EINVAL; goto out; } - if (n == 1) { /* Remove the last file */ - tp->flags &= ~TP_FLAG_TRACE; - new = NULL; - } else { - new = kzalloc(n * sizeof(struct ftrace_event_file *), - GFP_KERNEL); - if (!new) { - ret = -ENOMEM; - goto out; - } - - /* This copy & check loop copies the NULL stopper too */ - for (i = 0, j = 0; j < n && i < n + 1; i++) - if (old[i] != file) - new[j++] = old[i]; - } + list_del_rcu(&link->list); + /* synchronize with kprobe_trace_func/kretprobe_trace_func */ + synchronize_sched(); + kfree(link); - rcu_assign_pointer(tp->files, new); + if (!list_empty(&tp->files)) + goto out; - /* Make sure the probe is done with old files */ - synchronize_sched(); - kfree(old); + tp->flags &= ~TP_FLAG_TRACE; } else tp->flags &= ~TP_FLAG_PROFILE; @@ -871,20 +827,10 @@ __kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs, static __kprobes void kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs) { - /* - * Note: preempt is already disabled around the kprobe handler. - * However, we still need an smp_read_barrier_depends() corresponding - * to smp_wmb() in rcu_assign_pointer() to access the pointer. - */ - struct ftrace_event_file **file = rcu_dereference_raw(tp->files); - - if (unlikely(!file)) - return; + struct event_file_link *link; - while (*file) { - __kprobe_trace_func(tp, regs, *file); - file++; - } + list_for_each_entry_rcu(link, &tp->files, list) + __kprobe_trace_func(tp, regs, link->file); } /* Kretprobe handler */ @@ -931,20 +877,10 @@ static __kprobes void kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri, struct pt_regs *regs) { - /* - * Note: preempt is already disabled around the kprobe handler. - * However, we still need an smp_read_barrier_depends() corresponding - * to smp_wmb() in rcu_assign_pointer() to access the pointer. - */ - struct ftrace_event_file **file = rcu_dereference_raw(tp->files); - - if (unlikely(!file)) - return; + struct event_file_link *link; - while (*file) { - __kretprobe_trace_func(tp, ri, regs, *file); - file++; - } + list_for_each_entry_rcu(link, &tp->files, list) + __kretprobe_trace_func(tp, ri, regs, link->file); } /* Event entry printers */ -- cgit v1.2.3 From cf6735a4b103b801753748531e3658cdc8cafa5e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 20 Jun 2013 19:38:11 +0200 Subject: tracing/kprobes: Don't pass addr=ip to perf_trace_buf_submit() kprobe_perf_func() and kretprobe_perf_func() pass addr=ip to perf_trace_buf_submit() for no reason. This sets perf_sample_data->addr for PERF_SAMPLE_ADDR, we already have perf_sample_data->ip initialized if PERF_SAMPLE_IP. Link: http://lkml.kernel.org/r/20130620173811.GA13161@redhat.com Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- kernel/trace/trace_kprobe.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'kernel/trace/trace_kprobe.c') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 405b5b0f903e..7ed6976493c8 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1098,8 +1098,7 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs) entry->ip = (unsigned long)tp->rp.kp.addr; memset(&entry[1], 0, dsize); store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); - perf_trace_buf_submit(entry, size, rctx, - entry->ip, 1, regs, head, NULL); + perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); } /* Kretprobe profile handler */ @@ -1132,8 +1131,7 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri, entry->func = (unsigned long)tp->rp.kp.addr; entry->ret_ip = (unsigned long)ri->ret_addr; store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); - perf_trace_buf_submit(entry, size, rctx, - entry->ret_ip, 1, regs, head, NULL); + perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); } #endif /* CONFIG_PERF_EVENTS */ -- cgit v1.2.3 From cd92bf61d6d70bd3eb33b46d600e3f3eb9c5778a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 17 Jun 2013 19:02:11 +0200 Subject: tracing/perf: Move the PERF_MAX_TRACE_SIZE check into perf_trace_buf_prepare() Every perf_trace_buf_prepare() caller does WARN_ONCE(size > PERF_MAX_TRACE_SIZE, message) and "message" is almost the same. Shift this WARN_ONCE() into perf_trace_buf_prepare(). This changes the meaning of _ONCE, but I think this is fine. - 4947014 2932448 10104832 17984294 1126b26 vmlinux + 4948422 2932448 10104832 17985702 11270a6 vmlinux on my build. Link: http://lkml.kernel.org/r/20130617170211.GA19813@redhat.com Acked-by: Peter Zijlstra Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 4 ---- kernel/trace/trace_event_perf.c | 4 ++++ kernel/trace/trace_kprobe.c | 6 ------ kernel/trace/trace_syscalls.c | 12 ------------ kernel/trace/trace_uprobe.c | 2 -- 5 files changed, 4 insertions(+), 24 deletions(-) (limited to 'kernel/trace/trace_kprobe.c') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index d615f78cc6b6..41a6643e2136 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -670,10 +670,6 @@ perf_trace_##call(void *__data, proto) \ sizeof(u64)); \ __entry_size -= sizeof(u32); \ \ - if (WARN_ONCE(__entry_size > PERF_MAX_TRACE_SIZE, \ - "profile buffer not large enough")) \ - return; \ - \ entry = (struct ftrace_raw_##call *)perf_trace_buf_prepare( \ __entry_size, event_call->event.type, &__regs, &rctx); \ if (!entry) \ diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 12df5573086e..80c36bcf66e8 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -236,6 +236,10 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); + if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, + "perf buffer not large enough")) + return NULL; + pc = preempt_count(); *rctxp = perf_swevent_get_recursion_context(); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 7ed6976493c8..ae6ce835b023 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1087,9 +1087,6 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs) __size = sizeof(*entry) + tp->size + dsize; size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); - if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, - "profile buffer not large enough")) - return; entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); if (!entry) @@ -1120,9 +1117,6 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri, __size = sizeof(*entry) + tp->size + dsize; size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); - if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, - "profile buffer not large enough")) - return; entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); if (!entry) diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index ac0085777fbd..8fd03657bc7d 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -575,10 +575,6 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) size = ALIGN(size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); - if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, - "perf buffer not large enough")) - return; - rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size, sys_data->enter_event->event.type, regs, &rctx); if (!rec) @@ -652,14 +648,6 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64)); size -= sizeof(u32); - /* - * Impossible, but be paranoid with the future - * How to put this check outside runtime? - */ - if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, - "exit event has grown above perf buffer size")) - return; - rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size, sys_data->exit_event->event.type, regs, &rctx); if (!rec) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index d5d0cd368a56..a23d2d71188e 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -818,8 +818,6 @@ static void uprobe_perf_print(struct trace_uprobe *tu, size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); size = ALIGN(size + tu->size + sizeof(u32), sizeof(u64)) - sizeof(u32); - if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough")) - return; preempt_disable(); head = this_cpu_ptr(call->perf_events); -- cgit v1.2.3 From a232e270dcb55a70ad3241bc6fc160fd9b5c9e6c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 9 Jul 2013 18:35:26 +0900 Subject: tracing/kprobe: Wait for disabling all running kprobe handlers Wait for disabling all running kprobe handlers when a kprobe event is disabled, since the caller, trace_remove_event_call() supposes that a removing event is disabled completely by disabling the event. With this change, ftrace can ensure that there is no running event handlers after disabling it. Link: http://lkml.kernel.org/r/20130709093526.20138.93100.stgit@mhiramat-M0-7522 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/trace/trace_kprobe.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'kernel/trace/trace_kprobe.c') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index ae6ce835b023..3811487e7a7a 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -243,11 +243,11 @@ find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file) static int disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) { + struct event_file_link *link = NULL; + int wait = 0; int ret = 0; if (file) { - struct event_file_link *link; - link = find_event_file_link(tp, file); if (!link) { ret = -EINVAL; @@ -255,10 +255,7 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) } list_del_rcu(&link->list); - /* synchronize with kprobe_trace_func/kretprobe_trace_func */ - synchronize_sched(); - kfree(link); - + wait = 1; if (!list_empty(&tp->files)) goto out; @@ -271,8 +268,22 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) disable_kretprobe(&tp->rp); else disable_kprobe(&tp->rp.kp); + wait = 1; } out: + if (wait) { + /* + * Synchronize with kprobe_trace_func/kretprobe_trace_func + * to ensure disabled (all running handlers are finished). + * This is not only for kfree(), but also the caller, + * trace_remove_event_call() supposes it for releasing + * event_call related objects, which will be accessed in + * the kprobe_trace_func/kretprobe_trace_func. + */ + synchronize_sched(); + kfree(link); /* Ignored if link == NULL */ + } + return ret; } -- cgit v1.2.3 From 40c32592668b727cbfcf7b1c0567f581bd62a5e4 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 3 Jul 2013 23:33:50 -0400 Subject: tracing/kprobes: Fail to unregister if probe event files are in use When a probe is being removed, it cleans up the event files that correspond to the probe. But there is a race between writing to one of these files and deleting the probe. This is especially true for the "enable" file. CPU 0 CPU 1 ----- ----- fd = open("enable",O_WRONLY); probes_open() release_all_trace_probes() unregister_trace_probe() if (trace_probe_is_enabled(tp)) return -EBUSY write(fd, "1", 1) __ftrace_set_clr_event() call->class->reg() (kprobe_register) enable_trace_probe(tp) __unregister_trace_probe(tp); list_del(&tp->list) unregister_probe_event(tp) <-- fails! free_trace_probe(tp) write(fd, "0", 1) __ftrace_set_clr_event() call->class->unreg (kprobe_register) disable_trace_probe(tp) <-- BOOM! A test program was written that used two threads to simulate the above scenario adding a nanosleep() interval to change the timings and after several thousand runs, it was able to trigger this bug and crash: BUG: unable to handle kernel paging request at 00000005000000f9 IP: [] probes_open+0x3b/0xa7 PGD 7808a067 PUD 0 Oops: 0000 [#1] PREEMPT SMP Dumping ftrace buffer: --------------------------------- Modules linked in: ipt_MASQUERADE sunrpc ip6t_REJECT nf_conntrack_ipv6 CPU: 1 PID: 2070 Comm: test-kprobe-rem Not tainted 3.11.0-rc3-test+ #47 Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS SDBLI944.86P 05/08/2007 task: ffff880077756440 ti: ffff880076e52000 task.ti: ffff880076e52000 RIP: 0010:[] [] probes_open+0x3b/0xa7 RSP: 0018:ffff880076e53c38 EFLAGS: 00010203 RAX: 0000000500000001 RBX: ffff88007844f440 RCX: 0000000000000003 RDX: 0000000000000003 RSI: 0000000000000003 RDI: ffff880076e52000 RBP: ffff880076e53c58 R08: ffff880076e53bd8 R09: 0000000000000000 R10: ffff880077756440 R11: 0000000000000006 R12: ffffffff810dee35 R13: ffff880079250418 R14: 0000000000000000 R15: ffff88007844f450 FS: 00007f87a276f700(0000) GS:ffff88007d480000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00000005000000f9 CR3: 0000000077262000 CR4: 00000000000007e0 Stack: ffff880076e53c58 ffffffff81219ea0 ffff88007844f440 ffffffff810dee35 ffff880076e53ca8 ffffffff81130f78 ffff8800772986c0 ffff8800796f93a0 ffffffff81d1b5d8 ffff880076e53e04 0000000000000000 ffff88007844f440 Call Trace: [] ? security_file_open+0x2c/0x30 [] ? unregister_trace_probe+0x4b/0x4b [] do_dentry_open+0x162/0x226 [] finish_open+0x46/0x54 [] do_last+0x7f6/0x996 [] ? inode_permission+0x42/0x44 [] path_openat+0x232/0x496 [] do_filp_open+0x3a/0x8a [] ? __alloc_fd+0x168/0x17a [] do_sys_open+0x70/0x102 [] ? trace_hardirqs_on_caller+0x160/0x197 [] SyS_open+0x1e/0x20 [] system_call_fastpath+0x16/0x1b Code: e5 41 54 53 48 89 f3 48 83 ec 10 48 23 56 78 48 39 c2 75 6c 31 f6 48 c7 RIP [] probes_open+0x3b/0xa7 RSP CR2: 00000005000000f9 ---[ end trace 35f17d68fc569897 ]--- The unregister_trace_probe() must be done first, and if it fails it must fail the removal of the kprobe. Several changes have already been made by Oleg Nesterov and Masami Hiramatsu to allow moving the unregister_probe_event() before the removal of the probe and exit the function if it fails. This prevents the tp structure from being used after it is freed. Link: http://lkml.kernel.org/r/20130704034038.819592356@goodmis.org Acked-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/trace/trace_kprobe.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'kernel/trace/trace_kprobe.c') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 3811487e7a7a..243f6834d026 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -95,7 +95,7 @@ static __kprobes bool trace_probe_is_on_module(struct trace_probe *tp) } static int register_probe_event(struct trace_probe *tp); -static void unregister_probe_event(struct trace_probe *tp); +static int unregister_probe_event(struct trace_probe *tp); static DEFINE_MUTEX(probe_lock); static LIST_HEAD(probe_list); @@ -351,9 +351,12 @@ static int unregister_trace_probe(struct trace_probe *tp) if (trace_probe_is_enabled(tp)) return -EBUSY; + /* Will fail if probe is being used by ftrace or perf */ + if (unregister_probe_event(tp)) + return -EBUSY; + __unregister_trace_probe(tp); list_del(&tp->list); - unregister_probe_event(tp); return 0; } @@ -632,7 +635,9 @@ static int release_all_trace_probes(void) /* TODO: Use batch unregistration */ while (!list_empty(&probe_list)) { tp = list_entry(probe_list.next, struct trace_probe, list); - unregister_trace_probe(tp); + ret = unregister_trace_probe(tp); + if (ret) + goto end; free_trace_probe(tp); } @@ -1247,11 +1252,15 @@ static int register_probe_event(struct trace_probe *tp) return ret; } -static void unregister_probe_event(struct trace_probe *tp) +static int unregister_probe_event(struct trace_probe *tp) { + int ret; + /* tp->event is unregistered in trace_remove_event_call() */ - trace_remove_event_call(&tp->call); - kfree(tp->call.print_fmt); + ret = trace_remove_event_call(&tp->call); + if (!ret) + kfree(tp->call.print_fmt); + return ret; } /* Make a debugfs interface for controlling probe points */ -- cgit v1.2.3