From c757bea93bea4b77ebd181cc6dca60c15e3b1a2c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 21 Dec 2009 22:35:16 -0500 Subject: tracing: Fix setting tracer specific options The function __set_tracer_option() takes as its last parameter a "neg" value. If set it should negate the value of the option. The trace_options_write() passed the value written to the file which is what the new value needs to be set as. But since this is not the negative, it never sets the value. Reported-by: Peter Zijlstra Cc: Li Zefan Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ee61915935d5..d0a4c12d1f1c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3949,7 +3949,7 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, if (!!(topt->flags->val & topt->opt->bit) != val) { mutex_lock(&trace_types_lock); ret = __set_tracer_option(current_trace, topt->flags, - topt->opt, val); + topt->opt, !val); mutex_unlock(&trace_types_lock); if (ret) return ret; -- cgit v1.2.3 From 40892367bc893f3abf6f5ca8ac2ed1c98ba26a77 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 21 Dec 2009 12:01:17 -0800 Subject: tracing: Kconfig spelling fixes and cleanups Fix filename reference (ftrace-implementation.txt -> ftrace-design.txt). Fix spelling, punctuation, grammar. Fix help text indentation and line lengths to reduce need for horizontal scrolling or larger window sizes. Signed-off-by: Randy Dunlap Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <20091221120117.3fb49cdc.randy.dunlap@oracle.com> Signed-off-by: Ingo Molnar --- kernel/trace/Kconfig | 112 +++++++++++++++++++++++++-------------------------- 1 file changed, 56 insertions(+), 56 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index d006554888dc..6c22d8a2f289 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -12,17 +12,17 @@ config NOP_TRACER config HAVE_FTRACE_NMI_ENTER bool help - See Documentation/trace/ftrace-implementation.txt + See Documentation/trace/ftrace-design.txt config HAVE_FUNCTION_TRACER bool help - See Documentation/trace/ftrace-implementation.txt + See Documentation/trace/ftrace-design.txt config HAVE_FUNCTION_GRAPH_TRACER bool help - See Documentation/trace/ftrace-implementation.txt + See Documentation/trace/ftrace-design.txt config HAVE_FUNCTION_GRAPH_FP_TEST bool @@ -34,17 +34,17 @@ config HAVE_FUNCTION_GRAPH_FP_TEST config HAVE_FUNCTION_TRACE_MCOUNT_TEST bool help - See Documentation/trace/ftrace-implementation.txt + See Documentation/trace/ftrace-design.txt config HAVE_DYNAMIC_FTRACE bool help - See Documentation/trace/ftrace-implementation.txt + See Documentation/trace/ftrace-design.txt config HAVE_FTRACE_MCOUNT_RECORD bool help - See Documentation/trace/ftrace-implementation.txt + See Documentation/trace/ftrace-design.txt config HAVE_HW_BRANCH_TRACER bool @@ -52,7 +52,7 @@ config HAVE_HW_BRANCH_TRACER config HAVE_SYSCALL_TRACEPOINTS bool help - See Documentation/trace/ftrace-implementation.txt + See Documentation/trace/ftrace-design.txt config TRACER_MAX_TRACE bool @@ -83,7 +83,7 @@ config RING_BUFFER_ALLOW_SWAP # This allows those options to appear when no other tracer is selected. But the # options do not appear when something else selects it. We need the two options # GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the -# hidding of the automatic options. +# hiding of the automatic options. config TRACING bool @@ -119,7 +119,7 @@ menuconfig FTRACE bool "Tracers" default y if DEBUG_KERNEL help - Enable the kernel tracing infrastructure. + Enable the kernel tracing infrastructure. if FTRACE @@ -133,7 +133,7 @@ config FUNCTION_TRACER help Enable the kernel to trace every kernel function. This is done by using a compiler feature to insert a small, 5-byte No-Operation - instruction to the beginning of every kernel function, which NOP + instruction at the beginning of every kernel function, which NOP sequence is then dynamically patched into a tracer call when tracing is enabled by the administrator. If it's runtime disabled (the bootup default), then the overhead of the instructions is very @@ -150,7 +150,7 @@ config FUNCTION_GRAPH_TRACER and its entry. Its first purpose is to trace the duration of functions and draw a call graph for each thread with some information like - the return value. This is done by setting the current return + the return value. This is done by setting the current return address on the current task structure into a stack of calls. @@ -173,7 +173,7 @@ config IRQSOFF_TRACER echo 0 > /sys/kernel/debug/tracing/tracing_max_latency - (Note that kernel size and overhead increases with this option + (Note that kernel size and overhead increase with this option enabled. This option and the preempt-off timing option can be used together or separately.) @@ -186,7 +186,7 @@ config PREEMPT_TRACER select TRACER_MAX_TRACE select RING_BUFFER_ALLOW_SWAP help - This option measures the time spent in preemption off critical + This option measures the time spent in preemption-off critical sections, with microsecond accuracy. The default measurement method is a maximum search, which is @@ -195,7 +195,7 @@ config PREEMPT_TRACER echo 0 > /sys/kernel/debug/tracing/tracing_max_latency - (Note that kernel size and overhead increases with this option + (Note that kernel size and overhead increase with this option enabled. This option and the irqs-off timing option can be used together or separately.) @@ -222,7 +222,7 @@ config ENABLE_DEFAULT_TRACERS depends on !GENERIC_TRACER select TRACING help - This tracer hooks to various trace points in the kernel + This tracer hooks to various trace points in the kernel, allowing the user to pick and choose which trace point they want to trace. It also includes the sched_switch tracer plugin. @@ -265,19 +265,19 @@ choice The likely/unlikely profiler only looks at the conditions that are annotated with a likely or unlikely macro. - The "all branch" profiler will profile every if statement in the + The "all branch" profiler will profile every if-statement in the kernel. This profiler will also enable the likely/unlikely - profiler as well. + profiler. - Either of the above profilers add a bit of overhead to the system. - If unsure choose "No branch profiling". + Either of the above profilers adds a bit of overhead to the system. + If unsure, choose "No branch profiling". config BRANCH_PROFILE_NONE bool "No branch profiling" help - No branch profiling. Branch profiling adds a bit of overhead. - Only enable it if you want to analyse the branching behavior. - Otherwise keep it disabled. + No branch profiling. Branch profiling adds a bit of overhead. + Only enable it if you want to analyse the branching behavior. + Otherwise keep it disabled. config PROFILE_ANNOTATED_BRANCHES bool "Trace likely/unlikely profiler" @@ -288,7 +288,7 @@ config PROFILE_ANNOTATED_BRANCHES /sys/kernel/debug/tracing/profile_annotated_branch - Note: this will add a significant overhead, only turn this + Note: this will add a significant overhead; only turn this on if you need to profile the system's use of these macros. config PROFILE_ALL_BRANCHES @@ -305,7 +305,7 @@ config PROFILE_ALL_BRANCHES This configuration, when enabled, will impose a great overhead on the system. This should only be enabled when the system - is to be analyzed + is to be analyzed in much detail. endchoice config TRACING_BRANCHES @@ -335,7 +335,7 @@ config POWER_TRACER depends on X86 select GENERIC_TRACER help - This tracer helps developers to analyze and optimize the kernels + This tracer helps developers to analyze and optimize the kernel's power management decisions, specifically the C-state and P-state behavior. @@ -391,14 +391,14 @@ config HW_BRANCH_TRACER select GENERIC_TRACER help This tracer records all branches on the system in a circular - buffer giving access to the last N branches for each cpu. + buffer, giving access to the last N branches for each cpu. config KMEMTRACE bool "Trace SLAB allocations" select GENERIC_TRACER help kmemtrace provides tracing for slab allocator functions, such as - kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected + kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected data is then fed to the userspace application in order to analyse allocation hotspots, internal fragmentation and so on, making it possible to see how well an allocator performs, as well as debug @@ -417,15 +417,15 @@ config WORKQUEUE_TRACER bool "Trace workqueues" select GENERIC_TRACER help - The workqueue tracer provides some statistical informations + The workqueue tracer provides some statistical information about each cpu workqueue thread such as the number of the works inserted and executed since their creation. It can help - to evaluate the amount of work each of them have to perform. + to evaluate the amount of work each of them has to perform. For example it can help a developer to decide whether he should - choose a per cpu workqueue instead of a singlethreaded one. + choose a per-cpu workqueue instead of a singlethreaded one. config BLK_DEV_IO_TRACE - bool "Support for tracing block io actions" + bool "Support for tracing block IO actions" depends on SYSFS depends on BLOCK select RELAY @@ -456,15 +456,15 @@ config KPROBE_EVENT select TRACING default y help - This allows the user to add tracing events (similar to tracepoints) on the fly - via the ftrace interface. See Documentation/trace/kprobetrace.txt - for more details. + This allows the user to add tracing events (similar to tracepoints) + on the fly via the ftrace interface. See + Documentation/trace/kprobetrace.txt for more details. Those events can be inserted wherever kprobes can probe, and record various register and memory values. - This option is also required by perf-probe subcommand of perf tools. If - you want to use perf tools, this option is strongly recommended. + This option is also required by perf-probe subcommand of perf tools. + If you want to use perf tools, this option is strongly recommended. config DYNAMIC_FTRACE bool "enable/disable ftrace tracepoints dynamically" @@ -472,32 +472,32 @@ config DYNAMIC_FTRACE depends on HAVE_DYNAMIC_FTRACE default y help - This option will modify all the calls to ftrace dynamically - (will patch them out of the binary image and replaces them - with a No-Op instruction) as they are called. A table is - created to dynamically enable them again. + This option will modify all the calls to ftrace dynamically + (will patch them out of the binary image and replace them + with a No-Op instruction) as they are called. A table is + created to dynamically enable them again. - This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but otherwise - has native performance as long as no tracing is active. + This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but + otherwise has native performance as long as no tracing is active. - The changes to the code are done by a kernel thread that - wakes up once a second and checks to see if any ftrace calls - were made. If so, it runs stop_machine (stops all CPUS) - and modifies the code to jump over the call to ftrace. + The changes to the code are done by a kernel thread that + wakes up once a second and checks to see if any ftrace calls + were made. If so, it runs stop_machine (stops all CPUS) + and modifies the code to jump over the call to ftrace. config FUNCTION_PROFILER bool "Kernel function profiler" depends on FUNCTION_TRACER default n help - This option enables the kernel function profiler. A file is created - in debugfs called function_profile_enabled which defaults to zero. - When a 1 is echoed into this file profiling begins, and when a - zero is entered, profiling stops. A file in the trace_stats - directory called functions, that show the list of functions that - have been hit and their counters. + This option enables the kernel function profiler. A file is created + in debugfs called function_profile_enabled which defaults to zero. + When a 1 is echoed into this file profiling begins, and when a + zero is entered, profiling stops. A "functions" file is created in + the trace_stats directory; this file shows the list of functions that + have been hit and their counters. - If in doubt, say N + If in doubt, say N. config FTRACE_MCOUNT_RECORD def_bool y @@ -556,8 +556,8 @@ config RING_BUFFER_BENCHMARK tristate "Ring buffer benchmark stress tester" depends on RING_BUFFER help - This option creates a test to stress the ring buffer and bench mark it. - It creates its own ring buffer such that it will not interfer with + This option creates a test to stress the ring buffer and benchmark it. + It creates its own ring buffer such that it will not interfere with any other users of the ring buffer (such as ftrace). It then creates a producer and consumer that will run for 10 seconds and sleep for 10 seconds. Each interval it will print out the number of events @@ -566,7 +566,7 @@ config RING_BUFFER_BENCHMARK It does not disable interrupts or raise its priority, so it may be affected by processes that are running. - If unsure, say N + If unsure, say N. endif # FTRACE -- cgit v1.2.3 From 88f7a890d74137ab0d126a5d65679cd620f1a289 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 30 Dec 2009 14:22:22 +0800 Subject: ksym_tracer: Fix to make the tracer work ksym tracer doesn't work: # echo tasklist_lock:rw- > ksym_trace_filter -bash: echo: write error: No such device It's because we pass to perf_event_create_kernel_counter() a cpu number which is not present. Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: K.Prasad Cc: Frederic Weisbecker LKML-Reference: <4B3AF19E.1010201@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/hw_breakpoint.c | 10 +++++++--- kernel/trace/trace_ksym.c | 1 - 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 366eedf949c0..48fb0bb6992a 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -388,7 +389,8 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr, if (!cpu_events) return ERR_PTR(-ENOMEM); - for_each_possible_cpu(cpu) { + get_online_cpus(); + for_each_online_cpu(cpu) { pevent = per_cpu_ptr(cpu_events, cpu); bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered); @@ -399,18 +401,20 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr, goto fail; } } + put_online_cpus(); return cpu_events; fail: - for_each_possible_cpu(cpu) { + for_each_online_cpu(cpu) { pevent = per_cpu_ptr(cpu_events, cpu); if (IS_ERR(*pevent)) break; unregister_hw_breakpoint(*pevent); } + put_online_cpus(); + free_percpu(cpu_events); - /* return the error if any */ return ERR_PTR(err); } EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint); diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index faf37fa4408c..340b6ff193e0 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -197,7 +197,6 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) entry->attr.bp_addr = addr; entry->attr.bp_len = HW_BREAKPOINT_LEN_4; - ret = -EAGAIN; entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr, ksym_hbp_handler); -- cgit v1.2.3 From 3d13ec2efdb5843ad91e57b60d50b44d922cf063 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 30 Dec 2009 14:23:19 +0800 Subject: ksym_tracer: Fix to allow writing newline to ksym_trace_filter It used to work, but now doesn't: # echo > ksym_filter bash: echo: write error: Invalid argument It's caused by d954fbf0ff6b5fdfb32350e85a2f15d3db976506 ("tracing: Fix wrong usage of strstrip in trace_ksyms"). Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: K.Prasad Cc: Frederic Weisbecker LKML-Reference: <4B3AF1D7.5040400@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_ksym.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 340b6ff193e0..160a8d8b37a2 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -299,8 +299,8 @@ static ssize_t ksym_trace_filter_write(struct file *file, * 2: echo 0 > ksym_trace_filter * 3: echo "*:---" > ksym_trace_filter */ - if (!buf[0] || !strcmp(buf, "0") || - !strcmp(buf, "*:---")) { + if (!input_string[0] || !strcmp(input_string, "0") || + !strcmp(input_string, "*:---")) { __ksym_trace_reset(); ret = 0; goto out; -- cgit v1.2.3 From e6d9491bf8ba6728cc86aeabbc688d20ec0563b5 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 30 Dec 2009 14:23:40 +0800 Subject: ksym_tracer: Fix race when incrementing count We are under rcu read section but not holding the write lock, so count++ is not atomic. Use atomic64_t instead. Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: K.Prasad Cc: Frederic Weisbecker LKML-Reference: <4B3AF1EC.9010608@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_ksym.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 160a8d8b37a2..67d79f709fc5 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -32,6 +32,8 @@ #include #include +#include + /* * For now, let us restrict the no. of symbols traced simultaneously to number * of available hardware breakpoint registers. @@ -44,7 +46,7 @@ struct trace_ksym { struct perf_event **ksym_hbp; struct perf_event_attr attr; #ifdef CONFIG_PROFILE_KSYM_TRACER - unsigned long counter; + atomic64_t counter; #endif struct hlist_node ksym_hlist; }; @@ -69,9 +71,8 @@ void ksym_collect_stats(unsigned long hbp_hit_addr) rcu_read_lock(); hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) { - if ((entry->attr.bp_addr == hbp_hit_addr) && - (entry->counter <= MAX_UL_INT)) { - entry->counter++; + if (entry->attr.bp_addr == hbp_hit_addr) { + atomic64_inc(&entry->counter); break; } } @@ -501,7 +502,8 @@ static int ksym_tracer_stat_show(struct seq_file *m, void *v) seq_printf(m, " %-36s", fn_name); else seq_printf(m, " %-36s", ""); - seq_printf(m, " %15lu\n", entry->counter); + seq_printf(m, " %15llu\n", + (unsigned long long)atomic64_read(&entry->counter)); return 0; } -- cgit v1.2.3 From 53ab668064edaeef99c0ee22799483d45f4c81f6 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 30 Dec 2009 14:24:03 +0800 Subject: ksym_tracer: Remove trace_stat trace_stat is problematic. Don't use it, use seqfile instead. This fixes a race that reading the stat file is not protected by any lock, which can lead to use after free. Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: K.Prasad Cc: Frederic Weisbecker LKML-Reference: <4B3AF203.40200@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_ksym.c | 127 ++++++++++++++++++---------------------------- 1 file changed, 50 insertions(+), 77 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 67d79f709fc5..94103cdcf9d8 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -26,7 +26,6 @@ #include #include "trace_output.h" -#include "trace_stat.h" #include "trace.h" #include @@ -444,103 +443,77 @@ struct tracer ksym_tracer __read_mostly = .print_line = ksym_trace_output }; -__init static int init_ksym_trace(void) -{ - struct dentry *d_tracer; - struct dentry *entry; - - d_tracer = tracing_init_dentry(); - ksym_filter_entry_count = 0; - - entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer, - NULL, &ksym_tracing_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'ksym_trace_filter' file\n"); - - return register_tracer(&ksym_tracer); -} -device_initcall(init_ksym_trace); - - #ifdef CONFIG_PROFILE_KSYM_TRACER -static int ksym_tracer_stat_headers(struct seq_file *m) +static int ksym_profile_show(struct seq_file *m, void *v) { + struct hlist_node *node; + struct trace_ksym *entry; + int access_type = 0; + char fn_name[KSYM_NAME_LEN]; + seq_puts(m, " Access Type "); seq_puts(m, " Symbol Counter\n"); seq_puts(m, " ----------- "); seq_puts(m, " ------ -------\n"); - return 0; -} -static int ksym_tracer_stat_show(struct seq_file *m, void *v) -{ - struct hlist_node *stat = v; - struct trace_ksym *entry; - int access_type = 0; - char fn_name[KSYM_NAME_LEN]; + rcu_read_lock(); + hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) { - entry = hlist_entry(stat, struct trace_ksym, ksym_hlist); + access_type = entry->attr.bp_type; - access_type = entry->attr.bp_type; + switch (access_type) { + case HW_BREAKPOINT_R: + seq_puts(m, " R "); + break; + case HW_BREAKPOINT_W: + seq_puts(m, " W "); + break; + case HW_BREAKPOINT_R | HW_BREAKPOINT_W: + seq_puts(m, " RW "); + break; + default: + seq_puts(m, " NA "); + } - switch (access_type) { - case HW_BREAKPOINT_R: - seq_puts(m, " R "); - break; - case HW_BREAKPOINT_W: - seq_puts(m, " W "); - break; - case HW_BREAKPOINT_R | HW_BREAKPOINT_W: - seq_puts(m, " RW "); - break; - default: - seq_puts(m, " NA "); + if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0) + seq_printf(m, " %-36s", fn_name); + else + seq_printf(m, " %-36s", ""); + seq_printf(m, " %15llu\n", + (unsigned long long)atomic64_read(&entry->counter)); } - - if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0) - seq_printf(m, " %-36s", fn_name); - else - seq_printf(m, " %-36s", ""); - seq_printf(m, " %15llu\n", - (unsigned long long)atomic64_read(&entry->counter)); + rcu_read_unlock(); return 0; } -static void *ksym_tracer_stat_start(struct tracer_stat *trace) +static int ksym_profile_open(struct inode *node, struct file *file) { - return ksym_filter_head.first; -} - -static void * -ksym_tracer_stat_next(void *v, int idx) -{ - struct hlist_node *stat = v; - - return stat->next; + return single_open(file, ksym_profile_show, NULL); } -static struct tracer_stat ksym_tracer_stats = { - .name = "ksym_tracer", - .stat_start = ksym_tracer_stat_start, - .stat_next = ksym_tracer_stat_next, - .stat_headers = ksym_tracer_stat_headers, - .stat_show = ksym_tracer_stat_show +static const struct file_operations ksym_profile_fops = { + .open = ksym_profile_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, }; +#endif /* CONFIG_PROFILE_KSYM_TRACER */ -__init static int ksym_tracer_stat_init(void) +__init static int init_ksym_trace(void) { - int ret; + struct dentry *d_tracer; - ret = register_stat_tracer(&ksym_tracer_stats); - if (ret) { - printk(KERN_WARNING "Warning: could not register " - "ksym tracer stats\n"); - return 1; - } + d_tracer = tracing_init_dentry(); - return 0; + trace_create_file("ksym_trace_filter", 0644, d_tracer, + NULL, &ksym_tracing_fops); + +#ifdef CONFIG_PROFILE_KSYM_TRACER + trace_create_file("ksym_profile", 0444, d_tracer, + NULL, &ksym_profile_fops); +#endif + + return register_tracer(&ksym_tracer); } -fs_initcall(ksym_tracer_stat_init); -#endif /* CONFIG_PROFILE_KSYM_TRACER */ +device_initcall(init_ksym_trace); -- cgit v1.2.3 From 79b408210885b9f7f0b067b07a09d68f4da3a700 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 15 Dec 2009 15:39:19 +0800 Subject: tracing/kprobe: Show sign of fields in trace_kprobe format files The format files of trace_kprobe do not show the sign of the fields. The other format files show the field signed type of the fields and this patch makes the trace_kprobe formats consistent with the others. Signed-off-by: Lai Jiangshan LKML-Reference: <4B273D27.5040009@cn.fujitsu.com> Acked-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/trace/trace_kprobe.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 7ecab06547a5..83f1e6ef7063 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1182,10 +1182,11 @@ static int __probe_event_show_format(struct trace_seq *s, #undef SHOW_FIELD #define SHOW_FIELD(type, item, name) \ do { \ - ret = trace_seq_printf(s, "\tfield: " #type " %s;\t" \ - "offset:%u;\tsize:%u;\n", name, \ + ret = trace_seq_printf(s, "\tfield:" #type " %s;\t" \ + "offset:%u;\tsize:%u;\tsigned:%d;\n", name,\ (unsigned int)offsetof(typeof(field), item),\ - (unsigned int)sizeof(type)); \ + (unsigned int)sizeof(type), \ + is_signed_type(type)); \ if (!ret) \ return 0; \ } while (0) -- cgit v1.2.3 From fb7ae981cb9fe8665b9da97e8734745e030c151d Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 15 Dec 2009 15:39:38 +0800 Subject: tracing: Fix sign fields in ftrace_define_fields_##call() Add is_signed_type() call to trace_define_field() in ftrace macros. The code previously just passed in 0 (false), disregarding whether or not the field was actually a signed type. Signed-off-by: Lai Jiangshan LKML-Reference: <4B273D3A.6020007@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 7 ++++--- kernel/trace/trace_export.c | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'kernel/trace') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 73523151a731..c6fe03e902ca 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -414,7 +414,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), item), \ - sizeof(field.item), 0, FILTER_OTHER); \ + sizeof(field.item), \ + is_signed_type(type), FILTER_OTHER); \ if (ret) \ return ret; @@ -422,8 +423,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #define __dynamic_array(type, item, len) \ ret = trace_define_field(event_call, "__data_loc " #type "[]", #item, \ offsetof(typeof(field), __data_loc_##item), \ - sizeof(field.__data_loc_##item), 0, \ - FILTER_OTHER); + sizeof(field.__data_loc_##item), \ + is_signed_type(type), FILTER_OTHER); #undef __string #define __string(item, src) __dynamic_array(char, item, -1) diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 458e5bfe26d0..d4fa5dc1ee4e 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -158,7 +158,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \ BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), item), \ - sizeof(field.item), 0, FILTER_OTHER); \ + sizeof(field.item), \ + is_signed_type(type), FILTER_OTHER); \ if (ret) \ return ret; @@ -168,8 +169,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \ ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), \ container.item), \ - sizeof(field.container.item), 0, \ - FILTER_OTHER); \ + sizeof(field.container.item), \ + is_signed_type(type), FILTER_OTHER); \ if (ret) \ return ret; -- cgit v1.2.3 From 5ded3dc6a3c7549b36a8ac27bbd81b33756a2c29 Mon Sep 17 00:00:00 2001 From: David Sharp Date: Wed, 6 Jan 2010 17:12:07 -0800 Subject: ring-buffer: Wrap a list.next reference with rb_list_head() This reference at the end of rb_get_reader_page() was causing off-by-one writes to the prev pointer of the page after the reader page when that page is the head page, and therefore the reader page has the RB_PAGE_HEAD flag in its list.next pointer. This eventually results in a GPF in a subsequent call to rb_set_head_page() (usually from rb_get_reader_page()) when that prev pointer is dereferenced. The dereferenced register would characteristically have an address that appears shifted left by one byte (eg, ffxxxxxxxxxxxxyy instead of ffffxxxxxxxxxxxx) due to being written at an address one byte too high. Signed-off-by: David Sharp LKML-Reference: <1262826727-9090-1-git-send-email-dhsharp@google.com> Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 2326b04c95c4..d5b7308b7e1b 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2906,7 +2906,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) * * Now make the new head point back to the reader page. */ - reader->list.next->prev = &cpu_buffer->reader_page->list; + rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list; rb_inc_page(cpu_buffer, &cpu_buffer->head_page); /* Finally update the reader page to the new head */ -- cgit v1.2.3 From 0e1ff5d72a6393f2ef5dbf74f58bb55a12d63834 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 6 Jan 2010 20:40:44 -0500 Subject: ring-buffer: Add rb_list_head() wrapper around new reader page next field If the very unlikely case happens where the writer moves the head by one between where the head page is read and where the new reader page is assigned _and_ the writer then writes and wraps the entire ring buffer so that the head page is back to what was originally read as the head page, the page to be swapped will have a corrupted next pointer. Simple solution is to wrap the assignment of the next pointer with a rb_list_head(). Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index d5b7308b7e1b..edefe3b2801b 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2869,7 +2869,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) * Splice the empty reader page into the list around the head. */ reader = rb_set_head_page(cpu_buffer); - cpu_buffer->reader_page->list.next = reader->list.next; + cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next); cpu_buffer->reader_page->list.prev = reader->list.prev; /* -- cgit v1.2.3 From 751e9983ee276cb150e8812b1d995f6035a63878 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 14 Jan 2010 10:53:02 +0800 Subject: ftrace: Fix MATCH_END_ONLY function filter For '*foo' pattern, we should allow any string ending with 'foo', but ftrace filter incorrectly disallows strings like bar_foo_foo: # echo '*io' > set_ftrace_filter # cat set_ftrace_filter | grep 'req_bio_endio' # cat available_filter_functions | grep 'req_bio_endio' req_bio_endio Signed-off-by: Li Zefan LKML-Reference: <4B4E870E.6060607@cn.fujitsu.com> Acked-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 7968762c8167..1e6640f80454 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1690,7 +1690,7 @@ ftrace_regex_lseek(struct file *file, loff_t offset, int origin) static int ftrace_match(char *str, char *regex, int len, int type) { int matched = 0; - char *ptr; + int slen; switch (type) { case MATCH_FULL: @@ -1706,8 +1706,8 @@ static int ftrace_match(char *str, char *regex, int len, int type) matched = 1; break; case MATCH_END_ONLY: - ptr = strstr(str, regex); - if (ptr && (ptr[len] == 0)) + slen = strlen(str); + if (slen >= len && memcmp(str + slen - len, regex, len) == 0) matched = 1; break; } -- cgit v1.2.3 From 285caad415f459f336247932b4db95a571357a02 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 14 Jan 2010 10:53:21 +0800 Subject: tracing/filters: Fix MATCH_FRONT_ONLY filter matching MATCH_FRONT_ONLY actually is a full matching: # ./perf record -R -f -a -e lock:lock_acquire \ --filter 'name ~rcu_*' sleep 1 # ./perf trace (no output) We should pass the length of the pattern string to strncmp(). Signed-off-by: Li Zefan LKML-Reference: <4B4E8721.5090301@cn.fujitsu.com> Acked-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- kernel/trace/trace_events_filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 50504cb228de..11c3973e6552 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -261,7 +261,7 @@ static int regex_match_full(char *str, struct regex *r, int len) static int regex_match_front(char *str, struct regex *r, int len) { - if (strncmp(str, r->pattern, len) == 0) + if (strncmp(str, r->pattern, r->len) == 0) return 1; return 0; } -- cgit v1.2.3 From a3291c14ecf0a995e30d993b7f2cae031de98727 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 14 Jan 2010 10:53:41 +0800 Subject: tracing/filters: Fix MATCH_END_ONLY filter matching For '*foo' pattern, we should allow any string ending with 'foo', but event filtering incorrectly disallows strings like bar_foo_foo: Signed-off-by: Li Zefan LKML-Reference: <4B4E8735.6070604@cn.fujitsu.com> Acked-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- kernel/trace/trace_events_filter.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 11c3973e6552..49e44dd17851 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -275,9 +275,10 @@ static int regex_match_middle(char *str, struct regex *r, int len) static int regex_match_end(char *str, struct regex *r, int len) { - char *ptr = strstr(str, r->pattern); + int strlen = len - 1; - if (ptr && (ptr[r->len] == 0)) + if (strlen >= r->len && + memcmp(str + strlen - r->len, r->pattern, r->len) == 0) return 1; return 0; } -- cgit v1.2.3 From b2af211f284eb1bef19fbb85fc8ef551bb1e7460 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 14 Jan 2010 10:54:11 +0800 Subject: tracing/filters: Fix MATCH_MIDDLE_ONLY filter matching The @str might not be NULL-terminated if it's of type DYN_STRING or STATIC_STRING, so we should use strnstr() instead of strstr(). Signed-off-by: Li Zefan LKML-Reference: <4B4E8753.2000102@cn.fujitsu.com> Acked-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- kernel/trace/trace_events_filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 49e44dd17851..f364b085397e 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -268,7 +268,7 @@ static int regex_match_front(char *str, struct regex *r, int len) static int regex_match_middle(char *str, struct regex *r, int len) { - if (strstr(str, r->pattern)) + if (strnstr(str, r->pattern, len)) return 1; return 0; } -- cgit v1.2.3 From 16da27a8bc7a0d050686d1b2e9efb53fab9ed226 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 14 Jan 2010 10:54:27 +0800 Subject: tracing/filters: Fix MATCH_FULL filter matching for PTR_STRING MATCH_FULL matching for PTR_STRING is not working correctly: # echo 'func == vt' > events/bkl/lock_kernel/filter # echo 1 > events/bkl/lock_kernel/enable ... # cat trace Xorg-1484 [000] 1973.392586: lock_kernel: ... func=vt_ioctl() gpm-1402 [001] 1974.027740: lock_kernel: ... func=vt_ioctl() We should pass to regex.match(..., len) the length (including '\0') of the source string instead of the length of the pattern string. Signed-off-by: Li Zefan LKML-Reference: <4B4E8763.5070707@cn.fujitsu.com> Acked-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- kernel/trace/trace_events_filter.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index f364b085397e..60c2a4efad4a 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -211,8 +211,9 @@ static int filter_pred_pchar(struct filter_pred *pred, void *event, { char **addr = (char **)(event + pred->offset); int cmp, match; + int len = strlen(*addr) + 1; /* including tailing '\0' */ - cmp = pred->regex.match(*addr, &pred->regex, pred->regex.field_len); + cmp = pred->regex.match(*addr, &pred->regex, len); match = cmp ^ pred->not; @@ -782,10 +783,8 @@ static int filter_add_pred(struct filter_parse_state *ps, pred->regex.field_len = field->size; } else if (field->filter_type == FILTER_DYN_STRING) fn = filter_pred_strloc; - else { + else fn = filter_pred_pchar; - pred->regex.field_len = strlen(pred->regex.pattern); - } } else { if (field->is_signed) ret = strict_strtoll(pred->regex.pattern, 0, &val); -- cgit v1.2.3 From d1303dd1d6b220cab375f24fa91a5640e54e169e Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 14 Jan 2010 10:54:40 +0800 Subject: tracing/filters: Add comment for match callbacks We should be clear on 2 things: - the length parameter of a match callback includes tailing '\0'. - the string to be searched might not be NULL-terminated. Signed-off-by: Li Zefan LKML-Reference: <4B4E8770.7000608@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace_events_filter.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 60c2a4efad4a..e42af9aad69f 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -252,7 +252,18 @@ static int filter_pred_none(struct filter_pred *pred, void *event, return 0; } -/* Basic regex callbacks */ +/* + * regex_match_foo - Basic regex callbacks + * + * @str: the string to be searched + * @r: the regex structure containing the pattern string + * @len: the length of the string to be searched (including '\0') + * + * Note: + * - @str might not be NULL-terminated if it's of type DYN_STRING + * or STATIC_STRING + */ + static int regex_match_full(char *str, struct regex *r, int len) { if (strncmp(str, r->pattern, len) == 0) -- cgit v1.2.3 From 74bf4076f2ed79b5510440b72a561823a8852ec0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 25 Jan 2010 15:11:53 -0500 Subject: tracing: Prevent kernel oops with corrupted buffer If the contents of the ftrace ring buffer gets corrupted and the trace file is read, it could create a kernel oops (usualy just killing the user task thread). This is caused by the checking of the pid in the buffer. If the pid is negative, it still references the cmdline cache array, which could point to an invalid address. The simple fix is to test for negative PIDs. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0df1b0f2cb9e..eac6875cb990 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -951,6 +951,11 @@ void trace_find_cmdline(int pid, char comm[]) return; } + if (WARN_ON_ONCE(pid < 0)) { + strcpy(comm, ""); + return; + } + if (pid > PID_MAX_DEFAULT) { strcpy(comm, "<...>"); return; -- cgit v1.2.3 From 492a74f4210e15f4701422e2e1c4cd3c1e45ddae Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 25 Jan 2010 15:17:47 -0500 Subject: ring-buffer: Check if ring buffer iterator has stale data Usually reads of the ring buffer is performed by a single task. There are two types of reads from the ring buffer. One is a consuming read which will consume the entry that was read and the next read will be the entry that follows. The other is an iterator that will let the user read the contents of the ring buffer without modifying it. When an iterator is allocated, writes to the ring buffer are disabled to protect the iterator. The problem exists when consuming reads happen while an iterator is allocated. Specifically, the kind of read that swaps out an entire page (used by splice) and replaces it with a new read. If the iterator is on the page that is swapped out, then the next read may read from this swapped out page and return garbage. This patch adds a check when reading the iterator to make sure that the iterator contents are still valid. If a consuming read has taken place, the iterator is reset. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index edefe3b2801b..503b630e0bda 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -464,6 +464,8 @@ struct ring_buffer_iter { struct ring_buffer_per_cpu *cpu_buffer; unsigned long head; struct buffer_page *head_page; + struct buffer_page *cache_reader_page; + unsigned long cache_read; u64 read_stamp; }; @@ -2716,6 +2718,8 @@ static void rb_iter_reset(struct ring_buffer_iter *iter) iter->read_stamp = cpu_buffer->read_stamp; else iter->read_stamp = iter->head_page->page->time_stamp; + iter->cache_reader_page = cpu_buffer->reader_page; + iter->cache_read = cpu_buffer->read; } /** @@ -3066,6 +3070,15 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) cpu_buffer = iter->cpu_buffer; buffer = cpu_buffer->buffer; + /* + * Check if someone performed a consuming read to + * the buffer. A consuming read invalidates the iterator + * and we need to reset the iterator in this case. + */ + if (unlikely(iter->cache_read != cpu_buffer->read || + iter->cache_reader_page != cpu_buffer->reader_page)) + rb_iter_reset(iter); + again: /* * We repeat when a timestamp is encountered. -- cgit v1.2.3 From 3c05d7482777f15e71bb4cb1ba78dee2800dfec6 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 26 Jan 2010 16:14:08 -0500 Subject: ring-buffer: Check for end of page in iterator If the iterator comes to an empty page for some reason, or if the page is emptied by a consuming read. The iterator code currently does not check if the iterator is pass the contents, and may return a false entry. This patch adds a check to the ring buffer iterator to test if the current page has been completely read and sets the iterator to the next page if necessary. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 503b630e0bda..8c1b2d290718 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3064,9 +3064,6 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) struct ring_buffer_event *event; int nr_loops = 0; - if (ring_buffer_iter_empty(iter)) - return NULL; - cpu_buffer = iter->cpu_buffer; buffer = cpu_buffer->buffer; @@ -3080,6 +3077,9 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) rb_iter_reset(iter); again: + if (ring_buffer_iter_empty(iter)) + return NULL; + /* * We repeat when a timestamp is encountered. * We can get multiple timestamps by nested interrupts or also @@ -3094,6 +3094,11 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) if (rb_per_cpu_empty(cpu_buffer)) return NULL; + if (iter->head >= local_read(&iter->head_page->page->commit)) { + rb_inc_iter(iter); + goto again; + } + event = rb_iter_head_event(iter); switch (event->type_len) { -- cgit v1.2.3 From 03688970347bfea32823953a7ce5886d1713205f Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Fri, 22 Jan 2010 08:12:47 -0500 Subject: tracing/documentation: Cover new frame pointer semantics Update the graph tracer examples to cover the new frame pointer semantics (in terms of passing it along). Move the HAVE_FUNCTION_GRAPH_FP_TEST docs out of the Kconfig, into the right place, and expand on the details. Signed-off-by: Mike Frysinger LKML-Reference: <1264165967-18938-1-git-send-email-vapier@gentoo.org> Signed-off-by: Steven Rostedt --- Documentation/trace/ftrace-design.txt | 26 +++++++++++++++++++++++--- kernel/trace/Kconfig | 4 +--- 2 files changed, 24 insertions(+), 6 deletions(-) (limited to 'kernel/trace') diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt index 239f14b2b55a..6a5a579126b0 100644 --- a/Documentation/trace/ftrace-design.txt +++ b/Documentation/trace/ftrace-design.txt @@ -1,5 +1,6 @@ function tracer guts ==================== + By Mike Frysinger Introduction ------------ @@ -173,14 +174,16 @@ void ftrace_graph_caller(void) unsigned long *frompc = &...; unsigned long selfpc = - MCOUNT_INSN_SIZE; - prepare_ftrace_return(frompc, selfpc); + /* passing frame pointer up is optional -- see below */ + prepare_ftrace_return(frompc, selfpc, frame_pointer); /* restore all state needed by the ABI */ } #endif -For information on how to implement prepare_ftrace_return(), simply look at -the x86 version. The only architecture-specific piece in it is the setup of +For information on how to implement prepare_ftrace_return(), simply look at the +x86 version (the frame pointer passing is optional; see the next section for +more information). The only architecture-specific piece in it is the setup of the fault recovery table (the asm(...) code). The rest should be the same across architectures. @@ -205,6 +208,23 @@ void return_to_handler(void) #endif +HAVE_FUNCTION_GRAPH_FP_TEST +--------------------------- + +An arch may pass in a unique value (frame pointer) to both the entering and +exiting of a function. On exit, the value is compared and if it does not +match, then it will panic the kernel. This is largely a sanity check for bad +code generation with gcc. If gcc for your port sanely updates the frame +pointer under different opitmization levels, then ignore this option. + +However, adding support for it isn't terribly difficult. In your assembly code +that calls prepare_ftrace_return(), pass the frame pointer as the 3rd argument. +Then in the C version of that function, do what the x86 port does and pass it +along to ftrace_push_return_trace() instead of a stub value of 0. + +Similarly, when you call ftrace_return_to_handler(), pass it the frame pointer. + + HAVE_FTRACE_NMI_ENTER --------------------- diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 6c22d8a2f289..60e2ce0181ee 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -27,9 +27,7 @@ config HAVE_FUNCTION_GRAPH_TRACER config HAVE_FUNCTION_GRAPH_FP_TEST bool help - An arch may pass in a unique value (frame pointer) to both the - entering and exiting of a function. On exit, the value is compared - and if it does not match, then it will panic the kernel. + See Documentation/trace/ftrace-design.txt config HAVE_FUNCTION_TRACE_MCOUNT_TEST bool -- cgit v1.2.3 From 4f48f8b7fd18c44f8478174f9925cc3c059c6ce4 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 2 Feb 2010 15:32:09 +0800 Subject: tracing: Fix circular dead lock in stack trace When we cat /tracing/stack_trace, we may cause circular lock: sys_read() t_start() arch_spin_lock(&max_stack_lock); t_show() seq_printf(), vsnprintf() .... /* they are all trace-able, when they are traced, max_stack_lock may be required again. */ The following script can trigger this circular dead lock very easy: #!/bin/bash echo 1 > /proc/sys/kernel/stack_tracer_enabled mount -t debugfs xxx /mnt > /dev/null 2>&1 ( # make check_stack() zealous to require max_stack_lock for ((; ;)) { echo 1 > /mnt/tracing/stack_max_size } ) & for ((; ;)) { cat /mnt/tracing/stack_trace > /dev/null } To fix this bug, we increase the percpu trace_active before require the lock. Reported-by: Li Zefan Signed-off-by: Lai Jiangshan LKML-Reference: <4B67D4F9.9080905@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace_stack.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 678a5120ee30..f4bc9b27de5f 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -157,6 +157,7 @@ stack_max_size_write(struct file *filp, const char __user *ubuf, unsigned long val, flags; char buf[64]; int ret; + int cpu; if (count >= sizeof(buf)) return -EINVAL; @@ -171,9 +172,20 @@ stack_max_size_write(struct file *filp, const char __user *ubuf, return ret; local_irq_save(flags); + + /* + * In case we trace inside arch_spin_lock() or after (NMI), + * we will cause circular lock, so we also need to increase + * the percpu trace_active here. + */ + cpu = smp_processor_id(); + per_cpu(trace_active, cpu)++; + arch_spin_lock(&max_stack_lock); *ptr = val; arch_spin_unlock(&max_stack_lock); + + per_cpu(trace_active, cpu)--; local_irq_restore(flags); return count; @@ -206,7 +218,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos) static void *t_start(struct seq_file *m, loff_t *pos) { + int cpu; + local_irq_disable(); + + cpu = smp_processor_id(); + per_cpu(trace_active, cpu)++; + arch_spin_lock(&max_stack_lock); if (*pos == 0) @@ -217,7 +235,13 @@ static void *t_start(struct seq_file *m, loff_t *pos) static void t_stop(struct seq_file *m, void *p) { + int cpu; + arch_spin_unlock(&max_stack_lock); + + cpu = smp_processor_id(); + per_cpu(trace_active, cpu)--; + local_irq_enable(); } -- cgit v1.2.3 From a9bb18f36c8056f0712fb28c52c0f85d98438dfb Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 10 Feb 2010 17:23:47 +0100 Subject: tracing/kprobes: Fix probe parsing Trying to add a probe like: echo p:myprobe 0x10000 > /sys/kernel/debug/tracing/kprobe_events will fail since the wrong pointer is passed to strict_strtoul when trying to convert the address to an unsigned long. Signed-off-by: Heiko Carstens Acked-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Steven Rostedt LKML-Reference: <20100210162346.GA6933@osiris.boeblingen.de.ibm.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_kprobe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 6ea90c0e2c96..50b1b8239806 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -689,7 +689,7 @@ static int create_trace_probe(int argc, char **argv) return -EINVAL; } /* an address specified */ - ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr); + ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr); if (ret) { pr_info("Failed to parse address.\n"); return ret; -- cgit v1.2.3