From cdad72207d164569cb4bf647eb824a7f93e8d388 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 3 Sep 2008 16:00:57 +0200 Subject: lockstat: documentation update Christoph noted that the documentation doesn't tell in what unit the lockstat times are reported, ammend this. Reported-by: Christoph Hellwig Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- Documentation/lockstat.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/lockstat.txt b/Documentation/lockstat.txt index 4ba4664ce5c3..02f36f5c64fe 100644 --- a/Documentation/lockstat.txt +++ b/Documentation/lockstat.txt @@ -100,6 +100,7 @@ The first lock (05-10) is a read/write lock, and shows two lines above the short separator. The contention points don't match the column descriptors, they have two: contentions and [] symbol. +The integer part of the time values is in us. View the top contending locks: -- cgit v1.2.3 From c7e78cff6b7518212247fb20b1dc6411540dc9af Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Oct 2008 23:17:09 +0200 Subject: lockstat: contend with points We currently only provide points that have to wait on contention, also lists the points we have to wait for. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- Documentation/lockstat.txt | 50 +++++++++++++++++++++++++++++----------------- include/linux/lockdep.h | 13 ++++++++---- kernel/lockdep.c | 33 +++++++++++++++++++----------- kernel/lockdep_proc.c | 21 +++++++++++++++++-- kernel/mutex.c | 2 +- 5 files changed, 82 insertions(+), 37 deletions(-) (limited to 'Documentation') diff --git a/Documentation/lockstat.txt b/Documentation/lockstat.txt index 02f36f5c64fe..9cb9138f7a79 100644 --- a/Documentation/lockstat.txt +++ b/Documentation/lockstat.txt @@ -71,34 +71,48 @@ Look at the current lock statistics: # less /proc/lock_stat -01 lock_stat version 0.2 +01 lock_stat version 0.3 02 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 03 class name con-bounces contentions waittime-min waittime-max waittime-total acq-bounces acquisitions holdtime-min holdtime-max holdtime-total 04 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 05 -06 &inode->i_data.tree_lock-W: 15 21657 0.18 1093295.30 11547131054.85 58 10415 0.16 87.51 6387.60 -07 &inode->i_data.tree_lock-R: 0 0 0.00 0.00 0.00 23302 231198 0.25 8.45 98023.38 -08 -------------------------- -09 &inode->i_data.tree_lock 0 [] add_to_page_cache+0x5f/0x190 -10 -11 ............................................................................................................................................................................................... -12 -13 dcache_lock: 1037 1161 0.38 45.32 774.51 6611 243371 0.15 306.48 77387.24 -14 ----------- -15 dcache_lock 180 [] sys_getcwd+0x11e/0x230 -16 dcache_lock 165 [] d_alloc+0x15a/0x210 -17 dcache_lock 33 [] _atomic_dec_and_lock+0x4d/0x70 -18 dcache_lock 1 [] shrink_dcache_parent+0x18/0x130 +06 &mm->mmap_sem-W: 233 538 18446744073708 22924.27 607243.51 1342 45806 1.71 8595.89 1180582.34 +07 &mm->mmap_sem-R: 205 587 18446744073708 28403.36 731975.00 1940 412426 0.58 187825.45 6307502.88 +08 --------------- +09 &mm->mmap_sem 487 [] do_page_fault+0x466/0x928 +10 &mm->mmap_sem 179 [] sys_mprotect+0xcd/0x21d +11 &mm->mmap_sem 279 [] sys_mmap+0x75/0xce +12 &mm->mmap_sem 76 [] sys_munmap+0x32/0x59 +13 --------------- +14 &mm->mmap_sem 270 [] sys_mmap+0x75/0xce +15 &mm->mmap_sem 431 [] do_page_fault+0x466/0x928 +16 &mm->mmap_sem 138 [] sys_munmap+0x32/0x59 +17 &mm->mmap_sem 145 [] sys_mprotect+0xcd/0x21d +18 +19 ............................................................................................................................................................................................... +20 +21 dcache_lock: 621 623 0.52 118.26 1053.02 6745 91930 0.29 316.29 118423.41 +22 ----------- +23 dcache_lock 179 [] _atomic_dec_and_lock+0x34/0x54 +24 dcache_lock 113 [] d_alloc+0x19a/0x1eb +25 dcache_lock 99 [] d_rehash+0x1b/0x44 +26 dcache_lock 104 [] d_instantiate+0x36/0x8a +27 ----------- +28 dcache_lock 192 [] _atomic_dec_and_lock+0x34/0x54 +29 dcache_lock 98 [] d_rehash+0x1b/0x44 +30 dcache_lock 72 [] d_alloc+0x19a/0x1eb +31 dcache_lock 112 [] d_instantiate+0x36/0x8a This excerpt shows the first two lock class statistics. Line 01 shows the output version - each time the format changes this will be updated. Line 02-04 -show the header with column descriptions. Lines 05-10 and 13-18 show the actual +show the header with column descriptions. Lines 05-18 and 20-31 show the actual statistics. These statistics come in two parts; the actual stats separated by a -short separator (line 08, 14) from the contention points. +short separator (line 08, 13) from the contention points. -The first lock (05-10) is a read/write lock, and shows two lines above the +The first lock (05-18) is a read/write lock, and shows two lines above the short separator. The contention points don't match the column descriptors, -they have two: contentions and [] symbol. +they have two: contentions and [] symbol. The second set of contention +points are the points we're contending with. The integer part of the time values is in us. diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 0aa657aa8a1e..fc9f8e88123b 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -73,6 +73,8 @@ struct lock_class_key { struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES]; }; +#define LOCKSTAT_POINTS 4 + /* * The lock-class itself: */ @@ -119,7 +121,8 @@ struct lock_class { int name_version; #ifdef CONFIG_LOCK_STAT - unsigned long contention_point[4]; + unsigned long contention_point[LOCKSTAT_POINTS]; + unsigned long contending_point[LOCKSTAT_POINTS]; #endif }; @@ -144,6 +147,7 @@ enum bounce_type { struct lock_class_stats { unsigned long contention_point[4]; + unsigned long contending_point[4]; struct lock_time read_waittime; struct lock_time write_waittime; struct lock_time read_holdtime; @@ -165,6 +169,7 @@ struct lockdep_map { const char *name; #ifdef CONFIG_LOCK_STAT int cpu; + unsigned long ip; #endif }; @@ -355,7 +360,7 @@ struct lock_class_key { }; #ifdef CONFIG_LOCK_STAT extern void lock_contended(struct lockdep_map *lock, unsigned long ip); -extern void lock_acquired(struct lockdep_map *lock); +extern void lock_acquired(struct lockdep_map *lock, unsigned long ip); #define LOCK_CONTENDED(_lock, try, lock) \ do { \ @@ -363,13 +368,13 @@ do { \ lock_contended(&(_lock)->dep_map, _RET_IP_); \ lock(_lock); \ } \ - lock_acquired(&(_lock)->dep_map); \ + lock_acquired(&(_lock)->dep_map, _RET_IP_); \ } while (0) #else /* CONFIG_LOCK_STAT */ #define lock_contended(lockdep_map, ip) do {} while (0) -#define lock_acquired(lockdep_map) do {} while (0) +#define lock_acquired(lockdep_map, ip) do {} while (0) #define LOCK_CONTENDED(_lock, try, lock) \ lock(_lock) diff --git a/kernel/lockdep.c b/kernel/lockdep.c index dbda475b13bd..234a9dccb4be 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -136,16 +136,16 @@ static inline struct lock_class *hlock_class(struct held_lock *hlock) #ifdef CONFIG_LOCK_STAT static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats); -static int lock_contention_point(struct lock_class *class, unsigned long ip) +static int lock_point(unsigned long points[], unsigned long ip) { int i; - for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) { - if (class->contention_point[i] == 0) { - class->contention_point[i] = ip; + for (i = 0; i < LOCKSTAT_POINTS; i++) { + if (points[i] == 0) { + points[i] = ip; break; } - if (class->contention_point[i] == ip) + if (points[i] == ip) break; } @@ -185,6 +185,9 @@ struct lock_class_stats lock_stats(struct lock_class *class) for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++) stats.contention_point[i] += pcs->contention_point[i]; + for (i = 0; i < ARRAY_SIZE(stats.contending_point); i++) + stats.contending_point[i] += pcs->contending_point[i]; + lock_time_add(&pcs->read_waittime, &stats.read_waittime); lock_time_add(&pcs->write_waittime, &stats.write_waittime); @@ -209,6 +212,7 @@ void clear_lock_stats(struct lock_class *class) memset(cpu_stats, 0, sizeof(struct lock_class_stats)); } memset(class->contention_point, 0, sizeof(class->contention_point)); + memset(class->contending_point, 0, sizeof(class->contending_point)); } static struct lock_class_stats *get_lock_stats(struct lock_class *class) @@ -3001,7 +3005,7 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip) struct held_lock *hlock, *prev_hlock; struct lock_class_stats *stats; unsigned int depth; - int i, point; + int i, contention_point, contending_point; depth = curr->lockdep_depth; if (DEBUG_LOCKS_WARN_ON(!depth)) @@ -3025,18 +3029,22 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip) found_it: hlock->waittime_stamp = sched_clock(); - point = lock_contention_point(hlock_class(hlock), ip); + contention_point = lock_point(hlock_class(hlock)->contention_point, ip); + contending_point = lock_point(hlock_class(hlock)->contending_point, + lock->ip); stats = get_lock_stats(hlock_class(hlock)); - if (point < ARRAY_SIZE(stats->contention_point)) - stats->contention_point[point]++; + if (contention_point < LOCKSTAT_POINTS) + stats->contention_point[contention_point]++; + if (contending_point < LOCKSTAT_POINTS) + stats->contending_point[contending_point]++; if (lock->cpu != smp_processor_id()) stats->bounces[bounce_contended + !!hlock->read]++; put_lock_stats(stats); } static void -__lock_acquired(struct lockdep_map *lock) +__lock_acquired(struct lockdep_map *lock, unsigned long ip) { struct task_struct *curr = current; struct held_lock *hlock, *prev_hlock; @@ -3085,6 +3093,7 @@ found_it: put_lock_stats(stats); lock->cpu = cpu; + lock->ip = ip; } void lock_contended(struct lockdep_map *lock, unsigned long ip) @@ -3106,7 +3115,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip) } EXPORT_SYMBOL_GPL(lock_contended); -void lock_acquired(struct lockdep_map *lock) +void lock_acquired(struct lockdep_map *lock, unsigned long ip) { unsigned long flags; @@ -3119,7 +3128,7 @@ void lock_acquired(struct lockdep_map *lock) raw_local_irq_save(flags); check_flags(flags); current->lockdep_recursion = 1; - __lock_acquired(lock); + __lock_acquired(lock, ip); current->lockdep_recursion = 0; raw_local_irq_restore(flags); } diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index 8d3a6eba8d5a..13716b813896 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c @@ -557,7 +557,7 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data) if (stats->read_holdtime.nr) namelen += 2; - for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) { + for (i = 0; i < LOCKSTAT_POINTS; i++) { char sym[KSYM_SYMBOL_LEN]; char ip[32]; @@ -574,6 +574,23 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data) stats->contention_point[i], ip, sym); } + for (i = 0; i < LOCKSTAT_POINTS; i++) { + char sym[KSYM_SYMBOL_LEN]; + char ip[32]; + + if (class->contending_point[i] == 0) + break; + + if (!i) + seq_line(m, '-', 40-namelen, namelen); + + sprint_symbol(sym, class->contending_point[i]); + snprintf(ip, sizeof(ip), "[<%p>]", + (void *)class->contending_point[i]); + seq_printf(m, "%40s %14lu %29s %s\n", name, + stats->contending_point[i], + ip, sym); + } if (i) { seq_puts(m, "\n"); seq_line(m, '.', 0, 40 + 1 + 10 * (14 + 1)); @@ -583,7 +600,7 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data) static void seq_header(struct seq_file *m) { - seq_printf(m, "lock_stat version 0.2\n"); + seq_printf(m, "lock_stat version 0.3\n"); seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1)); seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s " "%14s %14s\n", diff --git a/kernel/mutex.c b/kernel/mutex.c index 12c779dc65d4..39a3816b68d9 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -184,7 +184,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, } done: - lock_acquired(&lock->dep_map); + lock_acquired(&lock->dep_map, ip); /* got the lock - rejoice! */ mutex_remove_waiter(lock, &waiter, task_thread_info(task)); debug_mutex_set_owner(lock, task_thread_info(task)); -- cgit v1.2.3 From d9e540762f5cdd89f24e518ad1fd31142d0b9726 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 1 Nov 2008 19:57:37 +0100 Subject: ftrace: ftrace_dump_on_oops=[tracer] Impact: add new (optional) debug boot option In order to facilitate early boot trouble, allow one to specify a tracer on the kernel boot line. Signed-off-by: Peter Zijlstra Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 8 +++++ kernel/trace/trace.c | 58 ++++++++++++++++++++++++------------- 2 files changed, 46 insertions(+), 20 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 1bbcaa8982b6..4862284d3119 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -765,6 +765,14 @@ and is between 256 and 4096 characters. It is defined in the file parameter will force ia64_sal_cache_flush to call ia64_pal_cache_flush instead of SAL_CACHE_FLUSH. + ftrace=[tracer] + [ftrace] will set and start the specified tracer + as early as possible in order to facilitate early + boot debugging. + + ftrace_dump_on_oops + [ftrace] will dump the trace buffers on oops. + gamecon.map[2|3]= [HW,JOY] Multisystem joystick and NES/SNES/PSX pad support via parallel port (up to 5 devices per port) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bdb1df00fb10..482583eb8001 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -79,6 +79,15 @@ static int tracing_disabled = 1; */ int ftrace_dump_on_oops; +static int tracing_set_tracer(char *buf); + +static int __init set_ftrace(char *str) +{ + tracing_set_tracer(str); + return 1; +} +__setup("ftrace", set_ftrace); + static int __init set_ftrace_dump_on_oops(char *str) { ftrace_dump_on_oops = 1; @@ -2394,29 +2403,11 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf, return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } -static ssize_t -tracing_set_trace_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) +static int tracing_set_tracer(char *buf) { struct trace_array *tr = &global_trace; struct tracer *t; - char buf[max_tracer_type_len+1]; - int i; - size_t ret; - - ret = cnt; - - if (cnt > max_tracer_type_len) - cnt = max_tracer_type_len; - - if (copy_from_user(&buf, ubuf, cnt)) - return -EFAULT; - - buf[cnt] = 0; - - /* strip ending whitespace. */ - for (i = cnt - 1; i > 0 && isspace(buf[i]); i--) - buf[i] = 0; + int ret = 0; mutex_lock(&trace_types_lock); for (t = trace_types; t; t = t->next) { @@ -2440,6 +2431,33 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf, out: mutex_unlock(&trace_types_lock); + return ret; +} + +static ssize_t +tracing_set_trace_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[max_tracer_type_len+1]; + int i; + size_t ret; + + if (cnt > max_tracer_type_len) + cnt = max_tracer_type_len; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + /* strip ending whitespace. */ + for (i = cnt - 1; i > 0 && isspace(buf[i]); i--) + buf[i] = 0; + + ret = tracing_set_tracer(buf); + if (!ret) + ret = cnt; + if (ret > 0) filp->f_pos += ret; -- cgit v1.2.3 From a94c80e78bc9f4493ffc25a02d5d7bcd93c399d0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 12 Nov 2008 17:52:36 -0500 Subject: ftrace: rename trace_entries to buffer_size_kb Impact: rename of debugfs file trace_entries to buffer_size_kb The original ftrace had fixed size entries, and the number of entries was shown and modified via the file called trace_entries. By converting to the unified trace buffer, we now allow for variable size entries which makes the meaning of trace_entries pointless. Since trace_size might be confused to the size of the trace, this patch names it "buffer_size_kb" (thanks to Arjan van de Ven for this idea). [ mingo@elte.hu: changed from buffer_size to buffer_size_kb ] ( Note, the units are still bytes - the next patch changes that, to keep the wide rename patch separate from the unit-change patch. ) Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- Documentation/ftrace.txt | 18 +++++++++--------- kernel/trace/trace.c | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'Documentation') diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt index 9cc4d685dde5..a1b58777839b 100644 --- a/Documentation/ftrace.txt +++ b/Documentation/ftrace.txt @@ -94,7 +94,7 @@ of ftrace. Here is a list of some of the key files: only be recorded if the latency is greater than the value in this file. (in microseconds) - trace_entries: This sets or displays the number of bytes each CPU + buffer_size_kb: This sets or displays the number of bytes each CPU buffer can hold. The tracer buffers are the same size for each CPU. The displayed number is the size of the CPU buffer and not total size of all buffers. The @@ -1299,13 +1299,13 @@ trace entries ------------- Having too much or not enough data can be troublesome in diagnosing -an issue in the kernel. The file trace_entries is used to modify +an issue in the kernel. The file buffer_size_kb is used to modify the size of the internal trace buffers. The number listed is the number of entries that can be recorded per CPU. To know the full size, multiply the number of possible CPUS with the number of entries. - # cat /debug/tracing/trace_entries + # cat /debug/tracing/buffer_size_kb 65620 Note, to modify this, you must have tracing completely disabled. To do that, @@ -1313,8 +1313,8 @@ echo "nop" into the current_tracer. If the current_tracer is not set to "nop", an EINVAL error will be returned. # echo nop > /debug/tracing/current_tracer - # echo 100000 > /debug/tracing/trace_entries - # cat /debug/tracing/trace_entries + # echo 100000 > /debug/tracing/buffer_size_kb + # cat /debug/tracing/buffer_size_kb 100045 @@ -1323,8 +1323,8 @@ are held in individual pages. It allocates the number of pages it takes to fulfill the request. If more entries may fit on the last page then they will be added. - # echo 1 > /debug/tracing/trace_entries - # cat /debug/tracing/trace_entries + # echo 1 > /debug/tracing/buffer_size_kb + # cat /debug/tracing/buffer_size_kb 85 This shows us that 85 entries can fit in a single page. @@ -1332,8 +1332,8 @@ This shows us that 85 entries can fit in a single page. The number of pages which will be allocated is limited to a percentage of available memory. Allocating too much will produce an error. - # echo 1000000000000 > /debug/tracing/trace_entries + # echo 1000000000000 > /debug/tracing/buffer_size_kb -bash: echo: write error: Cannot allocate memory - # cat /debug/tracing/trace_entries + # cat /debug/tracing/buffer_size_kb 85 diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4bf070bb5272..b42d42056fa4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3198,11 +3198,11 @@ static __init int tracer_init_debugfs(void) pr_warning("Could not create debugfs " "'trace_pipe' entry\n"); - entry = debugfs_create_file("trace_entries", 0644, d_tracer, + entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer, &global_trace, &tracing_entries_fops); if (!entry) pr_warning("Could not create debugfs " - "'trace_entries' entry\n"); + "'buffer_size_kb' entry\n"); entry = debugfs_create_file("trace_marker", 0220, d_tracer, NULL, &tracing_mark_fops); -- cgit v1.2.3 From 1696b2b0f44a8d42f3e6b1ea90c21790871c04d9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 13 Nov 2008 00:09:35 -0500 Subject: ftrace: show buffer size in kilobytes Impact: change the units of buffer_size_kb to kilobytes This patch changes the units of the buffer_size_kb file to kilobytes. Reading and writing to the file uses kilobytes as units. To help users to know what units are used, the output of the file now looks like: # cat /debug/tracing/buffer_size_kb 1408 Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- Documentation/ftrace.txt | 22 +++++----------------- kernel/trace/trace.c | 5 ++++- 2 files changed, 9 insertions(+), 18 deletions(-) (limited to 'Documentation') diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt index a1b58777839b..6d3fe4cdf921 100644 --- a/Documentation/ftrace.txt +++ b/Documentation/ftrace.txt @@ -94,10 +94,10 @@ of ftrace. Here is a list of some of the key files: only be recorded if the latency is greater than the value in this file. (in microseconds) - buffer_size_kb: This sets or displays the number of bytes each CPU + buffer_size_kb: This sets or displays the number of kilobytes each CPU buffer can hold. The tracer buffers are the same size for each CPU. The displayed number is the size of the - CPU buffer and not total size of all buffers. The + CPU buffer and not total size of all buffers. The trace buffers are allocated in pages (blocks of memory that the kernel uses for allocation, usually 4 KB in size). If the last page allocated has room for more bytes @@ -1306,28 +1306,16 @@ the full size, multiply the number of possible CPUS with the number of entries. # cat /debug/tracing/buffer_size_kb -65620 +1408 (units kilobytes) Note, to modify this, you must have tracing completely disabled. To do that, echo "nop" into the current_tracer. If the current_tracer is not set to "nop", an EINVAL error will be returned. # echo nop > /debug/tracing/current_tracer - # echo 100000 > /debug/tracing/buffer_size_kb + # echo 10000 > /debug/tracing/buffer_size_kb # cat /debug/tracing/buffer_size_kb -100045 - - -Notice that we echoed in 100,000 but the size is 100,045. The entries -are held in individual pages. It allocates the number of pages it takes -to fulfill the request. If more entries may fit on the last page -then they will be added. - - # echo 1 > /debug/tracing/buffer_size_kb - # cat /debug/tracing/buffer_size_kb -85 - -This shows us that 85 entries can fit in a single page. +10000 (units kilobytes) The number of pages which will be allocated is limited to a percentage of available memory. Allocating too much will produce an error. diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b42d42056fa4..d664aae2e10a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2905,7 +2905,7 @@ tracing_entries_read(struct file *filp, char __user *ubuf, char buf[64]; int r; - r = sprintf(buf, "%lu\n", tr->entries); + r = sprintf(buf, "%lu\n", tr->entries >> 10); return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } @@ -2945,6 +2945,9 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, atomic_inc(&max_tr.data[cpu]->disabled); } + /* value is in KB */ + val <<= 10; + if (val != global_trace.entries) { ret = ring_buffer_resize(global_trace.buffer, val); if (ret < 0) { -- cgit v1.2.3 From ee6bce52276c0717ed3e63296e5d9465d339e923 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 12 Nov 2008 17:52:37 -0500 Subject: ftrace: rename iter_ctrl to trace_options Impact: rename file /debug/tracing/iter_ctrl to /debug/tracing/trace_options The original ftrace had a file called "iter_ctrl" that would control the way the output was iterated. But this file grew into a catch all for different trace options. This patch renames the file from iter_ctrl to trace_options to reflect this change. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- Documentation/ftrace.txt | 14 +++++++------- kernel/trace/trace.c | 18 +++++++++--------- 2 files changed, 16 insertions(+), 16 deletions(-) (limited to 'Documentation') diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt index 6d3fe4cdf921..753f4de4b175 100644 --- a/Documentation/ftrace.txt +++ b/Documentation/ftrace.txt @@ -82,7 +82,7 @@ of ftrace. Here is a list of some of the key files: tracer is not adding more data, they will display the same information every time they are read. - iter_ctrl: This file lets the user control the amount of data + trace_options: This file lets the user control the amount of data that is displayed in one of the above output files. @@ -316,23 +316,23 @@ The above is mostly meaningful for kernel developers. The rest is the same as the 'trace' file. -iter_ctrl ---------- +trace_options +------------- -The iter_ctrl file is used to control what gets printed in the trace +The trace_options file is used to control what gets printed in the trace output. To see what is available, simply cat the file: - cat /debug/tracing/iter_ctrl + cat /debug/tracing/trace_options print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \ noblock nostacktrace nosched-tree To disable one of the options, echo in the option prepended with "no". - echo noprint-parent > /debug/tracing/iter_ctrl + echo noprint-parent > /debug/tracing/trace_options To enable an option, leave off the "no". - echo sym-offset > /debug/tracing/iter_ctrl + echo sym-offset > /debug/tracing/trace_options Here are the available options: diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d664aae2e10a..240423a9d1af 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -204,7 +204,7 @@ static DEFINE_MUTEX(trace_types_lock); /* trace_wait is a waitqueue for tasks blocked on trace_poll */ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); -/* trace_flags holds iter_ctrl options */ +/* trace_flags holds trace_options default values */ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK; /** @@ -2411,7 +2411,7 @@ static struct file_operations tracing_cpumask_fops = { }; static ssize_t -tracing_iter_ctrl_read(struct file *filp, char __user *ubuf, +tracing_trace_options_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { char *buf; @@ -2448,7 +2448,7 @@ tracing_iter_ctrl_read(struct file *filp, char __user *ubuf, } static ssize_t -tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf, +tracing_trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { char buf[64]; @@ -2493,8 +2493,8 @@ tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf, static struct file_operations tracing_iter_fops = { .open = tracing_open_generic, - .read = tracing_iter_ctrl_read, - .write = tracing_iter_ctrl_write, + .read = tracing_trace_options_read, + .write = tracing_trace_options_write, }; static const char readme_msg[] = @@ -2508,9 +2508,9 @@ static const char readme_msg[] = "# echo sched_switch > /debug/tracing/current_tracer\n" "# cat /debug/tracing/current_tracer\n" "sched_switch\n" - "# cat /debug/tracing/iter_ctrl\n" + "# cat /debug/tracing/trace_options\n" "noprint-parent nosym-offset nosym-addr noverbose\n" - "# echo print-parent > /debug/tracing/iter_ctrl\n" + "# echo print-parent > /debug/tracing/trace_options\n" "# echo 1 > /debug/tracing/tracing_enabled\n" "# cat /debug/tracing/trace > /tmp/trace.txt\n" "echo 0 > /debug/tracing/tracing_enabled\n" @@ -3148,10 +3148,10 @@ static __init int tracer_init_debugfs(void) if (!entry) pr_warning("Could not create debugfs 'tracing_enabled' entry\n"); - entry = debugfs_create_file("iter_ctrl", 0644, d_tracer, + entry = debugfs_create_file("trace_options", 0644, d_tracer, NULL, &tracing_iter_fops); if (!entry) - pr_warning("Could not create debugfs 'iter_ctrl' entry\n"); + pr_warning("Could not create debugfs 'trace_options' entry\n"); entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer, NULL, &tracing_cpumask_fops); -- cgit v1.2.3 From a0bca6a59ebc052751eed6e3b182c153495672d8 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 14 Nov 2008 17:47:40 -0500 Subject: markers: create DEFINE_MARKER and GET_MARKER (new API) Impact: new API. Allow markers to be used only for declaration, without function call associated. Useful to create specialized probes. The problem we had is that two function calls were required when one wanted to put a marker in a tracepoint probe. Now the marker can be used simply for trace data type declaration, leaving the trace write work within the tracepoint probe without any additional function call. Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- Documentation/markers.txt | 14 ++++++++++++++ include/linux/marker.h | 39 +++++++++++++++++++++++---------------- 2 files changed, 37 insertions(+), 16 deletions(-) (limited to 'Documentation') diff --git a/Documentation/markers.txt b/Documentation/markers.txt index 089f6138fcd9..6d275e4ef385 100644 --- a/Documentation/markers.txt +++ b/Documentation/markers.txt @@ -70,6 +70,20 @@ a printk warning which identifies the inconsistency: "Format mismatch for probe probe_name (format), marker (format)" +Another way to use markers is to simply define the marker without generating any +function call to actually call into the marker. This is useful in combination +with tracepoint probes in a scheme like this : + +void probe_tracepoint_name(unsigned int arg1, struct task_struct *tsk); + +DEFINE_MARKER_TP(marker_eventname, tracepoint_name, probe_tracepoint_name, + "arg1 %u pid %d"); + +notrace void probe_tracepoint_name(unsigned int arg1, struct task_struct *tsk) +{ + struct marker *marker = &GET_MARKER(kernel_irq_entry); + /* write data to trace buffers ... */ +} * Probe / marker example diff --git a/include/linux/marker.h b/include/linux/marker.h index 57a307018ceb..34c14bc957f5 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -55,6 +55,22 @@ struct marker { #ifdef CONFIG_MARKERS +#define _DEFINE_MARKER(name, tp_name_str, tp_cb, format) \ + static const char __mstrtab_##name[] \ + __attribute__((section("__markers_strings"))) \ + = #name "\0" format; \ + static struct marker __mark_##name \ + __attribute__((section("__markers"), aligned(8))) = \ + { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ + 0, 0, marker_probe_cb, { __mark_empty_function, NULL},\ + NULL, tp_name_str, tp_cb } + +#define DEFINE_MARKER(name, format) \ + _DEFINE_MARKER(name, NULL, NULL, format) + +#define DEFINE_MARKER_TP(name, tp_name, tp_cb, format) \ + _DEFINE_MARKER(name, #tp_name, tp_cb, format) + /* * Note : the empty asm volatile with read constraint is used here instead of a * "used" attribute to fix a gcc 4.1.x bug. @@ -68,14 +84,7 @@ struct marker { */ #define __trace_mark(generic, name, call_private, format, args...) \ do { \ - static const char __mstrtab_##name[] \ - __attribute__((section("__markers_strings"))) \ - = #name "\0" format; \ - static struct marker __mark_##name \ - __attribute__((section("__markers"), aligned(8))) = \ - { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ - 0, 0, marker_probe_cb, \ - { __mark_empty_function, NULL}, NULL, NULL, NULL }; \ + DEFINE_MARKER(name, format); \ __mark_check_format(format, ## args); \ if (unlikely(__mark_##name.state)) { \ (*__mark_##name.call) \ @@ -89,14 +98,7 @@ struct marker { { \ register_trace_##tp_name(tp_cb); \ } \ - static const char __mstrtab_##name[] \ - __attribute__((section("__markers_strings"))) \ - = #name "\0" format; \ - static struct marker __mark_##name \ - __attribute__((section("__markers"), aligned(8))) = \ - { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ - 0, 0, marker_probe_cb, \ - { __mark_empty_function, NULL}, NULL, #tp_name, tp_cb };\ + DEFINE_MARKER_TP(name, tp_name, tp_cb, format); \ __mark_check_format(format, ## args); \ (*__mark_##name.call)(&__mark_##name, call_private, \ ## args); \ @@ -104,7 +106,11 @@ struct marker { extern void marker_update_probe_range(struct marker *begin, struct marker *end); + +#define GET_MARKER(name) (__mark_##name) + #else /* !CONFIG_MARKERS */ +#define DEFINE_MARKER(name, tp_name, tp_cb, format) #define __trace_mark(generic, name, call_private, format, args...) \ __mark_check_format(format, ## args) #define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \ @@ -118,6 +124,7 @@ extern void marker_update_probe_range(struct marker *begin, static inline void marker_update_probe_range(struct marker *begin, struct marker *end) { } +#define GET_MARKER(name) #endif /* CONFIG_MARKERS */ /** -- cgit v1.2.3 From 7e066fb870fcd1025ec3ba7bbde5d541094f4ce1 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 14 Nov 2008 17:47:47 -0500 Subject: tracepoints: add DECLARE_TRACE() and DEFINE_TRACE() Impact: API *CHANGE*. Must update all tracepoint users. Add DEFINE_TRACE() to tracepoints to let them declare the tracepoint structure in a single spot for all the kernel. It helps reducing memory consumption, especially when declaring a lot of tracepoints, e.g. for kmalloc tracing. *API CHANGE WARNING*: now, DECLARE_TRACE() must be used in headers for tracepoint declarations rather than DEFINE_TRACE(). This is the sane way to do it. The name previously used was misleading. Updates scheduler instrumentation to follow this API change. Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- Documentation/tracepoints.txt | 7 ++++++- include/asm-generic/vmlinux.lds.h | 1 + include/linux/tracepoint.h | 35 +++++++++++++++++++++++---------- include/trace/sched.h | 24 +++++++++++----------- kernel/exit.c | 4 ++++ kernel/fork.c | 2 ++ kernel/kthread.c | 3 +++ kernel/sched.c | 6 ++++++ kernel/signal.c | 2 ++ samples/tracepoints/tp-samples-trace.h | 4 ++-- samples/tracepoints/tracepoint-sample.c | 3 +++ 11 files changed, 66 insertions(+), 25 deletions(-) (limited to 'Documentation') diff --git a/Documentation/tracepoints.txt b/Documentation/tracepoints.txt index 5d354e167494..e8ad47b437f3 100644 --- a/Documentation/tracepoints.txt +++ b/Documentation/tracepoints.txt @@ -42,7 +42,7 @@ In include/trace/subsys.h : #include -DEFINE_TRACE(subsys_eventname, +DECLARE_TRACE(subsys_eventname, TPPTOTO(int firstarg, struct task_struct *p), TPARGS(firstarg, p)); @@ -50,6 +50,8 @@ In subsys/file.c (where the tracing statement must be added) : #include +DEFINE_TRACE(subsys_eventname); + void somefct(void) { ... @@ -86,6 +88,9 @@ to limit collisions. Tracepoint names are global to the kernel: they are considered as being the same whether they are in the core kernel image or in modules. +If the tracepoint has to be used in kernel modules, an +EXPORT_TRACEPOINT_SYMBOL_GPL() or EXPORT_TRACEPOINT_SYMBOL() can be used to +export the defined tracepoints. * Probe / tracepoint example diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index a5e4ed9baec8..3b46ae464933 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -71,6 +71,7 @@ VMLINUX_SYMBOL(__start___markers) = .; \ *(__markers) \ VMLINUX_SYMBOL(__stop___markers) = .; \ + . = ALIGN(32); \ VMLINUX_SYMBOL(__start___tracepoints) = .; \ *(__tracepoints) \ VMLINUX_SYMBOL(__stop___tracepoints) = .; \ diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 7e9b42aeae0e..757005458366 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -24,8 +24,12 @@ struct tracepoint { const char *name; /* Tracepoint name */ int state; /* State. */ void **funcs; -} __attribute__((aligned(8))); - +} __attribute__((aligned(32))); /* + * Aligned on 32 bytes because it is + * globally visible and gcc happily + * align these on the structure size. + * Keep in sync with vmlinux.lds.h. + */ #define TPPROTO(args...) args #define TPARGS(args...) args @@ -55,15 +59,10 @@ struct tracepoint { * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. */ -#define DEFINE_TRACE(name, proto, args) \ +#define DECLARE_TRACE(name, proto, args) \ + extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ - static const char __tpstrtab_##name[] \ - __attribute__((section("__tracepoints_strings"))) \ - = #name; \ - static struct tracepoint __tracepoint_##name \ - __attribute__((section("__tracepoints"), aligned(8))) = \ - { __tpstrtab_##name, 0, NULL }; \ if (unlikely(__tracepoint_##name.state)) \ __DO_TRACE(&__tracepoint_##name, \ TPPROTO(proto), TPARGS(args)); \ @@ -77,11 +76,23 @@ struct tracepoint { return tracepoint_probe_unregister(#name, (void *)probe);\ } +#define DEFINE_TRACE(name) \ + static const char __tpstrtab_##name[] \ + __attribute__((section("__tracepoints_strings"))) = #name; \ + struct tracepoint __tracepoint_##name \ + __attribute__((section("__tracepoints"), aligned(32))) = \ + { __tpstrtab_##name, 0, NULL } + +#define EXPORT_TRACEPOINT_SYMBOL_GPL(name) \ + EXPORT_SYMBOL_GPL(__tracepoint_##name) +#define EXPORT_TRACEPOINT_SYMBOL(name) \ + EXPORT_SYMBOL(__tracepoint_##name) + extern void tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end); #else /* !CONFIG_TRACEPOINTS */ -#define DEFINE_TRACE(name, proto, args) \ +#define DECLARE_TRACE(name, proto, args) \ static inline void _do_trace_##name(struct tracepoint *tp, proto) \ { } \ static inline void trace_##name(proto) \ @@ -95,6 +106,10 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin, return -ENOSYS; \ } +#define DEFINE_TRACE(name) +#define EXPORT_TRACEPOINT_SYMBOL_GPL(name) +#define EXPORT_TRACEPOINT_SYMBOL(name) + static inline void tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) { } diff --git a/include/trace/sched.h b/include/trace/sched.h index ad47369d01b5..9b2854abf7e2 100644 --- a/include/trace/sched.h +++ b/include/trace/sched.h @@ -4,52 +4,52 @@ #include #include -DEFINE_TRACE(sched_kthread_stop, +DECLARE_TRACE(sched_kthread_stop, TPPROTO(struct task_struct *t), TPARGS(t)); -DEFINE_TRACE(sched_kthread_stop_ret, +DECLARE_TRACE(sched_kthread_stop_ret, TPPROTO(int ret), TPARGS(ret)); -DEFINE_TRACE(sched_wait_task, +DECLARE_TRACE(sched_wait_task, TPPROTO(struct rq *rq, struct task_struct *p), TPARGS(rq, p)); -DEFINE_TRACE(sched_wakeup, +DECLARE_TRACE(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p), TPARGS(rq, p)); -DEFINE_TRACE(sched_wakeup_new, +DECLARE_TRACE(sched_wakeup_new, TPPROTO(struct rq *rq, struct task_struct *p), TPARGS(rq, p)); -DEFINE_TRACE(sched_switch, +DECLARE_TRACE(sched_switch, TPPROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next), TPARGS(rq, prev, next)); -DEFINE_TRACE(sched_migrate_task, +DECLARE_TRACE(sched_migrate_task, TPPROTO(struct rq *rq, struct task_struct *p, int dest_cpu), TPARGS(rq, p, dest_cpu)); -DEFINE_TRACE(sched_process_free, +DECLARE_TRACE(sched_process_free, TPPROTO(struct task_struct *p), TPARGS(p)); -DEFINE_TRACE(sched_process_exit, +DECLARE_TRACE(sched_process_exit, TPPROTO(struct task_struct *p), TPARGS(p)); -DEFINE_TRACE(sched_process_wait, +DECLARE_TRACE(sched_process_wait, TPPROTO(struct pid *pid), TPARGS(pid)); -DEFINE_TRACE(sched_process_fork, +DECLARE_TRACE(sched_process_fork, TPPROTO(struct task_struct *parent, struct task_struct *child), TPARGS(parent, child)); -DEFINE_TRACE(sched_signal_send, +DECLARE_TRACE(sched_signal_send, TPPROTO(int sig, struct task_struct *p), TPARGS(sig, p)); diff --git a/kernel/exit.c b/kernel/exit.c index ae2b92be5fae..f995d2418668 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -54,6 +54,10 @@ #include #include +DEFINE_TRACE(sched_process_free); +DEFINE_TRACE(sched_process_exit); +DEFINE_TRACE(sched_process_wait); + static void exit_mm(struct task_struct * tsk); static inline int task_detached(struct task_struct *p) diff --git a/kernel/fork.c b/kernel/fork.c index f6083561dfe0..0837d0deee5f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -79,6 +79,8 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0; __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ +DEFINE_TRACE(sched_process_fork); + int nr_processes(void) { int cpu; diff --git a/kernel/kthread.c b/kernel/kthread.c index 8e7a7ce3ed0a..4fbc456f393d 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -21,6 +21,9 @@ static DEFINE_SPINLOCK(kthread_create_lock); static LIST_HEAD(kthread_create_list); struct task_struct *kthreadd_task; +DEFINE_TRACE(sched_kthread_stop); +DEFINE_TRACE(sched_kthread_stop_ret); + struct kthread_create_info { /* Information passed to kthread() from kthreadd. */ diff --git a/kernel/sched.c b/kernel/sched.c index 50a21f964679..327f91c63c99 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -118,6 +118,12 @@ */ #define RUNTIME_INF ((u64)~0ULL) +DEFINE_TRACE(sched_wait_task); +DEFINE_TRACE(sched_wakeup); +DEFINE_TRACE(sched_wakeup_new); +DEFINE_TRACE(sched_switch); +DEFINE_TRACE(sched_migrate_task); + #ifdef CONFIG_SMP /* * Divide a load by a sched group cpu_power : (load / sg->__cpu_power) diff --git a/kernel/signal.c b/kernel/signal.c index 4530fc654455..e9afe63da24b 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -41,6 +41,8 @@ static struct kmem_cache *sigqueue_cachep; +DEFINE_TRACE(sched_signal_send); + static void __user *sig_handler(struct task_struct *t, int sig) { return t->sighand->action[sig - 1].sa.sa_handler; diff --git a/samples/tracepoints/tp-samples-trace.h b/samples/tracepoints/tp-samples-trace.h index 0216b55bd640..01724e04c556 100644 --- a/samples/tracepoints/tp-samples-trace.h +++ b/samples/tracepoints/tp-samples-trace.h @@ -4,10 +4,10 @@ #include /* for struct inode and struct file */ #include -DEFINE_TRACE(subsys_event, +DECLARE_TRACE(subsys_event, TPPROTO(struct inode *inode, struct file *file), TPARGS(inode, file)); -DEFINE_TRACE(subsys_eventb, +DECLARE_TRACE(subsys_eventb, TPPROTO(void), TPARGS()); #endif diff --git a/samples/tracepoints/tracepoint-sample.c b/samples/tracepoints/tracepoint-sample.c index 4ae4b7fcc043..00d169792a3e 100644 --- a/samples/tracepoints/tracepoint-sample.c +++ b/samples/tracepoints/tracepoint-sample.c @@ -13,6 +13,9 @@ #include #include "tp-samples-trace.h" +DEFINE_TRACE(subsys_event); +DEFINE_TRACE(subsys_eventb); + struct proc_dir_entry *pentry_example; static int my_open(struct inode *inode, struct file *file) -- cgit v1.2.3 From 8fd88d159031bd238dad1d7186a2030b9f9349de Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 14 Nov 2008 17:47:48 -0500 Subject: tracepoints: documentation fix for teardown Impact: documentation update Need a tracepoint_synchronize_unregister() before the end of exit() to make sure every probe callers have exited the non preemptible section and thus are not executing the probe code anymore. Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- Documentation/tracepoints.txt | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/tracepoints.txt b/Documentation/tracepoints.txt index e8ad47b437f3..93cd90e89cc1 100644 --- a/Documentation/tracepoints.txt +++ b/Documentation/tracepoints.txt @@ -70,10 +70,12 @@ Where : Connecting a function (probe) to a tracepoint is done by providing a probe (function to call) for the specific tracepoint through register_trace_subsys_eventname(). Removing a probe is done through -unregister_trace_subsys_eventname(); it will remove the probe sure there is no -caller left using the probe when it returns. Probe removal is preempt-safe -because preemption is disabled around the probe call. See the "Probe example" -section below for a sample probe module. +unregister_trace_subsys_eventname(); it will remove the probe. +marker_synchronize_unregister() must be called before the end of the module exit +function to make sure there is no caller left using the probe. This, and the +fact that preemption is disabled around the probe call, make sure that probe +removal and module unload are safe. See the "Probe example" section below for a +sample probe module. The tracepoint mechanism supports inserting multiple instances of the same tracepoint, but a single definition must be made of a given tracepoint name over -- cgit v1.2.3 From 0dcf8fe5fe5d7279f1c479fa82f1f1ca6f22e814 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 14 Nov 2008 17:47:49 -0500 Subject: tracepoints, docs: marker_synchronize_unregister->tracepoint_synchronize_unregister Impact: documentation update. Signed-off-by: Zhaolei Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- Documentation/tracepoints.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/tracepoints.txt b/Documentation/tracepoints.txt index 93cd90e89cc1..3a1c74384ffb 100644 --- a/Documentation/tracepoints.txt +++ b/Documentation/tracepoints.txt @@ -71,7 +71,7 @@ Connecting a function (probe) to a tracepoint is done by providing a probe (function to call) for the specific tracepoint through register_trace_subsys_eventname(). Removing a probe is done through unregister_trace_subsys_eventname(); it will remove the probe. -marker_synchronize_unregister() must be called before the end of the module exit +tracepoint_synchronize_unregister() must be called before the end of the module exit function to make sure there is no caller left using the probe. This, and the fact that preemption is disabled around the probe call, make sure that probe removal and module unload are safe. See the "Probe example" section below for a -- cgit v1.2.3 From 0a7ad64531713e33e39af95bdbfb172f4f328b1e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 16 Nov 2008 08:54:36 +0100 Subject: tracepoints: format documentation Impact: documentation update Properly format Documentation/tracepoints.txt - it was full of overlong lines and other typographical problems. Signed-off-by: Ingo Molnar --- Documentation/tracepoints.txt | 87 +++++++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 40 deletions(-) (limited to 'Documentation') diff --git a/Documentation/tracepoints.txt b/Documentation/tracepoints.txt index 3a1c74384ffb..2d42241a25c3 100644 --- a/Documentation/tracepoints.txt +++ b/Documentation/tracepoints.txt @@ -3,28 +3,30 @@ Mathieu Desnoyers -This document introduces Linux Kernel Tracepoints and their use. It provides -examples of how to insert tracepoints in the kernel and connect probe functions -to them and provides some examples of probe functions. +This document introduces Linux Kernel Tracepoints and their use. It +provides examples of how to insert tracepoints in the kernel and +connect probe functions to them and provides some examples of probe +functions. * Purpose of tracepoints -A tracepoint placed in code provides a hook to call a function (probe) that you -can provide at runtime. A tracepoint can be "on" (a probe is connected to it) or -"off" (no probe is attached). When a tracepoint is "off" it has no effect, -except for adding a tiny time penalty (checking a condition for a branch) and -space penalty (adding a few bytes for the function call at the end of the -instrumented function and adds a data structure in a separate section). When a -tracepoint is "on", the function you provide is called each time the tracepoint -is executed, in the execution context of the caller. When the function provided -ends its execution, it returns to the caller (continuing from the tracepoint -site). +A tracepoint placed in code provides a hook to call a function (probe) +that you can provide at runtime. A tracepoint can be "on" (a probe is +connected to it) or "off" (no probe is attached). When a tracepoint is +"off" it has no effect, except for adding a tiny time penalty +(checking a condition for a branch) and space penalty (adding a few +bytes for the function call at the end of the instrumented function +and adds a data structure in a separate section). When a tracepoint +is "on", the function you provide is called each time the tracepoint +is executed, in the execution context of the caller. When the function +provided ends its execution, it returns to the caller (continuing from +the tracepoint site). You can put tracepoints at important locations in the code. They are lightweight hooks that can pass an arbitrary number of parameters, -which prototypes are described in a tracepoint declaration placed in a header -file. +which prototypes are described in a tracepoint declaration placed in a +header file. They can be used for tracing and performance accounting. @@ -63,36 +65,41 @@ Where : - subsys_eventname is an identifier unique to your event - subsys is the name of your subsystem. - eventname is the name of the event to trace. -- TPPTOTO(int firstarg, struct task_struct *p) is the prototype of the function - called by this tracepoint. -- TPARGS(firstarg, p) are the parameters names, same as found in the prototype. -Connecting a function (probe) to a tracepoint is done by providing a probe -(function to call) for the specific tracepoint through +- TPPTOTO(int firstarg, struct task_struct *p) is the prototype of the + function called by this tracepoint. + +- TPARGS(firstarg, p) are the parameters names, same as found in the + prototype. + +Connecting a function (probe) to a tracepoint is done by providing a +probe (function to call) for the specific tracepoint through register_trace_subsys_eventname(). Removing a probe is done through unregister_trace_subsys_eventname(); it will remove the probe. -tracepoint_synchronize_unregister() must be called before the end of the module exit -function to make sure there is no caller left using the probe. This, and the -fact that preemption is disabled around the probe call, make sure that probe -removal and module unload are safe. See the "Probe example" section below for a -sample probe module. - -The tracepoint mechanism supports inserting multiple instances of the same -tracepoint, but a single definition must be made of a given tracepoint name over -all the kernel to make sure no type conflict will occur. Name mangling of the -tracepoints is done using the prototypes to make sure typing is correct. -Verification of probe type correctness is done at the registration site by the -compiler. Tracepoints can be put in inline functions, inlined static functions, -and unrolled loops as well as regular functions. - -The naming scheme "subsys_event" is suggested here as a convention intended -to limit collisions. Tracepoint names are global to the kernel: they are -considered as being the same whether they are in the core kernel image or in -modules. + +tracepoint_synchronize_unregister() must be called before the end of +the module exit function to make sure there is no caller left using +the probe. This, and the fact that preemption is disabled around the +probe call, make sure that probe removal and module unload are safe. +See the "Probe example" section below for a sample probe module. + +The tracepoint mechanism supports inserting multiple instances of the +same tracepoint, but a single definition must be made of a given +tracepoint name over all the kernel to make sure no type conflict will +occur. Name mangling of the tracepoints is done using the prototypes +to make sure typing is correct. Verification of probe type correctness +is done at the registration site by the compiler. Tracepoints can be +put in inline functions, inlined static functions, and unrolled loops +as well as regular functions. + +The naming scheme "subsys_event" is suggested here as a convention +intended to limit collisions. Tracepoint names are global to the +kernel: they are considered as being the same whether they are in the +core kernel image or in modules. If the tracepoint has to be used in kernel modules, an -EXPORT_TRACEPOINT_SYMBOL_GPL() or EXPORT_TRACEPOINT_SYMBOL() can be used to -export the defined tracepoints. +EXPORT_TRACEPOINT_SYMBOL_GPL() or EXPORT_TRACEPOINT_SYMBOL() can be +used to export the defined tracepoints. * Probe / tracepoint example -- cgit v1.2.3 From 02b67518e2b1c490787dac7f35e1204e74fe21ba Mon Sep 17 00:00:00 2001 From: Török Edwin Date: Sat, 22 Nov 2008 13:28:47 +0200 Subject: tracing: add support for userspace stacktraces in tracing/iter_ctrl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: add new (default-off) tracing visualization feature Usage example: mount -t debugfs nodev /sys/kernel/debug cd /sys/kernel/debug/tracing echo userstacktrace >iter_ctrl echo sched_switch >current_tracer echo 1 >tracing_enabled .... run application ... echo 0 >tracing_enabled Then read one of 'trace','latency_trace','trace_pipe'. To get the best output you can compile your userspace programs with frame pointers (at least glibc + the app you are tracing). Signed-off-by: Török Edwin Signed-off-by: Ingo Molnar --- Documentation/ftrace.txt | 5 ++- arch/x86/kernel/stacktrace.c | 57 +++++++++++++++++++++++++++ include/linux/stacktrace.h | 8 ++++ kernel/trace/trace.c | 93 ++++++++++++++++++++++++++++++++++++++++++++ kernel/trace/trace.h | 9 +++++ 5 files changed, 171 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt index 753f4de4b175..79a80f79c062 100644 --- a/Documentation/ftrace.txt +++ b/Documentation/ftrace.txt @@ -324,7 +324,7 @@ output. To see what is available, simply cat the file: cat /debug/tracing/trace_options print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \ - noblock nostacktrace nosched-tree + noblock nostacktrace nosched-tree nouserstacktrace To disable one of the options, echo in the option prepended with "no". @@ -378,6 +378,9 @@ Here are the available options: When a trace is recorded, so is the stack of functions. This allows for back traces of trace sites. + userstacktrace - This option changes the trace. + It records a stacktrace of the current userspace thread. + sched-tree - TBD (any users??) diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index a03e7f6d90c3..b15153060417 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -6,6 +6,7 @@ #include #include #include +#include #include static void save_stack_warning(void *data, char *msg) @@ -83,3 +84,59 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) trace->entries[trace->nr_entries++] = ULONG_MAX; } EXPORT_SYMBOL_GPL(save_stack_trace_tsk); + +/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */ + +struct stack_frame { + const void __user *next_fp; + unsigned long return_address; +}; + +static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) +{ + int ret; + + if (!access_ok(VERIFY_READ, fp, sizeof(*frame))) + return 0; + + ret = 1; + pagefault_disable(); + if (__copy_from_user_inatomic(frame, fp, sizeof(*frame))) + ret = 0; + pagefault_enable(); + + return ret; +} + +void save_stack_trace_user(struct stack_trace *trace) +{ + /* + * Trace user stack if we are not a kernel thread + */ + if (current->mm) { + const struct pt_regs *regs = task_pt_regs(current); + const void __user *fp = (const void __user *)regs->bp; + + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = regs->ip; + + while (trace->nr_entries < trace->max_entries) { + struct stack_frame frame; + frame.next_fp = NULL; + frame.return_address = 0; + if (!copy_stack_frame(fp, &frame)) + break; + if ((unsigned long)fp < regs->sp) + break; + if (frame.return_address) + trace->entries[trace->nr_entries++] = + frame.return_address; + if (fp == frame.next_fp) + break; + fp = frame.next_fp; + } + } + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = ULONG_MAX; +} + diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index b106fd8e0d5c..68de51468f5d 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -15,9 +15,17 @@ extern void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace); extern void print_stack_trace(struct stack_trace *trace, int spaces); + +#ifdef CONFIG_X86 +extern void save_stack_trace_user(struct stack_trace *trace); +#else +# define save_stack_trace_user(trace) do { } while (0) +#endif + #else # define save_stack_trace(trace) do { } while (0) # define save_stack_trace_tsk(tsk, trace) do { } while (0) +# define save_stack_trace_user(trace) do { } while (0) # define print_stack_trace(trace, spaces) do { } while (0) #endif diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4ee6f0375222..ced8b4fa9f51 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -275,6 +275,7 @@ static const char *trace_options[] = { "ftrace_preempt", "branch", "annotate", + "userstacktrace", NULL }; @@ -918,6 +919,44 @@ void __trace_stack(struct trace_array *tr, ftrace_trace_stack(tr, data, flags, skip, preempt_count()); } +static void ftrace_trace_userstack(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long flags, int pc) +{ + struct userstack_entry *entry; + struct stack_trace trace; + struct ring_buffer_event *event; + unsigned long irq_flags; + + if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) + return; + + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + return; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, flags, pc); + entry->ent.type = TRACE_USER_STACK; + + memset(&entry->caller, 0, sizeof(entry->caller)); + + trace.nr_entries = 0; + trace.max_entries = FTRACE_STACK_ENTRIES; + trace.skip = 0; + trace.entries = entry->caller; + + save_stack_trace_user(&trace); + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); +} + +void __trace_userstack(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long flags) +{ + ftrace_trace_userstack(tr, data, flags, preempt_count()); +} + static void ftrace_trace_special(void *__tr, void *__data, unsigned long arg1, unsigned long arg2, unsigned long arg3, @@ -941,6 +980,7 @@ ftrace_trace_special(void *__tr, void *__data, entry->arg3 = arg3; ring_buffer_unlock_commit(tr->buffer, event, irq_flags); ftrace_trace_stack(tr, data, irq_flags, 4, pc); + ftrace_trace_userstack(tr, data, irq_flags, pc); trace_wake_up(); } @@ -979,6 +1019,7 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->next_cpu = task_cpu(next); ring_buffer_unlock_commit(tr->buffer, event, irq_flags); ftrace_trace_stack(tr, data, flags, 5, pc); + ftrace_trace_userstack(tr, data, flags, pc); } void @@ -1008,6 +1049,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_cpu = task_cpu(wakee); ring_buffer_unlock_commit(tr->buffer, event, irq_flags); ftrace_trace_stack(tr, data, flags, 6, pc); + ftrace_trace_userstack(tr, data, flags, pc); trace_wake_up(); } @@ -1387,6 +1429,31 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) return ret; } +static int +seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, + unsigned long sym_flags) +{ + int ret = 1; + unsigned i; + + for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { + unsigned long ip = entry->caller[i]; + + if (ip == ULONG_MAX || !ret) + break; + if (i) + ret = trace_seq_puts(s, " <- "); + if (!ip) { + ret = trace_seq_puts(s, "??"); + continue; + } + if (ret /*&& (sym_flags & TRACE_ITER_SYM_ADDR)*/) + ret = trace_seq_printf(s, " <" IP_FMT ">", ip); + } + + return ret; +} + static void print_lat_help_header(struct seq_file *m) { seq_puts(m, "# _------=> CPU# \n"); @@ -1702,6 +1769,16 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) field->line); break; } + case TRACE_USER_STACK: { + struct userstack_entry *field; + + trace_assign_type(field, entry); + + seq_print_userip_objs(field, s, sym_flags); + if (entry->flags & TRACE_FLAG_CONT) + trace_seq_print_cont(s, iter); + break; + } default: trace_seq_printf(s, "Unknown type %d\n", entry->type); } @@ -1853,6 +1930,19 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) field->line); break; } + case TRACE_USER_STACK: { + struct userstack_entry *field; + + trace_assign_type(field, entry); + + ret = seq_print_userip_objs(field, s, sym_flags); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + ret = trace_seq_putc(s, '\n'); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + break; + } } return TRACE_TYPE_HANDLED; } @@ -1912,6 +2002,7 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter) break; } case TRACE_SPECIAL: + case TRACE_USER_STACK: case TRACE_STACK: { struct special_entry *field; @@ -2000,6 +2091,7 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) break; } case TRACE_SPECIAL: + case TRACE_USER_STACK: case TRACE_STACK: { struct special_entry *field; @@ -2054,6 +2146,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) break; } case TRACE_SPECIAL: + case TRACE_USER_STACK: case TRACE_STACK: { struct special_entry *field; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 2cb12fd98f6b..17bb4c830b01 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -26,6 +26,7 @@ enum trace_type { TRACE_BOOT_CALL, TRACE_BOOT_RET, TRACE_FN_RET, + TRACE_USER_STACK, __TRACE_LAST_TYPE }; @@ -42,6 +43,7 @@ struct trace_entry { unsigned char flags; unsigned char preempt_count; int pid; + int tgid; }; /* @@ -99,6 +101,11 @@ struct stack_entry { unsigned long caller[FTRACE_STACK_ENTRIES]; }; +struct userstack_entry { + struct trace_entry ent; + unsigned long caller[FTRACE_STACK_ENTRIES]; +}; + /* * ftrace_printk entry: */ @@ -240,6 +247,7 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \ IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \ IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ + IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ IF_ASSIGN(var, ent, struct special_entry, 0); \ IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ @@ -500,6 +508,7 @@ enum trace_iterator_flags { TRACE_ITER_PREEMPTONLY = 0x800, TRACE_ITER_BRANCH = 0x1000, TRACE_ITER_ANNOTATE = 0x2000, + TRACE_ITER_USERSTACKTRACE = 0x4000 }; /* -- cgit v1.2.3 From b54d3de9f3b8956653b06f1a32e9f9321c6d9027 Mon Sep 17 00:00:00 2001 From: Török Edwin Date: Sat, 22 Nov 2008 13:28:48 +0200 Subject: tracing: identify which executable object the userspace address belongs to MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: modify+improve the userstacktrace tracing visualization feature Store thread group leader id, and use it to lookup the address in the process's map. We could have looked up the address on thread's map, but the thread might not exist by the time we are called. The process might not exist either, but if you are reading trace_pipe, that is unlikely. Example usage: mount -t debugfs nodev /sys/kernel/debug cd /sys/kernel/debug/tracing echo userstacktrace >iter_ctrl echo sym-userobj >iter_ctrl echo sched_switch >current_tracer echo 1 >tracing_enabled cat trace_pipe >/tmp/trace& .... run application ... echo 0 >tracing_enabled cat /tmp/trace You'll see stack entries like: /lib/libpthread-2.7.so[+0xd370] You can convert them to function/line using: addr2line -fie /lib/libpthread-2.7.so 0xd370 Or: addr2line -fie /usr/lib/debug/libpthread-2.7.so 0xd370 For non-PIC/PIE executables this won't work: a.out[+0x73b] You need to run the following: addr2line -fie a.out 0x40073b (where 0x400000 is the default load address of a.out) Signed-off-by: Török Edwin Signed-off-by: Ingo Molnar --- Documentation/ftrace.txt | 13 +++++++- kernel/trace/trace.c | 86 ++++++++++++++++++++++++++++++++++++++++++++---- kernel/trace/trace.h | 3 +- 3 files changed, 93 insertions(+), 9 deletions(-) (limited to 'Documentation') diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt index 79a80f79c062..35a78bc6651d 100644 --- a/Documentation/ftrace.txt +++ b/Documentation/ftrace.txt @@ -324,7 +324,7 @@ output. To see what is available, simply cat the file: cat /debug/tracing/trace_options print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \ - noblock nostacktrace nosched-tree nouserstacktrace + noblock nostacktrace nosched-tree nouserstacktrace nosym-userobj To disable one of the options, echo in the option prepended with "no". @@ -381,6 +381,17 @@ Here are the available options: userstacktrace - This option changes the trace. It records a stacktrace of the current userspace thread. + sym-userobj - when user stacktrace are enabled, look up which object the + address belongs to, and print a relative address + This is especially useful when ASLR is on, otherwise you don't + get a chance to resolve the address to object/file/line after the app is no + longer running + + The lookup is performed when you read trace,trace_pipe,latency_trace. Example: + + a.out-1623 [000] 40874.465068: /root/a.out[+0x480] <-/root/a.out[+0 +x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6] + sched-tree - TBD (any users??) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ced8b4fa9f51..62776b71b1c5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -276,6 +277,7 @@ static const char *trace_options[] = { "branch", "annotate", "userstacktrace", + "sym-userobj", NULL }; @@ -422,6 +424,28 @@ trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) return trace_seq_putmem(s, hex, j); } +static int +trace_seq_path(struct trace_seq *s, struct path *path) +{ + unsigned char *p; + + if (s->len >= (PAGE_SIZE - 1)) + return 0; + p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len); + if (!IS_ERR(p)) { + p = mangle_path(s->buffer + s->len, p, "\n"); + if (p) { + s->len = p - s->buffer; + return 1; + } + } else { + s->buffer[s->len++] = '?'; + return 1; + } + + return 0; +} + static void trace_seq_reset(struct trace_seq *s) { @@ -802,6 +826,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, entry->preempt_count = pc & 0xff; entry->pid = (tsk) ? tsk->pid : 0; + entry->tgid = (tsk) ? tsk->tgid : 0; entry->flags = #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | @@ -1429,28 +1454,73 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) return ret; } +static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, + unsigned long ip, unsigned long sym_flags) +{ + struct file *file = NULL; + unsigned long vmstart = 0; + int ret = 1; + + if (mm) { + const struct vm_area_struct *vma = find_vma(mm, ip); + if (vma) { + file = vma->vm_file; + vmstart = vma->vm_start; + } + } + if (file) { + ret = trace_seq_path(s, &file->f_path); + if (ret) + ret = trace_seq_printf(s, "[+0x%lx]", + ip - vmstart); + } + if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file)) + ret = trace_seq_printf(s, " <" IP_FMT ">", ip); + return ret; +} + static int seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, - unsigned long sym_flags) + unsigned long sym_flags) { + struct mm_struct *mm = NULL; int ret = 1; unsigned i; + if (trace_flags & TRACE_ITER_SYM_USEROBJ) { + struct task_struct *task; + /* + * we do the lookup on the thread group leader, + * since individual threads might have already quit! + */ + rcu_read_lock(); + task = find_task_by_vpid(entry->ent.tgid); + rcu_read_unlock(); + + if (task) + mm = get_task_mm(task); + } + for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { unsigned long ip = entry->caller[i]; if (ip == ULONG_MAX || !ret) break; - if (i) + if (i && ret) ret = trace_seq_puts(s, " <- "); if (!ip) { - ret = trace_seq_puts(s, "??"); + if (ret) + ret = trace_seq_puts(s, "??"); continue; } - if (ret /*&& (sym_flags & TRACE_ITER_SYM_ADDR)*/) - ret = trace_seq_printf(s, " <" IP_FMT ">", ip); + if (!ret) + break; + if (ret) + ret = seq_print_user_ip(s, mm, ip, sym_flags); } + if (mm) + mmput(mm); return ret; } @@ -1775,8 +1845,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) trace_assign_type(field, entry); seq_print_userip_objs(field, s, sym_flags); - if (entry->flags & TRACE_FLAG_CONT) - trace_seq_print_cont(s, iter); + trace_seq_putc(s, '\n'); break; } default: @@ -3581,6 +3650,9 @@ void ftrace_dump(void) atomic_inc(&global_trace.data[cpu]->disabled); } + /* don't look at user memory in panic mode */ + trace_flags &= ~TRACE_ITER_SYM_USEROBJ; + printk(KERN_TRACE "Dumping ftrace buffer:\n"); iter.tr = &global_trace; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 17bb4c830b01..28c15c2ebc22 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -508,7 +508,8 @@ enum trace_iterator_flags { TRACE_ITER_PREEMPTONLY = 0x800, TRACE_ITER_BRANCH = 0x1000, TRACE_ITER_ANNOTATE = 0x2000, - TRACE_ITER_USERSTACKTRACE = 0x4000 + TRACE_ITER_USERSTACKTRACE = 0x4000, + TRACE_ITER_SYM_USEROBJ = 0x8000 }; /* -- cgit v1.2.3 From fb91ee6cf5b8be5360acec577458e29ec7e97e5e Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Sun, 23 Nov 2008 21:24:59 +0200 Subject: tracing, doc: update mmiotrace documentation Impact: update documentation Update to reflect the current state of the tracing framework: - "none" tracer has been replaced by "nop" tracer - tracing_enabled must be toggled when changing buffer size Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar --- Documentation/tracers/mmiotrace.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/tracers/mmiotrace.txt b/Documentation/tracers/mmiotrace.txt index 5bbbe2096223..cde23b4a12a1 100644 --- a/Documentation/tracers/mmiotrace.txt +++ b/Documentation/tracers/mmiotrace.txt @@ -37,7 +37,7 @@ $ echo mmiotrace > /debug/tracing/current_tracer $ cat /debug/tracing/trace_pipe > mydump.txt & Start X or whatever. $ echo "X is up" > /debug/tracing/trace_marker -$ echo none > /debug/tracing/current_tracer +$ echo nop > /debug/tracing/current_tracer Check for lost events. @@ -66,7 +66,7 @@ which action. It is recommended to place descriptive markers about what you do. Shut down mmiotrace (requires root privileges): -$ echo none > /debug/tracing/current_tracer +$ echo nop > /debug/tracing/current_tracer The 'cat' process exits. If it does not, kill it by issuing 'fg' command and pressing ctrl+c. @@ -81,7 +81,9 @@ are: $ cat /debug/tracing/trace_entries gives you a number. Approximately double this number and write it back, for instance: +$ echo 0 > /debug/tracing/tracing_enabled $ echo 128000 > /debug/tracing/trace_entries +$ echo 1 > /debug/tracing/tracing_enabled Then start again from the top. If you are doing a trace for a driver project, e.g. Nouveau, you should also -- cgit v1.2.3