author     Linus Torvalds <torvalds@linux-foundation.org>   2021-05-03 20:19:54 +0200
committer  Linus Torvalds <torvalds@linux-foundation.org>   2021-05-03 20:19:54 +0200
commit     9b1f61d5d73d550a20dd79b9a17b6bb05a8f9307
tree       b1b3c9677f586971212fee1c38500ff5d2b6773a
parent     Merge branch 'work.recursive_removal' of git://git.kernel.org/pub/scm/linux/k...
parent     tracing: Restructure trace_clock_global() to never block
Merge tag 'trace-v5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace
Pull tracing updates from Steven Rostedt:
"New feature:
- A new "func-no-repeats" option in tracefs/options directory.
When set, the function tracer will detect if the current function
being traced is the same as the previous one, and instead of
recording it, it will keep track of the number of times that the
function is repeated in a row. And when another function is
recorded, it will write a new event that shows the function that
repeated, the number of times it repeated and the time stamp of
when the last repeated function occurred.
Enhancements:
- In order to implement the above "func-no-repeats" option, the ring
buffer timestamp can now give the accurate timestamp of the event
as it is being recorded, instead of having to record an absolute
timestamp for all events. This helps the histogram code, which no
longer needs to waste ring buffer space.
- New validation logic to make sure all trace events that access
dereferenced pointers do so in a safe way, and will warn otherwise.
Fixes:
- No longer limit the PIDs of tasks that are recorded for
"saved_cmdlines" to PID_MAX_DEFAULT (32768), as systemd now allows
for a much larger range. The old limit caused the mapping of PIDs
to task names to be dropped for all tasks with a PID greater than
32768.
- Change trace_clock_global() to never block, as its blocking could
cause a deadlock.
Clean ups:
- Typos, prototype fixes, and removal of duplicate or unused code.
- Better management of ftrace_page allocations"
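The "func-no-repeats" option described above boils down to comparing each traced call with the previous one on the same CPU and only writing a summary event once a different function shows up. The following standalone C sketch illustrates that detection logic; the struct and function names are made up for the example, while the real per-CPU state lives in struct trace_func_repeats in the diff below.

#include <stdio.h>

/* Simplified per-CPU state; the kernel keeps this in struct trace_func_repeats. */
struct func_repeats {
    unsigned long ip;                /* last function address seen   */
    unsigned long parent_ip;         /* its call site                 */
    unsigned long count;             /* consecutive repeats so far    */
    unsigned long long ts_last_call; /* time stamp of the last repeat */
};

/* Emit one summary event in place of 'count' individual function events. */
static void flush_repeats(struct func_repeats *r)
{
    if (r->count)
        printf("func_repeats: ip=%#lx repeats=%lu last_ts=%llu\n",
               r->ip, r->count, r->ts_last_call);
    r->count = 0;
}

/* Called for every traced function entry. */
static void trace_function(struct func_repeats *r, unsigned long ip,
                           unsigned long parent_ip, unsigned long long ts)
{
    if (r->ip == ip && r->parent_ip == parent_ip) {
        /* Same call site as last time: count it instead of recording it. */
        r->count++;
        r->ts_last_call = ts;
        return;
    }
    flush_repeats(r);    /* a different function: summarize the previous run */
    printf("function: ip=%#lx parent=%#lx ts=%llu\n", ip, parent_ip, ts);
    r->ip = ip;
    r->parent_ip = parent_ip;
    r->ts_last_call = ts;
}

int main(void)
{
    struct func_repeats r = { 0 };
    unsigned long long ts = 0;

    /* Three identical calls in a row collapse into one summary event. */
    trace_function(&r, 0x1000, 0x2000, ++ts);
    trace_function(&r, 0x1000, 0x2000, ++ts);
    trace_function(&r, 0x1000, 0x2000, ++ts);
    trace_function(&r, 0x3000, 0x2000, ++ts);
    flush_repeats(&r);
    return 0;
}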
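The timestamp enhancement relies on the ring buffer remembering, per nesting level (normal, softirq, irq, NMI), the time stamp computed when an event was reserved, so that code still filling in that event can ask for the event's own time instead of forcing an absolute time stamp onto every event. Below is a minimal single-CPU sketch of that bookkeeping, with names loosely modelled on the diff rather than taken from the real ring buffer code.

#include <stdio.h>

#define MAX_NEST 5    /* normal, softirq, irq, NMI, plus one spare */

/* Simplified per-CPU buffer state. */
static unsigned long long event_stamp[MAX_NEST];
static int committing;    /* how many events are currently being written */

/* Reserve an event at the current nesting level and record its time. */
static void reserve_event(unsigned long long now)
{
    int nest = committing++;

    if (nest < MAX_NEST)
        event_stamp[nest] = now;
}

/* Time stamp of the innermost event that is still being written. */
static unsigned long long event_time_stamp(unsigned long long fallback)
{
    int nest = committing - 1;

    if (nest >= 0 && nest < MAX_NEST)
        return event_stamp[nest];
    return fallback;    /* nested too deep: fall back to "now" */
}

static void commit_event(void)
{
    committing--;
}

int main(void)
{
    reserve_event(100);                                      /* normal context */
    reserve_event(105);                                      /* e.g. an irq    */
    printf("irq event ts = %llu\n", event_time_stamp(0));    /* prints 105     */
    commit_event();
    printf("normal event ts = %llu\n", event_time_stamp(0)); /* prints 100     */
    commit_event();
    return 0;
}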
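The new string-pointer validation warns when an event prints a %s whose pointer may no longer be valid at read time; the clearly safe case is a string that was copied into the event record itself. Here is a hedged sketch of that containment test, using a hypothetical helper rather than the kernel's trace_safe_str().

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * Return 1 if 'str' points inside the event payload [ent, ent + ent_size):
 * the string was copied into the ring buffer and cannot disappear before
 * the event is printed.
 */
static int str_in_event(const void *ent, size_t ent_size, const char *str)
{
    uintptr_t start = (uintptr_t)ent;
    uintptr_t addr  = (uintptr_t)str;

    return addr >= start && addr < start + ent_size;
}

int main(void)
{
    char event[32];
    const char *elsewhere = "lives outside the event";

    strcpy(event, "copied into the event");

    printf("%d\n", str_in_event(event, sizeof(event), event));     /* 1: safe       */
    printf("%d\n", str_in_event(event, sizeof(event), elsewhere)); /* 0: would warn */
    return 0;
}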
* tag 'trace-v5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace: (32 commits)
tracing: Restructure trace_clock_global() to never block
tracing: Map all PIDs to command lines
ftrace: Reuse the output of the function tracer for func_repeats
tracing: Add "func_no_repeats" option for function tracing
tracing: Unify the logic for function tracing options
tracing: Add method for recording "func_repeats" events
tracing: Add "last_func_repeats" to struct trace_array
tracing: Define new ftrace event "func_repeats"
tracing: Define static void trace_print_time()
ftrace: Simplify the calculation of page number for ftrace_page->records some more
ftrace: Store the order of pages allocated in ftrace_page
tracing: Remove unused argument from "ring_buffer_time_stamp()
tracing: Remove duplicate struct declaration in trace_events.h
tracing: Update create_system_filter() kernel-doc comment
tracing: A minor cleanup for create_system_filter()
kernel: trace: Mundane typo fixes in the file trace_events_filter.c
tracing: Fix various typos in comments
scripts/recordmcount.pl: Make vim and emacs indent the same
scripts/recordmcount.pl: Make indent spacing consistent
tracing: Add a verifier to check string pointers for trace events
...
43 files changed, 1187 insertions, 297 deletions
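The saved_cmdlines change in the diff below keeps the PID-indexed map at its old PID_MAX_DEFAULT size but indexes it with the PID masked into that range, and stores the full PID in the reverse map so that a collision can be detected on lookup. This simplified standalone sketch shows the scheme; it ignores the locking and the idle-task special case handled by the real code.

#include <stdio.h>
#include <string.h>

#define PID_MAX_DEFAULT 32768
#define NO_CMDLINE_MAP  -1

static int  map_pid_to_cmdline[PID_MAX_DEFAULT];  /* masked pid -> slot */
static int  map_cmdline_to_pid[PID_MAX_DEFAULT];  /* slot -> full pid   */
static char saved_cmdlines[PID_MAX_DEFAULT][16];
static int  cmdline_idx;

static void save_cmdline(int pid, const char *comm)
{
    int tpid = pid & (PID_MAX_DEFAULT - 1);    /* fold into the map range */
    int idx  = map_pid_to_cmdline[tpid];

    if (idx == NO_CMDLINE_MAP) {
        idx = cmdline_idx++ % PID_MAX_DEFAULT;
        map_pid_to_cmdline[tpid] = idx;
    }
    map_cmdline_to_pid[idx] = pid;             /* remember the full pid */
    strncpy(saved_cmdlines[idx], comm, sizeof(saved_cmdlines[idx]) - 1);
}

static const char *find_cmdline(int pid)
{
    int tpid = pid & (PID_MAX_DEFAULT - 1);
    int idx  = map_pid_to_cmdline[tpid];

    /* Only trust the entry if the full pid matches; otherwise it was reused. */
    if (idx != NO_CMDLINE_MAP && map_cmdline_to_pid[idx] == pid)
        return saved_cmdlines[idx];
    return "<...>";
}

int main(void)
{
    memset(map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));

    save_cmdline(40000, "big-pid-task");                    /* pid above 32768 now works */
    printf("%s\n", find_cmdline(40000));                    /* big-pid-task              */
    printf("%s\n", find_cmdline(40000 - PID_MAX_DEFAULT));  /* collision -> <...>        */
    return 0;
}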
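The trace_clock_global() restructuring takes the lock that publishes the globally ordered time only with a trylock, so lock contention can never make the clock block (the real code also skips the lock entirely in NMI context); the caller still gets a value that is monotonic with respect to the last published time. Below is a rough userspace sketch of that pattern using C11 atomics, as an illustration rather than the kernel's clock code.

#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned long long prev_time;  /* latest globally published time */
static atomic_flag lock = ATOMIC_FLAG_INIT;   /* ordering lock, never waited on */

/* Per-CPU clock source stand-in; assume it may drift slightly between CPUs. */
static unsigned long long local_clock(void)
{
    static unsigned long long t;
    return t += 3;
}

static unsigned long long trace_clock_global_sketch(void)
{
    unsigned long long prev = atomic_load(&prev_time);
    unsigned long long now = local_clock();

    /* Never go backwards relative to what was already published. */
    if ((long long)(now - prev) < 0)
        now = prev + 1;

    /* Only publish if the lock is free; never spin waiting for it. */
    if (!atomic_flag_test_and_set(&lock)) {
        prev = atomic_load(&prev_time);    /* re-read under the lock */
        if ((long long)(now - prev) < 0)
            now = prev + 1;
        atomic_store(&prev_time, now);
        atomic_flag_clear(&lock);
    }
    return now;
}

int main(void)
{
    for (int i = 0; i < 3; i++)
        printf("%llu\n", trace_clock_global_sketch());
    return 0;
}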
diff --git a/arch/microblaze/include/asm/ftrace.h b/arch/microblaze/include/asm/ftrace.h index 5db7f4489f05..6a92bed37794 100644 --- a/arch/microblaze/include/asm/ftrace.h +++ b/arch/microblaze/include/asm/ftrace.h @@ -13,7 +13,7 @@ extern void ftrace_call_graph(void); #endif #ifdef CONFIG_DYNAMIC_FTRACE -/* reloction of mcount call site is the same as the address */ +/* relocation of mcount call site is the same as the address */ static inline unsigned long ftrace_call_adjust(unsigned long addr) { return addr; diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c index 414f8a780cc3..0e23e3a8df6b 100644 --- a/arch/nds32/kernel/ftrace.c +++ b/arch/nds32/kernel/ftrace.c @@ -236,7 +236,7 @@ void __naked return_to_handler(void) "bal ftrace_return_to_handler\n\t" "move $lp, $r0 \n\t" - /* restore state nedded by the ABI */ + /* restore state needed by the ABI */ "lmw.bim $r0,[$sp],$r1,#0x0 \n\t"); } diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index bc76970b6ee5..debe8c4f7062 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -12,7 +12,7 @@ #ifdef __ASSEMBLY__ -/* Based off of objdump optput from glibc */ +/* Based off of objdump output from glibc */ #define MCOUNT_SAVE_FRAME \ stwu r1,-48(r1); \ @@ -52,7 +52,7 @@ extern void _mcount(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) { - /* reloction of mcount call site is the same as the address */ + /* relocation of mcount call site is the same as the address */ return addr; } diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c index 0646c5961846..295c43315bbe 100644 --- a/arch/sh/kernel/ftrace.c +++ b/arch/sh/kernel/ftrace.c @@ -67,7 +67,7 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) * Modifying code must take extra care. On an SMP machine, if * the code being modified is also being executed on another CPU * that CPU will have undefined results and possibly take a GPF. - * We use kstop_machine to stop other CPUS from exectuing code. + * We use kstop_machine to stop other CPUS from executing code. * But this does not stop NMIs from happening. We still need * to protect against that. We separate out the modification of * the code to take care of this. diff --git a/arch/sparc/include/asm/ftrace.h b/arch/sparc/include/asm/ftrace.h index d3aa1a524431..e284394cb3aa 100644 --- a/arch/sparc/include/asm/ftrace.h +++ b/arch/sparc/include/asm/ftrace.h @@ -17,7 +17,7 @@ void _mcount(void); #endif #ifdef CONFIG_DYNAMIC_FTRACE -/* reloction of mcount call site is the same as the address */ +/* relocation of mcount call site is the same as the address */ static inline unsigned long ftrace_call_adjust(unsigned long addr) { return addr; diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 4b83cbded559..1261e8b41edb 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -477,7 +477,7 @@ struct dentry *tracefs_create_dir(const char *name, struct dentry *parent) * * The instances directory is special as it allows for mkdir and rmdir to * to be done by userspace. When a mkdir or rmdir is performed, the inode - * locks are released and the methhods passed in (@mkdir and @rmdir) are + * locks are released and the methods passed in (@mkdir and @rmdir) are * called without locks and with the name of the directory being created * within the instances directory. 
* diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 86e5028bfa20..a69f363b61bf 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -33,7 +33,7 @@ /* * If the arch's mcount caller does not support all of ftrace's * features, then it must call an indirect function that - * does. Or at least does enough to prevent any unwelcomed side effects. + * does. Or at least does enough to prevent any unwelcome side effects. */ #if !ARCH_SUPPORTS_FTRACE_OPS # define FTRACE_FORCE_LIST_FUNC 1 @@ -389,7 +389,7 @@ DECLARE_PER_CPU(int, disable_stack_tracer); */ static inline void stack_tracer_disable(void) { - /* Preemption or interupts must be disabled */ + /* Preemption or interrupts must be disabled */ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)) WARN_ON_ONCE(!preempt_count() || !irqs_disabled()); this_cpu_inc(disable_stack_tracer); diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 136ea0997e6d..dac53fd3afea 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -61,7 +61,8 @@ enum ring_buffer_type { unsigned ring_buffer_event_length(struct ring_buffer_event *event); void *ring_buffer_event_data(struct ring_buffer_event *event); -u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event); +u64 ring_buffer_event_time_stamp(struct trace_buffer *buffer, + struct ring_buffer_event *event); /* * ring_buffer_discard_commit will remove an event that has not @@ -180,7 +181,7 @@ unsigned long ring_buffer_commit_overrun_cpu(struct trace_buffer *buffer, int cp unsigned long ring_buffer_dropped_events_cpu(struct trace_buffer *buffer, int cpu); unsigned long ring_buffer_read_events_cpu(struct trace_buffer *buffer, int cpu); -u64 ring_buffer_time_stamp(struct trace_buffer *buffer, int cpu); +u64 ring_buffer_time_stamp(struct trace_buffer *buffer); void ring_buffer_normalize_time_stamp(struct trace_buffer *buffer, int cpu, u64 *ts); void ring_buffer_set_clock(struct trace_buffer *buffer, diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index 9d6c28cc4d8f..5b31c5147969 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -72,6 +72,31 @@ static inline unsigned int seq_buf_used(struct seq_buf *s) } /** + * seq_buf_terminate - Make sure buffer is nul terminated + * @s: the seq_buf descriptor to terminate. + * + * This makes sure that the buffer in @s is nul terminated and + * safe to read as a string. + * + * Note, if this is called when the buffer has overflowed, then + * the last byte of the buffer is zeroed, and the len will still + * point passed it. + * + * After this function is called, s->buffer is safe to use + * in string operations. + */ +static inline void seq_buf_terminate(struct seq_buf *s) +{ + if (WARN_ON(s->size == 0)) + return; + + if (seq_buf_buffer_left(s)) + s->buffer[s->len] = 0; + else + s->buffer[s->size - 1] = 0; +} + +/** * seq_buf_get_buf - get buffer to write arbitrary data to * @s: the seq_buf handle * @bufp: the beginning of the buffer is stored here diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 28e7af1406f2..ad413b382a3c 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -206,7 +206,7 @@ static inline unsigned int tracing_gen_ctx_dec(void) trace_ctx = tracing_gen_ctx(); /* - * Subtract one from the preeption counter if preemption is enabled, + * Subtract one from the preemption counter if preemption is enabled, * see trace_event_buffer_reserve()for details. 
*/ if (IS_ENABLED(CONFIG_PREEMPTION)) @@ -404,7 +404,6 @@ trace_get_fields(struct trace_event_call *event_call) return event_call->class->get_fields(event_call); } -struct trace_array; struct trace_subsystem_dir; enum { @@ -640,7 +639,8 @@ enum event_trigger_type { extern int filter_match_preds(struct event_filter *filter, void *rec); extern enum event_trigger_type -event_triggers_call(struct trace_event_file *file, void *rec, +event_triggers_call(struct trace_event_file *file, + struct trace_buffer *buffer, void *rec, struct ring_buffer_event *event); extern void event_triggers_post_call(struct trace_event_file *file, @@ -664,7 +664,7 @@ trace_trigger_soft_disabled(struct trace_event_file *file) if (!(eflags & EVENT_FILE_FL_TRIGGER_COND)) { if (eflags & EVENT_FILE_FL_TRIGGER_MODE) - event_triggers_call(file, NULL, NULL); + event_triggers_call(file, NULL, NULL, NULL); if (eflags & EVENT_FILE_FL_SOFT_DISABLED) return true; if (eflags & EVENT_FILE_FL_PID_FILTER) diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 9cfb099da58f..13f65420f188 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -465,7 +465,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * * * * The declared 'local variable' is called '__entry' * * - * * __field(pid_t, prev_prid) is equivalent to a standard declariton: + * * __field(pid_t, prev_prid) is equivalent to a standard declaration: * * * * pid_t prev_pid; * * diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h index bd528176a3d5..abb8b24744fd 100644 --- a/include/trace/events/io_uring.h +++ b/include/trace/events/io_uring.h @@ -49,7 +49,7 @@ TRACE_EVENT(io_uring_create, ); /** - * io_uring_register - called after a buffer/file/eventfd was succesfully + * io_uring_register - called after a buffer/file/eventfd was successfully * registered for a ring * * @ctx: pointer to a ring context structure diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index c7711e9b6900..6768b64bc738 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -48,7 +48,7 @@ TRACE_EVENT(rcu_utilization, * RCU flavor, the grace-period number, and a string identifying the * grace-period-related event as follows: * - * "AccReadyCB": CPU acclerates new callbacks to RCU_NEXT_READY_TAIL. + * "AccReadyCB": CPU accelerates new callbacks to RCU_NEXT_READY_TAIL. * "AccWaitCB": CPU accelerates new callbacks to RCU_WAIT_TAIL. * "newreq": Request a new grace period. * "start": Start a grace period. diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index cbe3e152d24c..1eca2305ca42 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -174,7 +174,7 @@ DEFINE_EVENT(sched_wakeup_template, sched_waking, TP_ARGS(p)); /* - * Tracepoint called when the task is actually woken; p->state == TASK_RUNNNG. + * Tracepoint called when the task is actually woken; p->state == TASK_RUNNING. * It is not always called from the waking context. */ DEFINE_EVENT(sched_wakeup_template, sched_wakeup, diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index 19abb6c3eb73..6ad031c71be7 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -119,7 +119,7 @@ TRACE_EVENT(timer_expire_entry, * When used in combination with the timer_expire_entry tracepoint we can * determine the runtime of the timer callback function. * - * NOTE: Do NOT derefernce timer in TP_fast_assign. 
The pointer might + * NOTE: Do NOT dereference timer in TP_fast_assign. The pointer might * be invalid. We solely track the pointer. */ DEFINE_EVENT(timer_class, timer_expire_exit, diff --git a/init/main.c b/init/main.c index dd11bfd10ead..543fbe3060be 100644 --- a/init/main.c +++ b/init/main.c @@ -405,7 +405,7 @@ static int __init bootconfig_params(char *param, char *val, return 0; } -static void __init setup_boot_config(const char *cmdline) +static void __init setup_boot_config(void) { static char tmp_cmdline[COMMAND_LINE_SIZE] __initdata; const char *msg; @@ -472,7 +472,7 @@ static void __init setup_boot_config(const char *cmdline) #else -static void __init setup_boot_config(const char *cmdline) +static void __init setup_boot_config(void) { /* Remove bootconfig data from initrd */ get_boot_config_from_initrd(NULL, NULL); @@ -895,7 +895,7 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) pr_notice("%s", linux_banner); early_security_init(); setup_arch(&command_line); - setup_boot_config(command_line); + setup_boot_config(); setup_command_line(command_line); setup_nr_cpu_ids(); setup_per_cpu_areas(); diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 29a6ebeebc9e..b8a0d1d564fb 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -42,7 +42,7 @@ bool ftrace_graph_is_dead(void) } /** - * ftrace_graph_stop - set to permanently disable function graph tracincg + * ftrace_graph_stop - set to permanently disable function graph tracing * * In case of an error int function graph tracing, this is called * to try to keep function graph tracing from causing any more harm. @@ -117,7 +117,7 @@ int function_graph_enter(unsigned long ret, unsigned long func, /* * Skip graph tracing if the return location is served by direct trampoline, - * since call sequence and return addresses is unpredicatable anymore. + * since call sequence and return addresses are unpredictable anyway. * Ex: BPF trampoline may call original function and may skip frame * depending on type of BPF programs attached. */ diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 3ba52d4e1314..792b5589ba64 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1045,7 +1045,7 @@ struct ftrace_ops global_ops = { }; /* - * Used by the stack undwinder to know about dynamic ftrace trampolines. + * Used by the stack unwinder to know about dynamic ftrace trampolines. */ struct ftrace_ops *ftrace_ops_trampoline(unsigned long addr) { @@ -1090,7 +1090,7 @@ struct ftrace_page { struct ftrace_page *next; struct dyn_ftrace *records; int index; - int size; + int order; }; #define ENTRY_SIZE sizeof(struct dyn_ftrace) @@ -3000,7 +3000,7 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command) * When the kernel is preemptive, tasks can be preempted * while on a ftrace trampoline. Just scheduling a task on * a CPU is not good enough to flush them. Calling - * synchornize_rcu_tasks() will wait for those tasks to + * synchronize_rcu_tasks() will wait for those tasks to * execute and either schedule voluntarily or enter user space. */ if (IS_ENABLED(CONFIG_PREEMPTION)) @@ -3156,15 +3156,9 @@ static int ftrace_allocate_records(struct ftrace_page *pg, int count) if (WARN_ON(!count)) return -EINVAL; + /* We want to fill as much as possible, with no empty pages */ pages = DIV_ROUND_UP(count, ENTRIES_PER_PAGE); - order = get_count_order(pages); - - /* - * We want to fill as much as possible. No more than a page - * may be empty. 
- */ - if (!is_power_of_2(pages)) - order--; + order = fls(pages) - 1; again: pg->records = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order); @@ -3181,7 +3175,7 @@ static int ftrace_allocate_records(struct ftrace_page *pg, int count) ftrace_number_of_groups++; cnt = (PAGE_SIZE << order) / ENTRY_SIZE; - pg->size = cnt; + pg->order = order; if (cnt > count) cnt = count; @@ -3194,7 +3188,6 @@ ftrace_allocate_pages(unsigned long num_to_init) { struct ftrace_page *start_pg; struct ftrace_page *pg; - int order; int cnt; if (!num_to_init) @@ -3230,13 +3223,13 @@ ftrace_allocate_pages(unsigned long num_to_init) free_pages: pg = start_pg; while (pg) { - order = get_count_order(pg->size / ENTRIES_PER_PAGE); - if (order >= 0) - free_pages((unsigned long)pg->records, order); + if (pg->records) { + free_pages((unsigned long)pg->records, pg->order); + ftrace_number_of_pages -= 1 << pg->order; + } start_pg = pg->next; kfree(pg); pg = start_pg; - ftrace_number_of_pages -= 1 << order; ftrace_number_of_groups--; } pr_info("ftrace: FAILED to allocate memory for functions\n"); @@ -5407,7 +5400,7 @@ EXPORT_SYMBOL_GPL(modify_ftrace_direct); * @reset - non zero to reset all filters before applying this filter. * * Filters denote which functions should be enabled when tracing is enabled - * If @ip is NULL, it failes to update filter. + * If @ip is NULL, it fails to update filter. */ int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip, int remove, int reset) @@ -6221,6 +6214,7 @@ static int ftrace_process_locs(struct module *mod, p = start; pg = start_pg; while (p < end) { + unsigned long end_offset; addr = ftrace_call_adjust(*p++); /* * Some architecture linkers will pad between @@ -6231,7 +6225,8 @@ static int ftrace_process_locs(struct module *mod, if (!addr) continue; - if (pg->index == pg->size) { + end_offset = (pg->index+1) * sizeof(pg->records[0]); + if (end_offset > PAGE_SIZE << pg->order) { /* We should have allocated enough */ if (WARN_ON(!pg->next)) break; @@ -6359,7 +6354,7 @@ clear_mod_from_hash(struct ftrace_page *pg, struct ftrace_hash *hash) } } -/* Clear any records from hashs */ +/* Clear any records from hashes */ static void clear_mod_from_hashes(struct ftrace_page *pg) { struct trace_array *tr; @@ -6400,7 +6395,6 @@ void ftrace_release_mod(struct module *mod) struct ftrace_page **last_pg; struct ftrace_page *tmp_page = NULL; struct ftrace_page *pg; - int order; mutex_lock(&ftrace_lock); @@ -6451,12 +6445,12 @@ void ftrace_release_mod(struct module *mod) /* Needs to be called outside of ftrace_lock */ clear_mod_from_hashes(pg); - order = get_count_order(pg->size / ENTRIES_PER_PAGE); - if (order >= 0) - free_pages((unsigned long)pg->records, order); + if (pg->records) { + free_pages((unsigned long)pg->records, pg->order); + ftrace_number_of_pages -= 1 << pg->order; + } tmp_page = pg->next; kfree(pg); - ftrace_number_of_pages -= 1 << order; ftrace_number_of_groups--; } } @@ -6774,7 +6768,6 @@ void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr) struct ftrace_mod_map *mod_map = NULL; struct ftrace_init_func *func, *func_next; struct list_head clear_hash; - int order; INIT_LIST_HEAD(&clear_hash); @@ -6812,10 +6805,10 @@ void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr) ftrace_update_tot_cnt--; if (!pg->index) { *last_pg = pg->next; - order = get_count_order(pg->size / ENTRIES_PER_PAGE); - if (order >= 0) - free_pages((unsigned long)pg->records, order); - ftrace_number_of_pages -= 1 << order; + if (pg->records) { + 
free_pages((unsigned long)pg->records, pg->order); + ftrace_number_of_pages -= 1 << pg->order; + } ftrace_number_of_groups--; kfree(pg); pg = container_of(last_pg, struct ftrace_page, next); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 68744c51517e..2c0ee6484990 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -287,17 +287,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data); #define TS_MASK ((1ULL << TS_SHIFT) - 1) #define TS_DELTA_TEST (~TS_MASK) -/** - * ring_buffer_event_time_stamp - return the event's extended timestamp - * @event: the event to get the timestamp of - * - * Returns the extended timestamp associated with a data event. - * An extended time_stamp is a 64-bit timestamp represented - * internally in a special way that makes the best use of space - * contained within a ring buffer event. This function decodes - * it and maps it to a straight u64 value. - */ -u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event) +static u64 rb_event_time_stamp(struct ring_buffer_event *event) { u64 ts; @@ -487,6 +477,8 @@ struct rb_time_struct { #endif typedef struct rb_time_struct rb_time_t; +#define MAX_NEST 5 + /* * head_page == tail_page && head == tail then buffer is empty. */ @@ -524,6 +516,7 @@ struct ring_buffer_per_cpu { unsigned long read_bytes; rb_time_t write_stamp; rb_time_t before_stamp; + u64 event_stamp[MAX_NEST]; u64 read_stamp; /* ring buffer pages to update, > 0 to add, < 0 to remove */ long nr_pages_to_update; @@ -749,6 +742,99 @@ static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set) } #endif +/* + * Enable this to make sure that the event passed to + * ring_buffer_event_time_stamp() is not committed and also + * is on the buffer that it passed in. + */ +//#define RB_VERIFY_EVENT +#ifdef RB_VERIFY_EVENT +static struct list_head *rb_list_head(struct list_head *list); +static void verify_event(struct ring_buffer_per_cpu *cpu_buffer, + void *event) +{ + struct buffer_page *page = cpu_buffer->commit_page; + struct buffer_page *tail_page = READ_ONCE(cpu_buffer->tail_page); + struct list_head *next; + long commit, write; + unsigned long addr = (unsigned long)event; + bool done = false; + int stop = 0; + + /* Make sure the event exists and is not committed yet */ + do { + if (page == tail_page || WARN_ON_ONCE(stop++ > 100)) + done = true; + commit = local_read(&page->page->commit); + write = local_read(&page->write); + if (addr >= (unsigned long)&page->page->data[commit] && + addr < (unsigned long)&page->page->data[write]) + return; + + next = rb_list_head(page->list.next); + page = list_entry(next, struct buffer_page, list); + } while (!done); + WARN_ON_ONCE(1); +} +#else +static inline void verify_event(struct ring_buffer_per_cpu *cpu_buffer, + void *event) +{ +} +#endif + + +static inline u64 rb_time_stamp(struct trace_buffer *buffer); + +/** + * ring_buffer_event_time_stamp - return the event's current time stamp + * @buffer: The buffer that the event is on + * @event: the event to get the time stamp of + * + * Note, this must be called after @event is reserved, and before it is + * committed to the ring buffer. And must be called from the same + * context where the event was reserved (normal, softirq, irq, etc). + * + * Returns the time stamp associated with the current event. + * If the event has an extended time stamp, then that is used as + * the time stamp to return. 
+ * In the highly unlikely case that the event was nested more than + * the max nesting, then the write_stamp of the buffer is returned, + * otherwise current time is returned, but that really neither of + * the last two cases should ever happen. + */ +u64 ring_buffer_event_time_stamp(struct trace_buffer *buffer, + struct ring_buffer_event *event) +{ + struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[smp_processor_id()]; + unsigned int nest; + u64 ts; + + /* If the event includes an absolute time, then just use that */ + if (event->type_len == RINGBUF_TYPE_TIME_STAMP) + return rb_event_time_stamp(event); + + nest = local_read(&cpu_buffer->committing); + verify_event(cpu_buffer, event); + if (WARN_ON_ONCE(!nest)) + goto fail; + + /* Read the current saved nesting level time stamp */ + if (likely(--nest < MAX_NEST)) + return cpu_buffer->event_stamp[nest]; + + /* Shouldn't happen, warn if it does */ + WARN_ONCE(1, "nest (%d) greater than max", nest); + + fail: + /* Can only fail on 32 bit */ + if (!rb_time_read(&cpu_buffer->write_stamp, &ts)) + /* Screw it, just read the current time */ + ts = rb_time_stamp(cpu_buffer->buffer); + + return ts; +} + /** * ring_buffer_nr_pages - get the number of buffer pages in the ring buffer * @buffer: The ring_buffer to get the number of pages from @@ -994,7 +1080,7 @@ static inline u64 rb_time_stamp(struct trace_buffer *buffer) return ts << DEBUG_SHIFT; } -u64 ring_buffer_time_stamp(struct trace_buffer *buffer, int cpu) +u64 ring_buffer_time_stamp(struct trace_buffer *buffer) { u64 time; @@ -2710,6 +2796,10 @@ rb_update_event(struct ring_buffer_per_cpu *cpu_buffer, { unsigned length = info->length; u64 delta = info->delta; + unsigned int nest = local_read(&cpu_buffer->committing) - 1; + + if (!WARN_ON_ONCE(nest >= MAX_NEST)) + cpu_buffer->event_stamp[nest] = info->ts; /* * If we need to add a timestamp, then we @@ -2766,7 +2856,7 @@ static u64 rb_time_delta(struct ring_buffer_event *event) return 0; case RINGBUF_TYPE_TIME_EXTEND: - return ring_buffer_event_time_stamp(event); + return rb_event_time_stamp(event); case RINGBUF_TYPE_TIME_STAMP: return 0; @@ -3064,7 +3154,7 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) * is called before preempt_count() is updated, since the check will * be on the NORMAL bit, the TRANSITION bit will then be set. If an * NMI then comes in, it will set the NMI bit, but when the NMI code - * does the trace_recursive_unlock() it will clear the TRANSTION bit + * does the trace_recursive_unlock() it will clear the TRANSITION bit * and leave the NMI bit set. But this is fine, because the interrupt * code that set the TRANSITION bit will then clear the NMI bit when it * calls trace_recursive_unlock(). 
If another NMI comes in, it will @@ -3212,13 +3302,13 @@ static void dump_buffer_page(struct buffer_data_page *bpage, switch (event->type_len) { case RINGBUF_TYPE_TIME_EXTEND: - delta = ring_buffer_event_time_stamp(event); + delta = rb_event_time_stamp(event); ts += delta; pr_warn(" [%lld] delta:%lld TIME EXTEND\n", ts, delta); break; case RINGBUF_TYPE_TIME_STAMP: - delta = ring_buffer_event_time_stamp(event); + delta = rb_event_time_stamp(event); ts = delta; pr_warn(" [%lld] absolute:%lld TIME STAMP\n", ts, delta); break; @@ -3289,12 +3379,12 @@ static void check_buffer(struct ring_buffer_per_cpu *cpu_buffer, switch (event->type_len) { case RINGBUF_TYPE_TIME_EXTEND: - delta = ring_buffer_event_time_stamp(event); + delta = rb_event_time_stamp(event); ts += delta; break; case RINGBUF_TYPE_TIME_STAMP: - delta = ring_buffer_event_time_stamp(event); + delta = rb_event_time_stamp(event); ts = delta; break; @@ -3451,7 +3541,6 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, info->after, ts)) { /* Nothing came after this event between C and E */ info->delta = ts - info->after; - info->ts = ts; } else { /* * Interrupted between C and E: @@ -3463,6 +3552,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, */ info->delta = 0; } + info->ts = ts; info->add_timestamp &= ~RB_ADD_STAMP_FORCE; } @@ -4256,12 +4346,12 @@ rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer, return; case RINGBUF_TYPE_TIME_EXTEND: - delta = ring_buffer_event_time_stamp(event); + delta = rb_event_time_stamp(event); cpu_buffer->read_stamp += delta; return; case RINGBUF_TYPE_TIME_STAMP: - delta = ring_buffer_event_time_stamp(event); + delta = rb_event_time_stamp(event); cpu_buffer->read_stamp = delta; return; @@ -4286,12 +4376,12 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter, return; case RINGBUF_TYPE_TIME_EXTEND: - delta = ring_buffer_event_time_stamp(event); + delta = rb_event_time_stamp(event); iter->read_stamp += delta; return; case RINGBUF_TYPE_TIME_STAMP: - delta = ring_buffer_event_time_stamp(event); + delta = rb_event_time_stamp(event); iter->read_stamp = delta; return; @@ -4544,7 +4634,7 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts, case RINGBUF_TYPE_TIME_STAMP: if (ts) { - *ts = ring_buffer_event_time_stamp(event); + *ts = rb_event_time_stamp(event); ring_buffer_normalize_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu, ts); } @@ -4635,7 +4725,7 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) case RINGBUF_TYPE_TIME_STAMP: if (ts) { - *ts = ring_buffer_event_time_stamp(event); + *ts = rb_event_time_stamp(event); ring_buffer_normalize_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu, ts); } @@ -5021,6 +5111,8 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) rb_time_set(&cpu_buffer->write_stamp, 0); rb_time_set(&cpu_buffer->before_stamp, 0); + memset(cpu_buffer->event_stamp, 0, sizeof(cpu_buffer->event_stamp)); + cpu_buffer->lost_events = 0; cpu_buffer->last_overrun = 0; diff --git a/kernel/trace/synth_event_gen_test.c b/kernel/trace/synth_event_gen_test.c index a4b4bbf8c3bf..0b15e975d2c2 100644 --- a/kernel/trace/synth_event_gen_test.c +++ b/kernel/trace/synth_event_gen_test.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Test module for in-kernel sythetic event creation and generation. + * Test module for in-kernel synthetic event creation and generation. 
* * Copyright (C) 2019 Tom Zanussi <zanussi@kernel.org> */ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 915fe8790f04..560e4c8d3825 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -514,7 +514,7 @@ void trace_free_pid_list(struct trace_pid_list *pid_list) * @filtered_pids: The list of pids to check * @search_pid: The PID to find in @filtered_pids * - * Returns true if @search_pid is fonud in @filtered_pids, and false otherwis. + * Returns true if @search_pid is found in @filtered_pids, and false otherwise. */ bool trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid) @@ -545,7 +545,7 @@ trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task) { /* - * If filterd_no_pids is not empty, and the task's pid is listed + * If filtered_no_pids is not empty, and the task's pid is listed * in filtered_no_pids, then return true. * Otherwise, if filtered_pids is empty, that means we can * trace all tasks. If it has content, then only trace pids @@ -612,7 +612,7 @@ void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos) (*pos)++; - /* pid already is +1 of the actual prevous bit */ + /* pid already is +1 of the actual previous bit */ pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid); /* Return pid + 1 to allow zero to be represented */ @@ -771,7 +771,7 @@ static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu) if (!buf->buffer) return trace_clock_local(); - ts = ring_buffer_time_stamp(buf->buffer, cpu); + ts = ring_buffer_time_stamp(buf->buffer); ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts); return ts; @@ -834,7 +834,7 @@ DEFINE_MUTEX(trace_types_lock); * The content of events may become garbage if we allow other process consumes * these events concurrently: * A) the page of the consumed events may become a normal page - * (not reader page) in ring buffer, and this page will be rewrited + * (not reader page) in ring buffer, and this page will be rewritten * by events producer. * B) The page of the consumed events may become a page for splice_read, * and this page will be returned to system. @@ -1520,7 +1520,7 @@ unsigned long nsecs_to_usecs(unsigned long nsecs) #undef C #define C(a, b) b -/* These must match the bit postions in trace_iterator_flags */ +/* These must match the bit positions in trace_iterator_flags */ static const char *trace_options[] = { TRACE_FLAGS NULL @@ -2390,14 +2390,13 @@ static void tracing_stop_tr(struct trace_array *tr) static int trace_save_cmdline(struct task_struct *tsk) { - unsigned pid, idx; + unsigned tpid, idx; /* treat recording of idle task as a success */ if (!tsk->pid) return 1; - if (unlikely(tsk->pid > PID_MAX_DEFAULT)) - return 0; + tpid = tsk->pid & (PID_MAX_DEFAULT - 1); /* * It's not the end of the world if we don't get @@ -2408,26 +2407,15 @@ static int trace_save_cmdline(struct task_struct *tsk) if (!arch_spin_trylock(&trace_cmdline_lock)) return 0; - idx = savedcmd->map_pid_to_cmdline[tsk->pid]; + idx = savedcmd->map_pid_to_cmdline[tpid]; if (idx == NO_CMDLINE_MAP) { idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num; - /* - * Check whether the cmdline buffer at idx has a pid - * mapped. We are going to overwrite that entry so we - * need to clear the map_pid_to_cmdline. Otherwise we - * would read the new comm for the old pid. 
- */ - pid = savedcmd->map_cmdline_to_pid[idx]; - if (pid != NO_CMDLINE_MAP) - savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP; - - savedcmd->map_cmdline_to_pid[idx] = tsk->pid; - savedcmd->map_pid_to_cmdline[tsk->pid] = idx; - + savedcmd->map_pid_to_cmdline[tpid] = idx; savedcmd->cmdline_idx = idx; } + savedcmd->map_cmdline_to_pid[idx] = tsk->pid; set_cmdline(idx, tsk->comm); arch_spin_unlock(&trace_cmdline_lock); @@ -2438,6 +2426,7 @@ static int trace_save_cmdline(struct task_struct *tsk) static void __trace_find_cmdline(int pid, char comm[]) { unsigned map; + int tpid; if (!pid) { strcpy(comm, "<idle>"); @@ -2449,16 +2438,16 @@ static void __trace_find_cmdline(int pid, char comm[]) return; } - if (pid > PID_MAX_DEFAULT) { - strcpy(comm, "<...>"); - return; + tpid = pid & (PID_MAX_DEFAULT - 1); + map = savedcmd->map_pid_to_cmdline[tpid]; + if (map != NO_CMDLINE_MAP) { + tpid = savedcmd->map_cmdline_to_pid[map]; + if (tpid == pid) { + strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN); + return; + } } - - map = savedcmd->map_pid_to_cmdline[pid]; - if (map != NO_CMDLINE_MAP) - strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN); - else - strcpy(comm, "<...>"); + strcpy(comm, "<...>"); } void trace_find_cmdline(int pid, char comm[]) @@ -2737,12 +2726,13 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, unsigned int trace_ctx) { struct ring_buffer_event *entry; + struct trace_array *tr = trace_file->tr; int val; - *current_rb = trace_file->tr->array_buffer.buffer; + *current_rb = tr->array_buffer.buffer; - if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags & - (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) && + if (!tr->no_filter_buffering_ref && + (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) && (entry = this_cpu_read(trace_buffered_event))) { /* Try to use the per cpu buffer first */ val = this_cpu_inc_return(trace_buffered_event_cnt); @@ -3116,6 +3106,40 @@ static void ftrace_trace_userstack(struct trace_array *tr, #endif /* CONFIG_STACKTRACE */ +static inline void +func_repeats_set_delta_ts(struct func_repeats_entry *entry, + unsigned long long delta) +{ + entry->bottom_delta_ts = delta & U32_MAX; + entry->top_delta_ts = (delta >> 32); +} + +void trace_last_func_repeats(struct trace_array *tr, + struct trace_func_repeats *last_info, + unsigned int trace_ctx) +{ + struct trace_buffer *buffer = tr->array_buffer.buffer; + struct func_repeats_entry *entry; + struct ring_buffer_event *event; + u64 delta; + + event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS, + sizeof(*entry), trace_ctx); + if (!event) + return; + + delta = ring_buffer_event_time_stamp(buffer, event) - + last_info->ts_last_call; + + entry = ring_buffer_event_data(event); + entry->ip = last_info->ip; + entry->parent_ip = last_info->parent_ip; + entry->count = last_info->count; + func_repeats_set_delta_ts(entry, delta); + + __buffer_unlock_commit(buffer, event); +} + /* created for use with alloc_percpu */ struct trace_buffer_struct { int nesting; @@ -3368,7 +3392,7 @@ int trace_array_vprintk(struct trace_array *tr, * buffer (use trace_printk() for that), as writing into the top level * buffer should only have events that can be individually disabled. * trace_printk() is only used for debugging a kernel, and should not - * be ever encorporated in normal use. + * be ever incorporated in normal use. * * trace_array_printk() can be used, as it will not add noise to the * top level tracing buffer. 
@@ -3562,6 +3586,204 @@ static char *trace_iter_expand_format(struct trace_iterator *iter) return tmp; } +/* Returns true if the string is safe to dereference from an event */ +static bool trace_safe_str(struct trace_iterator *iter, const char *str) +{ + unsigned long addr = (unsigned long)str; + struct trace_event *trace_event; + struct trace_event_call *event; + + /* OK if part of the event data */ + if ((addr >= (unsigned long)iter->ent) && + (addr < (unsigned long)iter->ent + iter->ent_size)) + return true; + + /* OK if part of the temp seq buffer */ + if ((addr >= (unsigned long)iter->tmp_seq.buffer) && + (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE)) + return true; + + /* Core rodata can not be freed */ + if (is_kernel_rodata(addr)) + return true; + + if (trace_is_tracepoint_string(str)) + return true; + + /* + * Now this could be a module event, referencing core module + * data, which is OK. + */ + if (!iter->ent) + return false; + + trace_event = ftrace_find_event(iter->ent->type); + if (!trace_event) + return false; + + event = container_of(trace_event, struct trace_event_call, event); + if (!event->mod) + return false; + + /* Would rather have rodata, but this will suffice */ + if (within_module_core(addr, event->mod)) + return true; + + return false; +} + +static const char *show_buffer(struct trace_seq *s) +{ + struct seq_buf *seq = &s->seq; + + seq_buf_terminate(seq); + + return seq->buffer; +} + +static DEFINE_STATIC_KEY_FALSE(trace_no_verify); + +static int test_can_verify_check(const char *fmt, ...) +{ + char buf[16]; + va_list ap; + int ret; + + /* + * The verifier is dependent on vsnprintf() modifies the va_list + * passed to it, where it is sent as a reference. Some architectures + * (like x86_32) passes it by value, which means that vsnprintf() + * does not modify the va_list passed to it, and the verifier + * would then need to be able to understand all the values that + * vsnprintf can use. If it is passed by value, then the verifier + * is disabled. + */ + va_start(ap, fmt); + vsnprintf(buf, 16, "%d", ap); + ret = va_arg(ap, int); + va_end(ap); + + return ret; +} + +static void test_can_verify(void) +{ + if (!test_can_verify_check("%d %d", 0, 1)) { + pr_info("trace event string verifier disabled\n"); + static_branch_inc(&trace_no_verify); + } +} + +/** + * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer + * @iter: The iterator that holds the seq buffer and the event being printed + * @fmt: The format used to print the event + * @ap: The va_list holding the data to print from @fmt. + * + * This writes the data into the @iter->seq buffer using the data from + * @fmt and @ap. If the format has a %s, then the source of the string + * is examined to make sure it is safe to print, otherwise it will + * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string + * pointer. + */ +void trace_check_vprintf(struct trace_iterator *iter, const char *fmt, + va_list ap) +{ + const char *p = fmt; + const char *str; + int i, j; + + if (WARN_ON_ONCE(!fmt)) + return; + + if (static_branch_unlikely(&trace_no_verify)) + goto print; + + /* Don't bother checking when doing a ftrace_dump() */ + if (iter->fmt == static_fmt_buf) + goto print; + + while (*p) { + j = 0; + + /* We only care about %s and variants */ + for (i = 0; p[i]; i++) { + if (i + 1 >= iter->fmt_size) { + /* + * If we can't expand the copy buffer, + * just print it. 
+ */ + if (!trace_iter_expand_format(iter)) + goto print; + } + + if (p[i] == '\\' && p[i+1]) { + i++; + continue; + } + if (p[i] == '%') { + /* Need to test cases like %08.*s */ + for (j = 1; p[i+j]; j++) { + if (isdigit(p[i+j]) || + p[i+j] == '*' || + p[i+j] == '.') + continue; + break; + } + if (p[i+j] == 's') + break; + } + j = 0; + } + /* If no %s found then just print normally */ + if (!p[i]) + break; + + /* Copy up to the %s, and print that */ + strncpy(iter->fmt, p, i); + iter->fmt[i] = '\0'; + trace_seq_vprintf(&iter->seq, iter->fmt, ap); + + /* The ap now points to the string data of the %s */ + str = va_arg(ap, const char *); + + /* + * If you hit this warning, it is likely that the + * trace event in question used %s on a string that + * was saved at the time of the event, but may not be + * around when the trace is read. Use __string(), + * __assign_str() and __get_str() helpers in the TRACE_EVENT() + * instead. See samples/trace_events/trace-events-sample.h + * for reference. + */ + if (WARN_ONCE(!trace_safe_str(iter, str), + "fmt: '%s' current_buffer: '%s'", + fmt, show_buffer(&iter->seq))) { + int ret; + + /* Try to safely read the string */ + ret = strncpy_from_kernel_nofault(iter->fmt, str, + iter->fmt_size); + if (ret < 0) + trace_seq_printf(&iter->seq, "(0x%px)", str); + else + trace_seq_printf(&iter->seq, "(0x%px:%s)", + str, iter->fmt); + str = "[UNSAFE-MEMORY]"; + strcpy(iter->fmt, "%s"); + } else { + strncpy(iter->fmt, p + i, j + 1); + iter->fmt[j+1] = '\0'; + } + trace_seq_printf(&iter->seq, iter->fmt, str); + + p += i + j + 1; + } + print: + if (*p) + trace_seq_vprintf(&iter->seq, p, ap); +} + const char *trace_event_format(struct trace_iterator *iter, const char *fmt) { const char *p, *new_fmt; @@ -6768,7 +6990,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) { /* do not add \n before testing triggers, but add \0 */ entry->buf[cnt] = '\0'; - tt = event_triggers_call(tr->trace_marker_file, entry, event); + tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event); } if (entry->buf[cnt - 1] != '\n') { @@ -6976,31 +7198,34 @@ static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file) return ret; } -int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs) +u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe) +{ + if (rbe == this_cpu_read(trace_buffered_event)) + return ring_buffer_time_stamp(buffer); + + return ring_buffer_event_time_stamp(buffer, rbe); +} + +/* + * Set or disable using the per CPU trace_buffer_event when possible. 
+ */ +int tracing_set_filter_buffering(struct trace_array *tr, bool set) { int ret = 0; mutex_lock(&trace_types_lock); - if (abs && tr->time_stamp_abs_ref++) + if (set && tr->no_filter_buffering_ref++) goto out; - if (!abs) { - if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) { + if (!set) { + if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) { ret = -EINVAL; goto out; } - if (--tr->time_stamp_abs_ref) - goto out; + --tr->no_filter_buffering_ref; } - - ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs); - -#ifdef CONFIG_TRACER_MAX_TRACE - if (tr->max_buffer.buffer) - ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs); -#endif out: mutex_unlock(&trace_types_lock); @@ -7336,11 +7561,11 @@ static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr) * @cmd: The tracing command that caused the error * @str: The string to position the caret at within @cmd * - * Finds the position of the first occurence of @str within @cmd. The + * Finds the position of the first occurrence of @str within @cmd. The * return value can be passed to tracing_log_err() for caret placement * within @cmd. * - * Returns the index within @cmd of the first occurence of @str or 0 + * Returns the index within @cmd of the first occurrence of @str or 0 * if @str was not found. */ unsigned int err_pos(char *cmd, const char *str) @@ -7890,7 +8115,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf, trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n", t, usec_rem); - t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu)); + t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer)); usec_rem = do_div(t, USEC_PER_SEC); trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem); } else { @@ -7899,7 +8124,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf, ring_buffer_oldest_event_ts(trace_buf->buffer, cpu)); trace_seq_printf(s, "now ts: %llu\n", - ring_buffer_time_stamp(trace_buf->buffer, cpu)); + ring_buffer_time_stamp(trace_buf->buffer)); } cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu); @@ -8906,6 +9131,7 @@ static int __remove_instance(struct trace_array *tr) ftrace_clear_pids(tr); ftrace_destroy_function_files(tr); tracefs_remove(tr->dir); + free_percpu(tr->last_func_repeats); free_trace_buffers(tr); for (i = 0; i < tr->nr_topts; i++) { @@ -9123,7 +9349,7 @@ int tracing_init_dentry(void) * As there may still be users that expect the tracing * files to exist in debugfs/tracing, we must automount * the tracefs file system there, so older tools still - * work with the newer kerenl. + * work with the newer kernel. */ tr->dir = debugfs_create_automount("tracing", NULL, trace_automount, NULL); @@ -9676,6 +9902,8 @@ __init static int tracer_alloc_buffers(void) register_snapshot_cmd(); + test_can_verify(); + return 0; out_free_savedcmd: diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index a6446c03cfbc..cd80d046c7a5 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -45,6 +45,7 @@ enum trace_type { TRACE_BPUTS, TRACE_HWLAT, TRACE_RAW_DATA, + TRACE_FUNC_REPEATS, __TRACE_LAST_TYPE, }; @@ -262,6 +263,17 @@ struct cond_snapshot { }; /* + * struct trace_func_repeats - used to keep track of the consecutive + * (on the same CPU) calls of a single function. + */ +struct trace_func_repeats { + unsigned long ip; + unsigned long parent_ip; + unsigned long count; + u64 ts_last_call; +}; + +/* * The trace array - an array of per-CPU trace arrays. This is the * highest level data structure that individual tracers deal with. 
* They have on/off state as well: @@ -352,11 +364,12 @@ struct trace_array { /* function tracing enabled */ int function_enabled; #endif - int time_stamp_abs_ref; + int no_filter_buffering_ref; struct list_head hist_vars; #ifdef CONFIG_TRACER_SNAPSHOT struct cond_snapshot *cond_snapshot; #endif + struct trace_func_repeats __percpu *last_func_repeats; }; enum { @@ -372,7 +385,8 @@ extern int tracing_check_open_get_tr(struct trace_array *tr); extern struct trace_array *trace_array_find(const char *instance); extern struct trace_array *trace_array_find_get(const char *instance); -extern int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs); +extern u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe); +extern int tracing_set_filter_buffering(struct trace_array *tr, bool set); extern int tracing_set_clock(struct trace_array *tr, const char *clockstr); extern bool trace_clock_in_ns(struct trace_array *tr); @@ -441,6 +455,8 @@ extern void __ftrace_bad_type(void); TRACE_GRAPH_ENT); \ IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ TRACE_GRAPH_RET); \ + IF_ASSIGN(var, ent, struct func_repeats_entry, \ + TRACE_FUNC_REPEATS); \ __ftrace_bad_type(); \ } while (0) @@ -581,7 +597,10 @@ struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, void trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, struct ring_buffer_event *event); +bool trace_is_tracepoint_string(const char *str); const char *trace_event_format(struct trace_iterator *iter, const char *fmt); +void trace_check_vprintf(struct trace_iterator *iter, const char *fmt, + va_list ap); int trace_empty(struct trace_iterator *iter); @@ -676,6 +695,10 @@ static inline void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, } #endif /* CONFIG_STACKTRACE */ +void trace_last_func_repeats(struct trace_array *tr, + struct trace_func_repeats *last_info, + unsigned int trace_ctx); + extern u64 ftrace_now(int cpu); extern void trace_find_cmdline(int pid, char comm[]); @@ -1329,7 +1352,7 @@ __event_trigger_test_discard(struct trace_event_file *file, unsigned long eflags = file->flags; if (eflags & EVENT_FILE_FL_TRIGGER_COND) - *tt = event_triggers_call(file, entry, event); + *tt = event_triggers_call(file, buffer, entry, event); if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) || (unlikely(file->flags & EVENT_FILE_FL_FILTERED) && @@ -1343,7 +1366,7 @@ __event_trigger_test_discard(struct trace_event_file *file, /** * event_trigger_unlock_commit - handle triggers and finish event commit - * @file: The file pointer assoctiated to the event + * @file: The file pointer associated with the event * @buffer: The ring buffer that the event is being written to * @event: The event meta data in the ring buffer * @entry: The event itself @@ -1370,7 +1393,7 @@ event_trigger_unlock_commit(struct trace_event_file *file, /** * event_trigger_unlock_commit_regs - handle triggers and finish event commit - * @file: The file pointer assoctiated to the event + * @file: The file pointer associated with the event * @buffer: The ring buffer that the event is being written to * @event: The event meta data in the ring buffer * @entry: The event itself @@ -1626,7 +1649,7 @@ extern int register_trigger_hist_enable_disable_cmds(void); */ struct event_trigger_ops { void (*func)(struct event_trigger_data *data, - void *rec, + struct trace_buffer *buffer, void *rec, struct ring_buffer_event *rbe); int (*init)(struct event_trigger_ops *ops, struct event_trigger_data *data); diff --git 
a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index aaf6793ededa..c1637f90c8a3 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c @@ -95,33 +95,49 @@ u64 notrace trace_clock_global(void) { unsigned long flags; int this_cpu; - u64 now; + u64 now, prev_time; raw_local_irq_save(flags); this_cpu = raw_smp_processor_id(); - now = sched_clock_cpu(this_cpu); + /* - * If in an NMI context then dont risk lockups and return the - * cpu_clock() time: + * The global clock "guarantees" that the events are ordered + * between CPUs. But if two events on two different CPUS call + * trace_clock_global at roughly the same time, it really does + * not matter which one gets the earlier time. Just make sure + * that the same CPU will always show a monotonic clock. + * + * Use a read memory barrier to get the latest written + * time that was recorded. */ - if (unlikely(in_nmi())) - goto out; + smp_rmb(); + prev_time = READ_ONCE(trace_clock_struct.prev_time); + now = sched_clock_cpu(this_cpu); - arch_spin_lock(&trace_clock_struct.lock); + /* Make sure that now is always greater than prev_time */ + if ((s64)(now - prev_time) < 0) + now = prev_time + 1; /* - * TODO: if this happens often then maybe we should reset - * my_scd->clock to prev_time+1, to make sure - * we start ticking with the local clock from now on? + * If in an NMI context then dont risk lockups and simply return + * the current time. */ - if ((s64)(now - trace_clock_struct.prev_time) < 0) - now = trace_clock_struct.prev_time + 1; + if (unlikely(in_nmi())) + goto out; - trace_clock_struct.prev_time = now; + /* Tracing can cause strange recursion, always use a try lock */ + if (arch_spin_trylock(&trace_clock_struct.lock)) { + /* Reread prev_time in case it was already updated */ + prev_time = READ_ONCE(trace_clock_struct.prev_time); + if ((s64)(now - prev_time) < 0) + now = prev_time + 1; - arch_spin_unlock(&trace_clock_struct.lock); + trace_clock_struct.prev_time = now; + /* The unlock acts as the wmb for the above rmb */ + arch_spin_unlock(&trace_clock_struct.lock); + } out: raw_local_irq_restore(flags); diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index 4547ac59da61..251c819cf0c5 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -338,3 +338,25 @@ FTRACE_ENTRY(hwlat, hwlat_entry, __entry->nmi_total_ts, __entry->nmi_count) ); + +#define FUNC_REPEATS_GET_DELTA_TS(entry) \ + (((u64)(entry)->top_delta_ts << 32) | (entry)->bottom_delta_ts) \ + +FTRACE_ENTRY(func_repeats, func_repeats_entry, + + TRACE_FUNC_REPEATS, + + F_STRUCT( + __field( unsigned long, ip ) + __field( unsigned long, parent_ip ) + __field( u16 , count ) + __field( u16 , top_delta_ts ) + __field( u32 , bottom_delta_ts ) + ), + + F_printk(" %ps <-%ps\t(repeats:%u delta: -%llu)", + (void *)__entry->ip, + (void *)__entry->parent_ip, + __entry->count, + FUNC_REPEATS_GET_DELTA_TS(__entry)) +); diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 288ad2c274fb..03be4435d103 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -16,7 +16,7 @@ static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS]; /* * Force it to be aligned to unsigned long to avoid misaligned accesses - * suprises + * surprises */ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)]) perf_trace_t; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index a3563afd412d..80e96989770e 100644 --- a/kernel/trace/trace_events.c +++ 
b/kernel/trace/trace_events.c @@ -217,6 +217,214 @@ int trace_event_get_offsets(struct trace_event_call *call) return tail->offset + tail->size; } +/* + * Check if the referenced field is an array and return true, + * as arrays are OK to dereference. + */ +static bool test_field(const char *fmt, struct trace_event_call *call) +{ + struct trace_event_fields *field = call->class->fields_array; + const char *array_descriptor; + const char *p = fmt; + int len; + + if (!(len = str_has_prefix(fmt, "REC->"))) + return false; + fmt += len; + for (p = fmt; *p; p++) { + if (!isalnum(*p) && *p != '_') + break; + } + len = p - fmt; + + for (; field->type; field++) { + if (strncmp(field->name, fmt, len) || + field->name[len]) + continue; + array_descriptor = strchr(field->type, '['); + /* This is an array and is OK to dereference. */ + return array_descriptor != NULL; + } + return false; +} + +/* + * Examine the print fmt of the event looking for unsafe dereference + * pointers using %p* that could be recorded in the trace event and + * much later referenced after the pointer was freed. Dereferencing + * pointers are OK, if it is dereferenced into the event itself. + */ +static void test_event_printk(struct trace_event_call *call) +{ + u64 dereference_flags = 0; + bool first = true; + const char *fmt, *c, *r, *a; + int parens = 0; + char in_quote = 0; + int start_arg = 0; + int arg = 0; + int i; + + fmt = call->print_fmt; + + if (!fmt) + return; + + for (i = 0; fmt[i]; i++) { + switch (fmt[i]) { + case '\\': + i++; + if (!fmt[i]) + return; + continue; + case '"': + case '\'': + /* + * The print fmt starts with a string that + * is processed first to find %p* usage, + * then after the first string, the print fmt + * contains arguments that are used to check + * if the dereferenced %p* usage is safe. + */ + if (first) { + if (fmt[i] == '\'') + continue; + if (in_quote) { + arg = 0; + first = false; + /* + * If there was no %p* uses + * the fmt is OK. + */ + if (!dereference_flags) + return; + } + } + if (in_quote) { + if (in_quote == fmt[i]) + in_quote = 0; + } else { + in_quote = fmt[i]; + } + continue; + case '%': + if (!first || !in_quote) + continue; + i++; + if (!fmt[i]) + return; + switch (fmt[i]) { + case '%': + continue; + case 'p': + /* Find dereferencing fields */ + switch (fmt[i + 1]) { + case 'B': case 'R': case 'r': + case 'b': case 'M': case 'm': + case 'I': case 'i': case 'E': + case 'U': case 'V': case 'N': + case 'a': case 'd': case 'D': + case 'g': case 't': case 'C': + case 'O': case 'f': + if (WARN_ONCE(arg == 63, + "Too many args for event: %s", + trace_event_name(call))) + return; + dereference_flags |= 1ULL << arg; + } + break; + default: + { + bool star = false; + int j; + + /* Increment arg if %*s exists. 
*/ + for (j = 0; fmt[i + j]; j++) { + if (isdigit(fmt[i + j]) || + fmt[i + j] == '.') + continue; + if (fmt[i + j] == '*') { + star = true; + continue; + } + if ((fmt[i + j] == 's') && star) + arg++; + break; + } + break; + } /* default */ + + } /* switch */ + arg++; + continue; + case '(': + if (in_quote) + continue; + parens++; + continue; + case ')': + if (in_quote) + continue; + parens--; + if (WARN_ONCE(parens < 0, + "Paren mismatch for event: %s\narg='%s'\n%*s", + trace_event_name(call), + fmt + start_arg, + (i - start_arg) + 5, "^")) + return; + continue; + case ',': + if (in_quote || parens) + continue; + i++; + while (isspace(fmt[i])) + i++; + start_arg = i; + if (!(dereference_flags & (1ULL << arg))) + goto next_arg; + + /* Find the REC-> in the argument */ + c = strchr(fmt + i, ','); + r = strstr(fmt + i, "REC->"); + if (r && (!c || r < c)) { + /* + * Addresses of events on the buffer, + * or an array on the buffer is + * OK to dereference. + * There's ways to fool this, but + * this is to catch common mistakes, + * not malicious code. + */ + a = strchr(fmt + i, '&'); + if ((a && (a < r)) || test_field(r, call)) + dereference_flags &= ~(1ULL << arg); + } + next_arg: + i--; + arg++; + } + } + + /* + * If you triggered the below warning, the trace event reported + * uses an unsafe dereference pointer %p*. As the data stored + * at the trace event time may no longer exist when the trace + * event is printed, dereferencing to the original source is + * unsafe. The source of the dereference must be copied into the + * event itself, and the dereference must access the copy instead. + */ + if (WARN_ON_ONCE(dereference_flags)) { + arg = 1; + while (!(dereference_flags & 1)) { + dereference_flags >>= 1; + arg++; + } + pr_warn("event %s has unsafe dereference of argument %d\n", + trace_event_name(call), arg); + pr_warn("print_fmt: %s\n", fmt); + } +} + int trace_event_raw_init(struct trace_event_call *call) { int id; @@ -225,6 +433,8 @@ int trace_event_raw_init(struct trace_event_call *call) if (!id) return -ENODEV; + test_event_printk(call); + return 0; } EXPORT_SYMBOL_GPL(trace_event_raw_init); @@ -2436,7 +2646,7 @@ void trace_event_eval_update(struct trace_eval_map **map, int len) } /* - * Since calls are grouped by systems, the likelyhood that the + * Since calls are grouped by systems, the likelihood that the * next call in the iteration belongs to the same system as the * previous call is high. As an optimization, we skip searching * for a map[] that matches the call's system if the last call @@ -2496,7 +2706,7 @@ __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr) } /* - * Just create a decriptor for early init. A descriptor is required + * Just create a descriptor for early init. A descriptor is required * for enabling events at boot. We want to enable events before * the filesystem is initialized. */ diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index e91259f6a722..c9124038b140 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -256,7 +256,7 @@ enum { * is "&&" we don't call update_preds(). Instead continue to "c". As the * next token after "c" is not "&&" but the end of input, we first process the * "&&" by calling update_preds() for the "&&" then we process the "||" by - * callin updates_preds() with the values for processing "||". + * calling updates_preds() with the values for processing "||". * * What does that mean? 
What update_preds() does is to first save the "target" * of the program entry indexed by the current program entry's "target" @@ -296,7 +296,7 @@ enum { * and "FALSE" the program entry after that, we are now done with the first * pass. * - * Making the above "a || b && c" have a progam of: + * Making the above "a || b && c" have a program of: * prog[0] = { "a", 1, 2 } * prog[1] = { "b", 0, 2 } * prog[2] = { "c", 0, 3 } @@ -390,7 +390,7 @@ enum { * F: return FALSE * * As "r = a; if (!r) goto n5;" is obviously the same as - * "if (!a) goto n5;" without doing anything we can interperate the + * "if (!a) goto n5;" without doing anything we can interpret the * program as: * n1: if (!a) goto n5; * n2: if (!b) goto n5; @@ -1693,6 +1693,7 @@ static void create_filter_finish(struct filter_parse_error *pe) /** * create_filter - create a filter for a trace_event_call + * @tr: the trace array associated with these events * @call: trace_event_call to create a filter for * @filter_str: filter string * @set_str: remember @filter_str and enable detailed error in filter @@ -1741,8 +1742,8 @@ int create_event_filter(struct trace_array *tr, } /** - * create_system_filter - create a filter for an event_subsystem - * @system: event_subsystem to create a filter for + * create_system_filter - create a filter for an event subsystem + * @dir: the descriptor for the subsystem directory * @filter_str: filter string * @filterp: out param for created filter (always updated on return) * @@ -1750,7 +1751,6 @@ int create_event_filter(struct trace_array *tr, * and always remembers @filter_str. */ static int create_system_filter(struct trace_subsystem_dir *dir, - struct trace_array *tr, char *filter_str, struct event_filter **filterp) { struct filter_parse_error *pe = NULL; @@ -1758,13 +1758,13 @@ static int create_system_filter(struct trace_subsystem_dir *dir, err = create_filter_start(filter_str, true, &pe, filterp); if (!err) { - err = process_system_preds(dir, tr, pe, filter_str); + err = process_system_preds(dir, dir->tr, pe, filter_str); if (!err) { /* System filters just show a default message */ kfree((*filterp)->filter_string); (*filterp)->filter_string = NULL; } else { - append_filter_err(tr, pe, *filterp); + append_filter_err(dir->tr, pe, *filterp); } } create_filter_finish(pe); @@ -1852,7 +1852,7 @@ int apply_subsystem_event_filter(struct trace_subsystem_dir *dir, goto out_unlock; } - err = create_system_filter(dir, tr, filter_string, &filter); + err = create_system_filter(dir, filter_string, &filter); if (filter) { /* * No event actually uses the system filter diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 39ebe1826fc3..c1abd63f1d6c 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -81,6 +81,7 @@ struct hist_field; typedef u64 (*hist_field_fn_t) (struct hist_field *field, struct tracing_map_elt *elt, + struct trace_buffer *buffer, struct ring_buffer_event *rbe, void *event); @@ -153,6 +154,7 @@ struct hist_field { static u64 hist_field_none(struct hist_field *field, struct tracing_map_elt *elt, + struct trace_buffer *buffer, struct ring_buffer_event *rbe, void *event) { @@ -161,6 +163,7 @@ static u64 hist_field_none(struct hist_field *field, static u64 hist_field_counter(struct hist_field *field, struct tracing_map_elt *elt, + struct trace_buffer *buffer, struct ring_buffer_event *rbe, void *event) { @@ -169,6 +172,7 @@ static u64 hist_field_counter(struct hist_field *field, static u64 hist_field_string(struct hist_field *hist_field, 
struct tracing_map_elt *elt, + struct trace_buffer *buffer, struct ring_buffer_event *rbe, void *event) { @@ -179,6 +183,7 @@ static u64 hist_field_string(struct hist_field *hist_field, static u64 hist_field_dynstring(struct hist_field *hist_field, struct tracing_map_elt *elt, + struct trace_buffer *buffer, struct ring_buffer_event *rbe, void *event) { @@ -191,6 +196,7 @@ static u64 hist_field_dynstring(struct hist_field *hist_field, static u64 hist_field_pstring(struct hist_field *hist_field, struct tracing_map_elt *elt, + struct trace_buffer *buffer, struct ring_buffer_event *rbe, void *event) { @@ -201,52 +207,56 @@ static u64 hist_field_pstring(struct hist_field *hist_field, static u64 hist_field_log2(struct hist_field *hist_field, struct tracing_map_elt *elt, + struct trace_buffer *buffer, struct ring_buffer_event *rbe, void *event) { struct hist_field *operand = hist_field->operands[0]; - u64 val = operand->fn(operand, elt, rbe, event); + u64 val = operand->fn(operand, elt, buffer, rbe, event); return (u64) ilog2(roundup_pow_of_two(val)); } static u64 hist_field_plus(struct hist_field *hist_field, struct tracing_map_elt *elt, + struct trace_buffer *buffer, struct ring_buffer_event *rbe, void *event) { struct hist_field *operand1 = hist_field->operands[0]; struct hist_field *operand2 = hist_field->operands[1]; - u64 val1 = operand1->fn(operand1, elt, rbe, event); - u64 val2 = operand2->fn(operand2, elt, rbe, event); + u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event); + u64 val2 = operand2->fn(operand2, elt, buffer, rbe, event); return val1 + val2; } static u64 hist_field_minus(struct hist_field *hist_field, struct tracing_map_elt *elt, + struct trace_buffer *buffer, struct ring_buffer_event *rbe, void *event) { struct hist_field *operand1 = hist_field->operands[0]; struct hist_field *operand2 = hist_field->operands[1]; - u64 val1 = operand1->fn(operand1, elt, rbe, event); - u64 val2 = operand2->fn(operand2, elt, rbe, event); + u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event); + u64 val2 = operand2->fn(operand2, elt, buffer, rbe, event); return val1 - val2; } static u64 hist_field_unary_minus(struct hist_field *hist_field, struct tracing_map_elt *elt, + struct trace_buffer *buffer, struct ring_buffer_event *rbe, void *event) { struct hist_field *operand = hist_field->operands[0]; - s64 sval = (s64)operand->fn(operand, elt, rbe, event); + s64 sval = (s64)operand->fn(operand, elt, buffer, rbe, event); u64 val = (u64)-sval; return val; @@ -255,6 +265,7 @@ static u64 hist_field_unary_minus(struct hist_field *hist_field, #define DEFINE_HIST_FIELD_FN(type) \ static u64 hist_field_##type(struct hist_field *hist_field, \ struct tracing_map_elt *elt, \ + struct trace_buffer *buffer, \ struct ring_buffer_event *rbe, \ void *event) \ { \ @@ -380,7 +391,8 @@ struct hist_trigger_data { struct action_data; typedef void (*action_fn_t) (struct hist_trigger_data *hist_data, - struct tracing_map_elt *elt, void *rec, + struct tracing_map_elt *elt, + struct trace_buffer *buffer, void *rec, struct ring_buffer_event *rbe, void *key, struct action_data *data, u64 *var_ref_vals); @@ -608,7 +620,8 @@ static inline void trace_synth(struct synth_event *event, u64 *var_ref_vals, } static void action_trace(struct hist_trigger_data *hist_data, - struct tracing_map_elt *elt, void *rec, + struct tracing_map_elt *elt, + struct trace_buffer *buffer, void *rec, struct ring_buffer_event *rbe, void *key, struct action_data *data, u64 *var_ref_vals) { @@ -624,13 +637,14 @@ struct hist_var_data { static 
u64 hist_field_timestamp(struct hist_field *hist_field, struct tracing_map_elt *elt, + struct trace_buffer *buffer, struct ring_buffer_event *rbe, void *event) { struct hist_trigger_data *hist_data = hist_field->hist_data; struct trace_array *tr = hist_data->event_file->tr; - u64 ts = ring_buffer_event_time_stamp(rbe); + u64 ts = ring_buffer_event_time_stamp(buffer, rbe); if (hist_data->attrs->ts_in_usecs && trace_clock_in_ns(tr)) ts = ns2usecs(ts); @@ -640,6 +654,7 @@ static u64 hist_field_timestamp(struct hist_field *hist_field, static u64 hist_field_cpu(struct hist_field *hist_field, struct tracing_map_elt *elt, + struct trace_buffer *buffer, struct ring_buffer_event *rbe, void *event) { @@ -1020,6 +1035,7 @@ static struct hist_field *find_event_var(struct hist_trigger_data *hist_data, static u64 hist_field_var_ref(struct hist_field *hist_field, struct tracing_map_elt *elt, + struct trace_buffer *buffer, struct ring_buffer_event *rbe, void *event) { @@ -2561,6 +2577,7 @@ find_target_event_var(struct hist_trigger_data *hist_data, } static inline void __update_field_vars(struct tracing_map_elt *elt, + struct trace_buffer *buffer, struct ring_buffer_event *rbe, void *rec, struct field_var **field_vars, @@ -2576,7 +2593,7 @@ static inline void __update_field_vars(struct tracing_map_elt *elt, struct hist_field *var = field_var->var; struct hist_field *val = field_var->val; - var_val = val->fn(val, elt, rbe, rec); + var_val = val->fn(val, elt, buffer, rbe, rec); var_idx = var->var.idx; if (val->flags & HIST_FIELD_FL_STRING) { @@ -2592,19 +2609,21 @@ static inline void __update_field_vars(struct tracing_map_elt *elt, static void update_field_vars(struct hist_trigger_data *hist_data, struct tracing_map_elt *elt, + struct trace_buffer *buffer, struct ring_buffer_event *rbe, void *rec) { - __update_field_vars(elt, rbe, rec, hist_data->field_vars, + __update_field_vars(elt, buffer, rbe, rec, hist_data->field_vars, hist_data->n_field_vars, 0); } static void save_track_data_vars(struct hist_trigger_data *hist_data, - struct tracing_map_elt *elt, void *rec, + struct tracing_map_elt *elt, + struct trace_buffer *buffer, void *rec, struct ring_buffer_event *rbe, void *key, struct action_data *data, u64 *var_ref_vals) { - __update_field_vars(elt, rbe, rec, hist_data->save_vars, + __update_field_vars(elt, buffer, rbe, rec, hist_data->save_vars, hist_data->n_save_vars, hist_data->n_field_var_str); } @@ -2780,12 +2799,14 @@ static void save_track_val(struct hist_trigger_data *hist_data, } static void save_track_data(struct hist_trigger_data *hist_data, - struct tracing_map_elt *elt, void *rec, + struct tracing_map_elt *elt, + struct trace_buffer *buffer, void *rec, struct ring_buffer_event *rbe, void *key, struct action_data *data, u64 *var_ref_vals) { if (data->track_data.save_data) - data->track_data.save_data(hist_data, elt, rec, rbe, key, data, var_ref_vals); + data->track_data.save_data(hist_data, elt, buffer, rec, rbe, + key, data, var_ref_vals); } static bool check_track_val(struct tracing_map_elt *elt, @@ -2836,7 +2857,8 @@ static bool cond_snapshot_update(struct trace_array *tr, void *cond_data) } static void save_track_data_snapshot(struct hist_trigger_data *hist_data, - struct tracing_map_elt *elt, void *rec, + struct tracing_map_elt *elt, + struct trace_buffer *buffer, void *rec, struct ring_buffer_event *rbe, void *key, struct action_data *data, u64 *var_ref_vals) @@ -2905,7 +2927,8 @@ static bool cond_snapshot_update(struct trace_array *tr, void *cond_data) return false; } static void 
save_track_data_snapshot(struct hist_trigger_data *hist_data, - struct tracing_map_elt *elt, void *rec, + struct tracing_map_elt *elt, + struct trace_buffer *buffer, void *rec, struct ring_buffer_event *rbe, void *key, struct action_data *data, u64 *var_ref_vals) {} @@ -2947,7 +2970,8 @@ static void track_data_print(struct seq_file *m, } static void ontrack_action(struct hist_trigger_data *hist_data, - struct tracing_map_elt *elt, void *rec, + struct tracing_map_elt *elt, + struct trace_buffer *buffer, void *rec, struct ring_buffer_event *rbe, void *key, struct action_data *data, u64 *var_ref_vals) { @@ -2955,7 +2979,8 @@ static void ontrack_action(struct hist_trigger_data *hist_data, if (check_track_val(elt, data, var_val)) { save_track_val(hist_data, elt, data, var_val); - save_track_data(hist_data, elt, rec, rbe, key, data, var_ref_vals); + save_track_data(hist_data, elt, buffer, rec, rbe, + key, data, var_ref_vals); } } @@ -4400,7 +4425,8 @@ create_hist_data(unsigned int map_bits, } static void hist_trigger_elt_update(struct hist_trigger_data *hist_data, - struct tracing_map_elt *elt, void *rec, + struct tracing_map_elt *elt, + struct trace_buffer *buffer, void *rec, struct ring_buffer_event *rbe, u64 *var_ref_vals) { @@ -4414,7 +4440,7 @@ static void hist_trigger_elt_update(struct hist_trigger_data *hist_data, for_each_hist_val_field(i, hist_data) { hist_field = hist_data->fields[i]; - hist_val = hist_field->fn(hist_field, elt, rbe, rec); + hist_val = hist_field->fn(hist_field, elt, buffer, rbe, rec); if (hist_field->flags & HIST_FIELD_FL_VAR) { var_idx = hist_field->var.idx; @@ -4442,13 +4468,13 @@ static void hist_trigger_elt_update(struct hist_trigger_data *hist_data, for_each_hist_key_field(i, hist_data) { hist_field = hist_data->fields[i]; if (hist_field->flags & HIST_FIELD_FL_VAR) { - hist_val = hist_field->fn(hist_field, elt, rbe, rec); + hist_val = hist_field->fn(hist_field, elt, buffer, rbe, rec); var_idx = hist_field->var.idx; tracing_map_set_var(elt, var_idx, hist_val); } } - update_field_vars(hist_data, elt, rbe, rec); + update_field_vars(hist_data, elt, buffer, rbe, rec); } static inline void add_to_key(char *compound_key, void *key, @@ -4478,7 +4504,8 @@ static inline void add_to_key(char *compound_key, void *key, static void hist_trigger_actions(struct hist_trigger_data *hist_data, - struct tracing_map_elt *elt, void *rec, + struct tracing_map_elt *elt, + struct trace_buffer *buffer, void *rec, struct ring_buffer_event *rbe, void *key, u64 *var_ref_vals) { @@ -4487,11 +4514,12 @@ hist_trigger_actions(struct hist_trigger_data *hist_data, for (i = 0; i < hist_data->n_actions; i++) { data = hist_data->actions[i]; - data->fn(hist_data, elt, rec, rbe, key, data, var_ref_vals); + data->fn(hist_data, elt, buffer, rec, rbe, key, data, var_ref_vals); } } -static void event_hist_trigger(struct event_trigger_data *data, void *rec, +static void event_hist_trigger(struct event_trigger_data *data, + struct trace_buffer *buffer, void *rec, struct ring_buffer_event *rbe) { struct hist_trigger_data *hist_data = data->private_data; @@ -4516,7 +4544,7 @@ static void event_hist_trigger(struct event_trigger_data *data, void *rec, HIST_STACKTRACE_SKIP); key = entries; } else { - field_contents = key_field->fn(key_field, elt, rbe, rec); + field_contents = key_field->fn(key_field, elt, buffer, rbe, rec); if (key_field->flags & HIST_FIELD_FL_STRING) { key = (void *)(unsigned long)field_contents; use_compound_key = true; @@ -4539,10 +4567,10 @@ static void event_hist_trigger(struct 
event_trigger_data *data, void *rec, if (!elt) return; - hist_trigger_elt_update(hist_data, elt, rec, rbe, var_ref_vals); + hist_trigger_elt_update(hist_data, elt, buffer, rec, rbe, var_ref_vals); if (resolve_var_refs(hist_data, key, var_ref_vals, true)) - hist_trigger_actions(hist_data, elt, rec, rbe, key, var_ref_vals); + hist_trigger_actions(hist_data, elt, buffer, rec, rbe, key, var_ref_vals); } static void hist_trigger_stacktrace_print(struct seq_file *m, @@ -5456,7 +5484,7 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, goto out; } - tracing_set_time_stamp_abs(file->tr, true); + tracing_set_filter_buffering(file->tr, true); } if (named_data) @@ -5564,7 +5592,7 @@ static void hist_unregister_trigger(char *glob, struct event_trigger_ops *ops, if (hist_data->enable_timestamps) { if (!hist_data->remove || unregistered) - tracing_set_time_stamp_abs(file->tr, false); + tracing_set_filter_buffering(file->tr, false); } } @@ -5611,7 +5639,7 @@ static void hist_unreg_all(struct trace_event_file *file) update_cond_flag(file); if (hist_data->enable_timestamps) - tracing_set_time_stamp_abs(file->tr, false); + tracing_set_filter_buffering(file->tr, false); if (test->ops->free) test->ops->free(test->ops, test); } @@ -5812,7 +5840,8 @@ __init int register_trigger_hist_cmd(void) } static void -hist_enable_trigger(struct event_trigger_data *data, void *rec, +hist_enable_trigger(struct event_trigger_data *data, + struct trace_buffer *buffer, void *rec, struct ring_buffer_event *event) { struct enable_trigger_data *enable_data = data->private_data; @@ -5830,7 +5859,8 @@ hist_enable_trigger(struct event_trigger_data *data, void *rec, } static void -hist_enable_count_trigger(struct event_trigger_data *data, void *rec, +hist_enable_count_trigger(struct event_trigger_data *data, + struct trace_buffer *buffer, void *rec, struct ring_buffer_event *event) { if (!data->count) @@ -5839,7 +5869,7 @@ hist_enable_count_trigger(struct event_trigger_data *data, void *rec, if (data->count != -1) (data->count)--; - hist_enable_trigger(data, rec, event); + hist_enable_trigger(data, buffer, rec, event); } static struct event_trigger_ops hist_enable_trigger_ops = { diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 8d71e6c83f10..2ac75eb6aa86 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -1385,7 +1385,7 @@ static int destroy_synth_event(struct synth_event *se) /** * synth_event_delete - Delete a synthetic event - * @event_name: The name of the new sythetic event + * @event_name: The name of the new synthetic event * * Delete a synthetic event that was created with synth_event_create(). * diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index f725802160c0..b8bfa8505b7b 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -53,7 +53,8 @@ void trigger_data_free(struct event_trigger_data *data) * any trigger that should be deferred, ETT_NONE if nothing to defer. 
*/ enum event_trigger_type -event_triggers_call(struct trace_event_file *file, void *rec, +event_triggers_call(struct trace_event_file *file, + struct trace_buffer *buffer, void *rec, struct ring_buffer_event *event) { struct event_trigger_data *data; @@ -67,7 +68,7 @@ event_triggers_call(struct trace_event_file *file, void *rec, if (data->paused) continue; if (!rec) { - data->ops->func(data, rec, event); + data->ops->func(data, buffer, rec, event); continue; } filter = rcu_dereference_sched(data->filter); @@ -77,7 +78,7 @@ event_triggers_call(struct trace_event_file *file, void *rec, tt |= data->cmd_ops->trigger_type; continue; } - data->ops->func(data, rec, event); + data->ops->func(data, buffer, rec, event); } return tt; } @@ -105,7 +106,7 @@ event_triggers_post_call(struct trace_event_file *file, if (data->paused) continue; if (data->cmd_ops->trigger_type & tt) - data->ops->func(data, NULL, NULL); + data->ops->func(data, NULL, NULL, NULL); } } EXPORT_SYMBOL_GPL(event_triggers_post_call); @@ -937,7 +938,8 @@ get_named_trigger_data(struct event_trigger_data *data) } static void -traceon_trigger(struct event_trigger_data *data, void *rec, +traceon_trigger(struct event_trigger_data *data, + struct trace_buffer *buffer, void *rec, struct ring_buffer_event *event) { if (tracing_is_on()) @@ -947,7 +949,8 @@ traceon_trigger(struct event_trigger_data *data, void *rec, } static void -traceon_count_trigger(struct event_trigger_data *data, void *rec, +traceon_count_trigger(struct event_trigger_data *data, + struct trace_buffer *buffer, void *rec, struct ring_buffer_event *event) { if (tracing_is_on()) @@ -963,7 +966,8 @@ traceon_count_trigger(struct event_trigger_data *data, void *rec, } static void -traceoff_trigger(struct event_trigger_data *data, void *rec, +traceoff_trigger(struct event_trigger_data *data, + struct trace_buffer *buffer, void *rec, struct ring_buffer_event *event) { if (!tracing_is_on()) @@ -973,7 +977,8 @@ traceoff_trigger(struct event_trigger_data *data, void *rec, } static void -traceoff_count_trigger(struct event_trigger_data *data, void *rec, +traceoff_count_trigger(struct event_trigger_data *data, + struct trace_buffer *buffer, void *rec, struct ring_buffer_event *event) { if (!tracing_is_on()) @@ -1071,7 +1076,8 @@ static struct event_command trigger_traceoff_cmd = { #ifdef CONFIG_TRACER_SNAPSHOT static void -snapshot_trigger(struct event_trigger_data *data, void *rec, +snapshot_trigger(struct event_trigger_data *data, + struct trace_buffer *buffer, void *rec, struct ring_buffer_event *event) { struct trace_event_file *file = data->private_data; @@ -1083,7 +1089,8 @@ snapshot_trigger(struct event_trigger_data *data, void *rec, } static void -snapshot_count_trigger(struct event_trigger_data *data, void *rec, +snapshot_count_trigger(struct event_trigger_data *data, + struct trace_buffer *buffer, void *rec, struct ring_buffer_event *event) { if (!data->count) @@ -1092,7 +1099,7 @@ snapshot_count_trigger(struct event_trigger_data *data, void *rec, if (data->count != -1) (data->count)--; - snapshot_trigger(data, rec, event); + snapshot_trigger(data, buffer, rec, event); } static int @@ -1176,14 +1183,16 @@ static __init int register_trigger_snapshot_cmd(void) { return 0; } #endif static void -stacktrace_trigger(struct event_trigger_data *data, void *rec, +stacktrace_trigger(struct event_trigger_data *data, + struct trace_buffer *buffer, void *rec, struct ring_buffer_event *event) { trace_dump_stack(STACK_SKIP); } static void -stacktrace_count_trigger(struct event_trigger_data 
*data, void *rec, +stacktrace_count_trigger(struct event_trigger_data *data, + struct trace_buffer *buffer, void *rec, struct ring_buffer_event *event) { if (!data->count) @@ -1192,7 +1201,7 @@ stacktrace_count_trigger(struct event_trigger_data *data, void *rec, if (data->count != -1) (data->count)--; - stacktrace_trigger(data, rec, event); + stacktrace_trigger(data, buffer, rec, event); } static int @@ -1254,7 +1263,8 @@ static __init void unregister_trigger_traceon_traceoff_cmds(void) } static void -event_enable_trigger(struct event_trigger_data *data, void *rec, +event_enable_trigger(struct event_trigger_data *data, + struct trace_buffer *buffer, void *rec, struct ring_buffer_event *event) { struct enable_trigger_data *enable_data = data->private_data; @@ -1266,7 +1276,8 @@ event_enable_trigger(struct event_trigger_data *data, void *rec, } static void -event_enable_count_trigger(struct event_trigger_data *data, void *rec, +event_enable_count_trigger(struct event_trigger_data *data, + struct trace_buffer *buffer, void *rec, struct ring_buffer_event *event) { struct enable_trigger_data *enable_data = data->private_data; @@ -1281,7 +1292,7 @@ event_enable_count_trigger(struct event_trigger_data *data, void *rec, if (data->count != -1) (data->count)--; - event_enable_trigger(data, rec, event); + event_enable_trigger(data, buffer, rec, event); } int event_enable_trigger_print(struct seq_file *m, diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index f93723ca66bc..1f0e63f5d1f9 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -27,13 +27,28 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, static void function_stack_trace_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs); +static void +function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs); +static void +function_stack_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, + struct ftrace_regs *fregs); static struct tracer_flags func_flags; /* Our option */ enum { - TRACE_FUNC_OPT_STACK = 0x1, + + TRACE_FUNC_NO_OPTS = 0x0, /* No flags set. */ + TRACE_FUNC_OPT_STACK = 0x1, + TRACE_FUNC_OPT_NO_REPEATS = 0x2, + + /* Update this to next highest bit. 
*/ + TRACE_FUNC_OPT_HIGHEST_BIT = 0x4 }; +#define TRACE_FUNC_OPT_MASK (TRACE_FUNC_OPT_HIGHEST_BIT - 1) + int ftrace_allocate_ftrace_ops(struct trace_array *tr) { struct ftrace_ops *ops; @@ -86,6 +101,34 @@ void ftrace_destroy_function_files(struct trace_array *tr) ftrace_free_ftrace_ops(tr); } +static ftrace_func_t select_trace_function(u32 flags_val) +{ + switch (flags_val & TRACE_FUNC_OPT_MASK) { + case TRACE_FUNC_NO_OPTS: + return function_trace_call; + case TRACE_FUNC_OPT_STACK: + return function_stack_trace_call; + case TRACE_FUNC_OPT_NO_REPEATS: + return function_no_repeats_trace_call; + case TRACE_FUNC_OPT_STACK | TRACE_FUNC_OPT_NO_REPEATS: + return function_stack_no_repeats_trace_call; + default: + return NULL; + } +} + +static bool handle_func_repeats(struct trace_array *tr, u32 flags_val) +{ + if (!tr->last_func_repeats && + (flags_val & TRACE_FUNC_OPT_NO_REPEATS)) { + tr->last_func_repeats = alloc_percpu(struct trace_func_repeats); + if (!tr->last_func_repeats) + return false; + } + + return true; +} + static int function_trace_init(struct trace_array *tr) { ftrace_func_t func; @@ -97,12 +140,12 @@ static int function_trace_init(struct trace_array *tr) if (!tr->ops) return -ENOMEM; - /* Currently only the global instance can do stack tracing */ - if (tr->flags & TRACE_ARRAY_FL_GLOBAL && - func_flags.val & TRACE_FUNC_OPT_STACK) - func = function_stack_trace_call; - else - func = function_trace_call; + func = select_trace_function(func_flags.val); + if (!func) + return -EINVAL; + + if (!handle_func_repeats(tr, func_flags.val)) + return -ENOMEM; ftrace_init_array_ops(tr, func); @@ -205,15 +248,137 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip, local_irq_restore(flags); } +static inline bool is_repeat_check(struct trace_array *tr, + struct trace_func_repeats *last_info, + unsigned long ip, unsigned long parent_ip) +{ + if (last_info->ip == ip && + last_info->parent_ip == parent_ip && + last_info->count < U16_MAX) { + last_info->ts_last_call = + ring_buffer_time_stamp(tr->array_buffer.buffer); + last_info->count++; + return true; + } + + return false; +} + +static inline void process_repeats(struct trace_array *tr, + unsigned long ip, unsigned long parent_ip, + struct trace_func_repeats *last_info, + unsigned int trace_ctx) +{ + if (last_info->count) { + trace_last_func_repeats(tr, last_info, trace_ctx); + last_info->count = 0; + } + + last_info->ip = ip; + last_info->parent_ip = parent_ip; +} + +static void +function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, + struct ftrace_regs *fregs) +{ + struct trace_func_repeats *last_info; + struct trace_array *tr = op->private; + struct trace_array_cpu *data; + unsigned int trace_ctx; + unsigned long flags; + int bit; + int cpu; + + if (unlikely(!tr->function_enabled)) + return; + + bit = ftrace_test_recursion_trylock(ip, parent_ip); + if (bit < 0) + return; + + preempt_disable_notrace(); + + cpu = smp_processor_id(); + data = per_cpu_ptr(tr->array_buffer.data, cpu); + if (atomic_read(&data->disabled)) + goto out; + + /* + * An interrupt may happen at any place here. But as far as I can see, + * the only damage that this can cause is to mess up the repetition + * counter without valuable data being lost. + * TODO: think about a solution that is better than just hoping to be + * lucky. 
+ */ + last_info = per_cpu_ptr(tr->last_func_repeats, cpu); + if (is_repeat_check(tr, last_info, ip, parent_ip)) + goto out; + + local_save_flags(flags); + trace_ctx = tracing_gen_ctx_flags(flags); + process_repeats(tr, ip, parent_ip, last_info, trace_ctx); + + trace_function(tr, ip, parent_ip, trace_ctx); + +out: + ftrace_test_recursion_unlock(bit); + preempt_enable_notrace(); +} + +static void +function_stack_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, + struct ftrace_regs *fregs) +{ + struct trace_func_repeats *last_info; + struct trace_array *tr = op->private; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + unsigned int trace_ctx; + + if (unlikely(!tr->function_enabled)) + return; + + /* + * Need to use raw, since this must be called before the + * recursive protection is performed. + */ + local_irq_save(flags); + cpu = raw_smp_processor_id(); + data = per_cpu_ptr(tr->array_buffer.data, cpu); + disabled = atomic_inc_return(&data->disabled); + + if (likely(disabled == 1)) { + last_info = per_cpu_ptr(tr->last_func_repeats, cpu); + if (is_repeat_check(tr, last_info, ip, parent_ip)) + goto out; + + trace_ctx = tracing_gen_ctx_flags(flags); + process_repeats(tr, ip, parent_ip, last_info, trace_ctx); + + trace_function(tr, ip, parent_ip, trace_ctx); + __trace_stack(tr, trace_ctx, STACK_SKIP); + } + + out: + atomic_dec(&data->disabled); + local_irq_restore(flags); +} + static struct tracer_opt func_opts[] = { #ifdef CONFIG_STACKTRACE { TRACER_OPT(func_stack_trace, TRACE_FUNC_OPT_STACK) }, #endif + { TRACER_OPT(func-no-repeats, TRACE_FUNC_OPT_NO_REPEATS) }, { } /* Always set a last empty entry */ }; static struct tracer_flags func_flags = { - .val = 0, /* By default: all flags disabled */ + .val = TRACE_FUNC_NO_OPTS, /* By default: all flags disabled */ .opts = func_opts }; @@ -235,30 +400,32 @@ static struct tracer function_trace; static int func_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) { - switch (bit) { - case TRACE_FUNC_OPT_STACK: - /* do nothing if already set */ - if (!!set == !!(func_flags.val & TRACE_FUNC_OPT_STACK)) - break; - - /* We can change this flag when not running. */ - if (tr->current_trace != &function_trace) - break; + ftrace_func_t func; + u32 new_flags; - unregister_ftrace_function(tr->ops); + /* Do nothing if already set. */ + if (!!set == !!(func_flags.val & bit)) + return 0; - if (set) { - tr->ops->func = function_stack_trace_call; - register_ftrace_function(tr->ops); - } else { - tr->ops->func = function_trace_call; - register_ftrace_function(tr->ops); - } + /* We can change this flag only when not running. */ + if (tr->current_trace != &function_trace) + return 0; - break; - default: + new_flags = (func_flags.val & ~bit) | (set ? bit : 0); + func = select_trace_function(new_flags); + if (!func) return -EINVAL; - } + + /* Check if there's anything to change. 
*/ + if (tr->ops->func == func) + return 0; + + if (!handle_func_repeats(tr, new_flags)) + return -ENOMEM; + + unregister_ftrace_function(tr->ops); + tr->ops->func = func; + register_ftrace_function(tr->ops); return 0; } diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 0aa6e6faa943..0de6837722da 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -764,7 +764,7 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, * - we are inside irq code * - we just entered irq code * - * retunns 0 if + * returns 0 if * - funcgraph-interrupts option is set * - we are not inside irq code */ diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index 34dc1a712dcb..632ef88131a9 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -83,7 +83,7 @@ struct hwlat_sample { u64 nmi_total_ts; /* Total time spent in NMIs */ struct timespec64 timestamp; /* wall time */ int nmi_count; /* # NMIs during this sample */ - int count; /* # of iteratons over threash */ + int count; /* # of iterations over thresh */ }; /* keep the global state somewhere. */ @@ -389,7 +389,7 @@ static int start_kthread(struct trace_array *tr) } /** - * stop_kthread - Inform the hardware latency samping/detector kthread to stop + * stop_kthread - Inform the hardware latency sampling/detector kthread to stop * * This kicks the running hardware latency sampling/detector kernel thread and * tells it to stop sampling now. Use this on unload and at system shutdown. diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 6fe770d86dc3..ea6178cb5e33 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1748,7 +1748,7 @@ kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) if (trace_probe_test_flag(&tk->tp, TP_FLAG_PROFILE)) kretprobe_perf_func(tk, ri, regs); #endif - return 0; /* We don't tweek kernel, so just return 0 */ + return 0; /* We don't tweak kernel, so just return 0 */ } NOKPROBE_SYMBOL(kretprobe_dispatcher); diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 61255bad7e01..d0368a569bfa 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -317,7 +317,7 @@ void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...) 
va_list ap; va_start(ap, fmt); - trace_seq_vprintf(&iter->seq, trace_event_format(iter, fmt), ap); + trace_check_vprintf(iter, trace_event_format(iter, fmt), ap); va_end(ap); } EXPORT_SYMBOL(trace_event_printf); @@ -587,13 +587,26 @@ lat_print_timestamp(struct trace_iterator *iter, u64 next_ts) return !trace_seq_has_overflowed(s); } +static void trace_print_time(struct trace_seq *s, struct trace_iterator *iter, + unsigned long long ts) +{ + unsigned long secs, usec_rem; + unsigned long long t; + + if (iter->iter_flags & TRACE_FILE_TIME_IN_NS) { + t = ns2usecs(ts); + usec_rem = do_div(t, USEC_PER_SEC); + secs = (unsigned long)t; + trace_seq_printf(s, " %5lu.%06lu", secs, usec_rem); + } else + trace_seq_printf(s, " %12llu", ts); +} + int trace_print_context(struct trace_iterator *iter) { struct trace_array *tr = iter->tr; struct trace_seq *s = &iter->seq; struct trace_entry *entry = iter->ent; - unsigned long long t; - unsigned long secs, usec_rem; char comm[TASK_COMM_LEN]; trace_find_cmdline(entry->pid, comm); @@ -614,13 +627,8 @@ int trace_print_context(struct trace_iterator *iter) if (tr->trace_flags & TRACE_ITER_IRQ_INFO) trace_print_lat_fmt(s, entry); - if (iter->iter_flags & TRACE_FILE_TIME_IN_NS) { - t = ns2usecs(iter->ts); - usec_rem = do_div(t, USEC_PER_SEC); - secs = (unsigned long)t; - trace_seq_printf(s, " %5lu.%06lu: ", secs, usec_rem); - } else - trace_seq_printf(s, " %12llu: ", iter->ts); + trace_print_time(s, iter, iter->ts); + trace_seq_puts(s, ": "); return !trace_seq_has_overflowed(s); } @@ -837,6 +845,17 @@ enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags, return trace_handle_return(&iter->seq); } +static void print_fn_trace(struct trace_seq *s, unsigned long ip, + unsigned long parent_ip, int flags) +{ + seq_print_ip_sym(s, ip, flags); + + if ((flags & TRACE_ITER_PRINT_PARENT) && parent_ip) { + trace_seq_puts(s, " <-"); + seq_print_ip_sym(s, parent_ip, flags); + } +} + /* TRACE_FN */ static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags, struct trace_event *event) @@ -846,13 +865,7 @@ static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags, trace_assign_type(field, iter->ent); - seq_print_ip_sym(s, field->ip, flags); - - if ((flags & TRACE_ITER_PRINT_PARENT) && field->parent_ip) { - trace_seq_puts(s, " <-"); - seq_print_ip_sym(s, field->parent_ip, flags); - } - + print_fn_trace(s, field->ip, field->parent_ip, flags); trace_seq_putc(s, '\n'); return trace_handle_return(s); @@ -1373,6 +1386,51 @@ static struct trace_event trace_raw_data_event = { .funcs = &trace_raw_data_funcs, }; +static enum print_line_t +trace_func_repeats_raw(struct trace_iterator *iter, int flags, + struct trace_event *event) +{ + struct func_repeats_entry *field; + struct trace_seq *s = &iter->seq; + + trace_assign_type(field, iter->ent); + + trace_seq_printf(s, "%lu %lu %u %llu\n", + field->ip, + field->parent_ip, + field->count, + FUNC_REPEATS_GET_DELTA_TS(field)); + + return trace_handle_return(s); +} + +static enum print_line_t +trace_func_repeats_print(struct trace_iterator *iter, int flags, + struct trace_event *event) +{ + struct func_repeats_entry *field; + struct trace_seq *s = &iter->seq; + + trace_assign_type(field, iter->ent); + + print_fn_trace(s, field->ip, field->parent_ip, flags); + trace_seq_printf(s, " (repeats: %u, last_ts:", field->count); + trace_print_time(s, iter, + iter->ts - FUNC_REPEATS_GET_DELTA_TS(field)); + trace_seq_puts(s, ")\n"); + + return trace_handle_return(s); +} + +static struct 
trace_event_functions trace_func_repeats_funcs = { + .trace = trace_func_repeats_print, + .raw = trace_func_repeats_raw, +}; + +static struct trace_event trace_func_repeats_event = { + .type = TRACE_FUNC_REPEATS, + .funcs = &trace_func_repeats_funcs, +}; static struct trace_event *events[] __initdata = { &trace_fn_event, @@ -1385,6 +1443,7 @@ static struct trace_event *events[] __initdata = { &trace_print_event, &trace_hwlat_event, &trace_raw_data_event, + &trace_func_repeats_event, NULL }; diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index ff32476df072..4b320fe7df70 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -251,6 +251,17 @@ int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap) } EXPORT_SYMBOL_GPL(__ftrace_vprintk); +bool trace_is_tracepoint_string(const char *str) +{ + const char **ptr = __start___tracepoint_str; + + for (ptr = __start___tracepoint_str; ptr < __stop___tracepoint_str; ptr++) { + if (str == *ptr) + return true; + } + return false; +} + static const char **find_next(void *v, loff_t *pos) { const char **fmt = v; diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index ec589a4612df..15413ad7cef2 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -168,7 +168,7 @@ void __trace_probe_log_err(int offset, int err_type) if (!trace_probe_log.argv) return; - /* Recalcurate the length and allocate buffer */ + /* Recalculate the length and allocate buffer */ for (i = 0; i < trace_probe_log.argc; i++) { if (i == trace_probe_log.index) pos = len; @@ -182,7 +182,7 @@ void __trace_probe_log_err(int offset, int err_type) /** * Set the error position is next to the last arg + space. * Note that len includes the terminal null and the cursor - * appaers at pos + 1. + * appears at pos + 1. */ pos = len; offset = 0; @@ -592,7 +592,7 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, } /* - * Since $comm and immediate string can not be dereferred, + * Since $comm and immediate string can not be dereferenced, * we can find those by strcmp. */ if (strcmp(arg, "$comm") == 0 || strncmp(arg, "\\\"", 2) == 0) { diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 7ce4027089ee..227d518e5ba5 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -134,7 +134,7 @@ struct fetch_type { size_t size; /* Byte size of type */ int is_signed; /* Signed flag */ print_type_func_t print; /* Print functions */ - const char *fmt; /* Fromat string */ + const char *fmt; /* Format string */ const char *fmttype; /* Name in format file */ }; diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h index e5282828f4a6..f003c5d02a3a 100644 --- a/kernel/trace/trace_probe_tmpl.h +++ b/kernel/trace/trace_probe_tmpl.h @@ -167,7 +167,7 @@ array: return code->op == FETCH_OP_END ? 
ret : -EILSEQ; } -/* Sum up total data length for dynamic arraies (strings) */ +/* Sum up total data length for dynamic arrays (strings) */ static nokprobe_inline int __get_data_size(struct trace_probe *tp, struct pt_regs *regs) { diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 73ef12092250..adf7ef194005 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -878,7 +878,7 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr) int ret; /* - * Now that the big kernel lock is no longer preemptable, + * Now that the big kernel lock is no longer preemptible, * and this is called with the BKL held, it will always * fail. If preemption is already disabled, simply * pass the test. When the BKL is removed, or becomes @@ -940,7 +940,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * int ret; /* - * Now that the big kernel lock is no longer preemptable, + * Now that the big kernel lock is no longer preemptible, * and this is called with the BKL held, it will always * fail. If preemption is already disabled, simply * pass the test. When the BKL is removed, or becomes diff --git a/kernel/trace/trace_seq.c b/kernel/trace/trace_seq.c index 1d84fcc78e3e..9c90b3a7dce2 100644 --- a/kernel/trace/trace_seq.c +++ b/kernel/trace/trace_seq.c @@ -16,7 +16,7 @@ * The buffer size is currently PAGE_SIZE, although it may become dynamic * in the future. * - * A write to the buffer will either succed or fail. That is, unlike + * A write to the buffer will either succeed or fail. That is, unlike * sprintf() there will not be a partial write (well it may write into * the buffer but it wont update the pointers). This allows users to * try to write something into the trace_seq buffer and if it fails @@ -73,7 +73,7 @@ int trace_print_seq(struct seq_file *m, struct trace_seq *s) * @fmt: printf format string * * The tracer may use either sequence operations or its own - * copy to user routines. To simplify formating of a trace + * copy to user routines. To simplify formatting of a trace * trace_seq_printf() is used to store strings into a special * buffer (@s). Then the output may be either used by * the sequencer or pulled into another buffer. @@ -133,7 +133,7 @@ EXPORT_SYMBOL_GPL(trace_seq_bitmask); * @fmt: printf format string * * The tracer may use either sequence operations or its own - * copy to user routines. To simplify formating of a trace + * copy to user routines. To simplify formatting of a trace * trace_seq_printf is used to store strings into a special * buffer (@s). Then the output may be either used by * the sequencer or pulled into another buffer. @@ -226,7 +226,7 @@ EXPORT_SYMBOL_GPL(trace_seq_puts); * @c: simple character to record * * The tracer may use either the sequence operations or its own - * copy to user routines. This function records a simple charater + * copy to user routines. This function records a simple character * into a special buffer (@s) for later retrieval by a sequencer * or other mechanism. 
*/ @@ -348,7 +348,7 @@ int trace_seq_path(struct trace_seq *s, const struct path *path) EXPORT_SYMBOL_GPL(trace_seq_path); /** - * trace_seq_to_user - copy the squence buffer to user space + * trace_seq_to_user - copy the sequence buffer to user space * @s: trace sequence descriptor * @ubuf: The userspace memory location to copy to * @cnt: The amount to copy @@ -363,7 +363,7 @@ EXPORT_SYMBOL_GPL(trace_seq_path); * * On failure it returns -EBUSY if all of the content in the * sequence has been already read, which includes nothing in the - * sequenc (@s->len == @s->readpos). + * sequence (@s->len == @s->readpos). * * Returns -EFAULT if the copy to userspace fails. */ diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 867860ea57da..a5429b3dac24 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -266,9 +266,9 @@ if ($arch eq "x86_64") { # force flags for this arch $ld .= " -m shlelf_linux"; if ($endian eq "big") { - $objcopy .= " -O elf32-shbig-linux"; + $objcopy .= " -O elf32-shbig-linux"; } else { - $objcopy .= " -O elf32-sh-linux"; + $objcopy .= " -O elf32-sh-linux"; } } elsif ($arch eq "powerpc") { @@ -289,12 +289,12 @@ if ($arch eq "x86_64") { $ldemulation = "lppc" } if ($bits == 64) { - $type = ".quad"; - $cc .= " -m64 "; - $ld .= " -m elf64".$ldemulation." "; + $type = ".quad"; + $cc .= " -m64 "; + $ld .= " -m elf64".$ldemulation." "; } else { - $cc .= " -m32 "; - $ld .= " -m elf32".$ldemulation." "; + $cc .= " -m32 "; + $ld .= " -m elf32".$ldemulation." "; } } elsif ($arch eq "arm") { @@ -313,7 +313,7 @@ if ($arch eq "x86_64") { $type = "data8"; if ($is_module eq "0") { - $cc .= " -mconstant-gp"; + $cc .= " -mconstant-gp"; } } elsif ($arch eq "sparc64") { # In the objdump output there are giblets like: @@ -530,10 +530,10 @@ while (<IN>) { $read_function = defined($text_sections{$1}); if (!$read_function) { foreach my $prefix (keys %text_section_prefixes) { - if (substr($1, 0, length $prefix) eq $prefix) { - $read_function = 1; - last; - } + if (substr($1, 0, length $prefix) eq $prefix) { + $read_function = 1; + last; + } } } # print out any recorded offsets @@ -642,3 +642,5 @@ if ($#converts >= 0) { `$rm $mcount_o $mcount_s`; exit(0); + +# vim: softtabstop=4 diff --git a/tools/tracing/latency/latency-collector.c b/tools/tracing/latency/latency-collector.c index b69de9263ee6..3a2e6bb781a8 100644 --- a/tools/tracing/latency/latency-collector.c +++ b/tools/tracing/latency/latency-collector.c @@ -1018,7 +1018,7 @@ static long go_to_sleep(const struct entry *req) cond_timedwait(&printstate.cond, &printstate.mutex, &future); if (time_has_passed(&future)) break; - }; + } if (printstate_has_new_req_arrived(req)) delay = -1; @@ -1941,7 +1941,7 @@ static void scan_arguments(int argc, char *argv[]) if (value < 0) { warnx("TIME must be >= 0\n"); show_usage(); - ; + exit(0); } trace_enable = true; use_random_sleep = true; |
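
The largest functional additions in this pull are the print_fmt verifier added to trace_events.c and the func-no-repeats mode added to trace_functions.c. As a rough illustration of the first, the stand-alone sketch below scans a format string for %p extensions that print through their pointer argument and records which argument positions are affected. It is deliberately simplified: the kernel verifier also tracks quoting, parentheses and the argument list, and clears the flag again when the argument points into the event record itself (REC->...) or is an address taken with '&'. Every name in the sketch is illustrative, none of it is the kernel API.

/* Simplified user-space sketch of the scan that test_event_printk() performs.
 * It only looks for "%p<X>" conversions whose extension dereferences the
 * pointer, and remembers the argument position in a bitmask, the same way
 * the kernel code builds dereference_flags.
 */
#include <stdio.h>
#include <string.h>

static unsigned long long find_deref_args(const char *fmt)
{
	/* %p extensions that print through the pointer (same set as the patch) */
	static const char deref[] = "BRrbMmIiEUVNadDgtCOf";
	unsigned long long flags = 0;
	int arg = 0;

	for (const char *p = fmt; *p; p++) {
		if (*p != '%')
			continue;
		p++;
		if (!*p)
			break;			/* stray '%' at the end */
		if (*p == '%')
			continue;		/* literal "%%" */
		if (*p == 'p' && p[1] && strchr(deref, p[1]))
			flags |= 1ULL << arg;	/* this argument is dereferenced */
		arg++;
	}
	return flags;
}

int main(void)
{
	unsigned long long flags = find_deref_args("dev %s irq %d mac %pM");

	for (int i = 0; i < 64; i++)
		if (flags & (1ULL << i))
			printf("argument %d is printed through its pointer\n", i + 1);
	return 0;
}

A format like the one above is harmless when the %pM argument is copied into the event (an array field, which test_field() whitelists); the new warning only fires when the stored pointer would have to be chased again at print time, long after the pointed-to data may have been freed.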
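
The func-no-repeats option folds consecutive calls to the same function into a single record plus a repeat count, which is what is_repeat_check() and process_repeats() implement per CPU in the patch. The user-space sketch below shows only the folding idea; it leaves out the per-CPU state, the timestamp bookkeeping and the U16_MAX cap on the counter, and every name in it is illustrative rather than the kernel API.

/* Fold consecutive calls to the same (ip, parent_ip) pair into one
 * record followed by a "repeated N times" record, the way the
 * func-no-repeats mode does for the function tracer.
 */
#include <stdio.h>
#include <stdint.h>

struct last_call {
	uintptr_t ip;		/* address of the last traced function */
	uintptr_t parent_ip;	/* address of its caller */
	unsigned int count;	/* how many times it repeated in a row */
};

static void flush_repeats(struct last_call *last)
{
	if (last->count)
		printf("  ... repeated %u more times\n", last->count);
	last->count = 0;
}

static void trace_call(struct last_call *last, uintptr_t ip, uintptr_t parent_ip)
{
	/* Same call site as last time: only bump the counter. */
	if (last->ip == ip && last->parent_ip == parent_ip) {
		last->count++;
		return;
	}
	/* Different function: emit the pending repeat record first. */
	flush_repeats(last);
	last->ip = ip;
	last->parent_ip = parent_ip;
	printf("func %#lx <- %#lx\n", (unsigned long)ip, (unsigned long)parent_ip);
}

int main(void)
{
	static const uintptr_t calls[][2] = {
		{ 0x1000, 0x2000 }, { 0x1000, 0x2000 }, { 0x1000, 0x2000 },
		{ 0x3000, 0x2000 }, { 0x1000, 0x2000 },
	};
	struct last_call last = { 0 };

	for (unsigned int i = 0; i < sizeof(calls) / sizeof(calls[0]); i++)
		trace_call(&last, calls[i][0], calls[i][1]);
	flush_repeats(&last);
	return 0;
}

The comment in function_no_repeats_trace_call() about interrupts is worth keeping in mind when reading the sketch: in the kernel an interrupt can race with this bookkeeping, and the patch accepts that the counter may occasionally be off rather than paying for synchronisation in the function-trace hot path.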
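
To try the option on a kernel that carries these patches, the function tracer has to be selected first; the func-no-repeats file then shows up with the other tracer options. The snippet below assumes tracefs is mounted at /sys/kernel/tracing (older setups may have it under /sys/kernel/debug/tracing) and that the program runs with enough privilege to write there; the paths are assumptions, not part of the patch.

/* Hedged usage example: select the function tracer and switch on the
 * new func-no-repeats option from a small helper program.
 */
#include <stdio.h>
#include <stdlib.h>

static void write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		exit(1);
	}
	fputs(val, f);
	fclose(f);
}

int main(void)
{
	write_str("/sys/kernel/tracing/current_tracer", "function");
	write_str("/sys/kernel/tracing/options/func-no-repeats", "1");
	/* The folded output can then be read back from
	 * /sys/kernel/tracing/trace.
	 */
	return 0;
}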
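
trace_output.c gains a small trace_print_time() helper so that both the normal context line and the new func_repeats output print timestamps the same way: seconds.microseconds when the file reports time in nanoseconds, a raw count otherwise. The program below reproduces just the nanosecond branch of that conversion; it is a sketch, not the kernel function.

/* Convert a nanosecond timestamp to "seconds.microseconds", mirroring
 * the TRACE_FILE_TIME_IN_NS branch of trace_print_time().
 */
#include <stdio.h>

#define USEC_PER_SEC 1000000ULL

static void print_time(unsigned long long ts_ns)
{
	unsigned long long t = ts_ns / 1000;	/* ns -> us */
	unsigned long usec_rem = (unsigned long)(t % USEC_PER_SEC);
	unsigned long secs = (unsigned long)(t / USEC_PER_SEC);

	printf(" %5lu.%06lu", secs, usec_rem);
}

int main(void)
{
	print_time(1234567890123ULL);	/* prints " 1234.567890" */
	putchar('\n');
	return 0;
}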
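
trace_is_tracepoint_string() in trace_printk.c decides whether a format string is trustworthy by comparing its address against the pointers stored between __start___tracepoint_str and __stop___tracepoint_str, not by comparing contents. The sketch below shows the same pointer-identity idea with an ordinary array standing in for the section bounds; the array and its contents are made up for the example.

/* Pointer-identity lookup: a string is "known" only when the pointer
 * itself appears in the table, even if another buffer holds identical
 * characters.
 */
#include <stdbool.h>
#include <stdio.h>

static const char *known_strings[] = { "sched", "irq", "timer" };

static bool is_known_string(const char *str)
{
	for (unsigned int i = 0; i < sizeof(known_strings) / sizeof(known_strings[0]); i++)
		if (str == known_strings[i])	/* compare addresses, not contents */
			return true;
	return false;
}

int main(void)
{
	char copy[] = "sched";		/* same contents, different address */

	printf("%d\n", is_known_string(known_strings[0]));	/* prints 1 */
	printf("%d\n", is_known_string(copy));			/* prints 0 */
	return 0;
}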