From b3ca59f6fe79eb83a360e885f49730d07d31bf79 Mon Sep 17 00:00:00 2001 From: Qiujun Huang Date: Thu, 31 Dec 2020 10:35:58 -0500 Subject: tracing: Update trace_ignore_this_task() kernel-doc comment Update kernel-doc parameter after commit b3b1e6ededa4 ("ftrace: Create set_ftrace_notrace_pid to not trace tasks") added @filtered_no_pids. Link: https://lkml.kernel.org/r/20201231153558.4804-1-hqjagain@gmail.com Signed-off-by: Qiujun Huang Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b8a2d786b503..9e4f4043a3df 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -530,6 +530,7 @@ trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid) /** * trace_ignore_this_task - should a task be ignored for tracing * @filtered_pids: The list of pids to check + * @filtered_no_pids: The list of pids not to be traced * @task: The task that should be ignored if not filtered * * Checks if @task should be traced or not from @filtered_pids. @@ -780,7 +781,7 @@ u64 ftrace_now(int cpu) } /** - * tracing_is_enabled - Show if global_trace has been disabled + * tracing_is_enabled - Show if global_trace has been enabled * * Shows if the global trace has been enabled or not. It uses the * mirror flag "buffer_disabled" to be used in fast paths such as for -- cgit v1.2.3 From 18d14ebdbd07abdd6466f252d431ce6f0e70e38a Mon Sep 17 00:00:00 2001 From: Qiujun Huang Date: Wed, 30 Dec 2020 22:05:21 +0800 Subject: tracing: Remove get/put_cpu() from function_trace_init Since commit b6f11df26fdc ("trace: Call tracing_reset_online_cpus before tracer->init()"), get/put_cpu() are not needed anymore. We can use raw_smp_processor_id() instead. Link: https://lkml.kernel.org/r/20201230140521.31920-1-hqjagain@gmail.com Signed-off-by: Qiujun Huang Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_functions.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index c5095dd28e20..f67aec5bb771 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -106,8 +106,7 @@ static int function_trace_init(struct trace_array *tr) ftrace_init_array_ops(tr, func); - tr->array_buffer.cpu = get_cpu(); - put_cpu(); + tr->array_buffer.cpu = raw_smp_processor_id(); tracing_start_cmdline_record(); tracing_start_function_trace(tr); -- cgit v1.2.3 From 6689bed36c52e34d772603118b0a31a0a5c11013 Mon Sep 17 00:00:00 2001 From: Qiujun Huang Date: Fri, 25 Dec 2020 22:03:56 +0800 Subject: ring-buffer: Remove cpu_buffer argument from the rb_inc_page() The cpu_buffer argument is not used inside the rb_inc_page() after commit 3adc54fa82a6 ("ring-buffer: make the buffer a true circular link list"). And cpu_buffer argument is not used inside the two functions too, rb_is_head_page/rb_set_list_to_head. Link: https://lkml.kernel.org/r/20201225140356.23008-1-hqjagain@gmail.com Signed-off-by: Qiujun Huang Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index ec08f948dd80..8fccee76a5f3 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1112,8 +1112,7 @@ static struct list_head *rb_list_head(struct list_head *list) * its flags will be non zero. */ static inline int -rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer, - struct buffer_page *page, struct list_head *list) +rb_is_head_page(struct buffer_page *page, struct list_head *list) { unsigned long val; @@ -1142,8 +1141,7 @@ static bool rb_is_reader_page(struct buffer_page *page) /* * rb_set_list_to_head - set a list_head to be pointing to head. */ -static void rb_set_list_to_head(struct ring_buffer_per_cpu *cpu_buffer, - struct list_head *list) +static void rb_set_list_to_head(struct list_head *list) { unsigned long *ptr; @@ -1166,7 +1164,7 @@ static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer) /* * Set the previous list pointer to have the HEAD flag. */ - rb_set_list_to_head(cpu_buffer, head->list.prev); + rb_set_list_to_head(head->list.prev); } static void rb_list_head_clear(struct list_head *list) @@ -1241,8 +1239,7 @@ static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer, old_flag, RB_PAGE_NORMAL); } -static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer, - struct buffer_page **bpage) +static inline void rb_inc_page(struct buffer_page **bpage) { struct list_head *p = rb_list_head((*bpage)->list.next); @@ -1274,11 +1271,11 @@ rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer) */ for (i = 0; i < 3; i++) { do { - if (rb_is_head_page(cpu_buffer, page, page->list.prev)) { + if (rb_is_head_page(page, page->list.prev)) { cpu_buffer->head_page = page; return page; } - rb_inc_page(cpu_buffer, &page); + rb_inc_page(&page); } while (page != head); } @@ -1824,7 +1821,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) cond_resched(); to_remove_page = tmp_iter_page; - rb_inc_page(cpu_buffer, &tmp_iter_page); + rb_inc_page(&tmp_iter_page); /* update the counters */ page_entries = rb_page_entries(to_remove_page); @@ -2271,7 +2268,7 @@ static void rb_inc_iter(struct ring_buffer_iter *iter) if (iter->head_page == cpu_buffer->reader_page) iter->head_page = rb_set_head_page(cpu_buffer); else - rb_inc_page(cpu_buffer, &iter->head_page); + rb_inc_page(&iter->head_page); iter->page_stamp = iter->read_stamp = iter->head_page->page->time_stamp; iter->head = 0; @@ -2374,7 +2371,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, * want the outer most commit to reset it. */ new_head = next_page; - rb_inc_page(cpu_buffer, &new_head); + rb_inc_page(&new_head); ret = rb_head_page_set_head(cpu_buffer, new_head, next_page, RB_PAGE_NORMAL); @@ -2526,7 +2523,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, next_page = tail_page; - rb_inc_page(cpu_buffer, &next_page); + rb_inc_page(&next_page); /* * If for some reason, we had an interrupt storm that made @@ -2552,7 +2549,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, * the buffer, unless the commit page is still on the * reader page. */ - if (rb_is_head_page(cpu_buffer, next_page, &tail_page->list)) { + if (rb_is_head_page(next_page, &tail_page->list)) { /* * If the commit is not on the reader page, then @@ -2879,7 +2876,7 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) return; local_set(&cpu_buffer->commit_page->page->commit, rb_page_write(cpu_buffer->commit_page)); - rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); + rb_inc_page(&cpu_buffer->commit_page); /* add barrier to keep gcc from optimizing too much */ barrier(); } @@ -3638,14 +3635,14 @@ rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer, * Because the commit page may be on the reader page we * start with the next page and check the end loop there. */ - rb_inc_page(cpu_buffer, &bpage); + rb_inc_page(&bpage); start = bpage; do { if (bpage->page == (void *)addr) { local_dec(&bpage->entries); return; } - rb_inc_page(cpu_buffer, &bpage); + rb_inc_page(&bpage); } while (bpage != start); /* commit not part of this buffer?? */ @@ -4367,7 +4364,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->pages = reader->list.prev; /* The reader page will be pointing to the new head */ - rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list); + rb_set_list_to_head(&cpu_buffer->reader_page->list); /* * We want to make sure we read the overruns after we set up our @@ -4406,7 +4403,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) * Now make the new head point back to the reader page. */ rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list; - rb_inc_page(cpu_buffer, &cpu_buffer->head_page); + rb_inc_page(&cpu_buffer->head_page); local_inc(&cpu_buffer->pages_read); -- cgit v1.2.3 From c6358bacdcdb64eb75192a135b66d3d3e57ad2e5 Mon Sep 17 00:00:00 2001 From: Qiujun Huang Date: Thu, 24 Dec 2020 22:46:34 +0800 Subject: ring-buffer: Drop unneeded check in ring_buffer_resize() Remove the cpumask check, as we has done it at the beginning of the function. Also fix a typo. s/also the on the/also on the/ Link: https://lkml.kernel.org/r/20201224144634.3210-1-hqjagain@gmail.com Signed-off-by: Qiujun Huang Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 8fccee76a5f3..b9dad3500041 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2059,10 +2059,6 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, put_online_cpus(); } else { - /* Make sure this CPU has been initialized */ - if (!cpumask_test_cpu(cpu_id, buffer->cpumask)) - goto out; - cpu_buffer = buffer->buffers[cpu_id]; if (nr_pages == cpu_buffer->nr_pages) @@ -2580,7 +2576,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, * have filled up the buffer with events * from interrupts and such, and wrapped. * - * Note, if the tail page is also the on the + * Note, if the tail page is also on the * reader_page, we let it move out. */ if (unlikely((cpu_buffer->commit_page != -- cgit v1.2.3 From 36590c50b2d0729952511129916beeea30d31d81 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 25 Jan 2021 20:45:08 +0100 Subject: tracing: Merge irqflags + preempt counter. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The state of the interrupts (irqflags) and the preemption counter are both passed down to tracing_generic_entry_update(). Only one bit of irqflags is actually required: The on/off state. The complete 32bit of the preemption counter isn't needed. Just whether of the upper bits (softirq, hardirq and NMI) are set and the preemption depth is needed. The irqflags and the preemption counter could be evaluated early and the information stored in an integer `trace_ctx'. tracing_generic_entry_update() would use the upper bits as the TRACE_FLAG_* and the lower 8bit as the disabled-preemption depth (considering that one must be substracted from the counter in one special cases). The actual preemption value is not used except for the tracing record. The `irqflags' variable is mostly used only for the tracing record. An exception here is for instance wakeup_tracer_call() or probe_wakeup_sched_switch() which explicilty disable interrupts and use that `irqflags' to save (and restore) the IRQ state and to record the state. Struct trace_event_buffer has also the `pc' and flags' members which can be replaced with `trace_ctx' since their actual value is not used outside of trace recording. This will reduce tracing_generic_entry_update() to simply assign values to struct trace_entry. The evaluation of the TRACE_FLAG_* bits is moved to _tracing_gen_ctx_flags() which replaces preempt_count() and local_save_flags() invocations. As an example, ftrace_syscall_enter() may invoke: - trace_buffer_lock_reserve() -> … -> tracing_generic_entry_update() - event_trigger_unlock_commit() -> ftrace_trace_stack() -> … -> tracing_generic_entry_update() -> ftrace_trace_userstack() -> … -> tracing_generic_entry_update() In this case the TRACE_FLAG_* bits were evaluated three times. By using the `trace_ctx' they are evaluated once and assigned three times. A build with all tracers enabled on x86-64 with and without the patch: text data bss dec hex filename 21970669 17084168 7639260 46694097 2c87ed1 vmlinux.old 21970293 17084168 7639260 46693721 2c87d59 vmlinux.new text shrank by 379 bytes, data remained constant. Link: https://lkml.kernel.org/r/20210125194511.3924915-2-bigeasy@linutronix.de Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 25 +++-- kernel/trace/blktrace.c | 17 +-- kernel/trace/trace.c | 208 +++++++++++++++++++---------------- kernel/trace/trace.h | 38 +++---- kernel/trace/trace_branch.c | 6 +- kernel/trace/trace_event_perf.c | 5 +- kernel/trace/trace_events.c | 18 +-- kernel/trace/trace_events_inject.c | 6 +- kernel/trace/trace_functions.c | 28 ++--- kernel/trace/trace_functions_graph.c | 32 +++--- kernel/trace/trace_hwlat.c | 7 +- kernel/trace/trace_irqsoff.c | 86 ++++++--------- kernel/trace/trace_kprobe.c | 10 +- kernel/trace/trace_mmiotrace.c | 14 ++- kernel/trace/trace_sched_wakeup.c | 71 ++++++------ kernel/trace/trace_syscalls.c | 20 ++-- kernel/trace/trace_uprobe.c | 4 +- 17 files changed, 287 insertions(+), 308 deletions(-) (limited to 'kernel') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index d321fe5ad1a1..091250b0895a 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -148,17 +148,29 @@ enum print_line_t { enum print_line_t trace_handle_return(struct trace_seq *s); -void tracing_generic_entry_update(struct trace_entry *entry, - unsigned short type, - unsigned long flags, - int pc); +static inline void tracing_generic_entry_update(struct trace_entry *entry, + unsigned short type, + unsigned int trace_ctx) +{ + struct task_struct *tsk = current; + + entry->preempt_count = trace_ctx & 0xff; + entry->pid = (tsk) ? tsk->pid : 0; + entry->type = type; + entry->flags = trace_ctx >> 16; +} + +unsigned int tracing_gen_ctx_flags(unsigned long irqflags); +unsigned int tracing_gen_ctx(void); +unsigned int tracing_gen_ctx_dec(void); + struct trace_event_file; struct ring_buffer_event * trace_event_buffer_lock_reserve(struct trace_buffer **current_buffer, struct trace_event_file *trace_file, int type, unsigned long len, - unsigned long flags, int pc); + unsigned int trace_ctx); #define TRACE_RECORD_CMDLINE BIT(0) #define TRACE_RECORD_TGID BIT(1) @@ -232,8 +244,7 @@ struct trace_event_buffer { struct ring_buffer_event *event; struct trace_event_file *trace_file; void *entry; - unsigned long flags; - int pc; + unsigned int trace_ctx; struct pt_regs *regs; }; diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index fb0fe4c66b84..c54eae2ab208 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -72,17 +72,17 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action, struct blk_io_trace *t; struct ring_buffer_event *event = NULL; struct trace_buffer *buffer = NULL; - int pc = 0; + unsigned int trace_ctx = 0; int cpu = smp_processor_id(); bool blk_tracer = blk_tracer_enabled; ssize_t cgid_len = cgid ? sizeof(cgid) : 0; if (blk_tracer) { buffer = blk_tr->array_buffer.buffer; - pc = preempt_count(); + trace_ctx = tracing_gen_ctx_flags(0); event = trace_buffer_lock_reserve(buffer, TRACE_BLK, sizeof(*t) + len + cgid_len, - 0, pc); + trace_ctx); if (!event) return; t = ring_buffer_event_data(event); @@ -107,7 +107,7 @@ record_it: memcpy((void *) t + sizeof(*t) + cgid_len, data, len); if (blk_tracer) - trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc); + trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx); } } @@ -222,8 +222,9 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, struct blk_io_trace *t; unsigned long flags = 0; unsigned long *sequence; + unsigned int trace_ctx = 0; pid_t pid; - int cpu, pc = 0; + int cpu; bool blk_tracer = blk_tracer_enabled; ssize_t cgid_len = cgid ? sizeof(cgid) : 0; @@ -252,10 +253,10 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, tracing_record_cmdline(current); buffer = blk_tr->array_buffer.buffer; - pc = preempt_count(); + trace_ctx = tracing_gen_ctx_flags(0); event = trace_buffer_lock_reserve(buffer, TRACE_BLK, sizeof(*t) + pdu_len + cgid_len, - 0, pc); + trace_ctx); if (!event) return; t = ring_buffer_event_data(event); @@ -301,7 +302,7 @@ record_it: memcpy((void *)t + sizeof(*t) + cgid_len, pdu_data, pdu_len); if (blk_tracer) { - trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc); + trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx); return; } } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9e4f4043a3df..0b3cce6ecf52 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -176,7 +176,7 @@ static union trace_eval_map_item *trace_eval_maps; int tracing_set_tracer(struct trace_array *tr, const char *buf); static void ftrace_trace_userstack(struct trace_array *tr, struct trace_buffer *buffer, - unsigned long flags, int pc); + unsigned int trace_ctx); #define MAX_TRACER_SIZE 100 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; @@ -906,23 +906,23 @@ static inline void trace_access_lock_init(void) #ifdef CONFIG_STACKTRACE static void __ftrace_trace_stack(struct trace_buffer *buffer, - unsigned long flags, - int skip, int pc, struct pt_regs *regs); + unsigned int trace_ctx, + int skip, struct pt_regs *regs); static inline void ftrace_trace_stack(struct trace_array *tr, struct trace_buffer *buffer, - unsigned long flags, - int skip, int pc, struct pt_regs *regs); + unsigned int trace_ctx, + int skip, struct pt_regs *regs); #else static inline void __ftrace_trace_stack(struct trace_buffer *buffer, - unsigned long flags, - int skip, int pc, struct pt_regs *regs) + unsigned int trace_ctx, + int skip, struct pt_regs *regs) { } static inline void ftrace_trace_stack(struct trace_array *tr, struct trace_buffer *buffer, - unsigned long flags, - int skip, int pc, struct pt_regs *regs) + unsigned long trace_ctx, + int skip, struct pt_regs *regs) { } @@ -930,24 +930,24 @@ static inline void ftrace_trace_stack(struct trace_array *tr, static __always_inline void trace_event_setup(struct ring_buffer_event *event, - int type, unsigned long flags, int pc) + int type, unsigned int trace_ctx) { struct trace_entry *ent = ring_buffer_event_data(event); - tracing_generic_entry_update(ent, type, flags, pc); + tracing_generic_entry_update(ent, type, trace_ctx); } static __always_inline struct ring_buffer_event * __trace_buffer_lock_reserve(struct trace_buffer *buffer, int type, unsigned long len, - unsigned long flags, int pc) + unsigned int trace_ctx) { struct ring_buffer_event *event; event = ring_buffer_lock_reserve(buffer, len); if (event != NULL) - trace_event_setup(event, type, flags, pc); + trace_event_setup(event, type, trace_ctx); return event; } @@ -1008,25 +1008,22 @@ int __trace_puts(unsigned long ip, const char *str, int size) struct ring_buffer_event *event; struct trace_buffer *buffer; struct print_entry *entry; - unsigned long irq_flags; + unsigned int trace_ctx; int alloc; - int pc; if (!(global_trace.trace_flags & TRACE_ITER_PRINTK)) return 0; - pc = preempt_count(); - if (unlikely(tracing_selftest_running || tracing_disabled)) return 0; alloc = sizeof(*entry) + size + 2; /* possible \n added */ - local_save_flags(irq_flags); + trace_ctx = tracing_gen_ctx(); buffer = global_trace.array_buffer.buffer; ring_buffer_nest_start(buffer); - event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, - irq_flags, pc); + event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, + trace_ctx); if (!event) { size = 0; goto out; @@ -1045,7 +1042,7 @@ int __trace_puts(unsigned long ip, const char *str, int size) entry->buf[size] = '\0'; __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL); + ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL); out: ring_buffer_nest_end(buffer); return size; @@ -1062,25 +1059,22 @@ int __trace_bputs(unsigned long ip, const char *str) struct ring_buffer_event *event; struct trace_buffer *buffer; struct bputs_entry *entry; - unsigned long irq_flags; + unsigned int trace_ctx; int size = sizeof(struct bputs_entry); int ret = 0; - int pc; if (!(global_trace.trace_flags & TRACE_ITER_PRINTK)) return 0; - pc = preempt_count(); - if (unlikely(tracing_selftest_running || tracing_disabled)) return 0; - local_save_flags(irq_flags); + trace_ctx = tracing_gen_ctx(); buffer = global_trace.array_buffer.buffer; ring_buffer_nest_start(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, - irq_flags, pc); + trace_ctx); if (!event) goto out; @@ -1089,7 +1083,7 @@ int __trace_bputs(unsigned long ip, const char *str) entry->str = str; __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL); + ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL); ret = 1; out: @@ -2585,36 +2579,69 @@ enum print_line_t trace_handle_return(struct trace_seq *s) } EXPORT_SYMBOL_GPL(trace_handle_return); -void -tracing_generic_entry_update(struct trace_entry *entry, unsigned short type, - unsigned long flags, int pc) +unsigned int tracing_gen_ctx_flags(unsigned long irqflags) { - struct task_struct *tsk = current; + unsigned int trace_flags = 0; + unsigned int pc; + + pc = preempt_count(); - entry->preempt_count = pc & 0xff; - entry->pid = (tsk) ? tsk->pid : 0; - entry->type = type; - entry->flags = #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT - (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | + if (irqs_disabled_flags(irqflags)) + trace_flags |= TRACE_FLAG_IRQS_OFF; #else - TRACE_FLAG_IRQS_NOSUPPORT | + trace_flags |= TRACE_FLAG_IRQS_NOSUPPORT; #endif - ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) | - ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | - ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) | - (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) | - (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0); + + if (pc & NMI_MASK) + trace_flags |= TRACE_FLAG_NMI; + if (pc & HARDIRQ_MASK) + trace_flags |= TRACE_FLAG_HARDIRQ; + + if (pc & SOFTIRQ_OFFSET) + trace_flags |= TRACE_FLAG_SOFTIRQ; + + if (tif_need_resched()) + trace_flags |= TRACE_FLAG_NEED_RESCHED; + if (test_preempt_need_resched()) + trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; + return (trace_flags << 16) | (pc & 0xff); +} + +unsigned int tracing_gen_ctx(void) +{ + unsigned long irqflags; + +#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT + local_save_flags(irqflags); +#else + irqflags = 0; +#endif + return tracing_gen_ctx_flags(irqflags); +} + +unsigned int tracing_gen_ctx_dec(void) +{ + unsigned int trace_ctx; + + trace_ctx = tracing_gen_ctx(); + + /* + * Subtract one from the preeption counter if preemption is enabled, + * see trace_event_buffer_reserve()for details. + */ + if (IS_ENABLED(CONFIG_PREEMPTION)) + trace_ctx--; + return trace_ctx; } -EXPORT_SYMBOL_GPL(tracing_generic_entry_update); struct ring_buffer_event * trace_buffer_lock_reserve(struct trace_buffer *buffer, int type, unsigned long len, - unsigned long flags, int pc) + unsigned int trace_ctx) { - return __trace_buffer_lock_reserve(buffer, type, len, flags, pc); + return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx); } DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event); @@ -2734,7 +2761,7 @@ struct ring_buffer_event * trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, struct trace_event_file *trace_file, int type, unsigned long len, - unsigned long flags, int pc) + unsigned int trace_ctx) { struct ring_buffer_event *entry; int val; @@ -2747,15 +2774,15 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, /* Try to use the per cpu buffer first */ val = this_cpu_inc_return(trace_buffered_event_cnt); if (val == 1) { - trace_event_setup(entry, type, flags, pc); + trace_event_setup(entry, type, trace_ctx); entry->array[0] = len; return entry; } this_cpu_dec(trace_buffered_event_cnt); } - entry = __trace_buffer_lock_reserve(*current_rb, - type, len, flags, pc); + entry = __trace_buffer_lock_reserve(*current_rb, type, len, + trace_ctx); /* * If tracing is off, but we have triggers enabled * we still need to look at the event data. Use the temp_buffer @@ -2764,8 +2791,8 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, */ if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) { *current_rb = temp_buffer; - entry = __trace_buffer_lock_reserve(*current_rb, - type, len, flags, pc); + entry = __trace_buffer_lock_reserve(*current_rb, type, len, + trace_ctx); } return entry; } @@ -2851,7 +2878,7 @@ void trace_event_buffer_commit(struct trace_event_buffer *fbuffer) ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT); event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer, fbuffer->event, fbuffer->entry, - fbuffer->flags, fbuffer->pc, fbuffer->regs); + fbuffer->trace_ctx, fbuffer->regs); } EXPORT_SYMBOL_GPL(trace_event_buffer_commit); @@ -2867,7 +2894,7 @@ EXPORT_SYMBOL_GPL(trace_event_buffer_commit); void trace_buffer_unlock_commit_regs(struct trace_array *tr, struct trace_buffer *buffer, struct ring_buffer_event *event, - unsigned long flags, int pc, + unsigned int trace_ctx, struct pt_regs *regs) { __buffer_unlock_commit(buffer, event); @@ -2878,8 +2905,8 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr, * and mmiotrace, but that's ok if they lose a function or * two. They are not that meaningful. */ - ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs); - ftrace_trace_userstack(tr, buffer, flags, pc); + ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs); + ftrace_trace_userstack(tr, buffer, trace_ctx); } /* @@ -2893,9 +2920,8 @@ trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, } void -trace_function(struct trace_array *tr, - unsigned long ip, unsigned long parent_ip, unsigned long flags, - int pc) +trace_function(struct trace_array *tr, unsigned long ip, unsigned long + parent_ip, unsigned int trace_ctx) { struct trace_event_call *call = &event_function; struct trace_buffer *buffer = tr->array_buffer.buffer; @@ -2903,7 +2929,7 @@ trace_function(struct trace_array *tr, struct ftrace_entry *entry; event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), - flags, pc); + trace_ctx); if (!event) return; entry = ring_buffer_event_data(event); @@ -2937,8 +2963,8 @@ static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks); static DEFINE_PER_CPU(int, ftrace_stack_reserve); static void __ftrace_trace_stack(struct trace_buffer *buffer, - unsigned long flags, - int skip, int pc, struct pt_regs *regs) + unsigned int trace_ctx, + int skip, struct pt_regs *regs) { struct trace_event_call *call = &event_kernel_stack; struct ring_buffer_event *event; @@ -2985,7 +3011,7 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer, size = nr_entries * sizeof(unsigned long); event = __trace_buffer_lock_reserve(buffer, TRACE_STACK, - sizeof(*entry) + size, flags, pc); + sizeof(*entry) + size, trace_ctx); if (!event) goto out; entry = ring_buffer_event_data(event); @@ -3006,22 +3032,22 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer, static inline void ftrace_trace_stack(struct trace_array *tr, struct trace_buffer *buffer, - unsigned long flags, - int skip, int pc, struct pt_regs *regs) + unsigned int trace_ctx, + int skip, struct pt_regs *regs) { if (!(tr->trace_flags & TRACE_ITER_STACKTRACE)) return; - __ftrace_trace_stack(buffer, flags, skip, pc, regs); + __ftrace_trace_stack(buffer, trace_ctx, skip, regs); } -void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, - int pc) +void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, + int skip) { struct trace_buffer *buffer = tr->array_buffer.buffer; if (rcu_is_watching()) { - __ftrace_trace_stack(buffer, flags, skip, pc, NULL); + __ftrace_trace_stack(buffer, trace_ctx, skip, NULL); return; } @@ -3035,7 +3061,7 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, return; rcu_irq_enter_irqson(); - __ftrace_trace_stack(buffer, flags, skip, pc, NULL); + __ftrace_trace_stack(buffer, trace_ctx, skip, NULL); rcu_irq_exit_irqson(); } @@ -3045,19 +3071,15 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, */ void trace_dump_stack(int skip) { - unsigned long flags; - if (tracing_disabled || tracing_selftest_running) return; - local_save_flags(flags); - #ifndef CONFIG_UNWINDER_ORC /* Skip 1 to skip this function. */ skip++; #endif __ftrace_trace_stack(global_trace.array_buffer.buffer, - flags, skip, preempt_count(), NULL); + tracing_gen_ctx(), skip, NULL); } EXPORT_SYMBOL_GPL(trace_dump_stack); @@ -3066,7 +3088,7 @@ static DEFINE_PER_CPU(int, user_stack_count); static void ftrace_trace_userstack(struct trace_array *tr, - struct trace_buffer *buffer, unsigned long flags, int pc) + struct trace_buffer *buffer, unsigned int trace_ctx) { struct trace_event_call *call = &event_user_stack; struct ring_buffer_event *event; @@ -3093,7 +3115,7 @@ ftrace_trace_userstack(struct trace_array *tr, __this_cpu_inc(user_stack_count); event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, - sizeof(*entry), flags, pc); + sizeof(*entry), trace_ctx); if (!event) goto out_drop_count; entry = ring_buffer_event_data(event); @@ -3113,7 +3135,7 @@ ftrace_trace_userstack(struct trace_array *tr, #else /* CONFIG_USER_STACKTRACE_SUPPORT */ static void ftrace_trace_userstack(struct trace_array *tr, struct trace_buffer *buffer, - unsigned long flags, int pc) + unsigned int trace_ctx) { } #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */ @@ -3243,9 +3265,9 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) struct trace_buffer *buffer; struct trace_array *tr = &global_trace; struct bprint_entry *entry; - unsigned long flags; + unsigned int trace_ctx; char *tbuffer; - int len = 0, size, pc; + int len = 0, size; if (unlikely(tracing_selftest_running || tracing_disabled)) return 0; @@ -3253,7 +3275,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) /* Don't pollute graph traces with trace_vprintk internals */ pause_graph_tracing(); - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); preempt_disable_notrace(); tbuffer = get_trace_buf(); @@ -3267,12 +3289,11 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0) goto out_put; - local_save_flags(flags); size = sizeof(*entry) + sizeof(u32) * len; buffer = tr->array_buffer.buffer; ring_buffer_nest_start(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, - flags, pc); + trace_ctx); if (!event) goto out; entry = ring_buffer_event_data(event); @@ -3282,7 +3303,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) memcpy(entry->buf, tbuffer, sizeof(u32) * len); if (!call_filter_check_discard(call, entry, buffer, event)) { __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL); + ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL); } out: @@ -3305,9 +3326,9 @@ __trace_array_vprintk(struct trace_buffer *buffer, { struct trace_event_call *call = &event_print; struct ring_buffer_event *event; - int len = 0, size, pc; + int len = 0, size; struct print_entry *entry; - unsigned long flags; + unsigned int trace_ctx; char *tbuffer; if (tracing_disabled || tracing_selftest_running) @@ -3316,7 +3337,7 @@ __trace_array_vprintk(struct trace_buffer *buffer, /* Don't pollute graph traces with trace_vprintk internals */ pause_graph_tracing(); - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); preempt_disable_notrace(); @@ -3328,11 +3349,10 @@ __trace_array_vprintk(struct trace_buffer *buffer, len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args); - local_save_flags(flags); size = sizeof(*entry) + len + 1; ring_buffer_nest_start(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, - flags, pc); + trace_ctx); if (!event) goto out; entry = ring_buffer_event_data(event); @@ -3341,7 +3361,7 @@ __trace_array_vprintk(struct trace_buffer *buffer, memcpy(&entry->buf, tbuffer, len + 1); if (!call_filter_check_discard(call, entry, buffer, event)) { __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL); + ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL); } out: @@ -6654,7 +6674,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, enum event_trigger_type tt = ETT_NONE; struct trace_buffer *buffer; struct print_entry *entry; - unsigned long irq_flags; ssize_t written; int size; int len; @@ -6674,7 +6693,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE); - local_save_flags(irq_flags); size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */ /* If less than "", then make sure we can still add that */ @@ -6683,7 +6701,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, buffer = tr->array_buffer.buffer; event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, - irq_flags, preempt_count()); + tracing_gen_ctx()); if (unlikely(!event)) /* Ring buffer disabled, return as if not open for write */ return -EBADF; @@ -6735,7 +6753,6 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, struct ring_buffer_event *event; struct trace_buffer *buffer; struct raw_data_entry *entry; - unsigned long irq_flags; ssize_t written; int size; int len; @@ -6757,14 +6774,13 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE); - local_save_flags(irq_flags); size = sizeof(*entry) + cnt; if (cnt < FAULT_SIZE_ID) size += FAULT_SIZE_ID - cnt; buffer = tr->array_buffer.buffer; event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size, - irq_flags, preempt_count()); + tracing_gen_ctx()); if (!event) /* Ring buffer disabled, return as if not open for write */ return -EBADF; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index e448d2da0b99..8daf3a0758b1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -589,8 +589,7 @@ struct ring_buffer_event * trace_buffer_lock_reserve(struct trace_buffer *buffer, int type, unsigned long len, - unsigned long flags, - int pc); + unsigned int trace_ctx); struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data); @@ -615,11 +614,11 @@ unsigned long trace_total_entries(struct trace_array *tr); void trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, - unsigned long flags, int pc); + unsigned int trace_ctx); void trace_graph_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, - unsigned long flags, int pc); + unsigned int trace_ctx); void trace_latency_header(struct seq_file *m); void trace_default_header(struct seq_file *m); void print_trace_header(struct seq_file *m, struct trace_iterator *iter); @@ -687,11 +686,10 @@ static inline void latency_fsnotify(struct trace_array *tr) { } #endif #ifdef CONFIG_STACKTRACE -void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, - int pc); +void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, int skip); #else -static inline void __trace_stack(struct trace_array *tr, unsigned long flags, - int skip, int pc) +static inline void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, + int skip) { } #endif /* CONFIG_STACKTRACE */ @@ -831,10 +829,10 @@ extern void graph_trace_open(struct trace_iterator *iter); extern void graph_trace_close(struct trace_iterator *iter); extern int __trace_graph_entry(struct trace_array *tr, struct ftrace_graph_ent *trace, - unsigned long flags, int pc); + unsigned int trace_ctx); extern void __trace_graph_return(struct trace_array *tr, struct ftrace_graph_ret *trace, - unsigned long flags, int pc); + unsigned int trace_ctx); #ifdef CONFIG_DYNAMIC_FTRACE extern struct ftrace_hash __rcu *ftrace_graph_hash; @@ -1297,15 +1295,15 @@ extern int call_filter_check_discard(struct trace_event_call *call, void *rec, void trace_buffer_unlock_commit_regs(struct trace_array *tr, struct trace_buffer *buffer, struct ring_buffer_event *event, - unsigned long flags, int pc, + unsigned int trcace_ctx, struct pt_regs *regs); static inline void trace_buffer_unlock_commit(struct trace_array *tr, struct trace_buffer *buffer, struct ring_buffer_event *event, - unsigned long flags, int pc) + unsigned int trace_ctx) { - trace_buffer_unlock_commit_regs(tr, buffer, event, flags, pc, NULL); + trace_buffer_unlock_commit_regs(tr, buffer, event, trace_ctx, NULL); } DECLARE_PER_CPU(struct ring_buffer_event *, trace_buffered_event); @@ -1366,8 +1364,7 @@ __event_trigger_test_discard(struct trace_event_file *file, * @buffer: The ring buffer that the event is being written to * @event: The event meta data in the ring buffer * @entry: The event itself - * @irq_flags: The state of the interrupts at the start of the event - * @pc: The state of the preempt count at the start of the event. + * @trace_ctx: The tracing context flags. * * This is a helper function to handle triggers that require data * from the event itself. It also tests the event against filters and @@ -1377,12 +1374,12 @@ static inline void event_trigger_unlock_commit(struct trace_event_file *file, struct trace_buffer *buffer, struct ring_buffer_event *event, - void *entry, unsigned long irq_flags, int pc) + void *entry, unsigned int trace_ctx) { enum event_trigger_type tt = ETT_NONE; if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) - trace_buffer_unlock_commit(file->tr, buffer, event, irq_flags, pc); + trace_buffer_unlock_commit(file->tr, buffer, event, trace_ctx); if (tt) event_triggers_post_call(file, tt); @@ -1394,8 +1391,7 @@ event_trigger_unlock_commit(struct trace_event_file *file, * @buffer: The ring buffer that the event is being written to * @event: The event meta data in the ring buffer * @entry: The event itself - * @irq_flags: The state of the interrupts at the start of the event - * @pc: The state of the preempt count at the start of the event. + * @trace_ctx: The tracing context flags. * * This is a helper function to handle triggers that require data * from the event itself. It also tests the event against filters and @@ -1408,14 +1404,14 @@ static inline void event_trigger_unlock_commit_regs(struct trace_event_file *file, struct trace_buffer *buffer, struct ring_buffer_event *event, - void *entry, unsigned long irq_flags, int pc, + void *entry, unsigned int trace_ctx, struct pt_regs *regs) { enum event_trigger_type tt = ETT_NONE; if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) trace_buffer_unlock_commit_regs(file->tr, buffer, event, - irq_flags, pc, regs); + trace_ctx, regs); if (tt) event_triggers_post_call(file, tt); diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index eff099123aa2..e47fdb4c92fb 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -37,7 +37,7 @@ probe_likely_condition(struct ftrace_likely_data *f, int val, int expect) struct ring_buffer_event *event; struct trace_branch *entry; unsigned long flags; - int pc; + unsigned int trace_ctx; const char *p; if (current->trace_recursion & TRACE_BRANCH_BIT) @@ -59,10 +59,10 @@ probe_likely_condition(struct ftrace_likely_data *f, int val, int expect) if (atomic_read(&data->disabled)) goto out; - pc = preempt_count(); + trace_ctx = tracing_gen_ctx_flags(flags); buffer = tr->array_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_BRANCH, - sizeof(*entry), flags, pc); + sizeof(*entry), trace_ctx); if (!event) goto out; diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index a71181655958..288ad2c274fb 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -421,11 +421,8 @@ NOKPROBE_SYMBOL(perf_trace_buf_alloc); void perf_trace_buf_update(void *record, u16 type) { struct trace_entry *entry = record; - int pc = preempt_count(); - unsigned long flags; - local_save_flags(flags); - tracing_generic_entry_update(entry, type, flags, pc); + tracing_generic_entry_update(entry, type, tracing_gen_ctx()); } NOKPROBE_SYMBOL(perf_trace_buf_update); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index e9d28eeccb7e..20ccce3e4ffb 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -258,22 +258,19 @@ void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer, trace_event_ignore_this_pid(trace_file)) return NULL; - local_save_flags(fbuffer->flags); - fbuffer->pc = preempt_count(); /* * If CONFIG_PREEMPTION is enabled, then the tracepoint itself disables * preemption (adding one to the preempt_count). Since we are * interested in the preempt_count at the time the tracepoint was * hit, we need to subtract one to offset the increment. */ - if (IS_ENABLED(CONFIG_PREEMPTION)) - fbuffer->pc--; + fbuffer->trace_ctx = tracing_gen_ctx_dec(); fbuffer->trace_file = trace_file; fbuffer->event = trace_event_buffer_lock_reserve(&fbuffer->buffer, trace_file, event_call->event.type, len, - fbuffer->flags, fbuffer->pc); + fbuffer->trace_ctx); if (!fbuffer->event) return NULL; @@ -3678,12 +3675,11 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip, struct trace_buffer *buffer; struct ring_buffer_event *event; struct ftrace_entry *entry; - unsigned long flags; + unsigned int trace_ctx; long disabled; int cpu; - int pc; - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); preempt_disable_notrace(); cpu = raw_smp_processor_id(); disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu)); @@ -3691,11 +3687,9 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip, if (disabled != 1) goto out; - local_save_flags(flags); - event = trace_event_buffer_lock_reserve(&buffer, &event_trace_file, TRACE_FN, sizeof(*entry), - flags, pc); + trace_ctx); if (!event) goto out; entry = ring_buffer_event_data(event); @@ -3703,7 +3697,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip, entry->parent_ip = parent_ip; event_trigger_unlock_commit(&event_trace_file, buffer, event, - entry, flags, pc); + entry, trace_ctx); out: atomic_dec(&per_cpu(ftrace_test_event_disable, cpu)); preempt_enable_notrace(); diff --git a/kernel/trace/trace_events_inject.c b/kernel/trace/trace_events_inject.c index 22bcf7c51d1e..c188045c5f97 100644 --- a/kernel/trace/trace_events_inject.c +++ b/kernel/trace/trace_events_inject.c @@ -192,7 +192,6 @@ static void *trace_alloc_entry(struct trace_event_call *call, int *size) static int parse_entry(char *str, struct trace_event_call *call, void **pentry) { struct ftrace_event_field *field; - unsigned long irq_flags; void *entry = NULL; int entry_size; u64 val = 0; @@ -203,9 +202,8 @@ static int parse_entry(char *str, struct trace_event_call *call, void **pentry) if (!entry) return -ENOMEM; - local_save_flags(irq_flags); - tracing_generic_entry_update(entry, call->event.type, irq_flags, - preempt_count()); + tracing_generic_entry_update(entry, call->event.type, + tracing_gen_ctx()); while ((len = parse_field(str, call, &field, &val)) > 0) { if (is_function_field(field)) diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index f67aec5bb771..f93723ca66bc 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -131,10 +131,9 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, { struct trace_array *tr = op->private; struct trace_array_cpu *data; - unsigned long flags; + unsigned int trace_ctx; int bit; int cpu; - int pc; if (unlikely(!tr->function_enabled)) return; @@ -143,15 +142,14 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, if (bit < 0) return; - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); preempt_disable_notrace(); cpu = smp_processor_id(); data = per_cpu_ptr(tr->array_buffer.data, cpu); - if (!atomic_read(&data->disabled)) { - local_save_flags(flags); - trace_function(tr, ip, parent_ip, flags, pc); - } + if (!atomic_read(&data->disabled)) + trace_function(tr, ip, parent_ip, trace_ctx); + ftrace_test_recursion_unlock(bit); preempt_enable_notrace(); } @@ -183,7 +181,7 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip, unsigned long flags; long disabled; int cpu; - int pc; + unsigned int trace_ctx; if (unlikely(!tr->function_enabled)) return; @@ -198,9 +196,9 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip, disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { - pc = preempt_count(); - trace_function(tr, ip, parent_ip, flags, pc); - __trace_stack(tr, flags, STACK_SKIP, pc); + trace_ctx = tracing_gen_ctx_flags(flags); + trace_function(tr, ip, parent_ip, trace_ctx); + __trace_stack(tr, trace_ctx, STACK_SKIP); } atomic_dec(&data->disabled); @@ -403,13 +401,11 @@ ftrace_traceoff(unsigned long ip, unsigned long parent_ip, static __always_inline void trace_stack(struct trace_array *tr) { - unsigned long flags; - int pc; + unsigned int trace_ctx; - local_save_flags(flags); - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); - __trace_stack(tr, flags, FTRACE_STACK_SKIP, pc); + __trace_stack(tr, trace_ctx, FTRACE_STACK_SKIP); } static void diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index d874dec87131..0aa6e6faa943 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -96,8 +96,7 @@ print_graph_duration(struct trace_array *tr, unsigned long long duration, int __trace_graph_entry(struct trace_array *tr, struct ftrace_graph_ent *trace, - unsigned long flags, - int pc) + unsigned int trace_ctx) { struct trace_event_call *call = &event_funcgraph_entry; struct ring_buffer_event *event; @@ -105,7 +104,7 @@ int __trace_graph_entry(struct trace_array *tr, struct ftrace_graph_ent_entry *entry; event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, - sizeof(*entry), flags, pc); + sizeof(*entry), trace_ctx); if (!event) return 0; entry = ring_buffer_event_data(event); @@ -129,10 +128,10 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) struct trace_array *tr = graph_array; struct trace_array_cpu *data; unsigned long flags; + unsigned int trace_ctx; long disabled; int ret; int cpu; - int pc; if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) return 0; @@ -174,8 +173,8 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) data = per_cpu_ptr(tr->array_buffer.data, cpu); disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { - pc = preempt_count(); - ret = __trace_graph_entry(tr, trace, flags, pc); + trace_ctx = tracing_gen_ctx_flags(flags); + ret = __trace_graph_entry(tr, trace, trace_ctx); } else { ret = 0; } @@ -188,7 +187,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) static void __trace_graph_function(struct trace_array *tr, - unsigned long ip, unsigned long flags, int pc) + unsigned long ip, unsigned int trace_ctx) { u64 time = trace_clock_local(); struct ftrace_graph_ent ent = { @@ -202,22 +201,21 @@ __trace_graph_function(struct trace_array *tr, .rettime = time, }; - __trace_graph_entry(tr, &ent, flags, pc); - __trace_graph_return(tr, &ret, flags, pc); + __trace_graph_entry(tr, &ent, trace_ctx); + __trace_graph_return(tr, &ret, trace_ctx); } void trace_graph_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, - unsigned long flags, int pc) + unsigned int trace_ctx) { - __trace_graph_function(tr, ip, flags, pc); + __trace_graph_function(tr, ip, trace_ctx); } void __trace_graph_return(struct trace_array *tr, struct ftrace_graph_ret *trace, - unsigned long flags, - int pc) + unsigned int trace_ctx) { struct trace_event_call *call = &event_funcgraph_exit; struct ring_buffer_event *event; @@ -225,7 +223,7 @@ void __trace_graph_return(struct trace_array *tr, struct ftrace_graph_ret_entry *entry; event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET, - sizeof(*entry), flags, pc); + sizeof(*entry), trace_ctx); if (!event) return; entry = ring_buffer_event_data(event); @@ -239,9 +237,9 @@ void trace_graph_return(struct ftrace_graph_ret *trace) struct trace_array *tr = graph_array; struct trace_array_cpu *data; unsigned long flags; + unsigned int trace_ctx; long disabled; int cpu; - int pc; ftrace_graph_addr_finish(trace); @@ -255,8 +253,8 @@ void trace_graph_return(struct ftrace_graph_ret *trace) data = per_cpu_ptr(tr->array_buffer.data, cpu); disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { - pc = preempt_count(); - __trace_graph_return(tr, trace, flags, pc); + trace_ctx = tracing_gen_ctx_flags(flags); + __trace_graph_return(tr, trace, trace_ctx); } atomic_dec(&data->disabled); local_irq_restore(flags); diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index c0df9b97f147..34dc1a712dcb 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -108,14 +108,9 @@ static void trace_hwlat_sample(struct hwlat_sample *sample) struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct hwlat_entry *entry; - unsigned long flags; - int pc; - - pc = preempt_count(); - local_save_flags(flags); event = trace_buffer_lock_reserve(buffer, TRACE_HWLAT, sizeof(*entry), - flags, pc); + tracing_gen_ctx()); if (!event) return; entry = ring_buffer_event_data(event); diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 6756379b661f..590b3d51afae 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -143,11 +143,14 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip, struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; unsigned long flags; + unsigned int trace_ctx; if (!func_prolog_dec(tr, &data, &flags)) return; - trace_function(tr, ip, parent_ip, flags, preempt_count()); + trace_ctx = tracing_gen_ctx_flags(flags); + + trace_function(tr, ip, parent_ip, trace_ctx); atomic_dec(&data->disabled); } @@ -177,8 +180,8 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace) struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; unsigned long flags; + unsigned int trace_ctx; int ret; - int pc; if (ftrace_graph_ignore_func(trace)) return 0; @@ -195,8 +198,8 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace) if (!func_prolog_dec(tr, &data, &flags)) return 0; - pc = preempt_count(); - ret = __trace_graph_entry(tr, trace, flags, pc); + trace_ctx = tracing_gen_ctx_flags(flags); + ret = __trace_graph_entry(tr, trace, trace_ctx); atomic_dec(&data->disabled); return ret; @@ -207,15 +210,15 @@ static void irqsoff_graph_return(struct ftrace_graph_ret *trace) struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; unsigned long flags; - int pc; + unsigned int trace_ctx; ftrace_graph_addr_finish(trace); if (!func_prolog_dec(tr, &data, &flags)) return; - pc = preempt_count(); - __trace_graph_return(tr, trace, flags, pc); + trace_ctx = tracing_gen_ctx_flags(flags); + __trace_graph_return(tr, trace, trace_ctx); atomic_dec(&data->disabled); } @@ -267,12 +270,12 @@ static void irqsoff_print_header(struct seq_file *s) static void __trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, - unsigned long flags, int pc) + unsigned int trace_ctx) { if (is_graph(tr)) - trace_graph_function(tr, ip, parent_ip, flags, pc); + trace_graph_function(tr, ip, parent_ip, trace_ctx); else - trace_function(tr, ip, parent_ip, flags, pc); + trace_function(tr, ip, parent_ip, trace_ctx); } #else @@ -322,15 +325,13 @@ check_critical_timing(struct trace_array *tr, { u64 T0, T1, delta; unsigned long flags; - int pc; + unsigned int trace_ctx; T0 = data->preempt_timestamp; T1 = ftrace_now(cpu); delta = T1-T0; - local_save_flags(flags); - - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); if (!report_latency(tr, delta)) goto out; @@ -341,9 +342,9 @@ check_critical_timing(struct trace_array *tr, if (!report_latency(tr, delta)) goto out_unlock; - __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); + __trace_function(tr, CALLER_ADDR0, parent_ip, trace_ctx); /* Skip 5 functions to get to the irq/preempt enable function */ - __trace_stack(tr, flags, 5, pc); + __trace_stack(tr, trace_ctx, 5); if (data->critical_sequence != max_sequence) goto out_unlock; @@ -363,16 +364,15 @@ out_unlock: out: data->critical_sequence = max_sequence; data->preempt_timestamp = ftrace_now(cpu); - __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); + __trace_function(tr, CALLER_ADDR0, parent_ip, trace_ctx); } static nokprobe_inline void -start_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) +start_critical_timing(unsigned long ip, unsigned long parent_ip) { int cpu; struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; - unsigned long flags; if (!tracer_enabled || !tracing_is_enabled()) return; @@ -393,9 +393,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) data->preempt_timestamp = ftrace_now(cpu); data->critical_start = parent_ip ? : ip; - local_save_flags(flags); - - __trace_function(tr, ip, parent_ip, flags, pc); + __trace_function(tr, ip, parent_ip, tracing_gen_ctx()); per_cpu(tracing_cpu, cpu) = 1; @@ -403,12 +401,12 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) } static nokprobe_inline void -stop_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) +stop_critical_timing(unsigned long ip, unsigned long parent_ip) { int cpu; struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; - unsigned long flags; + unsigned int trace_ctx; cpu = raw_smp_processor_id(); /* Always clear the tracing cpu on stopping the trace */ @@ -428,8 +426,8 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) atomic_inc(&data->disabled); - local_save_flags(flags); - __trace_function(tr, ip, parent_ip, flags, pc); + trace_ctx = tracing_gen_ctx(); + __trace_function(tr, ip, parent_ip, trace_ctx); check_critical_timing(tr, data, parent_ip ? : ip, cpu); data->critical_start = 0; atomic_dec(&data->disabled); @@ -438,20 +436,16 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) /* start and stop critical timings used to for stoppage (in idle) */ void start_critical_timings(void) { - int pc = preempt_count(); - - if (preempt_trace(pc) || irq_trace()) - start_critical_timing(CALLER_ADDR0, CALLER_ADDR1, pc); + if (preempt_trace(preempt_count()) || irq_trace()) + start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); } EXPORT_SYMBOL_GPL(start_critical_timings); NOKPROBE_SYMBOL(start_critical_timings); void stop_critical_timings(void) { - int pc = preempt_count(); - - if (preempt_trace(pc) || irq_trace()) - stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1, pc); + if (preempt_trace(preempt_count()) || irq_trace()) + stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); } EXPORT_SYMBOL_GPL(stop_critical_timings); NOKPROBE_SYMBOL(stop_critical_timings); @@ -613,19 +607,15 @@ static void irqsoff_tracer_stop(struct trace_array *tr) */ void tracer_hardirqs_on(unsigned long a0, unsigned long a1) { - unsigned int pc = preempt_count(); - - if (!preempt_trace(pc) && irq_trace()) - stop_critical_timing(a0, a1, pc); + if (!preempt_trace(preempt_count()) && irq_trace()) + stop_critical_timing(a0, a1); } NOKPROBE_SYMBOL(tracer_hardirqs_on); void tracer_hardirqs_off(unsigned long a0, unsigned long a1) { - unsigned int pc = preempt_count(); - - if (!preempt_trace(pc) && irq_trace()) - start_critical_timing(a0, a1, pc); + if (!preempt_trace(preempt_count()) && irq_trace()) + start_critical_timing(a0, a1); } NOKPROBE_SYMBOL(tracer_hardirqs_off); @@ -665,18 +655,14 @@ static struct tracer irqsoff_tracer __read_mostly = #ifdef CONFIG_PREEMPT_TRACER void tracer_preempt_on(unsigned long a0, unsigned long a1) { - int pc = preempt_count(); - - if (preempt_trace(pc) && !irq_trace()) - stop_critical_timing(a0, a1, pc); + if (preempt_trace(preempt_count()) && !irq_trace()) + stop_critical_timing(a0, a1); } void tracer_preempt_off(unsigned long a0, unsigned long a1) { - int pc = preempt_count(); - - if (preempt_trace(pc) && !irq_trace()) - start_critical_timing(a0, a1, pc); + if (preempt_trace(preempt_count()) && !irq_trace()) + start_critical_timing(a0, a1); } static int preemptoff_tracer_init(struct trace_array *tr) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 56c7fbff7bd7..f6c459aba8a6 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1386,8 +1386,7 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, if (trace_trigger_soft_disabled(trace_file)) return; - local_save_flags(fbuffer.flags); - fbuffer.pc = preempt_count(); + fbuffer.trace_ctx = tracing_gen_ctx(); fbuffer.trace_file = trace_file; dsize = __get_data_size(&tk->tp, regs); @@ -1396,7 +1395,7 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, trace_event_buffer_lock_reserve(&fbuffer.buffer, trace_file, call->event.type, sizeof(*entry) + tk->tp.size + dsize, - fbuffer.flags, fbuffer.pc); + fbuffer.trace_ctx); if (!fbuffer.event) return; @@ -1434,8 +1433,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, if (trace_trigger_soft_disabled(trace_file)) return; - local_save_flags(fbuffer.flags); - fbuffer.pc = preempt_count(); + fbuffer.trace_ctx = tracing_gen_ctx(); fbuffer.trace_file = trace_file; dsize = __get_data_size(&tk->tp, regs); @@ -1443,7 +1441,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, trace_event_buffer_lock_reserve(&fbuffer.buffer, trace_file, call->event.type, sizeof(*entry) + tk->tp.size + dsize, - fbuffer.flags, fbuffer.pc); + fbuffer.trace_ctx); if (!fbuffer.event) return; diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 84582bf1ed5f..7221ae0b4c47 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -300,10 +300,11 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct trace_mmiotrace_rw *entry; - int pc = preempt_count(); + unsigned int trace_ctx; + trace_ctx = tracing_gen_ctx_flags(0); event = trace_buffer_lock_reserve(buffer, TRACE_MMIO_RW, - sizeof(*entry), 0, pc); + sizeof(*entry), trace_ctx); if (!event) { atomic_inc(&dropped_count); return; @@ -312,7 +313,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, entry->rw = *rw; if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(tr, buffer, event, 0, pc); + trace_buffer_unlock_commit(tr, buffer, event, trace_ctx); } void mmio_trace_rw(struct mmiotrace_rw *rw) @@ -330,10 +331,11 @@ static void __trace_mmiotrace_map(struct trace_array *tr, struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct trace_mmiotrace_map *entry; - int pc = preempt_count(); + unsigned int trace_ctx; + trace_ctx = tracing_gen_ctx_flags(0); event = trace_buffer_lock_reserve(buffer, TRACE_MMIO_MAP, - sizeof(*entry), 0, pc); + sizeof(*entry), trace_ctx); if (!event) { atomic_inc(&dropped_count); return; @@ -342,7 +344,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr, entry->map = *map; if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(tr, buffer, event, 0, pc); + trace_buffer_unlock_commit(tr, buffer, event, trace_ctx); } void mmio_trace_mapping(struct mmiotrace_map *map) diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index c0181066dbe9..e5778d1d7a5b 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -67,7 +67,7 @@ static bool function_enabled; static int func_prolog_preempt_disable(struct trace_array *tr, struct trace_array_cpu **data, - int *pc) + unsigned int *trace_ctx) { long disabled; int cpu; @@ -75,7 +75,7 @@ func_prolog_preempt_disable(struct trace_array *tr, if (likely(!wakeup_task)) return 0; - *pc = preempt_count(); + *trace_ctx = tracing_gen_ctx(); preempt_disable_notrace(); cpu = raw_smp_processor_id(); @@ -116,8 +116,8 @@ static int wakeup_graph_entry(struct ftrace_graph_ent *trace) { struct trace_array *tr = wakeup_trace; struct trace_array_cpu *data; - unsigned long flags; - int pc, ret = 0; + unsigned int trace_ctx; + int ret = 0; if (ftrace_graph_ignore_func(trace)) return 0; @@ -131,11 +131,10 @@ static int wakeup_graph_entry(struct ftrace_graph_ent *trace) if (ftrace_graph_notrace_addr(trace->func)) return 1; - if (!func_prolog_preempt_disable(tr, &data, &pc)) + if (!func_prolog_preempt_disable(tr, &data, &trace_ctx)) return 0; - local_save_flags(flags); - ret = __trace_graph_entry(tr, trace, flags, pc); + ret = __trace_graph_entry(tr, trace, trace_ctx); atomic_dec(&data->disabled); preempt_enable_notrace(); @@ -146,16 +145,14 @@ static void wakeup_graph_return(struct ftrace_graph_ret *trace) { struct trace_array *tr = wakeup_trace; struct trace_array_cpu *data; - unsigned long flags; - int pc; + unsigned int trace_ctx; ftrace_graph_addr_finish(trace); - if (!func_prolog_preempt_disable(tr, &data, &pc)) + if (!func_prolog_preempt_disable(tr, &data, &trace_ctx)) return; - local_save_flags(flags); - __trace_graph_return(tr, trace, flags, pc); + __trace_graph_return(tr, trace, trace_ctx); atomic_dec(&data->disabled); preempt_enable_notrace(); @@ -217,13 +214,13 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip, struct trace_array *tr = wakeup_trace; struct trace_array_cpu *data; unsigned long flags; - int pc; + unsigned int trace_ctx; - if (!func_prolog_preempt_disable(tr, &data, &pc)) + if (!func_prolog_preempt_disable(tr, &data, &trace_ctx)) return; local_irq_save(flags); - trace_function(tr, ip, parent_ip, flags, pc); + trace_function(tr, ip, parent_ip, trace_ctx); local_irq_restore(flags); atomic_dec(&data->disabled); @@ -303,12 +300,12 @@ static void wakeup_print_header(struct seq_file *s) static void __trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, - unsigned long flags, int pc) + unsigned int trace_ctx) { if (is_graph(tr)) - trace_graph_function(tr, ip, parent_ip, flags, pc); + trace_graph_function(tr, ip, parent_ip, trace_ctx); else - trace_function(tr, ip, parent_ip, flags, pc); + trace_function(tr, ip, parent_ip, trace_ctx); } static int wakeup_flag_changed(struct trace_array *tr, u32 mask, int set) @@ -375,7 +372,7 @@ static void tracing_sched_switch_trace(struct trace_array *tr, struct task_struct *prev, struct task_struct *next, - unsigned long flags, int pc) + unsigned int trace_ctx) { struct trace_event_call *call = &event_context_switch; struct trace_buffer *buffer = tr->array_buffer.buffer; @@ -383,7 +380,7 @@ tracing_sched_switch_trace(struct trace_array *tr, struct ctx_switch_entry *entry; event = trace_buffer_lock_reserve(buffer, TRACE_CTX, - sizeof(*entry), flags, pc); + sizeof(*entry), trace_ctx); if (!event) return; entry = ring_buffer_event_data(event); @@ -396,14 +393,14 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->next_cpu = task_cpu(next); if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(tr, buffer, event, flags, pc); + trace_buffer_unlock_commit(tr, buffer, event, trace_ctx); } static void tracing_sched_wakeup_trace(struct trace_array *tr, struct task_struct *wakee, struct task_struct *curr, - unsigned long flags, int pc) + unsigned int trace_ctx) { struct trace_event_call *call = &event_wakeup; struct ring_buffer_event *event; @@ -411,7 +408,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, struct trace_buffer *buffer = tr->array_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_WAKE, - sizeof(*entry), flags, pc); + sizeof(*entry), trace_ctx); if (!event) return; entry = ring_buffer_event_data(event); @@ -424,7 +421,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_cpu = task_cpu(wakee); if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(tr, buffer, event, flags, pc); + trace_buffer_unlock_commit(tr, buffer, event, trace_ctx); } static void notrace @@ -436,7 +433,7 @@ probe_wakeup_sched_switch(void *ignore, bool preempt, unsigned long flags; long disabled; int cpu; - int pc; + unsigned int trace_ctx; tracing_record_cmdline(prev); @@ -455,8 +452,6 @@ probe_wakeup_sched_switch(void *ignore, bool preempt, if (next != wakeup_task) return; - pc = preempt_count(); - /* disable local data, not wakeup_cpu data */ cpu = raw_smp_processor_id(); disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->array_buffer.data, cpu)->disabled); @@ -464,6 +459,8 @@ probe_wakeup_sched_switch(void *ignore, bool preempt, goto out; local_irq_save(flags); + trace_ctx = tracing_gen_ctx_flags(flags); + arch_spin_lock(&wakeup_lock); /* We could race with grabbing wakeup_lock */ @@ -473,9 +470,9 @@ probe_wakeup_sched_switch(void *ignore, bool preempt, /* The task we are waiting for is waking up */ data = per_cpu_ptr(wakeup_trace->array_buffer.data, wakeup_cpu); - __trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); - tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); - __trace_stack(wakeup_trace, flags, 0, pc); + __trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, trace_ctx); + tracing_sched_switch_trace(wakeup_trace, prev, next, trace_ctx); + __trace_stack(wakeup_trace, trace_ctx, 0); T0 = data->preempt_timestamp; T1 = ftrace_now(cpu); @@ -527,9 +524,8 @@ probe_wakeup(void *ignore, struct task_struct *p) { struct trace_array_cpu *data; int cpu = smp_processor_id(); - unsigned long flags; long disabled; - int pc; + unsigned int trace_ctx; if (likely(!tracer_enabled)) return; @@ -550,11 +546,12 @@ probe_wakeup(void *ignore, struct task_struct *p) (!dl_task(p) && (p->prio >= wakeup_prio || p->prio >= current->prio))) return; - pc = preempt_count(); disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->array_buffer.data, cpu)->disabled); if (unlikely(disabled != 1)) goto out; + trace_ctx = tracing_gen_ctx(); + /* interrupts should be off from try_to_wake_up */ arch_spin_lock(&wakeup_lock); @@ -581,19 +578,17 @@ probe_wakeup(void *ignore, struct task_struct *p) wakeup_task = get_task_struct(p); - local_save_flags(flags); - data = per_cpu_ptr(wakeup_trace->array_buffer.data, wakeup_cpu); data->preempt_timestamp = ftrace_now(cpu); - tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc); - __trace_stack(wakeup_trace, flags, 0, pc); + tracing_sched_wakeup_trace(wakeup_trace, p, current, trace_ctx); + __trace_stack(wakeup_trace, trace_ctx, 0); /* * We must be careful in using CALLER_ADDR2. But since wake_up * is not called by an assembly function (where as schedule is) * it should be safe to use it here. */ - __trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); + __trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, trace_ctx); out_locked: arch_spin_unlock(&wakeup_lock); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index d85a2f0f316b..8bfcd3b09422 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -298,9 +298,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) struct syscall_metadata *sys_data; struct ring_buffer_event *event; struct trace_buffer *buffer; - unsigned long irq_flags; + unsigned int trace_ctx; unsigned long args[6]; - int pc; int syscall_nr; int size; @@ -322,12 +321,11 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; - local_save_flags(irq_flags); - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); buffer = tr->array_buffer.buffer; event = trace_buffer_lock_reserve(buffer, - sys_data->enter_event->event.type, size, irq_flags, pc); + sys_data->enter_event->event.type, size, trace_ctx); if (!event) return; @@ -337,7 +335,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args); event_trigger_unlock_commit(trace_file, buffer, event, entry, - irq_flags, pc); + trace_ctx); } static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) @@ -348,8 +346,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) struct syscall_metadata *sys_data; struct ring_buffer_event *event; struct trace_buffer *buffer; - unsigned long irq_flags; - int pc; + unsigned int trace_ctx; int syscall_nr; syscall_nr = trace_get_syscall_nr(current, regs); @@ -368,13 +365,12 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) if (!sys_data) return; - local_save_flags(irq_flags); - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); buffer = tr->array_buffer.buffer; event = trace_buffer_lock_reserve(buffer, sys_data->exit_event->event.type, sizeof(*entry), - irq_flags, pc); + trace_ctx); if (!event) return; @@ -383,7 +379,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) entry->ret = syscall_get_return_value(current, regs); event_trigger_unlock_commit(trace_file, buffer, event, entry, - irq_flags, pc); + trace_ctx); } static int reg_event_syscall_enter(struct trace_event_file *file, diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 3cf7128e1ad3..a1ed96a7a462 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -961,7 +961,7 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); size = esize + tu->tp.size + dsize; event = trace_event_buffer_lock_reserve(&buffer, trace_file, - call->event.type, size, 0, 0); + call->event.type, size, 0); if (!event) return; @@ -977,7 +977,7 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, memcpy(data, ucb->buf, tu->tp.size + dsize); - event_trigger_unlock_commit(trace_file, buffer, event, entry, 0, 0); + event_trigger_unlock_commit(trace_file, buffer, event, entry, 0); } /* uprobe handler */ -- cgit v1.2.3 From 0c02006e6f5b0a3e73499bbf5943d9174c5ed640 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 25 Jan 2021 20:45:09 +0100 Subject: tracing: Inline tracing_gen_ctx_flags() Inline tracing_gen_ctx_flags(). This allows to have one ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT. This requires to move `trace_flag_type' so tracing_gen_ctx_flags() can use it. Link: https://lkml.kernel.org/r/20210125194511.3924915-3-bigeasy@linutronix.de Suggested-by: Steven Rostedt Link: https://lkml.kernel.org/r/20210125140323.6b1ff20c@gandalf.local.home Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 54 +++++++++++++++++++++++++++++++++++++++++--- kernel/trace/trace.c | 38 ++----------------------------- kernel/trace/trace.h | 19 ---------------- 3 files changed, 53 insertions(+), 58 deletions(-) (limited to 'kernel') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 091250b0895a..67ae708de40d 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -160,9 +160,57 @@ static inline void tracing_generic_entry_update(struct trace_entry *entry, entry->flags = trace_ctx >> 16; } -unsigned int tracing_gen_ctx_flags(unsigned long irqflags); -unsigned int tracing_gen_ctx(void); -unsigned int tracing_gen_ctx_dec(void); +unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status); + +enum trace_flag_type { + TRACE_FLAG_IRQS_OFF = 0x01, + TRACE_FLAG_IRQS_NOSUPPORT = 0x02, + TRACE_FLAG_NEED_RESCHED = 0x04, + TRACE_FLAG_HARDIRQ = 0x08, + TRACE_FLAG_SOFTIRQ = 0x10, + TRACE_FLAG_PREEMPT_RESCHED = 0x20, + TRACE_FLAG_NMI = 0x40, +}; + +#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT +static inline unsigned int tracing_gen_ctx_flags(unsigned long irqflags) +{ + unsigned int irq_status = irqs_disabled_flags(irqflags) ? + TRACE_FLAG_IRQS_OFF : 0; + return tracing_gen_ctx_irq_test(irq_status); +} +static inline unsigned int tracing_gen_ctx(void) +{ + unsigned long irqflags; + + local_save_flags(irqflags); + return tracing_gen_ctx_flags(irqflags); +} +#else + +static inline unsigned int tracing_gen_ctx_flags(unsigned long irqflags) +{ + return tracing_gen_ctx_irq_test(TRACE_FLAG_IRQS_NOSUPPORT); +} +static inline unsigned int tracing_gen_ctx(void) +{ + return tracing_gen_ctx_irq_test(TRACE_FLAG_IRQS_NOSUPPORT); +} +#endif + +static inline unsigned int tracing_gen_ctx_dec(void) +{ + unsigned int trace_ctx; + + trace_ctx = tracing_gen_ctx(); + /* + * Subtract one from the preeption counter if preemption is enabled, + * see trace_event_buffer_reserve()for details. + */ + if (IS_ENABLED(CONFIG_PREEMPTION)) + trace_ctx--; + return trace_ctx; +} struct trace_event_file; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0b3cce6ecf52..584fa2a1304a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2579,20 +2579,13 @@ enum print_line_t trace_handle_return(struct trace_seq *s) } EXPORT_SYMBOL_GPL(trace_handle_return); -unsigned int tracing_gen_ctx_flags(unsigned long irqflags) +unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) { - unsigned int trace_flags = 0; + unsigned int trace_flags = irqs_status; unsigned int pc; pc = preempt_count(); -#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT - if (irqs_disabled_flags(irqflags)) - trace_flags |= TRACE_FLAG_IRQS_OFF; -#else - trace_flags |= TRACE_FLAG_IRQS_NOSUPPORT; -#endif - if (pc & NMI_MASK) trace_flags |= TRACE_FLAG_NMI; if (pc & HARDIRQ_MASK) @@ -2608,33 +2601,6 @@ unsigned int tracing_gen_ctx_flags(unsigned long irqflags) return (trace_flags << 16) | (pc & 0xff); } -unsigned int tracing_gen_ctx(void) -{ - unsigned long irqflags; - -#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT - local_save_flags(irqflags); -#else - irqflags = 0; -#endif - return tracing_gen_ctx_flags(irqflags); -} - -unsigned int tracing_gen_ctx_dec(void) -{ - unsigned int trace_ctx; - - trace_ctx = tracing_gen_ctx(); - - /* - * Subtract one from the preeption counter if preemption is enabled, - * see trace_event_buffer_reserve()for details. - */ - if (IS_ENABLED(CONFIG_PREEMPTION)) - trace_ctx--; - return trace_ctx; -} - struct ring_buffer_event * trace_buffer_lock_reserve(struct trace_buffer *buffer, int type, diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 8daf3a0758b1..93fb08ab8bb6 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -136,25 +136,6 @@ struct kretprobe_trace_entry_head { unsigned long ret_ip; }; -/* - * trace_flag_type is an enumeration that holds different - * states when a trace occurs. These are: - * IRQS_OFF - interrupts were disabled - * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags - * NEED_RESCHED - reschedule is requested - * HARDIRQ - inside an interrupt handler - * SOFTIRQ - inside a softirq handler - */ -enum trace_flag_type { - TRACE_FLAG_IRQS_OFF = 0x01, - TRACE_FLAG_IRQS_NOSUPPORT = 0x02, - TRACE_FLAG_NEED_RESCHED = 0x04, - TRACE_FLAG_HARDIRQ = 0x08, - TRACE_FLAG_SOFTIRQ = 0x10, - TRACE_FLAG_PREEMPT_RESCHED = 0x20, - TRACE_FLAG_NMI = 0x40, -}; - #define TRACE_BUF_SIZE 1024 struct trace_array; -- cgit v1.2.3 From fe427886bf41279085e0707cced41150dbcd8512 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 25 Jan 2021 20:45:10 +0100 Subject: tracing: Use in_serving_softirq() to deduct softirq status. PREEMPT_RT does not report "serving softirq" because the tracing core looks at the preemption counter while PREEMPT_RT does not update it while processing softirqs in order to remain preemptible. The information is stored somewhere else. The in_serving_softirq() macro and the SOFTIRQ_OFFSET define are still working but not on the preempt-counter. Use in_serving_softirq() macro which works on PREEMPT_RT. On !PREEMPT_RT the compiler (gcc-10 / clang-11) is smart enough to optimize the in_serving_softirq() related read of the preemption counter away. The only difference I noticed by using in_serving_softirq() on !PREEMPT_RT is that gcc-10 implemented tracing_gen_ctx_flags() as reading FLAG, jmp _tracing_gen_ctx_flags(). Without in_serving_softirq() it inlined _tracing_gen_ctx_flags() into tracing_gen_ctx_flags(). Link: https://lkml.kernel.org/r/20210125194511.3924915-4-bigeasy@linutronix.de Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 584fa2a1304a..75620c29e904 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2590,8 +2590,7 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) trace_flags |= TRACE_FLAG_NMI; if (pc & HARDIRQ_MASK) trace_flags |= TRACE_FLAG_HARDIRQ; - - if (pc & SOFTIRQ_OFFSET) + if (in_serving_softirq()) trace_flags |= TRACE_FLAG_SOFTIRQ; if (tif_need_resched()) -- cgit v1.2.3 From 28cc65a173819ccb049d6335ebe82c1be054e9bb Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 16 Dec 2020 11:40:51 +0000 Subject: tracing: Fix spelling mistake in Kconfig "infinit" -> "infinite" There is a spelling mistake in the Kconfig help text. Fix it. Link: https://lkml.kernel.org/r/20201216114051.12056-1-colin.king@canonical.com Signed-off-by: Colin Ian King Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index c1a62ae7e812..4f976f8d9a38 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -545,7 +545,7 @@ config KPROBE_EVENTS_ON_NOTRACE using kprobe events. If kprobes can use ftrace instead of breakpoint, ftrace related - functions are protected from kprobe-events to prevent an infinit + functions are protected from kprobe-events to prevent an infinite recursion or any unexpected execution path which leads to a kernel crash. -- cgit v1.2.3 From 39bcdd6a964b2d80fcec2f70f11896b1db6fb572 Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Tue, 12 Jan 2021 10:20:08 +0530 Subject: tracing: Fix spelling of controlling in uprobes s/controling/controlling/p Link: https://lkml.kernel.org/r/20210112045008.29834-1-unixbhaskar@gmail.com Signed-off-by: Bhaskar Chowdhury Acked-by: Randy Dunlap Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_uprobe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index a1ed96a7a462..9d9440303075 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1635,7 +1635,7 @@ void destroy_local_trace_uprobe(struct trace_event_call *event_call) } #endif /* CONFIG_PERF_EVENTS */ -/* Make a trace interface for controling probe points */ +/* Make a trace interface for controlling probe points */ static __init int init_uprobe_trace(void) { int ret; -- cgit v1.2.3 From 557d50e79df7fe527bd5f93b26cf2bcdaaabce7a Mon Sep 17 00:00:00 2001 From: Bean Huo Date: Tue, 12 Jan 2021 12:12:02 +0100 Subject: tracing: Fix a kernel doc warning Add description for trace_array_put() parameter. kernel/trace/trace.c:464: warning: Function parameter or member 'this_tr' not described in 'trace_array_put' Link: https://lkml.kernel.org/r/20210112111202.23508-1-huobean@gmail.com Signed-off-by: Bean Huo [ Merged as one of the original fixes was already fixed by someone else ] Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 75620c29e904..7fd432334ff5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -454,6 +454,7 @@ static void __trace_array_put(struct trace_array *this_tr) /** * trace_array_put - Decrement the reference counter for this trace array. + * @this_tr : pointer to the trace array * * NOTE: Use this when we no longer need the trace array returned by * trace_array_get_by_name(). This ensures the trace array can be later -- cgit v1.2.3 From f2a99ddfd0aaff5f5c53ea1f652b5160ba5ee9b7 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Fri, 15 Jan 2021 07:33:48 -0800 Subject: tracing: Remove definition of DEBUG in trace_mmiotrace.c Defining DEBUG should only be done in development. So remove DEBUG. Link: https://lkml.kernel.org/r/20210115153348.131791-1-trix@redhat.com Signed-off-by: Tom Rix Reviewed-by: Karol Herbst Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_mmiotrace.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 7221ae0b4c47..64e77b513697 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -5,8 +5,6 @@ * Copyright (C) 2008 Pekka Paalanen */ -#define DEBUG 1 - #include #include #include -- cgit v1.2.3 From befe6d946551d65cddbd32b9cb0170b0249fd5ed Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 18 Nov 2020 09:34:05 -0500 Subject: tracepoint: Do not fail unregistering a probe due to memory failure The list of tracepoint callbacks is managed by an array that is protected by RCU. To update this array, a new array is allocated, the updates are copied over to the new array, and then the list of functions for the tracepoint is switched over to the new array. After a completion of an RCU grace period, the old array is freed. This process happens for both adding a callback as well as removing one. But on removing a callback, if the new array fails to be allocated, the callback is not removed, and may be used after it is freed by the clients of the tracepoint. There's really no reason to fail if the allocation for a new array fails when removing a function. Instead, the function can simply be replaced by a stub function that could be cleaned up on the next modification of the array. That is, instead of calling the function registered to the tracepoint, it would call a stub function in its place. Link: https://lore.kernel.org/r/20201115055256.65625-1-mmullins@mmlx.us Link: https://lore.kernel.org/r/20201116175107.02db396d@gandalf.local.home Link: https://lore.kernel.org/r/20201117211836.54acaef2@oasis.local.home Link: https://lkml.kernel.org/r/20201118093405.7a6d2290@gandalf.local.home [ Note, this version does use undefined compiler behavior (assuming that a stub function with no parameters or return, can be called by a location that thinks it has parameters but still no return value. Static calls do the same thing, so this trick is not without precedent. There's another solution that uses RCU tricks and is more complex, but can be an alternative if this solution becomes an issue. Link: https://lore.kernel.org/lkml/20210127170721.58bce7cc@gandalf.local.home/ ] Cc: Peter Zijlstra Cc: Josh Poimboeuf Cc: Mathieu Desnoyers Cc: Ingo Molnar Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Dmitry Vyukov Cc: Martin KaFai Lau Cc: Song Liu Cc: Yonghong Song Cc: Andrii Nakryiko Cc: John Fastabend Cc: KP Singh Cc: netdev Cc: bpf Cc: Kees Cook Cc: Florian Weimer Fixes: 97e1c18e8d17b ("tracing: Kernel Tracepoints") Reported-by: syzbot+83aa762ef23b6f0d1991@syzkaller.appspotmail.com Reported-by: syzbot+d29e58bb557324e55e5e@syzkaller.appspotmail.com Reported-by: Matt Mullins Signed-off-by: Steven Rostedt (VMware) Tested-by: Matt Mullins --- kernel/tracepoint.c | 80 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 64 insertions(+), 16 deletions(-) (limited to 'kernel') diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 7261fa0f5e3c..e8f20ae29c18 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -53,6 +53,12 @@ struct tp_probes { struct tracepoint_func probes[]; }; +/* Called in removal of a func but failed to allocate a new tp_funcs */ +static void tp_stub_func(void) +{ + return; +} + static inline void *allocate_probes(int count) { struct tp_probes *p = kmalloc(struct_size(p, probes, count), @@ -131,6 +137,7 @@ func_add(struct tracepoint_func **funcs, struct tracepoint_func *tp_func, { struct tracepoint_func *old, *new; int nr_probes = 0; + int stub_funcs = 0; int pos = -1; if (WARN_ON(!tp_func->func)) @@ -147,14 +154,34 @@ func_add(struct tracepoint_func **funcs, struct tracepoint_func *tp_func, if (old[nr_probes].func == tp_func->func && old[nr_probes].data == tp_func->data) return ERR_PTR(-EEXIST); + if (old[nr_probes].func == tp_stub_func) + stub_funcs++; } } - /* + 2 : one for new probe, one for NULL func */ - new = allocate_probes(nr_probes + 2); + /* + 2 : one for new probe, one for NULL func - stub functions */ + new = allocate_probes(nr_probes + 2 - stub_funcs); if (new == NULL) return ERR_PTR(-ENOMEM); if (old) { - if (pos < 0) { + if (stub_funcs) { + /* Need to copy one at a time to remove stubs */ + int probes = 0; + + pos = -1; + for (nr_probes = 0; old[nr_probes].func; nr_probes++) { + if (old[nr_probes].func == tp_stub_func) + continue; + if (pos < 0 && old[nr_probes].prio < prio) + pos = probes++; + new[probes++] = old[nr_probes]; + } + nr_probes = probes; + if (pos < 0) + pos = probes; + else + nr_probes--; /* Account for insertion */ + + } else if (pos < 0) { pos = nr_probes; memcpy(new, old, nr_probes * sizeof(struct tracepoint_func)); } else { @@ -188,8 +215,9 @@ static void *func_remove(struct tracepoint_func **funcs, /* (N -> M), (N > 1, M >= 0) probes */ if (tp_func->func) { for (nr_probes = 0; old[nr_probes].func; nr_probes++) { - if (old[nr_probes].func == tp_func->func && - old[nr_probes].data == tp_func->data) + if ((old[nr_probes].func == tp_func->func && + old[nr_probes].data == tp_func->data) || + old[nr_probes].func == tp_stub_func) nr_del++; } } @@ -208,14 +236,32 @@ static void *func_remove(struct tracepoint_func **funcs, /* N -> M, (N > 1, M > 0) */ /* + 1 for NULL */ new = allocate_probes(nr_probes - nr_del + 1); - if (new == NULL) - return ERR_PTR(-ENOMEM); - for (i = 0; old[i].func; i++) - if (old[i].func != tp_func->func - || old[i].data != tp_func->data) - new[j++] = old[i]; - new[nr_probes - nr_del].func = NULL; - *funcs = new; + if (new) { + for (i = 0; old[i].func; i++) + if ((old[i].func != tp_func->func + || old[i].data != tp_func->data) + && old[i].func != tp_stub_func) + new[j++] = old[i]; + new[nr_probes - nr_del].func = NULL; + *funcs = new; + } else { + /* + * Failed to allocate, replace the old function + * with calls to tp_stub_func. + */ + for (i = 0; old[i].func; i++) + if (old[i].func == tp_func->func && + old[i].data == tp_func->data) { + old[i].func = tp_stub_func; + /* Set the prio to the next event. */ + if (old[i + 1].func) + old[i].prio = + old[i + 1].prio; + else + old[i].prio = -1; + } + *funcs = old; + } } debug_print_probes(*funcs); return old; @@ -295,10 +341,12 @@ static int tracepoint_remove_func(struct tracepoint *tp, tp_funcs = rcu_dereference_protected(tp->funcs, lockdep_is_held(&tracepoints_mutex)); old = func_remove(&tp_funcs, func); - if (IS_ERR(old)) { - WARN_ON_ONCE(PTR_ERR(old) != -ENOMEM); + if (WARN_ON_ONCE(IS_ERR(old))) return PTR_ERR(old); - } + + if (tp_funcs == old) + /* Failed allocating new tp_funcs, replaced func with stub */ + return 0; if (!tp_funcs) { /* Removed last function */ -- cgit v1.2.3 From 4b9091e1c1948dea3b0b097496f308ede897d665 Mon Sep 17 00:00:00 2001 From: Song Chen Date: Thu, 28 Jan 2021 09:35:13 +0800 Subject: kernel: trace: preemptirq_delay_test: add cpu affinity The kernel thread executing test can run on any cpu, which might be different cpu latency tracer is running on, as a result, the big latency caused by preemptirq delay test can't be detected. Therefore, the argument cpu_affinity is added to be passed to test, ensure it's running on the same cpu with latency tracer. e.g. cyclictest -p 90 -m -c 0 -i 1000 -a 3 modprobe preemptirq_delay_test test_mode=preempt delay=500 \ burst_size=3 cpu_affinity=3 Link: https://lkml.kernel.org/r/1611797713-20965-1-git-send-email-chensong_2000@189.cn Signed-off-by: Song Chen Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/Kconfig | 4 ++++ kernel/trace/preemptirq_delay_test.c | 14 ++++++++++++++ 2 files changed, 18 insertions(+) (limited to 'kernel') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 4f976f8d9a38..799dbcfe65ad 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -886,6 +886,10 @@ config PREEMPTIRQ_DELAY_TEST irq-disabled critical sections for 500us: modprobe preemptirq_delay_test test_mode=irq delay=500 burst_size=3 + What's more, if you want to attach the test on the cpu which the latency + tracer is running on, specify cpu_affinity=cpu_num at the end of the + command. + If unsure, say N config SYNTH_EVENT_GEN_TEST diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c index 312d1a0ca3b6..8c4ffd076162 100644 --- a/kernel/trace/preemptirq_delay_test.c +++ b/kernel/trace/preemptirq_delay_test.c @@ -21,13 +21,16 @@ static ulong delay = 100; static char test_mode[12] = "irq"; static uint burst_size = 1; +static int cpu_affinity = -1; module_param_named(delay, delay, ulong, 0444); module_param_string(test_mode, test_mode, 12, 0444); module_param_named(burst_size, burst_size, uint, 0444); +module_param_named(cpu_affinity, cpu_affinity, int, 0444); MODULE_PARM_DESC(delay, "Period in microseconds (100 us default)"); MODULE_PARM_DESC(test_mode, "Mode of the test such as preempt, irq, or alternate (default irq)"); MODULE_PARM_DESC(burst_size, "The size of a burst (default 1)"); +MODULE_PARM_DESC(cpu_affinity, "Cpu num test is running on"); static struct completion done; @@ -36,7 +39,9 @@ static struct completion done; static void busy_wait(ulong time) { u64 start, end; + start = trace_clock_local(); + do { end = trace_clock_local(); if (kthread_should_stop()) @@ -47,6 +52,7 @@ static void busy_wait(ulong time) static __always_inline void irqoff_test(void) { unsigned long flags; + local_irq_save(flags); busy_wait(delay); local_irq_restore(flags); @@ -113,6 +119,14 @@ static int preemptirq_delay_run(void *data) { int i; int s = MIN(burst_size, NR_TEST_FUNCS); + struct cpumask cpu_mask; + + if (cpu_affinity > -1) { + cpumask_clear(&cpu_mask); + cpumask_set_cpu(cpu_affinity, &cpu_mask); + if (set_cpus_allowed_ptr(current, &cpu_mask)) + pr_err("cpu_affinity:%d, failed\n", cpu_affinity); + } for (i = 0; i < s; i++) (testfuncs[i])(i); -- cgit v1.2.3 From 2d396cb3b12641adcde91104a7c89994f2e876a6 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 5 Feb 2021 18:04:39 -0500 Subject: tracing: Do not create "enable" or "filter" files for ftrace event subsystem The ftrace event subsystem is only created for showing the format files of events created by the ftrace tracers, and are not trace events. The ftrace subsystem currently has both the "enable" and "filter" files that in other subsystems are used to enable/disable all events within the subsystem or set a filter for all the subsystem events. As ftrace subsystem events do not use enable or filter operations, these files are useless in the ftrace subsystem. Remove them. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 20ccce3e4ffb..c1e90611fe22 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2097,16 +2097,20 @@ event_subsystem_dir(struct trace_array *tr, const char *name, dir->subsystem = system; file->system = dir; - entry = tracefs_create_file("filter", 0644, dir->entry, dir, - &ftrace_subsystem_filter_fops); - if (!entry) { - kfree(system->filter); - system->filter = NULL; - pr_warn("Could not create tracefs '%s/filter' entry\n", name); - } + /* the ftrace system is special, do not create enable or filter files */ + if (strcmp(name, "ftrace") != 0) { + + entry = tracefs_create_file("filter", 0644, dir->entry, dir, + &ftrace_subsystem_filter_fops); + if (!entry) { + kfree(system->filter); + system->filter = NULL; + pr_warn("Could not create tracefs '%s/filter' entry\n", name); + } - trace_create_file("enable", 0644, dir->entry, dir, - &ftrace_system_enable_fops); + trace_create_file("enable", 0644, dir->entry, dir, + &ftrace_system_enable_fops); + } list_add(&dir->list, &tr->systems); -- cgit v1.2.3 From 7211f0a25781ace5f79b272318b4c60b5dcfd413 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 4 Feb 2021 14:30:04 -0500 Subject: tracepoints: Code clean up Restructure the code a bit to make it simpler, fix some formatting problems and add READ_ONCE/WRITE_ONCE to make sure there's no compiler load/store tearing to the variables that can be accessed across CPUs. Started with Mathieu Desnoyers's patch: Link: https://lore.kernel.org/lkml/20210203175741.20665-1-mathieu.desnoyers@efficios.com/ And will keep his signature, but I will take the responsibility of this being correct, and keep the authorship. Link: https://lkml.kernel.org/r/20210204143004.61126582@gandalf.local.home Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (VMware) --- include/linux/tracepoint.h | 2 +- kernel/tracepoint.c | 91 ++++++++++++++++++---------------------------- 2 files changed, 36 insertions(+), 57 deletions(-) (limited to 'kernel') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 2aad1c10821a..9cfb099da58f 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -305,7 +305,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) rcu_dereference_raw((&__tracepoint_##_name)->funcs); \ if (it_func_ptr) { \ do { \ - it_func = (it_func_ptr)->func; \ + it_func = READ_ONCE((it_func_ptr)->func); \ __data = (it_func_ptr)->data; \ ((void(*)(void *, proto))(it_func))(__data, args); \ } while ((++it_func_ptr)->func); \ diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index e8f20ae29c18..9f478d29b926 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -136,9 +136,9 @@ func_add(struct tracepoint_func **funcs, struct tracepoint_func *tp_func, int prio) { struct tracepoint_func *old, *new; - int nr_probes = 0; - int stub_funcs = 0; - int pos = -1; + int iter_probes; /* Iterate over old probe array. */ + int nr_probes = 0; /* Counter for probes */ + int pos = -1; /* Insertion position into new array */ if (WARN_ON(!tp_func->func)) return ERR_PTR(-EINVAL); @@ -147,54 +147,38 @@ func_add(struct tracepoint_func **funcs, struct tracepoint_func *tp_func, old = *funcs; if (old) { /* (N -> N+1), (N != 0, 1) probes */ - for (nr_probes = 0; old[nr_probes].func; nr_probes++) { - /* Insert before probes of lower priority */ - if (pos < 0 && old[nr_probes].prio < prio) - pos = nr_probes; - if (old[nr_probes].func == tp_func->func && - old[nr_probes].data == tp_func->data) + for (iter_probes = 0; old[iter_probes].func; iter_probes++) { + if (old[iter_probes].func == tp_stub_func) + continue; /* Skip stub functions. */ + if (old[iter_probes].func == tp_func->func && + old[iter_probes].data == tp_func->data) return ERR_PTR(-EEXIST); - if (old[nr_probes].func == tp_stub_func) - stub_funcs++; + nr_probes++; } } - /* + 2 : one for new probe, one for NULL func - stub functions */ - new = allocate_probes(nr_probes + 2 - stub_funcs); + /* + 2 : one for new probe, one for NULL func */ + new = allocate_probes(nr_probes + 2); if (new == NULL) return ERR_PTR(-ENOMEM); if (old) { - if (stub_funcs) { - /* Need to copy one at a time to remove stubs */ - int probes = 0; - - pos = -1; - for (nr_probes = 0; old[nr_probes].func; nr_probes++) { - if (old[nr_probes].func == tp_stub_func) - continue; - if (pos < 0 && old[nr_probes].prio < prio) - pos = probes++; - new[probes++] = old[nr_probes]; - } - nr_probes = probes; - if (pos < 0) - pos = probes; - else - nr_probes--; /* Account for insertion */ - - } else if (pos < 0) { - pos = nr_probes; - memcpy(new, old, nr_probes * sizeof(struct tracepoint_func)); - } else { - /* Copy higher priority probes ahead of the new probe */ - memcpy(new, old, pos * sizeof(struct tracepoint_func)); - /* Copy the rest after it. */ - memcpy(new + pos + 1, old + pos, - (nr_probes - pos) * sizeof(struct tracepoint_func)); + nr_probes = 0; + for (iter_probes = 0; old[iter_probes].func; iter_probes++) { + if (old[iter_probes].func == tp_stub_func) + continue; + /* Insert before probes of lower priority */ + if (pos < 0 && old[iter_probes].prio < prio) + pos = nr_probes++; + new[nr_probes++] = old[iter_probes]; } - } else + if (pos < 0) + pos = nr_probes++; + /* nr_probes now points to the end of the new array */ + } else { pos = 0; + nr_probes = 1; /* must point at end of array */ + } new[pos] = *tp_func; - new[nr_probes + 1].func = NULL; + new[nr_probes].func = NULL; *funcs = new; debug_print_probes(*funcs); return old; @@ -237,11 +221,12 @@ static void *func_remove(struct tracepoint_func **funcs, /* + 1 for NULL */ new = allocate_probes(nr_probes - nr_del + 1); if (new) { - for (i = 0; old[i].func; i++) - if ((old[i].func != tp_func->func - || old[i].data != tp_func->data) - && old[i].func != tp_stub_func) + for (i = 0; old[i].func; i++) { + if ((old[i].func != tp_func->func || + old[i].data != tp_func->data) && + old[i].func != tp_stub_func) new[j++] = old[i]; + } new[nr_probes - nr_del].func = NULL; *funcs = new; } else { @@ -249,17 +234,11 @@ static void *func_remove(struct tracepoint_func **funcs, * Failed to allocate, replace the old function * with calls to tp_stub_func. */ - for (i = 0; old[i].func; i++) + for (i = 0; old[i].func; i++) { if (old[i].func == tp_func->func && - old[i].data == tp_func->data) { - old[i].func = tp_stub_func; - /* Set the prio to the next event. */ - if (old[i + 1].func) - old[i].prio = - old[i + 1].prio; - else - old[i].prio = -1; - } + old[i].data == tp_func->data) + WRITE_ONCE(old[i].func, tp_stub_func); + } *funcs = old; } } -- cgit v1.2.3 From 33b1d14668859626bf96958e38042b0ed8a22a68 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 3 Feb 2021 23:59:27 +0900 Subject: kprobes: Warn if the kprobe is reregistered Warn if the kprobe is reregistered, since there must be a software bug (actively used resource must not be re-registered) and caller must be fixed. Link: https://lkml.kernel.org/r/161236436734.194052.4058506306336814476.stgit@devnote2 Acked-by: Naveen N. Rao Acked-by: Ananth N Mavinakayanahalli Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/kprobes.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/kprobes.c b/kernel/kprobes.c index d5a3eb74a657..dd1d027455c4 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1520,13 +1520,16 @@ valid: return ap; } -/* Return error if the kprobe is being re-registered */ -static inline int check_kprobe_rereg(struct kprobe *p) +/* + * Warn and return error if the kprobe is being re-registered since + * there must be a software bug. + */ +static inline int warn_kprobe_rereg(struct kprobe *p) { int ret = 0; mutex_lock(&kprobe_mutex); - if (__get_valid_kprobe(p)) + if (WARN_ON_ONCE(__get_valid_kprobe(p))) ret = -EINVAL; mutex_unlock(&kprobe_mutex); @@ -1614,7 +1617,7 @@ int register_kprobe(struct kprobe *p) return PTR_ERR(addr); p->addr = addr; - ret = check_kprobe_rereg(p); + ret = warn_kprobe_rereg(p); if (ret) return ret; @@ -1995,7 +1998,7 @@ int register_kretprobe(struct kretprobe *rp) return ret; /* If only rp->kp.addr is specified, check reregistering kprobes */ - if (rp->kp.addr && check_kprobe_rereg(&rp->kp)) + if (rp->kp.addr && warn_kprobe_rereg(&rp->kp)) return -EINVAL; if (kretprobe_blacklist_size) { -- cgit v1.2.3 From d262271d04830e4b5009f4a5cc64934d86b49832 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 1 Feb 2021 13:48:11 -0600 Subject: tracing/dynevent: Delegate parsing to create function Delegate command parsing to each create function so that the command syntax can be customized. This requires changes to the kprobe/uprobe/synthetic event handling, which are also included here. Link: https://lkml.kernel.org/r/e488726f49cbdbc01568618f8680584306c4c79f.1612208610.git.zanussi@kernel.org Signed-off-by: Masami Hiramatsu [ zanussi@kernel.org: added synthetic event modifications ] Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 23 ++------------- kernel/trace/trace.h | 3 +- kernel/trace/trace_dynevent.c | 35 ++++++++++++++--------- kernel/trace/trace_dynevent.h | 4 +-- kernel/trace/trace_events_synth.c | 60 ++++++++++++++++++++++++++++++--------- kernel/trace/trace_kprobe.c | 33 +++++++++++---------- kernel/trace/trace_probe.c | 17 +++++++++++ kernel/trace/trace_probe.h | 1 + kernel/trace/trace_uprobe.c | 17 +++++++---- 9 files changed, 120 insertions(+), 73 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 7fd432334ff5..b79bcacdd6f9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -9412,30 +9412,11 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) } EXPORT_SYMBOL_GPL(ftrace_dump); -int trace_run_command(const char *buf, int (*createfn)(int, char **)) -{ - char **argv; - int argc, ret; - - argc = 0; - ret = 0; - argv = argv_split(GFP_KERNEL, buf, &argc); - if (!argv) - return -ENOMEM; - - if (argc) - ret = createfn(argc, argv); - - argv_free(argv); - - return ret; -} - #define WRITE_BUFSIZE 4096 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, size_t count, loff_t *ppos, - int (*createfn)(int, char **)) + int (*createfn)(const char *)) { char *kbuf, *buf, *tmp; int ret = 0; @@ -9483,7 +9464,7 @@ ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, if (tmp) *tmp = '\0'; - ret = trace_run_command(buf, createfn); + ret = createfn(buf); if (ret) goto out; buf += size; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 93fb08ab8bb6..a9e13bd5a41b 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1807,10 +1807,9 @@ extern int tracing_set_cpumask(struct trace_array *tr, #define MAX_EVENT_NAME_LEN 64 -extern int trace_run_command(const char *buf, int (*createfn)(int, char**)); extern ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, size_t count, loff_t *ppos, - int (*createfn)(int, char**)); + int (*createfn)(const char *)); extern unsigned int err_pos(char *cmd, const char *str); extern void tracing_log_err(struct trace_array *tr, diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index 4f967d5cd917..dc971a68dda4 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -31,23 +31,31 @@ int dyn_event_register(struct dyn_event_operations *ops) return 0; } -int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type) +int dyn_event_release(const char *raw_command, struct dyn_event_operations *type) { struct dyn_event *pos, *n; char *system = NULL, *event, *p; - int ret = -ENOENT; + int argc, ret = -ENOENT; + char **argv; + + argv = argv_split(GFP_KERNEL, raw_command, &argc); + if (!argv) + return -ENOMEM; if (argv[0][0] == '-') { - if (argv[0][1] != ':') - return -EINVAL; + if (argv[0][1] != ':') { + ret = -EINVAL; + goto out; + } event = &argv[0][2]; } else { event = strchr(argv[0], ':'); - if (!event) - return -EINVAL; + if (!event) { + ret = -EINVAL; + goto out; + } event++; } - argc--; argv++; p = strchr(event, '/'); if (p) { @@ -63,7 +71,7 @@ int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type) if (type && type != pos->ops) continue; if (!pos->ops->match(system, event, - argc, (const char **)argv, pos)) + argc - 1, (const char **)argv + 1, pos)) continue; ret = pos->ops->free(pos); @@ -71,21 +79,22 @@ int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type) break; } mutex_unlock(&event_mutex); - +out: + argv_free(argv); return ret; } -static int create_dyn_event(int argc, char **argv) +static int create_dyn_event(const char *raw_command) { struct dyn_event_operations *ops; int ret = -ENODEV; - if (argv[0][0] == '-' || argv[0][0] == '!') - return dyn_event_release(argc, argv, NULL); + if (raw_command[0] == '-' || raw_command[0] == '!') + return dyn_event_release(raw_command, NULL); mutex_lock(&dyn_event_ops_mutex); list_for_each_entry(ops, &dyn_event_ops_list, list) { - ret = ops->create(argc, (const char **)argv); + ret = ops->create(raw_command); if (!ret || ret != -ECANCELED) break; } diff --git a/kernel/trace/trace_dynevent.h b/kernel/trace/trace_dynevent.h index d6f72dcb7269..7754936b57ee 100644 --- a/kernel/trace/trace_dynevent.h +++ b/kernel/trace/trace_dynevent.h @@ -39,7 +39,7 @@ struct dyn_event; */ struct dyn_event_operations { struct list_head list; - int (*create)(int argc, const char *argv[]); + int (*create)(const char *raw_command); int (*show)(struct seq_file *m, struct dyn_event *ev); bool (*is_busy)(struct dyn_event *ev); int (*free)(struct dyn_event *ev); @@ -97,7 +97,7 @@ void *dyn_event_seq_start(struct seq_file *m, loff_t *pos); void *dyn_event_seq_next(struct seq_file *m, void *v, loff_t *pos); void dyn_event_seq_stop(struct seq_file *m, void *v); int dyn_events_release_all(struct dyn_event_operations *type); -int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type); +int dyn_event_release(const char *raw_command, struct dyn_event_operations *type); /* * for_each_dyn_event - iterate over the dyn_event list diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 5a8bc0b421f1..b2588a5650c9 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -62,7 +62,7 @@ static void synth_err(u8 err_type, u8 err_pos) err_type, err_pos); } -static int create_synth_event(int argc, const char **argv); +static int create_synth_event(const char *raw_command); static int synth_event_show(struct seq_file *m, struct dyn_event *ev); static int synth_event_release(struct dyn_event *ev); static bool synth_event_is_busy(struct dyn_event *ev); @@ -1383,18 +1383,30 @@ int synth_event_delete(const char *event_name) } EXPORT_SYMBOL_GPL(synth_event_delete); -static int create_or_delete_synth_event(int argc, char **argv) +static int create_or_delete_synth_event(const char *raw_command) { - const char *name = argv[0]; - int ret; + char **argv, *name = NULL; + int argc = 0, ret = 0; + + argv = argv_split(GFP_KERNEL, raw_command, &argc); + if (!argv) + return -ENOMEM; + + if (!argc) + goto free; + + name = argv[0]; /* trace_run_command() ensures argc != 0 */ if (name[0] == '!') { ret = synth_event_delete(name + 1); - return ret; + goto free; } ret = __create_synth_event(argc - 1, name, (const char **)argv + 1); +free: + argv_free(argv); + return ret == -ECANCELED ? -EINVAL : ret; } @@ -1403,7 +1415,7 @@ static int synth_event_run_command(struct dynevent_cmd *cmd) struct synth_event *se; int ret; - ret = trace_run_command(cmd->seq.buffer, create_or_delete_synth_event); + ret = create_or_delete_synth_event(cmd->seq.buffer); if (ret) return ret; @@ -1939,23 +1951,43 @@ int synth_event_trace_end(struct synth_event_trace_state *trace_state) } EXPORT_SYMBOL_GPL(synth_event_trace_end); -static int create_synth_event(int argc, const char **argv) +static int create_synth_event(const char *raw_command) { - const char *name = argv[0]; - int len; + char **argv, *name; + int len, argc = 0, ret = 0; + + argv = argv_split(GFP_KERNEL, raw_command, &argc); + if (!argv) { + ret = -ENOMEM; + return ret; + } - if (name[0] != 's' || name[1] != ':') - return -ECANCELED; + if (!argc) + goto free; + + name = argv[0]; + + if (name[0] != 's' || name[1] != ':') { + ret = -ECANCELED; + goto free; + } name += 2; /* This interface accepts group name prefix */ if (strchr(name, '/')) { len = str_has_prefix(name, SYNTH_SYSTEM "/"); - if (len == 0) - return -EINVAL; + if (len == 0) { + ret = -EINVAL; + goto free; + } name += len; } - return __create_synth_event(argc - 1, name, argv + 1); + + ret = __create_synth_event(argc - 1, name, (const char **)argv + 1); +free: + argv_free(argv); + + return ret; } static int synth_event_release(struct dyn_event *ev) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index f6c459aba8a6..8a1cb0878cbc 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -35,7 +35,7 @@ static int __init set_kprobe_boot_events(char *str) } __setup("kprobe_event=", set_kprobe_boot_events); -static int trace_kprobe_create(int argc, const char **argv); +static int trace_kprobe_create(const char *raw_command); static int trace_kprobe_show(struct seq_file *m, struct dyn_event *ev); static int trace_kprobe_release(struct dyn_event *ev); static bool trace_kprobe_is_busy(struct dyn_event *ev); @@ -711,7 +711,7 @@ static inline void sanitize_event_name(char *name) *name = '_'; } -static int trace_kprobe_create(int argc, const char *argv[]) +static int __trace_kprobe_create(int argc, const char *argv[]) { /* * Argument syntax: @@ -910,20 +910,25 @@ error: goto out; } -static int create_or_delete_trace_kprobe(int argc, char **argv) +static int trace_kprobe_create(const char *raw_command) +{ + return trace_probe_create(raw_command, __trace_kprobe_create); +} + +static int create_or_delete_trace_kprobe(const char *raw_command) { int ret; - if (argv[0][0] == '-') - return dyn_event_release(argc, argv, &trace_kprobe_ops); + if (raw_command[0] == '-') + return dyn_event_release(raw_command, &trace_kprobe_ops); - ret = trace_kprobe_create(argc, (const char **)argv); + ret = trace_kprobe_create(raw_command); return ret == -ECANCELED ? -EINVAL : ret; } static int trace_kprobe_run_command(struct dynevent_cmd *cmd) { - return trace_run_command(cmd->seq.buffer, create_or_delete_trace_kprobe); + return create_or_delete_trace_kprobe(cmd->seq.buffer); } /** @@ -1084,7 +1089,7 @@ int kprobe_event_delete(const char *name) snprintf(buf, MAX_EVENT_NAME_LEN, "-:%s", name); - return trace_run_command(buf, create_or_delete_trace_kprobe); + return create_or_delete_trace_kprobe(buf); } EXPORT_SYMBOL_GPL(kprobe_event_delete); @@ -1886,7 +1891,7 @@ static __init void setup_boot_kprobe_events(void) if (p) *p++ = '\0'; - ret = trace_run_command(cmd, create_or_delete_trace_kprobe); + ret = create_or_delete_trace_kprobe(cmd); if (ret) pr_warn("Failed to add event(%d): %s\n", ret, cmd); @@ -1980,8 +1985,7 @@ static __init int kprobe_trace_self_tests_init(void) pr_info("Testing kprobe tracing: "); - ret = trace_run_command("p:testprobe kprobe_trace_selftest_target $stack $stack0 +0($stack)", - create_or_delete_trace_kprobe); + ret = create_or_delete_trace_kprobe("p:testprobe kprobe_trace_selftest_target $stack $stack0 +0($stack)"); if (WARN_ON_ONCE(ret)) { pr_warn("error on probing function entry.\n"); warn++; @@ -2002,8 +2006,7 @@ static __init int kprobe_trace_self_tests_init(void) } } - ret = trace_run_command("r:testprobe2 kprobe_trace_selftest_target $retval", - create_or_delete_trace_kprobe); + ret = create_or_delete_trace_kprobe("r:testprobe2 kprobe_trace_selftest_target $retval"); if (WARN_ON_ONCE(ret)) { pr_warn("error on probing function return.\n"); warn++; @@ -2076,13 +2079,13 @@ static __init int kprobe_trace_self_tests_init(void) trace_probe_event_call(&tk->tp), file); } - ret = trace_run_command("-:testprobe", create_or_delete_trace_kprobe); + ret = create_or_delete_trace_kprobe("-:testprobe"); if (WARN_ON_ONCE(ret)) { pr_warn("error on deleting a probe.\n"); warn++; } - ret = trace_run_command("-:testprobe2", create_or_delete_trace_kprobe); + ret = create_or_delete_trace_kprobe("-:testprobe2"); if (WARN_ON_ONCE(ret)) { pr_warn("error on deleting a probe.\n"); warn++; diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index d2867ccc6aca..ec589a4612df 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -1134,3 +1134,20 @@ bool trace_probe_match_command_args(struct trace_probe *tp, } return true; } + +int trace_probe_create(const char *raw_command, int (*createfn)(int, const char **)) +{ + int argc = 0, ret = 0; + char **argv; + + argv = argv_split(GFP_KERNEL, raw_command, &argc); + if (!argv) + return -ENOMEM; + + if (argc) + ret = createfn(argc, (const char **)argv); + + argv_free(argv); + + return ret; +} diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 2f703a20c724..7ce4027089ee 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -341,6 +341,7 @@ struct event_file_link *trace_probe_get_file_link(struct trace_probe *tp, int trace_probe_compare_arg_type(struct trace_probe *a, struct trace_probe *b); bool trace_probe_match_command_args(struct trace_probe *tp, int argc, const char **argv); +int trace_probe_create(const char *raw_command, int (*createfn)(int, const char **)); #define trace_probe_for_each_link(pos, tp) \ list_for_each_entry(pos, &(tp)->event->files, list) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 9d9440303075..9b50869a5ddb 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -34,7 +34,7 @@ struct uprobe_trace_entry_head { #define DATAOF_TRACE_ENTRY(entry, is_return) \ ((void*)(entry) + SIZEOF_TRACE_ENTRY(is_return)) -static int trace_uprobe_create(int argc, const char **argv); +static int trace_uprobe_create(const char *raw_command); static int trace_uprobe_show(struct seq_file *m, struct dyn_event *ev); static int trace_uprobe_release(struct dyn_event *ev); static bool trace_uprobe_is_busy(struct dyn_event *ev); @@ -530,7 +530,7 @@ end: * Argument syntax: * - Add uprobe: p|r[:[GRP/]EVENT] PATH:OFFSET[%return][(REF)] [FETCHARGS] */ -static int trace_uprobe_create(int argc, const char **argv) +static int __trace_uprobe_create(int argc, const char **argv) { struct trace_uprobe *tu; const char *event = NULL, *group = UPROBE_EVENT_SYSTEM; @@ -716,14 +716,19 @@ fail_address_parse: return ret; } -static int create_or_delete_trace_uprobe(int argc, char **argv) +int trace_uprobe_create(const char *raw_command) +{ + return trace_probe_create(raw_command, __trace_uprobe_create); +} + +static int create_or_delete_trace_uprobe(const char *raw_command) { int ret; - if (argv[0][0] == '-') - return dyn_event_release(argc, argv, &trace_uprobe_ops); + if (raw_command[0] == '-') + return dyn_event_release(raw_command, &trace_uprobe_ops); - ret = trace_uprobe_create(argc, (const char **)argv); + ret = trace_uprobe_create(raw_command); return ret == -ECANCELED ? -EINVAL : ret; } -- cgit v1.2.3 From c9e759b1e8456a460f258fcfe9682003fcf03938 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Mon, 1 Feb 2021 13:48:12 -0600 Subject: tracing: Rework synthetic event command parsing Now that command parsing has been delegated to the create functions and we're no longer constrained by argv_split(), we can modify the synthetic event command parser to better match the higher-level structure of the synthetic event commands, which is basically an event name followed by a set of semicolon-separated fields. Since we're also now passed the raw command, we can also save it directly and can get rid of save_cmdstr(). Link: https://lkml.kernel.org/r/cb9e2be92d992ce59f2b4f132264a5d467f3933f.1612208610.git.zanussi@kernel.org Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_synth.c | 245 ++++++++++++++++++++++---------------- 1 file changed, 143 insertions(+), 102 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index b2588a5650c9..4f6c5a104ee2 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -48,7 +48,7 @@ static int errpos(const char *str) return err_pos(last_cmd, str); } -static void last_cmd_set(char *str) +static void last_cmd_set(const char *str) { if (!str) return; @@ -579,18 +579,14 @@ static void free_synth_field(struct synth_field *field) kfree(field); } -static struct synth_field *parse_synth_field(int argc, const char **argv, - int *consumed) +static struct synth_field *parse_synth_field(int argc, char **argv) { - struct synth_field *field; const char *prefix = NULL, *field_type = argv[0], *field_name, *array; - int len, ret = -ENOMEM; + int len, consumed, ret = -ENOMEM; + struct synth_field *field; struct seq_buf s; ssize_t size; - if (field_type[0] == ';') - field_type++; - if (!strcmp(field_type, "unsigned")) { if (argc < 3) { synth_err(SYNTH_ERR_INCOMPLETE_TYPE, errpos(field_type)); @@ -599,10 +595,20 @@ static struct synth_field *parse_synth_field(int argc, const char **argv, prefix = "unsigned "; field_type = argv[1]; field_name = argv[2]; - *consumed = 3; + consumed = 3; } else { field_name = argv[1]; - *consumed = 2; + consumed = 2; + } + + if (consumed < argc) { + synth_err(SYNTH_ERR_INVALID_FIELD, errpos(field_type)); + return ERR_PTR(-EINVAL); + } + + if (!field_name) { + synth_err(SYNTH_ERR_INVALID_FIELD, errpos(field_type)); + return ERR_PTR(-EINVAL); } field = kzalloc(sizeof(*field), GFP_KERNEL); @@ -613,8 +619,6 @@ static struct synth_field *parse_synth_field(int argc, const char **argv, array = strchr(field_name, '['); if (array) len -= strlen(array); - else if (field_name[len - 1] == ';') - len--; field->name = kmemdup_nul(field_name, len, GFP_KERNEL); if (!field->name) @@ -626,8 +630,6 @@ static struct synth_field *parse_synth_field(int argc, const char **argv, goto free; } - if (field_type[0] == ';') - field_type++; len = strlen(field_type) + 1; if (array) @@ -644,11 +646,8 @@ static struct synth_field *parse_synth_field(int argc, const char **argv, if (prefix) seq_buf_puts(&s, prefix); seq_buf_puts(&s, field_type); - if (array) { + if (array) seq_buf_puts(&s, array); - if (s.buffer[s.len - 1] == ';') - s.len--; - } if (WARN_ON_ONCE(!seq_buf_buffer_left(&s))) goto free; @@ -1160,46 +1159,12 @@ int synth_event_gen_cmd_array_start(struct dynevent_cmd *cmd, const char *name, } EXPORT_SYMBOL_GPL(synth_event_gen_cmd_array_start); -static int save_cmdstr(int argc, const char *name, const char **argv) -{ - struct seq_buf s; - char *buf; - int i; - - buf = kzalloc(MAX_DYNEVENT_CMD_LEN, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - seq_buf_init(&s, buf, MAX_DYNEVENT_CMD_LEN); - - seq_buf_puts(&s, name); - - for (i = 0; i < argc; i++) { - seq_buf_putc(&s, ' '); - seq_buf_puts(&s, argv[i]); - } - - if (!seq_buf_buffer_left(&s)) { - synth_err(SYNTH_ERR_CMD_TOO_LONG, 0); - kfree(buf); - return -EINVAL; - } - buf[s.len] = 0; - last_cmd_set(buf); - - kfree(buf); - return 0; -} - -static int __create_synth_event(int argc, const char *name, const char **argv) +static int __create_synth_event(const char *name, const char *raw_fields) { + char **argv, *field_str, *tmp_fields, *saved_fields = NULL; struct synth_field *field, *fields[SYNTH_FIELDS_MAX]; + int i, argc, n_fields = 0, ret = 0; struct synth_event *event = NULL; - int i, consumed = 0, n_fields = 0, ret = 0; - - ret = save_cmdstr(argc, name, argv); - if (ret) - return ret; /* * Argument syntax: @@ -1208,46 +1173,60 @@ static int __create_synth_event(int argc, const char *name, const char **argv) * where 'field' = type field_name */ - if (name[0] == '\0' || argc < 1) { + if (name[0] == '\0') { synth_err(SYNTH_ERR_CMD_INCOMPLETE, 0); return -EINVAL; } - mutex_lock(&event_mutex); - if (!is_good_name(name)) { synth_err(SYNTH_ERR_BAD_NAME, errpos(name)); - ret = -EINVAL; - goto out; + return -EINVAL; } + mutex_lock(&event_mutex); + event = find_synth_event(name); if (event) { synth_err(SYNTH_ERR_EVENT_EXISTS, errpos(name)); ret = -EEXIST; - goto out; + goto err; } - for (i = 0; i < argc - 1; i++) { - if (strcmp(argv[i], ";") == 0) - continue; - if (n_fields == SYNTH_FIELDS_MAX) { - synth_err(SYNTH_ERR_TOO_MANY_FIELDS, 0); - ret = -EINVAL; + tmp_fields = saved_fields = kstrdup(raw_fields, GFP_KERNEL); + if (!tmp_fields) { + ret = -ENOMEM; + goto err; + } + + while ((field_str = strsep(&tmp_fields, ";")) != NULL) { + argv = argv_split(GFP_KERNEL, field_str, &argc); + if (!argv) { + ret = -ENOMEM; goto err; } - field = parse_synth_field(argc - i, &argv[i], &consumed); + if (!argc) + continue; + + field = parse_synth_field(argc, argv); if (IS_ERR(field)) { + argv_free(argv); ret = PTR_ERR(field); goto err; } + + argv_free(argv); + fields[n_fields++] = field; - i += consumed - 1; + if (n_fields == SYNTH_FIELDS_MAX) { + synth_err(SYNTH_ERR_TOO_MANY_FIELDS, 0); + ret = -EINVAL; + goto err; + } } - if (i < argc && strcmp(argv[i], ";") != 0) { - synth_err(SYNTH_ERR_INVALID_FIELD, errpos(argv[i])); + if (n_fields == 0) { + synth_err(SYNTH_ERR_CMD_INCOMPLETE, 0); ret = -EINVAL; goto err; } @@ -1266,6 +1245,8 @@ static int __create_synth_event(int argc, const char *name, const char **argv) out: mutex_unlock(&event_mutex); + kfree(saved_fields); + return ret; err: for (i = 0; i < n_fields; i++) @@ -1383,31 +1364,79 @@ int synth_event_delete(const char *event_name) } EXPORT_SYMBOL_GPL(synth_event_delete); -static int create_or_delete_synth_event(const char *raw_command) +static int check_command(const char *raw_command) { - char **argv, *name = NULL; - int argc = 0, ret = 0; + char **argv = NULL, *cmd, *saved_cmd, *name_and_field; + int argc, ret = 0; - argv = argv_split(GFP_KERNEL, raw_command, &argc); - if (!argv) + cmd = saved_cmd = kstrdup(raw_command, GFP_KERNEL); + if (!cmd) return -ENOMEM; - if (!argc) + name_and_field = strsep(&cmd, ";"); + if (!name_and_field) { + ret = -EINVAL; + goto free; + } + + if (name_and_field[0] == '!') + goto free; + + argv = argv_split(GFP_KERNEL, name_and_field, &argc); + if (!argv) { + ret = -ENOMEM; goto free; + } + argv_free(argv); + + if (argc < 3) + ret = -EINVAL; +free: + kfree(saved_cmd); - name = argv[0]; + return ret; +} + +static int create_or_delete_synth_event(const char *raw_command) +{ + char *name = NULL, *fields, *p; + int ret = 0; + + raw_command = skip_spaces(raw_command); + if (raw_command[0] == '\0') + return ret; + + last_cmd_set(raw_command); + + ret = check_command(raw_command); + if (ret) { + synth_err(SYNTH_ERR_CMD_INCOMPLETE, 0); + return ret; + } + + p = strpbrk(raw_command, " \t"); + if (!p && raw_command[0] != '!') { + synth_err(SYNTH_ERR_CMD_INCOMPLETE, 0); + ret = -EINVAL; + goto free; + } + + name = kmemdup_nul(raw_command, p ? p - raw_command : strlen(raw_command), GFP_KERNEL); + if (!name) + return -ENOMEM; - /* trace_run_command() ensures argc != 0 */ if (name[0] == '!') { ret = synth_event_delete(name + 1); goto free; } - ret = __create_synth_event(argc - 1, name, (const char **)argv + 1); + fields = skip_spaces(p); + + ret = __create_synth_event(name, fields); free: - argv_free(argv); + kfree(name); - return ret == -ECANCELED ? -EINVAL : ret; + return ret; } static int synth_event_run_command(struct dynevent_cmd *cmd) @@ -1953,39 +1982,51 @@ EXPORT_SYMBOL_GPL(synth_event_trace_end); static int create_synth_event(const char *raw_command) { - char **argv, *name; - int len, argc = 0, ret = 0; + char *fields, *p; + const char *name; + int len, ret = 0; - argv = argv_split(GFP_KERNEL, raw_command, &argc); - if (!argv) { - ret = -ENOMEM; + raw_command = skip_spaces(raw_command); + if (raw_command[0] == '\0') return ret; - } - if (!argc) - goto free; + last_cmd_set(raw_command); - name = argv[0]; + p = strpbrk(raw_command, " \t"); + if (!p) + return -EINVAL; - if (name[0] != 's' || name[1] != ':') { - ret = -ECANCELED; - goto free; - } + fields = skip_spaces(p); + + name = raw_command; + + if (name[0] != 's' || name[1] != ':') + return -ECANCELED; name += 2; /* This interface accepts group name prefix */ if (strchr(name, '/')) { len = str_has_prefix(name, SYNTH_SYSTEM "/"); - if (len == 0) { - ret = -EINVAL; - goto free; - } + if (len == 0) + return -EINVAL; name += len; } - ret = __create_synth_event(argc - 1, name, (const char **)argv + 1); -free: - argv_free(argv); + len = name - raw_command; + + ret = check_command(raw_command + len); + if (ret) { + synth_err(SYNTH_ERR_CMD_INCOMPLETE, 0); + return ret; + } + + name = kmemdup_nul(raw_command + len, p - raw_command - len, GFP_KERNEL); + if (!name) + return -ENOMEM; + + ret = __create_synth_event(name, fields); + + kfree(name); return ret; } -- cgit v1.2.3 From 8d3e8165232322eb32b1404f97690d05fdfd94ef Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Mon, 1 Feb 2021 13:48:13 -0600 Subject: tracing: Update synth command errors Since array types are handled differently, errors referencing them also need to be handled differently. Add and use a new INVALID_ARRAY_SPEC error. Also add INVALID_CMD and INVALID_DYN_CMD to catch and display the correct form for badly-formed commands, which can also be used in place of CMD_INCOMPLETE, which is removed, and remove CMD_TOO_LONG, since it's no longer used. Link: https://lkml.kernel.org/r/b9dd434dc6458dcff11adc6ed616fe93a8794770.1612208610.git.zanussi@kernel.org Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_synth.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 4f6c5a104ee2..aace72426e99 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -23,13 +23,14 @@ #undef ERRORS #define ERRORS \ C(BAD_NAME, "Illegal name"), \ - C(CMD_INCOMPLETE, "Incomplete command"), \ + C(INVALID_CMD, "Command must be of the form: field[;field] ..."),\ + C(INVALID_DYN_CMD, "Command must be of the form: s or -:[synthetic/] field[;field] ..."),\ C(EVENT_EXISTS, "Event already exists"), \ C(TOO_MANY_FIELDS, "Too many fields"), \ C(INCOMPLETE_TYPE, "Incomplete type"), \ C(INVALID_TYPE, "Invalid type"), \ - C(INVALID_FIELD, "Invalid field"), \ - C(CMD_TOO_LONG, "Command too long"), + C(INVALID_FIELD, "Invalid field"), \ + C(INVALID_ARRAY_SPEC, "Invalid array specification"), #undef C #define C(a, b) SYNTH_ERR_##a @@ -655,7 +656,10 @@ static struct synth_field *parse_synth_field(int argc, char **argv) size = synth_field_size(field->type); if (size < 0) { - synth_err(SYNTH_ERR_INVALID_TYPE, errpos(field_type)); + if (array) + synth_err(SYNTH_ERR_INVALID_ARRAY_SPEC, errpos(field_name)); + else + synth_err(SYNTH_ERR_INVALID_TYPE, errpos(field_type)); ret = -EINVAL; goto free; } else if (size == 0) { @@ -1174,7 +1178,7 @@ static int __create_synth_event(const char *name, const char *raw_fields) */ if (name[0] == '\0') { - synth_err(SYNTH_ERR_CMD_INCOMPLETE, 0); + synth_err(SYNTH_ERR_INVALID_CMD, 0); return -EINVAL; } @@ -1226,7 +1230,7 @@ static int __create_synth_event(const char *name, const char *raw_fields) } if (n_fields == 0) { - synth_err(SYNTH_ERR_CMD_INCOMPLETE, 0); + synth_err(SYNTH_ERR_INVALID_CMD, 0); ret = -EINVAL; goto err; } @@ -1410,13 +1414,13 @@ static int create_or_delete_synth_event(const char *raw_command) ret = check_command(raw_command); if (ret) { - synth_err(SYNTH_ERR_CMD_INCOMPLETE, 0); + synth_err(SYNTH_ERR_INVALID_CMD, 0); return ret; } p = strpbrk(raw_command, " \t"); if (!p && raw_command[0] != '!') { - synth_err(SYNTH_ERR_CMD_INCOMPLETE, 0); + synth_err(SYNTH_ERR_INVALID_CMD, 0); ret = -EINVAL; goto free; } @@ -1993,8 +1997,10 @@ static int create_synth_event(const char *raw_command) last_cmd_set(raw_command); p = strpbrk(raw_command, " \t"); - if (!p) + if (!p) { + synth_err(SYNTH_ERR_INVALID_CMD, 0); return -EINVAL; + } fields = skip_spaces(p); @@ -2007,8 +2013,10 @@ static int create_synth_event(const char *raw_command) /* This interface accepts group name prefix */ if (strchr(name, '/')) { len = str_has_prefix(name, SYNTH_SYSTEM "/"); - if (len == 0) + if (len == 0) { + synth_err(SYNTH_ERR_INVALID_DYN_CMD, 0); return -EINVAL; + } name += len; } @@ -2016,7 +2024,7 @@ static int create_synth_event(const char *raw_command) ret = check_command(raw_command + len); if (ret) { - synth_err(SYNTH_ERR_CMD_INCOMPLETE, 0); + synth_err(SYNTH_ERR_INVALID_CMD, 0); return ret; } -- cgit v1.2.3 From 8b5ab6bd0b293408ed8c9450831f879ce9903ea2 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Mon, 1 Feb 2021 13:48:14 -0600 Subject: tracing: Add a backward-compatibility check for synthetic event creation The synthetic event parsing rework now requires semicolons between synthetic event fields. That requirement breaks existing users who might already have used the old synthetic event command format, so this adds an inner loop that can parse more than one field, if present, between semicolons. For each field, parse_synth_field() checks in which version that field was introduced, using check_field_version(). The caller, __create_synth_event() can then use that version information to determine whether or not to enforce the requirement on the command as a whole. In the future, if/when new features are added, the requirement will be that any field/string containing the new feature must use semicolons, and the check_field_version() check can then check for those and enforce it. Using a version number allows this scheme to be extended if necessary. Link: https://lkml.kernel.org/r/74fcc500d561b40ce91c5ee94818c70c6b0c9330.1612208610.git.zanussi@kernel.org [ zanussi: added check_field_version() comment from rostedt@goodmis.org ] Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_synth.c | 93 +++++++++++++++++++++++++++++++-------- 1 file changed, 74 insertions(+), 19 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index aace72426e99..2979a96595b4 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -580,11 +580,29 @@ static void free_synth_field(struct synth_field *field) kfree(field); } -static struct synth_field *parse_synth_field(int argc, char **argv) +static int check_field_version(const char *prefix, const char *field_type, + const char *field_name) +{ + /* + * For backward compatibility, the old synthetic event command + * format did not require semicolons, and in order to not + * break user space, that old format must still work. If a new + * feature is added, then the format that uses the new feature + * will be required to have semicolons, as nothing that uses + * the old format would be using the new, yet to be created, + * feature. When a new feature is added, this will detect it, + * and return a number greater than 1, and require the format + * to use semicolons. + */ + return 1; +} + +static struct synth_field *parse_synth_field(int argc, char **argv, + int *consumed, int *field_version) { const char *prefix = NULL, *field_type = argv[0], *field_name, *array; - int len, consumed, ret = -ENOMEM; struct synth_field *field; + int len, ret = -ENOMEM; struct seq_buf s; ssize_t size; @@ -596,15 +614,10 @@ static struct synth_field *parse_synth_field(int argc, char **argv) prefix = "unsigned "; field_type = argv[1]; field_name = argv[2]; - consumed = 3; + *consumed += 3; } else { field_name = argv[1]; - consumed = 2; - } - - if (consumed < argc) { - synth_err(SYNTH_ERR_INVALID_FIELD, errpos(field_type)); - return ERR_PTR(-EINVAL); + *consumed += 2; } if (!field_name) { @@ -612,6 +625,8 @@ static struct synth_field *parse_synth_field(int argc, char **argv) return ERR_PTR(-EINVAL); } + *field_version = check_field_version(prefix, field_type, field_name); + field = kzalloc(sizeof(*field), GFP_KERNEL); if (!field) return ERR_PTR(-ENOMEM); @@ -1167,6 +1182,7 @@ static int __create_synth_event(const char *name, const char *raw_fields) { char **argv, *field_str, *tmp_fields, *saved_fields = NULL; struct synth_field *field, *fields[SYNTH_FIELDS_MAX]; + int consumed, cmd_version = 1, n_fields_this_loop; int i, argc, n_fields = 0, ret = 0; struct synth_event *event = NULL; @@ -1212,21 +1228,60 @@ static int __create_synth_event(const char *name, const char *raw_fields) if (!argc) continue; - field = parse_synth_field(argc, argv); - if (IS_ERR(field)) { - argv_free(argv); - ret = PTR_ERR(field); - goto err; - } + n_fields_this_loop = 0; + consumed = 0; + while (argc > consumed) { + int field_version; + + field = parse_synth_field(argc - consumed, + argv + consumed, &consumed, + &field_version); + if (IS_ERR(field)) { + argv_free(argv); + ret = PTR_ERR(field); + goto err; + } - argv_free(argv); + /* + * Track the highest version of any field we + * found in the command. + */ + if (field_version > cmd_version) + cmd_version = field_version; + + /* + * Now sort out what is and isn't valid for + * each supported version. + * + * If we see more than 1 field per loop, it + * means we have multiple fields between + * semicolons, and that's something we no + * longer support in a version 2 or greater + * command. + */ + if (cmd_version > 1 && n_fields_this_loop >= 1) { + synth_err(SYNTH_ERR_INVALID_CMD, errpos(field_str)); + ret = -EINVAL; + goto err; + } + + fields[n_fields++] = field; + if (n_fields == SYNTH_FIELDS_MAX) { + synth_err(SYNTH_ERR_TOO_MANY_FIELDS, 0); + ret = -EINVAL; + goto err; + } + + n_fields_this_loop++; + } - fields[n_fields++] = field; - if (n_fields == SYNTH_FIELDS_MAX) { - synth_err(SYNTH_ERR_TOO_MANY_FIELDS, 0); + if (consumed < argc) { + synth_err(SYNTH_ERR_INVALID_CMD, 0); ret = -EINVAL; goto err; } + + argv_free(argv); } if (n_fields == 0) { -- cgit v1.2.3 From efbbdaa22bb78761bff8dfdde027ad04bedd47ce Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 15 Oct 2020 23:55:07 +0900 Subject: tracing: Show real address for trace event arguments To help debugging kernel, show real address for trace event arguments in tracefs/trace{,pipe} instead of hashed pointer value. Since ftrace human-readable format uses vsprintf(), all %p are translated to hash values instead of pointer address. However, when debugging the kernel, raw address value gives a hint when comparing with the memory mapping in the kernel. (Those are sometimes used with crash log, which is not hashed too) So converting %p with %px when calling trace_seq_printf(). Moreover, this is not improving the security because the tracefs can be used only by root user and the raw address values are readable from tracefs/percpu/cpu*/trace_pipe_raw file. Link: https://lkml.kernel.org/r/160277370703.29307.5134475491761971203.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 4 +++ include/trace/trace_events.h | 2 +- kernel/trace/trace.c | 71 +++++++++++++++++++++++++++++++++++++++++++- kernel/trace/trace.h | 2 ++ kernel/trace/trace_output.c | 12 +++++++- 5 files changed, 88 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 5d1eeac4bfbe..7077fec653bb 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -55,6 +55,8 @@ struct trace_event; int trace_raw_output_prep(struct trace_iterator *iter, struct trace_event *event); +extern __printf(2, 3) +void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...); /* * The trace entry - the most basic unit of tracing. This is what @@ -87,6 +89,8 @@ struct trace_iterator { unsigned long iter_flags; void *temp; /* temp holder */ unsigned int temp_size; + char *fmt; /* modified format holder */ + unsigned int fmt_size; /* trace_seq for __print_flags() and __print_symbolic() etc. */ struct trace_seq tmp_seq; diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index 7785961d82ba..e6a8b3febc49 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -364,7 +364,7 @@ trace_raw_output_##call(struct trace_iterator *iter, int flags, \ if (ret != TRACE_TYPE_HANDLED) \ return ret; \ \ - trace_seq_printf(s, print); \ + trace_event_printf(iter, print); \ \ return trace_handle_return(s); \ } \ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b79bcacdd6f9..39f8a537196e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3530,6 +3530,62 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, return next; } +#define STATIC_FMT_BUF_SIZE 128 +static char static_fmt_buf[STATIC_FMT_BUF_SIZE]; + +static char *trace_iter_expand_format(struct trace_iterator *iter) +{ + char *tmp; + + if (iter->fmt == static_fmt_buf) + return NULL; + + tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE, + GFP_KERNEL); + if (tmp) { + iter->fmt_size += STATIC_FMT_BUF_SIZE; + iter->fmt = tmp; + } + + return tmp; +} + +const char *trace_event_format(struct trace_iterator *iter, const char *fmt) +{ + const char *p, *new_fmt; + char *q; + + if (WARN_ON_ONCE(!fmt)) + return fmt; + + p = fmt; + new_fmt = q = iter->fmt; + while (*p) { + if (unlikely(q - new_fmt + 3 > iter->fmt_size)) { + if (!trace_iter_expand_format(iter)) + return fmt; + + q += iter->fmt - new_fmt; + new_fmt = iter->fmt; + } + + *q++ = *p++; + + /* Replace %p with %px */ + if (p[-1] == '%') { + if (p[0] == '%') { + *q++ = *p++; + } else if (p[0] == 'p' && !isalnum(p[1])) { + *q++ = *p++; + *q++ = 'x'; + } + } + } + *q = '\0'; + + return new_fmt; +} + #define STATIC_TEMP_BUF_SIZE 128 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4); @@ -4322,6 +4378,16 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) if (iter->temp) iter->temp_size = 128; + /* + * trace_event_printf() may need to modify given format + * string to replace %p with %px so that it shows real address + * instead of hash value. However, that is only for the event + * tracing, other tracer may not need. Defer the allocation + * until it is needed. + */ + iter->fmt = NULL; + iter->fmt_size = 0; + /* * We make a copy of the current tracer to avoid concurrent * changes on it while we are reading. @@ -4473,6 +4539,7 @@ static int tracing_release(struct inode *inode, struct file *file) mutex_destroy(&iter->mutex); free_cpumask_var(iter->started); + kfree(iter->fmt); kfree(iter->temp); kfree(iter->trace); kfree(iter->buffer_iter); @@ -9331,9 +9398,11 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) /* Simulate the iterator */ trace_init_global_iter(&iter); - /* Can not use kmalloc for iter.temp */ + /* Can not use kmalloc for iter.temp and iter.fmt */ iter.temp = static_temp_buf; iter.temp_size = STATIC_TEMP_BUF_SIZE; + iter.fmt = static_fmt_buf; + iter.fmt_size = STATIC_FMT_BUF_SIZE; for_each_tracing_cpu(cpu) { atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index a9e13bd5a41b..6c3ea6f95e68 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -581,6 +581,8 @@ struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, void trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, struct ring_buffer_event *event); +const char *trace_event_format(struct trace_iterator *iter, const char *fmt); + int trace_empty(struct trace_iterator *iter); void *trace_find_next_entry_inc(struct trace_iterator *iter); diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 92b1575ae0ca..61255bad7e01 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -312,13 +312,23 @@ int trace_raw_output_prep(struct trace_iterator *iter, } EXPORT_SYMBOL(trace_raw_output_prep); +void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + trace_seq_vprintf(&iter->seq, trace_event_format(iter, fmt), ap); + va_end(ap); +} +EXPORT_SYMBOL(trace_event_printf); + static int trace_output_raw(struct trace_iterator *iter, char *name, char *fmt, va_list ap) { struct trace_seq *s = &iter->seq; trace_seq_printf(s, "%s: ", name); - trace_seq_vprintf(s, fmt, ap); + trace_seq_vprintf(s, trace_event_format(iter, fmt), ap); return trace_handle_return(s); } -- cgit v1.2.3 From a345a6718bd5689f7cff7715e902d8739e0d5beb Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 15 Oct 2020 23:55:25 +0900 Subject: tracing: Add ptr-hash option to show the hashed pointer value Add tracefs/options/hash-ptr option to show hashed pointer value by %p in event printk format string. For the security reason, normal printk will show the hashed pointer value (encrypted by random number) with %p to printk buffer to hide the real address. But the tracefs/trace always shows real address for debug. To bridge those outputs, add an option to switch the output format. Ftrace users can use it to find the hashed value corresponding to the real address in trace log. Link: https://lkml.kernel.org/r/160277372504.29307.14909828808982012211.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- Documentation/trace/ftrace.rst | 6 ++++++ kernel/trace/trace.c | 3 +++ kernel/trace/trace.h | 1 + 3 files changed, 10 insertions(+) (limited to 'kernel') diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst index 87cf5c010d5d..62c98e9bbdd9 100644 --- a/Documentation/trace/ftrace.rst +++ b/Documentation/trace/ftrace.rst @@ -1159,6 +1159,12 @@ Here are the available options: This simulates the original behavior of the trace file. When the file is closed, tracing will be enabled again. + hash-ptr + When set, "%p" in the event printk format displays the + hashed pointer value instead of real address. + This will be useful if you want to find out which hashed + value is corresponding to the real value in trace log. + record-cmd When any event or tracer is enabled, a hook is enabled in the sched_switch trace point to fill comm cache diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 39f8a537196e..16e252d39016 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3558,6 +3558,9 @@ const char *trace_event_format(struct trace_iterator *iter, const char *fmt) if (WARN_ON_ONCE(!fmt)) return fmt; + if (iter->tr->trace_flags & TRACE_ITER_HASH_PTR) + return fmt; + p = fmt; new_fmt = q = iter->fmt; while (*p) { diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 6c3ea6f95e68..dec13ff66077 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1175,6 +1175,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf, C(MARKERS, "markers"), \ C(EVENT_FORK, "event-fork"), \ C(PAUSE_ON_TRACE, "pause-on-trace"), \ + C(HASH_PTR, "hash-ptr"), /* Print hashed pointer */ \ FUNCTION_FLAGS \ FGRAPH_FLAGS \ STACK_FLAGS \ -- cgit v1.2.3 From 99e22ce73c59ac2d6d08893af376483ca7d62850 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 12 Feb 2021 11:51:06 -0500 Subject: tracing: Make hash-ptr option default Since the original behavior of the trace events is to hash the %p pointers, make that the default, and have developers have to enable the option in order to have them unhashed. Cc: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 16e252d39016..f5e8e39d6f57 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -408,7 +408,8 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_export); TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \ TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \ TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \ - TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS) + TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | \ + TRACE_ITER_HASH_PTR) /* trace_options that are only supported by global_trace */ #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \ -- cgit v1.2.3 From c85c9a2c6e368dc94907e63babb18a9788e5c9b6 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 18 Feb 2021 23:29:23 +0900 Subject: kprobes: Fix to delay the kprobes jump optimization Commit 36dadef23fcc ("kprobes: Init kprobes in early_initcall") moved the kprobe setup in early_initcall(), which includes kprobe jump optimization. The kprobes jump optimizer involves synchronize_rcu_tasks() which depends on the ksoftirqd and rcu_spawn_tasks_*(). However, since those are setup in core_initcall(), kprobes jump optimizer can not run at the early_initcall(). To avoid this issue, make the kprobe optimization disabled in the early_initcall() and enables it in subsys_initcall(). Note that non-optimized kprobes is still available after early_initcall(). Only jump optimization is delayed. Link: https://lkml.kernel.org/r/161365856280.719838.12423085451287256713.stgit@devnote2 Fixes: 36dadef23fcc ("kprobes: Init kprobes in early_initcall") Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: RCU Cc: Michael Ellerman Cc: Andrew Morton Cc: Daniel Axtens Cc: Frederic Weisbecker Cc: Neeraj Upadhyay Cc: Joel Fernandes Cc: Michal Hocko Cc: "Theodore Y . Ts'o" Cc: Oleksiy Avramchenko Cc: stable@vger.kernel.org Reported-by: Paul E. McKenney Reported-by: Sebastian Andrzej Siewior Reported-by: Uladzislau Rezki Acked-by: Paul E. McKenney Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/kprobes.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/kprobes.c b/kernel/kprobes.c index dd1d027455c4..745f08fdd7a6 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -861,7 +861,6 @@ out: cpus_read_unlock(); } -#ifdef CONFIG_SYSCTL static void optimize_all_kprobes(void) { struct hlist_head *head; @@ -887,6 +886,7 @@ out: mutex_unlock(&kprobe_mutex); } +#ifdef CONFIG_SYSCTL static void unoptimize_all_kprobes(void) { struct hlist_head *head; @@ -2500,18 +2500,14 @@ static int __init init_kprobes(void) } } -#if defined(CONFIG_OPTPROBES) -#if defined(__ARCH_WANT_KPROBES_INSN_SLOT) - /* Init kprobe_optinsn_slots */ - kprobe_optinsn_slots.insn_size = MAX_OPTINSN_SIZE; -#endif - /* By default, kprobes can be optimized */ - kprobes_allow_optimization = true; -#endif - /* By default, kprobes are armed */ kprobes_all_disarmed = false; +#if defined(CONFIG_OPTPROBES) && defined(__ARCH_WANT_KPROBES_INSN_SLOT) + /* Init kprobe_optinsn_slots for allocation */ + kprobe_optinsn_slots.insn_size = MAX_OPTINSN_SIZE; +#endif + err = arch_init_kprobes(); if (!err) err = register_die_notifier(&kprobe_exceptions_nb); @@ -2526,6 +2522,21 @@ static int __init init_kprobes(void) } early_initcall(init_kprobes); +#if defined(CONFIG_OPTPROBES) +static int __init init_optprobes(void) +{ + /* + * Enable kprobe optimization - this kicks the optimizer which + * depends on synchronize_rcu_tasks() and ksoftirqd, that is + * not spawned in early initcall. So delay the optimization. + */ + optimize_all_kprobes(); + + return 0; +} +subsys_initcall(init_optprobes); +#endif + #ifdef CONFIG_DEBUG_FS static void report_probe(struct seq_file *pi, struct kprobe *p, const char *sym, int offset, char *modname, struct kprobe *pp) -- cgit v1.2.3