diff options
Diffstat (limited to 'kernel/trace')
-rw-r--r-- | kernel/trace/Kconfig | 36 | ||||
-rw-r--r-- | kernel/trace/Makefile | 1 | ||||
-rw-r--r-- | kernel/trace/bpf_trace.c | 222 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 44 | ||||
-rw-r--r-- | kernel/trace/ring_buffer.c | 10 | ||||
-rw-r--r-- | kernel/trace/trace.c | 491 | ||||
-rw-r--r-- | kernel/trace/trace.h | 4 | ||||
-rw-r--r-- | kernel/trace/trace_entries.h | 6 | ||||
-rw-r--r-- | kernel/trace/trace_events.c | 168 | ||||
-rw-r--r-- | kernel/trace/trace_export.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace_functions_graph.c | 15 | ||||
-rw-r--r-- | kernel/trace/trace_kprobe.c | 25 | ||||
-rw-r--r-- | kernel/trace/trace_probe.c | 19 | ||||
-rw-r--r-- | kernel/trace/trace_probe.h | 12 | ||||
-rw-r--r-- | kernel/trace/trace_stack.c | 4 | ||||
-rw-r--r-- | kernel/trace/trace_stat.c | 10 | ||||
-rw-r--r-- | kernel/trace/trace_uprobe.c | 15 |
17 files changed, 886 insertions, 198 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index a5da09c899dd..3b9a48ae153a 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -432,6 +432,14 @@ config UPROBE_EVENT This option is required if you plan to use perf-probe subcommand of perf tools on user space applications. +config BPF_EVENTS + depends on BPF_SYSCALL + depends on KPROBE_EVENT + bool + default y + help + This allows the user to attach BPF programs to kprobe events. + config PROBE_EVENTS def_bool n @@ -599,6 +607,34 @@ config RING_BUFFER_STARTUP_TEST If unsure, say N +config TRACE_ENUM_MAP_FILE + bool "Show enum mappings for trace events" + depends on TRACING + help + The "print fmt" of the trace events will show the enum names instead + of their values. This can cause problems for user space tools that + use this string to parse the raw data as user space does not know + how to convert the string to its value. + + To fix this, there's a special macro in the kernel that can be used + to convert the enum into its value. If this macro is used, then the + print fmt strings will have the enums converted to their values. + + If something does not get converted properly, this option can be + used to show what enums the kernel tried to convert. + + This option is for debugging the enum conversions. A file is created + in the tracing directory called "enum_map" that will show the enum + names matched with their values and what trace event system they + belong too. + + Normally, the mapping of the strings to values will be freed after + boot up or module load. With this option, they will not be freed, as + they are needed for the "enum_map" file. Enabling this option will + increase the memory footprint of the running kernel. + + If unsure, say N + endif # FTRACE endif # TRACING_SUPPORT diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 98f26588255e..9b1044e936a6 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -53,6 +53,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o endif obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o +obj-$(CONFIG_BPF_EVENTS) += bpf_trace.o obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o obj-$(CONFIG_TRACEPOINTS) += power-traces.o ifeq ($(CONFIG_PM),y) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c new file mode 100644 index 000000000000..2d56ce501632 --- /dev/null +++ b/kernel/trace/bpf_trace.c @@ -0,0 +1,222 @@ +/* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/bpf.h> +#include <linux/filter.h> +#include <linux/uaccess.h> +#include <linux/ctype.h> +#include "trace.h" + +static DEFINE_PER_CPU(int, bpf_prog_active); + +/** + * trace_call_bpf - invoke BPF program + * @prog: BPF program + * @ctx: opaque context pointer + * + * kprobe handlers execute BPF programs via this helper. + * Can be used from static tracepoints in the future. + * + * Return: BPF programs always return an integer which is interpreted by + * kprobe handler as: + * 0 - return from kprobe (event is filtered out) + * 1 - store kprobe event into ring buffer + * Other values are reserved and currently alias to 1 + */ +unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) +{ + unsigned int ret; + + if (in_nmi()) /* not supported yet */ + return 1; + + preempt_disable(); + + if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) { + /* + * since some bpf program is already running on this cpu, + * don't call into another bpf program (same or different) + * and don't send kprobe event into ring-buffer, + * so return zero here + */ + ret = 0; + goto out; + } + + rcu_read_lock(); + ret = BPF_PROG_RUN(prog, ctx); + rcu_read_unlock(); + + out: + __this_cpu_dec(bpf_prog_active); + preempt_enable(); + + return ret; +} +EXPORT_SYMBOL_GPL(trace_call_bpf); + +static u64 bpf_probe_read(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + void *dst = (void *) (long) r1; + int size = (int) r2; + void *unsafe_ptr = (void *) (long) r3; + + return probe_kernel_read(dst, unsafe_ptr, size); +} + +static const struct bpf_func_proto bpf_probe_read_proto = { + .func = bpf_probe_read, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_STACK, + .arg2_type = ARG_CONST_STACK_SIZE, + .arg3_type = ARG_ANYTHING, +}; + +static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + /* NMI safe access to clock monotonic */ + return ktime_get_mono_fast_ns(); +} + +static const struct bpf_func_proto bpf_ktime_get_ns_proto = { + .func = bpf_ktime_get_ns, + .gpl_only = true, + .ret_type = RET_INTEGER, +}; + +/* + * limited trace_printk() + * only %d %u %x %ld %lu %lx %lld %llu %llx %p conversion specifiers allowed + */ +static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5) +{ + char *fmt = (char *) (long) r1; + int mod[3] = {}; + int fmt_cnt = 0; + int i; + + /* + * bpf_check()->check_func_arg()->check_stack_boundary() + * guarantees that fmt points to bpf program stack, + * fmt_size bytes of it were initialized and fmt_size > 0 + */ + if (fmt[--fmt_size] != 0) + return -EINVAL; + + /* check format string for allowed specifiers */ + for (i = 0; i < fmt_size; i++) { + if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) + return -EINVAL; + + if (fmt[i] != '%') + continue; + + if (fmt_cnt >= 3) + return -EINVAL; + + /* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */ + i++; + if (fmt[i] == 'l') { + mod[fmt_cnt]++; + i++; + } else if (fmt[i] == 'p') { + mod[fmt_cnt]++; + i++; + if (!isspace(fmt[i]) && !ispunct(fmt[i]) && fmt[i] != 0) + return -EINVAL; + fmt_cnt++; + continue; + } + + if (fmt[i] == 'l') { + mod[fmt_cnt]++; + i++; + } + + if (fmt[i] != 'd' && fmt[i] != 'u' && fmt[i] != 'x') + return -EINVAL; + fmt_cnt++; + } + + return __trace_printk(1/* fake ip will not be printed */, fmt, + mod[0] == 2 ? r3 : mod[0] == 1 ? (long) r3 : (u32) r3, + mod[1] == 2 ? r4 : mod[1] == 1 ? (long) r4 : (u32) r4, + mod[2] == 2 ? r5 : mod[2] == 1 ? (long) r5 : (u32) r5); +} + +static const struct bpf_func_proto bpf_trace_printk_proto = { + .func = bpf_trace_printk, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_STACK, + .arg2_type = ARG_CONST_STACK_SIZE, +}; + +static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id) +{ + switch (func_id) { + case BPF_FUNC_map_lookup_elem: + return &bpf_map_lookup_elem_proto; + case BPF_FUNC_map_update_elem: + return &bpf_map_update_elem_proto; + case BPF_FUNC_map_delete_elem: + return &bpf_map_delete_elem_proto; + case BPF_FUNC_probe_read: + return &bpf_probe_read_proto; + case BPF_FUNC_ktime_get_ns: + return &bpf_ktime_get_ns_proto; + + case BPF_FUNC_trace_printk: + /* + * this program might be calling bpf_trace_printk, + * so allocate per-cpu printk buffers + */ + trace_printk_init_buffers(); + + return &bpf_trace_printk_proto; + default: + return NULL; + } +} + +/* bpf+kprobe programs can access fields of 'struct pt_regs' */ +static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type) +{ + /* check bounds */ + if (off < 0 || off >= sizeof(struct pt_regs)) + return false; + + /* only read is allowed */ + if (type != BPF_READ) + return false; + + /* disallow misaligned access */ + if (off % size != 0) + return false; + + return true; +} + +static struct bpf_verifier_ops kprobe_prog_ops = { + .get_func_proto = kprobe_prog_func_proto, + .is_valid_access = kprobe_prog_is_valid_access, +}; + +static struct bpf_prog_type_list kprobe_tl = { + .ops = &kprobe_prog_ops, + .type = BPF_PROG_TYPE_KPROBE, +}; + +static int __init register_kprobe_prog_ops(void) +{ + bpf_register_prog_type(&kprobe_tl); + return 0; +} +late_initcall(register_kprobe_prog_ops); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4f228024055b..02bece4a99ea 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -18,7 +18,7 @@ #include <linux/kallsyms.h> #include <linux/seq_file.h> #include <linux/suspend.h> -#include <linux/debugfs.h> +#include <linux/tracefs.h> #include <linux/hardirq.h> #include <linux/kthread.h> #include <linux/uaccess.h> @@ -249,6 +249,19 @@ static void update_function_graph_func(void); static inline void update_function_graph_func(void) { } #endif + +static ftrace_func_t ftrace_ops_get_list_func(struct ftrace_ops *ops) +{ + /* + * If this is a dynamic ops or we force list func, + * then it needs to call the list anyway. + */ + if (ops->flags & FTRACE_OPS_FL_DYNAMIC || FTRACE_FORCE_LIST_FUNC) + return ftrace_ops_list_func; + + return ftrace_ops_get_func(ops); +} + static void update_ftrace_function(void) { ftrace_func_t func; @@ -270,7 +283,7 @@ static void update_ftrace_function(void) * then have the mcount trampoline call the function directly. */ } else if (ftrace_ops_list->next == &ftrace_list_end) { - func = ftrace_ops_get_func(ftrace_ops_list); + func = ftrace_ops_get_list_func(ftrace_ops_list); } else { /* Just use the default ftrace_ops */ @@ -1008,7 +1021,7 @@ static struct tracer_stat function_stats __initdata = { .stat_show = function_stat_show }; -static __init void ftrace_profile_debugfs(struct dentry *d_tracer) +static __init void ftrace_profile_tracefs(struct dentry *d_tracer) { struct ftrace_profile_stat *stat; struct dentry *entry; @@ -1044,15 +1057,15 @@ static __init void ftrace_profile_debugfs(struct dentry *d_tracer) } } - entry = debugfs_create_file("function_profile_enabled", 0644, + entry = tracefs_create_file("function_profile_enabled", 0644, d_tracer, NULL, &ftrace_profile_fops); if (!entry) - pr_warning("Could not create debugfs " + pr_warning("Could not create tracefs " "'function_profile_enabled' entry\n"); } #else /* CONFIG_FUNCTION_PROFILER */ -static __init void ftrace_profile_debugfs(struct dentry *d_tracer) +static __init void ftrace_profile_tracefs(struct dentry *d_tracer) { } #endif /* CONFIG_FUNCTION_PROFILER */ @@ -4712,7 +4725,7 @@ void ftrace_destroy_filter_files(struct ftrace_ops *ops) mutex_unlock(&ftrace_lock); } -static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) +static __init int ftrace_init_dyn_tracefs(struct dentry *d_tracer) { trace_create_file("available_filter_functions", 0444, @@ -5020,7 +5033,7 @@ static int __init ftrace_nodyn_init(void) } core_initcall(ftrace_nodyn_init); -static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; } +static inline int ftrace_init_dyn_tracefs(struct dentry *d_tracer) { return 0; } static inline void ftrace_startup_enable(int command) { } static inline void ftrace_startup_all(int command) { } /* Keep as macros so we do not need to define the commands */ @@ -5209,13 +5222,6 @@ static void ftrace_ops_recurs_func(unsigned long ip, unsigned long parent_ip, ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops) { /* - * If this is a dynamic ops or we force list func, - * then it needs to call the list anyway. - */ - if (ops->flags & FTRACE_OPS_FL_DYNAMIC || FTRACE_FORCE_LIST_FUNC) - return ftrace_ops_list_func; - - /* * If the func handles its own recursion, call it directly. * Otherwise call the recursion protected function that * will call the ftrace ops function. @@ -5473,7 +5479,7 @@ static const struct file_operations ftrace_pid_fops = { .release = ftrace_pid_release, }; -static __init int ftrace_init_debugfs(void) +static __init int ftrace_init_tracefs(void) { struct dentry *d_tracer; @@ -5481,16 +5487,16 @@ static __init int ftrace_init_debugfs(void) if (IS_ERR(d_tracer)) return 0; - ftrace_init_dyn_debugfs(d_tracer); + ftrace_init_dyn_tracefs(d_tracer); trace_create_file("set_ftrace_pid", 0644, d_tracer, NULL, &ftrace_pid_fops); - ftrace_profile_debugfs(d_tracer); + ftrace_profile_tracefs(d_tracer); return 0; } -fs_initcall(ftrace_init_debugfs); +fs_initcall(ftrace_init_tracefs); /** * ftrace_kill - kill ftrace diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 5040d44fe5a3..0315d43176d8 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2679,7 +2679,7 @@ static DEFINE_PER_CPU(unsigned int, current_context); static __always_inline int trace_recursive_lock(void) { - unsigned int val = this_cpu_read(current_context); + unsigned int val = __this_cpu_read(current_context); int bit; if (in_interrupt()) { @@ -2696,18 +2696,14 @@ static __always_inline int trace_recursive_lock(void) return 1; val |= (1 << bit); - this_cpu_write(current_context, val); + __this_cpu_write(current_context, val); return 0; } static __always_inline void trace_recursive_unlock(void) { - unsigned int val = this_cpu_read(current_context); - - val--; - val &= this_cpu_read(current_context); - this_cpu_write(current_context, val); + __this_cpu_and(current_context, __this_cpu_read(current_context) - 1); } #else diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d44901bcd867..05330494a0df 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -20,6 +20,7 @@ #include <linux/notifier.h> #include <linux/irqflags.h> #include <linux/debugfs.h> +#include <linux/tracefs.h> #include <linux/pagemap.h> #include <linux/hardirq.h> #include <linux/linkage.h> @@ -31,6 +32,7 @@ #include <linux/splice.h> #include <linux/kdebug.h> #include <linux/string.h> +#include <linux/mount.h> #include <linux/rwsem.h> #include <linux/slab.h> #include <linux/ctype.h> @@ -123,6 +125,42 @@ enum ftrace_dump_mode ftrace_dump_on_oops; /* When set, tracing will stop when a WARN*() is hit */ int __disable_trace_on_warning; +#ifdef CONFIG_TRACE_ENUM_MAP_FILE +/* Map of enums to their values, for "enum_map" file */ +struct trace_enum_map_head { + struct module *mod; + unsigned long length; +}; + +union trace_enum_map_item; + +struct trace_enum_map_tail { + /* + * "end" is first and points to NULL as it must be different + * than "mod" or "enum_string" + */ + union trace_enum_map_item *next; + const char *end; /* points to NULL */ +}; + +static DEFINE_MUTEX(trace_enum_mutex); + +/* + * The trace_enum_maps are saved in an array with two extra elements, + * one at the beginning, and one at the end. The beginning item contains + * the count of the saved maps (head.length), and the module they + * belong to if not built in (head.mod). The ending item contains a + * pointer to the next array of saved enum_map items. + */ +union trace_enum_map_item { + struct trace_enum_map map; + struct trace_enum_map_head head; + struct trace_enum_map_tail tail; +}; + +static union trace_enum_map_item *trace_enum_maps; +#endif /* CONFIG_TRACE_ENUM_MAP_FILE */ + static int tracing_set_tracer(struct trace_array *tr, const char *buf); #define MAX_TRACER_SIZE 100 @@ -3908,6 +3946,182 @@ static const struct file_operations tracing_saved_cmdlines_size_fops = { .write = tracing_saved_cmdlines_size_write, }; +#ifdef CONFIG_TRACE_ENUM_MAP_FILE +static union trace_enum_map_item * +update_enum_map(union trace_enum_map_item *ptr) +{ + if (!ptr->map.enum_string) { + if (ptr->tail.next) { + ptr = ptr->tail.next; + /* Set ptr to the next real item (skip head) */ + ptr++; + } else + return NULL; + } + return ptr; +} + +static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos) +{ + union trace_enum_map_item *ptr = v; + + /* + * Paranoid! If ptr points to end, we don't want to increment past it. + * This really should never happen. + */ + ptr = update_enum_map(ptr); + if (WARN_ON_ONCE(!ptr)) + return NULL; + + ptr++; + + (*pos)++; + + ptr = update_enum_map(ptr); + + return ptr; +} + +static void *enum_map_start(struct seq_file *m, loff_t *pos) +{ + union trace_enum_map_item *v; + loff_t l = 0; + + mutex_lock(&trace_enum_mutex); + + v = trace_enum_maps; + if (v) + v++; + + while (v && l < *pos) { + v = enum_map_next(m, v, &l); + } + + return v; +} + +static void enum_map_stop(struct seq_file *m, void *v) +{ + mutex_unlock(&trace_enum_mutex); +} + +static int enum_map_show(struct seq_file *m, void *v) +{ + union trace_enum_map_item *ptr = v; + + seq_printf(m, "%s %ld (%s)\n", + ptr->map.enum_string, ptr->map.enum_value, + ptr->map.system); + + return 0; +} + +static const struct seq_operations tracing_enum_map_seq_ops = { + .start = enum_map_start, + .next = enum_map_next, + .stop = enum_map_stop, + .show = enum_map_show, +}; + +static int tracing_enum_map_open(struct inode *inode, struct file *filp) +{ + if (tracing_disabled) + return -ENODEV; + + return seq_open(filp, &tracing_enum_map_seq_ops); +} + +static const struct file_operations tracing_enum_map_fops = { + .open = tracing_enum_map_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static inline union trace_enum_map_item * +trace_enum_jmp_to_tail(union trace_enum_map_item *ptr) +{ + /* Return tail of array given the head */ + return ptr + ptr->head.length + 1; +} + +static void +trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start, + int len) +{ + struct trace_enum_map **stop; + struct trace_enum_map **map; + union trace_enum_map_item *map_array; + union trace_enum_map_item *ptr; + + stop = start + len; + + /* + * The trace_enum_maps contains the map plus a head and tail item, + * where the head holds the module and length of array, and the + * tail holds a pointer to the next list. + */ + map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL); + if (!map_array) { + pr_warning("Unable to allocate trace enum mapping\n"); + return; + } + + mutex_lock(&trace_enum_mutex); + + if (!trace_enum_maps) + trace_enum_maps = map_array; + else { + ptr = trace_enum_maps; + for (;;) { + ptr = trace_enum_jmp_to_tail(ptr); + if (!ptr->tail.next) + break; + ptr = ptr->tail.next; + + } + ptr->tail.next = map_array; + } + map_array->head.mod = mod; + map_array->head.length = len; + map_array++; + + for (map = start; (unsigned long)map < (unsigned long)stop; map++) { + map_array->map = **map; + map_array++; + } + memset(map_array, 0, sizeof(*map_array)); + + mutex_unlock(&trace_enum_mutex); +} + +static void trace_create_enum_file(struct dentry *d_tracer) +{ + trace_create_file("enum_map", 0444, d_tracer, + NULL, &tracing_enum_map_fops); +} + +#else /* CONFIG_TRACE_ENUM_MAP_FILE */ +static inline void trace_create_enum_file(struct dentry *d_tracer) { } +static inline void trace_insert_enum_map_file(struct module *mod, + struct trace_enum_map **start, int len) { } +#endif /* !CONFIG_TRACE_ENUM_MAP_FILE */ + +static void trace_insert_enum_map(struct module *mod, + struct trace_enum_map **start, int len) +{ + struct trace_enum_map **map; + + if (len <= 0) + return; + + map = start; + + trace_event_enum_update(map, len); + + trace_insert_enum_map_file(mod, start, len); +} + static ssize_t tracing_set_trace_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) @@ -4105,9 +4319,24 @@ static void tracing_set_nop(struct trace_array *tr) tr->current_trace = &nop_trace; } -static int tracing_set_tracer(struct trace_array *tr, const char *buf) +static void update_tracer_options(struct trace_array *tr, struct tracer *t) { static struct trace_option_dentry *topts; + + /* Only enable if the directory has been created already. */ + if (!tr->dir) + return; + + /* Currently, only the top instance has options */ + if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL)) + return; + + destroy_trace_option_files(topts); + topts = create_trace_option_files(tr, t); +} + +static int tracing_set_tracer(struct trace_array *tr, const char *buf) +{ struct tracer *t; #ifdef CONFIG_TRACER_MAX_TRACE bool had_max_tr; @@ -4172,11 +4401,7 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf) free_snapshot(tr); } #endif - /* Currently, only the top instance has options */ - if (tr->flags & TRACE_ARRAY_FL_GLOBAL) { - destroy_trace_option_files(topts); - topts = create_trace_option_files(tr, t); - } + update_tracer_options(tr, t); #ifdef CONFIG_TRACER_MAX_TRACE if (t->use_max_tr && !had_max_tr) { @@ -5817,6 +6042,14 @@ static inline __init int register_snapshot_cmd(void) { return 0; } static struct dentry *tracing_get_dentry(struct trace_array *tr) { + if (WARN_ON(!tr->dir)) + return ERR_PTR(-ENODEV); + + /* Top directory uses NULL as the parent */ + if (tr->flags & TRACE_ARRAY_FL_GLOBAL) + return NULL; + + /* All sub buffers have a descriptor */ return tr->dir; } @@ -5831,10 +6064,10 @@ static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu) if (IS_ERR(d_tracer)) return NULL; - tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer); + tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer); WARN_ONCE(!tr->percpu_dir, - "Could not create debugfs directory 'per_cpu/%d'\n", cpu); + "Could not create tracefs directory 'per_cpu/%d'\n", cpu); return tr->percpu_dir; } @@ -5851,7 +6084,7 @@ trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent, } static void -tracing_init_debugfs_percpu(struct trace_array *tr, long cpu) +tracing_init_tracefs_percpu(struct trace_array *tr, long cpu) { struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu); struct dentry *d_cpu; @@ -5861,9 +6094,9 @@ tracing_init_debugfs_percpu(struct trace_array *tr, long cpu) return; snprintf(cpu_dir, 30, "cpu%ld", cpu); - d_cpu = debugfs_create_dir(cpu_dir, d_percpu); + d_cpu = tracefs_create_dir(cpu_dir, d_percpu); if (!d_cpu) { - pr_warning("Could not create debugfs '%s' entry\n", cpu_dir); + pr_warning("Could not create tracefs '%s' entry\n", cpu_dir); return; } @@ -6015,9 +6248,9 @@ struct dentry *trace_create_file(const char *name, { struct dentry *ret; - ret = debugfs_create_file(name, mode, parent, data, fops); + ret = tracefs_create_file(name, mode, parent, data, fops); if (!ret) - pr_warning("Could not create debugfs '%s' entry\n", name); + pr_warning("Could not create tracefs '%s' entry\n", name); return ret; } @@ -6034,9 +6267,9 @@ static struct dentry *trace_options_init_dentry(struct trace_array *tr) if (IS_ERR(d_tracer)) return NULL; - tr->options = debugfs_create_dir("options", d_tracer); + tr->options = tracefs_create_dir("options", d_tracer); if (!tr->options) { - pr_warning("Could not create debugfs directory 'options'\n"); + pr_warning("Could not create tracefs directory 'options'\n"); return NULL; } @@ -6105,7 +6338,7 @@ destroy_trace_option_files(struct trace_option_dentry *topts) return; for (cnt = 0; topts[cnt].opt; cnt++) - debugfs_remove(topts[cnt].entry); + tracefs_remove(topts[cnt].entry); kfree(topts); } @@ -6194,7 +6427,7 @@ static const struct file_operations rb_simple_fops = { struct dentry *trace_instance_dir; static void -init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer); +init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer); static int allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size) @@ -6271,7 +6504,7 @@ static void free_trace_buffers(struct trace_array *tr) #endif } -static int new_instance_create(const char *name) +static int instance_mkdir(const char *name) { struct trace_array *tr; int ret; @@ -6310,17 +6543,17 @@ static int new_instance_create(const char *name) if (allocate_trace_buffers(tr, trace_buf_size) < 0) goto out_free_tr; - tr->dir = debugfs_create_dir(name, trace_instance_dir); + tr->dir = tracefs_create_dir(name, trace_instance_dir); if (!tr->dir) goto out_free_tr; ret = event_trace_add_tracer(tr->dir, tr); if (ret) { - debugfs_remove_recursive(tr->dir); + tracefs_remove_recursive(tr->dir); goto out_free_tr; } - init_tracer_debugfs(tr, tr->dir); + init_tracer_tracefs(tr, tr->dir); list_add(&tr->list, &ftrace_trace_arrays); @@ -6341,7 +6574,7 @@ static int new_instance_create(const char *name) } -static int instance_delete(const char *name) +static int instance_rmdir(const char *name) { struct trace_array *tr; int found = 0; @@ -6382,82 +6615,17 @@ static int instance_delete(const char *name) return ret; } -static int instance_mkdir (struct inode *inode, struct dentry *dentry, umode_t mode) -{ - struct dentry *parent; - int ret; - - /* Paranoid: Make sure the parent is the "instances" directory */ - parent = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias); - if (WARN_ON_ONCE(parent != trace_instance_dir)) - return -ENOENT; - - /* - * The inode mutex is locked, but debugfs_create_dir() will also - * take the mutex. As the instances directory can not be destroyed - * or changed in any other way, it is safe to unlock it, and - * let the dentry try. If two users try to make the same dir at - * the same time, then the new_instance_create() will determine the - * winner. - */ - mutex_unlock(&inode->i_mutex); - - ret = new_instance_create(dentry->d_iname); - - mutex_lock(&inode->i_mutex); - - return ret; -} - -static int instance_rmdir(struct inode *inode, struct dentry *dentry) -{ - struct dentry *parent; - int ret; - - /* Paranoid: Make sure the parent is the "instances" directory */ - parent = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias); - if (WARN_ON_ONCE(parent != trace_instance_dir)) - return -ENOENT; - - /* The caller did a dget() on dentry */ - mutex_unlock(&d_inode(dentry)->i_mutex); - - /* - * The inode mutex is locked, but debugfs_create_dir() will also - * take the mutex. As the instances directory can not be destroyed - * or changed in any other way, it is safe to unlock it, and - * let the dentry try. If two users try to make the same dir at - * the same time, then the instance_delete() will determine the - * winner. - */ - mutex_unlock(&inode->i_mutex); - - ret = instance_delete(dentry->d_iname); - - mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); - mutex_lock(&d_inode(dentry)->i_mutex); - - return ret; -} - -static const struct inode_operations instance_dir_inode_operations = { - .lookup = simple_lookup, - .mkdir = instance_mkdir, - .rmdir = instance_rmdir, -}; - static __init void create_trace_instances(struct dentry *d_tracer) { - trace_instance_dir = debugfs_create_dir("instances", d_tracer); + trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer, + instance_mkdir, + instance_rmdir); if (WARN_ON(!trace_instance_dir)) return; - - /* Hijack the dir inode operations, to allow mkdir */ - d_inode(trace_instance_dir)->i_op = &instance_dir_inode_operations; } static void -init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer) +init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) { int cpu; @@ -6511,10 +6679,32 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer) #endif for_each_tracing_cpu(cpu) - tracing_init_debugfs_percpu(tr, cpu); + tracing_init_tracefs_percpu(tr, cpu); } +static struct vfsmount *trace_automount(void *ingore) +{ + struct vfsmount *mnt; + struct file_system_type *type; + + /* + * To maintain backward compatibility for tools that mount + * debugfs to get to the tracing facility, tracefs is automatically + * mounted to the debugfs/tracing directory. + */ + type = get_fs_type("tracefs"); + if (!type) + return NULL; + mnt = vfs_kern_mount(type, 0, "tracefs", NULL); + put_filesystem(type); + if (IS_ERR(mnt)) + return NULL; + mntget(mnt); + + return mnt; +} + /** * tracing_init_dentry - initialize top level trace array * @@ -6526,23 +6716,112 @@ struct dentry *tracing_init_dentry(void) { struct trace_array *tr = &global_trace; + /* The top level trace array uses NULL as parent */ if (tr->dir) - return tr->dir; + return NULL; if (WARN_ON(!debugfs_initialized())) return ERR_PTR(-ENODEV); - tr->dir = debugfs_create_dir("tracing", NULL); - + /* + * As there may still be users that expect the tracing + * files to exist in debugfs/tracing, we must automount + * the tracefs file system there, so older tools still + * work with the newer kerenl. + */ + tr->dir = debugfs_create_automount("tracing", NULL, + trace_automount, NULL); if (!tr->dir) { pr_warn_once("Could not create debugfs directory 'tracing'\n"); return ERR_PTR(-ENOMEM); } - return tr->dir; + return NULL; +} + +extern struct trace_enum_map *__start_ftrace_enum_maps[]; +extern struct trace_enum_map *__stop_ftrace_enum_maps[]; + +static void __init trace_enum_init(void) +{ + int len; + + len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps; + trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len); +} + +#ifdef CONFIG_MODULES +static void trace_module_add_enums(struct module *mod) +{ + if (!mod->num_trace_enums) + return; + + /* + * Modules with bad taint do not have events created, do + * not bother with enums either. + */ + if (trace_module_has_bad_taint(mod)) + return; + + trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums); } -static __init int tracer_init_debugfs(void) +#ifdef CONFIG_TRACE_ENUM_MAP_FILE +static void trace_module_remove_enums(struct module *mod) +{ + union trace_enum_map_item *map; + union trace_enum_map_item **last = &trace_enum_maps; + + if (!mod->num_trace_enums) + return; + + mutex_lock(&trace_enum_mutex); + + map = trace_enum_maps; + + while (map) { + if (map->head.mod == mod) + break; + map = trace_enum_jmp_to_tail(map); + last = &map->tail.next; + map = map->tail.next; + } + if (!map) + goto out; + + *last = trace_enum_jmp_to_tail(map)->tail.next; + kfree(map); + out: + mutex_unlock(&trace_enum_mutex); +} +#else +static inline void trace_module_remove_enums(struct module *mod) { } +#endif /* CONFIG_TRACE_ENUM_MAP_FILE */ + +static int trace_module_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct module *mod = data; + + switch (val) { + case MODULE_STATE_COMING: + trace_module_add_enums(mod); + break; + case MODULE_STATE_GOING: + trace_module_remove_enums(mod); + break; + } + + return 0; +} + +static struct notifier_block trace_module_nb = { + .notifier_call = trace_module_notify, + .priority = 0, +}; +#endif /* CONFIG_MODULES */ + +static __init int tracer_init_tracefs(void) { struct dentry *d_tracer; @@ -6552,7 +6831,7 @@ static __init int tracer_init_debugfs(void) if (IS_ERR(d_tracer)) return 0; - init_tracer_debugfs(&global_trace, d_tracer); + init_tracer_tracefs(&global_trace, d_tracer); trace_create_file("tracing_thresh", 0644, d_tracer, &global_trace, &tracing_thresh_fops); @@ -6566,6 +6845,14 @@ static __init int tracer_init_debugfs(void) trace_create_file("saved_cmdlines_size", 0644, d_tracer, NULL, &tracing_saved_cmdlines_size_fops); + trace_enum_init(); + + trace_create_enum_file(d_tracer); + +#ifdef CONFIG_MODULES + register_module_notifier(&trace_module_nb); +#endif + #ifdef CONFIG_DYNAMIC_FTRACE trace_create_file("dyn_ftrace_total_info", 0444, d_tracer, &ftrace_update_tot_cnt, &tracing_dyn_info_fops); @@ -6575,6 +6862,10 @@ static __init int tracer_init_debugfs(void) create_trace_options_dir(&global_trace); + /* If the tracer was started via cmdline, create options for it here */ + if (global_trace.current_trace != &nop_trace) + update_tracer_options(&global_trace, global_trace.current_trace); + return 0; } @@ -6888,7 +7179,7 @@ void __init trace_init(void) tracepoint_printk = 0; } tracer_alloc_buffers(); - trace_event_init(); + trace_event_init(); } __init static int clear_boot_tracer(void) @@ -6910,5 +7201,5 @@ __init static int clear_boot_tracer(void) return 0; } -fs_initcall(tracer_init_debugfs); +fs_initcall(tracer_init_tracefs); late_initcall(clear_boot_tracer); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index dd8205a35760..d2612016de94 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -334,7 +334,7 @@ struct tracer_flags { /** - * struct tracer - a specific tracer and its callbacks to interact with debugfs + * struct tracer - a specific tracer and its callbacks to interact with tracefs * @name: the name chosen to select it on the available_tracers file * @init: called when one switches to this tracer (echo name > current_tracer) * @reset: called when one switches to another tracer @@ -1309,8 +1309,10 @@ static inline void init_ftrace_syscalls(void) { } #ifdef CONFIG_EVENT_TRACING void trace_event_init(void); +void trace_event_enum_update(struct trace_enum_map **map, int len); #else static inline void __init trace_event_init(void) { } +static inlin void trace_event_enum_update(struct trace_enum_map **map, int len) { } #endif extern struct trace_iterator *tracepoint_print_iter; diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index e2d027ac66a2..ee7b94a4810a 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -223,7 +223,7 @@ FTRACE_ENTRY(bprint, bprint_entry, __dynamic_array( u32, buf ) ), - F_printk("%pf: %s", + F_printk("%ps: %s", (void *)__entry->ip, __entry->fmt), FILTER_OTHER @@ -238,7 +238,7 @@ FTRACE_ENTRY(print, print_entry, __dynamic_array( char, buf ) ), - F_printk("%pf: %s", + F_printk("%ps: %s", (void *)__entry->ip, __entry->buf), FILTER_OTHER @@ -253,7 +253,7 @@ FTRACE_ENTRY(bputs, bputs_entry, __field( const char *, str ) ), - F_printk("%pf: %s", + F_printk("%ps: %s", (void *)__entry->ip, __entry->str), FILTER_OTHER diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index e19e7578a0d3..c4de47fc5cca 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -13,7 +13,7 @@ #include <linux/workqueue.h> #include <linux/spinlock.h> #include <linux/kthread.h> -#include <linux/debugfs.h> +#include <linux/tracefs.h> #include <linux/uaccess.h> #include <linux/module.h> #include <linux/ctype.h> @@ -480,7 +480,7 @@ static void remove_subsystem(struct ftrace_subsystem_dir *dir) return; if (!--dir->nr_events) { - debugfs_remove_recursive(dir->entry); + tracefs_remove_recursive(dir->entry); list_del(&dir->list); __put_system_dir(dir); } @@ -499,7 +499,7 @@ static void remove_event_file_dir(struct ftrace_event_file *file) } spin_unlock(&dir->d_lock); - debugfs_remove_recursive(dir); + tracefs_remove_recursive(dir); } list_del(&file->list); @@ -565,6 +565,7 @@ static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, static int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set) { char *event = NULL, *sub = NULL, *match; + int ret; /* * The buf format can be <subsystem>:<event-name> @@ -590,7 +591,13 @@ static int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set) event = NULL; } - return __ftrace_set_clr_event(tr, match, sub, event, set); + ret = __ftrace_set_clr_event(tr, match, sub, event, set); + + /* Put back the colon to allow this to be called again */ + if (buf) + *(buf - 1) = ':'; + + return ret; } /** @@ -1526,7 +1533,7 @@ event_subsystem_dir(struct trace_array *tr, const char *name, } else __get_system(system); - dir->entry = debugfs_create_dir(name, parent); + dir->entry = tracefs_create_dir(name, parent); if (!dir->entry) { pr_warn("Failed to create system directory %s\n", name); __put_system(system); @@ -1539,12 +1546,12 @@ event_subsystem_dir(struct trace_array *tr, const char *name, dir->subsystem = system; file->system = dir; - entry = debugfs_create_file("filter", 0644, dir->entry, dir, + entry = tracefs_create_file("filter", 0644, dir->entry, dir, &ftrace_subsystem_filter_fops); if (!entry) { kfree(system->filter); system->filter = NULL; - pr_warn("Could not create debugfs '%s/filter' entry\n", name); + pr_warn("Could not create tracefs '%s/filter' entry\n", name); } trace_create_file("enable", 0644, dir->entry, dir, @@ -1585,9 +1592,9 @@ event_create_dir(struct dentry *parent, struct ftrace_event_file *file) d_events = parent; name = ftrace_event_name(call); - file->dir = debugfs_create_dir(name, d_events); + file->dir = tracefs_create_dir(name, d_events); if (!file->dir) { - pr_warn("Could not create debugfs '%s' directory\n", name); + pr_warn("Could not create tracefs '%s' directory\n", name); return -1; } @@ -1704,6 +1711,131 @@ __register_event(struct ftrace_event_call *call, struct module *mod) return 0; } +static char *enum_replace(char *ptr, struct trace_enum_map *map, int len) +{ + int rlen; + int elen; + + /* Find the length of the enum value as a string */ + elen = snprintf(ptr, 0, "%ld", map->enum_value); + /* Make sure there's enough room to replace the string with the value */ + if (len < elen) + return NULL; + + snprintf(ptr, elen + 1, "%ld", map->enum_value); + + /* Get the rest of the string of ptr */ + rlen = strlen(ptr + len); + memmove(ptr + elen, ptr + len, rlen); + /* Make sure we end the new string */ + ptr[elen + rlen] = 0; + + return ptr + elen; +} + +static void update_event_printk(struct ftrace_event_call *call, + struct trace_enum_map *map) +{ + char *ptr; + int quote = 0; + int len = strlen(map->enum_string); + + for (ptr = call->print_fmt; *ptr; ptr++) { + if (*ptr == '\\') { + ptr++; + /* paranoid */ + if (!*ptr) + break; + continue; + } + if (*ptr == '"') { + quote ^= 1; + continue; + } + if (quote) + continue; + if (isdigit(*ptr)) { + /* skip numbers */ + do { + ptr++; + /* Check for alpha chars like ULL */ + } while (isalnum(*ptr)); + if (!*ptr) + break; + /* + * A number must have some kind of delimiter after + * it, and we can ignore that too. + */ + continue; + } + if (isalpha(*ptr) || *ptr == '_') { + if (strncmp(map->enum_string, ptr, len) == 0 && + !isalnum(ptr[len]) && ptr[len] != '_') { + ptr = enum_replace(ptr, map, len); + /* Hmm, enum string smaller than value */ + if (WARN_ON_ONCE(!ptr)) + return; + /* + * No need to decrement here, as enum_replace() + * returns the pointer to the character passed + * the enum, and two enums can not be placed + * back to back without something in between. + * We can skip that something in between. + */ + continue; + } + skip_more: + do { + ptr++; + } while (isalnum(*ptr) || *ptr == '_'); + if (!*ptr) + break; + /* + * If what comes after this variable is a '.' or + * '->' then we can continue to ignore that string. + */ + if (*ptr == '.' || (ptr[0] == '-' && ptr[1] == '>')) { + ptr += *ptr == '.' ? 1 : 2; + if (!*ptr) + break; + goto skip_more; + } + /* + * Once again, we can skip the delimiter that came + * after the string. + */ + continue; + } + } +} + +void trace_event_enum_update(struct trace_enum_map **map, int len) +{ + struct ftrace_event_call *call, *p; + const char *last_system = NULL; + int last_i; + int i; + + down_write(&trace_event_sem); + list_for_each_entry_safe(call, p, &ftrace_events, list) { + /* events are usually grouped together with systems */ + if (!last_system || call->class->system != last_system) { + last_i = 0; + last_system = call->class->system; + } + + for (i = last_i; i < len; i++) { + if (call->class->system == map[i]->system) { + /* Save the first system if need be */ + if (!last_i) + last_i = i; + update_event_printk(call, map[i]); + } + } + } + up_write(&trace_event_sem); +} + static struct ftrace_event_file * trace_create_new_event(struct ftrace_event_call *call, struct trace_array *tr) @@ -1915,7 +2047,7 @@ static int trace_module_notify(struct notifier_block *self, static struct notifier_block trace_module_nb = { .notifier_call = trace_module_notify, - .priority = 0, + .priority = 1, /* higher than trace.c module notify */ }; #endif /* CONFIG_MODULES */ @@ -2228,7 +2360,7 @@ static inline int register_event_cmds(void) { return 0; } /* * The top level array has already had its ftrace_event_file * descriptors created in order to allow for early events to - * be recorded. This function is called after the debugfs has been + * be recorded. This function is called after the tracefs has been * initialized, and we now have to create the files associated * to the events. */ @@ -2311,16 +2443,16 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) struct dentry *d_events; struct dentry *entry; - entry = debugfs_create_file("set_event", 0644, parent, + entry = tracefs_create_file("set_event", 0644, parent, tr, &ftrace_set_event_fops); if (!entry) { - pr_warn("Could not create debugfs 'set_event' entry\n"); + pr_warn("Could not create tracefs 'set_event' entry\n"); return -ENOMEM; } - d_events = debugfs_create_dir("events", parent); + d_events = tracefs_create_dir("events", parent); if (!d_events) { - pr_warn("Could not create debugfs 'events' directory\n"); + pr_warn("Could not create tracefs 'events' directory\n"); return -ENOMEM; } @@ -2412,7 +2544,7 @@ int event_trace_del_tracer(struct trace_array *tr) down_write(&trace_event_sem); __trace_remove_event_dirs(tr); - debugfs_remove_recursive(tr->event_dir); + tracefs_remove_recursive(tr->event_dir); up_write(&trace_event_sem); tr->event_dir = NULL; @@ -2534,10 +2666,10 @@ static __init int event_trace_init(void) if (IS_ERR(d_tracer)) return 0; - entry = debugfs_create_file("available_events", 0444, d_tracer, + entry = tracefs_create_file("available_events", 0444, d_tracer, tr, &ftrace_avail_fops); if (!entry) - pr_warn("Could not create debugfs 'available_events' entry\n"); + pr_warn("Could not create tracefs 'available_events' entry\n"); if (trace_define_common_fields()) pr_warn("tracing: Failed to allocate common fields"); diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 12e2b99be862..174a6a71146c 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -177,7 +177,7 @@ struct ftrace_event_call __used event_##call = { \ }, \ .event.type = etype, \ .print_fmt = print, \ - .flags = TRACE_EVENT_FL_IGNORE_ENABLE | TRACE_EVENT_FL_USE_CALL_FILTER, \ + .flags = TRACE_EVENT_FL_IGNORE_ENABLE, \ }; \ struct ftrace_event_call __used \ __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call; diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 2d25ad1526bb..a51e79688455 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -6,7 +6,6 @@ * is Copyright (c) Steven Rostedt <srostedt@redhat.com> * */ -#include <linux/debugfs.h> #include <linux/uaccess.h> #include <linux/ftrace.h> #include <linux/slab.h> @@ -151,7 +150,7 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, * The curr_ret_stack is initialized to -1 and get increased * in this function. So it can be less than -1 only if it was * filtered out via ftrace_graph_notrace_addr() which can be - * set from set_graph_notrace file in debugfs by user. + * set from set_graph_notrace file in tracefs by user. */ if (current->curr_ret_stack < -1) return -EBUSY; @@ -1309,15 +1308,19 @@ void graph_trace_open(struct trace_iterator *iter) { /* pid and depth on the last trace processed */ struct fgraph_data *data; + gfp_t gfpflags; int cpu; iter->private = NULL; - data = kzalloc(sizeof(*data), GFP_KERNEL); + /* We can be called in atomic context via ftrace_dump() */ + gfpflags = (in_atomic() || irqs_disabled()) ? GFP_ATOMIC : GFP_KERNEL; + + data = kzalloc(sizeof(*data), gfpflags); if (!data) goto out_err; - data->cpu_data = alloc_percpu(struct fgraph_cpu_data); + data->cpu_data = alloc_percpu_gfp(struct fgraph_cpu_data, gfpflags); if (!data->cpu_data) goto out_err_free; @@ -1432,7 +1435,7 @@ static const struct file_operations graph_depth_fops = { .llseek = generic_file_llseek, }; -static __init int init_graph_debugfs(void) +static __init int init_graph_tracefs(void) { struct dentry *d_tracer; @@ -1445,7 +1448,7 @@ static __init int init_graph_debugfs(void) return 0; } -fs_initcall(init_graph_debugfs); +fs_initcall(init_graph_tracefs); static __init int init_graph_trace(void) { diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index d73f565b4e06..d0ce590f06e1 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -250,7 +250,7 @@ DEFINE_FETCH_symbol(string_size) #define fetch_file_offset_string_size NULL /* Fetch type information table */ -const struct fetch_type kprobes_fetch_type_table[] = { +static const struct fetch_type kprobes_fetch_type_table[] = { /* Special types */ [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string, sizeof(u32), 1, "__data_loc char[]"), @@ -760,7 +760,8 @@ static int create_trace_kprobe(int argc, char **argv) /* Parse fetch argument */ ret = traceprobe_parse_probe_arg(arg, &tk->tp.size, parg, - is_return, true); + is_return, true, + kprobes_fetch_type_table); if (ret) { pr_info("Parse error at argument[%d]. (%d)\n", i, ret); goto error; @@ -1134,11 +1135,15 @@ static void kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) { struct ftrace_event_call *call = &tk->tp.call; + struct bpf_prog *prog = call->prog; struct kprobe_trace_entry_head *entry; struct hlist_head *head; int size, __size, dsize; int rctx; + if (prog && !trace_call_bpf(prog, regs)) + return; + head = this_cpu_ptr(call->perf_events); if (hlist_empty(head)) return; @@ -1165,11 +1170,15 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, struct pt_regs *regs) { struct ftrace_event_call *call = &tk->tp.call; + struct bpf_prog *prog = call->prog; struct kretprobe_trace_entry_head *entry; struct hlist_head *head; int size, __size, dsize; int rctx; + if (prog && !trace_call_bpf(prog, regs)) + return; + head = this_cpu_ptr(call->perf_events); if (hlist_empty(head)) return; @@ -1286,7 +1295,7 @@ static int register_kprobe_event(struct trace_kprobe *tk) kfree(call->print_fmt); return -ENODEV; } - call->flags = 0; + call->flags = TRACE_EVENT_FL_KPROBE; call->class->reg = kprobe_register; call->data = tk; ret = trace_add_event_call(call); @@ -1310,7 +1319,7 @@ static int unregister_kprobe_event(struct trace_kprobe *tk) return ret; } -/* Make a debugfs interface for controlling probe points */ +/* Make a tracefs interface for controlling probe points */ static __init int init_kprobe_trace(void) { struct dentry *d_tracer; @@ -1323,20 +1332,20 @@ static __init int init_kprobe_trace(void) if (IS_ERR(d_tracer)) return 0; - entry = debugfs_create_file("kprobe_events", 0644, d_tracer, + entry = tracefs_create_file("kprobe_events", 0644, d_tracer, NULL, &kprobe_events_ops); /* Event list interface */ if (!entry) - pr_warning("Could not create debugfs " + pr_warning("Could not create tracefs " "'kprobe_events' entry\n"); /* Profile interface */ - entry = debugfs_create_file("kprobe_profile", 0444, d_tracer, + entry = tracefs_create_file("kprobe_profile", 0444, d_tracer, NULL, &kprobe_profile_ops); if (!entry) - pr_warning("Could not create debugfs " + pr_warning("Could not create tracefs " "'kprobe_profile' entry\n"); return 0; } diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index b983b2fd2ca1..1769a81da8a7 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -356,17 +356,14 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t, /* Recursive argument parser */ static int parse_probe_arg(char *arg, const struct fetch_type *t, - struct fetch_param *f, bool is_return, bool is_kprobe) + struct fetch_param *f, bool is_return, bool is_kprobe, + const struct fetch_type *ftbl) { - const struct fetch_type *ftbl; unsigned long param; long offset; char *tmp; int ret = 0; - ftbl = is_kprobe ? kprobes_fetch_type_table : uprobes_fetch_type_table; - BUG_ON(ftbl == NULL); - switch (arg[0]) { case '$': ret = parse_probe_vars(arg + 1, t, f, is_return, is_kprobe); @@ -447,7 +444,7 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t, dprm->fetch_size = get_fetch_size_function(t, dprm->fetch, ftbl); ret = parse_probe_arg(arg, t2, &dprm->orig, is_return, - is_kprobe); + is_kprobe, ftbl); if (ret) kfree(dprm); else { @@ -505,15 +502,12 @@ static int __parse_bitfield_probe_arg(const char *bf, /* String length checking wrapper */ int traceprobe_parse_probe_arg(char *arg, ssize_t *size, - struct probe_arg *parg, bool is_return, bool is_kprobe) + struct probe_arg *parg, bool is_return, bool is_kprobe, + const struct fetch_type *ftbl) { - const struct fetch_type *ftbl; const char *t; int ret; - ftbl = is_kprobe ? kprobes_fetch_type_table : uprobes_fetch_type_table; - BUG_ON(ftbl == NULL); - if (strlen(arg) > MAX_ARGSTR_LEN) { pr_info("Argument is too long.: %s\n", arg); return -ENOSPC; @@ -535,7 +529,8 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size, } parg->offset = *size; *size += parg->type->size; - ret = parse_probe_arg(arg, parg->type, &parg->fetch, is_return, is_kprobe); + ret = parse_probe_arg(arg, parg->type, &parg->fetch, is_return, + is_kprobe, ftbl); if (ret >= 0 && t != NULL) ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch); diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 4f815fbce16d..ab283e146b70 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -25,7 +25,7 @@ #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/smp.h> -#include <linux/debugfs.h> +#include <linux/tracefs.h> #include <linux/types.h> #include <linux/string.h> #include <linux/ctype.h> @@ -229,13 +229,6 @@ ASSIGN_FETCH_FUNC(file_offset, ftype), \ #define FETCH_TYPE_STRING 0 #define FETCH_TYPE_STRSIZE 1 -/* - * Fetch type information table. - * It's declared as a weak symbol due to conditional compilation. - */ -extern __weak const struct fetch_type kprobes_fetch_type_table[]; -extern __weak const struct fetch_type uprobes_fetch_type_table[]; - #ifdef CONFIG_KPROBE_EVENT struct symbol_cache; unsigned long update_symbol_cache(struct symbol_cache *sc); @@ -333,7 +326,8 @@ find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file) } extern int traceprobe_parse_probe_arg(char *arg, ssize_t *size, - struct probe_arg *parg, bool is_return, bool is_kprobe); + struct probe_arg *parg, bool is_return, bool is_kprobe, + const struct fetch_type *ftbl); extern int traceprobe_conflict_field_name(const char *name, struct probe_arg *args, int narg); diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index c3e4fcfddd45..3f34496244e9 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -327,11 +327,11 @@ static void t_stop(struct seq_file *m, void *p) local_irq_enable(); } -static int trace_lookup_stack(struct seq_file *m, long i) +static void trace_lookup_stack(struct seq_file *m, long i) { unsigned long addr = stack_dump_trace[i]; - return seq_printf(m, "%pS\n", (void *)addr); + seq_printf(m, "%pS\n", (void *)addr); } static void print_disabled(struct seq_file *m) diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index 75e19e86c954..6cf935316769 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -12,7 +12,7 @@ #include <linux/list.h> #include <linux/slab.h> #include <linux/rbtree.h> -#include <linux/debugfs.h> +#include <linux/tracefs.h> #include "trace_stat.h" #include "trace.h" @@ -65,7 +65,7 @@ static void reset_stat_session(struct stat_session *session) static void destroy_session(struct stat_session *session) { - debugfs_remove(session->file); + tracefs_remove(session->file); __reset_stat_session(session); mutex_destroy(&session->stat_mutex); kfree(session); @@ -279,9 +279,9 @@ static int tracing_stat_init(void) if (IS_ERR(d_tracing)) return 0; - stat_dir = debugfs_create_dir("trace_stat", d_tracing); + stat_dir = tracefs_create_dir("trace_stat", d_tracing); if (!stat_dir) - pr_warning("Could not create debugfs " + pr_warning("Could not create tracefs " "'trace_stat' entry\n"); return 0; } @@ -291,7 +291,7 @@ static int init_stat_file(struct stat_session *session) if (!stat_dir && tracing_stat_init()) return -ENODEV; - session->file = debugfs_create_file(session->ts->name, 0644, + session->file = tracefs_create_file(session->ts->name, 0644, stat_dir, session, &tracing_stat_fops); if (!session->file) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index cb3b37e533cd..6dd022c7b5bc 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -196,7 +196,7 @@ DEFINE_FETCH_file_offset(string) DEFINE_FETCH_file_offset(string_size) /* Fetch type information table */ -const struct fetch_type uprobes_fetch_type_table[] = { +static const struct fetch_type uprobes_fetch_type_table[] = { /* Special types */ [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string, sizeof(u32), 1, "__data_loc char[]"), @@ -535,7 +535,8 @@ static int create_trace_uprobe(int argc, char **argv) /* Parse fetch argument */ ret = traceprobe_parse_probe_arg(arg, &tu->tp.size, parg, - is_return, false); + is_return, false, + uprobes_fetch_type_table); if (ret) { pr_info("Parse error at argument[%d]. (%d)\n", i, ret); goto error; @@ -1005,7 +1006,7 @@ __uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm) return true; list_for_each_entry(event, &filter->perf_events, hw.tp_list) { - if (event->hw.tp_target->mm == mm) + if (event->hw.target->mm == mm) return true; } @@ -1015,7 +1016,7 @@ __uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm) static inline bool uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event) { - return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm); + return __uprobe_perf_filter(&tu->filter, event->hw.target->mm); } static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event) @@ -1023,10 +1024,10 @@ static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event) bool done; write_lock(&tu->filter.rwlock); - if (event->hw.tp_target) { + if (event->hw.target) { list_del(&event->hw.tp_list); done = tu->filter.nr_systemwide || - (event->hw.tp_target->flags & PF_EXITING) || + (event->hw.target->flags & PF_EXITING) || uprobe_filter_event(tu, event); } else { tu->filter.nr_systemwide--; @@ -1046,7 +1047,7 @@ static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event) int err; write_lock(&tu->filter.rwlock); - if (event->hw.tp_target) { + if (event->hw.target) { /* * event->parent != NULL means copy_process(), we can avoid * uprobe_apply(). current->mm must be probed and we can rely |