summaryrefslogtreecommitdiffstats
path: root/kernel/trace
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/Kconfig11
-rw-r--r--kernel/trace/bpf_trace.c59
-rw-r--r--kernel/trace/ftrace.c31
-rw-r--r--kernel/trace/ring_buffer.c63
-rw-r--r--kernel/trace/trace.c65
-rw-r--r--kernel/trace/trace_benchmark.c2
-rw-r--r--kernel/trace/trace_events.c18
-rw-r--r--kernel/trace/trace_events_trigger.c13
-rw-r--r--kernel/trace/trace_functions.c49
-rw-r--r--kernel/trace/trace_kprobe.c61
-rw-r--r--kernel/trace/trace_probe.h12
-rw-r--r--kernel/trace/trace_selftest_dynamic.c5
-rw-r--r--kernel/trace/trace_uprobe.c2
13 files changed, 292 insertions, 99 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 904c952ac383..0b249e2f0c3c 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -355,7 +355,7 @@ config PROFILE_ANNOTATED_BRANCHES
on if you need to profile the system's use of these macros.
config PROFILE_ALL_BRANCHES
- bool "Profile all if conditionals"
+ bool "Profile all if conditionals" if !FORTIFY_SOURCE
select TRACE_BRANCH_PROFILING
help
This tracer profiles all branch conditions. Every if ()
@@ -530,6 +530,15 @@ config FUNCTION_PROFILER
If in doubt, say N.
+config BPF_KPROBE_OVERRIDE
+ bool "Enable BPF programs to override a kprobed function"
+ depends on BPF_EVENTS
+ depends on FUNCTION_ERROR_INJECTION
+ default n
+ help
+ Allows BPF to override the execution of a probed function and
+ set a different return value. This is used for error injection.
+
config FTRACE_MCOUNT_RECORD
def_bool y
depends on DYNAMIC_FTRACE
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 40207c2a4113..fc2838ac8b78 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -13,6 +13,10 @@
#include <linux/filter.h>
#include <linux/uaccess.h>
#include <linux/ctype.h>
+#include <linux/kprobes.h>
+#include <linux/error-injection.h>
+
+#include "trace_probe.h"
#include "trace.h"
u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
@@ -76,6 +80,23 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
}
EXPORT_SYMBOL_GPL(trace_call_bpf);
+#ifdef CONFIG_BPF_KPROBE_OVERRIDE
+BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
+{
+ regs_set_return_value(regs, rc);
+ override_function_with_return(regs);
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_override_return_proto = {
+ .func = bpf_override_return,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+#endif
+
BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
{
int ret;
@@ -224,7 +245,7 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
*/
#define __BPF_TP_EMIT() __BPF_ARG3_TP()
#define __BPF_TP(...) \
- __trace_printk(1 /* Fake ip will not be printed. */, \
+ __trace_printk(0 /* Fake ip */, \
fmt, ##__VA_ARGS__)
#define __BPF_ARG1_TP(...) \
@@ -556,6 +577,10 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
return &bpf_get_stackid_proto;
case BPF_FUNC_perf_event_read_value:
return &bpf_perf_event_read_value_proto;
+#ifdef CONFIG_BPF_KPROBE_OVERRIDE
+ case BPF_FUNC_override_return:
+ return &bpf_override_return_proto;
+#endif
default:
return tracing_func_proto(func_id);
}
@@ -773,6 +798,15 @@ int perf_event_attach_bpf_prog(struct perf_event *event,
struct bpf_prog_array *new_array;
int ret = -EEXIST;
+ /*
+ * Kprobe override only works if they are on the function entry,
+ * and only if they are on the opt-in list.
+ */
+ if (prog->kprobe_override &&
+ (!trace_kprobe_on_func_entry(event->tp_event) ||
+ !trace_kprobe_error_injectable(event->tp_event)))
+ return -EINVAL;
+
mutex_lock(&bpf_event_mutex);
if (event->prog)
@@ -825,3 +859,26 @@ void perf_event_detach_bpf_prog(struct perf_event *event)
unlock:
mutex_unlock(&bpf_event_mutex);
}
+
+int perf_event_query_prog_array(struct perf_event *event, void __user *info)
+{
+ struct perf_event_query_bpf __user *uquery = info;
+ struct perf_event_query_bpf query = {};
+ int ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (event->attr.type != PERF_TYPE_TRACEPOINT)
+ return -EINVAL;
+ if (copy_from_user(&query, uquery, sizeof(query)))
+ return -EFAULT;
+
+ mutex_lock(&bpf_event_mutex);
+ ret = bpf_prog_array_copy_info(event->tp_event->prog_array,
+ uquery->ids,
+ query.ids_len,
+ &uquery->prog_cnt);
+ mutex_unlock(&bpf_event_mutex);
+
+ return ret;
+}
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index ccdf3664e4a9..dabd9d167d42 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1119,15 +1119,11 @@ static struct ftrace_ops global_ops = {
};
/*
- * This is used by __kernel_text_address() to return true if the
- * address is on a dynamically allocated trampoline that would
- * not return true for either core_kernel_text() or
- * is_module_text_address().
+ * Used by the stack undwinder to know about dynamic ftrace trampolines.
*/
-bool is_ftrace_trampoline(unsigned long addr)
+struct ftrace_ops *ftrace_ops_trampoline(unsigned long addr)
{
- struct ftrace_ops *op;
- bool ret = false;
+ struct ftrace_ops *op = NULL;
/*
* Some of the ops may be dynamically allocated,
@@ -1144,15 +1140,24 @@ bool is_ftrace_trampoline(unsigned long addr)
if (op->trampoline && op->trampoline_size)
if (addr >= op->trampoline &&
addr < op->trampoline + op->trampoline_size) {
- ret = true;
- goto out;
+ preempt_enable_notrace();
+ return op;
}
} while_for_each_ftrace_op(op);
-
- out:
preempt_enable_notrace();
- return ret;
+ return NULL;
+}
+
+/*
+ * This is used by __kernel_text_address() to return true if the
+ * address is on a dynamically allocated trampoline that would
+ * not return true for either core_kernel_text() or
+ * is_module_text_address().
+ */
+bool is_ftrace_trampoline(unsigned long addr)
+{
+ return ftrace_ops_trampoline(addr) != NULL;
}
struct ftrace_page {
@@ -5010,7 +5015,6 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
parser = &iter->parser;
if (trace_parser_loaded(parser)) {
- parser->buffer[parser->idx] = 0;
ftrace_match_records(iter->hash, parser->buffer, parser->idx);
}
@@ -5324,7 +5328,6 @@ ftrace_graph_release(struct inode *inode, struct file *file)
parser = &fgd->parser;
if (trace_parser_loaded((parser))) {
- parser->buffer[parser->idx] = 0;
ret = ftrace_graph_set_hash(fgd->new_hash,
parser->buffer);
}
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 9ab18995ff1e..ca6930e0d25e 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -630,7 +630,7 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
* Returns POLLIN | POLLRDNORM if data exists in the buffers,
* zero otherwise.
*/
-int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
+__poll_t ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
struct file *filp, poll_table *poll_table)
{
struct ring_buffer_per_cpu *cpu_buffer;
@@ -2534,29 +2534,58 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
* The lock and unlock are done within a preempt disable section.
* The current_context per_cpu variable can only be modified
* by the current task between lock and unlock. But it can
- * be modified more than once via an interrupt. There are four
- * different contexts that we need to consider.
+ * be modified more than once via an interrupt. To pass this
+ * information from the lock to the unlock without having to
+ * access the 'in_interrupt()' functions again (which do show
+ * a bit of overhead in something as critical as function tracing,
+ * we use a bitmask trick.
*
- * Normal context.
- * SoftIRQ context
- * IRQ context
- * NMI context
+ * bit 0 = NMI context
+ * bit 1 = IRQ context
+ * bit 2 = SoftIRQ context
+ * bit 3 = normal context.
*
- * If for some reason the ring buffer starts to recurse, we
- * only allow that to happen at most 4 times (one for each
- * context). If it happens 5 times, then we consider this a
- * recusive loop and do not let it go further.
+ * This works because this is the order of contexts that can
+ * preempt other contexts. A SoftIRQ never preempts an IRQ
+ * context.
+ *
+ * When the context is determined, the corresponding bit is
+ * checked and set (if it was set, then a recursion of that context
+ * happened).
+ *
+ * On unlock, we need to clear this bit. To do so, just subtract
+ * 1 from the current_context and AND it to itself.
+ *
+ * (binary)
+ * 101 - 1 = 100
+ * 101 & 100 = 100 (clearing bit zero)
+ *
+ * 1010 - 1 = 1001
+ * 1010 & 1001 = 1000 (clearing bit 1)
+ *
+ * The least significant bit can be cleared this way, and it
+ * just so happens that it is the same bit corresponding to
+ * the current context.
*/
static __always_inline int
trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
{
- if (cpu_buffer->current_context >= 4)
+ unsigned int val = cpu_buffer->current_context;
+ unsigned long pc = preempt_count();
+ int bit;
+
+ if (!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)))
+ bit = RB_CTX_NORMAL;
+ else
+ bit = pc & NMI_MASK ? RB_CTX_NMI :
+ pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ;
+
+ if (unlikely(val & (1 << bit)))
return 1;
- cpu_buffer->current_context++;
- /* Interrupts must see this update */
- barrier();
+ val |= (1 << bit);
+ cpu_buffer->current_context = val;
return 0;
}
@@ -2564,9 +2593,7 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
static __always_inline void
trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
{
- /* Don't let the dec leak out */
- barrier();
- cpu_buffer->current_context--;
+ cpu_buffer->current_context &= cpu_buffer->current_context - 1;
}
/**
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 2a8d8a294345..56608538a4ad 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -530,8 +530,6 @@ int trace_pid_write(struct trace_pid_list *filtered_pids,
ubuf += ret;
cnt -= ret;
- parser.buffer[parser.idx] = 0;
-
ret = -EINVAL;
if (kstrtoul(parser.buffer, 0, &val))
break;
@@ -1236,18 +1234,18 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
cnt--;
}
+ parser->idx = 0;
+
/* only spaces were written */
- if (isspace(ch)) {
+ if (isspace(ch) || !ch) {
*ppos += read;
ret = read;
goto out;
}
-
- parser->idx = 0;
}
/* read the non-space input */
- while (cnt && !isspace(ch)) {
+ while (cnt && !isspace(ch) && ch) {
if (parser->idx < parser->size - 1)
parser->buffer[parser->idx++] = ch;
else {
@@ -1262,12 +1260,14 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
}
/* We either got finished input or we have to wait for another call. */
- if (isspace(ch)) {
+ if (isspace(ch) || !ch) {
parser->buffer[parser->idx] = 0;
parser->cont = false;
} else if (parser->idx < parser->size - 1) {
parser->cont = true;
parser->buffer[parser->idx++] = ch;
+ /* Make sure the parsed string always terminates with '\0'. */
+ parser->buffer[parser->idx] = 0;
} else {
ret = -EINVAL;
goto out;
@@ -2374,6 +2374,15 @@ void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
}
EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
+/*
+ * Skip 3:
+ *
+ * trace_buffer_unlock_commit_regs()
+ * trace_event_buffer_commit()
+ * trace_event_raw_event_xxx()
+*/
+# define STACK_SKIP 3
+
void trace_buffer_unlock_commit_regs(struct trace_array *tr,
struct ring_buffer *buffer,
struct ring_buffer_event *event,
@@ -2383,16 +2392,12 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr,
__buffer_unlock_commit(buffer, event);
/*
- * If regs is not set, then skip the following callers:
- * trace_buffer_unlock_commit_regs
- * event_trigger_unlock_commit
- * trace_event_buffer_commit
- * trace_event_raw_event_sched_switch
+ * If regs is not set, then skip the necessary functions.
* Note, we can still get here via blktrace, wakeup tracer
* and mmiotrace, but that's ok if they lose a function or
- * two. They are that meaningful.
+ * two. They are not that meaningful.
*/
- ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
+ ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
ftrace_trace_userstack(buffer, flags, pc);
}
@@ -2579,11 +2584,13 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
trace.skip = skip;
/*
- * Add two, for this function and the call to save_stack_trace()
+ * Add one, for this function and the call to save_stack_trace()
* If regs is set, then these functions will not be in the way.
*/
+#ifndef CONFIG_UNWINDER_ORC
if (!regs)
- trace.skip += 2;
+ trace.skip++;
+#endif
/*
* Since events can happen in NMIs there's no safe way to
@@ -2682,17 +2689,6 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
if (unlikely(in_nmi()))
return;
- /*
- * It is possible that a function is being traced in a
- * location that RCU is not watching. A call to
- * rcu_irq_enter() will make sure that it is, but there's
- * a few internal rcu functions that could be traced
- * where that wont work either. In those cases, we just
- * do nothing.
- */
- if (unlikely(rcu_irq_enter_disabled()))
- return;
-
rcu_irq_enter_irqson();
__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
rcu_irq_exit_irqson();
@@ -2711,11 +2707,10 @@ void trace_dump_stack(int skip)
local_save_flags(flags);
- /*
- * Skip 3 more, seems to get us at the caller of
- * this function.
- */
- skip += 3;
+#ifndef CONFIG_UNWINDER_ORC
+ /* Skip 1 to skip this function. */
+ skip++;
+#endif
__ftrace_trace_stack(global_trace.trace_buffer.buffer,
flags, skip, preempt_count(), NULL);
}
@@ -5621,7 +5616,7 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
return 0;
}
-static unsigned int
+static __poll_t
trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
{
struct trace_array *tr = iter->tr;
@@ -5640,7 +5635,7 @@ trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_tabl
filp, poll_table);
}
-static unsigned int
+static __poll_t
tracing_poll_pipe(struct file *filp, poll_table *poll_table)
{
struct trace_iterator *iter = filp->private_data;
@@ -6594,7 +6589,7 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp)
return ret;
}
-static unsigned int
+static __poll_t
tracing_buffers_poll(struct file *filp, poll_table *poll_table)
{
struct ftrace_buffer_info *info = filp->private_data;
diff --git a/kernel/trace/trace_benchmark.c b/kernel/trace/trace_benchmark.c
index 79f838a75077..22fee766081b 100644
--- a/kernel/trace/trace_benchmark.c
+++ b/kernel/trace/trace_benchmark.c
@@ -165,7 +165,7 @@ static int benchmark_event_kthread(void *arg)
* this thread will never voluntarily schedule which would
* block synchronize_rcu_tasks() indefinitely.
*/
- cond_resched_rcu_qs();
+ cond_resched();
}
return 0;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index ec0f9aa4e151..05c7172c6667 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -885,8 +885,6 @@ ftrace_event_write(struct file *file, const char __user *ubuf,
if (*parser.buffer == '!')
set = 0;
- parser.buffer[parser.idx] = 0;
-
ret = ftrace_set_clr_event(tr, parser.buffer + !set, set);
if (ret)
goto out_put;
@@ -2213,6 +2211,7 @@ void trace_event_eval_update(struct trace_eval_map **map, int len)
{
struct trace_event_call *call, *p;
const char *last_system = NULL;
+ bool first = false;
int last_i;
int i;
@@ -2220,15 +2219,28 @@ void trace_event_eval_update(struct trace_eval_map **map, int len)
list_for_each_entry_safe(call, p, &ftrace_events, list) {
/* events are usually grouped together with systems */
if (!last_system || call->class->system != last_system) {
+ first = true;
last_i = 0;
last_system = call->class->system;
}
+ /*
+ * Since calls are grouped by systems, the likelyhood that the
+ * next call in the iteration belongs to the same system as the
+ * previous call is high. As an optimization, we skip seaching
+ * for a map[] that matches the call's system if the last call
+ * was from the same system. That's what last_i is for. If the
+ * call has the same system as the previous call, then last_i
+ * will be the index of the first map[] that has a matching
+ * system.
+ */
for (i = last_i; i < len; i++) {
if (call->class->system == map[i]->system) {
/* Save the first system if need be */
- if (!last_i)
+ if (first) {
last_i = i;
+ first = false;
+ }
update_event_printk(call, map[i]);
}
}
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index f2ac9d44f6c4..87411482a46f 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -1123,13 +1123,22 @@ static __init int register_trigger_snapshot_cmd(void) { return 0; }
#endif /* CONFIG_TRACER_SNAPSHOT */
#ifdef CONFIG_STACKTRACE
+#ifdef CONFIG_UNWINDER_ORC
+/* Skip 2:
+ * event_triggers_post_call()
+ * trace_event_raw_event_xxx()
+ */
+# define STACK_SKIP 2
+#else
/*
- * Skip 3:
+ * Skip 4:
* stacktrace_trigger()
* event_triggers_post_call()
+ * trace_event_buffer_commit()
* trace_event_raw_event_xxx()
*/
-#define STACK_SKIP 3
+#define STACK_SKIP 4
+#endif
static void
stacktrace_trigger(struct event_trigger_data *data, void *rec)
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 27f7ad12c4b1..b611cd36e22d 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -154,6 +154,24 @@ function_trace_call(unsigned long ip, unsigned long parent_ip,
preempt_enable_notrace();
}
+#ifdef CONFIG_UNWINDER_ORC
+/*
+ * Skip 2:
+ *
+ * function_stack_trace_call()
+ * ftrace_call()
+ */
+#define STACK_SKIP 2
+#else
+/*
+ * Skip 3:
+ * __trace_stack()
+ * function_stack_trace_call()
+ * ftrace_call()
+ */
+#define STACK_SKIP 3
+#endif
+
static void
function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct pt_regs *pt_regs)
@@ -180,15 +198,7 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
if (likely(disabled == 1)) {
pc = preempt_count();
trace_function(tr, ip, parent_ip, flags, pc);
- /*
- * skip over 5 funcs:
- * __ftrace_trace_stack,
- * __trace_stack,
- * function_stack_trace_call
- * ftrace_list_func
- * ftrace_call
- */
- __trace_stack(tr, flags, 5, pc);
+ __trace_stack(tr, flags, STACK_SKIP, pc);
}
atomic_dec(&data->disabled);
@@ -367,14 +377,27 @@ ftrace_traceoff(unsigned long ip, unsigned long parent_ip,
tracer_tracing_off(tr);
}
+#ifdef CONFIG_UNWINDER_ORC
/*
- * Skip 4:
+ * Skip 3:
+ *
+ * function_trace_probe_call()
+ * ftrace_ops_assist_func()
+ * ftrace_call()
+ */
+#define FTRACE_STACK_SKIP 3
+#else
+/*
+ * Skip 5:
+ *
+ * __trace_stack()
* ftrace_stacktrace()
* function_trace_probe_call()
- * ftrace_ops_list_func()
+ * ftrace_ops_assist_func()
* ftrace_call()
*/
-#define STACK_SKIP 4
+#define FTRACE_STACK_SKIP 5
+#endif
static __always_inline void trace_stack(struct trace_array *tr)
{
@@ -384,7 +407,7 @@ static __always_inline void trace_stack(struct trace_array *tr)
local_save_flags(flags);
pc = preempt_count();
- __trace_stack(tr, flags, STACK_SKIP, pc);
+ __trace_stack(tr, flags, FTRACE_STACK_SKIP, pc);
}
static void
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 492700c5fb4d..1fad24acd444 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -21,6 +21,7 @@
#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/rculist.h>
+#include <linux/error-injection.h>
#include "trace_probe.h"
@@ -42,7 +43,6 @@ struct trace_kprobe {
(offsetof(struct trace_kprobe, tp.args) + \
(sizeof(struct probe_arg) * (n)))
-
static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk)
{
return tk->rp.handler != NULL;
@@ -87,6 +87,30 @@ static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
return nhit;
}
+bool trace_kprobe_on_func_entry(struct trace_event_call *call)
+{
+ struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
+
+ return kprobe_on_func_entry(tk->rp.kp.addr,
+ tk->rp.kp.addr ? NULL : tk->rp.kp.symbol_name,
+ tk->rp.kp.addr ? 0 : tk->rp.kp.offset);
+}
+
+bool trace_kprobe_error_injectable(struct trace_event_call *call)
+{
+ struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
+ unsigned long addr;
+
+ if (tk->symbol) {
+ addr = (unsigned long)
+ kallsyms_lookup_name(trace_kprobe_symbol(tk));
+ addr += tk->rp.kp.offset;
+ } else {
+ addr = (unsigned long)tk->rp.kp.addr;
+ }
+ return within_error_injection_list(addr);
+}
+
static int register_kprobe_event(struct trace_kprobe *tk);
static int unregister_kprobe_event(struct trace_kprobe *tk);
@@ -1170,7 +1194,7 @@ static int kretprobe_event_define_fields(struct trace_event_call *event_call)
#ifdef CONFIG_PERF_EVENTS
/* Kprobe profile handler */
-static void
+static int
kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
{
struct trace_event_call *call = &tk->tp.call;
@@ -1179,12 +1203,31 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
int size, __size, dsize;
int rctx;
- if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs))
- return;
+ if (bpf_prog_array_valid(call)) {
+ unsigned long orig_ip = instruction_pointer(regs);
+ int ret;
+
+ ret = trace_call_bpf(call, regs);
+
+ /*
+ * We need to check and see if we modified the pc of the
+ * pt_regs, and if so clear the kprobe and return 1 so that we
+ * don't do the single stepping.
+ * The ftrace kprobe handler leaves it up to us to re-enable
+ * preemption here before returning if we've modified the ip.
+ */
+ if (orig_ip != instruction_pointer(regs)) {
+ reset_current_kprobe();
+ preempt_enable_no_resched();
+ return 1;
+ }
+ if (!ret)
+ return 0;
+ }
head = this_cpu_ptr(call->perf_events);
if (hlist_empty(head))
- return;
+ return 0;
dsize = __get_data_size(&tk->tp, regs);
__size = sizeof(*entry) + tk->tp.size + dsize;
@@ -1193,13 +1236,14 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
entry = perf_trace_buf_alloc(size, NULL, &rctx);
if (!entry)
- return;
+ return 0;
entry->ip = (unsigned long)tk->rp.kp.addr;
memset(&entry[1], 0, dsize);
store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
head, NULL);
+ return 0;
}
NOKPROBE_SYMBOL(kprobe_perf_func);
@@ -1275,6 +1319,7 @@ static int kprobe_register(struct trace_event_call *event,
static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
{
struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
+ int ret = 0;
raw_cpu_inc(*tk->nhit);
@@ -1282,9 +1327,9 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
kprobe_trace_func(tk, regs);
#ifdef CONFIG_PERF_EVENTS
if (tk->tp.flags & TP_FLAG_PROFILE)
- kprobe_perf_func(tk, regs);
+ ret = kprobe_perf_func(tk, regs);
#endif
- return 0; /* We don't tweek kernel, so just return 0 */
+ return ret;
}
NOKPROBE_SYMBOL(kprobe_dispatcher);
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index fb66e3eaa192..e101c5bb9eda 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -252,6 +252,8 @@ struct symbol_cache;
unsigned long update_symbol_cache(struct symbol_cache *sc);
void free_symbol_cache(struct symbol_cache *sc);
struct symbol_cache *alloc_symbol_cache(const char *sym, long offset);
+bool trace_kprobe_on_func_entry(struct trace_event_call *call);
+bool trace_kprobe_error_injectable(struct trace_event_call *call);
#else
/* uprobes do not support symbol fetch methods */
#define fetch_symbol_u8 NULL
@@ -277,6 +279,16 @@ alloc_symbol_cache(const char *sym, long offset)
{
return NULL;
}
+
+static inline bool trace_kprobe_on_func_entry(struct trace_event_call *call)
+{
+ return false;
+}
+
+static inline bool trace_kprobe_error_injectable(struct trace_event_call *call)
+{
+ return false;
+}
#endif /* CONFIG_KPROBE_EVENTS */
struct probe_arg {
diff --git a/kernel/trace/trace_selftest_dynamic.c b/kernel/trace/trace_selftest_dynamic.c
index 8cda06a10d66..c364cf777e1a 100644
--- a/kernel/trace/trace_selftest_dynamic.c
+++ b/kernel/trace/trace_selftest_dynamic.c
@@ -1,13 +1,14 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
#include "trace.h"
-int DYN_FTRACE_TEST_NAME(void)
+noinline __noclone int DYN_FTRACE_TEST_NAME(void)
{
/* used to call mcount */
return 0;
}
-int DYN_FTRACE_TEST_NAME2(void)
+noinline __noclone int DYN_FTRACE_TEST_NAME2(void)
{
/* used to call mcount */
return 0;
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 40592e7b3568..268029ae1be6 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -608,7 +608,7 @@ static int probes_seq_show(struct seq_file *m, void *v)
/* Don't print "0x (null)" when offset is 0 */
if (tu->offset) {
- seq_printf(m, "0x%p", (void *)tu->offset);
+ seq_printf(m, "0x%px", (void *)tu->offset);
} else {
switch (sizeof(void *)) {
case 4: