From f371b304f12e31fe30207c41ca7754564e0ea4dc Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 11 Dec 2017 11:39:02 -0800 Subject: bpf/tracing: allow user space to query prog array on the same tp Commit e87c6bc3852b ("bpf: permit multiple bpf attachments for a single perf event") added support to attach multiple bpf programs to a single perf event. Although this provides flexibility, users may want to know what other bpf programs attached to the same tp interface. Besides getting visibility for the underlying bpf system, such information may also help consolidate multiple bpf programs, understand potential performance issues due to a large array, and debug (e.g., one bpf program which overwrites return code may impact subsequent program results). Commit 2541517c32be ("tracing, perf: Implement BPF programs attached to kprobes") utilized the existing perf ioctl interface and added the command PERF_EVENT_IOC_SET_BPF to attach a bpf program to a tracepoint. This patch adds a new ioctl command, given a perf event fd, to query the bpf program array attached to the same perf tracepoint event. The new uapi ioctl command: PERF_EVENT_IOC_QUERY_BPF The new uapi/linux/perf_event.h structure: struct perf_event_query_bpf { __u32 ids_len; __u32 prog_cnt; __u32 ids[0]; }; User space provides buffer "ids" for kernel to copy to. When returning from the kernel, the number of available programs in the array is set in "prog_cnt". The usage: struct perf_event_query_bpf *query = malloc(sizeof(*query) + sizeof(u32) * ids_len); query.ids_len = ids_len; err = ioctl(pmu_efd, PERF_EVENT_IOC_QUERY_BPF, query); if (err == 0) { /* query.prog_cnt is the number of available progs, * number of progs in ids: (ids_len == 0) ? 0 : query.prog_cnt */ } else if (errno == ENOSPC) { /* query.ids_len number of progs copied, * query.prog_cnt is the number of available progs */ } else { /* other errors */ } Signed-off-by: Yonghong Song Acked-by: Peter Zijlstra (Intel) Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- kernel/trace/bpf_trace.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 0ce99c379c30..b143f2a05aff 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -820,3 +820,26 @@ void perf_event_detach_bpf_prog(struct perf_event *event) unlock: mutex_unlock(&bpf_event_mutex); } + +int bpf_event_query_prog_array(struct perf_event *event, void __user *info) +{ + struct perf_event_query_bpf __user *uquery = info; + struct perf_event_query_bpf query = {}; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (event->attr.type != PERF_TYPE_TRACEPOINT) + return -EINVAL; + if (copy_from_user(&query, uquery, sizeof(query))) + return -EFAULT; + + mutex_lock(&bpf_event_mutex); + ret = bpf_prog_array_copy_info(event->tp_event->prog_array, + uquery->ids, + query.ids_len, + &uquery->prog_cnt); + mutex_unlock(&bpf_event_mutex); + + return ret; +} -- cgit v1.2.3 From 9802d86585db91655c7d1929a4f6bbe0952ea88e Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 11 Dec 2017 11:36:48 -0500 Subject: bpf: add a bpf_override_function helper Error injection is sloppy and very ad-hoc. BPF could fill this niche perfectly with it's kprobe functionality. We could make sure errors are only triggered in specific call chains that we care about with very specific situations. Accomplish this with the bpf_override_funciton helper. This will modify the probe'd callers return value to the specified value and set the PC to an override function that simply returns, bypassing the originally probed function. This gives us a nice clean way to implement systematic error injection for all of our code paths. Acked-by: Alexei Starovoitov Acked-by: Ingo Molnar Signed-off-by: Josef Bacik Signed-off-by: Alexei Starovoitov --- arch/Kconfig | 3 +++ arch/x86/Kconfig | 1 + arch/x86/include/asm/kprobes.h | 4 +++ arch/x86/include/asm/ptrace.h | 5 ++++ arch/x86/kernel/kprobes/ftrace.c | 14 ++++++++++ include/linux/filter.h | 3 ++- include/linux/trace_events.h | 1 + include/uapi/linux/bpf.h | 7 ++++- kernel/bpf/core.c | 3 +++ kernel/bpf/verifier.c | 2 ++ kernel/events/core.c | 7 +++++ kernel/trace/Kconfig | 11 ++++++++ kernel/trace/bpf_trace.c | 35 +++++++++++++++++++++++++ kernel/trace/trace_kprobe.c | 55 +++++++++++++++++++++++++++++++++++----- kernel/trace/trace_probe.h | 12 +++++++++ 15 files changed, 154 insertions(+), 9 deletions(-) (limited to 'kernel/trace') diff --git a/arch/Kconfig b/arch/Kconfig index 400b9e1b2f27..d3f4aaf9cb7a 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -196,6 +196,9 @@ config HAVE_OPTPROBES config HAVE_KPROBES_ON_FTRACE bool +config HAVE_KPROBE_OVERRIDE + bool + config HAVE_NMI bool diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8eed3f94bfc7..04d66e6fa447 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -154,6 +154,7 @@ config X86 select HAVE_KERNEL_XZ select HAVE_KPROBES select HAVE_KPROBES_ON_FTRACE + select HAVE_KPROBE_OVERRIDE select HAVE_KRETPROBES select HAVE_KVM select HAVE_LIVEPATCH if X86_64 diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 9f2e3102e0bb..36abb23a7a35 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -67,6 +67,10 @@ extern const int kretprobe_blacklist_size; void arch_remove_kprobe(struct kprobe *p); asmlinkage void kretprobe_trampoline(void); +#ifdef CONFIG_KPROBES_ON_FTRACE +extern void arch_ftrace_kprobe_override_function(struct pt_regs *regs); +#endif + /* Architecture specific copy of original instruction*/ struct arch_specific_insn { /* copy of the original instruction */ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 14131dd06b29..6de1fd3d0097 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -109,6 +109,11 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) return regs->ax; } +static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) +{ + regs->ax = rc; +} + /* * user_mode(regs) determines whether a register set came from user * mode. On x86_32, this is true if V8086 mode was enabled OR if the diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 8dc0161cec8f..1ea748d682fd 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -97,3 +97,17 @@ int arch_prepare_kprobe_ftrace(struct kprobe *p) p->ainsn.boostable = false; return 0; } + +asmlinkage void override_func(void); +asm( + ".type override_func, @function\n" + "override_func:\n" + " ret\n" + ".size override_func, .-override_func\n" +); + +void arch_ftrace_kprobe_override_function(struct pt_regs *regs) +{ + regs->ip = (unsigned long)&override_func; +} +NOKPROBE_SYMBOL(arch_ftrace_kprobe_override_function); diff --git a/include/linux/filter.h b/include/linux/filter.h index 0062302e1285..5feb441d3dd9 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -458,7 +458,8 @@ struct bpf_prog { locked:1, /* Program image locked? */ gpl_compatible:1, /* Is filter GPL compatible? */ cb_access:1, /* Is control block accessed? */ - dst_needed:1; /* Do we need dst entry? */ + dst_needed:1, /* Do we need dst entry? */ + kprobe_override:1; /* Do we override a kprobe? */ enum bpf_prog_type type; /* Type of BPF program */ u32 len; /* Number of filter blocks */ u32 jited_len; /* Size of jited insns in bytes */ diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index af44e7c2d577..5fea451f6e28 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -528,6 +528,7 @@ do { \ struct perf_event; DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); +DECLARE_PER_CPU(int, bpf_kprobe_override); extern int perf_trace_init(struct perf_event *event); extern void perf_trace_destroy(struct perf_event *event); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 80d62e88590c..595bda120cfb 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -677,6 +677,10 @@ union bpf_attr { * @buf: buf to fill * @buf_size: size of the buf * Return : 0 on success or negative error code + * + * int bpf_override_return(pt_regs, rc) + * @pt_regs: pointer to struct pt_regs + * @rc: the return value to set */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -736,7 +740,8 @@ union bpf_attr { FN(xdp_adjust_meta), \ FN(perf_event_read_value), \ FN(perf_prog_read_value), \ - FN(getsockopt), + FN(getsockopt), \ + FN(override_return), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index b16c6f8f42b6..d32bebf4f2de 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1320,6 +1320,9 @@ EVAL4(PROG_NAME_LIST, 416, 448, 480, 512) bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp) { + if (fp->kprobe_override) + return false; + if (!array->owner_prog_type) { /* There's no owner yet where we could check for * compatibility. diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 7afa92e9b409..e807bda7fe29 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4413,6 +4413,8 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) prog->dst_needed = 1; if (insn->imm == BPF_FUNC_get_prandom_u32) bpf_user_rnd_init_once(); + if (insn->imm == BPF_FUNC_override_return) + prog->kprobe_override = 1; if (insn->imm == BPF_FUNC_tail_call) { /* If we tail call into other programs, we * cannot make any assumptions since they can diff --git a/kernel/events/core.c b/kernel/events/core.c index f10609e539d4..5857c500721b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8080,6 +8080,13 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) return -EINVAL; } + /* Kprobe override only works for kprobes, not uprobes. */ + if (prog->kprobe_override && + !(event->tp_event->flags & TRACE_EVENT_FL_KPROBE)) { + bpf_prog_put(prog); + return -EINVAL; + } + if (is_tracepoint || is_syscall_tp) { int off = trace_event_get_offsets(event->tp_event); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index af7dad126c13..3e6fd580fe7f 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -529,6 +529,17 @@ config FUNCTION_PROFILER If in doubt, say N. +config BPF_KPROBE_OVERRIDE + bool "Enable BPF programs to override a kprobed function" + depends on BPF_EVENTS + depends on KPROBES_ON_FTRACE + depends on HAVE_KPROBE_OVERRIDE + depends on DYNAMIC_FTRACE_WITH_REGS + default n + help + Allows BPF to override the execution of a probed function and + set a different return value. This is used for error injection. + config FTRACE_MCOUNT_RECORD def_bool y depends on DYNAMIC_FTRACE diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index b143f2a05aff..e009b7ecf473 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -13,6 +13,10 @@ #include #include #include +#include +#include + +#include "trace_probe.h" #include "trace.h" u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); @@ -76,6 +80,24 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) } EXPORT_SYMBOL_GPL(trace_call_bpf); +#ifdef CONFIG_BPF_KPROBE_OVERRIDE +BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc) +{ + __this_cpu_write(bpf_kprobe_override, 1); + regs_set_return_value(regs, rc); + arch_ftrace_kprobe_override_function(regs); + return 0; +} + +static const struct bpf_func_proto bpf_override_return_proto = { + .func = bpf_override_return, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; +#endif + BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr) { int ret; @@ -551,6 +573,10 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return &bpf_get_stackid_proto; case BPF_FUNC_perf_event_read_value: return &bpf_perf_event_read_value_proto; +#ifdef CONFIG_BPF_KPROBE_OVERRIDE + case BPF_FUNC_override_return: + return &bpf_override_return_proto; +#endif default: return tracing_func_proto(func_id); } @@ -768,6 +794,15 @@ int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog_array *new_array; int ret = -EEXIST; + /* + * Kprobe override only works for ftrace based kprobes, and only if they + * are on the opt-in list. + */ + if (prog->kprobe_override && + (!trace_kprobe_ftrace(event->tp_event) || + !trace_kprobe_error_injectable(event->tp_event))) + return -EINVAL; + mutex_lock(&bpf_event_mutex); if (event->prog) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 492700c5fb4d..5db849809a56 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -42,6 +42,7 @@ struct trace_kprobe { (offsetof(struct trace_kprobe, tp.args) + \ (sizeof(struct probe_arg) * (n))) +DEFINE_PER_CPU(int, bpf_kprobe_override); static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk) { @@ -87,6 +88,27 @@ static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk) return nhit; } +int trace_kprobe_ftrace(struct trace_event_call *call) +{ + struct trace_kprobe *tk = (struct trace_kprobe *)call->data; + return kprobe_ftrace(&tk->rp.kp); +} + +int trace_kprobe_error_injectable(struct trace_event_call *call) +{ + struct trace_kprobe *tk = (struct trace_kprobe *)call->data; + unsigned long addr; + + if (tk->symbol) { + addr = (unsigned long) + kallsyms_lookup_name(trace_kprobe_symbol(tk)); + addr += tk->rp.kp.offset; + } else { + addr = (unsigned long)tk->rp.kp.addr; + } + return within_kprobe_error_injection_list(addr); +} + static int register_kprobe_event(struct trace_kprobe *tk); static int unregister_kprobe_event(struct trace_kprobe *tk); @@ -1170,7 +1192,7 @@ static int kretprobe_event_define_fields(struct trace_event_call *event_call) #ifdef CONFIG_PERF_EVENTS /* Kprobe profile handler */ -static void +static int kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) { struct trace_event_call *call = &tk->tp.call; @@ -1179,12 +1201,29 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) int size, __size, dsize; int rctx; - if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs)) - return; + if (bpf_prog_array_valid(call)) { + int ret; + + ret = trace_call_bpf(call, regs); + + /* + * We need to check and see if we modified the pc of the + * pt_regs, and if so clear the kprobe and return 1 so that we + * don't do the instruction skipping. Also reset our state so + * we are clean the next pass through. + */ + if (__this_cpu_read(bpf_kprobe_override)) { + __this_cpu_write(bpf_kprobe_override, 0); + reset_current_kprobe(); + return 1; + } + if (!ret) + return 0; + } head = this_cpu_ptr(call->perf_events); if (hlist_empty(head)) - return; + return 0; dsize = __get_data_size(&tk->tp, regs); __size = sizeof(*entry) + tk->tp.size + dsize; @@ -1193,13 +1232,14 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) entry = perf_trace_buf_alloc(size, NULL, &rctx); if (!entry) - return; + return 0; entry->ip = (unsigned long)tk->rp.kp.addr; memset(&entry[1], 0, dsize); store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, head, NULL); + return 0; } NOKPROBE_SYMBOL(kprobe_perf_func); @@ -1275,6 +1315,7 @@ static int kprobe_register(struct trace_event_call *event, static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) { struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp); + int ret = 0; raw_cpu_inc(*tk->nhit); @@ -1282,9 +1323,9 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) kprobe_trace_func(tk, regs); #ifdef CONFIG_PERF_EVENTS if (tk->tp.flags & TP_FLAG_PROFILE) - kprobe_perf_func(tk, regs); + ret = kprobe_perf_func(tk, regs); #endif - return 0; /* We don't tweek kernel, so just return 0 */ + return ret; } NOKPROBE_SYMBOL(kprobe_dispatcher); diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index fb66e3eaa192..5e54d748c84c 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -252,6 +252,8 @@ struct symbol_cache; unsigned long update_symbol_cache(struct symbol_cache *sc); void free_symbol_cache(struct symbol_cache *sc); struct symbol_cache *alloc_symbol_cache(const char *sym, long offset); +int trace_kprobe_ftrace(struct trace_event_call *call); +int trace_kprobe_error_injectable(struct trace_event_call *call); #else /* uprobes do not support symbol fetch methods */ #define fetch_symbol_u8 NULL @@ -277,6 +279,16 @@ alloc_symbol_cache(const char *sym, long offset) { return NULL; } + +static inline int trace_kprobe_ftrace(struct trace_event_call *call) +{ + return 0; +} + +static inline int trace_kprobe_error_injectable(struct trace_event_call *call) +{ + return 0; +} #endif /* CONFIG_KPROBE_EVENTS */ struct probe_arg { -- cgit v1.2.3 From f4e2298e63d24bb7f5cf0f56f72867973cb7e652 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 13 Dec 2017 10:35:37 -0800 Subject: bpf/tracing: fix kernel/events/core.c compilation error Commit f371b304f12e ("bpf/tracing: allow user space to query prog array on the same tp") introduced a perf ioctl command to query prog array attached to the same perf tracepoint. The commit introduced a compilation error under certain config conditions, e.g., (1). CONFIG_BPF_SYSCALL is not defined, or (2). CONFIG_TRACING is defined but neither CONFIG_UPROBE_EVENTS nor CONFIG_KPROBE_EVENTS is defined. Error message: kernel/events/core.o: In function `perf_ioctl': core.c:(.text+0x98c4): undefined reference to `bpf_event_query_prog_array' This patch fixed this error by guarding the real definition under CONFIG_BPF_EVENTS and provided static inline dummy function if CONFIG_BPF_EVENTS was not defined. It renamed the function from bpf_event_query_prog_array to perf_event_query_prog_array and moved the definition from linux/bpf.h to linux/trace_events.h so the definition is in proximity to other prog_array related functions. Fixes: f371b304f12e ("bpf/tracing: allow user space to query prog array on the same tp") Reported-by: Stephen Rothwell Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 1 - include/linux/trace_events.h | 6 ++++++ kernel/events/core.c | 2 +- kernel/trace/bpf_trace.c | 2 +- 4 files changed, 8 insertions(+), 3 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 93e15b9d80c7..54dc7cae2949 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -254,7 +254,6 @@ typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src, u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy); -int bpf_event_query_prog_array(struct perf_event *event, void __user *info); int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 5fea451f6e28..8a1442c4e513 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -467,6 +467,7 @@ trace_trigger_soft_disabled(struct trace_event_file *file) unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx); int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog); void perf_event_detach_bpf_prog(struct perf_event *event); +int perf_event_query_prog_array(struct perf_event *event, void __user *info); #else static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) { @@ -481,6 +482,11 @@ perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog) static inline void perf_event_detach_bpf_prog(struct perf_event *event) { } +static inline int +perf_event_query_prog_array(struct perf_event *event, void __user *info) +{ + return -EOPNOTSUPP; +} #endif enum { diff --git a/kernel/events/core.c b/kernel/events/core.c index 5857c500721b..34fda6aa4606 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4725,7 +4725,7 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon } case PERF_EVENT_IOC_QUERY_BPF: - return bpf_event_query_prog_array(event, (void __user *)arg); + return perf_event_query_prog_array(event, (void __user *)arg); default: return -ENOTTY; } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index e009b7ecf473..c5dd60cc0751 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -856,7 +856,7 @@ unlock: mutex_unlock(&bpf_event_mutex); } -int bpf_event_query_prog_array(struct perf_event *event, void __user *info) +int perf_event_query_prog_array(struct perf_event *event, void __user *info) { struct perf_event_query_bpf __user *uquery = info; struct perf_event_query_bpf query = {}; -- cgit v1.2.3 From 46df3d209db080395a98fc0875bd05e45e8f44e0 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 15 Dec 2017 21:42:57 -0500 Subject: trace: reenable preemption if we modify the ip Things got moved around between the original bpf_override_return patches and the final version, and now the ftrace kprobe dispatcher assumes if you modified the ip that you also enabled preemption. Make a comment of this and enable preemption, this fixes the lockdep splat that happened when using this feature. Fixes: 9802d86585db ("bpf: add a bpf_override_function helper") Signed-off-by: Josef Bacik Signed-off-by: Daniel Borkmann --- kernel/trace/trace_kprobe.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 5db849809a56..91f4b57dab82 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1322,8 +1322,15 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) if (tk->tp.flags & TP_FLAG_TRACE) kprobe_trace_func(tk, regs); #ifdef CONFIG_PERF_EVENTS - if (tk->tp.flags & TP_FLAG_PROFILE) + if (tk->tp.flags & TP_FLAG_PROFILE) { ret = kprobe_perf_func(tk, regs); + /* + * The ftrace kprobe handler leaves it up to us to re-enable + * preemption here before returning if we've modified the ip. + */ + if (ret) + preempt_enable_no_resched(); + } #endif return ret; } -- cgit v1.2.3 From b4da3340eae2c3932144be3e81ccfd4e424d87b7 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 13 Jan 2018 02:54:04 +0900 Subject: tracing/kprobe: bpf: Check error injectable event is on function entry Check whether error injectable event is on function entry or not. Currently it checks the event is ftrace-based kprobes or not, but that is wrong. It should check if the event is on the entry of target function. Since error injection will override a function to just return with modified return value, that operation must be done before the target function starts making stackframe. As a side effect, bpf error injection is no need to depend on function-tracer. It can work with sw-breakpoint based kprobe events too. Signed-off-by: Masami Hiramatsu Reviewed-by: Josef Bacik Signed-off-by: Alexei Starovoitov --- arch/x86/include/asm/kprobes.h | 4 +--- arch/x86/kernel/kprobes/core.c | 14 ++++++++++++++ arch/x86/kernel/kprobes/ftrace.c | 14 -------------- kernel/trace/Kconfig | 2 -- kernel/trace/bpf_trace.c | 8 ++++---- kernel/trace/trace_kprobe.c | 9 ++++++--- kernel/trace/trace_probe.h | 12 ++++++------ 7 files changed, 31 insertions(+), 32 deletions(-) (limited to 'kernel/trace') diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 36abb23a7a35..367d99cff426 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -67,9 +67,7 @@ extern const int kretprobe_blacklist_size; void arch_remove_kprobe(struct kprobe *p); asmlinkage void kretprobe_trampoline(void); -#ifdef CONFIG_KPROBES_ON_FTRACE -extern void arch_ftrace_kprobe_override_function(struct pt_regs *regs); -#endif +extern void arch_kprobe_override_function(struct pt_regs *regs); /* Architecture specific copy of original instruction*/ struct arch_specific_insn { diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index bd36f3c33cd0..b02a377d5905 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -1183,3 +1183,17 @@ int arch_trampoline_kprobe(struct kprobe *p) { return 0; } + +asmlinkage void override_func(void); +asm( + ".type override_func, @function\n" + "override_func:\n" + " ret\n" + ".size override_func, .-override_func\n" +); + +void arch_kprobe_override_function(struct pt_regs *regs) +{ + regs->ip = (unsigned long)&override_func; +} +NOKPROBE_SYMBOL(arch_kprobe_override_function); diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 1ea748d682fd..8dc0161cec8f 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -97,17 +97,3 @@ int arch_prepare_kprobe_ftrace(struct kprobe *p) p->ainsn.boostable = false; return 0; } - -asmlinkage void override_func(void); -asm( - ".type override_func, @function\n" - "override_func:\n" - " ret\n" - ".size override_func, .-override_func\n" -); - -void arch_ftrace_kprobe_override_function(struct pt_regs *regs) -{ - regs->ip = (unsigned long)&override_func; -} -NOKPROBE_SYMBOL(arch_ftrace_kprobe_override_function); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index ae3a2d519e50..6400e1bf97c5 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -533,9 +533,7 @@ config FUNCTION_PROFILER config BPF_KPROBE_OVERRIDE bool "Enable BPF programs to override a kprobed function" depends on BPF_EVENTS - depends on KPROBES_ON_FTRACE depends on HAVE_KPROBE_OVERRIDE - depends on DYNAMIC_FTRACE_WITH_REGS default n help Allows BPF to override the execution of a probed function and diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index f6d2327ecb59..1966ad3bf3e0 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -85,7 +85,7 @@ BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc) { __this_cpu_write(bpf_kprobe_override, 1); regs_set_return_value(regs, rc); - arch_ftrace_kprobe_override_function(regs); + arch_kprobe_override_function(regs); return 0; } @@ -800,11 +800,11 @@ int perf_event_attach_bpf_prog(struct perf_event *event, int ret = -EEXIST; /* - * Kprobe override only works for ftrace based kprobes, and only if they - * are on the opt-in list. + * Kprobe override only works if they are on the function entry, + * and only if they are on the opt-in list. */ if (prog->kprobe_override && - (!trace_kprobe_ftrace(event->tp_event) || + (!trace_kprobe_on_func_entry(event->tp_event) || !trace_kprobe_error_injectable(event->tp_event))) return -EINVAL; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 91f4b57dab82..3c8deb977a8b 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -88,13 +88,16 @@ static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk) return nhit; } -int trace_kprobe_ftrace(struct trace_event_call *call) +bool trace_kprobe_on_func_entry(struct trace_event_call *call) { struct trace_kprobe *tk = (struct trace_kprobe *)call->data; - return kprobe_ftrace(&tk->rp.kp); + + return kprobe_on_func_entry(tk->rp.kp.addr, + tk->rp.kp.addr ? NULL : tk->rp.kp.symbol_name, + tk->rp.kp.addr ? 0 : tk->rp.kp.offset); } -int trace_kprobe_error_injectable(struct trace_event_call *call) +bool trace_kprobe_error_injectable(struct trace_event_call *call) { struct trace_kprobe *tk = (struct trace_kprobe *)call->data; unsigned long addr; diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 5e54d748c84c..e101c5bb9eda 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -252,8 +252,8 @@ struct symbol_cache; unsigned long update_symbol_cache(struct symbol_cache *sc); void free_symbol_cache(struct symbol_cache *sc); struct symbol_cache *alloc_symbol_cache(const char *sym, long offset); -int trace_kprobe_ftrace(struct trace_event_call *call); -int trace_kprobe_error_injectable(struct trace_event_call *call); +bool trace_kprobe_on_func_entry(struct trace_event_call *call); +bool trace_kprobe_error_injectable(struct trace_event_call *call); #else /* uprobes do not support symbol fetch methods */ #define fetch_symbol_u8 NULL @@ -280,14 +280,14 @@ alloc_symbol_cache(const char *sym, long offset) return NULL; } -static inline int trace_kprobe_ftrace(struct trace_event_call *call) +static inline bool trace_kprobe_on_func_entry(struct trace_event_call *call) { - return 0; + return false; } -static inline int trace_kprobe_error_injectable(struct trace_event_call *call) +static inline bool trace_kprobe_error_injectable(struct trace_event_call *call) { - return 0; + return false; } #endif /* CONFIG_KPROBE_EVENTS */ -- cgit v1.2.3 From 66665ad2f1023d3ffb0c12eea9e0a6d0b613ecb3 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 13 Jan 2018 02:54:33 +0900 Subject: tracing/kprobe: bpf: Compare instruction pointer with original one Compare instruction pointer with original one on the stack instead using per-cpu bpf_kprobe_override flag. This patch also consolidates reset_current_kprobe() and preempt_enable_no_resched() blocks. Those can be done in one place. Signed-off-by: Masami Hiramatsu Reviewed-by: Josef Bacik Signed-off-by: Alexei Starovoitov --- kernel/trace/bpf_trace.c | 1 - kernel/trace/trace_kprobe.c | 21 +++++++-------------- 2 files changed, 7 insertions(+), 15 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 1966ad3bf3e0..24ed6363e00f 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -83,7 +83,6 @@ EXPORT_SYMBOL_GPL(trace_call_bpf); #ifdef CONFIG_BPF_KPROBE_OVERRIDE BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc) { - __this_cpu_write(bpf_kprobe_override, 1); regs_set_return_value(regs, rc); arch_kprobe_override_function(regs); return 0; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 3c8deb977a8b..b8c90441bc87 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -42,8 +42,6 @@ struct trace_kprobe { (offsetof(struct trace_kprobe, tp.args) + \ (sizeof(struct probe_arg) * (n))) -DEFINE_PER_CPU(int, bpf_kprobe_override); - static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk) { return tk->rp.handler != NULL; @@ -1205,6 +1203,7 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) int rctx; if (bpf_prog_array_valid(call)) { + unsigned long orig_ip = instruction_pointer(regs); int ret; ret = trace_call_bpf(call, regs); @@ -1212,12 +1211,13 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) /* * We need to check and see if we modified the pc of the * pt_regs, and if so clear the kprobe and return 1 so that we - * don't do the instruction skipping. Also reset our state so - * we are clean the next pass through. + * don't do the single stepping. + * The ftrace kprobe handler leaves it up to us to re-enable + * preemption here before returning if we've modified the ip. */ - if (__this_cpu_read(bpf_kprobe_override)) { - __this_cpu_write(bpf_kprobe_override, 0); + if (orig_ip != instruction_pointer(regs)) { reset_current_kprobe(); + preempt_enable_no_resched(); return 1; } if (!ret) @@ -1325,15 +1325,8 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) if (tk->tp.flags & TP_FLAG_TRACE) kprobe_trace_func(tk, regs); #ifdef CONFIG_PERF_EVENTS - if (tk->tp.flags & TP_FLAG_PROFILE) { + if (tk->tp.flags & TP_FLAG_PROFILE) ret = kprobe_perf_func(tk, regs); - /* - * The ftrace kprobe handler leaves it up to us to re-enable - * preemption here before returning if we've modified the ip. - */ - if (ret) - preempt_enable_no_resched(); - } #endif return ret; } -- cgit v1.2.3 From 540adea3809f61115d2a1ea4ed6e627613452ba1 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 13 Jan 2018 02:55:03 +0900 Subject: error-injection: Separate error-injection from kprobe Since error-injection framework is not limited to be used by kprobes, nor bpf. Other kernel subsystems can use it freely for checking safeness of error-injection, e.g. livepatch, ftrace etc. So this separate error-injection framework from kprobes. Some differences has been made: - "kprobe" word is removed from any APIs/structures. - BPF_ALLOW_ERROR_INJECTION() is renamed to ALLOW_ERROR_INJECTION() since it is not limited for BPF too. - CONFIG_FUNCTION_ERROR_INJECTION is the config item of this feature. It is automatically enabled if the arch supports error injection feature for kprobe or ftrace etc. Signed-off-by: Masami Hiramatsu Reviewed-by: Josef Bacik Signed-off-by: Alexei Starovoitov --- arch/Kconfig | 2 +- arch/x86/Kconfig | 2 +- arch/x86/include/asm/error-injection.h | 13 ++ arch/x86/kernel/kprobes/core.c | 14 --- arch/x86/lib/Makefile | 1 + arch/x86/lib/error-inject.c | 19 +++ fs/btrfs/disk-io.c | 4 +- fs/btrfs/free-space-cache.c | 4 +- include/asm-generic/error-injection.h | 20 ++++ include/asm-generic/vmlinux.lds.h | 14 +-- include/linux/bpf.h | 11 -- include/linux/error-injection.h | 21 ++++ include/linux/kprobes.h | 1 - include/linux/module.h | 6 +- kernel/kprobes.c | 163 ------------------------- kernel/module.c | 8 +- kernel/trace/Kconfig | 2 +- kernel/trace/bpf_trace.c | 4 +- kernel/trace/trace_kprobe.c | 3 +- lib/Kconfig.debug | 4 + lib/Makefile | 1 + lib/error-inject.c | 213 +++++++++++++++++++++++++++++++++ 22 files changed, 317 insertions(+), 213 deletions(-) create mode 100644 arch/x86/include/asm/error-injection.h create mode 100644 arch/x86/lib/error-inject.c create mode 100644 include/asm-generic/error-injection.h create mode 100644 include/linux/error-injection.h create mode 100644 lib/error-inject.c (limited to 'kernel/trace') diff --git a/arch/Kconfig b/arch/Kconfig index d3f4aaf9cb7a..97376accfb14 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -196,7 +196,7 @@ config HAVE_OPTPROBES config HAVE_KPROBES_ON_FTRACE bool -config HAVE_KPROBE_OVERRIDE +config HAVE_FUNCTION_ERROR_INJECTION bool config HAVE_NMI diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 45dc6233f2b9..366b19cb79b7 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -154,7 +154,7 @@ config X86 select HAVE_KERNEL_XZ select HAVE_KPROBES select HAVE_KPROBES_ON_FTRACE - select HAVE_KPROBE_OVERRIDE + select HAVE_FUNCTION_ERROR_INJECTION select HAVE_KRETPROBES select HAVE_KVM select HAVE_LIVEPATCH if X86_64 diff --git a/arch/x86/include/asm/error-injection.h b/arch/x86/include/asm/error-injection.h new file mode 100644 index 000000000000..47b7a1296245 --- /dev/null +++ b/arch/x86/include/asm/error-injection.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ERROR_INJECTION_H +#define _ASM_ERROR_INJECTION_H + +#include +#include +#include +#include + +asmlinkage void just_return_func(void); +void override_function_with_return(struct pt_regs *regs); + +#endif /* _ASM_ERROR_INJECTION_H */ diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index b02a377d5905..bd36f3c33cd0 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -1183,17 +1183,3 @@ int arch_trampoline_kprobe(struct kprobe *p) { return 0; } - -asmlinkage void override_func(void); -asm( - ".type override_func, @function\n" - "override_func:\n" - " ret\n" - ".size override_func, .-override_func\n" -); - -void arch_kprobe_override_function(struct pt_regs *regs) -{ - regs->ip = (unsigned long)&override_func; -} -NOKPROBE_SYMBOL(arch_kprobe_override_function); diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 7b181b61170e..171377b83be1 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -26,6 +26,7 @@ lib-y += memcpy_$(BITS).o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o +lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o diff --git a/arch/x86/lib/error-inject.c b/arch/x86/lib/error-inject.c new file mode 100644 index 000000000000..7b881d03d0dd --- /dev/null +++ b/arch/x86/lib/error-inject.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +asmlinkage void just_return_func(void); + +asm( + ".type just_return_func, @function\n" + "just_return_func:\n" + " ret\n" + ".size just_return_func, .-just_return_func\n" +); + +void override_function_with_return(struct pt_regs *regs) +{ + regs->ip = (unsigned long)&just_return_func; +} +NOKPROBE_SYMBOL(override_function_with_return); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5da18ebc9222..9798e21ebe9d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include "ctree.h" #include "disk-io.h" @@ -3124,7 +3124,7 @@ recovery_tree_root: goto fail_block_groups; goto retry_root_backup; } -BPF_ALLOW_ERROR_INJECTION(open_ctree); +ALLOW_ERROR_INJECTION(open_ctree); static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) { diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index fb1382893bfc..ef847699031a 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include "ctree.h" #include "free-space-cache.h" #include "transaction.h" @@ -333,7 +333,7 @@ static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode, return 0; } -BPF_ALLOW_ERROR_INJECTION(io_ctl_init); +ALLOW_ERROR_INJECTION(io_ctl_init); static void io_ctl_free(struct btrfs_io_ctl *io_ctl) { diff --git a/include/asm-generic/error-injection.h b/include/asm-generic/error-injection.h new file mode 100644 index 000000000000..08352c9d9f97 --- /dev/null +++ b/include/asm-generic/error-injection.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_GENERIC_ERROR_INJECTION_H +#define _ASM_GENERIC_ERROR_INJECTION_H + +#if defined(__KERNEL__) && !defined(__ASSEMBLY__) +#ifdef CONFIG_FUNCTION_ERROR_INJECTION +/* + * Whitelist ganerating macro. Specify functions which can be + * error-injectable using this macro. + */ +#define ALLOW_ERROR_INJECTION(fname) \ +static unsigned long __used \ + __attribute__((__section__("_error_injection_whitelist"))) \ + _eil_addr_##fname = (unsigned long)fname; +#else +#define ALLOW_ERROR_INJECTION(fname) +#endif +#endif + +#endif /* _ASM_GENERIC_ERROR_INJECTION_H */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index a2e8582d094a..f2068cca5206 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -136,13 +136,13 @@ #define KPROBE_BLACKLIST() #endif -#ifdef CONFIG_BPF_KPROBE_OVERRIDE -#define ERROR_INJECT_LIST() . = ALIGN(8); \ - VMLINUX_SYMBOL(__start_kprobe_error_inject_list) = .; \ - KEEP(*(_kprobe_error_inject_list)) \ - VMLINUX_SYMBOL(__stop_kprobe_error_inject_list) = .; +#ifdef CONFIG_FUNCTION_ERROR_INJECTION +#define ERROR_INJECT_WHITELIST() . = ALIGN(8); \ + VMLINUX_SYMBOL(__start_error_injection_whitelist) = .;\ + KEEP(*(_error_injection_whitelist)) \ + VMLINUX_SYMBOL(__stop_error_injection_whitelist) = .; #else -#define ERROR_INJECT_LIST() +#define ERROR_INJECT_WHITELIST() #endif #ifdef CONFIG_EVENT_TRACING @@ -573,7 +573,7 @@ FTRACE_EVENTS() \ TRACE_SYSCALLS() \ KPROBE_BLACKLIST() \ - ERROR_INJECT_LIST() \ + ERROR_INJECT_WHITELIST() \ MEM_DISCARD(init.rodata) \ CLK_OF_TABLES() \ RESERVEDMEM_OF_TABLES() \ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 44f26f6df8fc..3496977203a3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -613,15 +613,4 @@ extern const struct bpf_func_proto bpf_sock_map_update_proto; void bpf_user_rnd_init_once(void); u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); -#if defined(__KERNEL__) && !defined(__ASSEMBLY__) -#ifdef CONFIG_BPF_KPROBE_OVERRIDE -#define BPF_ALLOW_ERROR_INJECTION(fname) \ -static unsigned long __used \ - __attribute__((__section__("_kprobe_error_inject_list"))) \ - _eil_addr_##fname = (unsigned long)fname; -#else -#define BPF_ALLOW_ERROR_INJECTION(fname) -#endif -#endif - #endif /* _LINUX_BPF_H */ diff --git a/include/linux/error-injection.h b/include/linux/error-injection.h new file mode 100644 index 000000000000..130a67c50dac --- /dev/null +++ b/include/linux/error-injection.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_ERROR_INJECTION_H +#define _LINUX_ERROR_INJECTION_H + +#ifdef CONFIG_FUNCTION_ERROR_INJECTION + +#include + +extern bool within_error_injection_list(unsigned long addr); + +#else /* !CONFIG_FUNCTION_ERROR_INJECTION */ + +#include +static inline bool within_error_injection_list(unsigned long addr) +{ + return false; +} + +#endif + +#endif /* _LINUX_ERROR_INJECTION_H */ diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 963fd364f3d6..9440a2fc8893 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -271,7 +271,6 @@ extern bool arch_kprobe_on_func_entry(unsigned long offset); extern bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset); extern bool within_kprobe_blacklist(unsigned long addr); -extern bool within_kprobe_error_injection_list(unsigned long addr); struct kprobe_insn_cache { struct mutex mutex; diff --git a/include/linux/module.h b/include/linux/module.h index 548fa09fa806..792e51d83bda 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -476,9 +476,9 @@ struct module { unsigned int num_ctors; #endif -#ifdef CONFIG_BPF_KPROBE_OVERRIDE - unsigned int num_kprobe_ei_funcs; - unsigned long *kprobe_ei_funcs; +#ifdef CONFIG_FUNCTION_ERROR_INJECTION + unsigned int num_ei_funcs; + unsigned long *ei_funcs; #endif } ____cacheline_aligned __randomize_layout; #ifndef MODULE_ARCH_INIT diff --git a/kernel/kprobes.c b/kernel/kprobes.c index b4aab48ad258..da2ccf142358 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -83,16 +83,6 @@ static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash) return &(kretprobe_table_locks[hash].lock); } -/* List of symbols that can be overriden for error injection. */ -static LIST_HEAD(kprobe_error_injection_list); -static DEFINE_MUTEX(kprobe_ei_mutex); -struct kprobe_ei_entry { - struct list_head list; - unsigned long start_addr; - unsigned long end_addr; - void *priv; -}; - /* Blacklist -- list of struct kprobe_blacklist_entry */ static LIST_HEAD(kprobe_blacklist); @@ -1404,17 +1394,6 @@ bool within_kprobe_blacklist(unsigned long addr) return false; } -bool within_kprobe_error_injection_list(unsigned long addr) -{ - struct kprobe_ei_entry *ent; - - list_for_each_entry(ent, &kprobe_error_injection_list, list) { - if (addr >= ent->start_addr && addr < ent->end_addr) - return true; - } - return false; -} - /* * If we have a symbol_name argument, look it up and add the offset field * to it. This way, we can specify a relative address to a symbol. @@ -2189,86 +2168,6 @@ static int __init populate_kprobe_blacklist(unsigned long *start, return 0; } -#ifdef CONFIG_BPF_KPROBE_OVERRIDE -/* Markers of the _kprobe_error_inject_list section */ -extern unsigned long __start_kprobe_error_inject_list[]; -extern unsigned long __stop_kprobe_error_inject_list[]; - -/* - * Lookup and populate the kprobe_error_injection_list. - * - * For safety reasons we only allow certain functions to be overriden with - * bpf_error_injection, so we need to populate the list of the symbols that have - * been marked as safe for overriding. - */ -static void populate_kprobe_error_injection_list(unsigned long *start, - unsigned long *end, - void *priv) -{ - unsigned long *iter; - struct kprobe_ei_entry *ent; - unsigned long entry, offset = 0, size = 0; - - mutex_lock(&kprobe_ei_mutex); - for (iter = start; iter < end; iter++) { - entry = arch_deref_entry_point((void *)*iter); - - if (!kernel_text_address(entry) || - !kallsyms_lookup_size_offset(entry, &size, &offset)) { - pr_err("Failed to find error inject entry at %p\n", - (void *)entry); - continue; - } - - ent = kmalloc(sizeof(*ent), GFP_KERNEL); - if (!ent) - break; - ent->start_addr = entry; - ent->end_addr = entry + size; - ent->priv = priv; - INIT_LIST_HEAD(&ent->list); - list_add_tail(&ent->list, &kprobe_error_injection_list); - } - mutex_unlock(&kprobe_ei_mutex); -} - -static void __init populate_kernel_kprobe_ei_list(void) -{ - populate_kprobe_error_injection_list(__start_kprobe_error_inject_list, - __stop_kprobe_error_inject_list, - NULL); -} - -static void module_load_kprobe_ei_list(struct module *mod) -{ - if (!mod->num_kprobe_ei_funcs) - return; - populate_kprobe_error_injection_list(mod->kprobe_ei_funcs, - mod->kprobe_ei_funcs + - mod->num_kprobe_ei_funcs, mod); -} - -static void module_unload_kprobe_ei_list(struct module *mod) -{ - struct kprobe_ei_entry *ent, *n; - if (!mod->num_kprobe_ei_funcs) - return; - - mutex_lock(&kprobe_ei_mutex); - list_for_each_entry_safe(ent, n, &kprobe_error_injection_list, list) { - if (ent->priv == mod) { - list_del_init(&ent->list); - kfree(ent); - } - } - mutex_unlock(&kprobe_ei_mutex); -} -#else -static inline void __init populate_kernel_kprobe_ei_list(void) {} -static inline void module_load_kprobe_ei_list(struct module *m) {} -static inline void module_unload_kprobe_ei_list(struct module *m) {} -#endif - /* Module notifier call back, checking kprobes on the module */ static int kprobes_module_callback(struct notifier_block *nb, unsigned long val, void *data) @@ -2279,11 +2178,6 @@ static int kprobes_module_callback(struct notifier_block *nb, unsigned int i; int checkcore = (val == MODULE_STATE_GOING); - if (val == MODULE_STATE_COMING) - module_load_kprobe_ei_list(mod); - else if (val == MODULE_STATE_GOING) - module_unload_kprobe_ei_list(mod); - if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE) return NOTIFY_DONE; @@ -2346,8 +2240,6 @@ static int __init init_kprobes(void) pr_err("Please take care of using kprobes.\n"); } - populate_kernel_kprobe_ei_list(); - if (kretprobe_blacklist_size) { /* lookup the function address from its name */ for (i = 0; kretprobe_blacklist[i].name != NULL; i++) { @@ -2515,56 +2407,6 @@ static const struct file_operations debugfs_kprobe_blacklist_ops = { .release = seq_release, }; -/* - * kprobes/error_injection_list -- shows which functions can be overriden for - * error injection. - * */ -static void *kprobe_ei_seq_start(struct seq_file *m, loff_t *pos) -{ - mutex_lock(&kprobe_ei_mutex); - return seq_list_start(&kprobe_error_injection_list, *pos); -} - -static void kprobe_ei_seq_stop(struct seq_file *m, void *v) -{ - mutex_unlock(&kprobe_ei_mutex); -} - -static void *kprobe_ei_seq_next(struct seq_file *m, void *v, loff_t *pos) -{ - return seq_list_next(v, &kprobe_error_injection_list, pos); -} - -static int kprobe_ei_seq_show(struct seq_file *m, void *v) -{ - char buffer[KSYM_SYMBOL_LEN]; - struct kprobe_ei_entry *ent = - list_entry(v, struct kprobe_ei_entry, list); - - sprint_symbol(buffer, ent->start_addr); - seq_printf(m, "%s\n", buffer); - return 0; -} - -static const struct seq_operations kprobe_ei_seq_ops = { - .start = kprobe_ei_seq_start, - .next = kprobe_ei_seq_next, - .stop = kprobe_ei_seq_stop, - .show = kprobe_ei_seq_show, -}; - -static int kprobe_ei_open(struct inode *inode, struct file *filp) -{ - return seq_open(filp, &kprobe_ei_seq_ops); -} - -static const struct file_operations debugfs_kprobe_ei_ops = { - .open = kprobe_ei_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static void arm_all_kprobes(void) { struct hlist_head *head; @@ -2706,11 +2548,6 @@ static int __init debugfs_kprobe_init(void) if (!file) goto error; - file = debugfs_create_file("error_injection_list", 0444, dir, NULL, - &debugfs_kprobe_ei_ops); - if (!file) - goto error; - return 0; error: diff --git a/kernel/module.c b/kernel/module.c index bd695bfdc5c4..601494d4b7ea 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3118,10 +3118,10 @@ static int find_module_sections(struct module *mod, struct load_info *info) sizeof(*mod->ftrace_callsites), &mod->num_ftrace_callsites); #endif -#ifdef CONFIG_BPF_KPROBE_OVERRIDE - mod->kprobe_ei_funcs = section_objs(info, "_kprobe_error_inject_list", - sizeof(*mod->kprobe_ei_funcs), - &mod->num_kprobe_ei_funcs); +#ifdef CONFIG_FUNCTION_ERROR_INJECTION + mod->ei_funcs = section_objs(info, "_error_injection_whitelist", + sizeof(*mod->ei_funcs), + &mod->num_ei_funcs); #endif mod->extable = section_objs(info, "__ex_table", sizeof(*mod->extable), &mod->num_exentries); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 6400e1bf97c5..7114c885a78a 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -533,7 +533,7 @@ config FUNCTION_PROFILER config BPF_KPROBE_OVERRIDE bool "Enable BPF programs to override a kprobed function" depends on BPF_EVENTS - depends on HAVE_KPROBE_OVERRIDE + depends on FUNCTION_ERROR_INJECTION default n help Allows BPF to override the execution of a probed function and diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 24ed6363e00f..f274468cbc45 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include "trace_probe.h" #include "trace.h" @@ -84,7 +84,7 @@ EXPORT_SYMBOL_GPL(trace_call_bpf); BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc) { regs_set_return_value(regs, rc); - arch_kprobe_override_function(regs); + override_function_with_return(regs); return 0; } diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index b8c90441bc87..1fad24acd444 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "trace_probe.h" @@ -107,7 +108,7 @@ bool trace_kprobe_error_injectable(struct trace_event_call *call) } else { addr = (unsigned long)tk->rp.kp.addr; } - return within_kprobe_error_injection_list(addr); + return within_error_injection_list(addr); } static int register_kprobe_event(struct trace_kprobe *tk); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 9d5b78aad4c5..2a33efdd1fea 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1500,6 +1500,10 @@ config FAULT_INJECTION Provide fault-injection framework. For more details, see Documentation/fault-injection/. +config FUNCTION_ERROR_INJECTION + def_bool y + depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES + config FAILSLAB bool "Fault-injection capability for kmalloc" depends on FAULT_INJECTION diff --git a/lib/Makefile b/lib/Makefile index a6c8529dd9b2..75ec13778cd8 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -149,6 +149,7 @@ obj-$(CONFIG_NETDEV_NOTIFIER_ERROR_INJECT) += netdev-notifier-error-inject.o obj-$(CONFIG_MEMORY_NOTIFIER_ERROR_INJECT) += memory-notifier-error-inject.o obj-$(CONFIG_OF_RECONFIG_NOTIFIER_ERROR_INJECT) += \ of-reconfig-notifier-error-inject.o +obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o lib-$(CONFIG_GENERIC_BUG) += bug.o diff --git a/lib/error-inject.c b/lib/error-inject.c new file mode 100644 index 000000000000..bccadcf3c981 --- /dev/null +++ b/lib/error-inject.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0 +// error-inject.c: Function-level error injection table +#include +#include +#include +#include +#include +#include +#include +#include + +/* Whitelist of symbols that can be overridden for error injection. */ +static LIST_HEAD(error_injection_list); +static DEFINE_MUTEX(ei_mutex); +struct ei_entry { + struct list_head list; + unsigned long start_addr; + unsigned long end_addr; + void *priv; +}; + +bool within_error_injection_list(unsigned long addr) +{ + struct ei_entry *ent; + bool ret = false; + + mutex_lock(&ei_mutex); + list_for_each_entry(ent, &error_injection_list, list) { + if (addr >= ent->start_addr && addr < ent->end_addr) { + ret = true; + break; + } + } + mutex_unlock(&ei_mutex); + return ret; +} + +/* + * Lookup and populate the error_injection_list. + * + * For safety reasons we only allow certain functions to be overridden with + * bpf_error_injection, so we need to populate the list of the symbols that have + * been marked as safe for overriding. + */ +static void populate_error_injection_list(unsigned long *start, + unsigned long *end, void *priv) +{ + unsigned long *iter; + struct ei_entry *ent; + unsigned long entry, offset = 0, size = 0; + + mutex_lock(&ei_mutex); + for (iter = start; iter < end; iter++) { + entry = arch_deref_entry_point((void *)*iter); + + if (!kernel_text_address(entry) || + !kallsyms_lookup_size_offset(entry, &size, &offset)) { + pr_err("Failed to find error inject entry at %p\n", + (void *)entry); + continue; + } + + ent = kmalloc(sizeof(*ent), GFP_KERNEL); + if (!ent) + break; + ent->start_addr = entry; + ent->end_addr = entry + size; + ent->priv = priv; + INIT_LIST_HEAD(&ent->list); + list_add_tail(&ent->list, &error_injection_list); + } + mutex_unlock(&ei_mutex); +} + +/* Markers of the _error_inject_whitelist section */ +extern unsigned long __start_error_injection_whitelist[]; +extern unsigned long __stop_error_injection_whitelist[]; + +static void __init populate_kernel_ei_list(void) +{ + populate_error_injection_list(__start_error_injection_whitelist, + __stop_error_injection_whitelist, + NULL); +} + +#ifdef CONFIG_MODULES +static void module_load_ei_list(struct module *mod) +{ + if (!mod->num_ei_funcs) + return; + + populate_error_injection_list(mod->ei_funcs, + mod->ei_funcs + mod->num_ei_funcs, mod); +} + +static void module_unload_ei_list(struct module *mod) +{ + struct ei_entry *ent, *n; + + if (!mod->num_ei_funcs) + return; + + mutex_lock(&ei_mutex); + list_for_each_entry_safe(ent, n, &error_injection_list, list) { + if (ent->priv == mod) { + list_del_init(&ent->list); + kfree(ent); + } + } + mutex_unlock(&ei_mutex); +} + +/* Module notifier call back, checking error injection table on the module */ +static int ei_module_callback(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct module *mod = data; + + if (val == MODULE_STATE_COMING) + module_load_ei_list(mod); + else if (val == MODULE_STATE_GOING) + module_unload_ei_list(mod); + + return NOTIFY_DONE; +} + +static struct notifier_block ei_module_nb = { + .notifier_call = ei_module_callback, + .priority = 0 +}; + +static __init int module_ei_init(void) +{ + return register_module_notifier(&ei_module_nb); +} +#else /* !CONFIG_MODULES */ +#define module_ei_init() (0) +#endif + +/* + * error_injection/whitelist -- shows which functions can be overridden for + * error injection. + */ +static void *ei_seq_start(struct seq_file *m, loff_t *pos) +{ + mutex_lock(&ei_mutex); + return seq_list_start(&error_injection_list, *pos); +} + +static void ei_seq_stop(struct seq_file *m, void *v) +{ + mutex_unlock(&ei_mutex); +} + +static void *ei_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + return seq_list_next(v, &error_injection_list, pos); +} + +static int ei_seq_show(struct seq_file *m, void *v) +{ + struct ei_entry *ent = list_entry(v, struct ei_entry, list); + + seq_printf(m, "%pf\n", (void *)ent->start_addr); + return 0; +} + +static const struct seq_operations ei_seq_ops = { + .start = ei_seq_start, + .next = ei_seq_next, + .stop = ei_seq_stop, + .show = ei_seq_show, +}; + +static int ei_open(struct inode *inode, struct file *filp) +{ + return seq_open(filp, &ei_seq_ops); +} + +static const struct file_operations debugfs_ei_ops = { + .open = ei_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int __init ei_debugfs_init(void) +{ + struct dentry *dir, *file; + + dir = debugfs_create_dir("error_injection", NULL); + if (!dir) + return -ENOMEM; + + file = debugfs_create_file("list", 0444, dir, NULL, &debugfs_ei_ops); + if (!file) { + debugfs_remove(dir); + return -ENOMEM; + } + + return 0; +} + +static int __init init_error_injection(void) +{ + populate_kernel_ei_list(); + + if (!module_ei_init()) + ei_debugfs_init(); + + return 0; +} +late_initcall(init_error_injection); -- cgit v1.2.3 From eefa864a81501161c05b35f14197677c937e5b9a Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 17 Jan 2018 09:19:32 -0800 Subject: bpf: change fake_ip for bpf_trace_printk helper Currently, for bpf_trace_printk helper, fake ip address 0x1 is used with comments saying that fake ip will not be printed. This is indeed true for 4.12 and earlier version, but for 4.13 and later version, the ip address will be printed if it cannot be resolved with kallsym. Running samples/bpf/tracex5 program and you will have the following in the debugfs trace_pipe output: ... <...>-1819 [003] .... 443.497877: 0x00000001: mmap <...>-1819 [003] .... 443.498289: 0x00000001: syscall=102 (one of get/set uid/pid/gid) ... The kernel commit changed this behavior is: commit feaf1283d11794b9d518fcfd54b6bf8bee1f0b4b Author: Steven Rostedt (VMware) Date: Thu Jun 22 17:04:55 2017 -0400 tracing: Show address when function names are not found ... This patch changed the comment and also altered the fake ip address to 0x0 as users may think 0x1 has some special meaning while it doesn't. The new output: ... <...>-1799 [002] .... 25.953576: 0: mmap <...>-1799 [002] .... 25.953865: 0: read(fd=0, buf=00000000053936b5, size=512) ... Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- kernel/trace/bpf_trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index f274468cbc45..fc2838ac8b78 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -245,7 +245,7 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, */ #define __BPF_TP_EMIT() __BPF_ARG3_TP() #define __BPF_TP(...) \ - __trace_printk(1 /* Fake ip will not be printed. */, \ + __trace_printk(0 /* Fake ip */, \ fmt, ##__VA_ARGS__) #define __BPF_ARG1_TP(...) \ -- cgit v1.2.3