From 153474ba1a4aed0a7b797b4c2be8c35c7a4e57bd Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 27 Jan 2022 11:46:37 -0600 Subject: ptrace: Create ptrace_report_syscall_{entry,exit} in ptrace.h Rename tracehook_report_syscall_{entry,exit} to ptrace_report_syscall_{entry,exit} and place them in ptrace.h There is no longer any generic tracehook infractructure so make these ptrace specific functions ptrace specific. Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20220309162454.123006-3-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- kernel/entry/common.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/entry/common.c b/kernel/entry/common.c index bad713684c2e..f52e57c4d6d8 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -2,6 +2,7 @@ #include #include +#include #include #include #include -- cgit v1.2.3 From 0cfcb2b9ef48bbcaf5d43b9f1893f63a938e8176 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 27 Jan 2022 12:00:55 -0600 Subject: ptrace: Remove arch_syscall_{enter,exit}_tracehook These functions are alwasy one-to-one wrappers around ptrace_report_syscall_entry and ptrace_report_syscall_exit. So directly call the functions they are wrapping instead. Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20220309162454.123006-4-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- include/linux/entry-common.h | 43 ++----------------------------------------- kernel/entry/common.c | 4 ++-- 2 files changed, 4 insertions(+), 43 deletions(-) (limited to 'kernel') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index a670e9fba7a9..9efbdda61f7a 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -79,26 +79,6 @@ static __always_inline void arch_check_user_regs(struct pt_regs *regs); static __always_inline void arch_check_user_regs(struct pt_regs *regs) {} #endif -/** - * arch_syscall_enter_tracehook - Wrapper around tracehook_report_syscall_entry() - * @regs: Pointer to currents pt_regs - * - * Returns: 0 on success or an error code to skip the syscall. - * - * Defaults to tracehook_report_syscall_entry(). Can be replaced by - * architecture specific code. - * - * Invoked from syscall_enter_from_user_mode() - */ -static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs); - -#ifndef arch_syscall_enter_tracehook -static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs) -{ - return ptrace_report_syscall_entry(regs); -} -#endif - /** * enter_from_user_mode - Establish state when coming from user mode * @@ -157,7 +137,7 @@ void syscall_enter_from_user_mode_prepare(struct pt_regs *regs); * It handles the following work items: * * 1) syscall_work flag dependent invocations of - * arch_syscall_enter_tracehook(), __secure_computing(), trace_sys_enter() + * ptrace_report_syscall_entry(), __secure_computing(), trace_sys_enter() * 2) Invocation of audit_syscall_entry() */ long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall); @@ -279,25 +259,6 @@ static __always_inline void arch_exit_to_user_mode(void) { } */ void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal); -/** - * arch_syscall_exit_tracehook - Wrapper around tracehook_report_syscall_exit() - * @regs: Pointer to currents pt_regs - * @step: Indicator for single step - * - * Defaults to tracehook_report_syscall_exit(). Can be replaced by - * architecture specific code. - * - * Invoked from syscall_exit_to_user_mode() - */ -static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step); - -#ifndef arch_syscall_exit_tracehook -static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step) -{ - ptrace_report_syscall_exit(regs, step); -} -#endif - /** * exit_to_user_mode - Fixup state when exiting to user mode * @@ -347,7 +308,7 @@ void syscall_exit_to_user_mode_work(struct pt_regs *regs); * - rseq syscall exit * - audit * - syscall tracing - * - tracehook (single stepping) + * - ptrace (single stepping) * * 2) Preparatory work * - Exit to user mode loop (common TIF handling). Invokes diff --git a/kernel/entry/common.c b/kernel/entry/common.c index f52e57c4d6d8..f0b1daa1e8da 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -59,7 +59,7 @@ static long syscall_trace_enter(struct pt_regs *regs, long syscall, /* Handle ptrace */ if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) { - ret = arch_syscall_enter_tracehook(regs); + ret = ptrace_report_syscall_entry(regs); if (ret || (work & SYSCALL_WORK_SYSCALL_EMU)) return -1L; } @@ -253,7 +253,7 @@ static void syscall_exit_work(struct pt_regs *regs, unsigned long work) step = report_single_step(work); if (step || work & SYSCALL_WORK_SYSCALL_TRACE) - arch_syscall_exit_tracehook(regs, step); + ptrace_report_syscall_exit(regs, step); } /* -- cgit v1.2.3 From c145137dc990fd67b52fbc52faae5ba46f168cca Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 27 Jan 2022 12:04:27 -0600 Subject: ptrace: Remove tracehook_signal_handler The two line function tracehook_signal_handler is only called from signal_delivered. Expand it inline in signal_delivered and remove it. Just to make it easier to understand what is going on. Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20220309162454.123006-5-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- arch/Kconfig | 1 - include/linux/tracehook.h | 17 ----------------- kernel/signal.c | 3 ++- 3 files changed, 2 insertions(+), 19 deletions(-) (limited to 'kernel') diff --git a/arch/Kconfig b/arch/Kconfig index a517a949eb1d..6382520ef0a5 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -219,7 +219,6 @@ config TRACE_IRQFLAGS_SUPPORT # CORE_DUMP_USE_REGSET #define'd in linux/elf.h # TIF_SYSCALL_TRACE calls ptrace_report_syscall_{entry,exit} # TIF_NOTIFY_RESUME calls tracehook_notify_resume() -# signal delivery calls tracehook_signal_handler() # config HAVE_ARCH_TRACEHOOK bool diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 819e82ac09bd..b77bf4917196 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -52,23 +52,6 @@ struct linux_binprm; -/** - * tracehook_signal_handler - signal handler setup is complete - * @stepping: nonzero if debugger single-step or block-step in use - * - * Called by the arch code after a signal handler has been set up. - * Register and stack state reflects the user handler about to run. - * Signal mask changes have already been made. - * - * Called without locks, shortly before returning to user mode - * (or handling more signals). - */ -static inline void tracehook_signal_handler(int stepping) -{ - if (stepping) - ptrace_notify(SIGTRAP); -} - /** * set_notify_resume - cause tracehook_notify_resume() to be called * @task: task that will call tracehook_notify_resume() diff --git a/kernel/signal.c b/kernel/signal.c index 38602738866e..0e0bd1c1068b 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2898,7 +2898,8 @@ static void signal_delivered(struct ksignal *ksig, int stepping) set_current_blocked(&blocked); if (current->sas_ss_flags & SS_AUTODISARM) sas_ss_reset(current); - tracehook_signal_handler(stepping); + if (stepping) + ptrace_notify(SIGTRAP); } void signal_setup_done(int failed, struct ksignal *ksig, int stepping) -- cgit v1.2.3 From 8ca07e17c9dd4c4afcb4a3f2ea8f0a0d41c0f982 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 28 Jan 2022 13:55:47 -0600 Subject: task_work: Remove unnecessary include from posix_timers.h Break a header file circular dependency by removing the unnecessary include of task_work.h from posix_timers.h. sched.h -> posix-timers.h posix-timers.h -> task_work.h task_work.h -> sched.h Add missing includes of task_work.h to: arch/x86/mm/tlb.c kernel/time/posix-cpu-timers.c Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20220309162454.123006-6-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- arch/x86/mm/tlb.c | 1 + include/linux/posix-timers.h | 1 - kernel/time/posix-cpu-timers.c | 1 + 3 files changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index a6cf56a14939..6eb4d91d5365 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 5bbcd280bfd2..83539bb2f023 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -6,7 +6,6 @@ #include #include #include -#include struct kernel_siginfo; struct task_struct; diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 96b4e7810426..9190d9eb236d 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "posix-timers.h" -- cgit v1.2.3 From 7f62d40d9cb50fd146fe8ff071f98fa3c1855083 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 9 Feb 2022 08:52:41 -0600 Subject: task_work: Introduce task_work_pending Wrap the test of task->task_works in a helper function to make it clear what is being tested. All of the other readers of task->task_work use READ_ONCE and this is even necessary on current as other processes can update task->task_work. So for consistency I have added READ_ONCE into task_work_pending. Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20220309162454.123006-7-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- fs/io_uring.c | 6 +++--- include/linux/task_work.h | 5 +++++ include/linux/tracehook.h | 4 ++-- kernel/signal.c | 4 ++-- kernel/task_work.c | 2 +- 5 files changed, 13 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/fs/io_uring.c b/fs/io_uring.c index e54c4127422e..e85261079a78 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2590,7 +2590,7 @@ static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx) static inline bool io_run_task_work(void) { - if (test_thread_flag(TIF_NOTIFY_SIGNAL) || current->task_works) { + if (test_thread_flag(TIF_NOTIFY_SIGNAL) || task_work_pending(current)) { __set_current_state(TASK_RUNNING); tracehook_notify_signal(); return true; @@ -7602,7 +7602,7 @@ static int io_sq_thread(void *data) } prepare_to_wait(&sqd->wait, &wait, TASK_INTERRUPTIBLE); - if (!io_sqd_events_pending(sqd) && !current->task_works) { + if (!io_sqd_events_pending(sqd) && !task_work_pending(current)) { bool needs_sched = true; list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { @@ -10321,7 +10321,7 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, hlist_for_each_entry(req, list, hash_node) seq_printf(m, " op=%d, task_works=%d\n", req->opcode, - req->task->task_works != NULL); + task_work_pending(req->task)); } seq_puts(m, "CqOverflowList:\n"); diff --git a/include/linux/task_work.h b/include/linux/task_work.h index 5b8a93f288bb..897494b597ba 100644 --- a/include/linux/task_work.h +++ b/include/linux/task_work.h @@ -19,6 +19,11 @@ enum task_work_notify_mode { TWA_SIGNAL, }; +static inline bool task_work_pending(struct task_struct *task) +{ + return READ_ONCE(task->task_works); +} + int task_work_add(struct task_struct *task, struct callback_head *twork, enum task_work_notify_mode mode); diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index b77bf4917196..fa834a22e86e 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -90,7 +90,7 @@ static inline void tracehook_notify_resume(struct pt_regs *regs) * hlist_add_head(task->task_works); */ smp_mb__after_atomic(); - if (unlikely(current->task_works)) + if (unlikely(task_work_pending(current))) task_work_run(); #ifdef CONFIG_KEYS_REQUEST_CACHE @@ -115,7 +115,7 @@ static inline void tracehook_notify_signal(void) { clear_thread_flag(TIF_NOTIFY_SIGNAL); smp_mb__after_atomic(); - if (current->task_works) + if (task_work_pending(current)) task_work_run(); } diff --git a/kernel/signal.c b/kernel/signal.c index 0e0bd1c1068b..3b4cf25fb9b3 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2344,7 +2344,7 @@ static void ptrace_do_notify(int signr, int exit_code, int why) void ptrace_notify(int exit_code) { BUG_ON((exit_code & (0x7f | ~0xffff)) != SIGTRAP); - if (unlikely(current->task_works)) + if (unlikely(task_work_pending(current))) task_work_run(); spin_lock_irq(¤t->sighand->siglock); @@ -2626,7 +2626,7 @@ bool get_signal(struct ksignal *ksig) struct signal_struct *signal = current->signal; int signr; - if (unlikely(current->task_works)) + if (unlikely(task_work_pending(current))) task_work_run(); /* diff --git a/kernel/task_work.c b/kernel/task_work.c index 1698fbe6f0e1..cc6fccb0e24d 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -78,7 +78,7 @@ task_work_cancel_match(struct task_struct *task, struct callback_head *work; unsigned long flags; - if (likely(!task->task_works)) + if (likely(!task_work_pending(task))) return NULL; /* * If cmpxchg() fails we continue without updating pprev. -- cgit v1.2.3 From 8ba62d37949e248c698c26e0d82d72fda5d33ebf Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 9 Feb 2022 09:51:14 -0600 Subject: task_work: Call tracehook_notify_signal from get_signal on all architectures Always handle TIF_NOTIFY_SIGNAL in get_signal. With commit 35d0b389f3b2 ("task_work: unconditionally run task_work from get_signal()") always calling task_work_run all of the work of tracehook_notify_signal is already happening except clearing TIF_NOTIFY_SIGNAL. Factor clear_notify_signal out of tracehook_notify_signal and use it in get_signal so that get_signal only needs one call of task_work_run. To keep the semantics in sync update xfer_to_guest_mode_work (which does not call get_signal) to call tracehook_notify_signal if either _TIF_SIGPENDING or _TIF_NOTIFY_SIGNAL. Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20220309162454.123006-8-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- arch/s390/kernel/signal.c | 4 ++-- arch/x86/kernel/signal.c | 4 ++-- include/linux/entry-common.h | 2 +- include/linux/tracehook.h | 9 +++++++-- kernel/entry/common.c | 12 ++---------- kernel/entry/kvm.c | 2 +- kernel/signal.c | 14 +++----------- 7 files changed, 18 insertions(+), 29 deletions(-) (limited to 'kernel') diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 307f5d99514d..ea9e5e8182cd 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -453,7 +453,7 @@ static void handle_signal(struct ksignal *ksig, sigset_t *oldset, * stack-frames in one go after that. */ -void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal) +void arch_do_signal_or_restart(struct pt_regs *regs) { struct ksignal ksig; sigset_t *oldset = sigmask_to_save(); @@ -466,7 +466,7 @@ void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal) current->thread.system_call = test_pt_regs_flag(regs, PIF_SYSCALL) ? regs->int_code : 0; - if (has_signal && get_signal(&ksig)) { + if (get_signal(&ksig)) { /* Whee! Actually deliver the signal. */ if (current->thread.system_call) { regs->int_code = current->thread.system_call; diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index ec71e06ae364..de3d5b5724d8 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -861,11 +861,11 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) * want to handle. Thus you cannot kill init even with a SIGKILL even by * mistake. */ -void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal) +void arch_do_signal_or_restart(struct pt_regs *regs) { struct ksignal ksig; - if (has_signal && get_signal(&ksig)) { + if (get_signal(&ksig)) { /* Whee! Actually deliver the signal. */ handle_signal(&ksig, regs); return; diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index 9efbdda61f7a..3537fd25f14e 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -257,7 +257,7 @@ static __always_inline void arch_exit_to_user_mode(void) { } * * Invoked from exit_to_user_mode_loop(). */ -void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal); +void arch_do_signal_or_restart(struct pt_regs *regs); /** * exit_to_user_mode - Fixup state when exiting to user mode diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index fa834a22e86e..b44a7820c468 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -106,6 +106,12 @@ static inline void tracehook_notify_resume(struct pt_regs *regs) rseq_handle_notify_resume(NULL, regs); } +static inline void clear_notify_signal(void) +{ + clear_thread_flag(TIF_NOTIFY_SIGNAL); + smp_mb__after_atomic(); +} + /* * called by exit_to_user_mode_loop() if ti_work & _TIF_NOTIFY_SIGNAL. This * is currently used by TWA_SIGNAL based task_work, which requires breaking @@ -113,8 +119,7 @@ static inline void tracehook_notify_resume(struct pt_regs *regs) */ static inline void tracehook_notify_signal(void) { - clear_thread_flag(TIF_NOTIFY_SIGNAL); - smp_mb__after_atomic(); + clear_notify_signal(); if (task_work_pending(current)) task_work_run(); } diff --git a/kernel/entry/common.c b/kernel/entry/common.c index f0b1daa1e8da..79eaf9b4b10d 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -139,15 +139,7 @@ void noinstr exit_to_user_mode(void) } /* Workaround to allow gradual conversion of architecture code */ -void __weak arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal) { } - -static void handle_signal_work(struct pt_regs *regs, unsigned long ti_work) -{ - if (ti_work & _TIF_NOTIFY_SIGNAL) - tracehook_notify_signal(); - - arch_do_signal_or_restart(regs, ti_work & _TIF_SIGPENDING); -} +void __weak arch_do_signal_or_restart(struct pt_regs *regs) { } static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, unsigned long ti_work) @@ -170,7 +162,7 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, klp_update_patch_state(current); if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) - handle_signal_work(regs, ti_work); + arch_do_signal_or_restart(regs); if (ti_work & _TIF_NOTIFY_RESUME) tracehook_notify_resume(regs); diff --git a/kernel/entry/kvm.c b/kernel/entry/kvm.c index 96d476e06c77..cabf36a489e4 100644 --- a/kernel/entry/kvm.c +++ b/kernel/entry/kvm.c @@ -8,7 +8,7 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work) do { int ret; - if (ti_work & _TIF_NOTIFY_SIGNAL) + if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) tracehook_notify_signal(); if (ti_work & _TIF_SIGPENDING) { diff --git a/kernel/signal.c b/kernel/signal.c index 3b4cf25fb9b3..8632b88982c9 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2626,20 +2626,12 @@ bool get_signal(struct ksignal *ksig) struct signal_struct *signal = current->signal; int signr; + clear_notify_signal(); if (unlikely(task_work_pending(current))) task_work_run(); - /* - * For non-generic architectures, check for TIF_NOTIFY_SIGNAL so - * that the arch handlers don't all have to do it. If we get here - * without TIF_SIGPENDING, just exit after running signal work. - */ - if (!IS_ENABLED(CONFIG_GENERIC_ENTRY)) { - if (test_thread_flag(TIF_NOTIFY_SIGNAL)) - tracehook_notify_signal(); - if (!task_sigpending(current)) - return false; - } + if (!task_sigpending(current)) + return false; if (unlikely(uprobe_deny_signal())) return false; -- cgit v1.2.3 From 7c5d8fa6fbb12a3f0eefe8762bfede508e147cb3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 9 Feb 2022 11:18:54 -0600 Subject: task_work: Decouple TIF_NOTIFY_SIGNAL and task_work There are a small handful of reasons besides pending signals that the kernel might want to break out of interruptible sleeps. The flag TIF_NOTIFY_SIGNAL and the helpers that set and clear TIF_NOTIFY_SIGNAL provide that the infrastructure for breaking out of interruptible sleeps and entering the return to user space slow path for those cases. Expand tracehook_notify_signal inline in it's callers and remove it, which makes clear that TIF_NOTIFY_SIGNAL and task_work are separate concepts. Update the comment on set_notify_signal to more accurately describe it's purpose. Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20220309162454.123006-9-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- fs/io-wq.c | 4 +++- fs/io_uring.c | 4 +++- include/linux/tracehook.h | 15 ++------------- kernel/entry/kvm.c | 7 +++++-- 4 files changed, 13 insertions(+), 17 deletions(-) (limited to 'kernel') diff --git a/fs/io-wq.c b/fs/io-wq.c index bb7f161bb19c..8b9147873c2c 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -515,7 +515,9 @@ static bool io_flush_signals(void) { if (unlikely(test_thread_flag(TIF_NOTIFY_SIGNAL))) { __set_current_state(TASK_RUNNING); - tracehook_notify_signal(); + clear_notify_signal(); + if (task_work_pending(current)) + task_work_run(); return true; } return false; diff --git a/fs/io_uring.c b/fs/io_uring.c index e85261079a78..d5fbae1030f9 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2592,7 +2592,9 @@ static inline bool io_run_task_work(void) { if (test_thread_flag(TIF_NOTIFY_SIGNAL) || task_work_pending(current)) { __set_current_state(TASK_RUNNING); - tracehook_notify_signal(); + clear_notify_signal(); + if (task_work_pending(current)) + task_work_run(); return true; } diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index b44a7820c468..e5d676e841e3 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -113,19 +113,8 @@ static inline void clear_notify_signal(void) } /* - * called by exit_to_user_mode_loop() if ti_work & _TIF_NOTIFY_SIGNAL. This - * is currently used by TWA_SIGNAL based task_work, which requires breaking - * wait loops to ensure that task_work is noticed and run. - */ -static inline void tracehook_notify_signal(void) -{ - clear_notify_signal(); - if (task_work_pending(current)) - task_work_run(); -} - -/* - * Called when we have work to process from exit_to_user_mode_loop() + * Called to break out of interruptible wait loops, and enter the + * exit_to_user_mode_loop(). */ static inline void set_notify_signal(struct task_struct *task) { diff --git a/kernel/entry/kvm.c b/kernel/entry/kvm.c index cabf36a489e4..3ab5f98988c3 100644 --- a/kernel/entry/kvm.c +++ b/kernel/entry/kvm.c @@ -8,8 +8,11 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work) do { int ret; - if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) - tracehook_notify_signal(); + if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) { + clear_notify_signal(); + if (task_work_pending(current)) + task_work_run(); + } if (ti_work & _TIF_SIGPENDING) { kvm_handle_signal_exit(vcpu); -- cgit v1.2.3 From 03248addadf1a5ef0a03cbcd5ec905b49adb9658 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 9 Feb 2022 12:20:45 -0600 Subject: resume_user_mode: Move to resume_user_mode.h Move set_notify_resume and tracehook_notify_resume into resume_user_mode.h. While doing that rename tracehook_notify_resume to resume_user_mode_work. Update all of the places that included tracehook.h for these functions to include resume_user_mode.h instead. Update all of the callers of tracehook_notify_resume to call resume_user_mode_work. Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20220309162454.123006-12-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- arch/Kconfig | 2 +- arch/alpha/kernel/signal.c | 4 +-- arch/arc/kernel/signal.c | 4 +-- arch/arm/kernel/signal.c | 4 +-- arch/arm64/kernel/signal.c | 4 +-- arch/csky/kernel/signal.c | 4 +-- arch/h8300/kernel/signal.c | 4 +-- arch/hexagon/kernel/process.c | 4 +-- arch/hexagon/kernel/signal.c | 1 - arch/ia64/kernel/process.c | 4 +-- arch/ia64/kernel/ptrace.c | 2 +- arch/ia64/kernel/signal.c | 1 - arch/m68k/kernel/signal.c | 4 +-- arch/microblaze/kernel/signal.c | 4 +-- arch/mips/kernel/signal.c | 4 +-- arch/nds32/kernel/signal.c | 4 +-- arch/nios2/kernel/signal.c | 4 +-- arch/openrisc/kernel/signal.c | 4 +-- arch/parisc/kernel/signal.c | 4 +-- arch/powerpc/kernel/signal.c | 4 +-- arch/riscv/kernel/signal.c | 4 +-- arch/sh/kernel/signal_32.c | 4 +-- arch/sparc/kernel/signal32.c | 1 - arch/sparc/kernel/signal_32.c | 4 +-- arch/sparc/kernel/signal_64.c | 4 +-- arch/um/kernel/process.c | 4 +-- arch/xtensa/kernel/signal.c | 4 +-- block/blk-cgroup.c | 2 +- include/linux/entry-kvm.h | 2 +- include/linux/resume_user_mode.h | 64 ++++++++++++++++++++++++++++++++++++++++ include/linux/tracehook.h | 51 -------------------------------- kernel/entry/common.c | 4 +-- kernel/entry/kvm.c | 2 +- kernel/task_work.c | 2 +- mm/memcontrol.c | 2 +- 35 files changed, 117 insertions(+), 107 deletions(-) create mode 100644 include/linux/resume_user_mode.h (limited to 'kernel') diff --git a/arch/Kconfig b/arch/Kconfig index 6382520ef0a5..2e3979c3d66d 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -218,7 +218,7 @@ config TRACE_IRQFLAGS_SUPPORT # linux/regset.h user_regset interfaces # CORE_DUMP_USE_REGSET #define'd in linux/elf.h # TIF_SYSCALL_TRACE calls ptrace_report_syscall_{entry,exit} -# TIF_NOTIFY_RESUME calls tracehook_notify_resume() +# TIF_NOTIFY_RESUME calls resume_user_mode_work() # config HAVE_ARCH_TRACEHOOK bool diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c index d8ed71d5bed3..6f47f256fe80 100644 --- a/arch/alpha/kernel/signal.c +++ b/arch/alpha/kernel/signal.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include @@ -531,7 +531,7 @@ do_work_pending(struct pt_regs *regs, unsigned long thread_flags, do_signal(regs, r0, r19); r0 = 0; } else { - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } } local_irq_disable(); diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index cb2f88502baf..f748483628f2 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -49,7 +49,7 @@ #include #include #include -#include +#include #include #include @@ -438,5 +438,5 @@ void do_notify_resume(struct pt_regs *regs) * user mode */ if (test_thread_flag(TIF_NOTIFY_RESUME)) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index c532a6041066..459abc5d1819 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include @@ -627,7 +627,7 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) } else if (thread_flags & _TIF_UPROBE) { uprobe_notify_resume(regs); } else { - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } } local_irq_disable(); diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index d8aaf4b6f432..413c51de9d10 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include @@ -941,7 +941,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags) do_signal(regs); if (thread_flags & _TIF_NOTIFY_RESUME) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); if (thread_flags & _TIF_FOREIGN_FPSTATE) fpsimd_restore_current_state(); diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c index c7b763d2f526..7a3149a27e4d 100644 --- a/arch/csky/kernel/signal.c +++ b/arch/csky/kernel/signal.c @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include @@ -265,5 +265,5 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, do_signal(regs); if (thread_info_flags & _TIF_NOTIFY_RESUME) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c index 75a1c36b105a..0716fc8a8ce2 100644 --- a/arch/h8300/kernel/signal.c +++ b/arch/h8300/kernel/signal.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include #include @@ -283,5 +283,5 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, u32 thread_info_flags) do_signal(regs); if (thread_info_flags & _TIF_NOTIFY_RESUME) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c index 232dfd8956aa..ae3f728eeca0 100644 --- a/arch/hexagon/kernel/process.c +++ b/arch/hexagon/kernel/process.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include /* * Program thread launch. Often defined as a macro in processor.h, @@ -178,7 +178,7 @@ int do_work_pending(struct pt_regs *regs, u32 thread_info_flags) } if (thread_info_flags & _TIF_NOTIFY_RESUME) { - tracehook_notify_resume(regs); + resume_user_mode_work(regs); return 1; } diff --git a/arch/hexagon/kernel/signal.c b/arch/hexagon/kernel/signal.c index 94cc7ff52dce..bcba31e9e0ae 100644 --- a/arch/hexagon/kernel/signal.c +++ b/arch/hexagon/kernel/signal.c @@ -7,7 +7,6 @@ #include #include -#include #include #include diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 834df24a88f1..d7a256bd9d6b 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include @@ -179,7 +179,7 @@ do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall) if (test_thread_flag(TIF_NOTIFY_RESUME)) { local_irq_enable(); /* force interrupt enable */ - tracehook_notify_resume(&scr->pt); + resume_user_mode_work(&scr->pt); } /* copy user rbs to kernel rbs */ diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 6af64aae087d..a19acd9f5e1f 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index c1b299760bf7..51cf6a7ec158 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c index 338817d0cb3f..49533f65958a 100644 --- a/arch/m68k/kernel/signal.c +++ b/arch/m68k/kernel/signal.c @@ -43,7 +43,7 @@ #include #include #include -#include +#include #include #include @@ -1109,5 +1109,5 @@ void do_notify_resume(struct pt_regs *regs) do_signal(regs); if (test_thread_flag(TIF_NOTIFY_RESUME)) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c index 23e8a9336a29..561eb82d7af6 100644 --- a/arch/microblaze/kernel/signal.c +++ b/arch/microblaze/kernel/signal.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include @@ -311,5 +311,5 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, int in_syscall) do_signal(regs, in_syscall); if (test_thread_flag(TIF_NOTIFY_RESUME)) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c index 5bce782e694c..1a99f26bf99f 100644 --- a/arch/mips/kernel/signal.c +++ b/arch/mips/kernel/signal.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include @@ -916,7 +916,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, void *unused, do_signal(regs); if (thread_info_flags & _TIF_NOTIFY_RESUME) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); user_enter(); } diff --git a/arch/nds32/kernel/signal.c b/arch/nds32/kernel/signal.c index 7e3ca430a223..551caef595cb 100644 --- a/arch/nds32/kernel/signal.c +++ b/arch/nds32/kernel/signal.c @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include @@ -380,5 +380,5 @@ do_notify_resume(struct pt_regs *regs, unsigned int thread_flags) do_signal(regs); if (thread_flags & _TIF_NOTIFY_RESUME) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } diff --git a/arch/nios2/kernel/signal.c b/arch/nios2/kernel/signal.c index 2009ae2d3c3b..530b60c99545 100644 --- a/arch/nios2/kernel/signal.c +++ b/arch/nios2/kernel/signal.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include @@ -319,7 +319,7 @@ asmlinkage int do_notify_resume(struct pt_regs *regs) return restart; } } else if (test_thread_flag(TIF_NOTIFY_RESUME)) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); return 0; } diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c index 92c5b70740f5..80f69740c731 100644 --- a/arch/openrisc/kernel/signal.c +++ b/arch/openrisc/kernel/signal.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include @@ -309,7 +309,7 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) } syscall = 0; } else { - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } } local_irq_disable(); diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index 46b1050640b8..2f7ebe9add20 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include @@ -602,5 +602,5 @@ void do_notify_resume(struct pt_regs *regs, long in_syscall) do_signal(regs, in_syscall); if (test_thread_flag(TIF_NOTIFY_RESUME)) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index b93b87df499d..f7f8620663c7 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -9,7 +9,7 @@ * this archive for more details. */ -#include +#include #include #include #include @@ -294,7 +294,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) } if (thread_info_flags & _TIF_NOTIFY_RESUME) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } static unsigned long get_tm_stackpointer(struct task_struct *tsk) diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index c2d5ecbe5526..d80bf5896c6f 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include @@ -317,5 +317,5 @@ asmlinkage __visible void do_notify_resume(struct pt_regs *regs, do_signal(regs); if (thread_info_flags & _TIF_NOTIFY_RESUME) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c index dd3092911efa..90f495d35db2 100644 --- a/arch/sh/kernel/signal_32.c +++ b/arch/sh/kernel/signal_32.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include @@ -503,5 +503,5 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned int save_r0, do_signal(regs, save_r0); if (thread_info_flags & _TIF_NOTIFY_RESUME) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c index 6cc124a3bb98..f9fe502b81c6 100644 --- a/arch/sparc/kernel/signal32.c +++ b/arch/sparc/kernel/signal32.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c index ffab16369bea..80c89b362d8b 100644 --- a/arch/sparc/kernel/signal_32.c +++ b/arch/sparc/kernel/signal_32.c @@ -19,7 +19,7 @@ #include #include /* do_coredum */ #include -#include +#include #include #include @@ -524,7 +524,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) do_signal(regs, orig_i0); if (thread_info_flags & _TIF_NOTIFY_RESUME) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } asmlinkage int do_sys_sigstack(struct sigstack __user *ssptr, diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c index 2a78d2af1265..8b9fc76cd3e0 100644 --- a/arch/sparc/kernel/signal_64.c +++ b/arch/sparc/kernel/signal_64.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include @@ -552,7 +552,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, unsigned long if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) do_signal(regs, orig_i0); if (thread_info_flags & _TIF_NOTIFY_RESUME) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); user_enter(); } diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 4a420778ed87..80504680be08 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include @@ -104,7 +104,7 @@ void interrupt_end(void) test_thread_flag(TIF_NOTIFY_SIGNAL)) do_signal(regs); if (test_thread_flag(TIF_NOTIFY_RESUME)) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } int get_current_pid(void) diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c index f6c949895b3e..6f68649e86ba 100644 --- a/arch/xtensa/kernel/signal.c +++ b/arch/xtensa/kernel/signal.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include @@ -511,5 +511,5 @@ void do_notify_resume(struct pt_regs *regs) do_signal(regs); if (test_thread_flag(TIF_NOTIFY_RESUME)) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 650f7e27989f..4d8be1634bc6 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include "blk.h" diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h index 07c878d6e323..6813171afccb 100644 --- a/include/linux/entry-kvm.h +++ b/include/linux/entry-kvm.h @@ -3,7 +3,7 @@ #define __LINUX_ENTRYKVM_H #include -#include +#include #include #include #include diff --git a/include/linux/resume_user_mode.h b/include/linux/resume_user_mode.h new file mode 100644 index 000000000000..285189454449 --- /dev/null +++ b/include/linux/resume_user_mode.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef LINUX_RESUME_USER_MODE_H +#define LINUX_RESUME_USER_MODE_H + +#include +#include +#include +#include + +/** + * set_notify_resume - cause resume_user_mode_work() to be called + * @task: task that will call resume_user_mode_work() + * + * Calling this arranges that @task will call resume_user_mode_work() + * before returning to user mode. If it's already running in user mode, + * it will enter the kernel and call resume_user_mode_work() soon. + * If it's blocked, it will not be woken. + */ +static inline void set_notify_resume(struct task_struct *task) +{ + if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_RESUME)) + kick_process(task); +} + + +/** + * resume_user_mode_work - Perform work before returning to user mode + * @regs: user-mode registers of @current task + * + * This is called when %TIF_NOTIFY_RESUME has been set. Now we are + * about to return to user mode, and the user state in @regs can be + * inspected or adjusted. The caller in arch code has cleared + * %TIF_NOTIFY_RESUME before the call. If the flag gets set again + * asynchronously, this will be called again before we return to + * user mode. + * + * Called without locks. + */ +static inline void resume_user_mode_work(struct pt_regs *regs) +{ + clear_thread_flag(TIF_NOTIFY_RESUME); + /* + * This barrier pairs with task_work_add()->set_notify_resume() after + * hlist_add_head(task->task_works); + */ + smp_mb__after_atomic(); + if (unlikely(task_work_pending(current))) + task_work_run(); + +#ifdef CONFIG_KEYS_REQUEST_CACHE + if (unlikely(current->cached_requested_key)) { + key_put(current->cached_requested_key); + current->cached_requested_key = NULL; + } +#endif + + mem_cgroup_handle_over_high(); + blkcg_maybe_throttle_current(); + + rseq_handle_notify_resume(NULL, regs); +} + +#endif /* LINUX_RESUME_USER_MODE_H */ diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 946404ebe10b..9f6b3fd1880a 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -52,56 +52,5 @@ struct linux_binprm; -/** - * set_notify_resume - cause tracehook_notify_resume() to be called - * @task: task that will call tracehook_notify_resume() - * - * Calling this arranges that @task will call tracehook_notify_resume() - * before returning to user mode. If it's already running in user mode, - * it will enter the kernel and call tracehook_notify_resume() soon. - * If it's blocked, it will not be woken. - */ -static inline void set_notify_resume(struct task_struct *task) -{ - if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_RESUME)) - kick_process(task); -} - -/** - * tracehook_notify_resume - report when about to return to user mode - * @regs: user-mode registers of @current task - * - * This is called when %TIF_NOTIFY_RESUME has been set. Now we are - * about to return to user mode, and the user state in @regs can be - * inspected or adjusted. The caller in arch code has cleared - * %TIF_NOTIFY_RESUME before the call. If the flag gets set again - * asynchronously, this will be called again before we return to - * user mode. - * - * Called without locks. - */ -static inline void tracehook_notify_resume(struct pt_regs *regs) -{ - clear_thread_flag(TIF_NOTIFY_RESUME); - /* - * This barrier pairs with task_work_add()->set_notify_resume() after - * hlist_add_head(task->task_works); - */ - smp_mb__after_atomic(); - if (unlikely(task_work_pending(current))) - task_work_run(); - -#ifdef CONFIG_KEYS_REQUEST_CACHE - if (unlikely(current->cached_requested_key)) { - key_put(current->cached_requested_key); - current->cached_requested_key = NULL; - } -#endif - - mem_cgroup_handle_over_high(); - blkcg_maybe_throttle_current(); - - rseq_handle_notify_resume(NULL, regs); -} #endif /* */ diff --git a/kernel/entry/common.c b/kernel/entry/common.c index 79eaf9b4b10d..a86823cad853 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include @@ -165,7 +165,7 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, arch_do_signal_or_restart(regs); if (ti_work & _TIF_NOTIFY_RESUME) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); /* Architecture specific TIF work */ arch_exit_to_user_mode_work(regs, ti_work); diff --git a/kernel/entry/kvm.c b/kernel/entry/kvm.c index 3ab5f98988c3..9d09f489b60e 100644 --- a/kernel/entry/kvm.c +++ b/kernel/entry/kvm.c @@ -23,7 +23,7 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work) schedule(); if (ti_work & _TIF_NOTIFY_RESUME) - tracehook_notify_resume(NULL); + resume_user_mode_work(NULL); ret = arch_xfer_to_guest_mode_handle_work(vcpu, ti_work); if (ret) diff --git a/kernel/task_work.c b/kernel/task_work.c index cc6fccb0e24d..c59e1a49bc40 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include -#include +#include static struct callback_head work_exited; /* all we need is ->next == NULL */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 09d342c7cbd0..2aaa400f34d6 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -59,7 +59,7 @@ #include #include #include -#include +#include #include #include #include "internal.h" -- cgit v1.2.3 From 355f841a3f8ca980c9682937a5257d3a1f6fc09d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 9 Feb 2022 12:47:08 -0600 Subject: tracehook: Remove tracehook.h Now that all of the definitions have moved out of tracehook.h into ptrace.h, sched/signal.h, resume_user_mode.h there is nothing left in tracehook.h so remove it. Update the few files that were depending upon tracehook.h to bring in definitions to use the headers they need directly. Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20220309162454.123006-13-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- MAINTAINERS | 1 - arch/s390/include/asm/entry-common.h | 1 - arch/s390/kernel/ptrace.c | 1 - arch/s390/kernel/signal.c | 1 - arch/x86/kernel/ptrace.c | 1 - arch/x86/kernel/signal.c | 1 - fs/coredump.c | 1 - fs/exec.c | 1 - fs/io-wq.c | 2 +- fs/io_uring.c | 1 - fs/proc/array.c | 1 - fs/proc/base.c | 1 - include/linux/tracehook.h | 56 ------------------------------------ kernel/exit.c | 3 +- kernel/livepatch/transition.c | 1 - kernel/seccomp.c | 1 - kernel/signal.c | 2 +- security/apparmor/domain.c | 1 - security/selinux/hooks.c | 1 - 19 files changed, 4 insertions(+), 74 deletions(-) delete mode 100644 include/linux/tracehook.h (limited to 'kernel') diff --git a/MAINTAINERS b/MAINTAINERS index ea3e6c914384..2f16a23a26a2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15623,7 +15623,6 @@ F: arch/*/ptrace*.c F: include/asm-generic/syscall.h F: include/linux/ptrace.h F: include/linux/regset.h -F: include/linux/tracehook.h F: include/uapi/linux/ptrace.h F: include/uapi/linux/ptrace.h F: kernel/ptrace.c diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h index 17aead80aadb..eabab24b71dd 100644 --- a/arch/s390/include/asm/entry-common.h +++ b/arch/s390/include/asm/entry-common.h @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 0ea3d02b378d..641fa36f6101 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index ea9e5e8182cd..8b7b5f80f722 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 6d2244c94799..419768d7605e 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index de3d5b5724d8..e439eb14325f 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/coredump.c b/fs/coredump.c index 1c060c0a2d72..f54c5e316df3 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/exec.c b/fs/exec.c index 79f2c9483302..e23e2d430485 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -56,7 +56,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/io-wq.c b/fs/io-wq.c index 8b9147873c2c..cb3cb1833ef6 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/fs/io_uring.c b/fs/io_uring.c index d5fbae1030f9..6c7eacc0ebd6 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -78,7 +78,6 @@ #include #include #include -#include #include #include diff --git a/fs/proc/array.c b/fs/proc/array.c index fd8b0c12b2cb..eb815759842c 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -88,7 +88,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/proc/base.c b/fs/proc/base.c index d654ce7150fd..01fb37ecc89f 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -74,7 +74,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h deleted file mode 100644 index 9f6b3fd1880a..000000000000 --- a/include/linux/tracehook.h +++ /dev/null @@ -1,56 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Tracing hooks - * - * Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved. - * - * This file defines hook entry points called by core code where - * user tracing/debugging support might need to do something. These - * entry points are called tracehook_*(). Each hook declared below - * has a detailed kerneldoc comment giving the context (locking et - * al) from which it is called, and the meaning of its return value. - * - * Each function here typically has only one call site, so it is ok - * to have some nontrivial tracehook_*() inlines. In all cases, the - * fast path when no tracing is enabled should be very short. - * - * The purpose of this file and the tracehook_* layer is to consolidate - * the interface that the kernel core and arch code uses to enable any - * user debugging or tracing facility (such as ptrace). The interfaces - * here are carefully documented so that maintainers of core and arch - * code do not need to think about the implementation details of the - * tracing facilities. Likewise, maintainers of the tracing code do not - * need to understand all the calling core or arch code in detail, just - * documented circumstances of each call, such as locking conditions. - * - * If the calling core code changes so that locking is different, then - * it is ok to change the interface documented here. The maintainer of - * core code changing should notify the maintainers of the tracing code - * that they need to work out the change. - * - * Some tracehook_*() inlines take arguments that the current tracing - * implementations might not necessarily use. These function signatures - * are chosen to pass in all the information that is on hand in the - * caller and might conceivably be relevant to a tracer, so that the - * core code won't have to be updated when tracing adds more features. - * If a call site changes so that some of those parameters are no longer - * already on hand without extra work, then the tracehook_* interface - * can change so there is no make-work burden on the core code. The - * maintainer of core code changing should notify the maintainers of the - * tracing code that they need to work out the change. - */ - -#ifndef _LINUX_TRACEHOOK_H -#define _LINUX_TRACEHOOK_H 1 - -#include -#include -#include -#include -#include -#include -struct linux_binprm; - - - -#endif /* */ diff --git a/kernel/exit.c b/kernel/exit.c index b00a25bb4ab9..9326d1f97fc7 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -49,7 +49,8 @@ #include /* for audit_free() */ #include #include -#include +#include +#include #include #include #include diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c index 5683ac0d2566..df808d97d84f 100644 --- a/kernel/livepatch/transition.c +++ b/kernel/livepatch/transition.c @@ -9,7 +9,6 @@ #include #include -#include #include "core.h" #include "patch.h" #include "transition.h" diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 4d8f44a17727..63198086ee83 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include diff --git a/kernel/signal.c b/kernel/signal.c index 8632b88982c9..c2dee5420567 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index 583680f6cd81..a29e69d2c300 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 5b6895e4fc29..4d2cd6b9f6fc 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 336d4b814bf078fa698488632c19beca47308896 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 27 Jan 2022 12:15:32 -0600 Subject: ptrace: Move setting/clearing ptrace_message into ptrace_stop Today ptrace_message is easy to overlook as it not a core part of ptrace_stop. It has been overlooked so much that there are places that set ptrace_message and don't clear it, and places that never set it. So if you get an unlucky sequence of events the ptracer may be able to read a ptrace_message that does not apply to the current ptrace stop. Move setting of ptrace_message into ptrace_stop so that it always gets set before the stop, and always gets cleared after the stop. This prevents non-sense from being reported to userspace and makes ptrace_message more visible in the ptrace helper functions so that kernel developers can see it. Link: https://lkml.kernel.org/r/87bky67qfv.fsf_-_@email.froward.int.ebiederm.org Acked-by: Oleg Nesterov Reviewed-by: Kees Cook Signed-off-by: "Eric W. Biederman" --- include/linux/ptrace.h | 9 +++------ include/uapi/linux/ptrace.h | 2 +- kernel/signal.c | 21 ++++++++++++--------- 3 files changed, 16 insertions(+), 16 deletions(-) (limited to 'kernel') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 5310f43e4762..3e6b46e2b7be 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -60,7 +60,7 @@ extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned extern void ptrace_disable(struct task_struct *); extern int ptrace_request(struct task_struct *child, long request, unsigned long addr, unsigned long data); -extern void ptrace_notify(int exit_code); +extern void ptrace_notify(int exit_code, unsigned long message); extern void __ptrace_link(struct task_struct *child, struct task_struct *new_parent, const struct cred *ptracer_cred); @@ -155,8 +155,7 @@ static inline bool ptrace_event_enabled(struct task_struct *task, int event) static inline void ptrace_event(int event, unsigned long message) { if (unlikely(ptrace_event_enabled(current, event))) { - current->ptrace_message = message; - ptrace_notify((event << 8) | SIGTRAP); + ptrace_notify((event << 8) | SIGTRAP, message); } else if (event == PTRACE_EVENT_EXEC) { /* legacy EXEC report via SIGTRAP */ if ((current->ptrace & (PT_PTRACED|PT_SEIZED)) == PT_PTRACED) @@ -424,8 +423,7 @@ static inline int ptrace_report_syscall(unsigned long message) if (!(ptrace & PT_PTRACED)) return 0; - current->ptrace_message = message; - ptrace_notify(SIGTRAP | ((ptrace & PT_TRACESYSGOOD) ? 0x80 : 0)); + ptrace_notify(SIGTRAP | ((ptrace & PT_TRACESYSGOOD) ? 0x80 : 0), message); /* * this isn't the same as continuing with a signal, but it will do @@ -437,7 +435,6 @@ static inline int ptrace_report_syscall(unsigned long message) current->exit_code = 0; } - current->ptrace_message = 0; return fatal_signal_pending(current); } diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h index b7af92e07d1f..195ae64a8c87 100644 --- a/include/uapi/linux/ptrace.h +++ b/include/uapi/linux/ptrace.h @@ -114,7 +114,7 @@ struct ptrace_rseq_configuration { /* * These values are stored in task->ptrace_message - * by ptrace_report_syscall_* to describe the current syscall-stop. + * by ptrace_stop to describe the current syscall-stop. */ #define PTRACE_EVENTMSG_SYSCALL_ENTRY 1 #define PTRACE_EVENTMSG_SYSCALL_EXIT 2 diff --git a/kernel/signal.c b/kernel/signal.c index c2dee5420567..a49ac7149256 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2191,7 +2191,8 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, * If we actually decide not to stop at all because the tracer * is gone, we keep current->exit_code unless clear_code. */ -static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t *info) +static void ptrace_stop(int exit_code, int why, int clear_code, + unsigned long message, kernel_siginfo_t *info) __releases(¤t->sighand->siglock) __acquires(¤t->sighand->siglock) { @@ -2237,6 +2238,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t */ smp_wmb(); + current->ptrace_message = message; current->last_siginfo = info; current->exit_code = exit_code; @@ -2315,6 +2317,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t */ spin_lock_irq(¤t->sighand->siglock); current->last_siginfo = NULL; + current->ptrace_message = 0; /* LISTENING can be set only during STOP traps, clear it */ current->jobctl &= ~JOBCTL_LISTENING; @@ -2327,7 +2330,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t recalc_sigpending_tsk(current); } -static void ptrace_do_notify(int signr, int exit_code, int why) +static void ptrace_do_notify(int signr, int exit_code, int why, unsigned long message) { kernel_siginfo_t info; @@ -2338,17 +2341,17 @@ static void ptrace_do_notify(int signr, int exit_code, int why) info.si_uid = from_kuid_munged(current_user_ns(), current_uid()); /* Let the debugger run. */ - ptrace_stop(exit_code, why, 1, &info); + ptrace_stop(exit_code, why, 1, message, &info); } -void ptrace_notify(int exit_code) +void ptrace_notify(int exit_code, unsigned long message) { BUG_ON((exit_code & (0x7f | ~0xffff)) != SIGTRAP); if (unlikely(task_work_pending(current))) task_work_run(); spin_lock_irq(¤t->sighand->siglock); - ptrace_do_notify(SIGTRAP, exit_code, CLD_TRAPPED); + ptrace_do_notify(SIGTRAP, exit_code, CLD_TRAPPED, message); spin_unlock_irq(¤t->sighand->siglock); } @@ -2504,10 +2507,10 @@ static void do_jobctl_trap(void) signr = SIGTRAP; WARN_ON_ONCE(!signr); ptrace_do_notify(signr, signr | (PTRACE_EVENT_STOP << 8), - CLD_STOPPED); + CLD_STOPPED, 0); } else { WARN_ON_ONCE(!signr); - ptrace_stop(signr, CLD_STOPPED, 0, NULL); + ptrace_stop(signr, CLD_STOPPED, 0, 0, NULL); current->exit_code = 0; } } @@ -2561,7 +2564,7 @@ static int ptrace_signal(int signr, kernel_siginfo_t *info, enum pid_type type) * comment in dequeue_signal(). */ current->jobctl |= JOBCTL_STOP_DEQUEUED; - ptrace_stop(signr, CLD_TRAPPED, 0, info); + ptrace_stop(signr, CLD_TRAPPED, 0, 0, info); /* We're back. Did the debugger cancel the sig? */ signr = current->exit_code; @@ -2891,7 +2894,7 @@ static void signal_delivered(struct ksignal *ksig, int stepping) if (current->sas_ss_flags & SS_AUTODISARM) sas_ss_reset(current); if (stepping) - ptrace_notify(SIGTRAP); + ptrace_notify(SIGTRAP, 0); } void signal_setup_done(int failed, struct ksignal *ksig, int stepping) -- cgit v1.2.3 From 6487d1dab837214ec2fd3f0ddd5f787e63be7c20 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 27 Jan 2022 12:19:13 -0600 Subject: ptrace: Return the signal to continue with from ptrace_stop The signal a task should continue with after a ptrace stop is inconsistently read, cleared, and sent. Solve this by reading and clearing the signal to be sent in ptrace_stop. In an ideal world everything except ptrace_signal would share a common implementation of continuing with the signal, so ptracers could count on the signal they ask to continue with actually being delivered. For now retain bug compatibility and just return with the signal number the ptracer requested the code continue with. Link: https://lkml.kernel.org/r/875yoe7qdp.fsf_-_@email.froward.int.ebiederm.org Reviewed-by: Kees Cook Signed-off-by: "Eric W. Biederman" --- include/linux/ptrace.h | 12 ++++++------ kernel/signal.c | 32 +++++++++++++++++++------------- 2 files changed, 25 insertions(+), 19 deletions(-) (limited to 'kernel') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 3e6b46e2b7be..15b3d176b6b4 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -60,7 +60,7 @@ extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned extern void ptrace_disable(struct task_struct *); extern int ptrace_request(struct task_struct *child, long request, unsigned long addr, unsigned long data); -extern void ptrace_notify(int exit_code, unsigned long message); +extern int ptrace_notify(int exit_code, unsigned long message); extern void __ptrace_link(struct task_struct *child, struct task_struct *new_parent, const struct cred *ptracer_cred); @@ -419,21 +419,21 @@ extern void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oa static inline int ptrace_report_syscall(unsigned long message) { int ptrace = current->ptrace; + int signr; if (!(ptrace & PT_PTRACED)) return 0; - ptrace_notify(SIGTRAP | ((ptrace & PT_TRACESYSGOOD) ? 0x80 : 0), message); + signr = ptrace_notify(SIGTRAP | ((ptrace & PT_TRACESYSGOOD) ? 0x80 : 0), + message); /* * this isn't the same as continuing with a signal, but it will do * for normal use. strace only continues with a signal if the * stopping signal is not SIGTRAP. -brl */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; - } + if (signr) + send_sig(signr, current, 1); return fatal_signal_pending(current); } diff --git a/kernel/signal.c b/kernel/signal.c index a49ac7149256..f1c4ab85833c 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2188,15 +2188,17 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, * That makes it a way to test a stopped process for * being ptrace-stopped vs being job-control-stopped. * - * If we actually decide not to stop at all because the tracer - * is gone, we keep current->exit_code unless clear_code. + * Returns the signal the ptracer requested the code resume + * with. If the code did not stop because the tracer is gone, + * the stop signal remains unchanged unless clear_code. */ -static void ptrace_stop(int exit_code, int why, int clear_code, +static int ptrace_stop(int exit_code, int why, int clear_code, unsigned long message, kernel_siginfo_t *info) __releases(¤t->sighand->siglock) __acquires(¤t->sighand->siglock) { bool gstop_done = false; + bool read_code = true; if (arch_ptrace_stop_needed()) { /* @@ -2305,8 +2307,9 @@ static void ptrace_stop(int exit_code, int why, int clear_code, /* tasklist protects us from ptrace_freeze_traced() */ __set_current_state(TASK_RUNNING); + read_code = false; if (clear_code) - current->exit_code = 0; + exit_code = 0; read_unlock(&tasklist_lock); } @@ -2316,8 +2319,11 @@ static void ptrace_stop(int exit_code, int why, int clear_code, * any signal-sending on another CPU that wants to examine it. */ spin_lock_irq(¤t->sighand->siglock); + if (read_code) + exit_code = current->exit_code; current->last_siginfo = NULL; current->ptrace_message = 0; + current->exit_code = 0; /* LISTENING can be set only during STOP traps, clear it */ current->jobctl &= ~JOBCTL_LISTENING; @@ -2328,9 +2334,10 @@ static void ptrace_stop(int exit_code, int why, int clear_code, * This sets TIF_SIGPENDING, but never clears it. */ recalc_sigpending_tsk(current); + return exit_code; } -static void ptrace_do_notify(int signr, int exit_code, int why, unsigned long message) +static int ptrace_do_notify(int signr, int exit_code, int why, unsigned long message) { kernel_siginfo_t info; @@ -2341,18 +2348,21 @@ static void ptrace_do_notify(int signr, int exit_code, int why, unsigned long me info.si_uid = from_kuid_munged(current_user_ns(), current_uid()); /* Let the debugger run. */ - ptrace_stop(exit_code, why, 1, message, &info); + return ptrace_stop(exit_code, why, 1, message, &info); } -void ptrace_notify(int exit_code, unsigned long message) +int ptrace_notify(int exit_code, unsigned long message) { + int signr; + BUG_ON((exit_code & (0x7f | ~0xffff)) != SIGTRAP); if (unlikely(task_work_pending(current))) task_work_run(); spin_lock_irq(¤t->sighand->siglock); - ptrace_do_notify(SIGTRAP, exit_code, CLD_TRAPPED, message); + signr = ptrace_do_notify(SIGTRAP, exit_code, CLD_TRAPPED, message); spin_unlock_irq(¤t->sighand->siglock); + return signr; } /** @@ -2511,7 +2521,6 @@ static void do_jobctl_trap(void) } else { WARN_ON_ONCE(!signr); ptrace_stop(signr, CLD_STOPPED, 0, 0, NULL); - current->exit_code = 0; } } @@ -2564,15 +2573,12 @@ static int ptrace_signal(int signr, kernel_siginfo_t *info, enum pid_type type) * comment in dequeue_signal(). */ current->jobctl |= JOBCTL_STOP_DEQUEUED; - ptrace_stop(signr, CLD_TRAPPED, 0, 0, info); + signr = ptrace_stop(signr, CLD_TRAPPED, 0, 0, info); /* We're back. Did the debugger cancel the sig? */ - signr = current->exit_code; if (signr == 0) return signr; - current->exit_code = 0; - /* * Update the siginfo structure if the signal has * changed. If the debugger wanted something -- cgit v1.2.3 From ee1fee900537b5d9560e9f937402de5ddc8412f3 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Sat, 19 Mar 2022 02:08:37 +0100 Subject: ptrace: Check PTRACE_O_SUSPEND_SECCOMP permission on PTRACE_SEIZE Setting PTRACE_O_SUSPEND_SECCOMP is supposed to be a highly privileged operation because it allows the tracee to completely bypass all seccomp filters on kernels with CONFIG_CHECKPOINT_RESTORE=y. It is only supposed to be settable by a process with global CAP_SYS_ADMIN, and only if that process is not subject to any seccomp filters at all. However, while these permission checks were done on the PTRACE_SETOPTIONS path, they were missing on the PTRACE_SEIZE path, which also sets user-specified ptrace flags. Move the permissions checks out into a helper function and let both ptrace_attach() and ptrace_setoptions() call it. Cc: stable@kernel.org Fixes: 13c4a90119d2 ("seccomp: add ptrace options for suspend/resume") Signed-off-by: Jann Horn Link: https://lkml.kernel.org/r/20220319010838.1386861-1-jannh@google.com Signed-off-by: Eric W. Biederman --- kernel/ptrace.c | 47 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/kernel/ptrace.c b/kernel/ptrace.c index eea265082e97..ccc4b465775b 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -371,6 +371,26 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode) return !err; } +static int check_ptrace_options(unsigned long data) +{ + if (data & ~(unsigned long)PTRACE_O_MASK) + return -EINVAL; + + if (unlikely(data & PTRACE_O_SUSPEND_SECCOMP)) { + if (!IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) || + !IS_ENABLED(CONFIG_SECCOMP)) + return -EINVAL; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (seccomp_mode(¤t->seccomp) != SECCOMP_MODE_DISABLED || + current->ptrace & PT_SUSPEND_SECCOMP) + return -EPERM; + } + return 0; +} + static int ptrace_attach(struct task_struct *task, long request, unsigned long addr, unsigned long flags) @@ -382,8 +402,16 @@ static int ptrace_attach(struct task_struct *task, long request, if (seize) { if (addr != 0) goto out; + /* + * This duplicates the check in check_ptrace_options() because + * ptrace_attach() and ptrace_setoptions() have historically + * used different error codes for unknown ptrace options. + */ if (flags & ~(unsigned long)PTRACE_O_MASK) goto out; + retval = check_ptrace_options(flags); + if (retval) + return retval; flags = PT_PTRACED | PT_SEIZED | (flags << PT_OPT_FLAG_SHIFT); } else { flags = PT_PTRACED; @@ -654,22 +682,11 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds static int ptrace_setoptions(struct task_struct *child, unsigned long data) { unsigned flags; + int ret; - if (data & ~(unsigned long)PTRACE_O_MASK) - return -EINVAL; - - if (unlikely(data & PTRACE_O_SUSPEND_SECCOMP)) { - if (!IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) || - !IS_ENABLED(CONFIG_SECCOMP)) - return -EINVAL; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (seccomp_mode(¤t->seccomp) != SECCOMP_MODE_DISABLED || - current->ptrace & PT_SUSPEND_SECCOMP) - return -EPERM; - } + ret = check_ptrace_options(data); + if (ret) + return ret; /* Avoid intermediate state when all opts are cleared */ flags = child->ptrace; -- cgit v1.2.3