summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2021-02-07 19:16:24 +0100
committer: Linus Torvalds <torvalds@linux-foundation.org> 2021-02-07 19:16:24 +0100
commit: c6792d44d8f08451047051351dfdcc8332a028e3 (patch)
tree: 6a8456c6eb5b312e64131c1943bd552aed35d967
parent: Merge tag 'sched_urgent_for_v5.11_rc7' of git://git.kernel.org/pub/scm/linux/... (diff)
parent: entry: Use different define for selector variable in SUD (diff)
download: linux-c6792d44d8f08451047051351dfdcc8332a028e3.tar.xz
download: linux-c6792d44d8f08451047051351dfdcc8332a028e3.zip
Merge tag 'core_urgent_for_v5.11_rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull syscall entry fixes from Borislav Petkov:

 - For syscall user dispatch, separate prctl operation from syscall
   redirection range specification before the API has been made official
   in 5.11.

 - Ensure tasks using the generic syscall code do trap after returning
   from a syscall when single-stepping is requested.

* tag 'core_urgent_for_v5.11_rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  entry: Use different define for selector variable in SUD
  entry: Ensure trap after single-step on system call return
-rw-r--r-- Documentation/admin-guide/syscall-user-dispatch.rst 4
-rw-r--r-- arch/x86/include/asm/entry-common.h 2
-rw-r--r-- arch/x86/kernel/step.c 10
-rw-r--r-- include/linux/entry-common.h 1
-rw-r--r-- include/linux/thread_info.h 2
-rw-r--r-- include/uapi/linux/prctl.h 3
-rw-r--r-- kernel/entry/common.c 12
-rw-r--r-- kernel/entry/syscall_user_dispatch.c 4
-rw-r--r-- tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c 8
-rw-r--r-- tools/testing/selftests/syscall_user_dispatch/sud_test.c 14
10 files changed, 33 insertions, 27 deletions
diff --git a/Documentation/admin-guide/syscall-user-dispatch.rst b/Documentation/admin-guide/syscall-user-dispatch.rst
index a380d6515774..60314953c728 100644
--- a/Documentation/admin-guide/syscall-user-dispatch.rst
+++ b/Documentation/admin-guide/syscall-user-dispatch.rst
@@ -70,8 +70,8 @@ trampoline code on the vDSO, that trampoline is never intercepted.
[selector] is a pointer to a char-sized region in the process memory
region, that provides a quick way to enable disable syscall redirection
thread-wide, without the need to invoke the kernel directly. selector
-can be set to PR_SYS_DISPATCH_ON or PR_SYS_DISPATCH_OFF. Any other
-value should terminate the program with a SIGSYS.
+can be set to SYSCALL_DISPATCH_FILTER_ALLOW or SYSCALL_DISPATCH_FILTER_BLOCK.
+Any other value should terminate the program with a SIGSYS.
Security Notes
--------------
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index 6fe54b2813c1..2b87b191b3b8 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -43,8 +43,6 @@ static __always_inline void arch_check_user_regs(struct pt_regs *regs)
}
#define arch_check_user_regs arch_check_user_regs
-#define ARCH_SYSCALL_EXIT_WORK (_TIF_SINGLESTEP)
-
static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
unsigned long ti_work)
{
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 60d2c3798ba2..0f3c307b37b3 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -127,12 +127,17 @@ static int enable_single_step(struct task_struct *child)
regs->flags |= X86_EFLAGS_TF;
/*
- * Always set TIF_SINGLESTEP - this guarantees that
- * we single-step system calls etc.. This will also
+ * Always set TIF_SINGLESTEP. This will also
* cause us to set TF when returning to user mode.
*/
set_tsk_thread_flag(child, TIF_SINGLESTEP);
+ /*
+ * Ensure that a trap is triggered once stepping out of a system
+ * call prior to executing any user instruction.
+ */
+ set_task_syscall_work(child, SYSCALL_EXIT_TRAP);
+
oflags = regs->flags;
/* Set TF on the kernel stack.. */
@@ -230,6 +235,7 @@ void user_disable_single_step(struct task_struct *child)
/* Always clear TIF_SINGLESTEP... */
clear_tsk_thread_flag(child, TIF_SINGLESTEP);
+ clear_task_syscall_work(child, SYSCALL_EXIT_TRAP);
/* But touch TF only if it was set by us.. */
if (test_and_clear_tsk_thread_flag(child, TIF_FORCED_TF))
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index ca86a00abe86..a104b298019a 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -46,6 +46,7 @@
SYSCALL_WORK_SYSCALL_TRACE | \
SYSCALL_WORK_SYSCALL_AUDIT | \
SYSCALL_WORK_SYSCALL_USER_DISPATCH | \
+ SYSCALL_WORK_SYSCALL_EXIT_TRAP | \
ARCH_SYSCALL_WORK_EXIT)
/*
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index c8a974cead73..9b2158c69275 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -43,6 +43,7 @@ enum syscall_work_bit {
SYSCALL_WORK_BIT_SYSCALL_EMU,
SYSCALL_WORK_BIT_SYSCALL_AUDIT,
SYSCALL_WORK_BIT_SYSCALL_USER_DISPATCH,
+ SYSCALL_WORK_BIT_SYSCALL_EXIT_TRAP,
};
#define SYSCALL_WORK_SECCOMP BIT(SYSCALL_WORK_BIT_SECCOMP)
@@ -51,6 +52,7 @@ enum syscall_work_bit {
#define SYSCALL_WORK_SYSCALL_EMU BIT(SYSCALL_WORK_BIT_SYSCALL_EMU)
#define SYSCALL_WORK_SYSCALL_AUDIT BIT(SYSCALL_WORK_BIT_SYSCALL_AUDIT)
#define SYSCALL_WORK_SYSCALL_USER_DISPATCH BIT(SYSCALL_WORK_BIT_SYSCALL_USER_DISPATCH)
+#define SYSCALL_WORK_SYSCALL_EXIT_TRAP BIT(SYSCALL_WORK_BIT_SYSCALL_EXIT_TRAP)
#endif
#include <asm/thread_info.h>
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 90deb41c8a34..667f1aed091c 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -251,5 +251,8 @@ struct prctl_mm_map {
#define PR_SET_SYSCALL_USER_DISPATCH 59
# define PR_SYS_DISPATCH_OFF 0
# define PR_SYS_DISPATCH_ON 1
+/* The control values for the user space selector when dispatch is enabled */
+# define SYSCALL_DISPATCH_FILTER_ALLOW 0
+# define SYSCALL_DISPATCH_FILTER_BLOCK 1
#endif /* _LINUX_PRCTL_H */
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index 6dd82be60df8..f9d491b17b78 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -209,15 +209,9 @@ static void exit_to_user_mode_prepare(struct pt_regs *regs)
lockdep_sys_exit();
}
-#ifndef _TIF_SINGLESTEP
-static inline bool report_single_step(unsigned long work)
-{
- return false;
-}
-#else
/*
* If SYSCALL_EMU is set, then the only reason to report is when
- * TIF_SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall
+ * SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall
* instruction has been already reported in syscall_enter_from_user_mode().
*/
static inline bool report_single_step(unsigned long work)
@@ -225,10 +219,8 @@ static inline bool report_single_step(unsigned long work)
if (work & SYSCALL_WORK_SYSCALL_EMU)
return false;
- return !!(current_thread_info()->flags & _TIF_SINGLESTEP);
+ return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP;
}
-#endif
-
static void syscall_exit_work(struct pt_regs *regs, unsigned long work)
{
diff --git a/kernel/entry/syscall_user_dispatch.c b/kernel/entry/syscall_user_dispatch.c
index b0338a5625d9..c240302f56e2 100644
--- a/kernel/entry/syscall_user_dispatch.c
+++ b/kernel/entry/syscall_user_dispatch.c
@@ -50,10 +50,10 @@ bool syscall_user_dispatch(struct pt_regs *regs)
if (unlikely(__get_user(state, sd->selector)))
do_exit(SIGSEGV);
- if (likely(state == PR_SYS_DISPATCH_OFF))
+ if (likely(state == SYSCALL_DISPATCH_FILTER_ALLOW))
return false;
- if (state != PR_SYS_DISPATCH_ON)
+ if (state != SYSCALL_DISPATCH_FILTER_BLOCK)
do_exit(SIGSYS);
}
diff --git a/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c b/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c
index 6689f1183dbf..073a03702ff5 100644
--- a/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c
+++ b/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c
@@ -22,6 +22,8 @@
# define PR_SET_SYSCALL_USER_DISPATCH 59
# define PR_SYS_DISPATCH_OFF 0
# define PR_SYS_DISPATCH_ON 1
+# define SYSCALL_DISPATCH_FILTER_ALLOW 0
+# define SYSCALL_DISPATCH_FILTER_BLOCK 1
#endif
#ifdef __NR_syscalls
@@ -55,8 +57,8 @@ unsigned long trapped_call_count = 0;
unsigned long native_call_count = 0;
char selector;
-#define SYSCALL_BLOCK (selector = PR_SYS_DISPATCH_ON)
-#define SYSCALL_UNBLOCK (selector = PR_SYS_DISPATCH_OFF)
+#define SYSCALL_BLOCK (selector = SYSCALL_DISPATCH_FILTER_BLOCK)
+#define SYSCALL_UNBLOCK (selector = SYSCALL_DISPATCH_FILTER_ALLOW)
#define CALIBRATION_STEP 100000
#define CALIBRATE_TO_SECS 5
@@ -170,7 +172,7 @@ int main(void)
syscall(MAGIC_SYSCALL_1);
#ifdef TEST_BLOCKED_RETURN
- if (selector == PR_SYS_DISPATCH_OFF) {
+ if (selector == SYSCALL_DISPATCH_FILTER_ALLOW) {
fprintf(stderr, "Failed to return with selector blocked.\n");
exit(-1);
}
diff --git a/tools/testing/selftests/syscall_user_dispatch/sud_test.c b/tools/testing/selftests/syscall_user_dispatch/sud_test.c
index 6498b050ef89..b5d592d4099e 100644
--- a/tools/testing/selftests/syscall_user_dispatch/sud_test.c
+++ b/tools/testing/selftests/syscall_user_dispatch/sud_test.c
@@ -18,6 +18,8 @@
# define PR_SET_SYSCALL_USER_DISPATCH 59
# define PR_SYS_DISPATCH_OFF 0
# define PR_SYS_DISPATCH_ON 1
+# define SYSCALL_DISPATCH_FILTER_ALLOW 0
+# define SYSCALL_DISPATCH_FILTER_BLOCK 1
#endif
#ifndef SYS_USER_DISPATCH
@@ -30,8 +32,8 @@
# define MAGIC_SYSCALL_1 (0xff00) /* Bad Linux syscall number */
#endif
-#define SYSCALL_DISPATCH_ON(x) ((x) = 1)
-#define SYSCALL_DISPATCH_OFF(x) ((x) = 0)
+#define SYSCALL_DISPATCH_ON(x) ((x) = SYSCALL_DISPATCH_FILTER_BLOCK)
+#define SYSCALL_DISPATCH_OFF(x) ((x) = SYSCALL_DISPATCH_FILTER_ALLOW)
/* Test Summary:
*
@@ -56,7 +58,7 @@
TEST_SIGNAL(dispatch_trigger_sigsys, SIGSYS)
{
- char sel = 0;
+ char sel = SYSCALL_DISPATCH_FILTER_ALLOW;
struct sysinfo info;
int ret;
@@ -79,7 +81,7 @@ TEST_SIGNAL(dispatch_trigger_sigsys, SIGSYS)
TEST(bad_prctl_param)
{
- char sel = 0;
+ char sel = SYSCALL_DISPATCH_FILTER_ALLOW;
int op;
/* Invalid op */
@@ -220,7 +222,7 @@ TEST_SIGNAL(bad_selector, SIGSYS)
sigset_t mask;
struct sysinfo info;
- glob_sel = 0;
+ glob_sel = SYSCALL_DISPATCH_FILTER_ALLOW;
nr_syscalls_emulated = 0;
si_code = 0;
si_errno = 0;
@@ -288,7 +290,7 @@ TEST(direct_dispatch_range)
{
int ret = 0;
struct sysinfo info;
- char sel = 0;
+ char sel = SYSCALL_DISPATCH_FILTER_ALLOW;
/*
* Instead of calculating libc addresses; allow the entire