diff options
author | Daan De Meyer <daan.j.demeyer@gmail.com> | 2024-08-29 17:10:46 +0200 |
---|---|---|
committer | Ryan Wilson <ryantimwilson@meta.com> | 2024-11-05 14:32:02 +0100 |
commit | 406f1775017a5631bc91a1f53ac5e50f4fbfac0c (patch) | |
tree | a47f9ee6d0b9e1b036300949e2d803d676497946 /src/basic | |
parent | exec-invoke: Add debug logging for setup_private_users() (diff) | |
download | systemd-406f1775017a5631bc91a1f53ac5e50f4fbfac0c.tar.xz systemd-406f1775017a5631bc91a1f53ac5e50f4fbfac0c.zip |
core: Introduce PrivatePIDs=
This new setting allows unsharing the pid namespace in a unit. Because
you have to fork to get a process into a pid namespace, we fork in
systemd-executor to get into the new pid namespace. The parent then
sends the pid of the child process back to the manager and exits while
the child process continues on with the rest of exec_invoke() and then
executes the actual payload.
Communicating the child pid is done via a new pidref socket pair that is
set up on manager startup.
We unshare the PID namespace right before the mount namespace so we
mount procfs correctly. Note PrivatePIDs=yes always implies MountAPIVFS=yes
to mount procfs.
When running unprivileged in a user session, the user namespace is set up first
to allow the PID namespace to be unshared. However, when running in
privileged mode, we unshare the user namespace last to ensure the user
namespace does not own the PID namespace and cannot break out of the sandbox.
Note we disallow Type=forking services from using PrivatePIDs=yes since the
init process inside the PID namespace must not exit for other processes in
the namespace to exist.
Note Daan De Meyer did the original work for this commit with Ryan Wilson
addressing follow-ups.
Co-authored-by: Daan De Meyer <daan.j.demeyer@gmail.com>
Diffstat (limited to 'src/basic')
-rw-r--r-- | src/basic/process-util.c | 5 | ||||
-rw-r--r-- | src/basic/process-util.h | 27 |
2 files changed, 17 insertions, 15 deletions
diff --git a/src/basic/process-util.c b/src/basic/process-util.c index a85a1b35f0..75bc65652e 100644 --- a/src/basic/process-util.c +++ b/src/basic/process-util.c @@ -1521,11 +1521,12 @@ int safe_fork_full( } } - if ((flags & (FORK_NEW_MOUNTNS|FORK_NEW_USERNS|FORK_NEW_NETNS)) != 0) + if ((flags & (FORK_NEW_MOUNTNS|FORK_NEW_USERNS|FORK_NEW_NETNS|FORK_NEW_PIDNS)) != 0) pid = raw_clone(SIGCHLD| (FLAGS_SET(flags, FORK_NEW_MOUNTNS) ? CLONE_NEWNS : 0) | (FLAGS_SET(flags, FORK_NEW_USERNS) ? CLONE_NEWUSER : 0) | - (FLAGS_SET(flags, FORK_NEW_NETNS) ? CLONE_NEWNET : 0)); + (FLAGS_SET(flags, FORK_NEW_NETNS) ? CLONE_NEWNET : 0) | + (FLAGS_SET(flags, FORK_NEW_PIDNS) ? CLONE_NEWPID : 0)); else pid = fork(); if (pid < 0) diff --git a/src/basic/process-util.h b/src/basic/process-util.h index 05b7a69fc6..cb6d47a5bb 100644 --- a/src/basic/process-util.h +++ b/src/basic/process-util.h @@ -166,7 +166,7 @@ int must_be_root(void); pid_t clone_with_nested_stack(int (*fn)(void *), int flags, void *userdata); -/* 💣 Note that FORK_NEW_USERNS, FORK_NEW_MOUNTNS, or FORK_NEW_NETNS should not be called in threaded +/* 💣 Note that FORK_NEW_USERNS, FORK_NEW_MOUNTNS, FORK_NEW_NETNS or FORK_NEW_PIDNS should not be called in threaded * programs, because they cause us to use raw_clone() which does not synchronize the glibc malloc() locks, * and thus will cause deadlocks if the parent uses threads and the child does memory allocations. Hence: if * the parent is threaded these flags may not be used. These flags cannot be used if the parent uses threads @@ -181,18 +181,19 @@ typedef enum ForkFlags { FORK_REOPEN_LOG = 1 << 6, /* Reopen log connection */ FORK_LOG = 1 << 7, /* Log above LOG_DEBUG log level about failures */ FORK_WAIT = 1 << 8, /* Wait until child exited */ - FORK_NEW_MOUNTNS = 1 << 9, /* Run child in its own mount namespace 💣 DO NOT USE IN THREADED PROGRAMS! 
💣 */ - FORK_MOUNTNS_SLAVE = 1 << 10, /* Make child's mount namespace MS_SLAVE */ - FORK_PRIVATE_TMP = 1 << 11, /* Mount new /tmp/ in the child (combine with FORK_NEW_MOUNTNS!) */ - FORK_RLIMIT_NOFILE_SAFE = 1 << 12, /* Set RLIMIT_NOFILE soft limit to 1K for select() compat */ - FORK_STDOUT_TO_STDERR = 1 << 13, /* Make stdout a copy of stderr */ - FORK_FLUSH_STDIO = 1 << 14, /* fflush() stdout (and stderr) before forking */ - FORK_NEW_USERNS = 1 << 15, /* Run child in its own user namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */ - FORK_CLOEXEC_OFF = 1 << 16, /* In the child: turn off O_CLOEXEC on all fds in except_fds[] */ - FORK_KEEP_NOTIFY_SOCKET = 1 << 17, /* Unless this specified, $NOTIFY_SOCKET will be unset. */ - FORK_DETACH = 1 << 18, /* Double fork if needed to ensure PID1/subreaper is parent */ - FORK_NEW_NETNS = 1 << 19, /* Run child in its own network namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */ - FORK_PACK_FDS = 1 << 20, /* Rearrange the passed FDs to be FD 3,4,5,etc. Updates the array in place (combine with FORK_CLOSE_ALL_FDS!) */ + FORK_MOUNTNS_SLAVE = 1 << 9, /* Make child's mount namespace MS_SLAVE */ + FORK_PRIVATE_TMP = 1 << 10, /* Mount new /tmp/ in the child (combine with FORK_NEW_MOUNTNS!) */ + FORK_RLIMIT_NOFILE_SAFE = 1 << 11, /* Set RLIMIT_NOFILE soft limit to 1K for select() compat */ + FORK_STDOUT_TO_STDERR = 1 << 12, /* Make stdout a copy of stderr */ + FORK_FLUSH_STDIO = 1 << 13, /* fflush() stdout (and stderr) before forking */ + FORK_CLOEXEC_OFF = 1 << 14, /* In the child: turn off O_CLOEXEC on all fds in except_fds[] */ + FORK_KEEP_NOTIFY_SOCKET = 1 << 15, /* Unless this specified, $NOTIFY_SOCKET will be unset. */ + FORK_DETACH = 1 << 16, /* Double fork if needed to ensure PID1/subreaper is parent */ + FORK_PACK_FDS = 1 << 17, /* Rearrange the passed FDs to be FD 3,4,5,etc. Updates the array in place (combine with FORK_CLOSE_ALL_FDS!) 
*/ + FORK_NEW_MOUNTNS = 1 << 18, /* Run child in its own mount namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */ + FORK_NEW_USERNS = 1 << 19, /* Run child in its own user namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */ + FORK_NEW_NETNS = 1 << 20, /* Run child in its own network namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */ + FORK_NEW_PIDNS = 1 << 21, /* Run child in its own PID namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */ } ForkFlags; int safe_fork_full( |