summaryrefslogtreecommitdiffstats
path: root/src/basic
diff options
context:
space:
mode:
author: Daan De Meyer <daan.j.demeyer@gmail.com> 2024-08-29 17:10:46 +0200
committer: Ryan Wilson <ryantimwilson@meta.com> 2024-11-05 14:32:02 +0100
commit: 406f1775017a5631bc91a1f53ac5e50f4fbfac0c (patch)
tree: a47f9ee6d0b9e1b036300949e2d803d676497946 /src/basic
parent: exec-invoke: Add debug logging for setup_private_users() (diff)
download: systemd-406f1775017a5631bc91a1f53ac5e50f4fbfac0c.tar.xz
systemd-406f1775017a5631bc91a1f53ac5e50f4fbfac0c.zip
core: Introduce PrivatePIDs=
This new setting allows unsharing the pid namespace in a unit. Because you have to fork to get a process into a pid namespace, we fork in systemd-executor to get into the new pid namespace. The parent then sends the pid of the child process back to the manager and exits while the child process continues on with the rest of exec_invoke() and then executes the actual payload. Communicating the child pid is done via a new pidref socket pair that is set up on manager startup. We unshare the PID namespace right before the mount namespace so we mount procfs correctly. Note PrivatePIDs=yes always implies MountAPIVFS=yes to mount procfs. When running unprivileged in a user session, user namespace is set up first to allow for PID namespace to be unshared. However, when running in privileged mode, we unshare the user namespace last to ensure the user namespace does not own the PID namespace and cannot break out of the sandbox. Note we disallow Type=forking services from using PrivatePIDs=yes since the init process inside the PID namespace must not exit for other processes in the namespace to exist. Note Daan De Meyer did the original work for this commit with Ryan Wilson addressing follow-ups. Co-authored-by: Daan De Meyer <daan.j.demeyer@gmail.com>
Diffstat (limited to 'src/basic')
-rw-r--r-- src/basic/process-util.c 5
-rw-r--r-- src/basic/process-util.h 27
2 files changed, 17 insertions, 15 deletions
diff --git a/src/basic/process-util.c b/src/basic/process-util.c
index a85a1b35f0..75bc65652e 100644
--- a/src/basic/process-util.c
+++ b/src/basic/process-util.c
@@ -1521,11 +1521,12 @@ int safe_fork_full(
}
}
- if ((flags & (FORK_NEW_MOUNTNS|FORK_NEW_USERNS|FORK_NEW_NETNS)) != 0)
+ if ((flags & (FORK_NEW_MOUNTNS|FORK_NEW_USERNS|FORK_NEW_NETNS|FORK_NEW_PIDNS)) != 0)
pid = raw_clone(SIGCHLD|
(FLAGS_SET(flags, FORK_NEW_MOUNTNS) ? CLONE_NEWNS : 0) |
(FLAGS_SET(flags, FORK_NEW_USERNS) ? CLONE_NEWUSER : 0) |
- (FLAGS_SET(flags, FORK_NEW_NETNS) ? CLONE_NEWNET : 0));
+ (FLAGS_SET(flags, FORK_NEW_NETNS) ? CLONE_NEWNET : 0) |
+ (FLAGS_SET(flags, FORK_NEW_PIDNS) ? CLONE_NEWPID : 0));
else
pid = fork();
if (pid < 0)
diff --git a/src/basic/process-util.h b/src/basic/process-util.h
index 05b7a69fc6..cb6d47a5bb 100644
--- a/src/basic/process-util.h
+++ b/src/basic/process-util.h
@@ -166,7 +166,7 @@ int must_be_root(void);
pid_t clone_with_nested_stack(int (*fn)(void *), int flags, void *userdata);
-/* 💣 Note that FORK_NEW_USERNS, FORK_NEW_MOUNTNS, or FORK_NEW_NETNS should not be called in threaded
+/* 💣 Note that FORK_NEW_USERNS, FORK_NEW_MOUNTNS, FORK_NEW_NETNS or FORK_NEW_PIDNS should not be called in threaded
* programs, because they cause us to use raw_clone() which does not synchronize the glibc malloc() locks,
* and thus will cause deadlocks if the parent uses threads and the child does memory allocations. Hence: if
* the parent is threaded these flags may not be used. These flags cannot be used if the parent uses threads
@@ -181,18 +181,19 @@ typedef enum ForkFlags {
FORK_REOPEN_LOG = 1 << 6, /* Reopen log connection */
FORK_LOG = 1 << 7, /* Log above LOG_DEBUG log level about failures */
FORK_WAIT = 1 << 8, /* Wait until child exited */
- FORK_NEW_MOUNTNS = 1 << 9, /* Run child in its own mount namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */
- FORK_MOUNTNS_SLAVE = 1 << 10, /* Make child's mount namespace MS_SLAVE */
- FORK_PRIVATE_TMP = 1 << 11, /* Mount new /tmp/ in the child (combine with FORK_NEW_MOUNTNS!) */
- FORK_RLIMIT_NOFILE_SAFE = 1 << 12, /* Set RLIMIT_NOFILE soft limit to 1K for select() compat */
- FORK_STDOUT_TO_STDERR = 1 << 13, /* Make stdout a copy of stderr */
- FORK_FLUSH_STDIO = 1 << 14, /* fflush() stdout (and stderr) before forking */
- FORK_NEW_USERNS = 1 << 15, /* Run child in its own user namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */
- FORK_CLOEXEC_OFF = 1 << 16, /* In the child: turn off O_CLOEXEC on all fds in except_fds[] */
- FORK_KEEP_NOTIFY_SOCKET = 1 << 17, /* Unless this specified, $NOTIFY_SOCKET will be unset. */
- FORK_DETACH = 1 << 18, /* Double fork if needed to ensure PID1/subreaper is parent */
- FORK_NEW_NETNS = 1 << 19, /* Run child in its own network namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */
- FORK_PACK_FDS = 1 << 20, /* Rearrange the passed FDs to be FD 3,4,5,etc. Updates the array in place (combine with FORK_CLOSE_ALL_FDS!) */
+ FORK_MOUNTNS_SLAVE = 1 << 9, /* Make child's mount namespace MS_SLAVE */
+ FORK_PRIVATE_TMP = 1 << 10, /* Mount new /tmp/ in the child (combine with FORK_NEW_MOUNTNS!) */
+ FORK_RLIMIT_NOFILE_SAFE = 1 << 11, /* Set RLIMIT_NOFILE soft limit to 1K for select() compat */
+ FORK_STDOUT_TO_STDERR = 1 << 12, /* Make stdout a copy of stderr */
+ FORK_FLUSH_STDIO = 1 << 13, /* fflush() stdout (and stderr) before forking */
+ FORK_CLOEXEC_OFF = 1 << 14, /* In the child: turn off O_CLOEXEC on all fds in except_fds[] */
+ FORK_KEEP_NOTIFY_SOCKET = 1 << 15, /* Unless this specified, $NOTIFY_SOCKET will be unset. */
+ FORK_DETACH = 1 << 16, /* Double fork if needed to ensure PID1/subreaper is parent */
+ FORK_PACK_FDS = 1 << 17, /* Rearrange the passed FDs to be FD 3,4,5,etc. Updates the array in place (combine with FORK_CLOSE_ALL_FDS!) */
+ FORK_NEW_MOUNTNS = 1 << 18, /* Run child in its own mount namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */
+ FORK_NEW_USERNS = 1 << 19, /* Run child in its own user namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */
+ FORK_NEW_NETNS = 1 << 20, /* Run child in its own network namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */
+ FORK_NEW_PIDNS = 1 << 21, /* Run child in its own PID namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */
} ForkFlags;
int safe_fork_full(