summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMike Yuan <me@yhndnzj.com>2024-04-25 18:43:29 +0200
committerMike Yuan <me@yhndnzj.com>2024-09-03 21:04:50 +0200
commit368a3071e9124aae319bb44ba248fd4b08d6dd81 (patch)
treebc0d7c52f9323f0d534096406dc88e1e5b5ff41f
parentbuild(deps): bump systemd/mkosi (diff)
downloadsystemd-368a3071e9124aae319bb44ba248fd4b08d6dd81.tar.xz
systemd-368a3071e9124aae319bb44ba248fd4b08d6dd81.zip
core: introduce BindJournalSockets=
Closes #32478
-rw-r--r--man/org.freedesktop.systemd1.xml44
-rw-r--r--man/systemd.exec.xml17
-rw-r--r--src/core/dbus-execute.c5
-rw-r--r--src/core/exec-invoke.c2
-rw-r--r--src/core/execute-serialize.c8
-rw-r--r--src/core/execute.c26
-rw-r--r--src/core/execute.h2
-rw-r--r--src/core/load-fragment-gperf.gperf.in1
-rw-r--r--src/core/namespace.c13
-rw-r--r--src/core/namespace.h1
-rw-r--r--src/shared/bus-unit-util.c1
11 files changed, 110 insertions, 10 deletions
diff --git a/man/org.freedesktop.systemd1.xml b/man/org.freedesktop.systemd1.xml
index a5c98d3458..20bbae4487 100644
--- a/man/org.freedesktop.systemd1.xml
+++ b/man/org.freedesktop.systemd1.xml
@@ -3333,6 +3333,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly b MountAPIVFS = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
+ readonly b BindJournalSockets = ...;
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s KeyringMode = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s ProtectProc = '...';
@@ -3932,6 +3934,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
<!--property MountAPIVFS is not documented!-->
+ <!--property BindJournalSockets is not documented!-->
+
<!--property KeyringMode is not documented!-->
<!--property ProtectProc is not documented!-->
@@ -4642,6 +4646,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
<variablelist class="dbus-property" generated="True" extra-ref="MountAPIVFS"/>
+ <variablelist class="dbus-property" generated="True" extra-ref="BindJournalSockets"/>
+
<variablelist class="dbus-property" generated="True" extra-ref="KeyringMode"/>
<variablelist class="dbus-property" generated="True" extra-ref="ProtectProc"/>
@@ -5468,6 +5474,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly b MountAPIVFS = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
+ readonly b BindJournalSockets = ...;
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s KeyringMode = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s ProtectProc = '...';
@@ -6079,6 +6087,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
<!--property MountAPIVFS is not documented!-->
+ <!--property BindJournalSockets is not documented!-->
+
<!--property KeyringMode is not documented!-->
<!--property ProtectProc is not documented!-->
@@ -6763,6 +6773,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
<variablelist class="dbus-property" generated="True" extra-ref="MountAPIVFS"/>
+ <variablelist class="dbus-property" generated="True" extra-ref="BindJournalSockets"/>
+
<variablelist class="dbus-property" generated="True" extra-ref="KeyringMode"/>
<variablelist class="dbus-property" generated="True" extra-ref="ProtectProc"/>
@@ -7453,6 +7465,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly b MountAPIVFS = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
+ readonly b BindJournalSockets = ...;
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s KeyringMode = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s ProtectProc = '...';
@@ -7990,6 +8004,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
<!--property MountAPIVFS is not documented!-->
+ <!--property BindJournalSockets is not documented!-->
+
<!--property KeyringMode is not documented!-->
<!--property ProtectProc is not documented!-->
@@ -8586,6 +8602,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
<variablelist class="dbus-property" generated="True" extra-ref="MountAPIVFS"/>
+ <variablelist class="dbus-property" generated="True" extra-ref="BindJournalSockets"/>
+
<variablelist class="dbus-property" generated="True" extra-ref="KeyringMode"/>
<variablelist class="dbus-property" generated="True" extra-ref="ProtectProc"/>
@@ -9399,6 +9417,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly b MountAPIVFS = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
+ readonly b BindJournalSockets = ...;
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s KeyringMode = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s ProtectProc = '...';
@@ -9922,6 +9942,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
<!--property MountAPIVFS is not documented!-->
+ <!--property BindJournalSockets is not documented!-->
+
<!--property KeyringMode is not documented!-->
<!--property ProtectProc is not documented!-->
@@ -10504,6 +10526,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
<variablelist class="dbus-property" generated="True" extra-ref="MountAPIVFS"/>
+ <variablelist class="dbus-property" generated="True" extra-ref="BindJournalSockets"/>
+
<variablelist class="dbus-property" generated="True" extra-ref="KeyringMode"/>
<variablelist class="dbus-property" generated="True" extra-ref="ProtectProc"/>
@@ -12149,8 +12173,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \
<para><varname>StatusBusError</varname>,
<varname>StatusVarlinkError</varname>,
<varname>LiveMountResult</varname>,
- <varname>PrivateTmpEx</varname>, and
- <varname>ImportCredentialEx</varname> were added in version 257.</para>
+ <varname>PrivateTmpEx</varname>,
+ <varname>ImportCredentialEx</varname>, and
+ <varname>BindJournalSockets</varname> were added in version 257.</para>
</refsect2>
<refsect2>
<title>Socket Unit Objects</title>
@@ -12187,8 +12212,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \
<varname>EffectiveTasksMax</varname>,
<varname>MemoryZSwapWriteback</varname>, and
<varname>PassFileDescriptorsToExec</varname> were added in version 256.</para>
- <para><varname>PrivateTmpEx</varname>, and
- <varname>ImportCredentialEx</varname> were added in version 257.</para>
+ <para><varname>PrivateTmpEx</varname>,
+ <varname>ImportCredentialEx</varname>, and
+ <varname>BindJournalSockets</varname> were added in version 257.</para>
</refsect2>
<refsect2>
<title>Mount Unit Objects</title>
@@ -12222,8 +12248,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \
<varname>EffectiveMemoryMax</varname>,
<varname>EffectiveTasksMax</varname>, and
<varname>MemoryZSwapWriteback</varname> were added in version 256.</para>
- <para><varname>PrivateTmpEx</varname>, and
- <varname>ImportCredentialEx</varname> were added in version 257.</para>
+ <para><varname>PrivateTmpEx</varname>,
+ <varname>ImportCredentialEx</varname>, and
+ <varname>BindJournalSockets</varname> were added in version 257.</para>
</refsect2>
<refsect2>
<title>Swap Unit Objects</title>
@@ -12257,8 +12284,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \
<varname>EffectiveMemoryMax</varname>,
<varname>EffectiveTasksMax</varname>, and
<varname>MemoryZSwapWriteback</varname> were added in version 256.</para>
- <para><varname>PrivateTmpEx</varname>, and
- <varname>ImportCredentialEx</varname> were added in version 257.</para>
+ <para><varname>PrivateTmpEx</varname>,
+ <varname>ImportCredentialEx</varname>, and
+ <varname>BindJournalSockets</varname> were added in version 257.</para>
</refsect2>
<refsect2>
<title>Slice Unit Objects</title>
diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index c9ae38bf1f..0318961569 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -367,6 +367,23 @@
</varlistentry>
<varlistentry>
+ <term><varname>BindJournalSockets=</varname></term>
+
+ <listitem><para>Takes a boolean argument. If true, sockets from <citerefentry>
+ <refentrytitle>systemd-journald.socket</refentrytitle><manvolnum>8</manvolnum></citerefentry>
+ will be bind mounted into the mount namespace. This is particularly useful when a different instance
+ of <filename>/run/</filename> is employed, to make sure processes running in the namespace
+ can still make use of <citerefentry><refentrytitle>sd-journal</refentrytitle><manvolnum>3</manvolnum></citerefentry>.
+ </para>
+
+ <para>This option is always enabled when <varname>LogNamespace=</varname> is used. Unless set
+ explicitly, it defaults to enabled when <varname>MountAPIVFS=yes</varname>, or when <varname>PrivateDevices=yes</varname>
+ is used in conjunction with either <varname>RootDirectory=</varname> or <varname>RootImage=</varname>.</para>
+
+ <xi:include href="version-info.xml" xpointer="v257"/></listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><varname>ProtectProc=</varname></term>
<listitem><para>Takes one of <literal>noaccess</literal>, <literal>invisible</literal>,
diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c
index 2954f63422..4679f54a3e 100644
--- a/src/core/dbus-execute.c
+++ b/src/core/dbus-execute.c
@@ -55,6 +55,7 @@ static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_protect_system, protect_system,
static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_personality, personality, unsigned long);
static BUS_DEFINE_PROPERTY_GET(property_get_ioprio, "i", ExecContext, exec_context_get_effective_ioprio);
static BUS_DEFINE_PROPERTY_GET(property_get_mount_apivfs, "b", ExecContext, exec_context_get_effective_mount_apivfs);
+static BUS_DEFINE_PROPERTY_GET(property_get_bind_journal_sockets, "b", ExecContext, exec_context_get_effective_bind_journal_sockets);
static BUS_DEFINE_PROPERTY_GET2(property_get_ioprio_class, "i", ExecContext, exec_context_get_effective_ioprio, ioprio_prio_class);
static BUS_DEFINE_PROPERTY_GET2(property_get_ioprio_priority, "i", ExecContext, exec_context_get_effective_ioprio, ioprio_prio_data);
static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_empty_string, "s", NULL);
@@ -1193,6 +1194,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("BindReadOnlyPaths", "a(ssbt)", property_get_bind_paths, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("TemporaryFileSystem", "a(ss)", property_get_temporary_filesystems, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("MountAPIVFS", "b", property_get_mount_apivfs, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("BindJournalSockets", "b", property_get_bind_journal_sockets, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("KeyringMode", "s", property_get_exec_keyring_mode, offsetof(ExecContext, keyring_mode), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProtectProc", "s", property_get_protect_proc, offsetof(ExecContext, protect_proc), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProcSubset", "s", property_get_proc_subset, offsetof(ExecContext, proc_subset), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -1864,6 +1866,9 @@ int bus_exec_context_set_transient_property(
if (streq(name, "MountAPIVFS"))
return bus_set_transient_tristate(u, name, &c->mount_apivfs, message, flags, error);
+ if (streq(name, "BindJournalSockets"))
+ return bus_set_transient_tristate(u, name, &c->bind_journal_sockets, message, flags, error);
+
if (streq(name, "PrivateNetwork"))
return bus_set_transient_bool(u, name, &c->private_network, message, flags, error);
diff --git a/src/core/exec-invoke.c b/src/core/exec-invoke.c
index 183bca092d..af5552379e 100644
--- a/src/core/exec-invoke.c
+++ b/src/core/exec-invoke.c
@@ -3240,6 +3240,7 @@ static int apply_mount_namespace(
.private_tmp = needs_sandboxing ? context->private_tmp : false,
.mount_apivfs = needs_sandboxing && exec_context_get_effective_mount_apivfs(context),
+ .bind_journal_sockets = needs_sandboxing && exec_context_get_effective_bind_journal_sockets(context),
/* If NNP is on, we can turn on MS_NOSUID, since it won't have any effect anymore. */
.mount_nosuid = needs_sandboxing && context->no_new_privileges && !mac_selinux_use(),
@@ -3861,6 +3862,7 @@ static bool exec_context_need_unprivileged_private_users(
context->ipc_namespace_path ||
context->private_mounts > 0 ||
context->mount_apivfs > 0 ||
+ context->bind_journal_sockets > 0 ||
context->n_bind_mounts > 0 ||
context->n_temporary_filesystems > 0 ||
context->root_directory ||
diff --git a/src/core/execute-serialize.c b/src/core/execute-serialize.c
index 84628f91fb..32d6118ab7 100644
--- a/src/core/execute-serialize.c
+++ b/src/core/execute-serialize.c
@@ -1854,6 +1854,10 @@ static int exec_context_serialize(const ExecContext *c, FILE *f) {
if (r < 0)
return r;
+ r = serialize_item_tristate(f, "exec-context-bind-journal-sockets", c->bind_journal_sockets);
+ if (r < 0)
+ return r;
+
r = serialize_item_tristate(f, "exec-context-memory-ksm", c->memory_ksm);
if (r < 0)
return r;
@@ -2726,6 +2730,10 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) {
r = safe_atoi(val, &c->mount_apivfs);
if (r < 0)
return r;
+ } else if ((val = startswith(l, "exec-context-bind-journal-sockets="))) {
+ r = safe_atoi(val, &c->bind_journal_sockets);
+ if (r < 0)
+ return r;
} else if ((val = startswith(l, "exec-context-memory-ksm="))) {
r = safe_atoi(val, &c->memory_ksm);
if (r < 0)
diff --git a/src/core/execute.c b/src/core/execute.c
index 5bbd1d835c..7a365e23b9 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -284,7 +284,7 @@ bool exec_needs_mount_namespace(
context->directories[EXEC_DIRECTORY_LOGS].n_items > 0))
return true;
- if (context->log_namespace)
+ if (exec_context_get_effective_bind_journal_sockets(context))
return true;
return false;
@@ -539,6 +539,7 @@ void exec_context_init(ExecContext *c) {
.tty_cols = UINT_MAX,
.private_mounts = -1,
.mount_apivfs = -1,
+ .bind_journal_sockets = -1,
.memory_ksm = -1,
.set_login_environment = -1,
};
@@ -979,6 +980,7 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
"%sProtectHome: %s\n"
"%sProtectSystem: %s\n"
"%sMountAPIVFS: %s\n"
+ "%sBindJournalSockets: %s\n"
"%sIgnoreSIGPIPE: %s\n"
"%sMemoryDenyWriteExecute: %s\n"
"%sRestrictRealtime: %s\n"
@@ -1004,6 +1006,7 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
prefix, protect_home_to_string(c->protect_home),
prefix, protect_system_to_string(c->protect_system),
prefix, yes_no(exec_context_get_effective_mount_apivfs(c)),
+ prefix, yes_no(exec_context_get_effective_bind_journal_sockets(c)),
prefix, yes_no(c->ignore_sigpipe),
prefix, yes_no(c->memory_deny_write_execute),
prefix, yes_no(c->restrict_realtime),
@@ -1486,6 +1489,27 @@ bool exec_context_get_effective_mount_apivfs(const ExecContext *c) {
return false;
}
+bool exec_context_get_effective_bind_journal_sockets(const ExecContext *c) { /* Resolves the BindJournalSockets= tristate to the boolean that is actually applied */
+ assert(c);
+
+ /* If log namespace is specified, "/run/systemd/journal.namespace/" would be bind mounted to
+ * "/run/systemd/journal/", which effectively means BindJournalSockets=yes (checked first, so this wins even over an explicit "no") */
+ if (c->log_namespace)
+ return true;
+
+ if (c->bind_journal_sockets >= 0) /* Explicitly configured; a negative value means "unset" */
+ return c->bind_journal_sockets > 0;
+
+ if (exec_context_get_effective_mount_apivfs(c)) /* Unset: default to on whenever MountAPIVFS= is effectively enabled */
+ return true;
+
+ /* When PrivateDevices=yes, /dev/log gets symlinked to /run/systemd/journal/dev-log */
+ if (exec_context_with_rootfs(c) && c->private_devices)
+ return true;
+
+ return false;
+}
+
void exec_context_free_log_extra_fields(ExecContext *c) {
assert(c);
diff --git a/src/core/execute.h b/src/core/execute.h
index a3fc52bfd0..5754273999 100644
--- a/src/core/execute.h
+++ b/src/core/execute.h
@@ -313,6 +313,7 @@ struct ExecContext {
int private_mounts;
int mount_apivfs;
+ int bind_journal_sockets;
int memory_ksm;
PrivateTmp private_tmp;
bool private_network;
@@ -519,6 +520,7 @@ bool exec_context_maintains_privileges(const ExecContext *c);
int exec_context_get_effective_ioprio(const ExecContext *c);
bool exec_context_get_effective_mount_apivfs(const ExecContext *c);
+bool exec_context_get_effective_bind_journal_sockets(const ExecContext *c);
void exec_context_free_log_extra_fields(ExecContext *c);
diff --git a/src/core/load-fragment-gperf.gperf.in b/src/core/load-fragment-gperf.gperf.in
index 7441d3c759..32ac87ce85 100644
--- a/src/core/load-fragment-gperf.gperf.in
+++ b/src/core/load-fragment-gperf.gperf.in
@@ -137,6 +137,7 @@
{{type}}.ProtectHome, config_parse_protect_home, 0, offsetof({{type}}, exec_context.protect_home)
{{type}}.MountFlags, config_parse_exec_mount_propagation_flag, 0, offsetof({{type}}, exec_context.mount_propagation_flag)
{{type}}.MountAPIVFS, config_parse_tristate, 0, offsetof({{type}}, exec_context.mount_apivfs)
+{{type}}.BindJournalSockets, config_parse_tristate, 0, offsetof({{type}}, exec_context.bind_journal_sockets)
{{type}}.Personality, config_parse_personality, 0, offsetof({{type}}, exec_context.personality)
{{type}}.RuntimeDirectoryPreserve, config_parse_exec_preserve_mode, 0, offsetof({{type}}, exec_context.runtime_directory_preserve_mode)
{{type}}.RuntimeDirectoryMode, config_parse_mode, 0, offsetof({{type}}, exec_context.directories[EXEC_DIRECTORY_RUNTIME].mode)
diff --git a/src/core/namespace.c b/src/core/namespace.c
index 314268e2eb..553f3f4940 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -93,7 +93,7 @@ typedef struct MountEntry {
const char *path_const; /* Memory allocated on stack or static */
MountMode mode;
bool ignore:1; /* Ignore if path does not exist? */
- bool has_prefix:1; /* Already is prefixed by the root dir? */
+ bool has_prefix:1; /* Already prefixed by the root dir? */
bool read_only:1; /* Shall this mount point be read-only? */
bool nosuid:1; /* Shall set MS_NOSUID on the mount itself */
bool noexec:1; /* Shall set MS_NOEXEC on the mount itself */
@@ -120,6 +120,12 @@ typedef struct MountList {
size_t n_mounts;
} MountList;
+static const BindMount bind_journal_sockets_table[] = { /* Journal sockets, each bound read-only onto the same path, that setup_namespace() appends to the mount list for bind_journal_sockets */
+ { (char*) "/run/systemd/journal/socket", (char*) "/run/systemd/journal/socket", .read_only = true, .ignore_enoent = true },
+ { (char*) "/run/systemd/journal/stdout", (char*) "/run/systemd/journal/stdout", .read_only = true, .ignore_enoent = true },
+ { (char*) "/run/systemd/journal/dev-log", (char*) "/run/systemd/journal/dev-log", .read_only = true, .ignore_enoent = true },
+};
+
/* If MountAPIVFS= is used, let's mount /sys, /proc, /dev and /run into it, but only as a fallback if the user hasn't mounted
* something there already. These mounts are hence overridden by any other explicitly configured mounts. */
static const MountEntry apivfs_table[] = {
@@ -2585,6 +2591,11 @@ int setup_namespace(const NamespaceParameters *p, char **error_path) {
.read_only = true,
.source_malloc = TAKE_PTR(q),
};
+
+ } else if (p->bind_journal_sockets) {
+ r = append_bind_mounts(&ml, bind_journal_sockets_table, ELEMENTSOF(bind_journal_sockets_table));
+ if (r < 0)
+ return r;
}
/* Will be used to add bind mounts at runtime */
diff --git a/src/core/namespace.h b/src/core/namespace.h
index bff99b9daa..538d23753c 100644
--- a/src/core/namespace.h
+++ b/src/core/namespace.h
@@ -152,6 +152,7 @@ struct NamespaceParameters {
bool private_ipc;
bool mount_apivfs;
+ bool bind_journal_sockets;
bool mount_nosuid;
ProtectHome protect_home;
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
index 6ce76ded43..edfe1482c8 100644
--- a/src/shared/bus-unit-util.c
+++ b/src/shared/bus-unit-util.c
@@ -1076,6 +1076,7 @@ static int bus_append_execute_property(sd_bus_message *m, const char *field, con
"ProtectClock",
"ProtectControlGroups",
"MountAPIVFS",
+ "BindJournalSockets",
"CPUSchedulingResetOnFork",
"LockPersonality",
"ProtectHostname",