diff options
author | Mike Yuan <me@yhndnzj.com> | 2024-04-25 18:43:29 +0200 |
---|---|---|
committer | Mike Yuan <me@yhndnzj.com> | 2024-09-03 21:04:50 +0200 |
commit | 368a3071e9124aae319bb44ba248fd4b08d6dd81 (patch) | |
tree | bc0d7c52f9323f0d534096406dc88e1e5b5ff41f | |
parent | build(deps): bump systemd/mkosi (diff) | |
download | systemd-368a3071e9124aae319bb44ba248fd4b08d6dd81.tar.xz systemd-368a3071e9124aae319bb44ba248fd4b08d6dd81.zip |
core: introduce BindJournalSockets=
Closes #32478
-rw-r--r-- | man/org.freedesktop.systemd1.xml | 44 | ||||
-rw-r--r-- | man/systemd.exec.xml | 17 | ||||
-rw-r--r-- | src/core/dbus-execute.c | 5 | ||||
-rw-r--r-- | src/core/exec-invoke.c | 2 | ||||
-rw-r--r-- | src/core/execute-serialize.c | 8 | ||||
-rw-r--r-- | src/core/execute.c | 26 | ||||
-rw-r--r-- | src/core/execute.h | 2 | ||||
-rw-r--r-- | src/core/load-fragment-gperf.gperf.in | 1 | ||||
-rw-r--r-- | src/core/namespace.c | 13 | ||||
-rw-r--r-- | src/core/namespace.h | 1 | ||||
-rw-r--r-- | src/shared/bus-unit-util.c | 1 |
11 files changed, 110 insertions, 10 deletions
diff --git a/man/org.freedesktop.systemd1.xml b/man/org.freedesktop.systemd1.xml index a5c98d3458..20bbae4487 100644 --- a/man/org.freedesktop.systemd1.xml +++ b/man/org.freedesktop.systemd1.xml @@ -3333,6 +3333,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly b MountAPIVFS = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly b BindJournalSockets = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s KeyringMode = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s ProtectProc = '...'; @@ -3932,6 +3934,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { <!--property MountAPIVFS is not documented!--> + <!--property BindJournalSockets is not documented!--> + <!--property KeyringMode is not documented!--> <!--property ProtectProc is not documented!--> @@ -4642,6 +4646,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { <variablelist class="dbus-property" generated="True" extra-ref="MountAPIVFS"/> + <variablelist class="dbus-property" generated="True" extra-ref="BindJournalSockets"/> + <variablelist class="dbus-property" generated="True" extra-ref="KeyringMode"/> <variablelist class="dbus-property" generated="True" extra-ref="ProtectProc"/> @@ -5468,6 +5474,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly b MountAPIVFS = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly b BindJournalSockets = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s KeyringMode = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s ProtectProc = '...'; @@ -6079,6 +6087,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { <!--property MountAPIVFS is not documented!--> + <!--property BindJournalSockets is not documented!--> + <!--property KeyringMode is not documented!--> <!--property ProtectProc is not documented!--> @@ -6763,6 +6773,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { <variablelist class="dbus-property" generated="True" extra-ref="MountAPIVFS"/> + <variablelist class="dbus-property" generated="True" extra-ref="BindJournalSockets"/> + <variablelist class="dbus-property" generated="True" extra-ref="KeyringMode"/> <variablelist class="dbus-property" generated="True" extra-ref="ProtectProc"/> @@ -7453,6 +7465,8 @@ node /org/freedesktop/systemd1/unit/home_2emount { @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly b MountAPIVFS = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly b BindJournalSockets = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s KeyringMode = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s ProtectProc = '...'; @@ -7990,6 +8004,8 @@ node /org/freedesktop/systemd1/unit/home_2emount { <!--property MountAPIVFS is not documented!--> + <!--property BindJournalSockets is not documented!--> + <!--property KeyringMode is not documented!--> <!--property ProtectProc is not documented!--> @@ -8586,6 +8602,8 @@ node /org/freedesktop/systemd1/unit/home_2emount { <variablelist class="dbus-property" generated="True" extra-ref="MountAPIVFS"/> + <variablelist class="dbus-property" generated="True" extra-ref="BindJournalSockets"/> + <variablelist class="dbus-property" generated="True" extra-ref="KeyringMode"/> <variablelist class="dbus-property" generated="True" extra-ref="ProtectProc"/> @@ -9399,6 +9417,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly b MountAPIVFS = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly b BindJournalSockets = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s KeyringMode = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s ProtectProc = '...'; @@ -9922,6 +9942,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { <!--property MountAPIVFS is not documented!--> + <!--property BindJournalSockets is not documented!--> + <!--property KeyringMode is not documented!--> <!--property ProtectProc is not documented!--> @@ -10504,6 +10526,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { <variablelist class="dbus-property" generated="True" extra-ref="MountAPIVFS"/> + <variablelist class="dbus-property" generated="True" extra-ref="BindJournalSockets"/> + <variablelist class="dbus-property" generated="True" extra-ref="KeyringMode"/> <variablelist class="dbus-property" generated="True" extra-ref="ProtectProc"/> @@ -12149,8 +12173,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ <para><varname>StatusBusError</varname>, <varname>StatusVarlinkError</varname>, <varname>LiveMountResult</varname>, - <varname>PrivateTmpEx</varname>, and - <varname>ImportCredentialEx</varname> were added in version 257.</para> + <varname>PrivateTmpEx</varname>, + <varname>ImportCredentialEx</varname>, and + <varname>BindJournalSockets</varname> were added in version 257.</para> </refsect2> <refsect2> <title>Socket Unit Objects</title> @@ -12187,8 +12212,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ <varname>EffectiveTasksMax</varname>, <varname>MemoryZSwapWriteback</varname>, and <varname>PassFileDescriptorsToExec</varname> were added in version 256.</para> - <para><varname>PrivateTmpEx</varname>, and - <varname>ImportCredentialEx</varname> were added in version 257.</para> + <para><varname>PrivateTmpEx</varname>, + <varname>ImportCredentialEx</varname>, and + <varname>BindJournalSockets</varname> were added in version 257.</para> </refsect2> <refsect2> <title>Mount Unit Objects</title> @@ -12222,8 +12248,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ <varname>EffectiveMemoryMax</varname>, <varname>EffectiveTasksMax</varname>, and <varname>MemoryZSwapWriteback</varname> were added in version 256.</para> - <para><varname>PrivateTmpEx</varname>, and - <varname>ImportCredentialEx</varname> were added in version 257.</para> + <para><varname>PrivateTmpEx</varname>, + <varname>ImportCredentialEx</varname>, and + <varname>BindJournalSockets</varname> were added in version 257.</para> </refsect2> <refsect2> <title>Swap Unit Objects</title> @@ -12257,8 +12284,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ <varname>EffectiveMemoryMax</varname>, <varname>EffectiveTasksMax</varname>, and <varname>MemoryZSwapWriteback</varname> were added in version 256.</para> - <para><varname>PrivateTmpEx</varname>, and - <varname>ImportCredentialEx</varname> were added in version 257.</para> + <para><varname>PrivateTmpEx</varname>, + <varname>ImportCredentialEx</varname>, and + <varname>BindJournalSockets</varname> were added in version 257.</para> </refsect2> <refsect2> <title>Slice Unit Objects</title> diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index c9ae38bf1f..0318961569 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -367,6 +367,23 @@ </varlistentry> <varlistentry> + <term><varname>BindJournalSockets=</varname></term> + + <listitem><para>Takes a boolean argument. If true, sockets from <citerefentry> + <refentrytitle>systemd-journald.socket</refentrytitle><manvolnum>8</manvolnum></citerefentry> + will be bind mounted into the mount namespace. This is particularly useful when a different instance + of <filename>/run/</filename> is employed, to make sure processes running in the namespace + can still make use of <citerefentry><refentrytitle>sd-journal</refentrytitle><manvolnum>3</manvolnum></citerefentry>. + </para> + + <para>This option is implied when <varname>LogNamespace=</varname> is used, + when <varname>MountAPIVFS=yes</varname>, or when <varname>PrivateDevices=yes</varname> is used + in conjunction with either <varname>RootDirectory=</varname> or <varname>RootImage=</varname>.</para> + + <xi:include href="version-info.xml" xpointer="v257"/></listitem> + </varlistentry> + + <varlistentry> <term><varname>ProtectProc=</varname></term> <listitem><para>Takes one of <literal>noaccess</literal>, <literal>invisible</literal>, diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index 2954f63422..4679f54a3e 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -55,6 +55,7 @@ static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_protect_system, protect_system, static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_personality, personality, unsigned long); static BUS_DEFINE_PROPERTY_GET(property_get_ioprio, "i", ExecContext, exec_context_get_effective_ioprio); static BUS_DEFINE_PROPERTY_GET(property_get_mount_apivfs, "b", ExecContext, exec_context_get_effective_mount_apivfs); +static BUS_DEFINE_PROPERTY_GET(property_get_bind_journal_sockets, "b", ExecContext, exec_context_get_effective_bind_journal_sockets); static BUS_DEFINE_PROPERTY_GET2(property_get_ioprio_class, "i", ExecContext, exec_context_get_effective_ioprio, ioprio_prio_class); static BUS_DEFINE_PROPERTY_GET2(property_get_ioprio_priority, "i", ExecContext, exec_context_get_effective_ioprio, ioprio_prio_data); static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_empty_string, "s", NULL); @@ -1193,6 +1194,7 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("BindReadOnlyPaths", "a(ssbt)", property_get_bind_paths, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("TemporaryFileSystem", "a(ss)", property_get_temporary_filesystems, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("MountAPIVFS", "b", property_get_mount_apivfs, 0, SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("BindJournalSockets", "b", property_get_bind_journal_sockets, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("KeyringMode", "s", property_get_exec_keyring_mode, offsetof(ExecContext, keyring_mode), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("ProtectProc", "s", property_get_protect_proc, offsetof(ExecContext, protect_proc), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("ProcSubset", "s", property_get_proc_subset, offsetof(ExecContext, proc_subset), SD_BUS_VTABLE_PROPERTY_CONST), @@ -1864,6 +1866,9 @@ int bus_exec_context_set_transient_property( if (streq(name, "MountAPIVFS")) return bus_set_transient_tristate(u, name, &c->mount_apivfs, message, flags, error); + if (streq(name, "BindJournalSockets")) + return bus_set_transient_tristate(u, name, &c->bind_journal_sockets, message, flags, error); + if (streq(name, "PrivateNetwork")) return bus_set_transient_bool(u, name, &c->private_network, message, flags, error); diff --git a/src/core/exec-invoke.c b/src/core/exec-invoke.c index 183bca092d..af5552379e 100644 --- a/src/core/exec-invoke.c +++ b/src/core/exec-invoke.c @@ -3240,6 +3240,7 @@ static int apply_mount_namespace( .private_tmp = needs_sandboxing ? context->private_tmp : false, .mount_apivfs = needs_sandboxing && exec_context_get_effective_mount_apivfs(context), + .bind_journal_sockets = needs_sandboxing && exec_context_get_effective_bind_journal_sockets(context), /* If NNP is on, we can turn on MS_NOSUID, since it won't have any effect anymore. */ .mount_nosuid = needs_sandboxing && context->no_new_privileges && !mac_selinux_use(), @@ -3861,6 +3862,7 @@ static bool exec_context_need_unprivileged_private_users( context->ipc_namespace_path || context->private_mounts > 0 || context->mount_apivfs > 0 || + context->bind_journal_sockets > 0 || context->n_bind_mounts > 0 || context->n_temporary_filesystems > 0 || context->root_directory || diff --git a/src/core/execute-serialize.c b/src/core/execute-serialize.c index 84628f91fb..32d6118ab7 100644 --- a/src/core/execute-serialize.c +++ b/src/core/execute-serialize.c @@ -1854,6 +1854,10 @@ static int exec_context_serialize(const ExecContext *c, FILE *f) { if (r < 0) return r; + r = serialize_item_tristate(f, "exec-context-bind-journal-sockets", c->bind_journal_sockets); + if (r < 0) + return r; + r = serialize_item_tristate(f, "exec-context-memory-ksm", c->memory_ksm); if (r < 0) return r; @@ -2726,6 +2730,10 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) { r = safe_atoi(val, &c->mount_apivfs); if (r < 0) return r; + } else if ((val = startswith(l, "exec-context-bind-journal-sockets="))) { + r = safe_atoi(val, &c->bind_journal_sockets); + if (r < 0) + return r; } else if ((val = startswith(l, "exec-context-memory-ksm="))) { r = safe_atoi(val, &c->memory_ksm); if (r < 0) diff --git a/src/core/execute.c b/src/core/execute.c index 5bbd1d835c..7a365e23b9 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -284,7 +284,7 @@ bool exec_needs_mount_namespace( context->directories[EXEC_DIRECTORY_LOGS].n_items > 0)) return true; - if (context->log_namespace) + if (exec_context_get_effective_bind_journal_sockets(context)) return true; return false; @@ -539,6 +539,7 @@ void exec_context_init(ExecContext *c) { .tty_cols = UINT_MAX, .private_mounts = -1, .mount_apivfs = -1, + .bind_journal_sockets = -1, .memory_ksm = -1, .set_login_environment = -1, }; @@ -979,6 +980,7 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) { "%sProtectHome: %s\n" "%sProtectSystem: %s\n" "%sMountAPIVFS: %s\n" + "%sBindJournalSockets: %s\n" "%sIgnoreSIGPIPE: %s\n" "%sMemoryDenyWriteExecute: %s\n" "%sRestrictRealtime: %s\n" @@ -1004,6 +1006,7 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) { prefix, protect_home_to_string(c->protect_home), prefix, protect_system_to_string(c->protect_system), prefix, yes_no(exec_context_get_effective_mount_apivfs(c)), + prefix, yes_no(exec_context_get_effective_bind_journal_sockets(c)), prefix, yes_no(c->ignore_sigpipe), prefix, yes_no(c->memory_deny_write_execute), prefix, yes_no(c->restrict_realtime), @@ -1486,6 +1489,27 @@ bool exec_context_get_effective_mount_apivfs(const ExecContext *c) { return false; } +bool exec_context_get_effective_bind_journal_sockets(const ExecContext *c) { + assert(c); + + /* If log namespace is specified, "/run/systemd/journal.namespace/" would be bind mounted to + * "/run/systemd/journal/", which effectively means BindJournalSockets=yes */ + if (c->log_namespace) + return true; + + if (c->bind_journal_sockets >= 0) + return c->bind_journal_sockets > 0; + + if (exec_context_get_effective_mount_apivfs(c)) + return true; + + /* When PrivateDevices=yes, /dev/log gets symlinked to /run/systemd/journal/dev-log */ + if (exec_context_with_rootfs(c) && c->private_devices) + return true; + + return false; +} + void exec_context_free_log_extra_fields(ExecContext *c) { assert(c); diff --git a/src/core/execute.h b/src/core/execute.h index a3fc52bfd0..5754273999 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -313,6 +313,7 @@ struct ExecContext { int private_mounts; int mount_apivfs; + int bind_journal_sockets; int memory_ksm; PrivateTmp private_tmp; bool private_network; @@ -519,6 +520,7 @@ bool exec_context_maintains_privileges(const ExecContext *c); int exec_context_get_effective_ioprio(const ExecContext *c); bool exec_context_get_effective_mount_apivfs(const ExecContext *c); +bool exec_context_get_effective_bind_journal_sockets(const ExecContext *c); void exec_context_free_log_extra_fields(ExecContext *c); diff --git a/src/core/load-fragment-gperf.gperf.in b/src/core/load-fragment-gperf.gperf.in index 7441d3c759..32ac87ce85 100644 --- a/src/core/load-fragment-gperf.gperf.in +++ b/src/core/load-fragment-gperf.gperf.in @@ -137,6 +137,7 @@ {{type}}.ProtectHome, config_parse_protect_home, 0, offsetof({{type}}, exec_context.protect_home) {{type}}.MountFlags, config_parse_exec_mount_propagation_flag, 0, offsetof({{type}}, exec_context.mount_propagation_flag) {{type}}.MountAPIVFS, config_parse_tristate, 0, offsetof({{type}}, exec_context.mount_apivfs) +{{type}}.BindJournalSockets, config_parse_tristate, 0, offsetof({{type}}, exec_context.bind_journal_sockets) {{type}}.Personality, config_parse_personality, 0, offsetof({{type}}, exec_context.personality) {{type}}.RuntimeDirectoryPreserve, config_parse_exec_preserve_mode, 0, offsetof({{type}}, exec_context.runtime_directory_preserve_mode) {{type}}.RuntimeDirectoryMode, config_parse_mode, 0, offsetof({{type}}, exec_context.directories[EXEC_DIRECTORY_RUNTIME].mode) diff --git a/src/core/namespace.c b/src/core/namespace.c index 314268e2eb..553f3f4940 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -93,7 +93,7 @@ typedef struct MountEntry { const char *path_const; /* Memory allocated on stack or static */ MountMode mode; bool ignore:1; /* Ignore if path does not exist? */ - bool has_prefix:1; /* Already is prefixed by the root dir? */ + bool has_prefix:1; /* Already prefixed by the root dir? */ bool read_only:1; /* Shall this mount point be read-only? */ bool nosuid:1; /* Shall set MS_NOSUID on the mount itself */ bool noexec:1; /* Shall set MS_NOEXEC on the mount itself */ @@ -120,6 +120,12 @@ typedef struct MountList { size_t n_mounts; } MountList; +static const BindMount bind_journal_sockets_table[] = { + { (char*) "/run/systemd/journal/socket", (char*) "/run/systemd/journal/socket", .read_only = true, .ignore_enoent = true }, + { (char*) "/run/systemd/journal/stdout", (char*) "/run/systemd/journal/stdout", .read_only = true, .ignore_enoent = true }, + { (char*) "/run/systemd/journal/dev-log", (char*) "/run/systemd/journal/dev-log", .read_only = true, .ignore_enoent = true }, +}; + /* If MountAPIVFS= is used, let's mount /sys, /proc, /dev and /run into the it, but only as a fallback if the user hasn't mounted * something there already. These mounts are hence overridden by any other explicitly configured mounts. */ static const MountEntry apivfs_table[] = { @@ -2585,6 +2591,11 @@ int setup_namespace(const NamespaceParameters *p, char **error_path) { .read_only = true, .source_malloc = TAKE_PTR(q), }; + + } else if (p->bind_journal_sockets) { + r = append_bind_mounts(&ml, bind_journal_sockets_table, ELEMENTSOF(bind_journal_sockets_table)); + if (r < 0) + return r; } /* Will be used to add bind mounts at runtime */ diff --git a/src/core/namespace.h b/src/core/namespace.h index bff99b9daa..538d23753c 100644 --- a/src/core/namespace.h +++ b/src/core/namespace.h @@ -152,6 +152,7 @@ struct NamespaceParameters { bool private_ipc; bool mount_apivfs; + bool bind_journal_sockets; bool mount_nosuid; ProtectHome protect_home; diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index 6ce76ded43..edfe1482c8 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -1076,6 +1076,7 @@ static int bus_append_execute_property(sd_bus_message *m, const char *field, con "ProtectClock", "ProtectControlGroups", "MountAPIVFS", + "BindJournalSockets", "CPUSchedulingResetOnFork", "LockPersonality", "ProtectHostname", |