summaryrefslogtreecommitdiffstats
path: root/src/nspawn
diff options
context:
space:
mode:
authorLennart Poettering <lennart@poettering.net>2016-02-03 20:32:06 +0100
committerLennart Poettering <lennart@poettering.net>2016-02-03 23:58:24 +0100
commit7732f92bad5f24a4bd03bb357af46da56b0ac94d (patch)
tree10e33a8ec564376365e05a0f12681a73aab40986 /src/nspawn
parentresolved: apply epoch to system time from PID 1 (diff)
downloadsystemd-7732f92bad5f24a4bd03bb357af46da56b0ac94d.tar.xz
systemd-7732f92bad5f24a4bd03bb357af46da56b0ac94d.zip
nspawn: optionally run a stub init process as PID 1
This adds a new switch --as-pid2, which allows running commands as PID 2, while a stub init process is run as PID 1. This is useful in order to run arbitrary commands in a container, as PID1's semantics are different from all other processes regarding reaping of unknown children or signal handling.
Diffstat (limited to 'src/nspawn')
-rw-r--r--src/nspawn/nspawn-gperf.gperf3
-rw-r--r--src/nspawn/nspawn-settings.c93
-rw-r--r--src/nspawn/nspawn-settings.h38
-rw-r--r--src/nspawn/nspawn-stub-pid1.c170
-rw-r--r--src/nspawn/nspawn-stub-pid1.h22
-rw-r--r--src/nspawn/nspawn.c52
6 files changed, 350 insertions, 28 deletions
diff --git a/src/nspawn/nspawn-gperf.gperf b/src/nspawn/nspawn-gperf.gperf
index 08020d510c..116655cdd2 100644
--- a/src/nspawn/nspawn-gperf.gperf
+++ b/src/nspawn/nspawn-gperf.gperf
@@ -15,7 +15,8 @@ struct ConfigPerfItem;
%struct-type
%includes
%%
-Exec.Boot, config_parse_tristate, 0, offsetof(Settings, boot)
+Exec.Boot, config_parse_boot, 0, 0
+Exec.ProcessTwo, config_parse_pid2, 0, 0,
Exec.Parameters, config_parse_strv, 0, offsetof(Settings, parameters)
Exec.Environment, config_parse_strv, 0, offsetof(Settings, environment)
Exec.User, config_parse_string, 0, offsetof(Settings, user)
diff --git a/src/nspawn/nspawn-settings.c b/src/nspawn/nspawn-settings.c
index 3c552e0734..12524d3b89 100644
--- a/src/nspawn/nspawn-settings.c
+++ b/src/nspawn/nspawn-settings.c
@@ -24,6 +24,7 @@
#include "conf-parser.h"
#include "nspawn-network.h"
#include "nspawn-settings.h"
+#include "parse-util.h"
#include "process-util.h"
#include "strv.h"
#include "util.h"
@@ -39,7 +40,7 @@ int settings_load(FILE *f, const char *path, Settings **ret) {
if (!s)
return -ENOMEM;
- s->boot = -1;
+ s->start_mode = _START_MODE_INVALID;
s->personality = PERSONALITY_INVALID;
s->read_only = -1;
@@ -303,3 +304,93 @@ int config_parse_veth_extra(
return 0;
}
+
+int config_parse_boot(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *settings = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse Boot= parameter %s, ignoring: %m", rvalue);
+ return 0;
+ }
+
+ if (r > 0) {
+ if (settings->start_mode == START_PID2)
+ goto conflict;
+
+ settings->start_mode = START_BOOT;
+ } else {
+ if (settings->start_mode == START_BOOT)
+ goto conflict;
+
+ if (settings->start_mode < 0)
+ settings->start_mode = START_PID1;
+ }
+
+ return 0;
+
+conflict:
+ log_syntax(unit, LOG_ERR, filename, line, r, "Conflicting Boot= or ProcessTwo= setting found. Ignoring.");
+ return 0;
+}
+
+int config_parse_pid2(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *settings = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse ProcessTwo= parameter %s, ignoring: %m", rvalue);
+ return 0;
+ }
+
+ if (r > 0) {
+ if (settings->start_mode == START_BOOT)
+ goto conflict;
+
+ settings->start_mode = START_PID2;
+ } else {
+ if (settings->start_mode == START_PID2)
+ goto conflict;
+
+ if (settings->start_mode < 0)
+ settings->start_mode = START_PID1;
+ }
+
+ return 0;
+
+conflict:
+ log_syntax(unit, LOG_ERR, filename, line, r, "Conflicting Boot= or ProcessTwo= setting found. Ignoring.");
+ return 0;
+}
diff --git a/src/nspawn/nspawn-settings.h b/src/nspawn/nspawn-settings.h
index 236a1baa4f..fdb07486da 100644
--- a/src/nspawn/nspawn-settings.h
+++ b/src/nspawn/nspawn-settings.h
@@ -27,26 +27,34 @@
#include "nspawn-expose-ports.h"
#include "nspawn-mount.h"
+typedef enum StartMode {
+ START_PID1, /* Run parameters as command line as process 1 */
+ START_PID2, /* Use stub init process as PID 1, run parameters as command line as process 2 */
+ START_BOOT, /* Search for init system, pass arguments as parameters */
+ _START_MODE_MAX,
+ _START_MODE_INVALID = -1
+} StartMode;
+
typedef enum SettingsMask {
- SETTING_BOOT = 1 << 0,
- SETTING_ENVIRONMENT = 1 << 1,
- SETTING_USER = 1 << 2,
- SETTING_CAPABILITY = 1 << 3,
- SETTING_KILL_SIGNAL = 1 << 4,
- SETTING_PERSONALITY = 1 << 5,
- SETTING_MACHINE_ID = 1 << 6,
- SETTING_NETWORK = 1 << 7,
- SETTING_EXPOSE_PORTS = 1 << 8,
- SETTING_READ_ONLY = 1 << 9,
- SETTING_VOLATILE_MODE = 1 << 10,
- SETTING_CUSTOM_MOUNTS = 1 << 11,
+ SETTING_START_MODE = 1 << 0,
+ SETTING_ENVIRONMENT = 1 << 1,
+ SETTING_USER = 1 << 2,
+ SETTING_CAPABILITY = 1 << 3,
+ SETTING_KILL_SIGNAL = 1 << 4,
+ SETTING_PERSONALITY = 1 << 5,
+ SETTING_MACHINE_ID = 1 << 6,
+ SETTING_NETWORK = 1 << 7,
+ SETTING_EXPOSE_PORTS = 1 << 8,
+ SETTING_READ_ONLY = 1 << 9,
+ SETTING_VOLATILE_MODE = 1 << 10,
+ SETTING_CUSTOM_MOUNTS = 1 << 11,
SETTING_WORKING_DIRECTORY = 1 << 12,
- _SETTINGS_MASK_ALL = (1 << 13) -1
+ _SETTINGS_MASK_ALL = (1 << 13) -1
} SettingsMask;
typedef struct Settings {
/* [Run] */
- int boot;
+ StartMode start_mode;
char **parameters;
char **environment;
char *user;
@@ -91,3 +99,5 @@ int config_parse_volatile_mode(const char *unit, const char *filename, unsigned
int config_parse_bind(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_tmpfs(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_veth_extra(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_boot(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_pid2(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
diff --git a/src/nspawn/nspawn-stub-pid1.c b/src/nspawn/nspawn-stub-pid1.c
new file mode 100644
index 0000000000..2de87e3c63
--- /dev/null
+++ b/src/nspawn/nspawn-stub-pid1.c
@@ -0,0 +1,170 @@
+/***
+ This file is part of systemd.
+
+ Copyright 2016 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <sys/reboot.h>
+#include <sys/unistd.h>
+#include <sys/wait.h>
+
+#include "fd-util.h"
+#include "log.h"
+#include "nspawn-stub-pid1.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "time-util.h"
+#include "def.h"
+
+int stub_pid1(void) {
+ enum {
+ STATE_RUNNING,
+ STATE_REBOOT,
+ STATE_POWEROFF,
+ } state = STATE_RUNNING;
+
+ sigset_t fullmask, oldmask, waitmask;
+ usec_t quit_usec = USEC_INFINITY;
+ pid_t pid;
+ int r;
+
+ /* Implements a stub PID 1, that reaps all processes and processes a couple of standard signals. This is useful
+ * for allowing arbitrary processes run in a container, and still have all zombies reaped. */
+
+ assert_se(sigfillset(&fullmask) >= 0);
+ assert_se(sigprocmask(SIG_BLOCK, &fullmask, &oldmask) >= 0);
+
+ pid = fork();
+ if (pid < 0)
+ return log_error_errno(errno, "Failed to fork child pid: %m");
+
+ if (pid == 0) {
+ /* Return in the child */
+ assert_se(sigprocmask(SIG_SETMASK, &oldmask, NULL) >= 0);
+ setsid();
+ return 0;
+ }
+
+ reset_all_signal_handlers();
+
+ log_close();
+ close_all_fds(NULL, 0);
+ log_open();
+
+ rename_process("STUBINIT");
+
+ assert_se(sigemptyset(&waitmask) >= 0);
+ assert_se(sigset_add_many(&waitmask,
+ SIGCHLD, /* posix: process died */
+ SIGINT, /* sysv: ctrl-alt-del */
+ SIGRTMIN+3, /* systemd: halt */
+ SIGRTMIN+4, /* systemd: poweroff */
+ SIGRTMIN+5, /* systemd: reboot */
+ SIGRTMIN+6, /* systemd: kexec */
+ SIGRTMIN+13, /* systemd: halt */
+ SIGRTMIN+14, /* systemd: poweroff */
+ SIGRTMIN+15, /* systemd: reboot */
+ SIGRTMIN+16, /* systemd: kexec */
+ -1) >= 0);
+
+ /* Note that we ignore SIGTERM (sysv's reexec), SIGHUP (reload), and all other signals here, since we don't
+ * support reexec/reloading in this stub process. */
+
+ for (;;) {
+ siginfo_t si;
+ usec_t current_usec;
+
+ si.si_pid = 0;
+ r = waitid(P_ALL, 0, &si, WEXITED|WNOHANG);
+ if (r < 0) {
+ r = log_error_errno(errno, "Failed to reap children: %m");
+ goto finish;
+ }
+
+ current_usec = now(CLOCK_MONOTONIC);
+
+ if (si.si_pid == pid || current_usec >= quit_usec) {
+
+ /* The child we started ourselves died or we reached a timeout. */
+
+ if (state == STATE_REBOOT) { /* dispatch a queued reboot */
+ (void) reboot(RB_AUTOBOOT);
+ r = log_error_errno(errno, "Failed to reboot: %m");
+ goto finish;
+
+ } else if (state == STATE_POWEROFF)
+ (void) reboot(RB_POWER_OFF); /* if this fails, fall back to normal exit. */
+
+ if (si.si_pid == pid && si.si_code == CLD_EXITED)
+ r = si.si_status; /* pass on exit code */
+ else
+ r = 255; /* signal, coredump, timeout, … */
+
+ goto finish;
+ }
+ if (si.si_pid != 0)
+ /* We reaped something. Retry until there's nothing more to reap. */
+ continue;
+
+ if (quit_usec == USEC_INFINITY)
+ r = sigwaitinfo(&waitmask, &si);
+ else {
+ struct timespec ts;
+ r = sigtimedwait(&waitmask, &si, timespec_store(&ts, quit_usec - current_usec));
+ }
+ if (r < 0) {
+ if (errno == EINTR) /* strace -p attach can result in EINTR, let's handle this nicely. */
+ continue;
+ if (errno == EAGAIN) /* timeout reached */
+ continue;
+
+ r = log_error_errno(errno, "Failed to wait for signal: %m");
+ goto finish;
+ }
+
+ if (si.si_signo == SIGCHLD)
+ continue; /* Let's reap this */
+
+ if (state != STATE_RUNNING)
+ continue;
+
+ /* Would love to use a switch() statement here, but SIGRTMIN is actually a function call, not a
+ * constant… */
+
+ if (si.si_signo == SIGRTMIN+3 ||
+ si.si_signo == SIGRTMIN+4 ||
+ si.si_signo == SIGRTMIN+13 ||
+ si.si_signo == SIGRTMIN+14)
+
+ state = STATE_POWEROFF;
+
+ else if (si.si_signo == SIGINT ||
+ si.si_signo == SIGRTMIN+5 ||
+ si.si_signo == SIGRTMIN+6 ||
+ si.si_signo == SIGRTMIN+15 ||
+ si.si_signo == SIGRTMIN+16)
+
+ state = STATE_REBOOT;
+ else
+ assert_not_reached("Got unexpected signal");
+
+ /* (void) kill_and_sigcont(pid, SIGTERM); */
+ quit_usec = now(CLOCK_MONOTONIC) + DEFAULT_TIMEOUT_USEC;
+ }
+
+finish:
+ _exit(r < 0 ? EXIT_FAILURE : r);
+}
diff --git a/src/nspawn/nspawn-stub-pid1.h b/src/nspawn/nspawn-stub-pid1.h
new file mode 100644
index 0000000000..36c1aaf5dd
--- /dev/null
+++ b/src/nspawn/nspawn-stub-pid1.h
@@ -0,0 +1,22 @@
+#pragma once
+
+/***
+ This file is part of systemd.
+
+ Copyright 2016 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+int stub_pid1(void);
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index 0dae9984c9..e7314ac29c 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -79,6 +79,7 @@
#include "nspawn-register.h"
#include "nspawn-settings.h"
#include "nspawn-setuid.h"
+#include "nspawn-stub-pid1.h"
#include "parse-util.h"
#include "path-util.h"
#include "process-util.h"
@@ -123,7 +124,7 @@ static const char *arg_selinux_apifs_context = NULL;
static const char *arg_slice = NULL;
static bool arg_private_network = false;
static bool arg_read_only = false;
-static bool arg_boot = false;
+static StartMode arg_start_mode = START_PID1;
static bool arg_ephemeral = false;
static LinkJournal arg_link_journal = LINK_AUTO;
static bool arg_link_journal_try = false;
@@ -193,6 +194,7 @@ static void help(void) {
" -x --ephemeral Run container with snapshot of root directory, and\n"
" remove it after exit\n"
" -i --image=PATH File system device or disk image for the container\n"
+ " -a --as-pid2 Maintain a stub init as PID1, invoke binary as PID2\n"
" -b --boot Boot up full system (i.e. invoke init)\n"
" --chdir=PATH Set working directory in the container\n"
" -u --user=USER Run the command under specified user or uid\n"
@@ -358,6 +360,7 @@ static int parse_argv(int argc, char *argv[]) {
{ "ephemeral", no_argument, NULL, 'x' },
{ "user", required_argument, NULL, 'u' },
{ "private-network", no_argument, NULL, ARG_PRIVATE_NETWORK },
+ { "as-pid2", no_argument, NULL, 'a' },
{ "boot", no_argument, NULL, 'b' },
{ "uuid", required_argument, NULL, ARG_UUID },
{ "read-only", no_argument, NULL, ARG_READ_ONLY },
@@ -404,7 +407,7 @@ static int parse_argv(int argc, char *argv[]) {
assert(argc >= 0);
assert(argv);
- while ((c = getopt_long(argc, argv, "+hD:u:bL:M:jS:Z:qi:xp:n", options, NULL)) >= 0)
+ while ((c = getopt_long(argc, argv, "+hD:u:abL:M:jS:Z:qi:xp:n", options, NULL)) >= 0)
switch (c) {
@@ -495,8 +498,23 @@ static int parse_argv(int argc, char *argv[]) {
break;
case 'b':
- arg_boot = true;
- arg_settings_mask |= SETTING_BOOT;
+ if (arg_start_mode == START_PID2) {
+ log_error("--boot and --as-pid2 may not be combined.");
+ return -EINVAL;
+ }
+
+ arg_start_mode = START_BOOT;
+ arg_settings_mask |= SETTING_START_MODE;
+ break;
+
+ case 'a':
+ if (arg_start_mode == START_BOOT) {
+ log_error("--boot and --as-pid2 may not be combined.");
+ return -EINVAL;
+ }
+
+ arg_start_mode = START_PID2;
+ arg_settings_mask |= SETTING_START_MODE;
break;
case ARG_UUID:
@@ -876,7 +894,7 @@ static int parse_argv(int argc, char *argv[]) {
if (arg_share_system)
arg_register = false;
- if (arg_boot && arg_share_system) {
+ if (arg_start_mode != START_PID1 && arg_share_system) {
log_error("--boot and --share-system may not be combined.");
return -EINVAL;
}
@@ -924,7 +942,7 @@ static int parse_argv(int argc, char *argv[]) {
if (!arg_parameters)
return log_oom();
- arg_settings_mask |= SETTING_BOOT;
+ arg_settings_mask |= SETTING_START_MODE;
}
/* Load all settings from .nspawn files */
@@ -960,7 +978,7 @@ static int verify_arguments(void) {
return -EINVAL;
}
- if (arg_boot && arg_kill_signal <= 0)
+ if (arg_start_mode == START_BOOT && arg_kill_signal <= 0)
arg_kill_signal = SIGRTMIN+3;
return 0;
@@ -2584,6 +2602,12 @@ static int inner_child(
if (chdir(arg_chdir) < 0)
return log_error_errno(errno, "Failed to change to specified working directory %s: %m", arg_chdir);
+ if (arg_start_mode == START_PID2) {
+ r = stub_pid1();
+ if (r < 0)
+ return r;
+ }
+
/* Now, explicitly close the log, so that we
* then can close all remaining fds. Closing
* the log explicitly first has the benefit
@@ -2595,7 +2619,7 @@ static int inner_child(
log_close();
(void) fdset_close_others(fds);
- if (arg_boot) {
+ if (arg_start_mode == START_BOOT) {
char **a;
size_t m;
@@ -2917,9 +2941,9 @@ static int load_settings(void) {
/* Copy over bits from the settings, unless they have been
* explicitly masked by command line switches. */
- if ((arg_settings_mask & SETTING_BOOT) == 0 &&
- settings->boot >= 0) {
- arg_boot = settings->boot;
+ if ((arg_settings_mask & SETTING_START_MODE) == 0 &&
+ settings->start_mode >= 0) {
+ arg_start_mode = settings->start_mode;
strv_free(arg_parameters);
arg_parameters = settings->parameters;
@@ -3074,6 +3098,10 @@ int main(int argc, char *argv[]) {
log_parse_environment();
log_open();
+ /* Make sure rename_process() in the stub init process can work */
+ saved_argv = argv;
+ saved_argc = argc;
+
r = parse_argv(argc, argv);
if (r <= 0)
goto finish;
@@ -3180,7 +3208,7 @@ int main(int argc, char *argv[]) {
}
}
- if (arg_boot) {
+ if (arg_start_mode == START_BOOT) {
if (path_is_os_tree(arg_directory) <= 0) {
log_error("Directory %s doesn't look like an OS root directory (os-release file is missing). Refusing.", arg_directory);
r = -EINVAL;