diff options
author | Kyle Walker <kwalker@redhat.com> | 2017-12-13 18:49:26 +0100 |
---|---|---|
committer | Kyle Walker <kwalker@redhat.com> | 2017-12-14 14:58:56 +0100 |
commit | d5641e0d7e8f55937fbc3a7ecd667e42c5836d80 (patch) | |
tree | fcbbcf855ab16280e0029ea8a57cad5b89d66c3f /src/basic/process-util.c | |
parent | sd-bus: fix a memory leak in message_new_reply() (#7636) (diff) | |
download | systemd-d5641e0d7e8f55937fbc3a7ecd667e42c5836d80.tar.xz systemd-d5641e0d7e8f55937fbc3a7ecd667e42c5836d80.zip |
core: Implement timeout based umount/remount limit
Remount, and subsequent umount, attempts can hang for inaccessible network
based mount points. This can leave a system in a hard hang state that
requires a hard reset in order to recover. This change moves the remount,
and umount attempts into separate child processes. The remount and umount
operations will block for up to 90 seconds (DEFAULT_TIMEOUT_USEC). Should
those waits fail, the parent will issue a SIGKILL to the child and continue
with the shutdown efforts.
In addition, instead of only reporting some additional errors on the final
attempt, failures are reported as they occur.
Diffstat (limited to 'src/basic/process-util.c')
-rw-r--r-- | src/basic/process-util.c | 61 |
1 files changed, 61 insertions, 0 deletions
diff --git a/src/basic/process-util.c b/src/basic/process-util.c index 5f001494f0..0b2eb07abc 100644 --- a/src/basic/process-util.c +++ b/src/basic/process-util.c @@ -699,6 +699,67 @@ int wait_for_terminate_and_warn(const char *name, pid_t pid, bool check_exit_cod return -EPROTO; } +/* + * Return values: + * < 0 : wait_for_terminate_with_timeout() failed to get the state of the + * process, the process timed out, the process was terminated by a + * signal, or failed for an unknown reason. + * >=0 : The process terminated normally with no failures. + * + * Success is indicated by a return value of zero, a timeout is indicated + * by ETIMEDOUT, and all other child failure states are indicated by error + * is indicated by a non-zero value. + */ +int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout) { + sigset_t mask; + int r; + usec_t until; + + assert_se(sigemptyset(&mask) == 0); + assert_se(sigaddset(&mask, SIGCHLD) == 0); + + /* Drop into a sigtimewait-based timeout. Waiting for the + * pid to exit. */ + until = now(CLOCK_MONOTONIC) + timeout; + for (;;) { + usec_t n; + siginfo_t status = {}; + struct timespec ts; + + n = now(CLOCK_MONOTONIC); + if (n >= until) + break; + + r = sigtimedwait(&mask, NULL, timespec_store(&ts, until - n)) < 0 ? -errno : 0; + /* Assuming we woke due to the child exiting. */ + if (waitid(P_PID, pid, &status, WEXITED|WNOHANG) == 0) { + if (status.si_pid == pid) { + /* This is the correct child.*/ + if (status.si_code == CLD_EXITED) + return (status.si_status == 0) ? 0 : -EPROTO; + else + return -EPROTO; + } + } + /* Not the child, check for errors and proceed appropriately */ + if (r < 0) { + switch (r) { + case -EAGAIN: + /* Timed out, child is likely hung. */ + return -ETIMEDOUT; + case -EINTR: + /* Received a different signal and should retry */ + continue; + default: + /* Return any unexpected errors */ + return r; + } + } + } + + return -EPROTO; +} + void sigkill_wait(pid_t pid) { assert(pid > 1); |