diff options
author | David Lamparter <equinox@opensourcerouting.org> | 2018-12-21 17:26:25 +0100 |
---|---|---|
committer | David Lamparter <equinox@diac24.net> | 2019-02-19 21:34:08 +0100 |
commit | 5c9d1c83a8729e1d6808d8abe74bb67c332a9f9a (patch) | |
tree | 864acdf580b3d451934868ede14188efc3945938 /watchfrr | |
parent | debian: fix dropping daemons.conf (diff) | |
download | frr-5c9d1c83a8729e1d6808d8abe74bb67c332a9f9a.tar.xz frr-5c9d1c83a8729e1d6808d8abe74bb67c332a9f9a.zip |
watchfrr: don't wait forever at startup
If we wait forever for all daemons to come up, we can hang the entire
boot process, especially on init.d based systems.
Signed-off-by: David Lamparter <equinox@diac24.net>
Diffstat (limited to 'watchfrr')
-rw-r--r-- | watchfrr/watchfrr.c | 49 |
1 files changed, 38 insertions, 11 deletions
diff --git a/watchfrr/watchfrr.c b/watchfrr/watchfrr.c index e28da6db8..2a0b6d890 100644 --- a/watchfrr/watchfrr.c +++ b/watchfrr/watchfrr.c @@ -83,6 +83,7 @@ static const char *phase_str[] = { }; #define PHASE_TIMEOUT (3*gs.restart_timeout) +#define STARTUP_TIMEOUT 55 * 1000 struct restart_info { const char *name; @@ -97,6 +98,7 @@ struct restart_info { static struct global_state { restart_phase_t phase; struct thread *t_phase_hanging; + struct thread *t_startup_timeout; const char *vtydir; long period; long timeout; @@ -630,23 +632,38 @@ static int handle_read(struct thread *t_read) * Wait till we notice that all daemons are ready before * we send we are ready to systemd */ -static void daemon_send_ready(void) +static void daemon_send_ready(int exitcode) { + FILE *fp; static int sent = 0; - if (!sent && gs.numdown == 0) { - FILE *fp; + if (sent) + return; + + if (exitcode == 0) zlog_notice("all daemons up, doing startup-complete notify"); - frr_detach(); + else if (gs.numdown < gs.numdaemons) + flog_err(EC_WATCHFRR_CONNECTION, + "startup did not complete within timeout" + " (%d/%d daemons running)", + gs.numdaemons - gs.numdown, gs.numdaemons); + else { + flog_err(EC_WATCHFRR_CONNECTION, + "all configured daemons failed to start" + " -- exiting watchfrr"); + exit(exitcode); + + } - fp = fopen(DAEMON_VTY_DIR "/watchfrr.started", "w"); - if (fp) - fclose(fp); + frr_detach(); + + fp = fopen(DAEMON_VTY_DIR "/watchfrr.started", "w"); + if (fp) + fclose(fp); #if defined HAVE_SYSTEMD - systemd_send_started(master, 0); + systemd_send_started(master, 0); #endif - sent = 1; - } + sent = 1; } static void daemon_up(struct daemon *dmn, const char *why) @@ -655,7 +672,8 @@ static void daemon_up(struct daemon *dmn, const char *why) gs.numdown--; dmn->connect_tries = 0; zlog_notice("%s state -> up : %s", dmn->name, why); - daemon_send_ready(); + if (gs.numdown == 0) + daemon_send_ready(0); SET_WAKEUP_ECHO(dmn); phase_check(); } @@ -1030,6 +1048,12 @@ static char *translate_blanks(const char *cmd, const char *blankstr) return res; } +static int startup_timeout(struct thread *t_wakeup) +{ + daemon_send_ready(1); + return 0; +} + static void watchfrr_init(int argc, char **argv) { const char *special = "zebra"; @@ -1037,6 +1061,9 @@ static void watchfrr_init(int argc, char **argv) struct daemon *dmn, **add = &gs.daemons; char alldaemons[512] = "", *p = alldaemons; + thread_add_timer_msec(master, startup_timeout, NULL, STARTUP_TIMEOUT, + &gs.t_startup_timeout); + for (i = optind; i < argc; i++) { dmn = XCALLOC(MTYPE_WATCHFRR_DAEMON, sizeof(*dmn)); |