summary refs log tree commit diff stats
path: root/watchfrr
diff options
context:
space:
mode:
author David Lamparter <equinox@opensourcerouting.org> 2018-12-21 17:26:25 +0100
committer David Lamparter <equinox@diac24.net> 2019-02-19 21:34:08 +0100
commit 5c9d1c83a8729e1d6808d8abe74bb67c332a9f9a (patch)
tree 864acdf580b3d451934868ede14188efc3945938 /watchfrr
parent debian: fix dropping daemons.conf (diff)
download frr-5c9d1c83a8729e1d6808d8abe74bb67c332a9f9a.tar.xz
frr-5c9d1c83a8729e1d6808d8abe74bb67c332a9f9a.zip
watchfrr: don't wait forever at startup
If we wait forever for all daemons to come up, we can hang the entire boot process, especially on init.d based systems.

Signed-off-by: David Lamparter <equinox@diac24.net>
Diffstat (limited to 'watchfrr')
-rw-r--r-- watchfrr/watchfrr.c 49
1 files changed, 38 insertions, 11 deletions
diff --git a/watchfrr/watchfrr.c b/watchfrr/watchfrr.c
index e28da6db8..2a0b6d890 100644
--- a/watchfrr/watchfrr.c
+++ b/watchfrr/watchfrr.c
@@ -83,6 +83,7 @@ static const char *phase_str[] = {
};
#define PHASE_TIMEOUT (3*gs.restart_timeout)
+#define STARTUP_TIMEOUT 55 * 1000
struct restart_info {
const char *name;
@@ -97,6 +98,7 @@ struct restart_info {
static struct global_state {
restart_phase_t phase;
struct thread *t_phase_hanging;
+ struct thread *t_startup_timeout;
const char *vtydir;
long period;
long timeout;
@@ -630,23 +632,38 @@ static int handle_read(struct thread *t_read)
* Wait till we notice that all daemons are ready before
* we send we are ready to systemd
*/
-static void daemon_send_ready(void)
+static void daemon_send_ready(int exitcode)
{
+ FILE *fp;
static int sent = 0;
- if (!sent && gs.numdown == 0) {
- FILE *fp;
+ if (sent)
+ return;
+
+ if (exitcode == 0)
zlog_notice("all daemons up, doing startup-complete notify");
- frr_detach();
+ else if (gs.numdown < gs.numdaemons)
+ flog_err(EC_WATCHFRR_CONNECTION,
+ "startup did not complete within timeout"
+ " (%d/%d daemons running)",
+ gs.numdaemons - gs.numdown, gs.numdaemons);
+ else {
+ flog_err(EC_WATCHFRR_CONNECTION,
+ "all configured daemons failed to start"
+ " -- exiting watchfrr");
+ exit(exitcode);
+
+ }
- fp = fopen(DAEMON_VTY_DIR "/watchfrr.started", "w");
- if (fp)
- fclose(fp);
+ frr_detach();
+
+ fp = fopen(DAEMON_VTY_DIR "/watchfrr.started", "w");
+ if (fp)
+ fclose(fp);
#if defined HAVE_SYSTEMD
- systemd_send_started(master, 0);
+ systemd_send_started(master, 0);
#endif
- sent = 1;
- }
+ sent = 1;
}
static void daemon_up(struct daemon *dmn, const char *why)
@@ -655,7 +672,8 @@ static void daemon_up(struct daemon *dmn, const char *why)
gs.numdown--;
dmn->connect_tries = 0;
zlog_notice("%s state -> up : %s", dmn->name, why);
- daemon_send_ready();
+ if (gs.numdown == 0)
+ daemon_send_ready(0);
SET_WAKEUP_ECHO(dmn);
phase_check();
}
@@ -1030,6 +1048,12 @@ static char *translate_blanks(const char *cmd, const char *blankstr)
return res;
}
+static int startup_timeout(struct thread *t_wakeup)
+{
+ daemon_send_ready(1);
+ return 0;
+}
+
static void watchfrr_init(int argc, char **argv)
{
const char *special = "zebra";
@@ -1037,6 +1061,9 @@ static void watchfrr_init(int argc, char **argv)
struct daemon *dmn, **add = &gs.daemons;
char alldaemons[512] = "", *p = alldaemons;
+ thread_add_timer_msec(master, startup_timeout, NULL, STARTUP_TIMEOUT,
+ &gs.t_startup_timeout);
+
for (i = optind; i < argc; i++) {
dmn = XCALLOC(MTYPE_WATCHFRR_DAEMON, sizeof(*dmn));