summaryrefslogtreecommitdiffstats
path: root/ospfd
diff options
context:
space:
mode:
authorRenato Westphal <renato@opensourcerouting.org>2021-11-18 18:52:20 +0100
committerRenato Westphal <renato@opensourcerouting.org>2021-11-23 19:31:39 +0100
commit1bb2674ce451d634635fcfec509bd195af8310ca (patch)
tree0bb97696ec7a396f21b6fb583d0c5d441096bdfa /ospfd
parentRevert "ospfd: remove commands for broken GR helper mode" (diff)
downloadfrr-1bb2674ce451d634635fcfec509bd195af8310ca.tar.xz
frr-1bb2674ce451d634635fcfec509bd195af8310ca.zip
ospfd: fix incorrect detection of topology changes in helper mode
This commit fixes a rather obscure bug that was causing the GR topotest to fail on a frequent basis. RFC 3623 specifies that a router acting as a helper to a restarting neighbor should monitor topology changes and abort the GR procedures when one is detected, falling back to normal OSPF operation. ospfd uses the ospf_lsa_different() function to detect when the content of an LSA has changed, which is considered as a topology change. The problem is that ospf_lsa_different() can return true even when the two LSAs passed as parameters are identical, provided one LSA has the OSPF_LSA_RECEIVED flag set and the other not. In the context of the ospf_gr_topo1 test, router rt6 performs a graceful restart and a few seconds later acts as a helper for router rt7. When it's acting as a helper for rt7, it still didn't translate its NSSA Type-7 LSAs, something that happens only after 7 seconds (OSPF_ABR_TASK_DELAY) of the first SPF run. The translated Type-5 LSAs on its LSDB were learned from the helping neighbors (rt3 and rt7). It's then possible that the NSSA Type-7 LSAs might be translated while rt6 is acting as helper for rt7, which causes the daemon to detect a non-existent topology change only because the OSPF_LSA_RECEIVED flag is unset in the recently originated Type-5 LSA. Fix this problem by ignoring the OSPF_LSA_RECEIVED flag when comparing LSAs for the purpose of topology change detection. In short, the bug would only show up when the restarting router would start acting as a helper immediately after coming back up (which would be hard to happen in the real world). The topotest failures became more frequent after commit 6255aad0bc78c1 because of the removal of the 'sleep' calls, which used to give ospfd more time to converge before start acting as a helper for other routers. The problem still occurred from time to time though. Fixes #9983. Signed-off-by: Renato Westphal <renato@opensourcerouting.org>
Diffstat (limited to 'ospfd')
-rw-r--r--ospfd/ospf_gr_helper.c3
-rw-r--r--ospfd/ospf_lsa.c30
-rw-r--r--ospfd/ospf_lsa.h3
3 files changed, 27 insertions, 9 deletions
diff --git a/ospfd/ospf_gr_helper.c b/ospfd/ospf_gr_helper.c
index a58a120b6..ae002fdc9 100644
--- a/ospfd/ospf_gr_helper.c
+++ b/ospfd/ospf_gr_helper.c
@@ -624,9 +624,6 @@ void ospf_helper_handle_topo_chg(struct ospf *ospf, struct ospf_lsa *lsa)
struct listnode *node;
struct ospf_interface *oi;
- if (!ospf->active_restarter_cnt)
- return;
-
/* Topo change not required to be handled if strict
* LSA check is disabled for this router.
*/
diff --git a/ospfd/ospf_lsa.c b/ospfd/ospf_lsa.c
index 10541b09b..6588d7020 100644
--- a/ospfd/ospf_lsa.c
+++ b/ospfd/ospf_lsa.c
@@ -2692,7 +2692,7 @@ struct ospf_lsa *ospf_lsa_install(struct ospf *ospf, struct ospf_interface *oi,
/* Do comparision and record if recalc needed. */
rt_recalc = 0;
- if (old == NULL || ospf_lsa_different(old, lsa)) {
+ if (old == NULL || ospf_lsa_different(old, lsa, false)) {
/* Ref rfc3623 section 3.2.3
* Installing new lsa or change in the existing LSA
* or flushing existing LSA leads to topo change
@@ -2700,7 +2700,9 @@ struct ospf_lsa *ospf_lsa_install(struct ospf *ospf, struct ospf_interface *oi,
* So, router should be aborted from HELPER role
* if it is detected as TOPO change.
*/
- if (CHECK_LSA_TYPE_1_TO_5_OR_7(lsa->data->type))
+ if (ospf->active_restarter_cnt
+ && CHECK_LSA_TYPE_1_TO_5_OR_7(lsa->data->type)
+ && ospf_lsa_different(old, lsa, true))
ospf_helper_handle_topo_chg(ospf, lsa);
rt_recalc = 1;
@@ -3302,8 +3304,25 @@ int ospf_lsa_more_recent(struct ospf_lsa *l1, struct ospf_lsa *l2)
return 0;
}
-/* If two LSAs are different, return 1, otherwise return 0. */
-int ospf_lsa_different(struct ospf_lsa *l1, struct ospf_lsa *l2)
+/*
+ * Check if two LSAs are different.
+ *
+ * l1
+ * The first LSA to compare.
+ *
+ * l2
+ * The second LSA to compare.
+ *
+ * ignore_rcvd_flag
+ * When set to true, ignore whether the LSAs were received from the network
+ * or not. This parameter should be set to true when checking for topology
+ * changes as part of the Graceful Restart helper neighbor procedures.
+ *
+ * Returns:
+ * true if the LSAs are different, false otherwise.
+ */
+int ospf_lsa_different(struct ospf_lsa *l1, struct ospf_lsa *l2,
+ bool ignore_rcvd_flag)
{
char *p1, *p2;
assert(l1);
@@ -3326,7 +3345,8 @@ int ospf_lsa_different(struct ospf_lsa *l1, struct ospf_lsa *l2)
if (l1->size == 0)
return 1;
- if (CHECK_FLAG((l1->flags ^ l2->flags), OSPF_LSA_RECEIVED))
+ if (!ignore_rcvd_flag
+ && CHECK_FLAG((l1->flags ^ l2->flags), OSPF_LSA_RECEIVED))
return 1; /* May be a stale LSA in the LSBD */
assert(l1->size > OSPF_LSA_HEADER_SIZE);
diff --git a/ospfd/ospf_lsa.h b/ospfd/ospf_lsa.h
index 2e648b335..a97957371 100644
--- a/ospfd/ospf_lsa.h
+++ b/ospfd/ospf_lsa.h
@@ -293,7 +293,8 @@ extern struct ospf_lsa *ospf_lsa_lookup_by_id(struct ospf_area *, uint32_t,
extern struct ospf_lsa *ospf_lsa_lookup_by_header(struct ospf_area *,
struct lsa_header *);
extern int ospf_lsa_more_recent(struct ospf_lsa *, struct ospf_lsa *);
-extern int ospf_lsa_different(struct ospf_lsa *, struct ospf_lsa *);
+extern int ospf_lsa_different(struct ospf_lsa *, struct ospf_lsa *,
+ bool ignore_rcvd_flag);
extern void ospf_flush_self_originated_lsas_now(struct ospf *);
extern int ospf_lsa_is_self_originated(struct ospf *, struct ospf_lsa *);