summaryrefslogtreecommitdiffstats
path: root/zebra/zebra_nhg.c
diff options
context:
space:
mode:
authorDonald Sharp <sharpd@nvidia.com>2023-10-19 22:38:12 +0200
committerDonald Sharp <sharpd@nvidia.com>2023-10-23 14:15:11 +0200
commita272a2b364604f829aa21f323c0422613b423c43 (patch)
tree3e70d520c9a862f945cc5173fcda601ca541c98d /zebra/zebra_nhg.c
parentMerge pull request #14607 from mobash-rasool/fixes2 (diff)
downloadfrr-a272a2b364604f829aa21f323c0422613b423c43.tar.xz
frr-a272a2b364604f829aa21f323c0422613b423c43.zip
zebra: Allow longer prefix matches for nexthops
Zebra currently does a shortest prefix match for resolving nexthops for a prefix. This is typically an ok thing to do but fails in several specific scenarios. If a nexthop matches to a route that is not usable, nexthop resolution just gives up and refuses to use that particular route. For example if zebra currently has a covering prefix say a 10.0.0.0/8. And about the same time it receives a 10.1.0.0/16 ( a more specific than the /8 ) and another route A, who's nexthop is 10.1.1.1. Imagine the 10.1.0.0/16 is processed enough to know we want to install it and the prefix is sent to the dataplane for installation( it is queued ) and then route A is processed, nexthop resolution will fail and the route A will be left in limbo as uninstallable. Let's modify the nexthop resolution code in zebra such that if a nexthop's most specific match is unusable, continue looking up the table till we get to the 0.0.0.0/0 route( if it's even installed ). If we find a usable route for the nexthop accept it and use it. The bgp_default_originate topology test is frequently failing with this exact problem: B>* 0.0.0.0/0 [200/0] via 192.168.1.1, r2-r1-eth0, weight 1, 00:00:21 B 1.0.1.17/32 [200/0] via 192.168.0.1 inactive, weight 1, 00:00:21 B>* 1.0.2.17/32 [200/0] via 192.168.1.1, r2-r1-eth0, weight 1, 00:00:21 C>* 1.0.3.17/32 is directly connected, lo, 00:02:00 B>* 1.0.5.17/32 [20/0] via 192.168.2.2, r2-r3-eth1, weight 1, 00:00:32 B>* 192.168.0.0/24 [200/0] via 192.168.1.1, r2-r1-eth0, weight 1, 00:00:21 B 192.168.1.0/24 [200/0] via 192.168.1.1 inactive, weight 1, 00:00:21 C>* 192.168.1.0/24 is directly connected, r2-r1-eth0, 00:02:00 C>* 192.168.2.0/24 is directly connected, r2-r3-eth1, 00:02:00 B>* 192.168.3.0/24 [20/0] via 192.168.2.2, r2-r3-eth1, weight 1, 00:00:32 B 198.51.1.1/32 [200/0] via 192.168.0.1 inactive, weight 1, 00:00:21 B>* 198.51.1.2/32 [20/0] via 192.168.2.2, r2-r3-eth1, weight 1, 00:00:32 Notice that the 1.0.1.17/32 route is inactive but the nexthop 192.168.0.1 is covered by both the 192.168.0.0/24 prefix( shortest match ) *and* the 0.0.0.0/0 route ( longest match ). When looking at the logs the 1.0.1.17/32 route was not being installed because the matching route was not in a usable state, which is because the 192.168.0.0/24 route was in the process of being installed. Signed-off-by: Donald Sharp <sharpd@nvidia.com>
Diffstat (limited to 'zebra/zebra_nhg.c')
-rw-r--r--zebra/zebra_nhg.c82
1 files changed, 36 insertions, 46 deletions
diff --git a/zebra/zebra_nhg.c b/zebra/zebra_nhg.c
index 6517b7830..19e2657f1 100644
--- a/zebra/zebra_nhg.c
+++ b/zebra/zebra_nhg.c
@@ -2248,20 +2248,6 @@ static int nexthop_active(struct nexthop *nexthop, struct nhg_hash_entry *nhe,
return 1;
}
- if (top &&
- ((top->family == AF_INET && top->prefixlen == IPV4_MAX_BITLEN &&
- nexthop->gate.ipv4.s_addr == top->u.prefix4.s_addr) ||
- (top->family == AF_INET6 && top->prefixlen == IPV6_MAX_BITLEN &&
- memcmp(&nexthop->gate.ipv6, &top->u.prefix6, IPV6_MAX_BYTELEN) ==
- 0)) &&
- nexthop->vrf_id == vrf_id) {
- if (IS_ZEBRA_DEBUG_RIB_DETAILED)
- zlog_debug(
- " :%s: Attempting to install a max prefixlength route through itself",
- __func__);
- return 0;
- }
-
/* Validation for ipv4 mapped ipv6 nexthop. */
if (IS_MAPPED_IPV6(&nexthop->gate.ipv6)) {
afi = AFI_IP;
@@ -2364,7 +2350,7 @@ static int nexthop_active(struct nexthop *nexthop, struct nhg_hash_entry *nhe,
zlog_debug(
" %s: Matched against ourself and prefix length is not max bit length",
__func__);
- return 0;
+ goto continue_up_tree;
}
/* Pick up selected route. */
@@ -2391,20 +2377,12 @@ static int nexthop_active(struct nexthop *nexthop, struct nhg_hash_entry *nhe,
/* If there is no selected route or matched route is EGP, go up
* tree.
*/
- if (!match) {
- do {
- rn = rn->parent;
- } while (rn && rn->info == NULL);
- if (rn)
- route_lock_node(rn);
- continue;
- }
/* If the candidate match's type is considered "connected",
* we consider it first.
*/
- if (RIB_CONNECTED_ROUTE(match) ||
- (RIB_SYSTEM_ROUTE(match) && RSYSTEM_ROUTE(type))) {
+ if (match && (RIB_CONNECTED_ROUTE(match) ||
+ (RIB_SYSTEM_ROUTE(match) && RSYSTEM_ROUTE(type)))) {
match = zebra_nhg_connected_ifindex(rn, match,
nexthop->ifindex);
@@ -2420,11 +2398,7 @@ static int nexthop_active(struct nexthop *nexthop, struct nhg_hash_entry *nhe,
zlog_debug(
"%s: %pNHv given ifindex does not match nexthops ifindex found: %pNHv",
__func__, nexthop, newhop);
- /*
- * NEXTHOP_TYPE_*_IFINDEX but ifindex
- * doesn't match what we found.
- */
- return 0;
+ goto continue_up_tree;
}
/* NHRP special case: need to indicate onlink */
@@ -2437,7 +2411,7 @@ static int nexthop_active(struct nexthop *nexthop, struct nhg_hash_entry *nhe,
__func__, match, match->nhe, newhop);
return 1;
- } else if (CHECK_FLAG(flags, ZEBRA_FLAG_ALLOW_RECURSION)) {
+ } else if (match && CHECK_FLAG(flags, ZEBRA_FLAG_ALLOW_RECURSION)) {
struct nexthop_group *nhg;
struct nexthop *resolver;
struct backup_nh_map_s map = {};
@@ -2473,6 +2447,10 @@ static int nexthop_active(struct nexthop *nexthop, struct nhg_hash_entry *nhe,
"%s: match %p (%pNG) not installed or being Route Replaced",
__func__, match, match->nhe);
+ if (CHECK_FLAG(match->status,
+ ROUTE_ENTRY_QUEUED))
+ goto continue_up_tree;
+
goto done_with_match;
}
@@ -2541,25 +2519,37 @@ done_with_match:
if (pmtu)
*pmtu = match->mtu;
- } else if (IS_ZEBRA_DEBUG_RIB_DETAILED)
- zlog_debug(
- " %s: Recursion failed to find",
- __func__);
-
- return resolved;
- } else {
- if (IS_ZEBRA_DEBUG_RIB_DETAILED) {
- zlog_debug(
- " %s: Route Type %s has not turned on recursion",
- __func__, zebra_route_string(type));
- if (type == ZEBRA_ROUTE_BGP
- && !CHECK_FLAG(flags, ZEBRA_FLAG_IBGP))
+ } else {
+ if (IS_ZEBRA_DEBUG_RIB_DETAILED)
zlog_debug(
- " EBGP: see \"disable-ebgp-connected-route-check\" or \"disable-connected-check\"");
+ " %s: Recursion failed to find while looking at %pRN",
+ __func__, rn);
+ goto continue_up_tree;
}
- return 0;
+
+ return 1;
+ } else if (IS_ZEBRA_DEBUG_RIB_DETAILED) {
+ zlog_debug(
+ " %s: Route Type %s has not turned on recursion %pRN failed to match",
+ __func__, zebra_route_string(type), rn);
+ if (type == ZEBRA_ROUTE_BGP
+ && !CHECK_FLAG(flags, ZEBRA_FLAG_IBGP))
+ zlog_debug(
+ " EBGP: see \"disable-ebgp-connected-route-check\" or \"disable-connected-check\"");
}
+
+ continue_up_tree:
+ /*
+ * If there is no selected route or matched route is EGP, go up
+ * tree.
+ */
+ do {
+ rn = rn->parent;
+ } while (rn && rn->info == NULL);
+ if (rn)
+ route_lock_node(rn);
}
+
if (IS_ZEBRA_DEBUG_RIB_DETAILED)
zlog_debug(" %s: Nexthop did not lookup in table",
__func__);