summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMark Stapp <mjs@voltanet.io>2019-10-28 13:07:23 +0100
committerGitHub <noreply@github.com>2019-10-28 13:07:23 +0100
commit882364f11a7ab5545b8c2def58b8996893b31702 (patch)
tree68673cbd330ed2e199ecce478cfb8a53ca4b8401
parentMerge pull request #5216 from ton31337/fix/override_peers_ttl_if_peer_group_c... (diff)
parentzebra: rt_netlink nexthop handling checkpatch (diff)
downloadfrr-882364f11a7ab5545b8c2def58b8996893b31702.tar.xz
frr-882364f11a7ab5545b8c2def58b8996893b31702.zip
Merge pull request #4897 from sworleys/zebra_nhg_add
Zebra Nexthop Group Rework and Kernel Nexthop Object API Init
-rw-r--r--doc/user/zebra.rst24
-rw-r--r--include/linux/nexthop.h58
-rw-r--r--include/subdir.am1
-rw-r--r--lib/nexthop.c62
-rw-r--r--lib/nexthop.h12
-rw-r--r--lib/nexthop_group.c130
-rw-r--r--lib/nexthop_group.h17
-rw-r--r--lib/route_types.txt2
-rw-r--r--pbrd/pbr_nht.c3
-rw-r--r--tests/topotests/bfd-topo2/r1/ipv6_routes.json1
-rw-r--r--tests/topotests/bfd-topo2/r2/ipv4_routes.json1
-rw-r--r--tests/topotests/bfd-topo2/r2/ipv6_routes.json1
-rw-r--r--tests/topotests/bfd-topo2/r3/ipv4_routes.json1
-rw-r--r--tests/topotests/bfd-topo2/r4/ipv6_routes.json1
-rw-r--r--tests/topotests/bgp_ipv6_rtadv/r1/ipv6_routes.json1
-rw-r--r--tests/topotests/bgp_vrf_lite_ipv6_rtadv/r1/ipv6_routes.json1
-rw-r--r--zebra/connected.c10
-rw-r--r--zebra/if_netlink.c25
-rw-r--r--zebra/interface.c89
-rw-r--r--zebra/interface.h18
-rw-r--r--zebra/kernel_netlink.c33
-rw-r--r--zebra/kernel_socket.c11
-rw-r--r--zebra/main.c1
-rw-r--r--zebra/redistribute.c8
-rw-r--r--zebra/rib.h42
-rw-r--r--zebra/rt.h8
-rw-r--r--zebra/rt_netlink.c1089
-rw-r--r--zebra/rt_netlink.h4
-rw-r--r--zebra/rt_socket.c5
-rw-r--r--zebra/rtread_getmsg.c2
-rw-r--r--zebra/subdir.am1
-rw-r--r--zebra/zapi_msg.c28
-rw-r--r--zebra/zebra_dplane.c295
-rw-r--r--zebra/zebra_dplane.h27
-rw-r--r--zebra/zebra_errors.c51
-rw-r--r--zebra/zebra_errors.h6
-rw-r--r--zebra/zebra_fpm_dt.c2
-rw-r--r--zebra/zebra_fpm_netlink.c2
-rw-r--r--zebra/zebra_fpm_protobuf.c2
-rw-r--r--zebra/zebra_mpls.c61
-rw-r--r--zebra/zebra_nhg.c1407
-rw-r--r--zebra/zebra_nhg.h199
-rw-r--r--zebra/zebra_nhg_private.h62
-rw-r--r--zebra/zebra_pw.c2
-rw-r--r--zebra/zebra_rib.c487
-rw-r--r--zebra/zebra_rnh.c22
-rw-r--r--zebra/zebra_router.c19
-rw-r--r--zebra/zebra_router.h8
-rw-r--r--zebra/zebra_snmp.c12
-rw-r--r--zebra/zebra_vty.c327
50 files changed, 4064 insertions, 617 deletions
diff --git a/doc/user/zebra.rst b/doc/user/zebra.rst
index af465f6fd..2099dfdd6 100644
--- a/doc/user/zebra.rst
+++ b/doc/user/zebra.rst
@@ -839,11 +839,22 @@ zebra Terminal Mode Commands
.. index:: show ipv6 route
.. clicmd:: show ipv6 route
-.. index:: show interface [{vrf VRF|brief}]
-.. clicmd:: show interface [{vrf VRF|brief}]
+.. index:: show [ip|ipv6] route [PREFIX] [nexthop-group]
+.. clicmd:: show [ip|ipv6] route [PREFIX] [nexthop-group]
-.. index:: show interface [{vrf all|brief}]
-.. clicmd:: show interface [{vrf all|brief}]
+ Display detailed information about a route. If [nexthop-group] is
+ included, it will display the nexthop group ID the route is using as well.
+
+.. index:: show interface [NAME] [{vrf VRF|brief}] [nexthop-group]
+.. clicmd:: show interface [NAME] [{vrf VRF|brief}] [nexthop-group]
+
+.. index:: show interface [NAME] [{vrf all|brief}] [nexthop-group]
+.. clicmd:: show interface [NAME] [{vrf all|brief}] [nexthop-group]
+
+ Display interface information. If no extra information is added, it will
+ dump information on all interfaces. If [NAME] is specified, it will display
+ detailed information about that single interface. If [nexthop-group] is
+ specified, it will display nexthop groups pointing out that interface.
.. index:: show ip prefix-list [NAME]
.. clicmd:: show ip prefix-list [NAME]
@@ -900,3 +911,8 @@ zebra Terminal Mode Commands
Reset statistics related to the zebra code that interacts with the
optional Forwarding Plane Manager (FPM) component.
+.. index:: show nexthop-group [ID] [vrf NAME] [ip|ipv6]
+.. clicmd:: show nexthop-group [ID] [vrf NAME] [ip|ipv6]
+
+ Display nexthop groups created by zebra.
+
diff --git a/include/linux/nexthop.h b/include/linux/nexthop.h
new file mode 100644
index 000000000..e4d6e256e
--- /dev/null
+++ b/include/linux/nexthop.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _LINUX_NEXTHOP_H
+#define _LINUX_NEXTHOP_H
+
+#include <linux/types.h>
+
+#define RTM_NHA(h) ((struct rtattr *)(((char *)(h)) + \
+ NLMSG_ALIGN(sizeof(struct nhmsg))))
+
+struct nhmsg {
+ unsigned char nh_family;
+ unsigned char nh_scope; /* return only */
+ unsigned char nh_protocol; /* Routing protocol that installed nh */
+ unsigned char resvd;
+ unsigned int nh_flags; /* RTNH_F flags */
+};
+
+struct nexthop_grp {
+ __u32 id; /* nexthop id - must exist */
+ __u8 weight; /* weight of this nexthop */
+ __u8 resvd1;
+ __u16 resvd2;
+};
+
+enum {
+ NEXTHOP_GRP_TYPE_MPATH, /* default type if not specified */
+ __NEXTHOP_GRP_TYPE_MAX,
+};
+
+#define NEXTHOP_GRP_TYPE_MAX (__NEXTHOP_GRP_TYPE_MAX - 1)
+
+enum {
+ NHA_UNSPEC,
+ NHA_ID, /* u32; id for nexthop. id == 0 means auto-assign */
+
+ NHA_GROUP, /* array of nexthop_grp */
+ NHA_GROUP_TYPE, /* u16 one of NEXTHOP_GRP_TYPE */
+ /* if NHA_GROUP attribute is added, no other attributes can be set */
+
+ NHA_BLACKHOLE, /* flag; nexthop used to blackhole packets */
+ /* if NHA_BLACKHOLE is added, OIF, GATEWAY, ENCAP can not be set */
+
+ NHA_OIF, /* u32; nexthop device */
+ NHA_GATEWAY, /* be32 (IPv4) or in6_addr (IPv6) gw address */
+ NHA_ENCAP_TYPE, /* u16; lwt encap type */
+ NHA_ENCAP, /* lwt encap data */
+
+ /* NHA_OIF can be appended to dump request to return only
+ * nexthops using given device
+ */
+ NHA_GROUPS, /* flag; only return nexthop groups in dump */
+ NHA_MASTER, /* u32; only return nexthops with given master dev */
+
+ __NHA_MAX,
+};
+
+#define NHA_MAX (__NHA_MAX - 1)
+#endif
diff --git a/include/subdir.am b/include/subdir.am
index 0d7fed285..b1ca1be54 100644
--- a/include/subdir.am
+++ b/include/subdir.am
@@ -6,6 +6,7 @@ noinst_HEADERS += \
include/linux/mpls_iptunnel.h \
include/linux/neighbour.h \
include/linux/netlink.h \
+ include/linux/nexthop.h \
include/linux/rtnetlink.h \
include/linux/socket.h \
include/linux/net_namespace.h \
diff --git a/lib/nexthop.c b/lib/nexthop.c
index cf5bed3d6..73c2de0cd 100644
--- a/lib/nexthop.c
+++ b/lib/nexthop.c
@@ -349,7 +349,7 @@ const char *nexthop2str(const struct nexthop *nexthop, char *str, int size)
* left branch is 'resolved' and right branch is 'next':
* https://en.wikipedia.org/wiki/Tree_traversal#/media/File:Sorted_binary_tree_preorder.svg
*/
-struct nexthop *nexthop_next(struct nexthop *nexthop)
+struct nexthop *nexthop_next(const struct nexthop *nexthop)
{
if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
return nexthop->resolved;
@@ -364,6 +364,19 @@ struct nexthop *nexthop_next(struct nexthop *nexthop)
return NULL;
}
+/* Return the next nexthop in the tree that is resolved and active */
+struct nexthop *nexthop_next_active_resolved(const struct nexthop *nexthop)
+{
+ struct nexthop *next = nexthop_next(nexthop);
+
+ while (next
+ && (CHECK_FLAG(next->flags, NEXTHOP_FLAG_RECURSIVE)
+ || !CHECK_FLAG(next->flags, NEXTHOP_FLAG_ACTIVE)))
+ next = nexthop_next(next);
+
+ return next;
+}
+
unsigned int nexthop_level(struct nexthop *nexthop)
{
unsigned int rv = 0;
@@ -374,16 +387,13 @@ unsigned int nexthop_level(struct nexthop *nexthop)
return rv;
}
-uint32_t nexthop_hash(const struct nexthop *nexthop)
+/* Only hash word-sized things, let cmp do the rest. */
+uint32_t nexthop_hash_quick(const struct nexthop *nexthop)
{
uint32_t key = 0x45afe398;
key = jhash_3words(nexthop->type, nexthop->vrf_id,
nexthop->nh_label_type, key);
- /* gate and blackhole are together in a union */
- key = jhash(&nexthop->gate, sizeof(nexthop->gate), key);
- key = jhash(&nexthop->src, sizeof(nexthop->src), key);
- key = jhash(&nexthop->rmap_src, sizeof(nexthop->rmap_src), key);
if (nexthop->nh_label) {
int labels = nexthop->nh_label->num_labels;
@@ -410,17 +420,35 @@ uint32_t nexthop_hash(const struct nexthop *nexthop)
key = jhash_1word(nexthop->nh_label->label[i], key);
}
- switch (nexthop->type) {
- case NEXTHOP_TYPE_IPV4_IFINDEX:
- case NEXTHOP_TYPE_IPV6_IFINDEX:
- case NEXTHOP_TYPE_IFINDEX:
- key = jhash_1word(nexthop->ifindex, key);
- break;
- case NEXTHOP_TYPE_BLACKHOLE:
- case NEXTHOP_TYPE_IPV4:
- case NEXTHOP_TYPE_IPV6:
- break;
- }
+ key = jhash_2words(nexthop->ifindex,
+ CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK),
+ key);
+
+ return key;
+}
+
+
+#define GATE_SIZE 4 /* Number of uint32_t words in struct g_addr */
+
+/* For a more granular hash */
+uint32_t nexthop_hash(const struct nexthop *nexthop)
+{
+ uint32_t gate_src_rmap_raw[GATE_SIZE * 3] = {};
+ /* Get all the quick stuff */
+ uint32_t key = nexthop_hash_quick(nexthop);
+
+ assert(((sizeof(nexthop->gate) + sizeof(nexthop->src)
+ + sizeof(nexthop->rmap_src))
+ / 3)
+ == (GATE_SIZE * sizeof(uint32_t)));
+
+ memcpy(gate_src_rmap_raw, &nexthop->gate, GATE_SIZE);
+ memcpy(gate_src_rmap_raw + GATE_SIZE, &nexthop->src, GATE_SIZE);
+ memcpy(gate_src_rmap_raw + (2 * GATE_SIZE), &nexthop->rmap_src,
+ GATE_SIZE);
+
+ key = jhash2(gate_src_rmap_raw, (GATE_SIZE * 3), key);
+
return key;
}
diff --git a/lib/nexthop.h b/lib/nexthop.h
index 9dd5fc6fd..fe029f186 100644
--- a/lib/nexthop.h
+++ b/lib/nexthop.h
@@ -137,6 +137,14 @@ void nexthop_del_labels(struct nexthop *);
* 32-bit hash of nexthop
*/
uint32_t nexthop_hash(const struct nexthop *nexthop);
+/*
+ * Hash a nexthop only on word-sized attributes:
+ * - vrf_id
+ * - ifindex
+ * - type
+ * - (some) flags
+ */
+uint32_t nexthop_hash_quick(const struct nexthop *nexthop);
extern bool nexthop_same(const struct nexthop *nh1, const struct nexthop *nh2);
extern bool nexthop_same_no_labels(const struct nexthop *nh1,
@@ -153,7 +161,9 @@ extern int nexthop_same_firsthop(struct nexthop *next1, struct nexthop *next2);
extern const char *nexthop2str(const struct nexthop *nexthop,
char *str, int size);
-extern struct nexthop *nexthop_next(struct nexthop *nexthop);
+extern struct nexthop *nexthop_next(const struct nexthop *nexthop);
+extern struct nexthop *
+nexthop_next_active_resolved(const struct nexthop *nexthop);
extern unsigned int nexthop_level(struct nexthop *nexthop);
/* Copies to an already allocated nexthop struct */
extern void nexthop_copy(struct nexthop *copy, const struct nexthop *nexthop,
diff --git a/lib/nexthop_group.c b/lib/nexthop_group.c
index 9564321d3..9552f8956 100644
--- a/lib/nexthop_group.c
+++ b/lib/nexthop_group.c
@@ -81,6 +81,17 @@ uint8_t nexthop_group_nexthop_num(const struct nexthop_group *nhg)
return num;
}
+uint8_t nexthop_group_nexthop_num_no_recurse(const struct nexthop_group *nhg)
+{
+ struct nexthop *nhop;
+ uint8_t num = 0;
+
+ for (nhop = nhg->nexthop; nhop; nhop = nhop->next)
+ num++;
+
+ return num;
+}
+
uint8_t nexthop_group_active_nexthop_num(const struct nexthop_group *nhg)
{
struct nexthop *nhop;
@@ -94,7 +105,22 @@ uint8_t nexthop_group_active_nexthop_num(const struct nexthop_group *nhg)
return num;
}
-struct nexthop *nexthop_exists(struct nexthop_group *nhg, struct nexthop *nh)
+uint8_t
+nexthop_group_active_nexthop_num_no_recurse(const struct nexthop_group *nhg)
+{
+ struct nexthop *nhop;
+ uint8_t num = 0;
+
+ for (nhop = nhg->nexthop; nhop; nhop = nhop->next) {
+ if (CHECK_FLAG(nhop->flags, NEXTHOP_FLAG_ACTIVE))
+ num++;
+ }
+
+ return num;
+}
+
+struct nexthop *nexthop_exists(const struct nexthop_group *nhg,
+ const struct nexthop *nh)
{
struct nexthop *nexthop;
@@ -106,6 +132,74 @@ struct nexthop *nexthop_exists(struct nexthop_group *nhg, struct nexthop *nh)
return NULL;
}
+static bool
+nexthop_group_equal_common(const struct nexthop_group *nhg1,
+ const struct nexthop_group *nhg2,
+ uint8_t (*nexthop_group_nexthop_num_func)(
+ const struct nexthop_group *nhg))
+{
+ if (nhg1 && !nhg2)
+ return false;
+
+ if (!nhg1 && nhg2)
+ return false;
+
+ if (nhg1 == nhg2)
+ return true;
+
+ if (nexthop_group_nexthop_num_func(nhg1)
+ != nexthop_group_nexthop_num_func(nhg2))
+ return false;
+
+ return true;
+}
+
+/* This assumes ordered */
+bool nexthop_group_equal_no_recurse(const struct nexthop_group *nhg1,
+ const struct nexthop_group *nhg2)
+{
+ struct nexthop *nh1 = NULL;
+ struct nexthop *nh2 = NULL;
+
+ if (!nexthop_group_equal_common(nhg1, nhg2,
+ &nexthop_group_nexthop_num_no_recurse))
+ return false;
+
+ for (nh1 = nhg1->nexthop, nh2 = nhg2->nexthop; nh1 || nh2;
+ nh1 = nh1->next, nh2 = nh2->next) {
+ if (nh1 && !nh2)
+ return false;
+ if (!nh1 && nh2)
+ return false;
+ if (!nexthop_same(nh1, nh2))
+ return false;
+ }
+
+ return true;
+}
+
+/* This assumes ordered */
+bool nexthop_group_equal(const struct nexthop_group *nhg1,
+ const struct nexthop_group *nhg2)
+{
+ struct nexthop *nh1 = NULL;
+ struct nexthop *nh2 = NULL;
+
+ if (!nexthop_group_equal_common(nhg1, nhg2, &nexthop_group_nexthop_num))
+ return false;
+
+ for (nh1 = nhg1->nexthop, nh2 = nhg2->nexthop; nh1 || nh2;
+ nh1 = nexthop_next(nh1), nh2 = nexthop_next(nh2)) {
+ if (nh1 && !nh2)
+ return false;
+ if (!nh1 && nh2)
+ return false;
+ if (!nexthop_same(nh1, nh2))
+ return false;
+ }
+
+ return true;
+}
struct nexthop_group *nexthop_group_new(void)
{
return XCALLOC(MTYPE_NEXTHOP_GROUP, sizeof(struct nexthop_group));
@@ -119,6 +213,9 @@ void nexthop_group_copy(struct nexthop_group *to, struct nexthop_group *from)
void nexthop_group_delete(struct nexthop_group **nhg)
{
+ if ((*nhg)->nexthop)
+ nexthops_free((*nhg)->nexthop);
+
XFREE(MTYPE_NEXTHOP_GROUP, *nhg);
}
@@ -217,7 +314,7 @@ void copy_nexthops(struct nexthop **tnh, const struct nexthop *nh,
}
}
-uint32_t nexthop_group_hash(const struct nexthop_group *nhg)
+uint32_t nexthop_group_hash_no_recurse(const struct nexthop_group *nhg)
{
struct nexthop *nh;
uint32_t key = 0;
@@ -232,6 +329,35 @@ uint32_t nexthop_group_hash(const struct nexthop_group *nhg)
return key;
}
+uint32_t nexthop_group_hash(const struct nexthop_group *nhg)
+{
+ struct nexthop *nh;
+ uint32_t key = 0;
+
+ for (ALL_NEXTHOPS_PTR(nhg, nh))
+ key = jhash_1word(nexthop_hash(nh), key);
+
+ return key;
+}
+
+void nexthop_group_mark_duplicates(struct nexthop_group *nhg)
+{
+ struct nexthop *nexthop, *prev;
+
+ for (ALL_NEXTHOPS_PTR(nhg, nexthop)) {
+ UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_DUPLICATE);
+ for (ALL_NEXTHOPS_PTR(nhg, prev)) {
+ if (prev == nexthop)
+ break;
+ if (nexthop_same_firsthop(nexthop, prev)) {
+ SET_FLAG(nexthop->flags,
+ NEXTHOP_FLAG_DUPLICATE);
+ break;
+ }
+ }
+ }
+}
+
static void nhgc_delete_nexthops(struct nexthop_group_cmd *nhgc)
{
struct nexthop *nexthop;
diff --git a/lib/nexthop_group.h b/lib/nexthop_group.h
index 4f4d40eb3..391775c69 100644
--- a/lib/nexthop_group.h
+++ b/lib/nexthop_group.h
@@ -47,7 +47,9 @@ void nexthop_group_copy(struct nexthop_group *to,
void copy_nexthops(struct nexthop **tnh, const struct nexthop *nh,
struct nexthop *rparent);
+uint32_t nexthop_group_hash_no_recurse(const struct nexthop_group *nhg);
uint32_t nexthop_group_hash(const struct nexthop_group *nhg);
+void nexthop_group_mark_duplicates(struct nexthop_group *nhg);
/* The following for loop allows to iterate over the nexthop
* structure of routes.
@@ -110,8 +112,15 @@ void nexthop_group_disable_vrf(struct vrf *vrf);
void nexthop_group_interface_state_change(struct interface *ifp,
ifindex_t oldifindex);
-extern struct nexthop *nexthop_exists(struct nexthop_group *nhg,
- struct nexthop *nh);
+extern struct nexthop *nexthop_exists(const struct nexthop_group *nhg,
+ const struct nexthop *nh);
+/* This assumes ordered */
+extern bool nexthop_group_equal_no_recurse(const struct nexthop_group *nhg1,
+ const struct nexthop_group *nhg2);
+
+/* This assumes ordered */
+extern bool nexthop_group_equal(const struct nexthop_group *nhg1,
+ const struct nexthop_group *nhg2);
extern struct nexthop_group_cmd *nhgc_find(const char *name);
@@ -120,7 +129,11 @@ extern void nexthop_group_write_nexthop(struct vty *vty, struct nexthop *nh);
/* Return the number of nexthops in this nhg */
extern uint8_t nexthop_group_nexthop_num(const struct nexthop_group *nhg);
extern uint8_t
+nexthop_group_nexthop_num_no_recurse(const struct nexthop_group *nhg);
+extern uint8_t
nexthop_group_active_nexthop_num(const struct nexthop_group *nhg);
+extern uint8_t
+nexthop_group_active_nexthop_num_no_recurse(const struct nexthop_group *nhg);
#ifdef __cplusplus
}
diff --git a/lib/route_types.txt b/lib/route_types.txt
index 59f3a91cf..71d0a4644 100644
--- a/lib/route_types.txt
+++ b/lib/route_types.txt
@@ -84,6 +84,7 @@ ZEBRA_ROUTE_PBR, pbr, pbrd, 'F', 1, 1, 0, "PBR"
ZEBRA_ROUTE_BFD, bfd, bfdd, '-', 0, 0, 0, "BFD"
ZEBRA_ROUTE_OPENFABRIC, openfabric, fabricd, 'f', 1, 1, 1, "OpenFabric"
ZEBRA_ROUTE_VRRP, vrrp, vrrpd, '-', 0, 0, 0, "VRRP"
+ZEBRA_ROUTE_NHG, nhg, none, '-', 0, 0, 0, "Nexthop Group"
ZEBRA_ROUTE_ALL, wildcard, none, '-', 0, 0, 0, "-"
@@ -113,3 +114,4 @@ ZEBRA_ROUTE_PBR, "Policy Based Routing (PBR)"
ZEBRA_ROUTE_BFD, "Bidirectional Fowarding Detection (BFD)"
ZEBRA_ROUTE_VRRP, "Virtual Router Redundancy Protocol (VRRP)"
ZEBRA_ROUTE_OPENFABRIC, "OpenFabric Routing Protocol"
+ZEBRA_ROUTE_NHG, "Zebra Nexthop Groups (NHG)"
diff --git a/pbrd/pbr_nht.c b/pbrd/pbr_nht.c
index 67a1fe2ff..7ccd14d1f 100644
--- a/pbrd/pbr_nht.c
+++ b/pbrd/pbr_nht.c
@@ -578,8 +578,6 @@ void pbr_nht_delete_individual_nexthop(struct pbr_map_sequence *pbrms)
hash_release(pbr_nhg_hash, pnhgc);
- _nexthop_del(pbrms->nhg, nh);
- nexthop_free(nh);
nexthop_group_delete(&pbrms->nhg);
XFREE(MTYPE_TMP, pbrms->internal_nhg_name);
}
@@ -639,7 +637,6 @@ void pbr_nht_delete_group(const char *name)
if (pbrms->nhgrp_name
&& strmatch(pbrms->nhgrp_name, name)) {
pbrms->reason |= PBR_MAP_INVALID_NO_NEXTHOPS;
- nexthop_group_delete(&pbrms->nhg);
pbrms->nhg = NULL;
pbrms->internal_nhg_name = NULL;
pbrm->valid = false;
diff --git a/tests/topotests/bfd-topo2/r1/ipv6_routes.json b/tests/topotests/bfd-topo2/r1/ipv6_routes.json
index d09439a8a..0fd03b516 100644
--- a/tests/topotests/bfd-topo2/r1/ipv6_routes.json
+++ b/tests/topotests/bfd-topo2/r1/ipv6_routes.json
@@ -33,7 +33,6 @@
{
"interfaceName": "r1-eth0",
"interfaceIndex": 2,
- "flags": 1,
"active": true,
"afi": "ipv6"
}
diff --git a/tests/topotests/bfd-topo2/r2/ipv4_routes.json b/tests/topotests/bfd-topo2/r2/ipv4_routes.json
index 3c41e1343..69a5f1a5b 100644
--- a/tests/topotests/bfd-topo2/r2/ipv4_routes.json
+++ b/tests/topotests/bfd-topo2/r2/ipv4_routes.json
@@ -11,7 +11,6 @@
{
"active": true,
"directlyConnected": true,
- "flags": 1,
"interfaceIndex": 3,
"interfaceName": "r2-eth1"
}
diff --git a/tests/topotests/bfd-topo2/r2/ipv6_routes.json b/tests/topotests/bfd-topo2/r2/ipv6_routes.json
index bb45bbae5..66abade38 100644
--- a/tests/topotests/bfd-topo2/r2/ipv6_routes.json
+++ b/tests/topotests/bfd-topo2/r2/ipv6_routes.json
@@ -11,7 +11,6 @@
{
"active": true,
"directlyConnected": true,
- "flags": 1,
"interfaceIndex": 4,
"interfaceName": "r2-eth2"
}
diff --git a/tests/topotests/bfd-topo2/r3/ipv4_routes.json b/tests/topotests/bfd-topo2/r3/ipv4_routes.json
index cbf116e68..d4a0812ae 100644
--- a/tests/topotests/bfd-topo2/r3/ipv4_routes.json
+++ b/tests/topotests/bfd-topo2/r3/ipv4_routes.json
@@ -11,7 +11,6 @@
{
"active": true,
"directlyConnected": true,
- "flags": 1,
"interfaceIndex": 2,
"interfaceName": "r3-eth0"
}
diff --git a/tests/topotests/bfd-topo2/r4/ipv6_routes.json b/tests/topotests/bfd-topo2/r4/ipv6_routes.json
index a22c90cbb..af8272c4a 100644
--- a/tests/topotests/bfd-topo2/r4/ipv6_routes.json
+++ b/tests/topotests/bfd-topo2/r4/ipv6_routes.json
@@ -11,7 +11,6 @@
{
"active": true,
"directlyConnected": true,
- "flags": 1,
"interfaceIndex": 2,
"interfaceName": "r4-eth0"
}
diff --git a/tests/topotests/bgp_ipv6_rtadv/r1/ipv6_routes.json b/tests/topotests/bgp_ipv6_rtadv/r1/ipv6_routes.json
index d0378b564..acf5c8b27 100644
--- a/tests/topotests/bgp_ipv6_rtadv/r1/ipv6_routes.json
+++ b/tests/topotests/bgp_ipv6_rtadv/r1/ipv6_routes.json
@@ -10,7 +10,6 @@
{
"interfaceName": "r1-eth0",
"interfaceIndex": 2,
- "flags": 1,
"active": true,
"afi": "ipv6"
}
diff --git a/tests/topotests/bgp_vrf_lite_ipv6_rtadv/r1/ipv6_routes.json b/tests/topotests/bgp_vrf_lite_ipv6_rtadv/r1/ipv6_routes.json
index 1ca62094b..e5aff94bd 100644
--- a/tests/topotests/bgp_vrf_lite_ipv6_rtadv/r1/ipv6_routes.json
+++ b/tests/topotests/bgp_vrf_lite_ipv6_rtadv/r1/ipv6_routes.json
@@ -10,7 +10,6 @@
"internalFlags": 0,
"nexthops": [
{
- "flags": 1,
"afi": "ipv6",
"interfaceIndex": 2,
"interfaceName": "r1-eth0",
diff --git a/zebra/connected.c b/zebra/connected.c
index a81c0cbe7..75f4f53bc 100644
--- a/zebra/connected.c
+++ b/zebra/connected.c
@@ -251,10 +251,10 @@ void connected_up(struct interface *ifp, struct connected *ifc)
metric = (ifc->metric < (uint32_t)METRIC_MAX) ?
ifc->metric : ifp->metric;
rib_add(afi, SAFI_UNICAST, zvrf->vrf->vrf_id, ZEBRA_ROUTE_CONNECT,
- 0, 0, &p, NULL, &nh, zvrf->table_id, metric, 0, 0, 0);
+ 0, 0, &p, NULL, &nh, 0, zvrf->table_id, metric, 0, 0, 0);
rib_add(afi, SAFI_MULTICAST, zvrf->vrf->vrf_id, ZEBRA_ROUTE_CONNECT,
- 0, 0, &p, NULL, &nh, zvrf->table_id, metric, 0, 0, 0);
+ 0, 0, &p, NULL, &nh, 0, zvrf->table_id, metric, 0, 0, 0);
/* Schedule LSP forwarding entries for processing, if appropriate. */
if (zvrf->vrf->vrf_id == VRF_DEFAULT) {
@@ -393,11 +393,11 @@ void connected_down(struct interface *ifp, struct connected *ifc)
* Same logic as for connected_up(): push the changes into the
* head.
*/
- rib_delete(afi, SAFI_UNICAST, zvrf->vrf->vrf_id, ZEBRA_ROUTE_CONNECT,
- 0, 0, &p, NULL, &nh, zvrf->table_id, 0, 0, false);
+ rib_delete(afi, SAFI_UNICAST, zvrf->vrf->vrf_id, ZEBRA_ROUTE_CONNECT, 0,
+ 0, &p, NULL, &nh, 0, zvrf->table_id, 0, 0, false);
rib_delete(afi, SAFI_MULTICAST, zvrf->vrf->vrf_id, ZEBRA_ROUTE_CONNECT,
- 0, 0, &p, NULL, &nh, zvrf->table_id, 0, 0, false);
+ 0, 0, &p, NULL, &nh, 0, zvrf->table_id, 0, 0, false);
/* Schedule LSP forwarding entries for processing, if appropriate. */
if (zvrf->vrf->vrf_id == VRF_DEFAULT) {
diff --git a/zebra/if_netlink.c b/zebra/if_netlink.c
index d42f68cbe..c09007bcb 100644
--- a/zebra/if_netlink.c
+++ b/zebra/if_netlink.c
@@ -66,6 +66,7 @@
#include "zebra/zebra_ptm.h"
#include "zebra/zebra_mpls.h"
#include "zebra/kernel_netlink.h"
+#include "zebra/rt_netlink.h"
#include "zebra/if_netlink.h"
#include "zebra/zebra_errors.h"
#include "zebra/zebra_vxlan.h"
@@ -807,6 +808,23 @@ int interface_lookup_netlink(struct zebra_ns *zns)
/* fixup linkages */
zebra_if_update_all_links();
+ return 0;
+}
+
+/**
+ * interface_addr_lookup_netlink() - Look up interface addresses
+ *
+ * @zns: Zebra netlink socket
+ * Return: Result status
+ */
+static int interface_addr_lookup_netlink(struct zebra_ns *zns)
+{
+ int ret;
+ struct zebra_dplane_info dp_info;
+ struct nlsock *netlink_cmd = &zns->netlink_cmd;
+
+ /* Capture key info from ns struct */
+ zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
/* Get IPv4 address of the interfaces. */
ret = netlink_request_intf_addr(netlink_cmd, AF_INET, RTM_GETADDR, 0);
@@ -1460,6 +1478,13 @@ int netlink_protodown(struct interface *ifp, bool down)
void interface_list(struct zebra_ns *zns)
{
interface_lookup_netlink(zns);
+ /* We add routes for interface address,
+ * so we need to get the nexthop info
+ * from the kernel before we can do that
+ */
+ netlink_nexthop_read(zns);
+
+ interface_addr_lookup_netlink(zns);
}
#endif /* GNU_LINUX */
diff --git a/zebra/interface.c b/zebra/interface.c
index ef03cf87f..daa93e36d 100644
--- a/zebra/interface.c
+++ b/zebra/interface.c
@@ -107,6 +107,17 @@ static void zebra_if_node_destroy(route_table_delegate_t *delegate,
route_node_destroy(delegate, table, node);
}
+static void zebra_if_nhg_dependents_free(struct zebra_if *zebra_if)
+{
+ nhg_connected_tree_free(&zebra_if->nhg_dependents);
+}
+
+static void zebra_if_nhg_dependents_init(struct zebra_if *zebra_if)
+{
+ nhg_connected_tree_init(&zebra_if->nhg_dependents);
+}
+
+
route_table_delegate_t zebra_if_table_delegate = {
.create_node = route_node_create,
.destroy_node = zebra_if_node_destroy};
@@ -120,6 +131,9 @@ static int if_zebra_new_hook(struct interface *ifp)
zebra_if->multicast = IF_ZEBRA_MULTICAST_UNSPEC;
zebra_if->shutdown = IF_ZEBRA_SHUTDOWN_OFF;
+
+ zebra_if_nhg_dependents_init(zebra_if);
+
zebra_ptm_if_init(zebra_if);
ifp->ptm_enable = zebra_ptm_get_enable_state();
@@ -175,6 +189,34 @@ static int if_zebra_new_hook(struct interface *ifp)
return 0;
}
+static void if_nhg_dependents_check_valid(struct nhg_hash_entry *nhe)
+{
+ zebra_nhg_check_valid(nhe);
+ if (!CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_VALID))
+ /* Assuming uninstalled as well here */
+ UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED);
+}
+
+static void if_down_nhg_dependents(const struct interface *ifp)
+{
+ struct nhg_connected *rb_node_dep = NULL;
+ struct zebra_if *zif = (struct zebra_if *)ifp->info;
+
+ frr_each(nhg_connected_tree, &zif->nhg_dependents, rb_node_dep)
+ if_nhg_dependents_check_valid(rb_node_dep->nhe);
+}
+
+static void if_nhg_dependents_release(const struct interface *ifp)
+{
+ struct nhg_connected *rb_node_dep = NULL;
+ struct zebra_if *zif = (struct zebra_if *)ifp->info;
+
+ frr_each(nhg_connected_tree, &zif->nhg_dependents, rb_node_dep) {
+ rb_node_dep->nhe->ifp = NULL; /* Null it out */
+ if_nhg_dependents_check_valid(rb_node_dep->nhe);
+ }
+}
+
/* Called when interface is deleted. */
static int if_zebra_delete_hook(struct interface *ifp)
{
@@ -196,7 +238,11 @@ static int if_zebra_delete_hook(struct interface *ifp)
list_delete(&rtadv->AdvDNSSLList);
#endif /* HAVE_RTADV */
+ if_nhg_dependents_release(ifp);
+ zebra_if_nhg_dependents_free(zebra_if);
+
XFREE(MTYPE_TMP, zebra_if->desc);
+
THREAD_OFF(zebra_if->speed_update);
XFREE(MTYPE_ZINFO, zebra_if);
@@ -925,6 +971,47 @@ static void if_down_del_nbr_connected(struct interface *ifp)
}
}
+void if_nhg_dependents_add(struct interface *ifp, struct nhg_hash_entry *nhe)
+{
+ if (ifp->info) {
+ struct zebra_if *zif = (struct zebra_if *)ifp->info;
+
+ nhg_connected_tree_add_nhe(&zif->nhg_dependents, nhe);
+ }
+}
+
+void if_nhg_dependents_del(struct interface *ifp, struct nhg_hash_entry *nhe)
+{
+ if (ifp->info) {
+ struct zebra_if *zif = (struct zebra_if *)ifp->info;
+
+ nhg_connected_tree_del_nhe(&zif->nhg_dependents, nhe);
+ }
+}
+
+unsigned int if_nhg_dependents_count(const struct interface *ifp)
+{
+ if (ifp->info) {
+ struct zebra_if *zif = (struct zebra_if *)ifp->info;
+
+ return nhg_connected_tree_count(&zif->nhg_dependents);
+ }
+
+ return 0;
+}
+
+
+bool if_nhg_dependents_is_empty(const struct interface *ifp)
+{
+ if (ifp->info) {
+ struct zebra_if *zif = (struct zebra_if *)ifp->info;
+
+ return nhg_connected_tree_is_empty(&zif->nhg_dependents);
+ }
+
+ return false;
+}
+
/* Interface is up. */
void if_up(struct interface *ifp)
{
@@ -988,6 +1075,8 @@ void if_down(struct interface *ifp)
zif->down_count++;
quagga_timestamp(2, zif->down_last, sizeof(zif->down_last));
+ if_down_nhg_dependents(ifp);
+
/* Handle interface down for specific types for EVPN. Non-VxLAN
* interfaces
* are checked to see if (remote) neighbor entries need to be purged
diff --git a/zebra/interface.h b/zebra/interface.h
index e134b9b42..78ccbae62 100644
--- a/zebra/interface.h
+++ b/zebra/interface.h
@@ -27,6 +27,7 @@
#include "hook.h"
#include "zebra/zebra_l2.h"
+#include "zebra/zebra_nhg_private.h"
#ifdef __cplusplus
extern "C" {
@@ -277,6 +278,15 @@ struct zebra_if {
/* Installed addresses chains tree. */
struct route_table *ipv4_subnets;
+ /* Nexthops pointing to this interface */
+ /**
+ * Any nexthop that we get should have an
+ * interface. When an interface goes down,
+ * we will use this list to update the nexthops
+ * pointing to it with that info.
+ */
+ struct nhg_connected_tree_head nhg_dependents;
+
/* Information about up/down changes */
unsigned int up_count;
char up_last[QUAGGA_TIMESTAMP_LEN];
@@ -424,6 +434,14 @@ extern void zebra_if_update_link(struct interface *ifp, ifindex_t link_ifindex,
extern void zebra_if_update_all_links(void);
extern void zebra_if_set_protodown(struct interface *ifp, bool down);
+/* Nexthop group connected functions */
+extern void if_nhg_dependents_add(struct interface *ifp,
+ struct nhg_hash_entry *nhe);
+extern void if_nhg_dependents_del(struct interface *ifp,
+ struct nhg_hash_entry *nhe);
+extern unsigned int if_nhg_dependents_count(const struct interface *ifp);
+extern bool if_nhg_dependents_is_empty(const struct interface *ifp);
+
extern void vrf_add_update(struct vrf *vrfp);
#ifdef HAVE_PROC_NET_DEV
diff --git a/zebra/kernel_netlink.c b/zebra/kernel_netlink.c
index f52b4746a..23f1a3bf8 100644
--- a/zebra/kernel_netlink.c
+++ b/zebra/kernel_netlink.c
@@ -99,6 +99,9 @@ static const struct message nlmsg_str[] = {{RTM_NEWROUTE, "RTM_NEWROUTE"},
{RTM_NEWRULE, "RTM_NEWRULE"},
{RTM_DELRULE, "RTM_DELRULE"},
{RTM_GETRULE, "RTM_GETRULE"},
+ {RTM_NEWNEXTHOP, "RTM_NEWNEXTHOP"},
+ {RTM_DELNEXTHOP, "RTM_DELNEXTHOP"},
+ {RTM_GETNEXTHOP, "RTM_GETNEXTHOP"},
{0}};
static const struct message rtproto_str[] = {
@@ -291,6 +294,10 @@ static int netlink_information_fetch(struct nlmsghdr *h, ns_id_t ns_id,
return netlink_rule_change(h, ns_id, startup);
case RTM_DELRULE:
return netlink_rule_change(h, ns_id, startup);
+ case RTM_NEWNEXTHOP:
+ return netlink_nexthop_change(h, ns_id, startup);
+ case RTM_DELNEXTHOP:
+ return netlink_nexthop_change(h, ns_id, startup);
default:
/*
* If we have received this message then
@@ -884,15 +891,20 @@ int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
msg_type,
err->msg.nlmsg_seq,
err->msg.nlmsg_pid);
- } else
- flog_err(
- EC_ZEBRA_UNEXPECTED_MESSAGE,
- "%s error: %s, type=%s(%u), seq=%u, pid=%u",
- nl->name,
- safe_strerror(-errnum),
- nl_msg_type_to_str(msg_type),
- msg_type, err->msg.nlmsg_seq,
- err->msg.nlmsg_pid);
+ } else {
+ if ((msg_type != RTM_GETNEXTHOP)
+ || !startup)
+ flog_err(
+ EC_ZEBRA_UNEXPECTED_MESSAGE,
+ "%s error: %s, type=%s(%u), seq=%u, pid=%u",
+ nl->name,
+ safe_strerror(-errnum),
+ nl_msg_type_to_str(
+ msg_type),
+ msg_type,
+ err->msg.nlmsg_seq,
+ err->msg.nlmsg_pid);
+ }
return -1;
}
@@ -1096,7 +1108,8 @@ void kernel_init(struct zebra_ns *zns)
RTMGRP_IPV4_MROUTE |
RTMGRP_NEIGH |
(1 << (RTNLGRP_IPV4_RULE - 1)) |
- (1 << (RTNLGRP_IPV6_RULE - 1));
+ (1 << (RTNLGRP_IPV6_RULE - 1)) |
+ (1 << (RTNLGRP_NEXTHOP - 1));
snprintf(zns->netlink.name, sizeof(zns->netlink.name),
"netlink-listen (NS %u)", zns->ns_id);
diff --git a/zebra/kernel_socket.c b/zebra/kernel_socket.c
index f5aca2341..c2812aa47 100644
--- a/zebra/kernel_socket.c
+++ b/zebra/kernel_socket.c
@@ -1139,16 +1139,17 @@ void rtm_read(struct rt_msghdr *rtm)
*/
if (rtm->rtm_type == RTM_CHANGE)
rib_delete(afi, SAFI_UNICAST, VRF_DEFAULT, ZEBRA_ROUTE_KERNEL,
- 0, zebra_flags, &p, NULL, NULL, RT_TABLE_MAIN,
- 0, 0, true);
+ 0, zebra_flags, &p, NULL, NULL, 0, RT_TABLE_MAIN, 0,
+ 0, true);
if (rtm->rtm_type == RTM_GET || rtm->rtm_type == RTM_ADD
|| rtm->rtm_type == RTM_CHANGE)
rib_add(afi, SAFI_UNICAST, VRF_DEFAULT, ZEBRA_ROUTE_KERNEL, 0,
- zebra_flags, &p, NULL, &nh, RT_TABLE_MAIN, 0, 0, 0, 0);
+ zebra_flags, &p, NULL, &nh, 0, RT_TABLE_MAIN,
+ 0, 0, 0, 0);
else
rib_delete(afi, SAFI_UNICAST, VRF_DEFAULT, ZEBRA_ROUTE_KERNEL,
- 0, zebra_flags, &p, NULL, &nh, RT_TABLE_MAIN,
- 0, 0, true);
+ 0, zebra_flags, &p, NULL, &nh, 0, RT_TABLE_MAIN, 0,
+ 0, true);
}
/* Interface function for the kernel routing table updates. Support
diff --git a/zebra/main.c b/zebra/main.c
index f0225ac5e..334354eaa 100644
--- a/zebra/main.c
+++ b/zebra/main.c
@@ -165,6 +165,7 @@ static void sigint(void)
}
if (zrouter.lsp_process_q)
work_queue_free_and_null(&zrouter.lsp_process_q);
+
vrf_terminate();
ns_walk_func(zebra_ns_early_shutdown);
diff --git a/zebra/redistribute.c b/zebra/redistribute.c
index 0dc9de0c5..4e0163f8a 100644
--- a/zebra/redistribute.c
+++ b/zebra/redistribute.c
@@ -643,7 +643,7 @@ int zebra_add_import_table_entry(struct zebra_vrf *zvrf, struct route_node *rn,
afi = family2afi(rn->p.family);
if (rmap_name)
ret = zebra_import_table_route_map_check(
- afi, re->type, re->instance, &rn->p, re->ng.nexthop,
+ afi, re->type, re->instance, &rn->p, re->ng->nexthop,
zvrf->vrf->vrf_id, re->tag, rmap_name);
if (ret != RMAP_PERMITMATCH) {
@@ -676,10 +676,10 @@ int zebra_add_import_table_entry(struct zebra_vrf *zvrf, struct route_node *rn,
newre->metric = re->metric;
newre->mtu = re->mtu;
newre->table = zvrf->table_id;
- newre->nexthop_num = 0;
newre->uptime = monotime(NULL);
newre->instance = re->table;
- route_entry_copy_nexthops(newre, re->ng.nexthop);
+ newre->ng = nexthop_group_new();
+ route_entry_copy_nexthops(newre, re->ng->nexthop);
rib_add_multipath(afi, SAFI_UNICAST, &p, NULL, newre);
@@ -696,7 +696,7 @@ int zebra_del_import_table_entry(struct zebra_vrf *zvrf, struct route_node *rn,
prefix_copy(&p, &rn->p);
rib_delete(afi, SAFI_UNICAST, zvrf->vrf->vrf_id, ZEBRA_ROUTE_TABLE,
- re->table, re->flags, &p, NULL, re->ng.nexthop,
+ re->table, re->flags, &p, NULL, re->ng->nexthop, re->nhe_id,
zvrf->table_id, re->metric, re->distance, false);
return 0;
diff --git a/zebra/rib.h b/zebra/rib.h
index ee1df89c0..35aa011c0 100644
--- a/zebra/rib.h
+++ b/zebra/rib.h
@@ -88,11 +88,14 @@ struct route_entry {
struct re_list_item next;
/* Nexthop structure (from RIB) */
- struct nexthop_group ng;
+ struct nexthop_group *ng;
/* Nexthop group from FIB (optional) */
struct nexthop_group fib_ng;
+ /* Nexthop group hash entry ID */
+ uint32_t nhe_id;
+
/* Tag */
route_tag_t tag;
@@ -135,10 +138,6 @@ struct route_entry {
/* Route has Failed installation into the Data Plane in some manner */
#define ROUTE_ENTRY_FAILED 0x20
- /* Nexthop information. */
- uint8_t nexthop_num;
- uint8_t nexthop_active_num;
-
/* Sequence value incremented for each dataplane operation */
uint32_t dplane_sequence;
@@ -154,13 +153,14 @@ struct route_entry {
#define RIB_KERNEL_ROUTE(R) RKERNEL_ROUTE((R)->type)
/* meta-queue structure:
- * sub-queue 0: connected, kernel
- * sub-queue 1: static
- * sub-queue 2: RIP, RIPng, OSPF, OSPF6, IS-IS, EIGRP, NHRP
- * sub-queue 3: iBGP, eBGP
- * sub-queue 4: any other origin (if any)
+ * sub-queue 0: nexthop group objects
+ * sub-queue 1: connected, kernel
+ * sub-queue 2: static
+ * sub-queue 3: RIP, RIPng, OSPF, OSPF6, IS-IS, EIGRP, NHRP
+ * sub-queue 4: iBGP, eBGP
+ * sub-queue 5: any other origin (if any)
*/
-#define MQ_SIZE 5
+#define MQ_SIZE 6
struct meta_queue {
struct list *subq[MQ_SIZE];
uint32_t size; /* sum of lengths of all subqueues */
@@ -210,7 +210,7 @@ DECLARE_LIST(re_list, struct route_entry, next);
#define RIB_ROUTE_QUEUED(x) (1 << (x))
// If MQ_SIZE is modified this value needs to be updated.
-#define RIB_ROUTE_ANY_QUEUED 0x1F
+#define RIB_ROUTE_ANY_QUEUED 0x3F
/*
* The maximum qindex that can be used.
@@ -364,8 +364,8 @@ extern void rib_uninstall_kernel(struct route_node *rn, struct route_entry *re);
extern int rib_add(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type,
unsigned short instance, int flags, struct prefix *p,
struct prefix_ipv6 *src_p, const struct nexthop *nh,
- uint32_t table_id, uint32_t metric, uint32_t mtu,
- uint8_t distance, route_tag_t tag);
+ uint32_t nhe_id, uint32_t table_id, uint32_t metric,
+ uint32_t mtu, uint8_t distance, route_tag_t tag);
extern int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p,
struct prefix_ipv6 *src_p, struct route_entry *re);
@@ -373,8 +373,8 @@ extern int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p,
extern void rib_delete(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type,
unsigned short instance, int flags, struct prefix *p,
struct prefix_ipv6 *src_p, const struct nexthop *nh,
- uint32_t table_id, uint32_t metric, uint8_t distance,
- bool fromkernel);
+ uint32_t nhe_id, uint32_t table_id, uint32_t metric,
+ uint8_t distance, bool fromkernel);
extern struct route_entry *rib_match(afi_t afi, safi_t safi, vrf_id_t vrf_id,
union g_addr *addr,
@@ -398,7 +398,13 @@ extern unsigned long rib_score_proto(uint8_t proto, unsigned short instance);
extern unsigned long rib_score_proto_table(uint8_t proto,
unsigned short instance,
struct route_table *table);
-extern void rib_queue_add(struct route_node *rn);
+
+extern int rib_queue_add(struct route_node *rn);
+
+struct nhg_ctx; /* Forward declaration */
+
+extern int rib_queue_nhg_add(struct nhg_ctx *ctx);
+
extern void meta_queue_free(struct meta_queue *mq);
extern int zebra_rib_labeled_unicast(struct route_entry *re);
extern struct route_table *rib_table_ipv6;
@@ -527,7 +533,7 @@ static inline struct nexthop_group *rib_active_nhg(struct route_entry *re)
if (re->fib_ng.nexthop)
return &(re->fib_ng);
else
- return &(re->ng);
+ return re->ng;
}
extern void zebra_vty_init(void);
diff --git a/zebra/rt.h b/zebra/rt.h
index f311a6b9d..4b9a3f83f 100644
--- a/zebra/rt.h
+++ b/zebra/rt.h
@@ -40,13 +40,17 @@ extern "C" {
#define RSYSTEM_ROUTE(type) \
((RKERNEL_ROUTE(type)) || (type) == ZEBRA_ROUTE_CONNECT)
+
/*
- * Update or delete a route, LSP, pseudowire, or vxlan MAC from the kernel,
- * using info from a dataplane context.
+ * Update or delete a route, nexthop, LSP, pseudowire, or vxlan MAC from the
+ * kernel, using info from a dataplane context.
*/
extern enum zebra_dplane_result kernel_route_update(
struct zebra_dplane_ctx *ctx);
+extern enum zebra_dplane_result
+kernel_nexthop_update(struct zebra_dplane_ctx *ctx);
+
extern enum zebra_dplane_result kernel_lsp_update(
struct zebra_dplane_ctx *ctx);
diff --git a/zebra/rt_netlink.c b/zebra/rt_netlink.c
index 43e44cad1..640802fe3 100644
--- a/zebra/rt_netlink.c
+++ b/zebra/rt_netlink.c
@@ -27,6 +27,7 @@
#include <linux/mpls_iptunnel.h>
#include <linux/neighbour.h>
#include <linux/rtnetlink.h>
+#include <linux/nexthop.h>
/* Hack for GNU libc version 2. */
#ifndef MSG_TRUNC
@@ -49,6 +50,7 @@
#include "vty.h"
#include "mpls.h"
#include "vxlan.h"
+#include "printfrr.h"
#include "zebra/zapi_msg.h"
#include "zebra/zebra_ns.h"
@@ -62,6 +64,7 @@
#include "zebra/zebra_mpls.h"
#include "zebra/kernel_netlink.h"
#include "zebra/rt_netlink.h"
+#include "zebra/zebra_nhg.h"
#include "zebra/zebra_mroute.h"
#include "zebra/zebra_vxlan.h"
#include "zebra/zebra_errors.h"
@@ -72,6 +75,8 @@
static vlanid_t filter_vlan = 0;
+static bool supports_nh;
+
struct gw_family_t {
uint16_t filler;
uint16_t family;
@@ -186,6 +191,7 @@ static inline int zebra2proto(int proto)
proto = RTPROT_OPENFABRIC;
break;
case ZEBRA_ROUTE_TABLE:
+ case ZEBRA_ROUTE_NHG:
proto = RTPROT_ZEBRA;
break;
default:
@@ -205,7 +211,7 @@ static inline int zebra2proto(int proto)
return proto;
}
-static inline int proto2zebra(int proto, int family)
+static inline int proto2zebra(int proto, int family, bool is_nexthop)
{
switch (proto) {
case RTPROT_BABEL:
@@ -249,6 +255,12 @@ static inline int proto2zebra(int proto, int family)
case RTPROT_OPENFABRIC:
proto = ZEBRA_ROUTE_OPENFABRIC;
break;
+ case RTPROT_ZEBRA:
+ if (is_nexthop) {
+ proto = ZEBRA_ROUTE_NHG;
+ break;
+ }
+ /* Intentional fall thru */
default:
/*
* When a user adds a new protocol this will show up
@@ -319,6 +331,169 @@ static int parse_encap_mpls(struct rtattr *tb, mpls_label_t *labels)
return num_labels;
}
+static struct nexthop
+parse_nexthop_unicast(ns_id_t ns_id, struct rtmsg *rtm, struct rtattr **tb,
+ enum blackhole_type bh_type, int index, void *prefsrc,
+ void *gate, afi_t afi, vrf_id_t vrf_id)
+{
+ struct interface *ifp = NULL;
+ struct nexthop nh = {0};
+ mpls_label_t labels[MPLS_MAX_LABELS] = {0};
+ int num_labels = 0;
+
+ vrf_id_t nh_vrf_id = vrf_id;
+ size_t sz = (afi == AFI_IP) ? 4 : 16;
+
+ if (bh_type == BLACKHOLE_UNSPEC) {
+ if (index && !gate)
+ nh.type = NEXTHOP_TYPE_IFINDEX;
+ else if (index && gate)
+ nh.type = (afi == AFI_IP) ? NEXTHOP_TYPE_IPV4_IFINDEX
+ : NEXTHOP_TYPE_IPV6_IFINDEX;
+ else if (!index && gate)
+ nh.type = (afi == AFI_IP) ? NEXTHOP_TYPE_IPV4
+ : NEXTHOP_TYPE_IPV6;
+ else {
+ nh.type = NEXTHOP_TYPE_BLACKHOLE;
+ nh.bh_type = bh_type;
+ }
+ } else {
+ nh.type = NEXTHOP_TYPE_BLACKHOLE;
+ nh.bh_type = bh_type;
+ }
+ nh.ifindex = index;
+ if (prefsrc)
+ memcpy(&nh.src, prefsrc, sz);
+ if (gate)
+ memcpy(&nh.gate, gate, sz);
+
+ if (index) {
+ ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), index);
+ if (ifp)
+ nh_vrf_id = ifp->vrf_id;
+ }
+ nh.vrf_id = nh_vrf_id;
+
+ if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE]
+ && *(uint16_t *)RTA_DATA(tb[RTA_ENCAP_TYPE])
+ == LWTUNNEL_ENCAP_MPLS) {
+ num_labels = parse_encap_mpls(tb[RTA_ENCAP], labels);
+ }
+
+ if (rtm->rtm_flags & RTNH_F_ONLINK)
+ SET_FLAG(nh.flags, NEXTHOP_FLAG_ONLINK);
+
+ if (num_labels)
+ nexthop_add_labels(&nh, ZEBRA_LSP_STATIC, num_labels, labels);
+
+ return nh;
+}
+
+static uint8_t parse_multipath_nexthops_unicast(ns_id_t ns_id,
+ struct route_entry *re,
+ struct rtmsg *rtm,
+ struct rtnexthop *rtnh,
+ struct rtattr **tb,
+ void *prefsrc, vrf_id_t vrf_id)
+{
+ void *gate = NULL;
+ struct interface *ifp = NULL;
+ int index = 0;
+ /* MPLS labels */
+ mpls_label_t labels[MPLS_MAX_LABELS] = {0};
+ int num_labels = 0;
+ struct rtattr *rtnh_tb[RTA_MAX + 1] = {};
+
+ int len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
+ vrf_id_t nh_vrf_id = vrf_id;
+
+ re->ng = nexthop_group_new();
+
+ for (;;) {
+ struct nexthop *nh = NULL;
+
+ if (len < (int)sizeof(*rtnh) || rtnh->rtnh_len > len)
+ break;
+
+ index = rtnh->rtnh_ifindex;
+ if (index) {
+ /*
+ * Yes we are looking this up
+ * for every nexthop and just
+ * using the last one looked
+ * up right now
+ */
+ ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
+ index);
+ if (ifp)
+ nh_vrf_id = ifp->vrf_id;
+ else {
+ flog_warn(
+ EC_ZEBRA_UNKNOWN_INTERFACE,
+ "%s: Unknown interface %u specified, defaulting to VRF_DEFAULT",
+ __PRETTY_FUNCTION__, index);
+ nh_vrf_id = VRF_DEFAULT;
+ }
+ } else
+ nh_vrf_id = vrf_id;
+
+ if (rtnh->rtnh_len > sizeof(*rtnh)) {
+ memset(rtnh_tb, 0, sizeof(rtnh_tb));
+
+ netlink_parse_rtattr(rtnh_tb, RTA_MAX, RTNH_DATA(rtnh),
+ rtnh->rtnh_len - sizeof(*rtnh));
+ if (rtnh_tb[RTA_GATEWAY])
+ gate = RTA_DATA(rtnh_tb[RTA_GATEWAY]);
+ if (rtnh_tb[RTA_ENCAP] && rtnh_tb[RTA_ENCAP_TYPE]
+ && *(uint16_t *)RTA_DATA(rtnh_tb[RTA_ENCAP_TYPE])
+ == LWTUNNEL_ENCAP_MPLS) {
+ num_labels = parse_encap_mpls(
+ rtnh_tb[RTA_ENCAP], labels);
+ }
+ }
+
+ if (gate && rtm->rtm_family == AF_INET) {
+ if (index)
+ nh = route_entry_nexthop_ipv4_ifindex_add(
+ re, gate, prefsrc, index, nh_vrf_id);
+ else
+ nh = route_entry_nexthop_ipv4_add(
+ re, gate, prefsrc, nh_vrf_id);
+ } else if (gate && rtm->rtm_family == AF_INET6) {
+ if (index)
+ nh = route_entry_nexthop_ipv6_ifindex_add(
+ re, gate, index, nh_vrf_id);
+ else
+ nh = route_entry_nexthop_ipv6_add(re, gate,
+ nh_vrf_id);
+ } else
+ nh = route_entry_nexthop_ifindex_add(re, index,
+ nh_vrf_id);
+
+ if (nh) {
+ if (num_labels)
+ nexthop_add_labels(nh, ZEBRA_LSP_STATIC,
+ num_labels, labels);
+
+ if (rtnh->rtnh_flags & RTNH_F_ONLINK)
+ SET_FLAG(nh->flags, NEXTHOP_FLAG_ONLINK);
+ }
+
+ if (rtnh->rtnh_len == 0)
+ break;
+
+ len -= NLMSG_ALIGN(rtnh->rtnh_len);
+ rtnh = RTNH_NEXT(rtnh);
+ }
+
+ uint8_t nhop_num = nexthop_group_nexthop_num(re->ng);
+
+ if (!nhop_num)
+ nexthop_group_delete(&re->ng);
+
+ return nhop_num;
+}
+
/* Looking up routing table by netlink interface. */
static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id,
int startup)
@@ -340,6 +515,7 @@ static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id,
uint32_t mtu = 0;
uint8_t distance = 0;
route_tag_t tag = 0;
+ uint32_t nhe_id = 0;
void *dest = NULL;
void *gate = NULL;
@@ -347,10 +523,6 @@ static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id,
void *src = NULL; /* IPv6 srcdest source prefix */
enum blackhole_type bh_type = BLACKHOLE_UNSPEC;
- /* MPLS labels */
- mpls_label_t labels[MPLS_MAX_LABELS] = {0};
- int num_labels = 0;
-
rtm = NLMSG_DATA(h);
if (startup && h->nlmsg_type != RTM_NEWROUTE)
@@ -423,7 +595,7 @@ static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id,
/* Route which inserted by Zebra. */
if (is_selfroute(rtm->rtm_protocol)) {
flags |= ZEBRA_FLAG_SELFROUTE;
- proto = proto2zebra(rtm->rtm_protocol, rtm->rtm_family);
+ proto = proto2zebra(rtm->rtm_protocol, rtm->rtm_family, false);
}
if (tb[RTA_OIF])
index = *(int *)RTA_DATA(tb[RTA_OIF]);
@@ -444,6 +616,9 @@ static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id,
if (tb[RTA_GATEWAY])
gate = RTA_DATA(tb[RTA_GATEWAY]);
+ if (tb[RTA_NH_ID])
+ nhe_id = *(uint32_t *)RTA_DATA(tb[RTA_NH_ID]);
+
if (tb[RTA_PRIORITY])
metric = *(int *)RTA_DATA(tb[RTA_PRIORITY]);
@@ -547,75 +722,24 @@ static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id,
afi = AFI_IP6;
if (h->nlmsg_type == RTM_NEWROUTE) {
- struct interface *ifp;
- vrf_id_t nh_vrf_id = vrf_id;
if (!tb[RTA_MULTIPATH]) {
- struct nexthop nh;
- size_t sz = (afi == AFI_IP) ? 4 : 16;
-
- memset(&nh, 0, sizeof(nh));
-
- if (bh_type == BLACKHOLE_UNSPEC) {
- if (index && !gate)
- nh.type = NEXTHOP_TYPE_IFINDEX;
- else if (index && gate)
- nh.type =
- (afi == AFI_IP)
- ? NEXTHOP_TYPE_IPV4_IFINDEX
- : NEXTHOP_TYPE_IPV6_IFINDEX;
- else if (!index && gate)
- nh.type = (afi == AFI_IP)
- ? NEXTHOP_TYPE_IPV4
- : NEXTHOP_TYPE_IPV6;
- else {
- nh.type = NEXTHOP_TYPE_BLACKHOLE;
- nh.bh_type = bh_type;
- }
- } else {
- nh.type = NEXTHOP_TYPE_BLACKHOLE;
- nh.bh_type = bh_type;
- }
- nh.ifindex = index;
- if (prefsrc)
- memcpy(&nh.src, prefsrc, sz);
- if (gate)
- memcpy(&nh.gate, gate, sz);
-
- if (index) {
- ifp = if_lookup_by_index_per_ns(
- zebra_ns_lookup(ns_id),
- index);
- if (ifp)
- nh_vrf_id = ifp->vrf_id;
- }
- nh.vrf_id = nh_vrf_id;
+ struct nexthop nh = {0};
- if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE]
- && *(uint16_t *)RTA_DATA(tb[RTA_ENCAP_TYPE])
- == LWTUNNEL_ENCAP_MPLS) {
- num_labels =
- parse_encap_mpls(tb[RTA_ENCAP], labels);
+ if (!nhe_id) {
+ nh = parse_nexthop_unicast(
+ ns_id, rtm, tb, bh_type, index, prefsrc,
+ gate, afi, vrf_id);
}
-
- if (rtm->rtm_flags & RTNH_F_ONLINK)
- SET_FLAG(nh.flags, NEXTHOP_FLAG_ONLINK);
-
- if (num_labels)
- nexthop_add_labels(&nh, ZEBRA_LSP_STATIC,
- num_labels, labels);
-
rib_add(afi, SAFI_UNICAST, vrf_id, proto, 0, flags, &p,
- &src_p, &nh, table, metric, mtu, distance, tag);
+ &src_p, &nh, nhe_id, table, metric, mtu,
+ distance, tag);
} else {
/* This is a multipath route */
-
struct route_entry *re;
struct rtnexthop *rtnh =
(struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]);
- len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
-
re = XCALLOC(MTYPE_RE, sizeof(struct route_entry));
re->type = proto;
re->distance = distance;
@@ -624,148 +748,73 @@ static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id,
re->mtu = mtu;
re->vrf_id = vrf_id;
re->table = table;
- re->nexthop_num = 0;
re->uptime = monotime(NULL);
re->tag = tag;
+ re->nhe_id = nhe_id;
- for (;;) {
- struct nexthop *nh = NULL;
-
- if (len < (int)sizeof(*rtnh)
- || rtnh->rtnh_len > len)
- break;
-
- index = rtnh->rtnh_ifindex;
- if (index) {
- /*
- * Yes we are looking this up
- * for every nexthop and just
- * using the last one looked
- * up right now
- */
- ifp = if_lookup_by_index_per_ns(
- zebra_ns_lookup(ns_id),
- index);
- if (ifp)
- nh_vrf_id = ifp->vrf_id;
- else {
- flog_warn(
- EC_ZEBRA_UNKNOWN_INTERFACE,
- "%s: Unknown interface %u specified, defaulting to VRF_DEFAULT",
- __PRETTY_FUNCTION__,
- index);
- nh_vrf_id = VRF_DEFAULT;
- }
- } else
- nh_vrf_id = vrf_id;
-
- gate = 0;
- if (rtnh->rtnh_len > sizeof(*rtnh)) {
- memset(tb, 0, sizeof(tb));
- netlink_parse_rtattr(
- tb, RTA_MAX, RTNH_DATA(rtnh),
- rtnh->rtnh_len - sizeof(*rtnh));
- if (tb[RTA_GATEWAY])
- gate = RTA_DATA(
- tb[RTA_GATEWAY]);
- if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE]
- && *(uint16_t *)RTA_DATA(
- tb[RTA_ENCAP_TYPE])
- == LWTUNNEL_ENCAP_MPLS) {
- num_labels = parse_encap_mpls(
- tb[RTA_ENCAP], labels);
- }
- }
-
- if (gate) {
- if (rtm->rtm_family == AF_INET) {
- if (index)
- nh = route_entry_nexthop_ipv4_ifindex_add(
- re, gate,
- prefsrc, index,
- nh_vrf_id);
- else
- nh = route_entry_nexthop_ipv4_add(
- re, gate,
- prefsrc,
- nh_vrf_id);
- } else if (rtm->rtm_family
- == AF_INET6) {
- if (index)
- nh = route_entry_nexthop_ipv6_ifindex_add(
- re, gate, index,
- nh_vrf_id);
- else
- nh = route_entry_nexthop_ipv6_add(
- re, gate,
- nh_vrf_id);
- }
- } else
- nh = route_entry_nexthop_ifindex_add(
- re, index, nh_vrf_id);
+ if (!nhe_id) {
+ uint8_t nhop_num =
+ parse_multipath_nexthops_unicast(
+ ns_id, re, rtm, rtnh, tb,
+ prefsrc, vrf_id);
- if (nh && num_labels)
- nexthop_add_labels(nh, ZEBRA_LSP_STATIC,
- num_labels, labels);
-
- if (nh && (rtnh->rtnh_flags & RTNH_F_ONLINK))
- SET_FLAG(nh->flags,
- NEXTHOP_FLAG_ONLINK);
-
- if (rtnh->rtnh_len == 0)
- break;
-
- len -= NLMSG_ALIGN(rtnh->rtnh_len);
- rtnh = RTNH_NEXT(rtnh);
+ zserv_nexthop_num_warn(
+ __func__, (const struct prefix *)&p,
+ nhop_num);
}
- zserv_nexthop_num_warn(__func__,
- (const struct prefix *)&p,
- re->nexthop_num);
- if (re->nexthop_num == 0)
- XFREE(MTYPE_RE, re);
- else
+ if (nhe_id || re->ng)
rib_add_multipath(afi, SAFI_UNICAST, &p,
&src_p, re);
+ else
+ XFREE(MTYPE_RE, re);
}
} else {
- if (!tb[RTA_MULTIPATH]) {
- struct nexthop nh;
- size_t sz = (afi == AFI_IP) ? 4 : 16;
-
- memset(&nh, 0, sizeof(nh));
- if (bh_type == BLACKHOLE_UNSPEC) {
- if (index && !gate)
- nh.type = NEXTHOP_TYPE_IFINDEX;
- else if (index && gate)
- nh.type =
- (afi == AFI_IP)
- ? NEXTHOP_TYPE_IPV4_IFINDEX
- : NEXTHOP_TYPE_IPV6_IFINDEX;
- else if (!index && gate)
- nh.type = (afi == AFI_IP)
- ? NEXTHOP_TYPE_IPV4
- : NEXTHOP_TYPE_IPV6;
- else {
+ if (nhe_id) {
+ rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0, flags,
+ &p, &src_p, NULL, nhe_id, table, metric,
+ distance, true);
+ } else {
+ if (!tb[RTA_MULTIPATH]) {
+ struct nexthop nh;
+ size_t sz = (afi == AFI_IP) ? 4 : 16;
+
+ memset(&nh, 0, sizeof(nh));
+ if (bh_type == BLACKHOLE_UNSPEC) {
+ if (index && !gate)
+ nh.type = NEXTHOP_TYPE_IFINDEX;
+ else if (index && gate)
+ nh.type =
+ (afi == AFI_IP)
+ ? NEXTHOP_TYPE_IPV4_IFINDEX
+ : NEXTHOP_TYPE_IPV6_IFINDEX;
+ else if (!index && gate)
+ nh.type =
+ (afi == AFI_IP)
+ ? NEXTHOP_TYPE_IPV4
+ : NEXTHOP_TYPE_IPV6;
+ else {
+ nh.type =
+ NEXTHOP_TYPE_BLACKHOLE;
+ nh.bh_type = BLACKHOLE_UNSPEC;
+ }
+ } else {
nh.type = NEXTHOP_TYPE_BLACKHOLE;
- nh.bh_type = BLACKHOLE_UNSPEC;
+ nh.bh_type = bh_type;
}
+ nh.ifindex = index;
+ if (gate)
+ memcpy(&nh.gate, gate, sz);
+ rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0,
+ flags, &p, &src_p, &nh, 0, table,
+ metric, distance, true);
} else {
- nh.type = NEXTHOP_TYPE_BLACKHOLE;
- nh.bh_type = bh_type;
+ /* XXX: need to compare the entire list of
+ * nexthops here for NLM_F_APPEND stupidity */
+ rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0,
+ flags, &p, &src_p, NULL, 0, table,
+ metric, distance, true);
}
- nh.ifindex = index;
- if (gate)
- memcpy(&nh.gate, gate, sz);
- rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0, flags,
- &p, &src_p, &nh, table, metric, distance,
- true);
- } else {
- /* XXX: need to compare the entire list of nexthops
- * here for NLM_F_APPEND stupidity */
- rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0, flags,
- &p, &src_p, NULL, table, metric, distance,
- true);
}
}
@@ -1023,6 +1072,35 @@ static void _netlink_route_rta_add_gateway_info(uint8_t route_family,
}
}
+static int build_label_stack(struct mpls_label_stack *nh_label,
+ mpls_lse_t *out_lse, char *label_buf,
+ size_t label_buf_size)
+{
+ char label_buf1[20];
+ int num_labels = 0;
+
+ for (int i = 0; nh_label && i < nh_label->num_labels; i++) {
+ if (nh_label->label[i] == MPLS_LABEL_IMPLICIT_NULL)
+ continue;
+
+ if (IS_ZEBRA_DEBUG_KERNEL) {
+ if (!num_labels)
+ sprintf(label_buf, "label %u",
+ nh_label->label[i]);
+ else {
+ sprintf(label_buf1, "/%u", nh_label->label[i]);
+ strlcat(label_buf, label_buf1, label_buf_size);
+ }
+ }
+
+ out_lse[num_labels] =
+ mpls_lse_encode(nh_label->label[i], 0, 0, 0);
+ num_labels++;
+ }
+
+ return num_labels;
+}
+
/* This function takes a nexthop as argument and adds
* the appropriate netlink attributes to an existing
* netlink message.
@@ -1040,10 +1118,12 @@ static void _netlink_route_build_singlepath(const char *routedesc, int bytelen,
struct rtmsg *rtmsg,
size_t req_size, int cmd)
{
- struct mpls_label_stack *nh_label;
+
mpls_lse_t out_lse[MPLS_MAX_LABELS];
- int num_labels = 0;
char label_buf[256];
+ int num_labels = 0;
+
+ assert(nexthop);
/*
* label_buf is *only* currently used within debugging.
@@ -1053,30 +1133,8 @@ static void _netlink_route_build_singlepath(const char *routedesc, int bytelen,
*/
label_buf[0] = '\0';
- assert(nexthop);
- char label_buf1[20];
-
- nh_label = nexthop->nh_label;
-
- for (int i = 0; nh_label && i < nh_label->num_labels; i++) {
- if (nh_label->label[i] == MPLS_LABEL_IMPLICIT_NULL)
- continue;
-
- if (IS_ZEBRA_DEBUG_KERNEL) {
- if (!num_labels)
- sprintf(label_buf, "label %u",
- nh_label->label[i]);
- else {
- sprintf(label_buf1, "/%u", nh_label->label[i]);
- strlcat(label_buf, label_buf1,
- sizeof(label_buf));
- }
- }
-
- out_lse[num_labels] =
- mpls_lse_encode(nh_label->label[i], 0, 0, 0);
- num_labels++;
- }
+ num_labels = build_label_stack(nexthop->nh_label, out_lse, label_buf,
+ sizeof(label_buf));
if (num_labels) {
/* Set the BoS bit */
@@ -1221,16 +1279,17 @@ static void _netlink_route_build_multipath(const char *routedesc, int bytelen,
struct rtmsg *rtmsg,
const union g_addr **src)
{
- struct mpls_label_stack *nh_label;
mpls_lse_t out_lse[MPLS_MAX_LABELS];
- int num_labels = 0;
char label_buf[256];
+ int num_labels = 0;
rtnh->rtnh_len = sizeof(*rtnh);
rtnh->rtnh_flags = 0;
rtnh->rtnh_hops = 0;
rta->rta_len += rtnh->rtnh_len;
+ assert(nexthop);
+
/*
* label_buf is *only* currently used within debugging.
* As such when we assign it we are guarding it inside
@@ -1239,30 +1298,8 @@ static void _netlink_route_build_multipath(const char *routedesc, int bytelen,
*/
label_buf[0] = '\0';
- assert(nexthop);
- char label_buf1[20];
-
- nh_label = nexthop->nh_label;
-
- for (int i = 0; nh_label && i < nh_label->num_labels; i++) {
- if (nh_label->label[i] == MPLS_LABEL_IMPLICIT_NULL)
- continue;
-
- if (IS_ZEBRA_DEBUG_KERNEL) {
- if (!num_labels)
- sprintf(label_buf, "label %u",
- nh_label->label[i]);
- else {
- sprintf(label_buf1, "/%u", nh_label->label[i]);
- strlcat(label_buf, label_buf1,
- sizeof(label_buf));
- }
- }
-
- out_lse[num_labels] =
- mpls_lse_encode(nh_label->label[i], 0, 0, 0);
- num_labels++;
- }
+ num_labels = build_label_stack(nexthop->nh_label, out_lse, label_buf,
+ sizeof(label_buf));
if (num_labels) {
/* Set the BoS bit */
@@ -1430,6 +1467,13 @@ static void _netlink_route_debug(int cmd, const struct prefix *p,
}
}
+static void _netlink_nexthop_debug(int cmd, uint32_t id)
+{
+ if (IS_ZEBRA_DEBUG_KERNEL)
+ zlog_debug("netlink_nexthop(): %s, id=%u",
+ nl_msg_type_to_str(cmd), id);
+}
+
static void _netlink_mpls_debug(int cmd, uint32_t label, const char *routedesc)
{
if (IS_ZEBRA_DEBUG_KERNEL)
@@ -1595,6 +1639,13 @@ static int netlink_route_multipath(int cmd, struct zebra_dplane_ctx *ctx)
RTA_PAYLOAD(rta));
}
+ if (supports_nh) {
+ /* Kernel supports nexthop objects */
+ addattr32(&req.n, sizeof(req), RTA_NH_ID,
+ dplane_ctx_get_nhe_id(ctx));
+ goto skip;
+ }
+
/* Count overall nexthops so we can decide whether to use singlepath
* or multipath case.
*/
@@ -1842,6 +1893,262 @@ int kernel_get_ipmr_sg_stats(struct zebra_vrf *zvrf, void *in)
return suc;
}
+/* Char length to debug ID with */
+#define ID_LENGTH 10
+
+static void _netlink_nexthop_build_group(struct nlmsghdr *n, size_t req_size,
+ uint32_t id,
+ const struct nh_grp *z_grp,
+ const uint8_t count)
+{
+ struct nexthop_grp grp[count];
+ /* Need space for max group size, "/", and null term */
+ char buf[(MULTIPATH_NUM * (ID_LENGTH + 1)) + 1];
+ char buf1[ID_LENGTH + 2];
+
+ buf[0] = '\0';
+
+ memset(grp, 0, sizeof(grp));
+
+ if (count) {
+ for (int i = 0; i < count; i++) {
+ grp[i].id = z_grp[i].id;
+ grp[i].weight = z_grp[i].weight;
+
+ if (IS_ZEBRA_DEBUG_KERNEL) {
+ if (i == 0)
+ snprintf(buf, sizeof(buf1), "group %u",
+ grp[i].id);
+ else {
+ snprintf(buf1, sizeof(buf1), "/%u",
+ grp[i].id);
+ strlcat(buf, buf1, sizeof(buf));
+ }
+ }
+ }
+ addattr_l(n, req_size, NHA_GROUP, grp, count * sizeof(*grp));
+ }
+
+ if (IS_ZEBRA_DEBUG_KERNEL)
+ zlog_debug("%s: ID (%u): %s", __func__, id, buf);
+}
+
+/**
+ * netlink_nexthop() - Nexthop change via the netlink interface
+ *
+ * @ctx: Dataplane ctx
+ *
+ * Return: Result status
+ */
+static int netlink_nexthop(int cmd, struct zebra_dplane_ctx *ctx)
+{
+ struct {
+ struct nlmsghdr n;
+ struct nhmsg nhm;
+ char buf[NL_PKT_BUF_SIZE];
+ } req;
+
+ mpls_lse_t out_lse[MPLS_MAX_LABELS];
+ char label_buf[256];
+ int num_labels = 0;
+ size_t req_size = sizeof(req);
+
+ /* Nothing to do if the kernel doesn't support nexthop objects */
+ if (!supports_nh)
+ return 0;
+
+ label_buf[0] = '\0';
+
+ memset(&req, 0, req_size);
+
+ req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
+ req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
+
+ if (cmd == RTM_NEWNEXTHOP)
+ req.n.nlmsg_flags |= NLM_F_REPLACE;
+
+ req.n.nlmsg_type = cmd;
+ req.n.nlmsg_pid = dplane_ctx_get_ns(ctx)->nls.snl.nl_pid;
+
+ req.nhm.nh_family = AF_UNSPEC;
+ /* TODO: Scope? */
+
+ uint32_t id = dplane_ctx_get_nhe_id(ctx);
+
+ if (!id) {
+ flog_err(
+ EC_ZEBRA_NHG_FIB_UPDATE,
+ "Failed trying to update a nexthop group in the kernel that does not have an ID");
+ return -1;
+ }
+
+ addattr32(&req.n, req_size, NHA_ID, id);
+
+ if (cmd == RTM_NEWNEXTHOP) {
+ if (dplane_ctx_get_nhe_nh_grp_count(ctx))
+ _netlink_nexthop_build_group(
+ &req.n, req_size, id,
+ dplane_ctx_get_nhe_nh_grp(ctx),
+ dplane_ctx_get_nhe_nh_grp_count(ctx));
+ else {
+ const struct nexthop *nh =
+ dplane_ctx_get_nhe_ng(ctx)->nexthop;
+ afi_t afi = dplane_ctx_get_nhe_afi(ctx);
+
+ if (afi == AFI_IP)
+ req.nhm.nh_family = AF_INET;
+ else if (afi == AFI_IP6)
+ req.nhm.nh_family = AF_INET6;
+
+ switch (nh->type) {
+ case NEXTHOP_TYPE_IPV4:
+ case NEXTHOP_TYPE_IPV4_IFINDEX:
+ addattr_l(&req.n, req_size, NHA_GATEWAY,
+ &nh->gate.ipv4, IPV4_MAX_BYTELEN);
+ break;
+ case NEXTHOP_TYPE_IPV6:
+ case NEXTHOP_TYPE_IPV6_IFINDEX:
+ addattr_l(&req.n, req_size, NHA_GATEWAY,
+ &nh->gate.ipv6, IPV6_MAX_BYTELEN);
+ break;
+ case NEXTHOP_TYPE_BLACKHOLE:
+ addattr_l(&req.n, req_size, NHA_BLACKHOLE, NULL,
+ 0);
+				/* Blackhole shouldn't have any more attributes
+ */
+ goto nexthop_done;
+ case NEXTHOP_TYPE_IFINDEX:
+				/* Don't need any more info for this */
+ break;
+ }
+
+ if (!nh->ifindex) {
+ flog_err(
+ EC_ZEBRA_NHG_FIB_UPDATE,
+ "Context received for kernel nexthop update without an interface");
+ return -1;
+ }
+
+ addattr32(&req.n, req_size, NHA_OIF, nh->ifindex);
+
+ num_labels =
+ build_label_stack(nh->nh_label, out_lse,
+ label_buf, sizeof(label_buf));
+
+ if (num_labels) {
+ /* Set the BoS bit */
+ out_lse[num_labels - 1] |=
+ htonl(1 << MPLS_LS_S_SHIFT);
+
+ /*
+ * TODO: MPLS unsupported for now in kernel.
+ */
+ if (req.nhm.nh_family == AF_MPLS)
+ goto nexthop_done;
+#if 0
+ addattr_l(&req.n, req_size, NHA_NEWDST,
+ &out_lse,
+ num_labels
+ * sizeof(mpls_lse_t));
+#endif
+ else {
+ struct rtattr *nest;
+ uint16_t encap = LWTUNNEL_ENCAP_MPLS;
+
+ addattr_l(&req.n, req_size,
+ NHA_ENCAP_TYPE, &encap,
+ sizeof(uint16_t));
+ nest = addattr_nest(&req.n, req_size,
+ NHA_ENCAP);
+ addattr_l(&req.n, req_size,
+ MPLS_IPTUNNEL_DST, &out_lse,
+ num_labels
+ * sizeof(mpls_lse_t));
+ addattr_nest_end(&req.n, nest);
+ }
+ }
+
+ nexthop_done:
+ if (IS_ZEBRA_DEBUG_KERNEL) {
+ char buf[NEXTHOP_STRLEN];
+
+ snprintfrr(buf, sizeof(buf), "%pNHv", nh);
+ zlog_debug("%s: ID (%u): %s (%u) %s ", __func__,
+ id, buf, nh->vrf_id, label_buf);
+ }
+ }
+
+ req.nhm.nh_protocol = zebra2proto(dplane_ctx_get_nhe_type(ctx));
+
+ } else if (cmd != RTM_DELNEXTHOP) {
+ flog_err(
+ EC_ZEBRA_NHG_FIB_UPDATE,
+ "Nexthop group kernel update command (%d) does not exist",
+ cmd);
+ return -1;
+ }
+
+ _netlink_nexthop_debug(cmd, id);
+
+ return netlink_talk_info(netlink_talk_filter, &req.n,
+ dplane_ctx_get_ns(ctx), 0);
+}
+
+/**
+ * kernel_nexthop_update() - Update/delete a nexthop from the kernel
+ *
+ * @ctx: Dataplane context
+ *
+ * Return: Dataplane result flag
+ */
+enum zebra_dplane_result kernel_nexthop_update(struct zebra_dplane_ctx *ctx)
+{
+ int cmd = 0;
+ int ret = 0;
+
+ switch (dplane_ctx_get_op(ctx)) {
+ case DPLANE_OP_NH_DELETE:
+ cmd = RTM_DELNEXTHOP;
+ break;
+ case DPLANE_OP_NH_INSTALL:
+ case DPLANE_OP_NH_UPDATE:
+ cmd = RTM_NEWNEXTHOP;
+ break;
+ case DPLANE_OP_ROUTE_INSTALL:
+ case DPLANE_OP_ROUTE_UPDATE:
+ case DPLANE_OP_ROUTE_DELETE:
+ case DPLANE_OP_ROUTE_NOTIFY:
+ case DPLANE_OP_LSP_INSTALL:
+ case DPLANE_OP_LSP_UPDATE:
+ case DPLANE_OP_LSP_DELETE:
+ case DPLANE_OP_LSP_NOTIFY:
+ case DPLANE_OP_PW_INSTALL:
+ case DPLANE_OP_PW_UNINSTALL:
+ case DPLANE_OP_SYS_ROUTE_ADD:
+ case DPLANE_OP_SYS_ROUTE_DELETE:
+ case DPLANE_OP_ADDR_INSTALL:
+ case DPLANE_OP_ADDR_UNINSTALL:
+ case DPLANE_OP_MAC_INSTALL:
+ case DPLANE_OP_MAC_DELETE:
+ case DPLANE_OP_NEIGH_INSTALL:
+ case DPLANE_OP_NEIGH_UPDATE:
+ case DPLANE_OP_NEIGH_DELETE:
+ case DPLANE_OP_VTEP_ADD:
+ case DPLANE_OP_VTEP_DELETE:
+ case DPLANE_OP_NONE:
+ flog_err(
+ EC_ZEBRA_NHG_FIB_UPDATE,
+ "Context received for kernel nexthop update with incorrect OP code (%u)",
+ dplane_ctx_get_op(ctx));
+ return ZEBRA_DPLANE_REQUEST_FAILURE;
+ }
+
+ ret = netlink_nexthop(cmd, ctx);
+
+ return (ret == 0 ? ZEBRA_DPLANE_REQUEST_SUCCESS
+ : ZEBRA_DPLANE_REQUEST_FAILURE);
+}
+
/*
* Update or delete a prefix from the kernel,
* using info from a dataplane context.
@@ -1919,6 +2226,298 @@ enum zebra_dplane_result kernel_route_update(struct zebra_dplane_ctx *ctx)
ZEBRA_DPLANE_REQUEST_SUCCESS : ZEBRA_DPLANE_REQUEST_FAILURE);
}
+/**
+ * netlink_nexthop_process_nh() - Parse the gateway/if info from a new nexthop
+ *
+ * @tb: Netlink RTA data
+ * @family: Address family in the nhmsg
+ * @ifp: Interface connected - this should be NULL, we fill it in
+ * @ns_id: Namespace id
+ *
+ * Return: New nexthop
+ */
+static struct nexthop netlink_nexthop_process_nh(struct rtattr **tb,
+						 unsigned char family,
+						 struct interface **ifp,
+						 ns_id_t ns_id)
+{
+	struct nexthop nh = {};
+	void *gate = NULL;
+	enum nexthop_types_t type = 0;
+	int if_index = 0;
+	size_t sz = 0;
+
+	if_index = *(int *)RTA_DATA(tb[NHA_OIF]);
+
+
+	if (tb[NHA_GATEWAY]) {
+		switch (family) {
+		case AF_INET:
+			type = NEXTHOP_TYPE_IPV4_IFINDEX;
+			sz = 4;
+			break;
+		case AF_INET6:
+			type = NEXTHOP_TYPE_IPV6_IFINDEX;
+			sz = 16;
+			break;
+		default:
+			flog_warn(
+				EC_ZEBRA_BAD_NHG_MESSAGE,
+				"Nexthop gateway with bad address family (%d) received from kernel",
+				family);
+			return nh;
+		}
+		gate = RTA_DATA(tb[NHA_GATEWAY]);
+	} else
+		type = NEXTHOP_TYPE_IFINDEX;
+
+	if (type)
+		nh.type = type;
+
+	if (gate)
+		memcpy(&(nh.gate), gate, sz);
+
+	if (if_index)
+		nh.ifindex = if_index;
+
+	*ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), nh.ifindex);
+	if (*ifp) /* test the lookup result, not the out-param pointer */
+		nh.vrf_id = (*ifp)->vrf_id;
+	else {
+		flog_warn(
+			EC_ZEBRA_UNKNOWN_INTERFACE,
+			"%s: Unknown nexthop interface %u received, defaulting to VRF_DEFAULT",
+			__PRETTY_FUNCTION__, nh.ifindex);
+
+		nh.vrf_id = VRF_DEFAULT;
+	}
+
+	if (tb[NHA_ENCAP] && tb[NHA_ENCAP_TYPE]) {
+		uint16_t encap_type = *(uint16_t *)RTA_DATA(tb[NHA_ENCAP_TYPE]);
+		int num_labels = 0;
+
+		mpls_label_t labels[MPLS_MAX_LABELS] = {0};
+
+		if (encap_type == LWTUNNEL_ENCAP_MPLS)
+			num_labels = parse_encap_mpls(tb[NHA_ENCAP], labels);
+
+		if (num_labels)
+			nexthop_add_labels(&nh, ZEBRA_LSP_STATIC, num_labels,
+					   labels);
+	}
+
+	return nh;
+}
+
+static int netlink_nexthop_process_group(struct rtattr **tb,
+ struct nh_grp *z_grp, int z_grp_size)
+{
+ uint8_t count = 0;
+ /* linux/nexthop.h group struct */
+ struct nexthop_grp *n_grp = NULL;
+
+ n_grp = (struct nexthop_grp *)RTA_DATA(tb[NHA_GROUP]);
+ count = (RTA_PAYLOAD(tb[NHA_GROUP]) / sizeof(*n_grp));
+
+ if (!count || (count * sizeof(*n_grp)) != RTA_PAYLOAD(tb[NHA_GROUP])) {
+ flog_warn(EC_ZEBRA_BAD_NHG_MESSAGE,
+ "Invalid nexthop group received from the kernel");
+ return count;
+ }
+
+#if 0
+ // TODO: Need type for something?
+ zlog_debug("Nexthop group type: %d",
+ *((uint16_t *)RTA_DATA(tb[NHA_GROUP_TYPE])));
+
+#endif
+
+ for (int i = 0; ((i < count) && (i < z_grp_size)); i++) {
+ z_grp[i].id = n_grp[i].id;
+ z_grp[i].weight = n_grp[i].weight;
+ }
+ return count;
+}
+
+/**
+ * netlink_nexthop_change() - Read in change about nexthops from the kernel
+ *
+ * @h: Netlink message header
+ * @ns_id: Namespace id
+ * @startup: Are we reading under startup conditions?
+ *
+ * Return: Result status
+ */
+int netlink_nexthop_change(struct nlmsghdr *h, ns_id_t ns_id, int startup)
+{
+ int len;
+ /* nexthop group id */
+ uint32_t id;
+ unsigned char family;
+ int type;
+ afi_t afi = AFI_UNSPEC;
+ vrf_id_t vrf_id = 0;
+ struct interface *ifp = NULL;
+ struct nhmsg *nhm = NULL;
+ struct nexthop nh = {};
+ struct nh_grp grp[MULTIPATH_NUM] = {};
+ /* Count of nexthops in group array */
+ uint8_t grp_count = 0;
+ struct rtattr *tb[NHA_MAX + 1] = {};
+
+ nhm = NLMSG_DATA(h);
+
+ if (startup && h->nlmsg_type != RTM_NEWNEXTHOP)
+ return 0;
+
+ len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct nhmsg));
+ if (len < 0) {
+ zlog_warn(
+ "%s: Message received from netlink is of a broken size %d %zu",
+ __PRETTY_FUNCTION__, h->nlmsg_len,
+ (size_t)NLMSG_LENGTH(sizeof(struct nhmsg)));
+ return -1;
+ }
+
+ netlink_parse_rtattr(tb, NHA_MAX, RTM_NHA(nhm), len);
+
+
+ if (!tb[NHA_ID]) {
+ flog_warn(
+ EC_ZEBRA_BAD_NHG_MESSAGE,
+ "Nexthop group without an ID received from the kernel");
+ return -1;
+ }
+
+ /* We use the ID key'd nhg table for kernel updates */
+ id = *((uint32_t *)RTA_DATA(tb[NHA_ID]));
+
+ family = nhm->nh_family;
+ afi = family2afi(family);
+
+ type = proto2zebra(nhm->nh_protocol, 0, true);
+
+ if (IS_ZEBRA_DEBUG_KERNEL)
+ zlog_debug("%s ID (%u) %s NS %u",
+ nl_msg_type_to_str(h->nlmsg_type), id,
+ nl_family_to_str(family), ns_id);
+
+
+ if (h->nlmsg_type == RTM_NEWNEXTHOP) {
+ if (tb[NHA_GROUP]) {
+ /**
+ * If this is a group message its only going to have
+ * an array of nexthop IDs associated with it
+ */
+ grp_count = netlink_nexthop_process_group(
+ tb, grp, array_size(grp));
+ } else {
+ if (tb[NHA_BLACKHOLE]) {
+ /**
+ * This nexthop is just for blackhole-ing
+ * traffic, it should not have an OIF, GATEWAY,
+ * or ENCAP
+ */
+ nh.type = NEXTHOP_TYPE_BLACKHOLE;
+ nh.bh_type = BLACKHOLE_UNSPEC;
+ } else if (tb[NHA_OIF])
+ /**
+ * This is a true new nexthop, so we need
+ * to parse the gateway and device info
+ */
+ nh = netlink_nexthop_process_nh(tb, family,
+ &ifp, ns_id);
+ else {
+
+ flog_warn(
+ EC_ZEBRA_BAD_NHG_MESSAGE,
+ "Invalid Nexthop message received from the kernel with ID (%u)",
+ id);
+ return -1;
+ }
+ SET_FLAG(nh.flags, NEXTHOP_FLAG_ACTIVE);
+ if (nhm->nh_flags & RTNH_F_ONLINK)
+ SET_FLAG(nh.flags, NEXTHOP_FLAG_ONLINK);
+ vrf_id = nh.vrf_id;
+ }
+
+ if (zebra_nhg_kernel_find(id, &nh, grp, grp_count, vrf_id, afi,
+ type, startup))
+ return -1;
+
+ } else if (h->nlmsg_type == RTM_DELNEXTHOP)
+ zebra_nhg_kernel_del(id);
+
+ return 0;
+}
+
+#if 0 /* Force off kernel nexthop group installs for now */
+/**
+ * netlink_request_nexthop() - Request nexthop information from the kernel
+ * @zns: Zebra namespace
+ * @family: AF_* netlink family
+ * @type: RTM_* route type
+ *
+ * Return: Result status
+ */
+static int netlink_request_nexthop(struct zebra_ns *zns, int family, int type)
+{
+ struct {
+ struct nlmsghdr n;
+ struct nhmsg nhm;
+ } req;
+
+ /* Form the request, specifying filter (rtattr) if needed. */
+ memset(&req, 0, sizeof(req));
+ req.n.nlmsg_type = type;
+ req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
+ req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
+ req.nhm.nh_family = family;
+
+ return netlink_request(&zns->netlink_cmd, &req.n);
+}
+
+
+/**
+ * netlink_nexthop_read() - Nexthop read function using netlink interface
+ *
+ * @zns: Zebra name space
+ *
+ * Return: Result status
+ * Only called at bootstrap time.
+ */
+int netlink_nexthop_read(struct zebra_ns *zns)
+{
+ int ret;
+ struct zebra_dplane_info dp_info;
+
+ zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
+
+ /* Get nexthop objects */
+ ret = netlink_request_nexthop(zns, AF_UNSPEC, RTM_GETNEXTHOP);
+ if (ret < 0)
+ return ret;
+ ret = netlink_parse_info(netlink_nexthop_change, &zns->netlink_cmd,
+ &dp_info, 0, 1);
+
+ if (!ret)
+ /* If we successfully read in nexthop objects,
+ * this kernel must support them.
+ */
+ supports_nh = true;
+ else if (IS_ZEBRA_DEBUG_KERNEL)
+ zlog_debug("Nexthop objects not supported on this kernel");
+
+ return ret;
+}
+#else
+int netlink_nexthop_read(struct zebra_ns *zns)
+{
+ return 0;
+}
+#endif
+
+
int kernel_neigh_update(int add, int ifindex, uint32_t addr, char *lla,
int llalen, ns_id_t ns_id)
{
@@ -1951,7 +2550,7 @@ static int netlink_vxlan_flood_update_ctx(const struct zebra_dplane_ctx *ctx,
req.n.nlmsg_type = cmd;
req.ndm.ndm_family = PF_BRIDGE;
req.ndm.ndm_state = NUD_NOARP | NUD_PERMANENT;
- req.ndm.ndm_flags |= NTF_SELF; // Handle by "self", not "master"
+ req.ndm.ndm_flags |= NTF_SELF; /* Handle by "self", not "master" */
addattr_l(&req.n, sizeof(req),
diff --git a/zebra/rt_netlink.h b/zebra/rt_netlink.h
index 29e0152bb..2b4b14514 100644
--- a/zebra/rt_netlink.h
+++ b/zebra/rt_netlink.h
@@ -69,6 +69,10 @@ extern int netlink_mpls_multipath(int cmd, struct zebra_dplane_ctx *ctx);
extern int netlink_route_change(struct nlmsghdr *h, ns_id_t ns_id, int startup);
extern int netlink_route_read(struct zebra_ns *zns);
+extern int netlink_nexthop_change(struct nlmsghdr *h, ns_id_t ns_id,
+ int startup);
+extern int netlink_nexthop_read(struct zebra_ns *zns);
+
extern int netlink_neigh_change(struct nlmsghdr *h, ns_id_t ns_id);
extern int netlink_macfdb_read(struct zebra_ns *zns);
extern int netlink_macfdb_read_for_bridge(struct zebra_ns *zns,
diff --git a/zebra/rt_socket.c b/zebra/rt_socket.c
index 981ef7a88..73b3dd0b4 100644
--- a/zebra/rt_socket.c
+++ b/zebra/rt_socket.c
@@ -364,6 +364,11 @@ enum zebra_dplane_result kernel_route_update(struct zebra_dplane_ctx *ctx)
return res;
}
+enum zebra_dplane_result kernel_nexthop_update(struct zebra_dplane_ctx *ctx)
+{
+ return ZEBRA_DPLANE_REQUEST_SUCCESS;
+}
+
int kernel_neigh_update(int add, int ifindex, uint32_t addr, char *lla,
int llalen, ns_id_t ns_id)
{
diff --git a/zebra/rtread_getmsg.c b/zebra/rtread_getmsg.c
index 725bb63a0..3ba5d6ee7 100644
--- a/zebra/rtread_getmsg.c
+++ b/zebra/rtread_getmsg.c
@@ -102,7 +102,7 @@ static void handle_route_entry(mib2_ipRouteEntry_t *routeEntry)
nh.gate.ipv4.s_addr = routeEntry->ipRouteNextHop;
rib_add(AFI_IP, SAFI_UNICAST, VRF_DEFAULT, ZEBRA_ROUTE_KERNEL, 0,
- zebra_flags, &prefix, NULL, &nh, 0, 0, 0, 0, 0);
+ zebra_flags, &prefix, NULL, &nh, 0, 0, 0, 0, 0, 0);
}
void route_read(struct zebra_ns *zns)
diff --git a/zebra/subdir.am b/zebra/subdir.am
index 25040a271..28847ce09 100644
--- a/zebra/subdir.am
+++ b/zebra/subdir.am
@@ -137,6 +137,7 @@ noinst_HEADERS += \
zebra/zebra_mpls.h \
zebra/zebra_mroute.h \
zebra/zebra_nhg.h \
+ zebra/zebra_nhg_private.h \
zebra/zebra_ns.h \
zebra/zebra_pbr.h \
zebra/zebra_ptm.h \
diff --git a/zebra/zapi_msg.c b/zebra/zapi_msg.c
index e61e68b7f..d6ade783c 100644
--- a/zebra/zapi_msg.c
+++ b/zebra/zapi_msg.c
@@ -522,7 +522,7 @@ int zsend_redistribute_route(int cmd, struct zserv *client,
struct zapi_route api;
struct zapi_nexthop *api_nh;
struct nexthop *nexthop;
- int count = 0;
+ uint8_t count = 0;
afi_t afi;
size_t stream_size =
MAX(ZEBRA_MAX_PACKET_SIZ, sizeof(struct zapi_route));
@@ -559,12 +559,7 @@ int zsend_redistribute_route(int cmd, struct zserv *client,
memcpy(&api.src_prefix, src_p, sizeof(api.src_prefix));
}
- /* Nexthops. */
- if (re->nexthop_active_num) {
- SET_FLAG(api.message, ZAPI_MESSAGE_NEXTHOP);
- api.nexthop_num = re->nexthop_active_num;
- }
- for (nexthop = re->ng.nexthop; nexthop; nexthop = nexthop->next) {
+ for (nexthop = re->ng->nexthop; nexthop; nexthop = nexthop->next) {
if (!CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE))
continue;
@@ -595,6 +590,12 @@ int zsend_redistribute_route(int cmd, struct zserv *client,
count++;
}
+ /* Nexthops. */
+ if (count) {
+ SET_FLAG(api.message, ZAPI_MESSAGE_NEXTHOP);
+ api.nexthop_num = count;
+ }
+
/* Attributes. */
SET_FLAG(api.message, ZAPI_MESSAGE_DISTANCE);
api.distance = re->distance;
@@ -665,7 +666,8 @@ static int zsend_ipv4_nexthop_lookup_mrib(struct zserv *client,
* nexthop we are looking up. Therefore, we will just iterate
* over the top chain of nexthops.
*/
- for (nexthop = re->ng.nexthop; nexthop; nexthop = nexthop->next)
+ for (nexthop = re->ng->nexthop; nexthop;
+ nexthop = nexthop->next)
if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE))
num += zserv_encode_nexthop(s, nexthop);
@@ -1422,6 +1424,8 @@ static void zread_route_add(ZAPI_HANDLER_ARGS)
re->flags = api.flags;
re->uptime = monotime(NULL);
re->vrf_id = vrf_id;
+ re->ng = nexthop_group_new();
+
if (api.tableid)
re->table = api.tableid;
else
@@ -1433,6 +1437,8 @@ static void zread_route_add(ZAPI_HANDLER_ARGS)
"%s: received a route without nexthops for prefix %pFX from client %s",
__func__, &api.prefix,
zebra_route_string(client->proto));
+
+ nexthop_group_delete(&re->ng);
XFREE(MTYPE_RE, re);
return;
}
@@ -1531,7 +1537,7 @@ static void zread_route_add(ZAPI_HANDLER_ARGS)
EC_ZEBRA_NEXTHOP_CREATION_FAILED,
"%s: Nexthops Specified: %d but we failed to properly create one",
__PRETTY_FUNCTION__, api.nexthop_num);
- nexthops_free(re->ng.nexthop);
+ nexthop_group_delete(&re->ng);
XFREE(MTYPE_RE, re);
return;
}
@@ -1573,7 +1579,7 @@ static void zread_route_add(ZAPI_HANDLER_ARGS)
flog_warn(EC_ZEBRA_RX_SRCDEST_WRONG_AFI,
"%s: Received SRC Prefix but afi is not v6",
__PRETTY_FUNCTION__);
- nexthops_free(re->ng.nexthop);
+ nexthop_group_delete(&re->ng);
XFREE(MTYPE_RE, re);
return;
}
@@ -1627,7 +1633,7 @@ static void zread_route_del(ZAPI_HANDLER_ARGS)
table_id = zvrf->table_id;
rib_delete(afi, api.safi, zvrf_id(zvrf), api.type, api.instance,
- api.flags, &api.prefix, src_p, NULL, table_id, api.metric,
+ api.flags, &api.prefix, src_p, NULL, 0, table_id, api.metric,
api.distance, false);
/* Stats */
diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c
index bf343e06e..a88b0a38d 100644
--- a/zebra/zebra_dplane.c
+++ b/zebra/zebra_dplane.c
@@ -67,6 +67,20 @@ const uint32_t DPLANE_DEFAULT_NEW_WORK = 100;
#endif /* DPLANE_DEBUG */
/*
+ * Nexthop information captured for nexthop/nexthop group updates
+ */
+struct dplane_nexthop_info {
+ uint32_t id;
+ afi_t afi;
+ vrf_id_t vrf_id;
+ int type;
+
+ struct nexthop_group ng;
+ struct nh_grp nh_grp[MULTIPATH_NUM];
+ uint8_t nh_grp_count;
+};
+
+/*
* Route information captured for route updates.
*/
struct dplane_route_info {
@@ -95,6 +109,9 @@ struct dplane_route_info {
uint32_t zd_mtu;
uint32_t zd_nexthop_mtu;
+ /* Nexthop hash entry info */
+ struct dplane_nexthop_info nhe;
+
/* Nexthops */
struct nexthop_group zd_ng;
@@ -321,6 +338,9 @@ static struct zebra_dplane_globals {
_Atomic uint32_t dg_route_errors;
_Atomic uint32_t dg_other_errors;
+ _Atomic uint32_t dg_nexthops_in;
+ _Atomic uint32_t dg_nexthop_errors;
+
_Atomic uint32_t dg_lsps_in;
_Atomic uint32_t dg_lsp_errors;
@@ -461,6 +481,18 @@ static void dplane_ctx_free(struct zebra_dplane_ctx **pctx)
break;
+ case DPLANE_OP_NH_INSTALL:
+ case DPLANE_OP_NH_UPDATE:
+ case DPLANE_OP_NH_DELETE: {
+ if ((*pctx)->u.rinfo.nhe.ng.nexthop) {
+ /* This deals with recursive nexthops too */
+ nexthops_free((*pctx)->u.rinfo.nhe.ng.nexthop);
+
+ (*pctx)->u.rinfo.nhe.ng.nexthop = NULL;
+ }
+ break;
+ }
+
case DPLANE_OP_LSP_INSTALL:
case DPLANE_OP_LSP_UPDATE:
case DPLANE_OP_LSP_DELETE:
@@ -638,6 +670,17 @@ const char *dplane_op2str(enum dplane_op_e op)
ret = "ROUTE_NOTIFY";
break;
+ /* Nexthop update */
+ case DPLANE_OP_NH_INSTALL:
+ ret = "NH_INSTALL";
+ break;
+ case DPLANE_OP_NH_UPDATE:
+ ret = "NH_UPDATE";
+ break;
+ case DPLANE_OP_NH_DELETE:
+ ret = "NH_DELETE";
+ break;
+
case DPLANE_OP_LSP_INSTALL:
ret = "LSP_INSTALL";
break;
@@ -1015,6 +1058,51 @@ const struct zebra_dplane_info *dplane_ctx_get_ns(
return &(ctx->zd_ns_info);
}
+/* Accessors for nexthop information */
+uint32_t dplane_ctx_get_nhe_id(const struct zebra_dplane_ctx *ctx)
+{
+ DPLANE_CTX_VALID(ctx);
+ return ctx->u.rinfo.nhe.id;
+}
+
+afi_t dplane_ctx_get_nhe_afi(const struct zebra_dplane_ctx *ctx)
+{
+ DPLANE_CTX_VALID(ctx);
+ return ctx->u.rinfo.nhe.afi;
+}
+
+vrf_id_t dplane_ctx_get_nhe_vrf_id(const struct zebra_dplane_ctx *ctx)
+{
+ DPLANE_CTX_VALID(ctx);
+ return ctx->u.rinfo.nhe.vrf_id;
+}
+
+int dplane_ctx_get_nhe_type(const struct zebra_dplane_ctx *ctx)
+{
+ DPLANE_CTX_VALID(ctx);
+ return ctx->u.rinfo.nhe.type;
+}
+
+const struct nexthop_group *
+dplane_ctx_get_nhe_ng(const struct zebra_dplane_ctx *ctx)
+{
+ DPLANE_CTX_VALID(ctx);
+ return &(ctx->u.rinfo.nhe.ng);
+}
+
+const struct nh_grp *
+dplane_ctx_get_nhe_nh_grp(const struct zebra_dplane_ctx *ctx)
+{
+ DPLANE_CTX_VALID(ctx);
+ return ctx->u.rinfo.nhe.nh_grp;
+}
+
+uint8_t dplane_ctx_get_nhe_nh_grp_count(const struct zebra_dplane_ctx *ctx)
+{
+ DPLANE_CTX_VALID(ctx);
+ return ctx->u.rinfo.nhe.nh_grp_count;
+}
+
/* Accessors for LSP information */
mpls_label_t dplane_ctx_get_in_label(const struct zebra_dplane_ctx *ctx)
@@ -1419,7 +1507,7 @@ static int dplane_ctx_route_init(struct zebra_dplane_ctx *ctx,
ctx->u.rinfo.zd_safi = info->safi;
/* Copy nexthops; recursive info is included too */
- copy_nexthops(&(ctx->u.rinfo.zd_ng.nexthop), re->ng.nexthop, NULL);
+ copy_nexthops(&(ctx->u.rinfo.zd_ng.nexthop), re->ng->nexthop, NULL);
/* Ensure that the dplane's nexthops flags are clear. */
for (ALL_NEXTHOPS(ctx->u.rinfo.zd_ng, nexthop))
@@ -1437,6 +1525,29 @@ static int dplane_ctx_route_init(struct zebra_dplane_ctx *ctx,
zns = zvrf->zns;
dplane_ctx_ns_init(ctx, zns, (op == DPLANE_OP_ROUTE_UPDATE));
+#ifdef HAVE_NETLINK
+ if (re->nhe_id) {
+ struct nhg_hash_entry *nhe =
+ zebra_nhg_resolve(zebra_nhg_lookup_id(re->nhe_id));
+
+ ctx->u.rinfo.nhe.id = nhe->id;
+ /*
+ * Check if the nhe is installed/queued before doing anything
+ * with this route.
+ *
+ * If its a delete we only use the prefix anyway, so this only
+ * matters for INSTALL/UPDATE.
+ */
+ if (((op == DPLANE_OP_ROUTE_INSTALL)
+ || (op == DPLANE_OP_ROUTE_UPDATE))
+ && !CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED)
+ && !CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_QUEUED)) {
+ ret = ENOENT;
+ goto done;
+ }
+ }
+#endif /* HAVE_NETLINK */
+
/* Trying out the sequence number idea, so we can try to detect
* when a result is stale.
*/
@@ -1449,6 +1560,64 @@ done:
return ret;
}
+/**
+ * dplane_ctx_nexthop_init() - Initialize a context block for a nexthop update
+ *
+ * @ctx: Dataplane context to init
+ * @op: Operation being performed
+ * @nhe: Nexthop group hash entry
+ *
+ * Return: Result status
+ */
+static int dplane_ctx_nexthop_init(struct zebra_dplane_ctx *ctx,
+ enum dplane_op_e op,
+ struct nhg_hash_entry *nhe)
+{
+ struct zebra_vrf *zvrf = NULL;
+ struct zebra_ns *zns = NULL;
+
+ int ret = EINVAL;
+
+ if (!ctx || !nhe)
+ goto done;
+
+ ctx->zd_op = op;
+ ctx->zd_status = ZEBRA_DPLANE_REQUEST_SUCCESS;
+
+ /* Copy over nhe info */
+ ctx->u.rinfo.nhe.id = nhe->id;
+ ctx->u.rinfo.nhe.afi = nhe->afi;
+ ctx->u.rinfo.nhe.vrf_id = nhe->vrf_id;
+ ctx->u.rinfo.nhe.type = nhe->type;
+
+ nexthop_group_copy(&(ctx->u.rinfo.nhe.ng), nhe->nhg);
+
+ /* If its a group, convert it to a grp array of ids */
+ if (!zebra_nhg_depends_is_empty(nhe)
+ && !CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_RECURSIVE))
+ ctx->u.rinfo.nhe.nh_grp_count = zebra_nhg_nhe2grp(
+ ctx->u.rinfo.nhe.nh_grp, nhe, MULTIPATH_NUM);
+
+ zvrf = vrf_info_lookup(nhe->vrf_id);
+
+ /*
+ * Fallback to default namespace if the vrf got ripped out from under
+ * us.
+ */
+ zns = zvrf ? zvrf->zns : zebra_ns_lookup(NS_DEFAULT);
+
+ /*
+ * TODO: Might not need to mark this as an update, since
+ * it probably won't require two messages
+ */
+ dplane_ctx_ns_init(ctx, zns, (op == DPLANE_OP_NH_UPDATE));
+
+ ret = AOK;
+
+done:
+ return ret;
+}
+
/*
* Capture information for an LSP update in a dplane context.
*/
@@ -1577,7 +1746,7 @@ static int dplane_ctx_pw_init(struct zebra_dplane_ctx *ctx,
if (re)
copy_nexthops(&(ctx->u.pw.nhg.nexthop),
- re->ng.nexthop, NULL);
+ re->ng->nexthop, NULL);
route_unlock_node(rn);
}
@@ -1673,7 +1842,7 @@ dplane_route_update_internal(struct route_node *rn,
* We'll need these to do per-nexthop deletes.
*/
copy_nexthops(&(ctx->u.rinfo.zd_old_ng.nexthop),
- old_re->ng.nexthop, NULL);
+ old_re->ng->nexthop, NULL);
#endif /* !HAVE_NETLINK */
}
@@ -1688,7 +1857,53 @@ dplane_route_update_internal(struct route_node *rn,
if (ret == AOK)
result = ZEBRA_DPLANE_REQUEST_QUEUED;
else {
- atomic_fetch_add_explicit(&zdplane_info.dg_route_errors, 1,
+ if (ret == ENOENT)
+ result = ZEBRA_DPLANE_REQUEST_SUCCESS;
+ else
+ atomic_fetch_add_explicit(&zdplane_info.dg_route_errors,
+ 1, memory_order_relaxed);
+ if (ctx)
+ dplane_ctx_free(&ctx);
+ }
+
+ return result;
+}
+
+/**
+ * dplane_nexthop_update_internal() - Helper for enqueuing nexthop changes
+ *
+ * @nhe: Nexthop group hash entry where the change occurred
+ * @op: The operation to be enqueued
+ *
+ * Return: Result of the change
+ */
+static enum zebra_dplane_result
+dplane_nexthop_update_internal(struct nhg_hash_entry *nhe, enum dplane_op_e op)
+{
+ enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE;
+ int ret = EINVAL;
+ struct zebra_dplane_ctx *ctx = NULL;
+
+ /* Obtain context block */
+ ctx = dplane_ctx_alloc();
+ if (!ctx) {
+ ret = ENOMEM;
+ goto done;
+ }
+
+ ret = dplane_ctx_nexthop_init(ctx, op, nhe);
+ if (ret == AOK)
+ ret = dplane_update_enqueue(ctx);
+
+done:
+ /* Update counter */
+ atomic_fetch_add_explicit(&zdplane_info.dg_nexthops_in, 1,
+ memory_order_relaxed);
+
+ if (ret == AOK)
+ result = ZEBRA_DPLANE_REQUEST_QUEUED;
+ else {
+ atomic_fetch_add_explicit(&zdplane_info.dg_nexthop_errors, 1,
memory_order_relaxed);
if (ctx)
dplane_ctx_free(&ctx);
@@ -1853,6 +2068,45 @@ done:
}
/*
+ * Enqueue a nexthop add for the dataplane.
+ */
+enum zebra_dplane_result dplane_nexthop_add(struct nhg_hash_entry *nhe)
+{
+ enum zebra_dplane_result ret = ZEBRA_DPLANE_REQUEST_FAILURE;
+
+ if (nhe)
+ ret = dplane_nexthop_update_internal(nhe, DPLANE_OP_NH_INSTALL);
+ return ret;
+}
+
+/*
+ * Enqueue a nexthop update for the dataplane.
+ *
+ * Might not need this func since zebra's nexthop objects should be immutable?
+ */
+enum zebra_dplane_result dplane_nexthop_update(struct nhg_hash_entry *nhe)
+{
+ enum zebra_dplane_result ret = ZEBRA_DPLANE_REQUEST_FAILURE;
+
+ if (nhe)
+ ret = dplane_nexthop_update_internal(nhe, DPLANE_OP_NH_UPDATE);
+ return ret;
+}
+
+/*
+ * Enqueue a nexthop removal for the dataplane.
+ */
+enum zebra_dplane_result dplane_nexthop_delete(struct nhg_hash_entry *nhe)
+{
+ enum zebra_dplane_result ret = ZEBRA_DPLANE_REQUEST_FAILURE;
+
+ if (nhe)
+ ret = dplane_nexthop_update_internal(nhe, DPLANE_OP_NH_DELETE);
+
+ return ret;
+}
+
+/*
* Enqueue LSP add for the dataplane.
*/
enum zebra_dplane_result dplane_lsp_add(zebra_lsp_t *lsp)
@@ -2873,6 +3127,33 @@ kernel_dplane_address_update(struct zebra_dplane_ctx *ctx)
return res;
}
+/**
+ * kernel_dplane_nexthop_update() - Handler for kernel nexthop updates
+ *
+ * @ctx: Dataplane context
+ *
+ * Return: Dataplane result flag
+ */
+static enum zebra_dplane_result
+kernel_dplane_nexthop_update(struct zebra_dplane_ctx *ctx)
+{
+ enum zebra_dplane_result res;
+
+ if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) {
+ zlog_debug("ID (%u) Dplane nexthop update ctx %p op %s",
+ dplane_ctx_get_nhe_id(ctx), ctx,
+ dplane_op2str(dplane_ctx_get_op(ctx)));
+ }
+
+ res = kernel_nexthop_update(ctx);
+
+ if (res != ZEBRA_DPLANE_REQUEST_SUCCESS)
+ atomic_fetch_add_explicit(&zdplane_info.dg_nexthop_errors, 1,
+ memory_order_relaxed);
+
+ return res;
+}
+
/*
* Handler for kernel-facing EVPN MAC address updates
*/
@@ -2967,6 +3248,12 @@ static int kernel_dplane_process_func(struct zebra_dplane_provider *prov)
res = kernel_dplane_route_update(ctx);
break;
+ case DPLANE_OP_NH_INSTALL:
+ case DPLANE_OP_NH_UPDATE:
+ case DPLANE_OP_NH_DELETE:
+ res = kernel_dplane_nexthop_update(ctx);
+ break;
+
case DPLANE_OP_LSP_INSTALL:
case DPLANE_OP_LSP_UPDATE:
case DPLANE_OP_LSP_DELETE:
diff --git a/zebra/zebra_dplane.h b/zebra/zebra_dplane.h
index be945632c..fede3bfcc 100644
--- a/zebra/zebra_dplane.h
+++ b/zebra/zebra_dplane.h
@@ -30,6 +30,7 @@
#include "zebra/rib.h"
#include "zebra/zserv.h"
#include "zebra/zebra_mpls.h"
+#include "zebra/zebra_nhg.h"
#ifdef __cplusplus
extern "C" {
@@ -108,6 +109,11 @@ enum dplane_op_e {
DPLANE_OP_ROUTE_DELETE,
DPLANE_OP_ROUTE_NOTIFY,
+ /* Nexthop update */
+ DPLANE_OP_NH_INSTALL,
+ DPLANE_OP_NH_UPDATE,
+ DPLANE_OP_NH_DELETE,
+
/* LSP update */
DPLANE_OP_LSP_INSTALL,
DPLANE_OP_LSP_UPDATE,
@@ -269,6 +275,17 @@ const struct nexthop_group *dplane_ctx_get_ng(
const struct nexthop_group *dplane_ctx_get_old_ng(
const struct zebra_dplane_ctx *ctx);
+/* Accessors for nexthop information */
+uint32_t dplane_ctx_get_nhe_id(const struct zebra_dplane_ctx *ctx);
+afi_t dplane_ctx_get_nhe_afi(const struct zebra_dplane_ctx *ctx);
+vrf_id_t dplane_ctx_get_nhe_vrf_id(const struct zebra_dplane_ctx *ctx);
+int dplane_ctx_get_nhe_type(const struct zebra_dplane_ctx *ctx);
+const struct nexthop_group *
+dplane_ctx_get_nhe_ng(const struct zebra_dplane_ctx *ctx);
+const struct nh_grp *
+dplane_ctx_get_nhe_nh_grp(const struct zebra_dplane_ctx *ctx);
+uint8_t dplane_ctx_get_nhe_nh_grp_count(const struct zebra_dplane_ctx *ctx);
+
/* Accessors for LSP information */
mpls_label_t dplane_ctx_get_in_label(const struct zebra_dplane_ctx *ctx);
void dplane_ctx_set_in_label(struct zebra_dplane_ctx *ctx,
@@ -373,6 +390,16 @@ enum zebra_dplane_result dplane_route_notif_update(
enum dplane_op_e op,
struct zebra_dplane_ctx *ctx);
+
+/* Forward ref of nhg_hash_entry */
+struct nhg_hash_entry;
+/*
+ * Enqueue a nexthop change operation for the dataplane.
+ */
+enum zebra_dplane_result dplane_nexthop_add(struct nhg_hash_entry *nhe);
+enum zebra_dplane_result dplane_nexthop_update(struct nhg_hash_entry *nhe);
+enum zebra_dplane_result dplane_nexthop_delete(struct nhg_hash_entry *nhe);
+
/*
* Enqueue LSP change operations for the dataplane.
*/
diff --git a/zebra/zebra_errors.c b/zebra/zebra_errors.c
index a7e5147af..5a0905d59 100644
--- a/zebra/zebra_errors.c
+++ b/zebra/zebra_errors.c
@@ -283,6 +283,39 @@ static struct log_ref ferr_zebra_err[] = {
.description = "Zebra received an event from inotify, but failed to read what it was.",
.suggestion = "Notify a developer.",
},
+ {
+ .code = EC_ZEBRA_NHG_TABLE_INSERT_FAILED,
+ .title =
+ "Nexthop Group Hash Table Insert Failure",
+ .description =
+ "Zebra failed in inserting a Nexthop Group into its hash tables.",
+ .suggestion =
+ "Check to see if the entry already exists or if the netlink message was parsed incorrectly."
+ },
+ {
+ .code = EC_ZEBRA_NHG_SYNC,
+ .title =
+ "Zebra's Nexthop Groups are out of sync",
+ .description =
+ "Zebra's nexthop group tables are out of sync with the nexthop groups in the fib.",
+ .suggestion =
+ "Check the current status of the kernels nexthop groups and compare it to Zebra's."
+ },
+ {
+ .code = EC_ZEBRA_NHG_FIB_UPDATE,
+ .title =
+ "Zebra failed updating the fib with Nexthop Group",
+ .description =
+ "Zebra was not able to successfully install a new nexthop group into the fib",
+ .suggestion =
+ "Check to see if the nexthop group on the route you tried to install is valid."
+ },
+ {
+ .code = EC_ZEBRA_IF_LOOKUP_FAILED,
+ .title = "Zebra interface lookup failed",
+ .description = "Zebra attempted to look up a interface for a particular vrf_id and interface index, but didn't find anything.",
+ .suggestion = "If you entered a command to trigger this error, make sure you entered the arguments correctly. Check your config file for any potential errors. If these look correct, seek help.",
+ },
/* Warnings */
{
.code = EC_ZEBRAING_LM_PROTO_MISMATCH,
@@ -729,6 +762,24 @@ static struct log_ref ferr_zebra_err[] = {
"Check network topology to detect duplicate host IP for correctness.",
},
{
+ .code = EC_ZEBRA_BAD_NHG_MESSAGE,
+ .title =
+ "Bad Nexthop Group Message",
+ .description =
+ "Zebra received Nexthop Group message from the kernel that it cannot process.",
+ .suggestion =
+ "Check the kernel's link states and routing table to see how it matches ours."
+ },
+ {
+ .code = EC_ZEBRA_DUPLICATE_NHG_MESSAGE,
+ .title =
+ "Duplicate Nexthop Group Message",
+ .description =
+ "Zebra received Nexthop Group message from the kernel that it is identical to one it/we already have but with a different ID.",
+ .suggestion =
+ "See if the nexthop you are trying to add is already present in the fib."
+ },
+ {
.code = END_FERR,
}
};
diff --git a/zebra/zebra_errors.h b/zebra/zebra_errors.h
index 222055dd8..f9ccc2db2 100644
--- a/zebra/zebra_errors.h
+++ b/zebra/zebra_errors.h
@@ -72,6 +72,10 @@ enum zebra_log_refs {
EC_ZEBRA_VNI_DEL_FAILED,
EC_ZEBRA_VTEP_ADD_FAILED,
EC_ZEBRA_VNI_ADD_FAILED,
+ EC_ZEBRA_NHG_TABLE_INSERT_FAILED,
+ EC_ZEBRA_NHG_SYNC,
+ EC_ZEBRA_NHG_FIB_UPDATE,
+ EC_ZEBRA_IF_LOOKUP_FAILED,
/* warnings */
EC_ZEBRA_NS_NOTIFY_READ,
EC_ZEBRAING_LM_PROTO_MISMATCH,
@@ -125,6 +129,8 @@ enum zebra_log_refs {
EC_ZEBRA_DUP_MAC_DETECTED,
EC_ZEBRA_DUP_IP_INHERIT_DETECTED,
EC_ZEBRA_DUP_IP_DETECTED,
+ EC_ZEBRA_BAD_NHG_MESSAGE,
+ EC_ZEBRA_DUPLICATE_NHG_MESSAGE,
};
void zebra_error_init(void);
diff --git a/zebra/zebra_fpm_dt.c b/zebra/zebra_fpm_dt.c
index e87fa0ad7..debcf60ee 100644
--- a/zebra/zebra_fpm_dt.c
+++ b/zebra/zebra_fpm_dt.c
@@ -90,7 +90,7 @@ static int zfpm_dt_find_route(rib_dest_t **dest_p, struct route_entry **re_p)
if (!re)
continue;
- if (re->nexthop_active_num <= 0)
+ if (nexthop_group_active_nexthop_num(re->ng) == 0)
continue;
*dest_p = dest;
diff --git a/zebra/zebra_fpm_netlink.c b/zebra/zebra_fpm_netlink.c
index f347d3955..b54d8fbc1 100644
--- a/zebra/zebra_fpm_netlink.c
+++ b/zebra/zebra_fpm_netlink.c
@@ -314,7 +314,7 @@ static int netlink_route_info_fill(netlink_route_info_t *ri, int cmd,
ri->rtm_type = RTN_UNICAST;
ri->metric = &re->metric;
- for (ALL_NEXTHOPS(re->ng, nexthop)) {
+ for (ALL_NEXTHOPS_PTR(re->ng, nexthop)) {
if (ri->num_nhs >= zrouter.multipath_num)
break;
diff --git a/zebra/zebra_fpm_protobuf.c b/zebra/zebra_fpm_protobuf.c
index 3054b8a34..a11517ab8 100644
--- a/zebra/zebra_fpm_protobuf.c
+++ b/zebra/zebra_fpm_protobuf.c
@@ -173,7 +173,7 @@ static Fpm__AddRoute *create_add_route_message(qpb_allocator_t *allocator,
* Figure out the set of nexthops to be added to the message.
*/
num_nhs = 0;
- for (ALL_NEXTHOPS(re->ng, nexthop)) {
+ for (ALL_NEXTHOPS_PTR(re->ng, nexthop)) {
if (num_nhs >= zrouter.multipath_num)
break;
diff --git a/zebra/zebra_mpls.c b/zebra/zebra_mpls.c
index 8088ec1bf..ef1bd0260 100644
--- a/zebra/zebra_mpls.c
+++ b/zebra/zebra_mpls.c
@@ -185,7 +185,7 @@ static int lsp_install(struct zebra_vrf *zvrf, mpls_label_t label,
* the label advertised by the recursive nexthop (plus we don't have the
* logic yet to push multiple labels).
*/
- for (nexthop = re->ng.nexthop; nexthop; nexthop = nexthop->next) {
+ for (nexthop = re->ng->nexthop; nexthop; nexthop = nexthop->next) {
/* Skip inactive and recursive entries. */
if (!CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE))
continue;
@@ -635,7 +635,7 @@ static int nhlfe_nexthop_active_ipv4(zebra_nhlfe_t *nhlfe,
|| !CHECK_FLAG(match->flags, ZEBRA_FLAG_SELECTED))
continue;
- for (match_nh = match->ng.nexthop; match_nh;
+ for (match_nh = match->ng->nexthop; match_nh;
match_nh = match_nh->next) {
if (match->type == ZEBRA_ROUTE_CONNECT
|| nexthop->ifindex == match_nh->ifindex) {
@@ -686,10 +686,10 @@ static int nhlfe_nexthop_active_ipv6(zebra_nhlfe_t *nhlfe,
break;
}
- if (!match || !match->ng.nexthop)
+ if (!match || !match->ng->nexthop)
return 0;
- nexthop->ifindex = match->ng.nexthop->ifindex;
+ nexthop->ifindex = match->ng->nexthop->ifindex;
return 1;
}
@@ -2590,11 +2590,13 @@ int mpls_ftn_update(int add, struct zebra_vrf *zvrf, enum lsp_types_t type,
struct route_node *rn;
struct route_entry *re;
struct nexthop *nexthop;
+ struct nexthop_group new_grp = {};
+ struct nhg_hash_entry *nhe = NULL;
bool found;
+ afi_t afi = family2afi(prefix->family);
/* Lookup table. */
- table = zebra_vrf_table(family2afi(prefix->family), SAFI_UNICAST,
- zvrf_id(zvrf));
+ table = zebra_vrf_table(afi, SAFI_UNICAST, zvrf_id(zvrf));
if (!table)
return -1;
@@ -2610,8 +2612,15 @@ int mpls_ftn_update(int add, struct zebra_vrf *zvrf, enum lsp_types_t type,
if (re == NULL)
return -1;
+ /*
+ * Copy over current nexthops into a temporary group.
+ * We can't just change the values here since we are hashing
+ * on labels. We need to create a whole new group
+ */
+ nexthop_group_copy(&new_grp, re->ng);
+
found = false;
- for (nexthop = re->ng.nexthop; nexthop; nexthop = nexthop->next) {
+ for (nexthop = new_grp.nexthop; nexthop; nexthop = nexthop->next) {
switch (nexthop->type) {
case NEXTHOP_TYPE_IPV4:
case NEXTHOP_TYPE_IPV4_IFINDEX:
@@ -2625,7 +2634,7 @@ int mpls_ftn_update(int add, struct zebra_vrf *zvrf, enum lsp_types_t type,
continue;
if (!mpls_ftn_update_nexthop(add, nexthop, type,
out_label))
- return 0;
+ break;
found = true;
break;
case NEXTHOP_TYPE_IPV6:
@@ -2640,7 +2649,7 @@ int mpls_ftn_update(int add, struct zebra_vrf *zvrf, enum lsp_types_t type,
continue;
if (!mpls_ftn_update_nexthop(add, nexthop, type,
out_label))
- return 0;
+ break;
found = true;
break;
default:
@@ -2648,14 +2657,19 @@ int mpls_ftn_update(int add, struct zebra_vrf *zvrf, enum lsp_types_t type,
}
}
- if (!found)
- return -1;
+ if (found) {
+ nhe = zebra_nhg_rib_find(0, &new_grp, afi);
- SET_FLAG(re->status, ROUTE_ENTRY_CHANGED);
- SET_FLAG(re->status, ROUTE_ENTRY_LABELS_CHANGED);
- rib_queue_add(rn);
+ zebra_nhg_re_update_ref(re, nhe);
- return 0;
+ SET_FLAG(re->status, ROUTE_ENTRY_CHANGED);
+ SET_FLAG(re->status, ROUTE_ENTRY_LABELS_CHANGED);
+ rib_queue_add(rn);
+ }
+
+ nexthops_free(new_grp.nexthop);
+
+ return found ? 0 : -1;
}
int mpls_ftn_uninstall(struct zebra_vrf *zvrf, enum lsp_types_t type,
@@ -2684,7 +2698,7 @@ int mpls_ftn_uninstall(struct zebra_vrf *zvrf, enum lsp_types_t type,
if (re == NULL)
return -1;
- for (nexthop = re->ng.nexthop; nexthop; nexthop = nexthop->next)
+ for (nexthop = re->ng->nexthop; nexthop; nexthop = nexthop->next)
nexthop_del_labels(nexthop);
SET_FLAG(re->status, ROUTE_ENTRY_CHANGED);
@@ -2889,7 +2903,12 @@ static void mpls_ftn_uninstall_all(struct zebra_vrf *zvrf,
for (rn = route_top(table); rn; rn = route_next(rn)) {
update = 0;
RNODE_FOREACH_RE (rn, re) {
- for (nexthop = re->ng.nexthop; nexthop;
+ struct nexthop_group new_grp = {};
+ struct nhg_hash_entry *nhe = NULL;
+
+ nexthop_group_copy(&new_grp, re->ng);
+
+ for (nexthop = new_grp.nexthop; nexthop;
nexthop = nexthop->next) {
if (nexthop->nh_label_type != lsp_type)
continue;
@@ -2900,6 +2919,14 @@ static void mpls_ftn_uninstall_all(struct zebra_vrf *zvrf,
ROUTE_ENTRY_LABELS_CHANGED);
update = 1;
}
+
+ if (CHECK_FLAG(re->status,
+ ROUTE_ENTRY_LABELS_CHANGED)) {
+ nhe = zebra_nhg_rib_find(0, &new_grp, afi);
+ zebra_nhg_re_update_ref(re, nhe);
+ }
+
+ nexthops_free(new_grp.nexthop);
}
if (update)
diff --git a/zebra/zebra_nhg.c b/zebra/zebra_nhg.c
index 4e696b39a..2bb117b27 100644
--- a/zebra/zebra_nhg.c
+++ b/zebra/zebra_nhg.c
@@ -26,14 +26,1094 @@
#include "lib/nexthop_group_private.h"
#include "lib/routemap.h"
#include "lib/mpls.h"
+#include "lib/jhash.h"
+#include "lib/debug.h"
#include "zebra/connected.h"
#include "zebra/debug.h"
#include "zebra/zebra_router.h"
-#include "zebra/zebra_nhg.h"
+#include "zebra/zebra_nhg_private.h"
#include "zebra/zebra_rnh.h"
#include "zebra/zebra_routemap.h"
+#include "zebra/zebra_memory.h"
+#include "zebra/zserv.h"
#include "zebra/rt.h"
+#include "zebra_errors.h"
+#include "zebra_dplane.h"
+#include "zebra/interface.h"
+
+DEFINE_MTYPE_STATIC(ZEBRA, NHG, "Nexthop Group Entry");
+DEFINE_MTYPE_STATIC(ZEBRA, NHG_CONNECTED, "Nexthop Group Connected");
+DEFINE_MTYPE_STATIC(ZEBRA, NHG_CTX, "Nexthop Group Context");
+
+/* id counter to keep in sync with kernel */
+uint32_t id_counter;
+
+static struct nhg_hash_entry *depends_find(struct nexthop *nh, afi_t afi);
+static void depends_add(struct nhg_connected_tree_head *head,
+ struct nhg_hash_entry *depend);
+static struct nhg_hash_entry *
+depends_find_add(struct nhg_connected_tree_head *head, struct nexthop *nh,
+ afi_t afi);
+static struct nhg_hash_entry *
+depends_find_id_add(struct nhg_connected_tree_head *head, uint32_t id);
+static void depends_decrement_free(struct nhg_connected_tree_head *head);
+
+
+static void nhg_connected_free(struct nhg_connected *dep)
+{
+ XFREE(MTYPE_NHG_CONNECTED, dep);
+}
+
+static struct nhg_connected *nhg_connected_new(struct nhg_hash_entry *nhe)
+{
+ struct nhg_connected *new = NULL;
+
+ new = XCALLOC(MTYPE_NHG_CONNECTED, sizeof(struct nhg_connected));
+ new->nhe = nhe;
+
+ return new;
+}
+
+void nhg_connected_tree_free(struct nhg_connected_tree_head *head)
+{
+ struct nhg_connected *rb_node_dep = NULL;
+
+ if (!nhg_connected_tree_is_empty(head)) {
+ frr_each_safe(nhg_connected_tree, head, rb_node_dep) {
+ nhg_connected_tree_del(head, rb_node_dep);
+ nhg_connected_free(rb_node_dep);
+ }
+ }
+}
+
+bool nhg_connected_tree_is_empty(const struct nhg_connected_tree_head *head)
+{
+ return nhg_connected_tree_count(head) ? false : true;
+}
+
+struct nhg_connected *
+nhg_connected_tree_root(struct nhg_connected_tree_head *head)
+{
+ return nhg_connected_tree_first(head);
+}
+
+void nhg_connected_tree_del_nhe(struct nhg_connected_tree_head *head,
+ struct nhg_hash_entry *depend)
+{
+ struct nhg_connected lookup = {};
+ struct nhg_connected *remove = NULL;
+
+ lookup.nhe = depend;
+
+ /* Lookup to find the element, then remove it */
+ remove = nhg_connected_tree_find(head, &lookup);
+ remove = nhg_connected_tree_del(head, remove);
+
+ if (remove)
+ nhg_connected_free(remove);
+}
+
+void nhg_connected_tree_add_nhe(struct nhg_connected_tree_head *head,
+ struct nhg_hash_entry *depend)
+{
+ struct nhg_connected *new = NULL;
+
+ new = nhg_connected_new(depend);
+
+ if (new)
+ nhg_connected_tree_add(head, new);
+}
+
+static void
+nhg_connected_tree_decrement_ref(struct nhg_connected_tree_head *head)
+{
+ struct nhg_connected *rb_node_dep = NULL;
+
+ frr_each_safe(nhg_connected_tree, head, rb_node_dep) {
+ zebra_nhg_decrement_ref(rb_node_dep->nhe);
+ }
+}
+
+static void
+nhg_connected_tree_increment_ref(struct nhg_connected_tree_head *head)
+{
+ struct nhg_connected *rb_node_dep = NULL;
+
+ frr_each(nhg_connected_tree, head, rb_node_dep) {
+ zebra_nhg_increment_ref(rb_node_dep->nhe);
+ }
+}
+
+/*
+ * Walk a recursive nexthop-group entry down to the entry it resolves
+ * through: while NEXTHOP_GROUP_RECURSIVE is set and the depends tree is
+ * non-empty, descend into the first (tree-root) depend.  Returns the
+ * final, non-recursive hash entry.
+ */
+struct nhg_hash_entry *zebra_nhg_resolve(struct nhg_hash_entry *nhe)
+{
+	if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_RECURSIVE)
+	    && !zebra_nhg_depends_is_empty(nhe)) {
+		nhe = nhg_connected_tree_root(&nhe->nhg_depends)->nhe;
+		return zebra_nhg_resolve(nhe);
+	}
+
+	return nhe;
+}
+
+unsigned int zebra_nhg_depends_count(const struct nhg_hash_entry *nhe)
+{
+ return nhg_connected_tree_count(&nhe->nhg_depends);
+}
+
+bool zebra_nhg_depends_is_empty(const struct nhg_hash_entry *nhe)
+{
+ return nhg_connected_tree_is_empty(&nhe->nhg_depends);
+}
+
+static void zebra_nhg_depends_del(struct nhg_hash_entry *from,
+ struct nhg_hash_entry *depend)
+{
+ nhg_connected_tree_del_nhe(&from->nhg_depends, depend);
+}
+
+static void zebra_nhg_depends_init(struct nhg_hash_entry *nhe)
+{
+ nhg_connected_tree_init(&nhe->nhg_depends);
+}
+
+unsigned int zebra_nhg_dependents_count(const struct nhg_hash_entry *nhe)
+{
+ return nhg_connected_tree_count(&nhe->nhg_dependents);
+}
+
+
+bool zebra_nhg_dependents_is_empty(const struct nhg_hash_entry *nhe)
+{
+ return nhg_connected_tree_is_empty(&nhe->nhg_dependents);
+}
+
+static void zebra_nhg_dependents_del(struct nhg_hash_entry *from,
+ struct nhg_hash_entry *dependent)
+{
+ nhg_connected_tree_del_nhe(&from->nhg_dependents, dependent);
+}
+
+static void zebra_nhg_dependents_add(struct nhg_hash_entry *to,
+ struct nhg_hash_entry *dependent)
+{
+ nhg_connected_tree_add_nhe(&to->nhg_dependents, dependent);
+}
+
+static void zebra_nhg_dependents_init(struct nhg_hash_entry *nhe)
+{
+ nhg_connected_tree_init(&nhe->nhg_dependents);
+}
+
+/* Release this nhe from anything depending on it */
+static void zebra_nhg_dependents_release(struct nhg_hash_entry *nhe)
+{
+ struct nhg_connected *rb_node_dep = NULL;
+
+ frr_each_safe(nhg_connected_tree, &nhe->nhg_dependents, rb_node_dep) {
+ zebra_nhg_depends_del(rb_node_dep->nhe, nhe);
+ /* recheck validity of the dependent */
+ zebra_nhg_check_valid(rb_node_dep->nhe);
+ }
+}
+
+/* Release this nhe from anything that it depends on */
+static void zebra_nhg_depends_release(struct nhg_hash_entry *nhe)
+{
+ if (!zebra_nhg_depends_is_empty(nhe)) {
+ struct nhg_connected *rb_node_dep = NULL;
+
+ frr_each_safe(nhg_connected_tree, &nhe->nhg_depends,
+ rb_node_dep) {
+ zebra_nhg_dependents_del(rb_node_dep->nhe, nhe);
+ }
+ }
+}
+
+
+struct nhg_hash_entry *zebra_nhg_lookup_id(uint32_t id)
+{
+ struct nhg_hash_entry lookup = {};
+
+ lookup.id = id;
+ return hash_lookup(zrouter.nhgs_id, &lookup);
+}
+
+static int zebra_nhg_insert_id(struct nhg_hash_entry *nhe)
+{
+ if (hash_lookup(zrouter.nhgs_id, nhe)) {
+ flog_err(
+ EC_ZEBRA_NHG_TABLE_INSERT_FAILED,
+ "Failed inserting NHG id=%u into the ID hash table, entry already exists",
+ nhe->id);
+ return -1;
+ }
+
+ hash_get(zrouter.nhgs_id, nhe, hash_alloc_intern);
+
+ return 0;
+}
+
+static void zebra_nhg_set_if(struct nhg_hash_entry *nhe, struct interface *ifp)
+{
+ nhe->ifp = ifp;
+ if_nhg_dependents_add(ifp, nhe);
+}
+
+/*
+ * Attach a depends tree (built further up the stack) to 'nhe', wire the
+ * reverse "dependents" back-pointers on each depend, and — for a
+ * singleton (no depends) nexthop with an ifindex — bind the interface
+ * pointer onto the entry via zebra_nhg_set_if().
+ */
+static void
+zebra_nhg_connect_depends(struct nhg_hash_entry *nhe,
+			  struct nhg_connected_tree_head nhg_depends)
+{
+	struct nhg_connected *rb_node_dep = NULL;
+
+	/* This has been allocated higher above in the stack. Could probably
+	 * re-allocate and free the old stuff but just using the same memory
+	 * for now. Otherwise, there might be a time trade-off for repeated
+	 * alloc/frees at startup.
+	 */
+	nhe->nhg_depends = nhg_depends;
+
+	/* Attach backpointer to anything that it depends on */
+	zebra_nhg_dependents_init(nhe);
+	if (!zebra_nhg_depends_is_empty(nhe)) {
+		frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) {
+			zebra_nhg_dependents_add(rb_node_dep->nhe, nhe);
+		}
+	}
+
+	/* Add the ifp now if its not a group or recursive and has ifindex */
+	if (zebra_nhg_depends_is_empty(nhe) && nhe->nhg->nexthop
+	    && nhe->nhg->nexthop->ifindex) {
+		struct interface *ifp = NULL;
+
+		ifp = if_lookup_by_index(nhe->nhg->nexthop->ifindex,
+					 nhe->vrf_id);
+		if (ifp)
+			zebra_nhg_set_if(nhe, ifp);
+		else
+			flog_err(
+				EC_ZEBRA_IF_LOOKUP_FAILED,
+				"Zebra failed to lookup an interface with ifindex=%d in vrf=%u for NHE id=%u",
+				nhe->nhg->nexthop->ifindex, nhe->vrf_id,
+				nhe->id);
+	}
+}
+
+/*
+ * Allocate a new hash entry as a deep copy of 'copy' under the given ID.
+ * The nexthop group is duplicated via nexthop_group_copy(); refcnt starts
+ * at zero and a fresh dplane sequence number is taken.  The depends /
+ * dependents trees are NOT copied here (see zebra_nhg_connect_depends()).
+ */
+static struct nhg_hash_entry *zebra_nhg_copy(struct nhg_hash_entry *copy,
+					     uint32_t id)
+{
+	struct nhg_hash_entry *nhe;
+
+	nhe = XCALLOC(MTYPE_NHG, sizeof(struct nhg_hash_entry));
+
+	nhe->id = id;
+
+	nhe->nhg = nexthop_group_new();
+	nexthop_group_copy(nhe->nhg, copy->nhg);
+
+	nhe->vrf_id = copy->vrf_id;
+	nhe->afi = copy->afi;
+	nhe->type = copy->type ? copy->type : ZEBRA_ROUTE_NHG;
+	nhe->refcnt = 0;
+	nhe->dplane_ref = zebra_router_get_next_sequence();
+
+	return nhe;
+}
+
+/* Allocation via hash handler */
+static void *zebra_nhg_hash_alloc(void *arg)
+{
+ struct nhg_hash_entry *nhe = NULL;
+ struct nhg_hash_entry *copy = arg;
+
+ nhe = zebra_nhg_copy(copy, copy->id);
+
+ /* Mark duplicate nexthops in a group at creation time. */
+ nexthop_group_mark_duplicates(nhe->nhg);
+
+ zebra_nhg_connect_depends(nhe, copy->nhg_depends);
+ zebra_nhg_insert_id(nhe);
+
+ return nhe;
+}
+
+uint32_t zebra_nhg_hash_key(const void *arg)
+{
+ const struct nhg_hash_entry *nhe = arg;
+
+ uint32_t key = 0x5a351234;
+
+ key = jhash_3words(nhe->vrf_id, nhe->afi, nexthop_group_hash(nhe->nhg),
+ key);
+
+ return key;
+}
+
+uint32_t zebra_nhg_id_key(const void *arg)
+{
+ const struct nhg_hash_entry *nhe = arg;
+
+ return nhe->id;
+}
+
+/*
+ * Equality callback for the attribute-keyed NHG hash table.  Matching
+ * non-zero IDs short-circuit to equal; otherwise vrf, afi, the
+ * non-recursive active-nexthop count, and the non-recursive nexthop
+ * comparison must all agree.
+ */
+bool zebra_nhg_hash_equal(const void *arg1, const void *arg2)
+{
+	const struct nhg_hash_entry *nhe1 = arg1;
+	const struct nhg_hash_entry *nhe2 = arg2;
+
+	/* No matter what if they equal IDs, assume equal */
+	if (nhe1->id && nhe2->id && (nhe1->id == nhe2->id))
+		return true;
+
+	if (nhe1->vrf_id != nhe2->vrf_id)
+		return false;
+
+	if (nhe1->afi != nhe2->afi)
+		return false;
+
+	if (nexthop_group_active_nexthop_num_no_recurse(nhe1->nhg)
+	    != nexthop_group_active_nexthop_num_no_recurse(nhe2->nhg))
+		return false;
+
+	if (!nexthop_group_equal_no_recurse(nhe1->nhg, nhe2->nhg))
+		return false;
+
+	return true;
+}
+
+bool zebra_nhg_hash_id_equal(const void *arg1, const void *arg2)
+{
+ const struct nhg_hash_entry *nhe1 = arg1;
+ const struct nhg_hash_entry *nhe2 = arg2;
+
+ return nhe1->id == nhe2->id;
+}
+
+/*
+ * Build a nexthop group and its depends tree from a kernel-provided
+ * array of nexthop IDs ('grp', 'count' entries).  Each member ID must
+ * already exist in our ID table; its nexthops are flattened into 'nhg'.
+ * Returns 0 on success, -1 (with an error log) if a member is missing.
+ */
+static int zebra_nhg_process_grp(struct nexthop_group *nhg,
+				 struct nhg_connected_tree_head *depends,
+				 struct nh_grp *grp, uint8_t count)
+{
+	nhg_connected_tree_init(depends);
+
+	for (int i = 0; i < count; i++) {
+		struct nhg_hash_entry *depend = NULL;
+		/* We do not care about nexthop_grp.weight at
+		 * this time. But we should figure out
+		 * how to adapt this to our code in
+		 * the future.
+		 */
+		depend = depends_find_id_add(depends, grp[i].id);
+
+		if (!depend) {
+			flog_err(
+				EC_ZEBRA_NHG_SYNC,
+				"Received Nexthop Group from the kernel with a dependent Nexthop ID (%u) which we do not have in our table",
+				grp[i].id);
+			return -1;
+		}
+
+		/*
+		 * If this is a nexthop with its own group
+		 * dependencies, add them as well. Not sure its
+		 * even possible to have a group within a group
+		 * in the kernel.
+		 */
+
+		copy_nexthops(&nhg->nexthop, depend->nhg->nexthop, NULL);
+	}
+
+	return 0;
+}
+
+static void handle_recursive_depend(struct nhg_connected_tree_head *nhg_depends,
+ struct nexthop *nh, afi_t afi)
+{
+ struct nhg_hash_entry *depend = NULL;
+ struct nexthop_group resolved_ng = {};
+
+ _nexthop_group_add_sorted(&resolved_ng, nh);
+
+ depend = zebra_nhg_rib_find(0, &resolved_ng, afi);
+ depends_add(nhg_depends, depend);
+}
+
+/*
+ * Core find-or-create for nexthop-group hash entries.
+ *
+ * If 'id' is non-zero it came from the kernel and is looked up in the
+ * ID table; otherwise a fresh ID is provisionally taken and the
+ * attribute-keyed hash is consulted (the provisional ID is rolled back
+ * on a hit).  On a miss, the depends tree is either taken verbatim from
+ * 'nhg_depends' (kernel path) or derived here from the group members /
+ * the recursive nexthop's resolved chain, and a new entry is created
+ * via the hash allocator.
+ *
+ * Returns true if a new entry was created, false if one was found;
+ * *nhe is set either way.
+ */
+static bool zebra_nhg_find(struct nhg_hash_entry **nhe, uint32_t id,
+			   struct nexthop_group *nhg,
+			   struct nhg_connected_tree_head *nhg_depends,
+			   vrf_id_t vrf_id, afi_t afi, int type)
+{
+	struct nhg_hash_entry lookup = {};
+
+	uint32_t old_id_counter = id_counter;
+
+	bool created = false;
+	bool recursive = false;
+
+	/*
+	 * If it has an id at this point, we must have gotten it from the kernel
+	 */
+	lookup.id = id ? id : ++id_counter;
+
+	lookup.type = type ? type : ZEBRA_ROUTE_NHG;
+	lookup.nhg = nhg;
+
+	if (lookup.nhg->nexthop->next) {
+		/* Groups can have all vrfs and AF's in them */
+		lookup.afi = AFI_UNSPEC;
+		lookup.vrf_id = 0;
+	} else {
+		switch (lookup.nhg->nexthop->type) {
+		case (NEXTHOP_TYPE_IFINDEX):
+		case (NEXTHOP_TYPE_BLACKHOLE):
+			/*
+			 * This switch case handles setting the afi different
+			 * for ipv4/v6 routes. Ifindex/blackhole nexthop
+			 * objects cannot be ambiguous, they must be Address
+			 * Family specific. If we get here, we will either use
+			 * the AF of the route, or the one we got passed from
+			 * here from the kernel.
+			 */
+			lookup.afi = afi;
+			break;
+		case (NEXTHOP_TYPE_IPV4_IFINDEX):
+		case (NEXTHOP_TYPE_IPV4):
+			lookup.afi = AFI_IP;
+			break;
+		case (NEXTHOP_TYPE_IPV6_IFINDEX):
+		case (NEXTHOP_TYPE_IPV6):
+			lookup.afi = AFI_IP6;
+			break;
+		}
+
+		lookup.vrf_id = vrf_id;
+	}
+
+	if (id)
+		(*nhe) = zebra_nhg_lookup_id(id);
+	else
+		(*nhe) = hash_lookup(zrouter.nhgs, &lookup);
+
+	/* If it found an nhe in our tables, this new ID is unused */
+	if (*nhe)
+		id_counter = old_id_counter;
+
+	if (!(*nhe)) {
+		/* Only hash/lookup the depends if the first lookup
+		 * fails to find something. This should hopefully save a
+		 * lot of cycles for larger ecmp sizes.
+		 */
+		if (nhg_depends)
+			/* If you don't want to hash on each nexthop in the
+			 * nexthop group struct you can pass the depends
+			 * directly. Kernel-side we do this since it just looks
+			 * them up via IDs.
+			 */
+			lookup.nhg_depends = *nhg_depends;
+		else {
+			if (nhg->nexthop->next) {
+				zebra_nhg_depends_init(&lookup);
+
+				/* If its a group, create a dependency tree */
+				struct nexthop *nh = NULL;
+
+				for (nh = nhg->nexthop; nh; nh = nh->next)
+					depends_find_add(&lookup.nhg_depends,
+							 nh, afi);
+			} else if (CHECK_FLAG(nhg->nexthop->flags,
+					      NEXTHOP_FLAG_RECURSIVE)) {
+				zebra_nhg_depends_init(&lookup);
+				handle_recursive_depend(&lookup.nhg_depends,
+							nhg->nexthop->resolved,
+							afi);
+				recursive = true;
+			}
+		}
+
+		(*nhe) = hash_get(zrouter.nhgs, &lookup, zebra_nhg_hash_alloc);
+		created = true;
+
+		if (recursive)
+			SET_FLAG((*nhe)->flags, NEXTHOP_GROUP_RECURSIVE);
+	}
+	return created;
+}
+
+/* Find/create a single nexthop */
+static struct nhg_hash_entry *
+zebra_nhg_find_nexthop(uint32_t id, struct nexthop *nh, afi_t afi, int type)
+{
+ struct nhg_hash_entry *nhe = NULL;
+ struct nexthop_group nhg = {};
+
+ _nexthop_group_add_sorted(&nhg, nh);
+
+ zebra_nhg_find(&nhe, id, &nhg, NULL, nh->vrf_id, afi, 0);
+
+ return nhe;
+}
+
+/* Allocate a zeroed nexthop-group context. */
+static struct nhg_ctx *nhg_ctx_new(void)
+{
+	struct nhg_ctx *new = NULL;
+
+	new = XCALLOC(MTYPE_NHG_CTX, sizeof(struct nhg_ctx));
+
+	return new;
+}
+
+static void nhg_ctx_free(struct nhg_ctx *ctx)
+{
+ XFREE(MTYPE_NHG_CTX, ctx);
+}
+
+static uint32_t nhg_ctx_get_id(const struct nhg_ctx *ctx)
+{
+ return ctx->id;
+}
+
+static void nhg_ctx_set_status(struct nhg_ctx *ctx, enum nhg_ctx_status status)
+{
+ ctx->status = status;
+}
+
+static enum nhg_ctx_status nhg_ctx_get_status(const struct nhg_ctx *ctx)
+{
+ return ctx->status;
+}
+
+static void nhg_ctx_set_op(struct nhg_ctx *ctx, enum nhg_ctx_op_e op)
+{
+ ctx->op = op;
+}
+
+static enum nhg_ctx_op_e nhg_ctx_get_op(const struct nhg_ctx *ctx)
+{
+ return ctx->op;
+}
+
+static vrf_id_t nhg_ctx_get_vrf_id(const struct nhg_ctx *ctx)
+{
+ return ctx->vrf_id;
+}
+
+static int nhg_ctx_get_type(const struct nhg_ctx *ctx)
+{
+ return ctx->type;
+}
+
+static int nhg_ctx_get_afi(const struct nhg_ctx *ctx)
+{
+ return ctx->afi;
+}
+
+static struct nexthop *nhg_ctx_get_nh(struct nhg_ctx *ctx)
+{
+ return &ctx->u.nh;
+}
+
+static uint8_t nhg_ctx_get_count(const struct nhg_ctx *ctx)
+{
+ return ctx->count;
+}
+
+static struct nh_grp *nhg_ctx_get_grp(struct nhg_ctx *ctx)
+{
+ return ctx->u.grp;
+}
+
+static struct nhg_ctx *nhg_ctx_init(uint32_t id, struct nexthop *nh,
+ struct nh_grp *grp, vrf_id_t vrf_id,
+ afi_t afi, int type, uint8_t count)
+{
+ struct nhg_ctx *ctx = NULL;
+
+ ctx = nhg_ctx_new();
+
+ ctx->id = id;
+ ctx->vrf_id = vrf_id;
+ ctx->afi = afi;
+ ctx->type = type;
+ ctx->count = count;
+
+ if (count)
+ /* Copy over the array */
+ memcpy(&ctx->u.grp, grp, count * sizeof(struct nh_grp));
+ else if (nh)
+ ctx->u.nh = *nh;
+
+ return ctx;
+}
+
+static bool zebra_nhg_contains_unhashable(struct nhg_hash_entry *nhe)
+{
+ struct nhg_connected *rb_node_dep = NULL;
+
+ frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) {
+ if (CHECK_FLAG(rb_node_dep->nhe->flags,
+ NEXTHOP_GROUP_UNHASHABLE))
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Mark a kernel-duplicate entry as unhashable: it lives only in the ID
+ * table and is treated as already installed.  Logs a warning with the ID.
+ */
+static void zebra_nhg_set_unhashable(struct nhg_hash_entry *nhe)
+{
+	SET_FLAG(nhe->flags, NEXTHOP_GROUP_UNHASHABLE);
+	SET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED);
+
+	/* %u: nhe->id is a uint32_t, matching the other logs in this file */
+	flog_warn(
+		EC_ZEBRA_DUPLICATE_NHG_MESSAGE,
+		"Nexthop Group with ID (%u) is a duplicate, therefore unhashable, ignoring",
+		nhe->id);
+}
+
+static void zebra_nhg_set_valid(struct nhg_hash_entry *nhe)
+{
+ struct nhg_connected *rb_node_dep;
+
+ SET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID);
+
+ frr_each(nhg_connected_tree, &nhe->nhg_dependents, rb_node_dep)
+ zebra_nhg_set_valid(rb_node_dep->nhe);
+}
+
+static void zebra_nhg_set_invalid(struct nhg_hash_entry *nhe)
+{
+ struct nhg_connected *rb_node_dep;
+
+ UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID);
+
+ /* Update validity of nexthops depending on it */
+ frr_each(nhg_connected_tree, &nhe->nhg_dependents, rb_node_dep)
+ zebra_nhg_check_valid(rb_node_dep->nhe);
+}
+
+/*
+ * Re-derive validity for an entry: it is valid iff at least one of its
+ * depends is valid.  The result is then propagated to its dependents
+ * via zebra_nhg_set_valid()/zebra_nhg_set_invalid().
+ */
+void zebra_nhg_check_valid(struct nhg_hash_entry *nhe)
+{
+	struct nhg_connected *rb_node_dep = NULL;
+	bool valid = false;
+
+	/* If anything else in the group is valid, the group is valid */
+	frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) {
+		if (CHECK_FLAG(rb_node_dep->nhe->flags, NEXTHOP_GROUP_VALID)) {
+			valid = true;
+			goto done;
+		}
+	}
+
+done:
+	if (valid)
+		zebra_nhg_set_valid(nhe);
+	else
+		zebra_nhg_set_invalid(nhe);
+}
+
+
+/*
+ * Detach an entry from everything referencing it (depends/dependents
+ * trees, interface back-pointer) and pull it out of the hash tables.
+ * Unhashable duplicates were never stored in the attribute hash, so
+ * only the ID hash is released for them.
+ */
+static void zebra_nhg_release(struct nhg_hash_entry *nhe)
+{
+	/* Remove it from any lists it may be on */
+	zebra_nhg_depends_release(nhe);
+	zebra_nhg_dependents_release(nhe);
+	if (nhe->ifp)
+		if_nhg_dependents_del(nhe->ifp, nhe);
+
+	/*
+	 * If its unhashable, we didn't store it here and have to be
+	 * sure we don't clear one thats actually being used.
+	 */
+	if (!CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_UNHASHABLE))
+		hash_release(zrouter.nhgs, nhe);
+
+	hash_release(zrouter.nhgs_id, nhe);
+}
+
+static void zebra_nhg_handle_uninstall(struct nhg_hash_entry *nhe)
+{
+ zebra_nhg_release(nhe);
+ zebra_nhg_free(nhe);
+}
+
+static void zebra_nhg_handle_install(struct nhg_hash_entry *nhe)
+{
+ /* Update validity of groups depending on it */
+ struct nhg_connected *rb_node_dep;
+
+ frr_each_safe(nhg_connected_tree, &nhe->nhg_dependents, rb_node_dep)
+ zebra_nhg_set_valid(rb_node_dep->nhe);
+}
+
+/*
+ * The kernel/other program has changed the state of a nexthop object we are
+ * using.
+ */
+static void zebra_nhg_handle_kernel_state_change(struct nhg_hash_entry *nhe,
+ bool is_delete)
+{
+ if (nhe->refcnt) {
+ flog_err(
+ EC_ZEBRA_NHG_SYNC,
+ "Kernel %s a nexthop group with ID (%u) that we are still using for a route, sending it back down",
+ (is_delete ? "deleted" : "updated"), nhe->id);
+
+ UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED);
+ zebra_nhg_install_kernel(nhe);
+ } else
+ zebra_nhg_handle_uninstall(nhe);
+}
+
+/*
+ * Process a kernel NHG_CTX_OP_NEW context.
+ *
+ * If the ID is already in our table this is an out-of-band update and
+ * is handled as a kernel state change.  Otherwise a hash entry is
+ * created: from the contained group of member IDs when the ctx has a
+ * count, else from the single embedded nexthop.  Kernel duplicates
+ * (same attributes, different ID) are kept only in the ID table and
+ * marked unhashable.
+ *
+ * Returns 0 on success, -ENOENT when a group member ID has not been
+ * processed yet (caller may re-queue), -1 on lookup/create failure.
+ */
+static int nhg_ctx_process_new(struct nhg_ctx *ctx)
+{
+	struct nexthop_group *nhg = NULL;
+	struct nhg_connected_tree_head nhg_depends = {};
+	struct nhg_hash_entry *lookup = NULL;
+	struct nhg_hash_entry *nhe = NULL;
+
+	uint32_t id = nhg_ctx_get_id(ctx);
+	uint8_t count = nhg_ctx_get_count(ctx);
+	vrf_id_t vrf_id = nhg_ctx_get_vrf_id(ctx);
+	int type = nhg_ctx_get_type(ctx);
+	afi_t afi = nhg_ctx_get_afi(ctx);
+
+	lookup = zebra_nhg_lookup_id(id);
+
+	if (lookup) {
+		/* This is already present in our table, hence an update
+		 * that we did not initiate.
+		 */
+		zebra_nhg_handle_kernel_state_change(lookup, false);
+		return 0;
+	}
+
+	if (nhg_ctx_get_count(ctx)) {
+		nhg = nexthop_group_new();
+		if (zebra_nhg_process_grp(nhg, &nhg_depends,
+					  nhg_ctx_get_grp(ctx), count)) {
+			depends_decrement_free(&nhg_depends);
+			nexthop_group_delete(&nhg);
+			return -ENOENT;
+		}
+
+		/* zebra_nhg_find() takes (..., vrf_id, afi, type) */
+		if (!zebra_nhg_find(&nhe, id, nhg, &nhg_depends, vrf_id, afi,
+				    type))
+			depends_decrement_free(&nhg_depends);
+
+		/* These got copied over in zebra_nhg_alloc() */
+		nexthop_group_delete(&nhg);
+	} else
+		nhe = zebra_nhg_find_nexthop(id, nhg_ctx_get_nh(ctx), afi,
+					     type);
+
+	if (nhe) {
+		if (id != nhe->id) {
+			struct nhg_hash_entry *kernel_nhe = NULL;
+
+			/* Duplicate but with different ID from
+			 * the kernel
+			 */
+
+			/* The kernel allows duplicate nexthops
+			 * as long as they have different IDs.
+			 * We are ignoring those to prevent
+			 * syncing problems with the kernel
+			 * changes.
+			 *
+			 * We maintain them *ONLY* in the ID hash table to
+			 * track them and set the flag to indicated
+			 * their attributes are unhashable.
+			 */
+
+			kernel_nhe = zebra_nhg_copy(nhe, id);
+			zebra_nhg_insert_id(kernel_nhe);
+			zebra_nhg_set_unhashable(kernel_nhe);
+		} else if (zebra_nhg_contains_unhashable(nhe)) {
+			/* The group we got contains an unhashable/duplicated
+			 * depend, so lets mark this group as unhashable as well
+			 * and release it from the non-ID hash.
+			 */
+			hash_release(zrouter.nhgs, nhe);
+			zebra_nhg_set_unhashable(nhe);
+		} else {
+			/* It actually created a new nhe */
+			SET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID);
+			SET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED);
+		}
+	} else {
+		flog_err(
+			EC_ZEBRA_TABLE_LOOKUP_FAILED,
+			"Zebra failed to find or create a nexthop hash entry for ID (%u)",
+			id);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int nhg_ctx_process_del(struct nhg_ctx *ctx)
+{
+ struct nhg_hash_entry *nhe = NULL;
+ uint32_t id = nhg_ctx_get_id(ctx);
+
+ nhe = zebra_nhg_lookup_id(id);
+
+ if (!nhe) {
+ flog_warn(
+ EC_ZEBRA_BAD_NHG_MESSAGE,
+ "Kernel delete message received for nexthop group ID (%u) that we do not have in our ID table",
+ id);
+ return -1;
+ }
+
+ zebra_nhg_handle_kernel_state_change(nhe, true);
+
+ return 0;
+}
+
+static void nhg_ctx_process_finish(struct nhg_ctx *ctx)
+{
+ struct nexthop *nh;
+
+ /*
+ * Just freeing for now, maybe do something more in the future
+ * based on flag.
+ */
+
+ if (nhg_ctx_get_count(ctx))
+ goto done;
+
+ nh = nhg_ctx_get_nh(ctx);
+
+ nexthop_del_labels(nh);
+
+done:
+ if (ctx)
+ nhg_ctx_free(ctx);
+}
+
+static int queue_add(struct nhg_ctx *ctx)
+{
+ /* If its queued or already processed do nothing */
+ if (nhg_ctx_get_status(ctx) == NHG_CTX_QUEUED)
+ return 0;
+
+ if (rib_queue_nhg_add(ctx)) {
+ nhg_ctx_set_status(ctx, NHG_CTX_FAILURE);
+ return -1;
+ }
+
+ nhg_ctx_set_status(ctx, NHG_CTX_QUEUED);
+
+ return 0;
+}
+
+/*
+ * Dispatch a queued nexthop-group context by op.  A kernel group whose
+ * member IDs have not been processed yet is re-queued exactly once
+ * (tracked via NHG_CTX_REQUEUED); afterwards the ctx status is recorded
+ * and the ctx is freed via nhg_ctx_process_finish().
+ */
+int nhg_ctx_process(struct nhg_ctx *ctx)
+{
+	int ret = 0;
+
+	switch (nhg_ctx_get_op(ctx)) {
+	case NHG_CTX_OP_NEW:
+		ret = nhg_ctx_process_new(ctx);
+		if (nhg_ctx_get_count(ctx) && ret == -ENOENT
+		    && nhg_ctx_get_status(ctx) != NHG_CTX_REQUEUED) {
+			/**
+			 * We have entered a situation where we are
+			 * processing a group from the kernel
+			 * that has a contained nexthop which
+			 * we have not yet processed.
+			 *
+			 * Re-enqueue this ctx to be handled exactly one
+			 * more time (indicated by the flag).
+			 *
+			 * By the time we get back to it, we
+			 * should have processed its depends.
+			 */
+			nhg_ctx_set_status(ctx, NHG_CTX_NONE);
+			if (queue_add(ctx) == 0) {
+				nhg_ctx_set_status(ctx, NHG_CTX_REQUEUED);
+				return 0;
+			}
+		}
+		break;
+	case NHG_CTX_OP_DEL:
+		ret = nhg_ctx_process_del(ctx);
+		break;
+	case NHG_CTX_OP_NONE:
+		break;
+	}
+
+	nhg_ctx_set_status(ctx, (ret ? NHG_CTX_FAILURE : NHG_CTX_SUCCESS));
+
+	nhg_ctx_process_finish(ctx);
+
+	return ret;
+}
+
+/* Kernel-side, you either get a single new nexthop or an array of IDs.
+ *
+ * Builds an NHG_CTX_OP_NEW context for the kernel-reported nexthop
+ * object 'id'.  During startup the ctx is processed synchronously so
+ * routes referencing the nhe_id can be resolved immediately; otherwise
+ * it is queued to the rib workqueue.  Returns 0 on success, -1 on
+ * queue/process failure (the ctx is freed on failure).
+ */
+int zebra_nhg_kernel_find(uint32_t id, struct nexthop *nh, struct nh_grp *grp,
+			  uint8_t count, vrf_id_t vrf_id, afi_t afi, int type,
+			  int startup)
+{
+	struct nhg_ctx *ctx = NULL;
+
+	if (id > id_counter)
+		/* Increase our counter so we don't try to create
+		 * an ID that already exists
+		 */
+		id_counter = id;
+
+	ctx = nhg_ctx_init(id, nh, grp, vrf_id, afi, type, count);
+	nhg_ctx_set_op(ctx, NHG_CTX_OP_NEW);
+
+	/* Under startup conditions, we need to handle them immediately
+	 * like we do for routes. Otherwise, we are going to get a route
+	 * with a nhe_id that we have not handled.
+	 */
+	if (startup)
+		return nhg_ctx_process(ctx);
+
+	if (queue_add(ctx)) {
+		nhg_ctx_process_finish(ctx);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Kernel-side, received delete message */
+int zebra_nhg_kernel_del(uint32_t id)
+{
+ struct nhg_ctx *ctx = NULL;
+
+ ctx = nhg_ctx_init(id, NULL, NULL, 0, 0, 0, 0);
+
+ nhg_ctx_set_op(ctx, NHG_CTX_OP_DEL);
+
+ if (queue_add(ctx)) {
+ nhg_ctx_process_finish(ctx);
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Some dependency helper functions */
+
+/*
+ * Find/create the singleton hash entry for one nexthop.  The nexthop is
+ * copied and detached from any surrounding chain before the lookup, then
+ * the temporary copy is freed.
+ *
+ * NOTE(review): lookup->prev should already be NULL for the head of a
+ * fresh copy_nexthops() chain, making the nexthops_free(lookup->prev)
+ * call a no-op — confirm against copy_nexthops() semantics.
+ */
+static struct nhg_hash_entry *depends_find(struct nexthop *nh, afi_t afi)
+{
+	struct nexthop *lookup = NULL;
+	struct nhg_hash_entry *nhe = NULL;
+
+	copy_nexthops(&lookup, nh, NULL);
+
+	/* Clear it, in case its a group */
+	nexthops_free(lookup->next);
+	nexthops_free(lookup->prev);
+	lookup->next = NULL;
+	lookup->prev = NULL;
+
+	nhe = zebra_nhg_find_nexthop(0, lookup, afi, 0);
+
+	nexthops_free(lookup);
+
+	return nhe;
+}
+
+static void depends_add(struct nhg_connected_tree_head *head,
+ struct nhg_hash_entry *depend)
+{
+ nhg_connected_tree_add_nhe(head, depend);
+ zebra_nhg_increment_ref(depend);
+}
+
+static struct nhg_hash_entry *
+depends_find_add(struct nhg_connected_tree_head *head, struct nexthop *nh,
+ afi_t afi)
+{
+ struct nhg_hash_entry *depend = NULL;
+
+ depend = depends_find(nh, afi);
+
+ if (depend)
+ depends_add(head, depend);
+
+ return depend;
+}
+
+static struct nhg_hash_entry *
+depends_find_id_add(struct nhg_connected_tree_head *head, uint32_t id)
+{
+ struct nhg_hash_entry *depend = NULL;
+
+ depend = zebra_nhg_lookup_id(id);
+
+ if (depend)
+ depends_add(head, depend);
+
+ return depend;
+}
+
+static void depends_decrement_free(struct nhg_connected_tree_head *head)
+{
+ nhg_connected_tree_decrement_ref(head);
+ nhg_connected_tree_free(head);
+}
+
+/* Rib-side, you get a nexthop group struct.
+ *
+ * Find-or-create a hash entry for the given group; the vrf is taken
+ * from the first nexthop and 'rt_afi' from the owning route.  Returns
+ * NULL (with an error log) when no nexthop was supplied.
+ */
+struct nhg_hash_entry *
+zebra_nhg_rib_find(uint32_t id, struct nexthop_group *nhg, afi_t rt_afi)
+{
+	struct nhg_hash_entry *nhe = NULL;
+
+	if (!(nhg && nhg->nexthop)) {
+		flog_err(EC_ZEBRA_TABLE_LOOKUP_FAILED,
+			 "No nexthop passed to %s", __func__);
+		return NULL;
+	}
+
+	zebra_nhg_find(&nhe, id, nhg, NULL, nhg->nexthop->vrf_id, rt_afi, 0);
+
+	return nhe;
+}
+
+static void zebra_nhg_free_members(struct nhg_hash_entry *nhe)
+{
+ nexthop_group_delete(&nhe->nhg);
+ /* Decrement to remove connection ref */
+ nhg_connected_tree_decrement_ref(&nhe->nhg_depends);
+ nhg_connected_tree_free(&nhe->nhg_depends);
+ nhg_connected_tree_free(&nhe->nhg_dependents);
+}
+
+/*
+ * Hash-table destructor: free an entry's group and connected trees,
+ * then the entry itself.  A non-zero refcnt at this point indicates a
+ * reference-counting bug, so it is logged before freeing.
+ */
+void zebra_nhg_free(void *arg)
+{
+	struct nhg_hash_entry *nhe = NULL;
+
+	nhe = (struct nhg_hash_entry *)arg;
+
+	if (nhe->refcnt)
+		zlog_debug("nhe_id=%u hash refcnt=%d", nhe->id, nhe->refcnt);
+
+	zebra_nhg_free_members(nhe);
+
+	XFREE(MTYPE_NHG, nhe);
+}
+
+/*
+ * Drop one reference; the decrement is propagated to all depends.  A
+ * zebra-created entry whose refcnt reaches zero is uninstalled from the
+ * kernel (which in turn triggers its release/free on completion).
+ */
+void zebra_nhg_decrement_ref(struct nhg_hash_entry *nhe)
+{
+	nhe->refcnt--;
+
+	if (!zebra_nhg_depends_is_empty(nhe))
+		nhg_connected_tree_decrement_ref(&nhe->nhg_depends);
+
+	if (ZEBRA_NHG_CREATED(nhe) && nhe->refcnt <= 0)
+		zebra_nhg_uninstall_kernel(nhe);
+}
+
+/* Take one reference; the increment is propagated to all depends. */
+void zebra_nhg_increment_ref(struct nhg_hash_entry *nhe)
+{
+	nhe->refcnt++;
+
+	if (!zebra_nhg_depends_is_empty(nhe))
+		nhg_connected_tree_increment_ref(&nhe->nhg_depends);
+}
static void nexthop_set_resolved(afi_t afi, const struct nexthop *newhop,
struct nexthop *nexthop)
@@ -152,7 +1232,8 @@ static bool nexthop_valid_resolve(const struct nexthop *nexthop,
/*
* Given a nexthop we need to properly recursively resolve
* the route. As such, do a table lookup to find and match
- * if at all possible. Set the nexthop->ifindex as appropriate
+ * if at all possible. Set the nexthop->ifindex and resolved_id
+ * as appropriate
*/
static int nexthop_active(afi_t afi, struct route_entry *re,
struct nexthop *nexthop, struct route_node *top)
@@ -171,6 +1252,7 @@ static int nexthop_active(afi_t afi, struct route_entry *re,
|| nexthop->type == NEXTHOP_TYPE_IPV6)
nexthop->ifindex = 0;
+
UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE);
nexthops_free(nexthop->resolved);
nexthop->resolved = NULL;
@@ -210,13 +1292,12 @@ static int nexthop_active(afi_t afi, struct route_entry *re,
if (connected_is_unnumbered(ifp)) {
if (if_is_operative(ifp))
return 1;
- else {
- if (IS_ZEBRA_DEBUG_RIB_DETAILED)
- zlog_debug(
- "\t%s: Onlink and interface %s is not operative",
- __PRETTY_FUNCTION__, ifp->name);
- return 0;
- }
+
+ if (IS_ZEBRA_DEBUG_RIB_DETAILED)
+ zlog_debug(
+ "\t%s: Onlink and interface %s is not operative",
+ __PRETTY_FUNCTION__, ifp->name);
+ return 0;
}
if (!if_is_operative(ifp)) {
if (IS_ZEBRA_DEBUG_RIB_DETAILED)
@@ -276,7 +1357,8 @@ static int nexthop_active(afi_t afi, struct route_entry *re,
/* Pick up selected route. */
/* However, do not resolve over default route unless explicitly
- * allowed. */
+ * allowed.
+ */
if (is_default_prefix(&rn->p)
&& !rnh_resolve_via_default(zvrf, p.family)) {
if (IS_ZEBRA_DEBUG_RIB_DETAILED)
@@ -294,7 +1376,8 @@ static int nexthop_active(afi_t afi, struct route_entry *re,
match = dest->selected_fib;
/* If there is no selected route or matched route is EGP, go up
- tree. */
+ * tree.
+ */
if (!match) {
do {
rn = rn->parent;
@@ -307,7 +1390,7 @@ static int nexthop_active(afi_t afi, struct route_entry *re,
if (match->type == ZEBRA_ROUTE_CONNECT) {
/* Directly point connected route. */
- newhop = match->ng.nexthop;
+ newhop = match->ng->nexthop;
if (newhop) {
if (nexthop->type == NEXTHOP_TYPE_IPV4
|| nexthop->type == NEXTHOP_TYPE_IPV6)
@@ -316,7 +1399,7 @@ static int nexthop_active(afi_t afi, struct route_entry *re,
return 1;
} else if (CHECK_FLAG(re->flags, ZEBRA_FLAG_ALLOW_RECURSION)) {
resolved = 0;
- for (ALL_NEXTHOPS(match->ng, newhop)) {
+ for (ALL_NEXTHOPS_PTR(match->ng, newhop)) {
if (!CHECK_FLAG(match->status,
ROUTE_ENTRY_INSTALLED))
continue;
@@ -330,13 +1413,14 @@ static int nexthop_active(afi_t afi, struct route_entry *re,
}
if (resolved)
re->nexthop_mtu = match->mtu;
+
if (!resolved && IS_ZEBRA_DEBUG_RIB_DETAILED)
zlog_debug("\t%s: Recursion failed to find",
__PRETTY_FUNCTION__);
return resolved;
} else if (re->type == ZEBRA_ROUTE_STATIC) {
resolved = 0;
- for (ALL_NEXTHOPS(match->ng, newhop)) {
+ for (ALL_NEXTHOPS_PTR(match->ng, newhop)) {
if (!CHECK_FLAG(match->status,
ROUTE_ENTRY_INSTALLED))
continue;
@@ -382,6 +1466,9 @@ static int nexthop_active(afi_t afi, struct route_entry *re,
* appropriately as well. An existing route map can turn
* (otherwise active) nexthop into inactive, but not vice versa.
*
+ * If it finds a nexthop recursivedly, set the resolved_id
+ * to match that nexthop's nhg_hash_entry ID;
+ *
* The return value is the final value of 'ACTIVE' flag.
*/
static unsigned nexthop_active_check(struct route_node *rn,
@@ -505,23 +1592,29 @@ static unsigned nexthop_active_check(struct route_node *rn,
/*
* Iterate over all nexthops of the given RIB entry and refresh their
- * ACTIVE flag. re->nexthop_active_num is updated accordingly. If any
- * nexthop is found to toggle the ACTIVE flag, the whole re structure
- * is flagged with ROUTE_ENTRY_CHANGED.
+ * ACTIVE flag. If any nexthop is found to toggle the ACTIVE flag,
+ * the whole re structure is flagged with ROUTE_ENTRY_CHANGED.
*
* Return value is the new number of active nexthops.
*/
int nexthop_active_update(struct route_node *rn, struct route_entry *re)
{
+ struct nexthop_group new_grp = {};
struct nexthop *nexthop;
union g_addr prev_src;
unsigned int prev_active, new_active;
ifindex_t prev_index;
+ uint8_t curr_active = 0;
+
+ afi_t rt_afi = family2afi(rn->p.family);
- re->nexthop_active_num = 0;
UNSET_FLAG(re->status, ROUTE_ENTRY_CHANGED);
- for (nexthop = re->ng.nexthop; nexthop; nexthop = nexthop->next) {
+ /* Copy over the nexthops in current state */
+ nexthop_group_copy(&new_grp, re->ng);
+
+ for (nexthop = new_grp.nexthop; nexthop; nexthop = nexthop->next) {
+
/* No protocol daemon provides src and so we're skipping
* tracking it */
prev_src = nexthop->rmap_src;
@@ -533,14 +1626,19 @@ int nexthop_active_update(struct route_node *rn, struct route_entry *re)
* a multipath perpsective should not be a data plane
* decision point.
*/
- new_active = nexthop_active_check(rn, re, nexthop);
+ new_active =
+ nexthop_active_check(rn, re, nexthop);
+
if (new_active
- && re->nexthop_active_num >= zrouter.multipath_num) {
+ && nexthop_group_active_nexthop_num(&new_grp)
+ >= zrouter.multipath_num) {
UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
new_active = 0;
}
+
if (new_active)
- re->nexthop_active_num++;
+ curr_active++;
+
/* Don't allow src setting on IPv6 addr for now */
if (prev_active != new_active || prev_index != nexthop->ifindex
|| ((nexthop->type >= NEXTHOP_TYPE_IFINDEX
@@ -555,6 +1653,269 @@ int nexthop_active_update(struct route_node *rn, struct route_entry *re)
SET_FLAG(re->status, ROUTE_ENTRY_CHANGED);
}
- return re->nexthop_active_num;
+ if (CHECK_FLAG(re->status, ROUTE_ENTRY_CHANGED)) {
+ struct nhg_hash_entry *new_nhe = NULL;
+
+ new_nhe = zebra_nhg_rib_find(0, &new_grp, rt_afi);
+
+ zebra_nhg_re_update_ref(re, new_nhe);
+ }
+
+ if (curr_active) {
+ struct nhg_hash_entry *nhe = NULL;
+
+ nhe = zebra_nhg_lookup_id(re->nhe_id);
+
+ if (nhe)
+ SET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID);
+ else
+ flog_err(
+ EC_ZEBRA_TABLE_LOOKUP_FAILED,
+ "Active update on NHE id=%u that we do not have in our tables",
+ re->nhe_id);
+ }
+
+ /*
+ * Do not need these nexthops anymore since they
+ * were either copied over into an nhe or not
+ * used at all.
+ */
+ nexthops_free(new_grp.nexthop);
+ return curr_active;
}
+static void zebra_nhg_re_attach_ref(struct route_entry *re,
+ struct nhg_hash_entry *new)
+{
+ re->ng = new->nhg;
+ re->nhe_id = new->id;
+
+ zebra_nhg_increment_ref(new);
+}
+
+int zebra_nhg_re_update_ref(struct route_entry *re, struct nhg_hash_entry *new)
+{
+ struct nhg_hash_entry *old = NULL;
+ int ret = 0;
+
+ if (new == NULL) {
+ re->ng = NULL;
+ goto done;
+ }
+
+ if (re->nhe_id != new->id) {
+ old = zebra_nhg_lookup_id(re->nhe_id);
+
+ zebra_nhg_re_attach_ref(re, new);
+
+ if (old)
+ zebra_nhg_decrement_ref(old);
+ } else if (!re->ng)
+ /* This is the first time it's being attached */
+ zebra_nhg_re_attach_ref(re, new);
+
+done:
+ return ret;
+}
+
+/* Convert a nhe into a group array */
+uint8_t zebra_nhg_nhe2grp(struct nh_grp *grp, struct nhg_hash_entry *nhe,
+ int max_num)
+{
+ struct nhg_connected *rb_node_dep = NULL;
+ struct nhg_hash_entry *depend = NULL;
+ uint8_t i = 0;
+
+ frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) {
+ bool duplicate = false;
+
+ depend = rb_node_dep->nhe;
+
+ /*
+ * If its recursive, use its resolved nhe in the group
+ */
+ if (CHECK_FLAG(depend->flags, NEXTHOP_GROUP_RECURSIVE)) {
+ depend = zebra_nhg_resolve(depend);
+ if (!depend) {
+ flog_err(
+ EC_ZEBRA_NHG_FIB_UPDATE,
+ "Failed to recursively resolve Nexthop Hash Entry in the group id=%u",
+ nhe->id);
+ continue;
+ }
+ }
+
+ /* Check for duplicate IDs, kernel doesn't like that */
+ for (int j = 0; j < i; j++) {
+ if (depend->id == grp[j].id)
+ duplicate = true;
+ }
+
+ if (!duplicate) {
+ grp[i].id = depend->id;
+ /* We aren't using weights for anything right now */
+ grp[i].weight = 0;
+ i++;
+ }
+
+ if (i >= max_num)
+ goto done;
+ }
+
+done:
+ return i;
+}
+
+void zebra_nhg_install_kernel(struct nhg_hash_entry *nhe)
+{
+ struct nhg_connected *rb_node_dep = NULL;
+
+ /* Resolve it first */
+ nhe = zebra_nhg_resolve(nhe);
+
+ /* Make sure all depends are installed/queued */
+ frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) {
+ zebra_nhg_install_kernel(rb_node_dep->nhe);
+ }
+
+ if (!CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED)
+ && !CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_QUEUED)) {
+ /* Change its type to us since we are installing it */
+ nhe->type = ZEBRA_ROUTE_NHG;
+
+ int ret = dplane_nexthop_add(nhe);
+
+ switch (ret) {
+ case ZEBRA_DPLANE_REQUEST_QUEUED:
+ SET_FLAG(nhe->flags, NEXTHOP_GROUP_QUEUED);
+ break;
+ case ZEBRA_DPLANE_REQUEST_FAILURE:
+ flog_err(
+ EC_ZEBRA_DP_INSTALL_FAIL,
+ "Failed to install Nexthop ID (%u) into the kernel",
+ nhe->id);
+ break;
+ case ZEBRA_DPLANE_REQUEST_SUCCESS:
+ SET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED);
+ zebra_nhg_handle_install(nhe);
+ break;
+ }
+ }
+}
+
+void zebra_nhg_uninstall_kernel(struct nhg_hash_entry *nhe)
+{
+ if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED)) {
+ int ret = dplane_nexthop_delete(nhe);
+
+ switch (ret) {
+ case ZEBRA_DPLANE_REQUEST_QUEUED:
+ SET_FLAG(nhe->flags, NEXTHOP_GROUP_QUEUED);
+ break;
+ case ZEBRA_DPLANE_REQUEST_FAILURE:
+ flog_err(
+ EC_ZEBRA_DP_DELETE_FAIL,
+ "Failed to uninstall Nexthop ID (%u) from the kernel",
+ nhe->id);
+ break;
+ case ZEBRA_DPLANE_REQUEST_SUCCESS:
+ UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED);
+ break;
+ }
+ }
+
+ zebra_nhg_handle_uninstall(nhe);
+}
+
+void zebra_nhg_dplane_result(struct zebra_dplane_ctx *ctx)
+{
+ enum dplane_op_e op;
+ enum zebra_dplane_result status;
+ uint32_t id = 0;
+ struct nhg_hash_entry *nhe = NULL;
+
+ op = dplane_ctx_get_op(ctx);
+ status = dplane_ctx_get_status(ctx);
+
+ id = dplane_ctx_get_nhe_id(ctx);
+
+ if (IS_ZEBRA_DEBUG_DPLANE_DETAIL)
+ zlog_debug(
+ "Nexthop dplane ctx %p, op %s, nexthop ID (%u), result %s",
+ ctx, dplane_op2str(op), id, dplane_res2str(status));
+
+ switch (op) {
+ case DPLANE_OP_NH_DELETE:
+ if (status != ZEBRA_DPLANE_REQUEST_SUCCESS)
+ flog_err(
+ EC_ZEBRA_DP_DELETE_FAIL,
+ "Failed to uninstall Nexthop ID (%u) from the kernel",
+ id);
+ /* We already free'd the data, nothing to do */
+ break;
+ case DPLANE_OP_NH_INSTALL:
+ case DPLANE_OP_NH_UPDATE:
+ nhe = zebra_nhg_lookup_id(id);
+
+ if (!nhe) {
+ flog_err(
+ EC_ZEBRA_NHG_SYNC,
+ "%s operation preformed on Nexthop ID (%u) in the kernel, that we no longer have in our table",
+ dplane_op2str(op), id);
+ break;
+ }
+
+ UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_QUEUED);
+ if (status == ZEBRA_DPLANE_REQUEST_SUCCESS) {
+ SET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID);
+ SET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED);
+ zebra_nhg_handle_install(nhe);
+ } else
+ flog_err(
+ EC_ZEBRA_DP_INSTALL_FAIL,
+ "Failed to install Nexthop ID (%u) into the kernel",
+ nhe->id);
+ break;
+ case DPLANE_OP_ROUTE_INSTALL:
+ case DPLANE_OP_ROUTE_UPDATE:
+ case DPLANE_OP_ROUTE_DELETE:
+ case DPLANE_OP_ROUTE_NOTIFY:
+ case DPLANE_OP_LSP_INSTALL:
+ case DPLANE_OP_LSP_UPDATE:
+ case DPLANE_OP_LSP_DELETE:
+ case DPLANE_OP_LSP_NOTIFY:
+ case DPLANE_OP_PW_INSTALL:
+ case DPLANE_OP_PW_UNINSTALL:
+ case DPLANE_OP_SYS_ROUTE_ADD:
+ case DPLANE_OP_SYS_ROUTE_DELETE:
+ case DPLANE_OP_ADDR_INSTALL:
+ case DPLANE_OP_ADDR_UNINSTALL:
+ case DPLANE_OP_MAC_INSTALL:
+ case DPLANE_OP_MAC_DELETE:
+ case DPLANE_OP_NEIGH_INSTALL:
+ case DPLANE_OP_NEIGH_UPDATE:
+ case DPLANE_OP_NEIGH_DELETE:
+ case DPLANE_OP_VTEP_ADD:
+ case DPLANE_OP_VTEP_DELETE:
+ case DPLANE_OP_NONE:
+ break;
+ }
+
+ dplane_ctx_fini(&ctx);
+}
+
+static void zebra_nhg_sweep_entry(struct hash_bucket *bucket, void *arg)
+{
+ struct nhg_hash_entry *nhe = NULL;
+
+ nhe = (struct nhg_hash_entry *)bucket->data;
+
+ /* If its being ref'd, just let it be uninstalled via a route removal */
+ if (ZEBRA_NHG_CREATED(nhe) && nhe->refcnt <= 0)
+ zebra_nhg_uninstall_kernel(nhe);
+}
+
+void zebra_nhg_sweep_table(struct hash *hash)
+{
+ hash_iterate(hash, zebra_nhg_sweep_entry, NULL);
+}
diff --git a/zebra/zebra_nhg.h b/zebra/zebra_nhg.h
index ff2351c75..1f695433c 100644
--- a/zebra/zebra_nhg.h
+++ b/zebra/zebra_nhg.h
@@ -24,6 +24,205 @@
#define __ZEBRA_NHG_H__
#include "zebra/rib.h"
+#include "lib/nexthop_group.h"
+#include "zebra/zebra_dplane.h"
+
+/* This struct is used exclusively for dataplane
+ * interaction via a dataplane context.
+ *
+ * It is designed to mimic the netlink nexthop_grp
+ * struct in include/linux/nexthop.h
+ */
+struct nh_grp {
+ uint32_t id;
+ uint8_t weight;
+};
+
+PREDECL_RBTREE_UNIQ(nhg_connected_tree);
+
+/*
+ * Hashtables containing entries found in `zebra_router`.
+ */
+
+struct nhg_hash_entry {
+ uint32_t id;
+ afi_t afi;
+ vrf_id_t vrf_id;
+ int type;
+
+ struct nexthop_group *nhg;
+
+ /* If this is not a group, it
+ * will be a single nexthop
+ * and must have an interface
+ * associated with it.
+ * Otherwise, this will be null.
+ */
+ struct interface *ifp;
+
+ uint32_t refcnt;
+ uint32_t dplane_ref;
+
+ uint32_t flags;
+
+ /* Dependency tree for other entries.
+ * For instance a group with two
+ * nexthops will have two dependencies
+ * pointing to those nhg_hash_entries.
+ *
+ * Using a rb tree here to make lookups
+ * faster with ID's.
+ */
+ struct nhg_connected_tree_head nhg_depends, nhg_dependents;
+/*
+ * Is this nexthop group valid, ie all nexthops are fully resolved.
+ * What is fully resolved? It's a nexthop that is either self contained
+ * and correct( ie no recursive pointer ) or a nexthop that is recursively
+ * resolved and correct.
+ */
+#define NEXTHOP_GROUP_VALID (1 << 0)
+/*
+ * Has this nexthop group been installed? At this point in time, this
+ * means that the data-plane has been told about this nexthop group
+ * and it's possible usage by a route entry.
+ */
+#define NEXTHOP_GROUP_INSTALLED (1 << 1)
+/*
+ * Has the nexthop group been queued to be send to the FIB?
+ * The NEXTHOP_GROUP_VALID flag should also be set by this point.
+ */
+#define NEXTHOP_GROUP_QUEUED (1 << 2)
+/*
+ * Is this a nexthop that is recursively resolved?
+ */
+#define NEXTHOP_GROUP_RECURSIVE (1 << 3)
+/*
+ * This is a nexthop group we got from the kernel, it is identical to
+ * one we already have. (The kernel allows duplicate nexthops, we don't
+ * since we hash on them). We are only tracking it in our ID table,
+ * it is unusable by our created routes but may be used by routes we get
+ * from the kernel. Therefore, it is unhashable.
+ */
+#define NEXTHOP_GROUP_UNHASHABLE (1 << 4)
+};
+
+/* Was this one we created, either this session or previously? */
+#define ZEBRA_NHG_CREATED(NHE) ((NHE->type) == ZEBRA_ROUTE_NHG)
+
+
+enum nhg_ctx_op_e {
+ NHG_CTX_OP_NONE = 0,
+ NHG_CTX_OP_NEW,
+ NHG_CTX_OP_DEL,
+};
+
+enum nhg_ctx_status {
+ NHG_CTX_NONE = 0,
+ NHG_CTX_QUEUED,
+ NHG_CTX_REQUEUED,
+ NHG_CTX_SUCCESS,
+ NHG_CTX_FAILURE,
+};
+
+/*
+ * Context needed to queue nhg updates on the
+ * work queue.
+ */
+struct nhg_ctx {
+
+ /* Unique ID */
+ uint32_t id;
+
+ vrf_id_t vrf_id;
+ afi_t afi;
+ /*
+	 * This should only ever be ZEBRA_ROUTE_NHG unless we get a kernel
+ * created nexthop not made by us.
+ */
+ int type;
+
+ /* If its a group array, how many? */
+ uint8_t count;
+
+ /* Its either a single nexthop or an array of ID's */
+ union {
+ struct nexthop nh;
+ struct nh_grp grp[MULTIPATH_NUM];
+ } u;
+
+ enum nhg_ctx_op_e op;
+ enum nhg_ctx_status status;
+};
+
+
+/**
+ * NHE abstracted tree functions.
+ * Use these where possible instead of the direct access ones.
+ */
+extern struct nhg_hash_entry *zebra_nhg_resolve(struct nhg_hash_entry *nhe);
+
+extern unsigned int zebra_nhg_depends_count(const struct nhg_hash_entry *nhe);
+extern bool zebra_nhg_depends_is_empty(const struct nhg_hash_entry *nhe);
+
+extern unsigned int
+zebra_nhg_dependents_count(const struct nhg_hash_entry *nhe);
+extern bool zebra_nhg_dependents_is_empty(const struct nhg_hash_entry *nhe);
+
+/* Lookup ID, doesn't create */
+extern struct nhg_hash_entry *zebra_nhg_lookup_id(uint32_t id);
+
+/* Hash functions */
+extern uint32_t zebra_nhg_hash_key(const void *arg);
+extern uint32_t zebra_nhg_id_key(const void *arg);
+
+extern bool zebra_nhg_hash_equal(const void *arg1, const void *arg2);
+extern bool zebra_nhg_hash_id_equal(const void *arg1, const void *arg2);
+
+/*
+ * Process a context off of a queue.
+ * Specifically this should be from
+ * the rib meta queue.
+ */
+extern int nhg_ctx_process(struct nhg_ctx *ctx);
+
+/* Find via kernel nh creation */
+extern int zebra_nhg_kernel_find(uint32_t id, struct nexthop *nh,
+ struct nh_grp *grp, uint8_t count,
+ vrf_id_t vrf_id, afi_t afi, int type,
+ int startup);
+/* Del via kernel */
+extern int zebra_nhg_kernel_del(uint32_t id);
+
+/* Find via route creation */
+extern struct nhg_hash_entry *
+zebra_nhg_rib_find(uint32_t id, struct nexthop_group *nhg, afi_t rt_afi);
+
+/* Reference counter functions */
+extern void zebra_nhg_decrement_ref(struct nhg_hash_entry *nhe);
+extern void zebra_nhg_increment_ref(struct nhg_hash_entry *nhe);
+extern int zebra_nhg_re_update_ref(struct route_entry *re,
+ struct nhg_hash_entry *nhe);
+
+/* Check validity of nhe, if invalid will update dependents as well */
+extern void zebra_nhg_check_valid(struct nhg_hash_entry *nhe);
+
+/* Convert nhe depends to a grp context that can be passed around safely */
+extern uint8_t zebra_nhg_nhe2grp(struct nh_grp *grp, struct nhg_hash_entry *nhe,
+ int size);
+
+/* Dataplane install/uninstall */
+extern void zebra_nhg_install_kernel(struct nhg_hash_entry *nhe);
+extern void zebra_nhg_uninstall_kernel(struct nhg_hash_entry *nhe);
+
+/* Forward ref of dplane update context type */
+struct zebra_dplane_ctx;
+extern void zebra_nhg_dplane_result(struct zebra_dplane_ctx *ctx);
+
+
+/* Sweep the nhg hash tables for old entries on restart */
+extern void zebra_nhg_sweep_table(struct hash *hash);
+
+/* Nexthop resolution processing */
extern int nexthop_active_update(struct route_node *rn, struct route_entry *re);
#endif
diff --git a/zebra/zebra_nhg_private.h b/zebra/zebra_nhg_private.h
new file mode 100644
index 000000000..170e2357e
--- /dev/null
+++ b/zebra/zebra_nhg_private.h
@@ -0,0 +1,62 @@
+/*
+ * Nexthop Group Private Functions.
+ * Copyright (C) 2019 Cumulus Networks, Inc.
+ * Stephen Worley
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; see the file COPYING; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * These functions should only be used internally for nhg_hash_entry
+ * manipulation and in certain special cases.
+ *
+ * Please use `zebra/zebra_nhg.h` for any general nhg_hash_entry api needs.
+ */
+
+#ifndef __ZEBRA_NHG_PRIVATE_H__
+#define __ZEBRA_NHG_PRIVATE_H__
+
+#include "zebra/zebra_nhg.h"
+
+/* Abstraction for connected trees */
+struct nhg_connected {
+ struct nhg_connected_tree_item tree_item;
+ struct nhg_hash_entry *nhe;
+};
+
+static int nhg_connected_cmp(const struct nhg_connected *con1,
+ const struct nhg_connected *con2)
+{
+ return (con1->nhe->id - con2->nhe->id);
+}
+
+DECLARE_RBTREE_UNIQ(nhg_connected_tree, struct nhg_connected, tree_item,
+ nhg_connected_cmp);
+
+/* nhg connected tree direct access functions */
+extern void nhg_connected_tree_init(struct nhg_connected_tree_head *head);
+extern void nhg_connected_tree_free(struct nhg_connected_tree_head *head);
+extern bool
+nhg_connected_tree_is_empty(const struct nhg_connected_tree_head *head);
+extern struct nhg_connected *
+nhg_connected_tree_root(struct nhg_connected_tree_head *head);
+extern void nhg_connected_tree_del_nhe(struct nhg_connected_tree_head *head,
+ struct nhg_hash_entry *nhe);
+extern void nhg_connected_tree_add_nhe(struct nhg_connected_tree_head *head,
+ struct nhg_hash_entry *nhe);
+
+extern void zebra_nhg_free(void *arg);
+
+#endif /* __ZEBRA_NHG_PRIVATE_H__ */
diff --git a/zebra/zebra_pw.c b/zebra/zebra_pw.c
index 09edbc9a6..3f1567a95 100644
--- a/zebra/zebra_pw.c
+++ b/zebra/zebra_pw.c
@@ -259,7 +259,7 @@ static int zebra_pw_check_reachability(struct zebra_pw *pw)
* Need to ensure that there's a label binding for all nexthops.
* Otherwise, ECMP for this route could render the pseudowire unusable.
*/
- for (ALL_NEXTHOPS(re->ng, nexthop)) {
+ for (ALL_NEXTHOPS_PTR(re->ng, nexthop)) {
if (!nexthop->nh_label) {
if (IS_ZEBRA_DEBUG_PW)
zlog_debug("%s: unlabeled route for %s",
diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c
index c2fa33f57..e0bf1a58f 100644
--- a/zebra/zebra_rib.c
+++ b/zebra/zebra_rib.c
@@ -56,7 +56,6 @@
#include "zebra/zebra_vxlan.h"
#include "zebra/zapi_msg.h"
#include "zebra/zebra_dplane.h"
-#include "zebra/zebra_nhg.h"
DEFINE_MTYPE_STATIC(ZEBRA, RIB_UPDATE_CTX, "Rib update context object");
@@ -79,34 +78,35 @@ static const struct {
uint8_t distance;
uint8_t meta_q_map;
} route_info[ZEBRA_ROUTE_MAX] = {
- [ZEBRA_ROUTE_SYSTEM] = {ZEBRA_ROUTE_SYSTEM, 0, 4},
- [ZEBRA_ROUTE_KERNEL] = {ZEBRA_ROUTE_KERNEL, 0, 0},
- [ZEBRA_ROUTE_CONNECT] = {ZEBRA_ROUTE_CONNECT, 0, 0},
- [ZEBRA_ROUTE_STATIC] = {ZEBRA_ROUTE_STATIC, 1, 1},
- [ZEBRA_ROUTE_RIP] = {ZEBRA_ROUTE_RIP, 120, 2},
- [ZEBRA_ROUTE_RIPNG] = {ZEBRA_ROUTE_RIPNG, 120, 2},
- [ZEBRA_ROUTE_OSPF] = {ZEBRA_ROUTE_OSPF, 110, 2},
- [ZEBRA_ROUTE_OSPF6] = {ZEBRA_ROUTE_OSPF6, 110, 2},
- [ZEBRA_ROUTE_ISIS] = {ZEBRA_ROUTE_ISIS, 115, 2},
- [ZEBRA_ROUTE_BGP] = {ZEBRA_ROUTE_BGP, 20 /* IBGP is 200. */, 3},
- [ZEBRA_ROUTE_PIM] = {ZEBRA_ROUTE_PIM, 255, 4},
- [ZEBRA_ROUTE_EIGRP] = {ZEBRA_ROUTE_EIGRP, 90, 2},
- [ZEBRA_ROUTE_NHRP] = {ZEBRA_ROUTE_NHRP, 10, 2},
- [ZEBRA_ROUTE_HSLS] = {ZEBRA_ROUTE_HSLS, 255, 4},
- [ZEBRA_ROUTE_OLSR] = {ZEBRA_ROUTE_OLSR, 255, 4},
- [ZEBRA_ROUTE_TABLE] = {ZEBRA_ROUTE_TABLE, 150, 1},
- [ZEBRA_ROUTE_LDP] = {ZEBRA_ROUTE_LDP, 150, 4},
- [ZEBRA_ROUTE_VNC] = {ZEBRA_ROUTE_VNC, 20, 3},
- [ZEBRA_ROUTE_VNC_DIRECT] = {ZEBRA_ROUTE_VNC_DIRECT, 20, 3},
- [ZEBRA_ROUTE_VNC_DIRECT_RH] = {ZEBRA_ROUTE_VNC_DIRECT_RH, 20, 3},
- [ZEBRA_ROUTE_BGP_DIRECT] = {ZEBRA_ROUTE_BGP_DIRECT, 20, 3},
- [ZEBRA_ROUTE_BGP_DIRECT_EXT] = {ZEBRA_ROUTE_BGP_DIRECT_EXT, 20, 3},
- [ZEBRA_ROUTE_BABEL] = {ZEBRA_ROUTE_BABEL, 100, 2},
- [ZEBRA_ROUTE_SHARP] = {ZEBRA_ROUTE_SHARP, 150, 4},
- [ZEBRA_ROUTE_PBR] = {ZEBRA_ROUTE_PBR, 200, 4},
- [ZEBRA_ROUTE_BFD] = {ZEBRA_ROUTE_BFD, 255, 4},
- [ZEBRA_ROUTE_OPENFABRIC] = {ZEBRA_ROUTE_OPENFABRIC, 115, 2},
- [ZEBRA_ROUTE_VRRP] = {ZEBRA_ROUTE_VRRP, 255, 4}
+	[ZEBRA_ROUTE_NHG] = {ZEBRA_ROUTE_NHG, 255 /* Unneeded for nhg's */, 0},
+ [ZEBRA_ROUTE_SYSTEM] = {ZEBRA_ROUTE_SYSTEM, 0, 5},
+ [ZEBRA_ROUTE_KERNEL] = {ZEBRA_ROUTE_KERNEL, 0, 1},
+ [ZEBRA_ROUTE_CONNECT] = {ZEBRA_ROUTE_CONNECT, 0, 1},
+ [ZEBRA_ROUTE_STATIC] = {ZEBRA_ROUTE_STATIC, 1, 2},
+ [ZEBRA_ROUTE_RIP] = {ZEBRA_ROUTE_RIP, 120, 3},
+ [ZEBRA_ROUTE_RIPNG] = {ZEBRA_ROUTE_RIPNG, 120, 3},
+ [ZEBRA_ROUTE_OSPF] = {ZEBRA_ROUTE_OSPF, 110, 3},
+ [ZEBRA_ROUTE_OSPF6] = {ZEBRA_ROUTE_OSPF6, 110, 3},
+ [ZEBRA_ROUTE_ISIS] = {ZEBRA_ROUTE_ISIS, 115, 3},
+ [ZEBRA_ROUTE_BGP] = {ZEBRA_ROUTE_BGP, 20 /* IBGP is 200. */, 4},
+ [ZEBRA_ROUTE_PIM] = {ZEBRA_ROUTE_PIM, 255, 5},
+ [ZEBRA_ROUTE_EIGRP] = {ZEBRA_ROUTE_EIGRP, 90, 3},
+ [ZEBRA_ROUTE_NHRP] = {ZEBRA_ROUTE_NHRP, 10, 3},
+ [ZEBRA_ROUTE_HSLS] = {ZEBRA_ROUTE_HSLS, 255, 5},
+ [ZEBRA_ROUTE_OLSR] = {ZEBRA_ROUTE_OLSR, 255, 5},
+ [ZEBRA_ROUTE_TABLE] = {ZEBRA_ROUTE_TABLE, 150, 2},
+ [ZEBRA_ROUTE_LDP] = {ZEBRA_ROUTE_LDP, 150, 5},
+ [ZEBRA_ROUTE_VNC] = {ZEBRA_ROUTE_VNC, 20, 4},
+ [ZEBRA_ROUTE_VNC_DIRECT] = {ZEBRA_ROUTE_VNC_DIRECT, 20, 4},
+ [ZEBRA_ROUTE_VNC_DIRECT_RH] = {ZEBRA_ROUTE_VNC_DIRECT_RH, 20, 4},
+ [ZEBRA_ROUTE_BGP_DIRECT] = {ZEBRA_ROUTE_BGP_DIRECT, 20, 4},
+ [ZEBRA_ROUTE_BGP_DIRECT_EXT] = {ZEBRA_ROUTE_BGP_DIRECT_EXT, 20, 4},
+ [ZEBRA_ROUTE_BABEL] = {ZEBRA_ROUTE_BABEL, 100, 3},
+ [ZEBRA_ROUTE_SHARP] = {ZEBRA_ROUTE_SHARP, 150, 5},
+ [ZEBRA_ROUTE_PBR] = {ZEBRA_ROUTE_PBR, 200, 5},
+ [ZEBRA_ROUTE_BFD] = {ZEBRA_ROUTE_BFD, 255, 5},
+ [ZEBRA_ROUTE_OPENFABRIC] = {ZEBRA_ROUTE_OPENFABRIC, 115, 3},
+ [ZEBRA_ROUTE_VRRP] = {ZEBRA_ROUTE_VRRP, 255, 5}
/* Any new route type added to zebra, should be mirrored here */
/* no entry/default: 150 */
@@ -196,8 +196,7 @@ int zebra_check_addr(const struct prefix *p)
/* Add nexthop to the end of a rib node's nexthop list */
void route_entry_nexthop_add(struct route_entry *re, struct nexthop *nexthop)
{
- _nexthop_group_add_sorted(&re->ng, nexthop);
- re->nexthop_num++;
+ _nexthop_group_add_sorted(re->ng, nexthop);
}
@@ -206,10 +205,8 @@ void route_entry_nexthop_add(struct route_entry *re, struct nexthop *nexthop)
*/
void route_entry_copy_nexthops(struct route_entry *re, struct nexthop *nh)
{
- assert(!re->ng.nexthop);
- copy_nexthops(&re->ng.nexthop, nh, NULL);
- for (struct nexthop *nexthop = nh; nexthop; nexthop = nexthop->next)
- re->nexthop_num++;
+ assert(!re->ng->nexthop);
+ copy_nexthops(&re->ng->nexthop, nh, NULL);
}
/* Delete specified nexthop from the list. */
@@ -220,8 +217,7 @@ void route_entry_nexthop_delete(struct route_entry *re, struct nexthop *nexthop)
if (nexthop->prev)
nexthop->prev->next = nexthop->next;
else
- re->ng.nexthop = nexthop->next;
- re->nexthop_num--;
+ re->ng->nexthop = nexthop->next;
}
@@ -505,7 +501,7 @@ int zebra_rib_labeled_unicast(struct route_entry *re)
if (re->type != ZEBRA_ROUTE_BGP)
return 0;
- for (ALL_NEXTHOPS(re->ng, nexthop))
+ for (ALL_NEXTHOPS_PTR(re->ng, nexthop))
if (!nexthop->nh_label || !nexthop->nh_label->num_labels)
return 0;
@@ -529,26 +525,17 @@ void rib_install_kernel(struct route_node *rn, struct route_entry *re,
srcdest_rnode_prefixes(rn, &p, &src_p);
if (info->safi != SAFI_UNICAST) {
- for (ALL_NEXTHOPS(re->ng, nexthop))
+ for (ALL_NEXTHOPS_PTR(re->ng, nexthop))
SET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB);
return;
- } else {
- struct nexthop *prev;
-
- for (ALL_NEXTHOPS(re->ng, nexthop)) {
- UNSET_FLAG (nexthop->flags, NEXTHOP_FLAG_DUPLICATE);
- for (ALL_NEXTHOPS(re->ng, prev)) {
- if (prev == nexthop)
- break;
- if (nexthop_same_firsthop(nexthop, prev)) {
- SET_FLAG(nexthop->flags,
- NEXTHOP_FLAG_DUPLICATE);
- break;
- }
- }
- }
}
+
+ /*
+ * Install the resolved nexthop object first.
+ */
+ zebra_nhg_install_kernel(zebra_nhg_lookup_id(re->nhe_id));
+
/*
* If this is a replace to a new RE let the originator of the RE
* know that they've lost
@@ -586,7 +573,7 @@ void rib_install_kernel(struct route_node *rn, struct route_entry *re,
if (!RIB_SYSTEM_ROUTE(old)) {
/* Clear old route's FIB flags */
- for (ALL_NEXTHOPS(old->ng, nexthop)) {
+ for (ALL_NEXTHOPS_PTR(old->ng, nexthop)) {
UNSET_FLAG(nexthop->flags,
NEXTHOP_FLAG_FIB);
}
@@ -624,7 +611,7 @@ void rib_uninstall_kernel(struct route_node *rn, struct route_entry *re)
if (info->safi != SAFI_UNICAST) {
UNSET_FLAG(re->status, ROUTE_ENTRY_INSTALLED);
- for (ALL_NEXTHOPS(re->ng, nexthop))
+ for (ALL_NEXTHOPS_PTR(re->ng, nexthop))
UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB);
return;
}
@@ -684,7 +671,7 @@ static void rib_uninstall(struct route_node *rn, struct route_entry *re)
re->fib_ng.nexthop = NULL;
}
- for (ALL_NEXTHOPS(re->ng, nexthop))
+ for (ALL_NEXTHOPS_PTR(re->ng, nexthop))
UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB);
}
@@ -860,7 +847,7 @@ static void rib_process_add_fib(struct zebra_vrf *zvrf, struct route_node *rn,
/* Update real nexthop. This may actually determine if nexthop is active
* or not. */
- if (!nexthop_group_active_nexthop_num(&new->ng)) {
+ if (!nexthop_group_active_nexthop_num(new->ng)) {
UNSET_FLAG(new->status, ROUTE_ENTRY_CHANGED);
return;
}
@@ -929,7 +916,7 @@ static void rib_process_update_fib(struct zebra_vrf *zvrf,
/* Update the nexthop; we could determine here that nexthop is
* inactive. */
- if (nexthop_group_active_nexthop_num(&new->ng))
+ if (nexthop_group_active_nexthop_num(new->ng))
nh_active = 1;
/* If nexthop is active, install the selected route, if
@@ -1047,7 +1034,7 @@ static struct route_entry *rib_choose_best(struct route_entry *current,
/* both are connected. are either loop or vrf? */
struct nexthop *nexthop = NULL;
- for (ALL_NEXTHOPS(alternate->ng, nexthop)) {
+ for (ALL_NEXTHOPS_PTR(alternate->ng, nexthop)) {
struct interface *ifp = if_lookup_by_index(
nexthop->ifindex, alternate->vrf_id);
@@ -1055,7 +1042,7 @@ static struct route_entry *rib_choose_best(struct route_entry *current,
return alternate;
}
- for (ALL_NEXTHOPS(current->ng, nexthop)) {
+ for (ALL_NEXTHOPS_PTR(current->ng, nexthop)) {
struct interface *ifp = if_lookup_by_index(
nexthop->ifindex, current->vrf_id);
@@ -1086,6 +1073,12 @@ static struct route_entry *rib_choose_best(struct route_entry *current,
return current;
}
+/* Core function for processing nexthop group contexts's off metaq */
+static void rib_nhg_process(struct nhg_ctx *ctx)
+{
+ nhg_ctx_process(ctx);
+}
+
/* Core function for processing routing information base. */
static void rib_process(struct route_node *rn)
{
@@ -1380,7 +1373,7 @@ static void zebra_rib_fixup_system(struct route_node *rn)
SET_FLAG(re->status, ROUTE_ENTRY_INSTALLED);
UNSET_FLAG(re->status, ROUTE_ENTRY_QUEUED);
- for (ALL_NEXTHOPS(re->ng, nhop)) {
+ for (ALL_NEXTHOPS_PTR(re->ng, nhop)) {
if (CHECK_FLAG(nhop->flags, NEXTHOP_FLAG_RECURSIVE))
continue;
@@ -1428,76 +1421,20 @@ static bool rib_update_re_from_ctx(struct route_entry *re,
* status.
*/
- /*
- * First check the fib nexthop-group, if it's present. The comparison
- * here is quite strict: we require that the fib sets match exactly.
+ /* Check both fib group and notif group for equivalence.
+ *
+ * Let's assume the nexthops are ordered here to save time.
*/
- matched = false;
- do {
- if (re->fib_ng.nexthop == NULL)
- break;
-
- matched = true;
-
- /* First check the route's fib nexthops */
- for (ALL_NEXTHOPS(re->fib_ng, nexthop)) {
-
- if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
- continue;
-
- ctx_nexthop = NULL;
- for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx),
- ctx_nexthop)) {
- if (nexthop_same(ctx_nexthop, nexthop))
- break;
- }
-
- if (ctx_nexthop == NULL) {
- /* Nexthop not in the new installed set */
- if (IS_ZEBRA_DEBUG_RIB_DETAILED) {
- nexthop2str(nexthop, nh_str,
- sizeof(nh_str));
- zlog_debug("update_from_ctx: no match for fib nh %s",
- nh_str);
- }
-
- matched = false;
- break;
- }
- }
-
- if (!matched)
- break;
-
- /* Check the new installed set */
- ctx_nexthop = NULL;
- for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), ctx_nexthop)) {
-
- if (CHECK_FLAG(ctx_nexthop->flags,
- NEXTHOP_FLAG_RECURSIVE))
- continue;
-
- /* Compare with the current group's nexthops */
- nexthop = NULL;
- for (ALL_NEXTHOPS(re->fib_ng, nexthop)) {
- if (nexthop_same(nexthop, ctx_nexthop))
- break;
- }
-
- if (nexthop == NULL) {
- /* Nexthop not in the old installed set */
- if (IS_ZEBRA_DEBUG_RIB_DETAILED) {
- nexthop2str(ctx_nexthop, nh_str,
- sizeof(nh_str));
- zlog_debug("update_from_ctx: no fib match for notif nh %s",
- nh_str);
- }
- matched = false;
- break;
- }
+ if (nexthop_group_equal(&re->fib_ng, dplane_ctx_get_ng(ctx)) == false) {
+ if (IS_ZEBRA_DEBUG_RIB_DETAILED) {
+ zlog_debug(
+ "%u:%s update_from_ctx: notif nh and fib nh mismatch",
+ re->vrf_id, dest_str);
}
- } while (0);
+ matched = false;
+ } else
+ matched = true;
/* If the new FIB set matches the existing FIB set, we're done. */
if (matched) {
@@ -1530,9 +1467,22 @@ static bool rib_update_re_from_ctx(struct route_entry *re,
* walk the RIB group, looking for the 'installable' candidate
* nexthops, and then check those against the set
* that is actually installed.
+ *
+ * Assume nexthops are ordered here as well.
*/
matched = true;
- for (ALL_NEXTHOPS(re->ng, nexthop)) {
+
+ ctx_nexthop = dplane_ctx_get_ng(ctx)->nexthop;
+
+ /* Get the first `installed` one to check against.
+ * If the dataplane doesn't set these to be what was actually installed,
+ * it will just be whatever was in re->ng?
+ */
+ if (CHECK_FLAG(ctx_nexthop->flags, NEXTHOP_FLAG_RECURSIVE)
+ || !CHECK_FLAG(ctx_nexthop->flags, NEXTHOP_FLAG_ACTIVE))
+ ctx_nexthop = nexthop_next_active_resolved(ctx_nexthop);
+
+ for (ALL_NEXTHOPS_PTR(re->ng, nexthop)) {
if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
continue;
@@ -1541,20 +1491,15 @@ static bool rib_update_re_from_ctx(struct route_entry *re,
continue;
/* Check for a FIB nexthop corresponding to the RIB nexthop */
- ctx_nexthop = NULL;
- for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), ctx_nexthop)) {
- if (nexthop_same(ctx_nexthop, nexthop))
- break;
- }
-
- /* If the FIB doesn't know about the nexthop,
- * it's not installed
- */
- if (ctx_nexthop == NULL) {
+ if (nexthop_same(ctx_nexthop, nexthop) == false) {
+ /* If the FIB doesn't know about the nexthop,
+ * it's not installed
+ */
if (IS_ZEBRA_DEBUG_RIB_DETAILED) {
nexthop2str(nexthop, nh_str, sizeof(nh_str));
- zlog_debug("update_from_ctx: no notif match for rib nh %s",
- nh_str);
+ zlog_debug(
+ "update_from_ctx: no notif match for rib nh %s",
+ nh_str);
}
matched = false;
@@ -1578,6 +1523,8 @@ static bool rib_update_re_from_ctx(struct route_entry *re,
UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB);
}
+
+ ctx_nexthop = nexthop_next_active_resolved(ctx_nexthop);
}
/* If all nexthops were processed, we're done */
@@ -2062,19 +2009,28 @@ done:
dplane_ctx_fini(&ctx);
}
-/* Take a list of route_node structs and return 1, if there was a record
- * picked from it and processed by rib_process(). Don't process more,
- * than one RN record; operate only in the specified sub-queue.
- */
-static unsigned int process_subq(struct list *subq, uint8_t qindex)
+static void process_subq_nhg(struct listnode *lnode)
{
- struct listnode *lnode = listhead(subq);
- struct route_node *rnode;
- rib_dest_t *dest;
- struct zebra_vrf *zvrf = NULL;
+ struct nhg_ctx *ctx = NULL;
+ uint8_t qindex = route_info[ZEBRA_ROUTE_NHG].meta_q_map;
- if (!lnode)
- return 0;
+ ctx = listgetdata(lnode);
+
+ if (!ctx)
+ return;
+
+ if (IS_ZEBRA_DEBUG_RIB_DETAILED)
+ zlog_debug("NHG Context id=%u dequeued from sub-queue %u",
+ ctx->id, qindex);
+
+ rib_nhg_process(ctx);
+}
+
+static void process_subq_route(struct listnode *lnode, uint8_t qindex)
+{
+ struct route_node *rnode = NULL;
+ rib_dest_t *dest = NULL;
+ struct zebra_vrf *zvrf = NULL;
rnode = listgetdata(lnode);
dest = rib_dest_from_rnode(rnode);
@@ -2104,7 +2060,26 @@ static unsigned int process_subq(struct list *subq, uint8_t qindex)
}
#endif
route_unlock_node(rnode);
+}
+
+/* Take a list of route_node structs and return 1, if there was a record
+ * picked from it and processed by rib_process(). Don't process more,
+ * than one RN record; operate only in the specified sub-queue.
+ */
+static unsigned int process_subq(struct list *subq, uint8_t qindex)
+{
+ struct listnode *lnode = listhead(subq);
+
+ if (!lnode)
+ return 0;
+
+ if (qindex == route_info[ZEBRA_ROUTE_NHG].meta_q_map)
+ process_subq_nhg(lnode);
+ else
+ process_subq_route(lnode, qindex);
+
list_delete_node(subq, lnode);
+
return 1;
}
@@ -2162,11 +2137,14 @@ static wq_item_status meta_queue_process(struct work_queue *dummy, void *data)
* original metaqueue index value will win and we'll end up with
* the route node enqueued once.
*/
-static void rib_meta_queue_add(struct meta_queue *mq, struct route_node *rn)
+static int rib_meta_queue_add(struct meta_queue *mq, void *data)
{
+ struct route_node *rn = NULL;
struct route_entry *re = NULL, *curr_re = NULL;
uint8_t qindex = MQ_SIZE, curr_qindex = MQ_SIZE;
+ rn = (struct route_node *)data;
+
RNODE_FOREACH_RE (rn, curr_re) {
curr_qindex = route_info[curr_re->type].meta_q_map;
@@ -2177,7 +2155,7 @@ static void rib_meta_queue_add(struct meta_queue *mq, struct route_node *rn)
}
if (!re)
- return;
+ return -1;
/* Invariant: at this point we always have rn->info set. */
if (CHECK_FLAG(rib_dest_from_rnode(rn)->flags,
@@ -2186,7 +2164,7 @@ static void rib_meta_queue_add(struct meta_queue *mq, struct route_node *rn)
rnode_debug(rn, re->vrf_id,
"rn %p is already queued in sub-queue %u",
(void *)rn, qindex);
- return;
+ return -1;
}
SET_FLAG(rib_dest_from_rnode(rn)->flags, RIB_ROUTE_QUEUED(qindex));
@@ -2197,26 +2175,37 @@ static void rib_meta_queue_add(struct meta_queue *mq, struct route_node *rn)
if (IS_ZEBRA_DEBUG_RIB_DETAILED)
rnode_debug(rn, re->vrf_id, "queued rn %p into sub-queue %u",
(void *)rn, qindex);
+
+ return 0;
}
-/* Add route_node to work queue and schedule processing */
-void rib_queue_add(struct route_node *rn)
+static int rib_meta_queue_nhg_add(struct meta_queue *mq, void *data)
{
- assert(rn);
+ struct nhg_ctx *ctx = NULL;
+ uint8_t qindex = route_info[ZEBRA_ROUTE_NHG].meta_q_map;
- /* Pointless to queue a route_node with no RIB entries to add or remove
- */
- if (!rnode_to_ribs(rn)) {
- zlog_debug("%s: called for route_node (%p, %d) with no ribs",
- __func__, (void *)rn, rn->lock);
- zlog_backtrace(LOG_DEBUG);
- return;
- }
+ ctx = (struct nhg_ctx *)data;
+
+ if (!ctx)
+ return -1;
+ listnode_add(mq->subq[qindex], ctx);
+ mq->size++;
+
+ if (IS_ZEBRA_DEBUG_RIB_DETAILED)
+ zlog_debug("NHG Context id=%u queued into sub-queue %u",
+ ctx->id, qindex);
+
+ return 0;
+}
+
+static int mq_add_handler(void *data,
+ int (*mq_add_func)(struct meta_queue *mq, void *data))
+{
if (zrouter.ribq == NULL) {
flog_err(EC_ZEBRA_WQ_NONEXISTENT,
"%s: work_queue does not exist!", __func__);
- return;
+ return -1;
}
/*
@@ -2230,9 +2219,31 @@ void rib_queue_add(struct route_node *rn)
if (work_queue_empty(zrouter.ribq))
work_queue_add(zrouter.ribq, zrouter.mq);
- rib_meta_queue_add(zrouter.mq, rn);
+ return mq_add_func(zrouter.mq, data);
+}
- return;
+/* Add route_node to work queue and schedule processing */
+int rib_queue_add(struct route_node *rn)
+{
+ assert(rn);
+
+ /* Pointless to queue a route_node with no RIB entries to add or remove
+ */
+ if (!rnode_to_ribs(rn)) {
+ zlog_debug("%s: called for route_node (%p, %d) with no ribs",
+ __func__, (void *)rn, rn->lock);
+ zlog_backtrace(LOG_DEBUG);
+ return -1;
+ }
+
+ return mq_add_handler(rn, &rib_meta_queue_add);
+}
+
+int rib_queue_nhg_add(struct nhg_ctx *ctx)
+{
+ assert(ctx);
+
+ return mq_add_handler(ctx, &rib_meta_queue_nhg_add);
}
/* Create new meta queue.
@@ -2400,6 +2411,7 @@ static void rib_addnode(struct route_node *rn,
void rib_unlink(struct route_node *rn, struct route_entry *re)
{
rib_dest_t *dest;
+ struct nhg_hash_entry *nhe = NULL;
assert(rn && re);
@@ -2414,7 +2426,13 @@ void rib_unlink(struct route_node *rn, struct route_entry *re)
if (dest->selected_fib == re)
dest->selected_fib = NULL;
- nexthops_free(re->ng.nexthop);
+ if (re->nhe_id) {
+ nhe = zebra_nhg_lookup_id(re->nhe_id);
+ if (nhe)
+ zebra_nhg_decrement_ref(nhe);
+ } else if (re->ng)
+ nexthop_group_delete(&re->ng);
+
nexthops_free(re->fib_ng.nexthop);
XFREE(MTYPE_RE, re);
@@ -2480,9 +2498,10 @@ void _route_entry_dump(const char *func, union prefixconstptr pp,
"%s: metric == %u, mtu == %u, distance == %u, flags == %u, status == %u",
straddr, re->metric, re->mtu, re->distance, re->flags, re->status);
zlog_debug("%s: nexthop_num == %u, nexthop_active_num == %u", straddr,
- re->nexthop_num, re->nexthop_active_num);
+ nexthop_group_nexthop_num(re->ng),
+ nexthop_group_active_nexthop_num(re->ng));
- for (ALL_NEXTHOPS(re->ng, nexthop)) {
+ for (ALL_NEXTHOPS_PTR(re->ng, nexthop)) {
struct interface *ifp;
struct vrf *vrf = vrf_lookup_by_id(nexthop->vrf_id);
@@ -2633,6 +2652,7 @@ void rib_lookup_and_pushup(struct prefix_ipv4 *p, vrf_id_t vrf_id)
int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p,
struct prefix_ipv6 *src_p, struct route_entry *re)
{
+ struct nhg_hash_entry *nhe = NULL;
struct route_table *table;
struct route_node *rn;
struct route_entry *same = NULL;
@@ -2646,10 +2666,58 @@ int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p,
/* Lookup table. */
table = zebra_vrf_table_with_table_id(afi, safi, re->vrf_id, re->table);
if (!table) {
+ if (re->ng)
+ nexthop_group_delete(&re->ng);
XFREE(MTYPE_RE, re);
return 0;
}
+ if (re->nhe_id) {
+ nhe = zebra_nhg_lookup_id(re->nhe_id);
+
+ if (!nhe) {
+ flog_err(
+ EC_ZEBRA_TABLE_LOOKUP_FAILED,
+ "Zebra failed to find the nexthop hash entry for id=%u in a route entry",
+ re->nhe_id);
+ XFREE(MTYPE_RE, re);
+ return -1;
+ }
+ } else {
+ nhe = zebra_nhg_rib_find(0, re->ng, afi);
+
+ /*
+ * The nexthops got copied over into an nhe,
+ * so free them now.
+ */
+ nexthop_group_delete(&re->ng);
+
+ if (!nhe) {
+ char buf[PREFIX_STRLEN] = "";
+ char buf2[PREFIX_STRLEN] = "";
+
+ flog_err(
+ EC_ZEBRA_TABLE_LOOKUP_FAILED,
+ "Zebra failed to find or create a nexthop hash entry for %s%s%s",
+ prefix2str(p, buf, sizeof(buf)),
+ src_p ? " from " : "",
+ src_p ? prefix2str(src_p, buf2, sizeof(buf2))
+ : "");
+
+ XFREE(MTYPE_RE, re);
+ return -1;
+ }
+ }
+
+ /*
+ * Attach the re to the nhe's nexthop group.
+ *
+ * TODO: This will need to change when we start getting IDs from upper
+ * level protocols, as the refcnt might be wrong, since it checks
+ * if old_id != new_id.
+ */
+ zebra_nhg_re_update_ref(re, nhe);
+
/* Make it sure prefixlen is applied to the prefix. */
apply_mask(p);
if (src_p)
@@ -2726,8 +2794,8 @@ int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p,
void rib_delete(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type,
unsigned short instance, int flags, struct prefix *p,
struct prefix_ipv6 *src_p, const struct nexthop *nh,
- uint32_t table_id, uint32_t metric, uint8_t distance,
- bool fromkernel)
+ uint32_t nhe_id, uint32_t table_id, uint32_t metric,
+ uint8_t distance, bool fromkernel)
{
struct route_table *table;
struct route_node *rn;
@@ -2790,31 +2858,37 @@ void rib_delete(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type,
if (re->type == ZEBRA_ROUTE_KERNEL && re->metric != metric)
continue;
- if (re->type == ZEBRA_ROUTE_CONNECT && (rtnh = re->ng.nexthop)
+ if (re->type == ZEBRA_ROUTE_CONNECT && (rtnh = re->ng->nexthop)
&& rtnh->type == NEXTHOP_TYPE_IFINDEX && nh) {
if (rtnh->ifindex != nh->ifindex)
continue;
same = re;
break;
}
+
/* Make sure that the route found has the same gateway. */
- else {
- if (nh == NULL) {
+ if (nhe_id && re->nhe_id == nhe_id) {
+ same = re;
+ break;
+ }
+
+ if (nh == NULL) {
+ same = re;
+ break;
+ }
+ for (ALL_NEXTHOPS_PTR(re->ng, rtnh)) {
+ /*
+ * No guarantee all kernel send nh with labels
+ * on delete.
+ */
+ if (nexthop_same_no_labels(rtnh, nh)) {
same = re;
break;
}
- for (ALL_NEXTHOPS(re->ng, rtnh))
- /*
- * No guarantee all kernel send nh with labels
- * on delete.
- */
- if (nexthop_same_no_labels(rtnh, nh)) {
- same = re;
- break;
- }
- if (same)
- break;
}
+
+ if (same)
+ break;
}
/* If same type of route can't be found and this message is from
kernel. */
@@ -2844,7 +2918,7 @@ void rib_delete(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type,
if (allow_delete) {
UNSET_FLAG(fib->status, ROUTE_ENTRY_INSTALLED);
/* Unset flags. */
- for (rtnh = fib->ng.nexthop; rtnh;
+ for (rtnh = fib->ng->nexthop; rtnh;
rtnh = rtnh->next)
UNSET_FLAG(rtnh->flags,
NEXTHOP_FLAG_FIB);
@@ -2900,7 +2974,7 @@ void rib_delete(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type,
if (CHECK_FLAG(flags, ZEBRA_FLAG_EVPN_ROUTE)) {
struct nexthop *tmp_nh;
- for (ALL_NEXTHOPS(re->ng, tmp_nh)) {
+ for (ALL_NEXTHOPS_PTR(re->ng, tmp_nh)) {
struct ipaddr vtep_ip;
memset(&vtep_ip, 0, sizeof(struct ipaddr));
@@ -2935,11 +3009,11 @@ void rib_delete(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type,
int rib_add(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type,
unsigned short instance, int flags, struct prefix *p,
struct prefix_ipv6 *src_p, const struct nexthop *nh,
- uint32_t table_id, uint32_t metric, uint32_t mtu, uint8_t distance,
- route_tag_t tag)
+ uint32_t nhe_id, uint32_t table_id, uint32_t metric, uint32_t mtu,
+ uint8_t distance, route_tag_t tag)
{
- struct route_entry *re;
- struct nexthop *nexthop;
+ struct route_entry *re = NULL;
+ struct nexthop *nexthop = NULL;
/* Allocate new route_entry structure. */
re = XCALLOC(MTYPE_RE, sizeof(struct route_entry));
@@ -2951,14 +3025,18 @@ int rib_add(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type,
re->mtu = mtu;
re->table = table_id;
re->vrf_id = vrf_id;
- re->nexthop_num = 0;
re->uptime = monotime(NULL);
re->tag = tag;
+ re->nhe_id = nhe_id;
- /* Add nexthop. */
- nexthop = nexthop_new();
- *nexthop = *nh;
- route_entry_nexthop_add(re, nexthop);
+ if (!nhe_id) {
+ re->ng = nexthop_group_new();
+
+ /* Add nexthop. */
+ nexthop = nexthop_new();
+ *nexthop = *nh;
+ route_entry_nexthop_add(re, nexthop);
+ }
return rib_add_multipath(afi, safi, p, src_p, re);
}
@@ -3218,7 +3296,7 @@ void rib_sweep_table(struct route_table *table)
* this decision needs to be revisited
*/
SET_FLAG(re->status, ROUTE_ENTRY_INSTALLED);
- for (ALL_NEXTHOPS(re->ng, nexthop))
+ for (ALL_NEXTHOPS_PTR(re->ng, nexthop))
SET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB);
rib_uninstall_kernel(rn, re);
@@ -3242,6 +3320,7 @@ int rib_sweep_route(struct thread *t)
}
zebra_router_sweep_route();
+ zebra_router_sweep_nhgs();
return 0;
}
@@ -3412,6 +3491,12 @@ static int rib_process_dplane_results(struct thread *thread)
rib_process_dplane_notify(ctx);
break;
+ case DPLANE_OP_NH_INSTALL:
+ case DPLANE_OP_NH_UPDATE:
+ case DPLANE_OP_NH_DELETE:
+ zebra_nhg_dplane_result(ctx);
+ break;
+
case DPLANE_OP_LSP_INSTALL:
case DPLANE_OP_LSP_UPDATE:
case DPLANE_OP_LSP_DELETE:
diff --git a/zebra/zebra_rnh.c b/zebra/zebra_rnh.c
index 5df5d94f4..60e23cc4d 100644
--- a/zebra/zebra_rnh.c
+++ b/zebra/zebra_rnh.c
@@ -384,7 +384,7 @@ static void zebra_rnh_clear_nexthop_rnh_filters(struct route_entry *re)
struct nexthop *nexthop;
if (re) {
- for (nexthop = re->ng.nexthop; nexthop;
+ for (nexthop = re->ng->nexthop; nexthop;
nexthop = nexthop->next) {
UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_RNH_FILTERED);
}
@@ -403,7 +403,7 @@ static int zebra_rnh_apply_nht_rmap(afi_t afi, struct zebra_vrf *zvrf,
route_map_result_t ret;
if (prn && re) {
- for (nexthop = re->ng.nexthop; nexthop;
+ for (nexthop = re->ng->nexthop; nexthop;
nexthop = nexthop->next) {
ret = zebra_nht_route_map_check(
afi, proto, &prn->p, zvrf, re, nexthop);
@@ -688,7 +688,7 @@ zebra_rnh_resolve_nexthop_entry(struct zebra_vrf *zvrf, afi_t afi,
/* Just being SELECTED isn't quite enough - must
* have an installed nexthop to be useful.
*/
- for (ALL_NEXTHOPS(re->ng, nexthop)) {
+ for (ALL_NEXTHOPS_PTR(re->ng, nexthop)) {
if (rnh_nexthop_valid(re, nexthop))
break;
}
@@ -707,7 +707,7 @@ zebra_rnh_resolve_nexthop_entry(struct zebra_vrf *zvrf, afi_t afi,
break;
if (re->type == ZEBRA_ROUTE_NHRP) {
- for (nexthop = re->ng.nexthop; nexthop;
+ for (nexthop = re->ng->nexthop; nexthop;
nexthop = nexthop->next)
if (nexthop->type
== NEXTHOP_TYPE_IFINDEX)
@@ -940,7 +940,7 @@ static void free_state(vrf_id_t vrf_id, struct route_entry *re,
return;
/* free RE and nexthops */
- nexthops_free(re->ng.nexthop);
+ nexthop_group_delete(&re->ng);
XFREE(MTYPE_RE, re);
}
@@ -963,8 +963,9 @@ static void copy_state(struct rnh *rnh, struct route_entry *re,
state->metric = re->metric;
state->vrf_id = re->vrf_id;
state->status = re->status;
+ state->ng = nexthop_group_new();
- route_entry_copy_nexthops(state, re->ng.nexthop);
+ route_entry_copy_nexthops(state, re->ng->nexthop);
rnh->state = state;
}
@@ -982,10 +983,11 @@ static int compare_state(struct route_entry *r1, struct route_entry *r2)
if (r1->metric != r2->metric)
return 1;
- if (r1->nexthop_num != r2->nexthop_num)
+ if (nexthop_group_nexthop_num(r1->ng)
+ != nexthop_group_nexthop_num(r2->ng))
return 1;
- if (nexthop_group_hash(&r1->ng) != nexthop_group_hash(&r2->ng))
+ if (nexthop_group_hash(r1->ng) != nexthop_group_hash(r2->ng))
return 1;
return 0;
@@ -1035,7 +1037,7 @@ static int send_client(struct rnh *rnh, struct zserv *client, rnh_type_t type,
num = 0;
nump = stream_get_endp(s);
stream_putc(s, 0);
- for (ALL_NEXTHOPS(re->ng, nh))
+ for (ALL_NEXTHOPS_PTR(re->ng, nh))
if (rnh_nexthop_valid(re, nh)) {
stream_putl(s, nh->vrf_id);
stream_putc(s, nh->type);
@@ -1135,7 +1137,7 @@ static void print_rnh(struct route_node *rn, struct vty *vty)
if (rnh->state) {
vty_out(vty, " resolved via %s\n",
zebra_route_string(rnh->state->type));
- for (nexthop = rnh->state->ng.nexthop; nexthop;
+ for (nexthop = rnh->state->ng->nexthop; nexthop;
nexthop = nexthop->next)
print_nh(nexthop, vty);
} else
diff --git a/zebra/zebra_router.c b/zebra/zebra_router.c
index 1e9f9e4ec..e5319c64a 100644
--- a/zebra/zebra_router.c
+++ b/zebra/zebra_router.c
@@ -29,7 +29,7 @@
#include "zebra_pbr.h"
#include "zebra_vxlan.h"
#include "zebra_mlag.h"
-#include "zebra_nhg.h"
+#include "zebra_nhg_private.h"
#include "debug.h"
DEFINE_MTYPE_STATIC(ZEBRA, RIB_TABLE_INFO, "RIB table info")
@@ -154,6 +154,11 @@ void zebra_router_sweep_route(void)
}
}
+void zebra_router_sweep_nhgs(void)
+{
+ zebra_nhg_sweep_table(zrouter.nhgs_id);
+}
+
static void zebra_router_free_table(struct zebra_router_table *zrt)
{
void *table_info;
@@ -218,6 +223,11 @@ void zebra_router_terminate(void)
zebra_vxlan_disable();
zebra_mlag_terminate();
+ hash_clean(zrouter.nhgs, zebra_nhg_free);
+ hash_free(zrouter.nhgs);
+ hash_clean(zrouter.nhgs_id, NULL);
+ hash_free(zrouter.nhgs_id);
+
hash_clean(zrouter.rules_hash, zebra_pbr_rules_free);
hash_free(zrouter.rules_hash);
@@ -253,4 +263,11 @@ void zebra_router_init(void)
zrouter.iptable_hash = hash_create_size(8, zebra_pbr_iptable_hash_key,
zebra_pbr_iptable_hash_equal,
"IPtable Hash Entry");
+
+ zrouter.nhgs =
+ hash_create_size(8, zebra_nhg_hash_key, zebra_nhg_hash_equal,
+ "Zebra Router Nexthop Groups");
+ zrouter.nhgs_id =
+ hash_create_size(8, zebra_nhg_id_key, zebra_nhg_hash_id_equal,
+ "Zebra Router Nexthop Groups ID index");
}
diff --git a/zebra/zebra_router.h b/zebra/zebra_router.h
index 25a7adac1..ac4c96147 100644
--- a/zebra/zebra_router.h
+++ b/zebra/zebra_router.h
@@ -132,6 +132,12 @@ struct zebra_router {
* Time for when we sweep the rib from old routes
*/
time_t startup_time;
+
+ /*
+ * The hash of nexthop groups associated with this router
+ */
+ struct hash *nhgs;
+ struct hash *nhgs_id;
};
#define GRACEFUL_RESTART_TIME 60
@@ -139,6 +145,7 @@ struct zebra_router {
extern struct zebra_router zrouter;
extern void zebra_router_init(void);
+extern void zebra_router_cleanup(void);
extern void zebra_router_terminate(void);
extern struct route_table *zebra_router_find_table(struct zebra_vrf *zvrf,
@@ -153,6 +160,7 @@ extern void zebra_router_release_table(struct zebra_vrf *zvrf, uint32_t tableid,
extern int zebra_router_config_write(struct vty *vty);
extern void zebra_router_sweep_route(void);
+extern void zebra_router_sweep_nhgs(void);
extern void zebra_router_show_table_summary(struct vty *vty);
diff --git a/zebra/zebra_snmp.c b/zebra/zebra_snmp.c
index 74eab765c..56c766432 100644
--- a/zebra/zebra_snmp.c
+++ b/zebra/zebra_snmp.c
@@ -285,8 +285,8 @@ static void check_replace(struct route_node *np2, struct route_entry *re2,
return;
}
- if (in_addr_cmp((uint8_t *)&(*re)->ng.nexthop->gate.ipv4,
- (uint8_t *)&re2->ng.nexthop->gate.ipv4)
+ if (in_addr_cmp((uint8_t *)&(*re)->ng->nexthop->gate.ipv4,
+ (uint8_t *)&re2->ng->nexthop->gate.ipv4)
<= 0)
return;
@@ -372,7 +372,7 @@ static void get_fwtable_route_node(struct variable *v, oid objid[],
(uint8_t *)&dest)) {
RNODE_FOREACH_RE (*np, *re) {
if (!in_addr_cmp((uint8_t *)&(*re)
- ->ng.nexthop
+ ->ng->nexthop
->gate.ipv4,
(uint8_t *)&nexthop))
if (proto
@@ -406,7 +406,7 @@ static void get_fwtable_route_node(struct variable *v, oid objid[],
|| ((policy == policy2) && (proto < proto2))
|| ((policy == policy2) && (proto == proto2)
&& (in_addr_cmp(
- (uint8_t *)&re2->ng.nexthop
+ (uint8_t *)&re2->ng->nexthop
->gate.ipv4,
(uint8_t *)&nexthop)
>= 0)))
@@ -432,7 +432,7 @@ static void get_fwtable_route_node(struct variable *v, oid objid[],
{
struct nexthop *nexthop;
- nexthop = (*re)->ng.nexthop;
+ nexthop = (*re)->ng->nexthop;
if (nexthop) {
pnt = (uint8_t *)&nexthop->gate.ipv4;
for (i = 0; i < 4; i++)
@@ -462,7 +462,7 @@ static uint8_t *ipFwTable(struct variable *v, oid objid[], size_t *objid_len,
if (!np)
return NULL;
- nexthop = re->ng.nexthop;
+ nexthop = re->ng->nexthop;
if (!nexthop)
return NULL;
diff --git a/zebra/zebra_vty.c b/zebra/zebra_vty.c
index 92f8dd1ec..9d1745473 100644
--- a/zebra/zebra_vty.c
+++ b/zebra/zebra_vty.c
@@ -52,6 +52,8 @@
#include "zebra/ipforward.h"
#include "zebra/zebra_vxlan_private.h"
#include "zebra/zebra_pbr.h"
+#include "zebra/zebra_nhg.h"
+#include "zebra/interface.h"
extern int allow_delete;
@@ -62,7 +64,7 @@ static int do_show_ip_route(struct vty *vty, const char *vrf_name, afi_t afi,
bool supernets_only, int type,
unsigned short ospf_instance_id);
static void vty_show_ip_route_detail(struct vty *vty, struct route_node *rn,
- int mcast, bool use_fib);
+ int mcast, bool use_fib, bool show_ng);
static void vty_show_ip_route_summary(struct vty *vty,
struct route_table *table);
static void vty_show_ip_route_summary_prefix(struct vty *vty,
@@ -154,7 +156,7 @@ DEFUN (show_ip_rpf_addr,
re = rib_match_ipv4_multicast(VRF_DEFAULT, addr, &rn);
if (re)
- vty_show_ip_route_detail(vty, rn, 1, false);
+ vty_show_ip_route_detail(vty, rn, 1, false, false);
else
vty_out(vty, "%% No match for RPF lookup\n");
@@ -186,7 +188,7 @@ static char re_status_output_char(struct route_entry *re, struct nexthop *nhop)
/* New RIB. Detailed information for IPv4 route. */
static void vty_show_ip_route_detail(struct vty *vty, struct route_node *rn,
- int mcast, bool use_fib)
+ int mcast, bool use_fib, bool show_ng)
{
struct route_entry *re;
struct nexthop *nexthop;
@@ -258,7 +260,10 @@ static void vty_show_ip_route_detail(struct vty *vty, struct route_node *rn,
tm->tm_hour);
vty_out(vty, " ago\n");
- for (ALL_NEXTHOPS(re->ng, nexthop)) {
+ if (show_ng)
+ vty_out(vty, " Nexthop Group ID: %u\n", re->nhe_id);
+
+ for (ALL_NEXTHOPS_PTR(re->ng, nexthop)) {
char addrstr[32];
vty_out(vty, " %c%s",
@@ -408,7 +413,7 @@ static void vty_show_ip_route(struct vty *vty, struct route_node *rn,
if (is_fib)
nhg = rib_active_nhg(re);
else
- nhg = &(re->ng);
+ nhg = re->ng;
if (json) {
json_route = json_object_new_object();
@@ -461,9 +466,9 @@ static void vty_show_ip_route(struct vty *vty, struct route_node *rn,
json_object_int_add(json_route, "internalFlags",
re->flags);
json_object_int_add(json_route, "internalNextHopNum",
- re->nexthop_num);
+ nexthop_group_nexthop_num(re->ng));
json_object_int_add(json_route, "internalNextHopActiveNum",
- re->nexthop_active_num);
+ nexthop_group_active_nexthop_num(re->ng));
if (uptime < ONE_DAY_SECOND)
sprintf(buf, "%02d:%02d:%02d", tm->tm_hour, tm->tm_min,
tm->tm_sec);
@@ -1101,6 +1106,295 @@ DEFUN (ip_nht_default_route,
return CMD_SUCCESS;
}
+static void show_nexthop_group_out(struct vty *vty, struct nhg_hash_entry *nhe)
+{
+ struct nexthop *nexthop = NULL;
+ struct nhg_connected *rb_node_dep = NULL;
+ char buf[SRCDEST2STR_BUFFER];
+
+ struct vrf *nhe_vrf = vrf_lookup_by_id(nhe->vrf_id);
+
+ vty_out(vty, "ID: %u\n", nhe->id);
+ vty_out(vty, " RefCnt: %d\n", nhe->refcnt);
+
+ if (nhe_vrf)
+ vty_out(vty, " VRF: %s\n", nhe_vrf->name);
+ else
+ vty_out(vty, " VRF: UNKNOWN\n");
+
+ if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_UNHASHABLE))
+ vty_out(vty, " Duplicate - from kernel not hashable\n");
+
+ if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_VALID)) {
+ vty_out(vty, " Valid");
+ if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED))
+ vty_out(vty, ", Installed");
+ vty_out(vty, "\n");
+ }
+ if (nhe->ifp)
+ vty_out(vty, " Interface Index: %d\n", nhe->ifp->ifindex);
+
+ if (!zebra_nhg_depends_is_empty(nhe)) {
+ vty_out(vty, " Depends:");
+ frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) {
+ vty_out(vty, " (%u)", rb_node_dep->nhe->id);
+ }
+ vty_out(vty, "\n");
+ }
+
+ for (ALL_NEXTHOPS_PTR(nhe->nhg, nexthop)) {
+ if (!CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
+ vty_out(vty, " ");
+ else
+ /* Make recursive nexthops a bit more clear */
+ vty_out(vty, " ");
+
+ switch (nexthop->type) {
+ case NEXTHOP_TYPE_IPV4:
+ case NEXTHOP_TYPE_IPV4_IFINDEX:
+ vty_out(vty, " %s", inet_ntoa(nexthop->gate.ipv4));
+ if (nexthop->ifindex)
+ vty_out(vty, ", %s",
+ ifindex2ifname(nexthop->ifindex,
+ nexthop->vrf_id));
+ break;
+ case NEXTHOP_TYPE_IPV6:
+ case NEXTHOP_TYPE_IPV6_IFINDEX:
+ vty_out(vty, " %s",
+ inet_ntop(AF_INET6, &nexthop->gate.ipv6, buf,
+ sizeof(buf)));
+ if (nexthop->ifindex)
+ vty_out(vty, ", %s",
+ ifindex2ifname(nexthop->ifindex,
+ nexthop->vrf_id));
+ break;
+
+ case NEXTHOP_TYPE_IFINDEX:
+ vty_out(vty, " directly connected %s",
+ ifindex2ifname(nexthop->ifindex,
+ nexthop->vrf_id));
+ break;
+ case NEXTHOP_TYPE_BLACKHOLE:
+ vty_out(vty, " unreachable");
+ switch (nexthop->bh_type) {
+ case BLACKHOLE_REJECT:
+ vty_out(vty, " (ICMP unreachable)");
+ break;
+ case BLACKHOLE_ADMINPROHIB:
+ vty_out(vty, " (ICMP admin-prohibited)");
+ break;
+ case BLACKHOLE_NULL:
+ vty_out(vty, " (blackhole)");
+ break;
+ case BLACKHOLE_UNSPEC:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+
+ struct vrf *vrf = vrf_lookup_by_id(nexthop->vrf_id);
+
+ if (vrf)
+ vty_out(vty, " (vrf %s)", vrf->name);
+ else
+ vty_out(vty, " (vrf UNKNOWN)");
+
+ if (!CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE))
+ vty_out(vty, " inactive");
+
+ if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK))
+ vty_out(vty, " onlink");
+
+ if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
+ vty_out(vty, " (recursive)");
+
+ switch (nexthop->type) {
+ case NEXTHOP_TYPE_IPV4:
+ case NEXTHOP_TYPE_IPV4_IFINDEX:
+ if (nexthop->src.ipv4.s_addr) {
+ if (inet_ntop(AF_INET, &nexthop->src.ipv4, buf,
+ sizeof(buf)))
+ vty_out(vty, ", src %s", buf);
+ }
+ break;
+ case NEXTHOP_TYPE_IPV6:
+ case NEXTHOP_TYPE_IPV6_IFINDEX:
+ if (!IPV6_ADDR_SAME(&nexthop->src.ipv6, &in6addr_any)) {
+ if (inet_ntop(AF_INET6, &nexthop->src.ipv6, buf,
+ sizeof(buf)))
+ vty_out(vty, ", src %s", buf);
+ }
+ break;
+ default:
+ break;
+ }
+
+ /* Label information */
+ if (nexthop->nh_label && nexthop->nh_label->num_labels) {
+ vty_out(vty, ", label %s",
+ mpls_label2str(nexthop->nh_label->num_labels,
+ nexthop->nh_label->label, buf,
+ sizeof(buf), 1));
+ }
+
+ vty_out(vty, "\n");
+ }
+
+ if (!zebra_nhg_dependents_is_empty(nhe)) {
+ vty_out(vty, " Dependents:");
+ frr_each(nhg_connected_tree, &nhe->nhg_dependents,
+ rb_node_dep) {
+ vty_out(vty, " (%u)", rb_node_dep->nhe->id);
+ }
+ vty_out(vty, "\n");
+ }
+
+}
+
+static int show_nexthop_group_id_cmd_helper(struct vty *vty, uint32_t id)
+{
+ struct nhg_hash_entry *nhe = NULL;
+
+ nhe = zebra_nhg_lookup_id(id);
+
+ if (nhe)
+ show_nexthop_group_out(vty, nhe);
+ else {
+ vty_out(vty, "Nexthop Group ID: %u does not exist\n", id);
+ return CMD_WARNING;
+ }
+ return CMD_SUCCESS;
+}
+
+static void show_nexthop_group_cmd_helper(struct vty *vty,
+ struct zebra_vrf *zvrf, afi_t afi)
+{
+ struct list *list = hash_to_list(zrouter.nhgs);
+ struct nhg_hash_entry *nhe = NULL;
+ struct listnode *node = NULL;
+
+ for (ALL_LIST_ELEMENTS_RO(list, node, nhe)) {
+
+ if (afi && nhe->afi != afi)
+ continue;
+
+ if (nhe->vrf_id != zvrf->vrf->vrf_id)
+ continue;
+
+ show_nexthop_group_out(vty, nhe);
+ }
+
+ list_delete(&list);
+}
+
+static void if_nexthop_group_dump_vty(struct vty *vty, struct interface *ifp)
+{
+ struct zebra_if *zebra_if = NULL;
+ struct nhg_connected *rb_node_dep = NULL;
+
+ zebra_if = ifp->info;
+
+ if (!if_nhg_dependents_is_empty(ifp)) {
+ vty_out(vty, "Interface %s:\n", ifp->name);
+
+ frr_each(nhg_connected_tree, &zebra_if->nhg_dependents,
+ rb_node_dep) {
+ vty_out(vty, " ");
+ show_nexthop_group_out(vty, rb_node_dep->nhe);
+ }
+ }
+}
+
+DEFPY (show_interface_nexthop_group,
+ show_interface_nexthop_group_cmd,
+ "show interface [IFNAME$if_name] nexthop-group",
+ SHOW_STR
+ "Interface status and configuration\n"
+ "Interface name\n"
+ "Show Nexthop Groups\n")
+{
+ struct vrf *vrf = NULL;
+ struct interface *ifp = NULL;
+ bool found = false;
+
+ RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) {
+ if (if_name) {
+ ifp = if_lookup_by_name(if_name, vrf->vrf_id);
+ if (ifp) {
+ if_nexthop_group_dump_vty(vty, ifp);
+ found = true;
+ }
+ } else {
+ FOR_ALL_INTERFACES (vrf, ifp)
+ if_nexthop_group_dump_vty(vty, ifp);
+ found = true;
+ }
+ }
+
+ if (!found) {
+ vty_out(vty, "%% Can't find interface %s\n", if_name);
+ return CMD_WARNING;
+ }
+
+ return CMD_SUCCESS;
+}
+
+DEFPY (show_nexthop_group,
+ show_nexthop_group_cmd,
+ "show nexthop-group <(0-4294967295)$id|[<ip$v4|ipv6$v6>] [vrf <NAME$vrf_name|all$vrf_all>]>",
+ SHOW_STR
+ "Show Nexthop Groups\n"
+ "Nexthop Group ID\n"
+ IP_STR
+ IP6_STR
+ VRF_FULL_CMD_HELP_STR)
+{
+
+ struct zebra_vrf *zvrf = NULL;
+ afi_t afi = 0;
+
+ if (id)
+ return show_nexthop_group_id_cmd_helper(vty, id);
+
+ if (v4)
+ afi = AFI_IP;
+ else if (v6)
+ afi = AFI_IP6;
+
+ if (vrf_all) {
+ struct vrf *vrf;
+
+ RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) {
+ struct zebra_vrf *zvrf;
+
+ zvrf = vrf->info;
+ if (!zvrf)
+ continue;
+
+ vty_out(vty, "VRF: %s\n", vrf->name);
+ show_nexthop_group_cmd_helper(vty, zvrf, afi);
+ }
+
+ return CMD_SUCCESS;
+ }
+
+ if (vrf_name)
+ zvrf = zebra_vrf_lookup_by_name(vrf_name);
+ else
+ zvrf = zebra_vrf_lookup_by_name(VRF_DEFAULT_NAME);
+
+ if (!zvrf) {
+ vty_out(vty, "VRF %s specified does not exist", vrf_name);
+ return CMD_WARNING;
+ }
+
+ show_nexthop_group_cmd_helper(vty, zvrf, afi);
+
+ return CMD_SUCCESS;
+}
+
DEFUN (no_ip_nht_default_route,
no_ip_nht_default_route_cmd,
"no ip nht resolve-via-default",
@@ -1265,7 +1559,7 @@ DEFPY (show_route_detail,
|X:X::X:X/M$prefix\
>\
>\
- [json$json]",
+ [json$json] [nexthop-group$ng]",
SHOW_STR
IP_STR
"IPv6 forwarding table\n"
@@ -1279,7 +1573,8 @@ DEFPY (show_route_detail,
VRF_FULL_CMD_HELP_STR
"IPv6 Address\n"
"IPv6 prefix\n"
- JSON_STR)
+ JSON_STR
+ "Nexthop Group Information\n")
{
afi_t afi = ipv4 ? AFI_IP : AFI_IP6;
struct route_table *table;
@@ -1288,6 +1583,7 @@ DEFPY (show_route_detail,
bool use_fib = !!fib;
rib_dest_t *dest;
bool network_found = false;
+ bool show_ng = !!ng;
if (address_str)
prefix_str = address_str;
@@ -1321,10 +1617,10 @@ DEFPY (show_route_detail,
network_found = true;
if (json)
- vty_show_ip_route_detail_json(vty, rn,
- use_fib);
+ vty_show_ip_route_detail_json(vty, rn, use_fib);
else
- vty_show_ip_route_detail(vty, rn, 0, use_fib);
+ vty_show_ip_route_detail(vty, rn, 0, use_fib,
+ show_ng);
route_unlock_node(rn);
}
@@ -1376,7 +1672,7 @@ DEFPY (show_route_detail,
if (json)
vty_show_ip_route_detail_json(vty, rn, use_fib);
else
- vty_show_ip_route_detail(vty, rn, 0, use_fib);
+ vty_show_ip_route_detail(vty, rn, 0, use_fib, show_ng);
route_unlock_node(rn);
}
@@ -1539,7 +1835,7 @@ static void vty_show_ip_route_summary_prefix(struct vty *vty,
fib_cnt[ZEBRA_ROUTE_TOTAL]++;
fib_cnt[re->type]++;
}
- for (nexthop = re->ng.nexthop; (!cnt && nexthop);
+ for (nexthop = re->ng->nexthop; (!cnt && nexthop);
nexthop = nexthop->next) {
cnt++;
rib_cnt[ZEBRA_ROUTE_TOTAL]++;
@@ -3033,6 +3329,9 @@ void zebra_vty_init(void)
install_element(CONFIG_NODE, &zebra_packet_process_cmd);
install_element(CONFIG_NODE, &no_zebra_packet_process_cmd);
+ install_element(VIEW_NODE, &show_nexthop_group_cmd);
+ install_element(VIEW_NODE, &show_interface_nexthop_group_cmd);
+
install_element(VIEW_NODE, &show_vrf_cmd);
install_element(VIEW_NODE, &show_vrf_vni_cmd);
install_element(VIEW_NODE, &show_route_cmd);