diff options
author | Mark Stapp <mjs@voltanet.io> | 2019-10-28 13:07:23 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-10-28 13:07:23 +0100 |
commit | 882364f11a7ab5545b8c2def58b8996893b31702 (patch) | |
tree | 68673cbd330ed2e199ecce478cfb8a53ca4b8401 | |
parent | Merge pull request #5216 from ton31337/fix/override_peers_ttl_if_peer_group_c... (diff) | |
parent | zebra: rt_netlink nexthop handling checkpatch (diff) | |
download | frr-882364f11a7ab5545b8c2def58b8996893b31702.tar.xz frr-882364f11a7ab5545b8c2def58b8996893b31702.zip |
Merge pull request #4897 from sworleys/zebra_nhg_add
Zebra Nexthop Group Rework and Kernel Nexthop Object API Init
50 files changed, 4064 insertions, 617 deletions
diff --git a/doc/user/zebra.rst b/doc/user/zebra.rst index af465f6fd..2099dfdd6 100644 --- a/doc/user/zebra.rst +++ b/doc/user/zebra.rst @@ -839,11 +839,22 @@ zebra Terminal Mode Commands .. index:: show ipv6 route .. clicmd:: show ipv6 route -.. index:: show interface [{vrf VRF|brief}] -.. clicmd:: show interface [{vrf VRF|brief}] +.. index:: show [ip|ipv6] route [PREFIX] [nexthop-group] +.. clicmd:: show [ip|ipv6] route [PREFIX] [nexthop-group] -.. index:: show interface [{vrf all|brief}] -.. clicmd:: show interface [{vrf all|brief}] + Display detailed information about a route. If [nexthop-group] is + included, it will display the nexthop group ID the route is using as well. + +.. index:: show interface [NAME] [{vrf VRF|brief}] [nexthop-group] +.. clicmd:: show interface [NAME] [{vrf VRF|brief}] [nexthop-group] + +.. index:: show interface [NAME] [{vrf all|brief}] [nexthop-group] +.. clicmd:: show interface [NAME] [{vrf all|brief}] [nexthop-group] + + Display interface information. If no extra information is added, it will + dump information on all interfaces. If [NAME] is specified, it will display + detailed information about that single interface. If [nexthop-group] is + specified, it will display nexthop groups pointing out that interface. .. index:: show ip prefix-list [NAME] .. clicmd:: show ip prefix-list [NAME] @@ -900,3 +911,8 @@ zebra Terminal Mode Commands Reset statistics related to the zebra code that interacts with the optional Forwarding Plane Manager (FPM) component. +.. index:: show nexthop-group [ID] [vrf NAME] [ip|ipv6] +.. clicmd:: show nexthop-group [ID] [vrf NAME] [ip|ipv6] + + Display nexthop groups created by zebra. 
+ diff --git a/include/linux/nexthop.h b/include/linux/nexthop.h new file mode 100644 index 000000000..e4d6e256e --- /dev/null +++ b/include/linux/nexthop.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_NEXTHOP_H +#define _LINUX_NEXTHOP_H + +#include <linux/types.h> + +#define RTM_NHA(h) ((struct rtattr *)(((char *)(h)) + \ + NLMSG_ALIGN(sizeof(struct nhmsg)))) + +struct nhmsg { + unsigned char nh_family; + unsigned char nh_scope; /* return only */ + unsigned char nh_protocol; /* Routing protocol that installed nh */ + unsigned char resvd; + unsigned int nh_flags; /* RTNH_F flags */ +}; + +struct nexthop_grp { + __u32 id; /* nexthop id - must exist */ + __u8 weight; /* weight of this nexthop */ + __u8 resvd1; + __u16 resvd2; +}; + +enum { + NEXTHOP_GRP_TYPE_MPATH, /* default type if not specified */ + __NEXTHOP_GRP_TYPE_MAX, +}; + +#define NEXTHOP_GRP_TYPE_MAX (__NEXTHOP_GRP_TYPE_MAX - 1) + +enum { + NHA_UNSPEC, + NHA_ID, /* u32; id for nexthop. 
id == 0 means auto-assign */ + + NHA_GROUP, /* array of nexthop_grp */ + NHA_GROUP_TYPE, /* u16 one of NEXTHOP_GRP_TYPE */ + /* if NHA_GROUP attribute is added, no other attributes can be set */ + + NHA_BLACKHOLE, /* flag; nexthop used to blackhole packets */ + /* if NHA_BLACKHOLE is added, OIF, GATEWAY, ENCAP can not be set */ + + NHA_OIF, /* u32; nexthop device */ + NHA_GATEWAY, /* be32 (IPv4) or in6_addr (IPv6) gw address */ + NHA_ENCAP_TYPE, /* u16; lwt encap type */ + NHA_ENCAP, /* lwt encap data */ + + /* NHA_OIF can be appended to dump request to return only + * nexthops using given device + */ + NHA_GROUPS, /* flag; only return nexthop groups in dump */ + NHA_MASTER, /* u32; only return nexthops with given master dev */ + + __NHA_MAX, +}; + +#define NHA_MAX (__NHA_MAX - 1) +#endif diff --git a/include/subdir.am b/include/subdir.am index 0d7fed285..b1ca1be54 100644 --- a/include/subdir.am +++ b/include/subdir.am @@ -6,6 +6,7 @@ noinst_HEADERS += \ include/linux/mpls_iptunnel.h \ include/linux/neighbour.h \ include/linux/netlink.h \ + include/linux/nexthop.h \ include/linux/rtnetlink.h \ include/linux/socket.h \ include/linux/net_namespace.h \ diff --git a/lib/nexthop.c b/lib/nexthop.c index cf5bed3d6..73c2de0cd 100644 --- a/lib/nexthop.c +++ b/lib/nexthop.c @@ -349,7 +349,7 @@ const char *nexthop2str(const struct nexthop *nexthop, char *str, int size) * left branch is 'resolved' and right branch is 'next': * https://en.wikipedia.org/wiki/Tree_traversal#/media/File:Sorted_binary_tree_preorder.svg */ -struct nexthop *nexthop_next(struct nexthop *nexthop) +struct nexthop *nexthop_next(const struct nexthop *nexthop) { if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) return nexthop->resolved; @@ -364,6 +364,19 @@ struct nexthop *nexthop_next(struct nexthop *nexthop) return NULL; } +/* Return the next nexthop in the tree that is resolved and active */ +struct nexthop *nexthop_next_active_resolved(const struct nexthop *nexthop) +{ + struct nexthop *next = 
nexthop_next(nexthop); + + while (next + && (CHECK_FLAG(next->flags, NEXTHOP_FLAG_RECURSIVE) + || !CHECK_FLAG(next->flags, NEXTHOP_FLAG_ACTIVE))) + next = nexthop_next(next); + + return next; +} + unsigned int nexthop_level(struct nexthop *nexthop) { unsigned int rv = 0; @@ -374,16 +387,13 @@ unsigned int nexthop_level(struct nexthop *nexthop) return rv; } -uint32_t nexthop_hash(const struct nexthop *nexthop) +/* Only hash word-sized things, let cmp do the rest. */ +uint32_t nexthop_hash_quick(const struct nexthop *nexthop) { uint32_t key = 0x45afe398; key = jhash_3words(nexthop->type, nexthop->vrf_id, nexthop->nh_label_type, key); - /* gate and blackhole are together in a union */ - key = jhash(&nexthop->gate, sizeof(nexthop->gate), key); - key = jhash(&nexthop->src, sizeof(nexthop->src), key); - key = jhash(&nexthop->rmap_src, sizeof(nexthop->rmap_src), key); if (nexthop->nh_label) { int labels = nexthop->nh_label->num_labels; @@ -410,17 +420,35 @@ uint32_t nexthop_hash(const struct nexthop *nexthop) key = jhash_1word(nexthop->nh_label->label[i], key); } - switch (nexthop->type) { - case NEXTHOP_TYPE_IPV4_IFINDEX: - case NEXTHOP_TYPE_IPV6_IFINDEX: - case NEXTHOP_TYPE_IFINDEX: - key = jhash_1word(nexthop->ifindex, key); - break; - case NEXTHOP_TYPE_BLACKHOLE: - case NEXTHOP_TYPE_IPV4: - case NEXTHOP_TYPE_IPV6: - break; - } + key = jhash_2words(nexthop->ifindex, + CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK), + key); + + return key; +} + + +#define GATE_SIZE 4 /* Number of uint32_t words in struct g_addr */ + +/* For a more granular hash */ +uint32_t nexthop_hash(const struct nexthop *nexthop) +{ + uint32_t gate_src_rmap_raw[GATE_SIZE * 3] = {}; + /* Get all the quick stuff */ + uint32_t key = nexthop_hash_quick(nexthop); + + assert(((sizeof(nexthop->gate) + sizeof(nexthop->src) + + sizeof(nexthop->rmap_src)) + / 3) + == (GATE_SIZE * sizeof(uint32_t))); + + memcpy(gate_src_rmap_raw, &nexthop->gate, GATE_SIZE); + memcpy(gate_src_rmap_raw + GATE_SIZE, 
&nexthop->src, GATE_SIZE); + memcpy(gate_src_rmap_raw + (2 * GATE_SIZE), &nexthop->rmap_src, + GATE_SIZE); + + key = jhash2(gate_src_rmap_raw, (GATE_SIZE * 3), key); + return key; } diff --git a/lib/nexthop.h b/lib/nexthop.h index 9dd5fc6fd..fe029f186 100644 --- a/lib/nexthop.h +++ b/lib/nexthop.h @@ -137,6 +137,14 @@ void nexthop_del_labels(struct nexthop *); * 32-bit hash of nexthop */ uint32_t nexthop_hash(const struct nexthop *nexthop); +/* + * Hash a nexthop only on word-sized attributes: + * - vrf_id + * - ifindex + * - type + * - (some) flags + */ +uint32_t nexthop_hash_quick(const struct nexthop *nexthop); extern bool nexthop_same(const struct nexthop *nh1, const struct nexthop *nh2); extern bool nexthop_same_no_labels(const struct nexthop *nh1, @@ -153,7 +161,9 @@ extern int nexthop_same_firsthop(struct nexthop *next1, struct nexthop *next2); extern const char *nexthop2str(const struct nexthop *nexthop, char *str, int size); -extern struct nexthop *nexthop_next(struct nexthop *nexthop); +extern struct nexthop *nexthop_next(const struct nexthop *nexthop); +extern struct nexthop * +nexthop_next_active_resolved(const struct nexthop *nexthop); extern unsigned int nexthop_level(struct nexthop *nexthop); /* Copies to an already allocated nexthop struct */ extern void nexthop_copy(struct nexthop *copy, const struct nexthop *nexthop, diff --git a/lib/nexthop_group.c b/lib/nexthop_group.c index 9564321d3..9552f8956 100644 --- a/lib/nexthop_group.c +++ b/lib/nexthop_group.c @@ -81,6 +81,17 @@ uint8_t nexthop_group_nexthop_num(const struct nexthop_group *nhg) return num; } +uint8_t nexthop_group_nexthop_num_no_recurse(const struct nexthop_group *nhg) +{ + struct nexthop *nhop; + uint8_t num = 0; + + for (nhop = nhg->nexthop; nhop; nhop = nhop->next) + num++; + + return num; +} + uint8_t nexthop_group_active_nexthop_num(const struct nexthop_group *nhg) { struct nexthop *nhop; @@ -94,7 +105,22 @@ uint8_t nexthop_group_active_nexthop_num(const struct nexthop_group *nhg) 
return num; } -struct nexthop *nexthop_exists(struct nexthop_group *nhg, struct nexthop *nh) +uint8_t +nexthop_group_active_nexthop_num_no_recurse(const struct nexthop_group *nhg) +{ + struct nexthop *nhop; + uint8_t num = 0; + + for (nhop = nhg->nexthop; nhop; nhop = nhop->next) { + if (CHECK_FLAG(nhop->flags, NEXTHOP_FLAG_ACTIVE)) + num++; + } + + return num; +} + +struct nexthop *nexthop_exists(const struct nexthop_group *nhg, + const struct nexthop *nh) { struct nexthop *nexthop; @@ -106,6 +132,74 @@ struct nexthop *nexthop_exists(struct nexthop_group *nhg, struct nexthop *nh) return NULL; } +static bool +nexthop_group_equal_common(const struct nexthop_group *nhg1, + const struct nexthop_group *nhg2, + uint8_t (*nexthop_group_nexthop_num_func)( + const struct nexthop_group *nhg)) +{ + if (nhg1 && !nhg2) + return false; + + if (!nhg1 && nhg2) + return false; + + if (nhg1 == nhg2) + return true; + + if (nexthop_group_nexthop_num_func(nhg1) + != nexthop_group_nexthop_num_func(nhg2)) + return false; + + return true; +} + +/* This assumes ordered */ +bool nexthop_group_equal_no_recurse(const struct nexthop_group *nhg1, + const struct nexthop_group *nhg2) +{ + struct nexthop *nh1 = NULL; + struct nexthop *nh2 = NULL; + + if (!nexthop_group_equal_common(nhg1, nhg2, + &nexthop_group_nexthop_num_no_recurse)) + return false; + + for (nh1 = nhg1->nexthop, nh2 = nhg2->nexthop; nh1 || nh2; + nh1 = nh1->next, nh2 = nh2->next) { + if (nh1 && !nh2) + return false; + if (!nh1 && nh2) + return false; + if (!nexthop_same(nh1, nh2)) + return false; + } + + return true; +} + +/* This assumes ordered */ +bool nexthop_group_equal(const struct nexthop_group *nhg1, + const struct nexthop_group *nhg2) +{ + struct nexthop *nh1 = NULL; + struct nexthop *nh2 = NULL; + + if (!nexthop_group_equal_common(nhg1, nhg2, &nexthop_group_nexthop_num)) + return false; + + for (nh1 = nhg1->nexthop, nh2 = nhg2->nexthop; nh1 || nh2; + nh1 = nexthop_next(nh1), nh2 = nexthop_next(nh2)) { + if (nh1 && 
!nh2) + return false; + if (!nh1 && nh2) + return false; + if (!nexthop_same(nh1, nh2)) + return false; + } + + return true; +} struct nexthop_group *nexthop_group_new(void) { return XCALLOC(MTYPE_NEXTHOP_GROUP, sizeof(struct nexthop_group)); @@ -119,6 +213,9 @@ void nexthop_group_copy(struct nexthop_group *to, struct nexthop_group *from) void nexthop_group_delete(struct nexthop_group **nhg) { + if ((*nhg)->nexthop) + nexthops_free((*nhg)->nexthop); + XFREE(MTYPE_NEXTHOP_GROUP, *nhg); } @@ -217,7 +314,7 @@ void copy_nexthops(struct nexthop **tnh, const struct nexthop *nh, } } -uint32_t nexthop_group_hash(const struct nexthop_group *nhg) +uint32_t nexthop_group_hash_no_recurse(const struct nexthop_group *nhg) { struct nexthop *nh; uint32_t key = 0; @@ -232,6 +329,35 @@ uint32_t nexthop_group_hash(const struct nexthop_group *nhg) return key; } +uint32_t nexthop_group_hash(const struct nexthop_group *nhg) +{ + struct nexthop *nh; + uint32_t key = 0; + + for (ALL_NEXTHOPS_PTR(nhg, nh)) + key = jhash_1word(nexthop_hash(nh), key); + + return key; +} + +void nexthop_group_mark_duplicates(struct nexthop_group *nhg) +{ + struct nexthop *nexthop, *prev; + + for (ALL_NEXTHOPS_PTR(nhg, nexthop)) { + UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_DUPLICATE); + for (ALL_NEXTHOPS_PTR(nhg, prev)) { + if (prev == nexthop) + break; + if (nexthop_same_firsthop(nexthop, prev)) { + SET_FLAG(nexthop->flags, + NEXTHOP_FLAG_DUPLICATE); + break; + } + } + } +} + static void nhgc_delete_nexthops(struct nexthop_group_cmd *nhgc) { struct nexthop *nexthop; diff --git a/lib/nexthop_group.h b/lib/nexthop_group.h index 4f4d40eb3..391775c69 100644 --- a/lib/nexthop_group.h +++ b/lib/nexthop_group.h @@ -47,7 +47,9 @@ void nexthop_group_copy(struct nexthop_group *to, void copy_nexthops(struct nexthop **tnh, const struct nexthop *nh, struct nexthop *rparent); +uint32_t nexthop_group_hash_no_recurse(const struct nexthop_group *nhg); uint32_t nexthop_group_hash(const struct nexthop_group *nhg); +void 
nexthop_group_mark_duplicates(struct nexthop_group *nhg); /* The following for loop allows to iterate over the nexthop * structure of routes. @@ -110,8 +112,15 @@ void nexthop_group_disable_vrf(struct vrf *vrf); void nexthop_group_interface_state_change(struct interface *ifp, ifindex_t oldifindex); -extern struct nexthop *nexthop_exists(struct nexthop_group *nhg, - struct nexthop *nh); +extern struct nexthop *nexthop_exists(const struct nexthop_group *nhg, + const struct nexthop *nh); +/* This assumes ordered */ +extern bool nexthop_group_equal_no_recurse(const struct nexthop_group *nhg1, + const struct nexthop_group *nhg2); + +/* This assumes ordered */ +extern bool nexthop_group_equal(const struct nexthop_group *nhg1, + const struct nexthop_group *nhg2); extern struct nexthop_group_cmd *nhgc_find(const char *name); @@ -120,7 +129,11 @@ extern void nexthop_group_write_nexthop(struct vty *vty, struct nexthop *nh); /* Return the number of nexthops in this nhg */ extern uint8_t nexthop_group_nexthop_num(const struct nexthop_group *nhg); extern uint8_t +nexthop_group_nexthop_num_no_recurse(const struct nexthop_group *nhg); +extern uint8_t nexthop_group_active_nexthop_num(const struct nexthop_group *nhg); +extern uint8_t +nexthop_group_active_nexthop_num_no_recurse(const struct nexthop_group *nhg); #ifdef __cplusplus } diff --git a/lib/route_types.txt b/lib/route_types.txt index 59f3a91cf..71d0a4644 100644 --- a/lib/route_types.txt +++ b/lib/route_types.txt @@ -84,6 +84,7 @@ ZEBRA_ROUTE_PBR, pbr, pbrd, 'F', 1, 1, 0, "PBR" ZEBRA_ROUTE_BFD, bfd, bfdd, '-', 0, 0, 0, "BFD" ZEBRA_ROUTE_OPENFABRIC, openfabric, fabricd, 'f', 1, 1, 1, "OpenFabric" ZEBRA_ROUTE_VRRP, vrrp, vrrpd, '-', 0, 0, 0, "VRRP" +ZEBRA_ROUTE_NHG, nhg, none, '-', 0, 0, 0, "Nexthop Group" ZEBRA_ROUTE_ALL, wildcard, none, '-', 0, 0, 0, "-" @@ -113,3 +114,4 @@ ZEBRA_ROUTE_PBR, "Policy Based Routing (PBR)" ZEBRA_ROUTE_BFD, "Bidirectional Fowarding Detection (BFD)" ZEBRA_ROUTE_VRRP, "Virtual Router Redundancy 
Protocol (VRRP)" ZEBRA_ROUTE_OPENFABRIC, "OpenFabric Routing Protocol" +ZEBRA_ROUTE_NHG, "Zebra Nexthop Groups (NHG)" diff --git a/pbrd/pbr_nht.c b/pbrd/pbr_nht.c index 67a1fe2ff..7ccd14d1f 100644 --- a/pbrd/pbr_nht.c +++ b/pbrd/pbr_nht.c @@ -578,8 +578,6 @@ void pbr_nht_delete_individual_nexthop(struct pbr_map_sequence *pbrms) hash_release(pbr_nhg_hash, pnhgc); - _nexthop_del(pbrms->nhg, nh); - nexthop_free(nh); nexthop_group_delete(&pbrms->nhg); XFREE(MTYPE_TMP, pbrms->internal_nhg_name); } @@ -639,7 +637,6 @@ void pbr_nht_delete_group(const char *name) if (pbrms->nhgrp_name && strmatch(pbrms->nhgrp_name, name)) { pbrms->reason |= PBR_MAP_INVALID_NO_NEXTHOPS; - nexthop_group_delete(&pbrms->nhg); pbrms->nhg = NULL; pbrms->internal_nhg_name = NULL; pbrm->valid = false; diff --git a/tests/topotests/bfd-topo2/r1/ipv6_routes.json b/tests/topotests/bfd-topo2/r1/ipv6_routes.json index d09439a8a..0fd03b516 100644 --- a/tests/topotests/bfd-topo2/r1/ipv6_routes.json +++ b/tests/topotests/bfd-topo2/r1/ipv6_routes.json @@ -33,7 +33,6 @@ { "interfaceName": "r1-eth0", "interfaceIndex": 2, - "flags": 1, "active": true, "afi": "ipv6" } diff --git a/tests/topotests/bfd-topo2/r2/ipv4_routes.json b/tests/topotests/bfd-topo2/r2/ipv4_routes.json index 3c41e1343..69a5f1a5b 100644 --- a/tests/topotests/bfd-topo2/r2/ipv4_routes.json +++ b/tests/topotests/bfd-topo2/r2/ipv4_routes.json @@ -11,7 +11,6 @@ { "active": true, "directlyConnected": true, - "flags": 1, "interfaceIndex": 3, "interfaceName": "r2-eth1" } diff --git a/tests/topotests/bfd-topo2/r2/ipv6_routes.json b/tests/topotests/bfd-topo2/r2/ipv6_routes.json index bb45bbae5..66abade38 100644 --- a/tests/topotests/bfd-topo2/r2/ipv6_routes.json +++ b/tests/topotests/bfd-topo2/r2/ipv6_routes.json @@ -11,7 +11,6 @@ { "active": true, "directlyConnected": true, - "flags": 1, "interfaceIndex": 4, "interfaceName": "r2-eth2" } diff --git a/tests/topotests/bfd-topo2/r3/ipv4_routes.json b/tests/topotests/bfd-topo2/r3/ipv4_routes.json index 
cbf116e68..d4a0812ae 100644 --- a/tests/topotests/bfd-topo2/r3/ipv4_routes.json +++ b/tests/topotests/bfd-topo2/r3/ipv4_routes.json @@ -11,7 +11,6 @@ { "active": true, "directlyConnected": true, - "flags": 1, "interfaceIndex": 2, "interfaceName": "r3-eth0" } diff --git a/tests/topotests/bfd-topo2/r4/ipv6_routes.json b/tests/topotests/bfd-topo2/r4/ipv6_routes.json index a22c90cbb..af8272c4a 100644 --- a/tests/topotests/bfd-topo2/r4/ipv6_routes.json +++ b/tests/topotests/bfd-topo2/r4/ipv6_routes.json @@ -11,7 +11,6 @@ { "active": true, "directlyConnected": true, - "flags": 1, "interfaceIndex": 2, "interfaceName": "r4-eth0" } diff --git a/tests/topotests/bgp_ipv6_rtadv/r1/ipv6_routes.json b/tests/topotests/bgp_ipv6_rtadv/r1/ipv6_routes.json index d0378b564..acf5c8b27 100644 --- a/tests/topotests/bgp_ipv6_rtadv/r1/ipv6_routes.json +++ b/tests/topotests/bgp_ipv6_rtadv/r1/ipv6_routes.json @@ -10,7 +10,6 @@ { "interfaceName": "r1-eth0", "interfaceIndex": 2, - "flags": 1, "active": true, "afi": "ipv6" } diff --git a/tests/topotests/bgp_vrf_lite_ipv6_rtadv/r1/ipv6_routes.json b/tests/topotests/bgp_vrf_lite_ipv6_rtadv/r1/ipv6_routes.json index 1ca62094b..e5aff94bd 100644 --- a/tests/topotests/bgp_vrf_lite_ipv6_rtadv/r1/ipv6_routes.json +++ b/tests/topotests/bgp_vrf_lite_ipv6_rtadv/r1/ipv6_routes.json @@ -10,7 +10,6 @@ "internalFlags": 0, "nexthops": [ { - "flags": 1, "afi": "ipv6", "interfaceIndex": 2, "interfaceName": "r1-eth0", diff --git a/zebra/connected.c b/zebra/connected.c index a81c0cbe7..75f4f53bc 100644 --- a/zebra/connected.c +++ b/zebra/connected.c @@ -251,10 +251,10 @@ void connected_up(struct interface *ifp, struct connected *ifc) metric = (ifc->metric < (uint32_t)METRIC_MAX) ? 
ifc->metric : ifp->metric; rib_add(afi, SAFI_UNICAST, zvrf->vrf->vrf_id, ZEBRA_ROUTE_CONNECT, - 0, 0, &p, NULL, &nh, zvrf->table_id, metric, 0, 0, 0); + 0, 0, &p, NULL, &nh, 0, zvrf->table_id, metric, 0, 0, 0); rib_add(afi, SAFI_MULTICAST, zvrf->vrf->vrf_id, ZEBRA_ROUTE_CONNECT, - 0, 0, &p, NULL, &nh, zvrf->table_id, metric, 0, 0, 0); + 0, 0, &p, NULL, &nh, 0, zvrf->table_id, metric, 0, 0, 0); /* Schedule LSP forwarding entries for processing, if appropriate. */ if (zvrf->vrf->vrf_id == VRF_DEFAULT) { @@ -393,11 +393,11 @@ void connected_down(struct interface *ifp, struct connected *ifc) * Same logic as for connected_up(): push the changes into the * head. */ - rib_delete(afi, SAFI_UNICAST, zvrf->vrf->vrf_id, ZEBRA_ROUTE_CONNECT, - 0, 0, &p, NULL, &nh, zvrf->table_id, 0, 0, false); + rib_delete(afi, SAFI_UNICAST, zvrf->vrf->vrf_id, ZEBRA_ROUTE_CONNECT, 0, + 0, &p, NULL, &nh, 0, zvrf->table_id, 0, 0, false); rib_delete(afi, SAFI_MULTICAST, zvrf->vrf->vrf_id, ZEBRA_ROUTE_CONNECT, - 0, 0, &p, NULL, &nh, zvrf->table_id, 0, 0, false); + 0, 0, &p, NULL, &nh, 0, zvrf->table_id, 0, 0, false); /* Schedule LSP forwarding entries for processing, if appropriate. 
*/ if (zvrf->vrf->vrf_id == VRF_DEFAULT) { diff --git a/zebra/if_netlink.c b/zebra/if_netlink.c index d42f68cbe..c09007bcb 100644 --- a/zebra/if_netlink.c +++ b/zebra/if_netlink.c @@ -66,6 +66,7 @@ #include "zebra/zebra_ptm.h" #include "zebra/zebra_mpls.h" #include "zebra/kernel_netlink.h" +#include "zebra/rt_netlink.h" #include "zebra/if_netlink.h" #include "zebra/zebra_errors.h" #include "zebra/zebra_vxlan.h" @@ -807,6 +808,23 @@ int interface_lookup_netlink(struct zebra_ns *zns) /* fixup linkages */ zebra_if_update_all_links(); + return 0; +} + +/** + * interface_addr_lookup_netlink() - Look up interface addresses + * + * @zns: Zebra netlink socket + * Return: Result status + */ +static int interface_addr_lookup_netlink(struct zebra_ns *zns) +{ + int ret; + struct zebra_dplane_info dp_info; + struct nlsock *netlink_cmd = &zns->netlink_cmd; + + /* Capture key info from ns struct */ + zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/); /* Get IPv4 address of the interfaces. */ ret = netlink_request_intf_addr(netlink_cmd, AF_INET, RTM_GETADDR, 0); @@ -1460,6 +1478,13 @@ int netlink_protodown(struct interface *ifp, bool down) void interface_list(struct zebra_ns *zns) { interface_lookup_netlink(zns); + /* We add routes for interface address, + * so we need to get the nexthop info + * from the kernel before we can do that + */ + netlink_nexthop_read(zns); + + interface_addr_lookup_netlink(zns); } #endif /* GNU_LINUX */ diff --git a/zebra/interface.c b/zebra/interface.c index ef03cf87f..daa93e36d 100644 --- a/zebra/interface.c +++ b/zebra/interface.c @@ -107,6 +107,17 @@ static void zebra_if_node_destroy(route_table_delegate_t *delegate, route_node_destroy(delegate, table, node); } +static void zebra_if_nhg_dependents_free(struct zebra_if *zebra_if) +{ + nhg_connected_tree_free(&zebra_if->nhg_dependents); +} + +static void zebra_if_nhg_dependents_init(struct zebra_if *zebra_if) +{ + nhg_connected_tree_init(&zebra_if->nhg_dependents); +} + + 
route_table_delegate_t zebra_if_table_delegate = { .create_node = route_node_create, .destroy_node = zebra_if_node_destroy}; @@ -120,6 +131,9 @@ static int if_zebra_new_hook(struct interface *ifp) zebra_if->multicast = IF_ZEBRA_MULTICAST_UNSPEC; zebra_if->shutdown = IF_ZEBRA_SHUTDOWN_OFF; + + zebra_if_nhg_dependents_init(zebra_if); + zebra_ptm_if_init(zebra_if); ifp->ptm_enable = zebra_ptm_get_enable_state(); @@ -175,6 +189,34 @@ static int if_zebra_new_hook(struct interface *ifp) return 0; } +static void if_nhg_dependents_check_valid(struct nhg_hash_entry *nhe) +{ + zebra_nhg_check_valid(nhe); + if (!CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_VALID)) + /* Assuming uninstalled as well here */ + UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED); +} + +static void if_down_nhg_dependents(const struct interface *ifp) +{ + struct nhg_connected *rb_node_dep = NULL; + struct zebra_if *zif = (struct zebra_if *)ifp->info; + + frr_each(nhg_connected_tree, &zif->nhg_dependents, rb_node_dep) + if_nhg_dependents_check_valid(rb_node_dep->nhe); +} + +static void if_nhg_dependents_release(const struct interface *ifp) +{ + struct nhg_connected *rb_node_dep = NULL; + struct zebra_if *zif = (struct zebra_if *)ifp->info; + + frr_each(nhg_connected_tree, &zif->nhg_dependents, rb_node_dep) { + rb_node_dep->nhe->ifp = NULL; /* Null it out */ + if_nhg_dependents_check_valid(rb_node_dep->nhe); + } +} + /* Called when interface is deleted. 
*/ static int if_zebra_delete_hook(struct interface *ifp) { @@ -196,7 +238,11 @@ static int if_zebra_delete_hook(struct interface *ifp) list_delete(&rtadv->AdvDNSSLList); #endif /* HAVE_RTADV */ + if_nhg_dependents_release(ifp); + zebra_if_nhg_dependents_free(zebra_if); + XFREE(MTYPE_TMP, zebra_if->desc); + THREAD_OFF(zebra_if->speed_update); XFREE(MTYPE_ZINFO, zebra_if); @@ -925,6 +971,47 @@ static void if_down_del_nbr_connected(struct interface *ifp) } } +void if_nhg_dependents_add(struct interface *ifp, struct nhg_hash_entry *nhe) +{ + if (ifp->info) { + struct zebra_if *zif = (struct zebra_if *)ifp->info; + + nhg_connected_tree_add_nhe(&zif->nhg_dependents, nhe); + } +} + +void if_nhg_dependents_del(struct interface *ifp, struct nhg_hash_entry *nhe) +{ + if (ifp->info) { + struct zebra_if *zif = (struct zebra_if *)ifp->info; + + nhg_connected_tree_del_nhe(&zif->nhg_dependents, nhe); + } +} + +unsigned int if_nhg_dependents_count(const struct interface *ifp) +{ + if (ifp->info) { + struct zebra_if *zif = (struct zebra_if *)ifp->info; + + return nhg_connected_tree_count(&zif->nhg_dependents); + } + + return 0; +} + + +bool if_nhg_dependents_is_empty(const struct interface *ifp) +{ + if (ifp->info) { + struct zebra_if *zif = (struct zebra_if *)ifp->info; + + return nhg_connected_tree_is_empty(&zif->nhg_dependents); + } + + return false; +} + /* Interface is up. */ void if_up(struct interface *ifp) { @@ -988,6 +1075,8 @@ void if_down(struct interface *ifp) zif->down_count++; quagga_timestamp(2, zif->down_last, sizeof(zif->down_last)); + if_down_nhg_dependents(ifp); + /* Handle interface down for specific types for EVPN. 
Non-VxLAN * interfaces * are checked to see if (remote) neighbor entries need to be purged diff --git a/zebra/interface.h b/zebra/interface.h index e134b9b42..78ccbae62 100644 --- a/zebra/interface.h +++ b/zebra/interface.h @@ -27,6 +27,7 @@ #include "hook.h" #include "zebra/zebra_l2.h" +#include "zebra/zebra_nhg_private.h" #ifdef __cplusplus extern "C" { @@ -277,6 +278,15 @@ struct zebra_if { /* Installed addresses chains tree. */ struct route_table *ipv4_subnets; + /* Nexthops pointing to this interface */ + /** + * Any nexthop that we get should have an + * interface. When an interface goes down, + * we will use this list to update the nexthops + * pointing to it with that info. + */ + struct nhg_connected_tree_head nhg_dependents; + /* Information about up/down changes */ unsigned int up_count; char up_last[QUAGGA_TIMESTAMP_LEN]; @@ -424,6 +434,14 @@ extern void zebra_if_update_link(struct interface *ifp, ifindex_t link_ifindex, extern void zebra_if_update_all_links(void); extern void zebra_if_set_protodown(struct interface *ifp, bool down); +/* Nexthop group connected functions */ +extern void if_nhg_dependents_add(struct interface *ifp, + struct nhg_hash_entry *nhe); +extern void if_nhg_dependents_del(struct interface *ifp, + struct nhg_hash_entry *nhe); +extern unsigned int if_nhg_dependents_count(const struct interface *ifp); +extern bool if_nhg_dependents_is_empty(const struct interface *ifp); + extern void vrf_add_update(struct vrf *vrfp); #ifdef HAVE_PROC_NET_DEV diff --git a/zebra/kernel_netlink.c b/zebra/kernel_netlink.c index f52b4746a..23f1a3bf8 100644 --- a/zebra/kernel_netlink.c +++ b/zebra/kernel_netlink.c @@ -99,6 +99,9 @@ static const struct message nlmsg_str[] = {{RTM_NEWROUTE, "RTM_NEWROUTE"}, {RTM_NEWRULE, "RTM_NEWRULE"}, {RTM_DELRULE, "RTM_DELRULE"}, {RTM_GETRULE, "RTM_GETRULE"}, + {RTM_NEWNEXTHOP, "RTM_NEWNEXTHOP"}, + {RTM_DELNEXTHOP, "RTM_DELNEXTHOP"}, + {RTM_GETNEXTHOP, "RTM_GETNEXTHOP"}, {0}}; static const struct message rtproto_str[] = 
{ @@ -291,6 +294,10 @@ static int netlink_information_fetch(struct nlmsghdr *h, ns_id_t ns_id, return netlink_rule_change(h, ns_id, startup); case RTM_DELRULE: return netlink_rule_change(h, ns_id, startup); + case RTM_NEWNEXTHOP: + return netlink_nexthop_change(h, ns_id, startup); + case RTM_DELNEXTHOP: + return netlink_nexthop_change(h, ns_id, startup); default: /* * If we have received this message then @@ -884,15 +891,20 @@ int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int), msg_type, err->msg.nlmsg_seq, err->msg.nlmsg_pid); - } else - flog_err( - EC_ZEBRA_UNEXPECTED_MESSAGE, - "%s error: %s, type=%s(%u), seq=%u, pid=%u", - nl->name, - safe_strerror(-errnum), - nl_msg_type_to_str(msg_type), - msg_type, err->msg.nlmsg_seq, - err->msg.nlmsg_pid); + } else { + if ((msg_type != RTM_GETNEXTHOP) + || !startup) + flog_err( + EC_ZEBRA_UNEXPECTED_MESSAGE, + "%s error: %s, type=%s(%u), seq=%u, pid=%u", + nl->name, + safe_strerror(-errnum), + nl_msg_type_to_str( + msg_type), + msg_type, + err->msg.nlmsg_seq, + err->msg.nlmsg_pid); + } return -1; } @@ -1096,7 +1108,8 @@ void kernel_init(struct zebra_ns *zns) RTMGRP_IPV4_MROUTE | RTMGRP_NEIGH | (1 << (RTNLGRP_IPV4_RULE - 1)) | - (1 << (RTNLGRP_IPV6_RULE - 1)); + (1 << (RTNLGRP_IPV6_RULE - 1)) | + (1 << (RTNLGRP_NEXTHOP - 1)); snprintf(zns->netlink.name, sizeof(zns->netlink.name), "netlink-listen (NS %u)", zns->ns_id); diff --git a/zebra/kernel_socket.c b/zebra/kernel_socket.c index f5aca2341..c2812aa47 100644 --- a/zebra/kernel_socket.c +++ b/zebra/kernel_socket.c @@ -1139,16 +1139,17 @@ void rtm_read(struct rt_msghdr *rtm) */ if (rtm->rtm_type == RTM_CHANGE) rib_delete(afi, SAFI_UNICAST, VRF_DEFAULT, ZEBRA_ROUTE_KERNEL, - 0, zebra_flags, &p, NULL, NULL, RT_TABLE_MAIN, - 0, 0, true); + 0, zebra_flags, &p, NULL, NULL, 0, RT_TABLE_MAIN, 0, + 0, true); if (rtm->rtm_type == RTM_GET || rtm->rtm_type == RTM_ADD || rtm->rtm_type == RTM_CHANGE) rib_add(afi, SAFI_UNICAST, VRF_DEFAULT, ZEBRA_ROUTE_KERNEL, 0, - 
zebra_flags, &p, NULL, &nh, RT_TABLE_MAIN, 0, 0, 0, 0); + zebra_flags, &p, NULL, &nh, 0, RT_TABLE_MAIN, + 0, 0, 0, 0); else rib_delete(afi, SAFI_UNICAST, VRF_DEFAULT, ZEBRA_ROUTE_KERNEL, - 0, zebra_flags, &p, NULL, &nh, RT_TABLE_MAIN, - 0, 0, true); + 0, zebra_flags, &p, NULL, &nh, 0, RT_TABLE_MAIN, 0, + 0, true); } /* Interface function for the kernel routing table updates. Support diff --git a/zebra/main.c b/zebra/main.c index f0225ac5e..334354eaa 100644 --- a/zebra/main.c +++ b/zebra/main.c @@ -165,6 +165,7 @@ static void sigint(void) } if (zrouter.lsp_process_q) work_queue_free_and_null(&zrouter.lsp_process_q); + vrf_terminate(); ns_walk_func(zebra_ns_early_shutdown); diff --git a/zebra/redistribute.c b/zebra/redistribute.c index 0dc9de0c5..4e0163f8a 100644 --- a/zebra/redistribute.c +++ b/zebra/redistribute.c @@ -643,7 +643,7 @@ int zebra_add_import_table_entry(struct zebra_vrf *zvrf, struct route_node *rn, afi = family2afi(rn->p.family); if (rmap_name) ret = zebra_import_table_route_map_check( - afi, re->type, re->instance, &rn->p, re->ng.nexthop, + afi, re->type, re->instance, &rn->p, re->ng->nexthop, zvrf->vrf->vrf_id, re->tag, rmap_name); if (ret != RMAP_PERMITMATCH) { @@ -676,10 +676,10 @@ int zebra_add_import_table_entry(struct zebra_vrf *zvrf, struct route_node *rn, newre->metric = re->metric; newre->mtu = re->mtu; newre->table = zvrf->table_id; - newre->nexthop_num = 0; newre->uptime = monotime(NULL); newre->instance = re->table; - route_entry_copy_nexthops(newre, re->ng.nexthop); + newre->ng = nexthop_group_new(); + route_entry_copy_nexthops(newre, re->ng->nexthop); rib_add_multipath(afi, SAFI_UNICAST, &p, NULL, newre); @@ -696,7 +696,7 @@ int zebra_del_import_table_entry(struct zebra_vrf *zvrf, struct route_node *rn, prefix_copy(&p, &rn->p); rib_delete(afi, SAFI_UNICAST, zvrf->vrf->vrf_id, ZEBRA_ROUTE_TABLE, - re->table, re->flags, &p, NULL, re->ng.nexthop, + re->table, re->flags, &p, NULL, re->ng->nexthop, re->nhe_id, zvrf->table_id, re->metric, 
re->distance, false); return 0; diff --git a/zebra/rib.h b/zebra/rib.h index ee1df89c0..35aa011c0 100644 --- a/zebra/rib.h +++ b/zebra/rib.h @@ -88,11 +88,14 @@ struct route_entry { struct re_list_item next; /* Nexthop structure (from RIB) */ - struct nexthop_group ng; + struct nexthop_group *ng; /* Nexthop group from FIB (optional) */ struct nexthop_group fib_ng; + /* Nexthop group hash entry ID */ + uint32_t nhe_id; + /* Tag */ route_tag_t tag; @@ -135,10 +138,6 @@ struct route_entry { /* Route has Failed installation into the Data Plane in some manner */ #define ROUTE_ENTRY_FAILED 0x20 - /* Nexthop information. */ - uint8_t nexthop_num; - uint8_t nexthop_active_num; - /* Sequence value incremented for each dataplane operation */ uint32_t dplane_sequence; @@ -154,13 +153,14 @@ struct route_entry { #define RIB_KERNEL_ROUTE(R) RKERNEL_ROUTE((R)->type) /* meta-queue structure: - * sub-queue 0: connected, kernel - * sub-queue 1: static - * sub-queue 2: RIP, RIPng, OSPF, OSPF6, IS-IS, EIGRP, NHRP - * sub-queue 3: iBGP, eBGP - * sub-queue 4: any other origin (if any) + * sub-queue 0: nexthop group objects + * sub-queue 1: connected, kernel + * sub-queue 2: static + * sub-queue 3: RIP, RIPng, OSPF, OSPF6, IS-IS, EIGRP, NHRP + * sub-queue 4: iBGP, eBGP + * sub-queue 5: any other origin (if any) */ -#define MQ_SIZE 5 +#define MQ_SIZE 6 struct meta_queue { struct list *subq[MQ_SIZE]; uint32_t size; /* sum of lengths of all subqueues */ @@ -210,7 +210,7 @@ DECLARE_LIST(re_list, struct route_entry, next); #define RIB_ROUTE_QUEUED(x) (1 << (x)) // If MQ_SIZE is modified this value needs to be updated. -#define RIB_ROUTE_ANY_QUEUED 0x1F +#define RIB_ROUTE_ANY_QUEUED 0x3F /* * The maximum qindex that can be used. 
@@ -364,8 +364,8 @@ extern void rib_uninstall_kernel(struct route_node *rn, struct route_entry *re); extern int rib_add(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type, unsigned short instance, int flags, struct prefix *p, struct prefix_ipv6 *src_p, const struct nexthop *nh, - uint32_t table_id, uint32_t metric, uint32_t mtu, - uint8_t distance, route_tag_t tag); + uint32_t nhe_id, uint32_t table_id, uint32_t metric, + uint32_t mtu, uint8_t distance, route_tag_t tag); extern int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p, struct prefix_ipv6 *src_p, struct route_entry *re); @@ -373,8 +373,8 @@ extern int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p, extern void rib_delete(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type, unsigned short instance, int flags, struct prefix *p, struct prefix_ipv6 *src_p, const struct nexthop *nh, - uint32_t table_id, uint32_t metric, uint8_t distance, - bool fromkernel); + uint32_t nhe_id, uint32_t table_id, uint32_t metric, + uint8_t distance, bool fromkernel); extern struct route_entry *rib_match(afi_t afi, safi_t safi, vrf_id_t vrf_id, union g_addr *addr, @@ -398,7 +398,13 @@ extern unsigned long rib_score_proto(uint8_t proto, unsigned short instance); extern unsigned long rib_score_proto_table(uint8_t proto, unsigned short instance, struct route_table *table); -extern void rib_queue_add(struct route_node *rn); + +extern int rib_queue_add(struct route_node *rn); + +struct nhg_ctx; /* Forward declaration */ + +extern int rib_queue_nhg_add(struct nhg_ctx *ctx); + extern void meta_queue_free(struct meta_queue *mq); extern int zebra_rib_labeled_unicast(struct route_entry *re); extern struct route_table *rib_table_ipv6; @@ -527,7 +533,7 @@ static inline struct nexthop_group *rib_active_nhg(struct route_entry *re) if (re->fib_ng.nexthop) return &(re->fib_ng); else - return &(re->ng); + return re->ng; } extern void zebra_vty_init(void); diff --git a/zebra/rt.h b/zebra/rt.h index f311a6b9d..4b9a3f83f 100644 
--- a/zebra/rt.h +++ b/zebra/rt.h @@ -40,13 +40,17 @@ extern "C" { #define RSYSTEM_ROUTE(type) \ ((RKERNEL_ROUTE(type)) || (type) == ZEBRA_ROUTE_CONNECT) + /* - * Update or delete a route, LSP, pseudowire, or vxlan MAC from the kernel, - * using info from a dataplane context. + * Update or delete a route, nexthop, LSP, pseudowire, or vxlan MAC from the + * kernel, using info from a dataplane context. */ extern enum zebra_dplane_result kernel_route_update( struct zebra_dplane_ctx *ctx); +extern enum zebra_dplane_result +kernel_nexthop_update(struct zebra_dplane_ctx *ctx); + extern enum zebra_dplane_result kernel_lsp_update( struct zebra_dplane_ctx *ctx); diff --git a/zebra/rt_netlink.c b/zebra/rt_netlink.c index 43e44cad1..640802fe3 100644 --- a/zebra/rt_netlink.c +++ b/zebra/rt_netlink.c @@ -27,6 +27,7 @@ #include <linux/mpls_iptunnel.h> #include <linux/neighbour.h> #include <linux/rtnetlink.h> +#include <linux/nexthop.h> /* Hack for GNU libc version 2. */ #ifndef MSG_TRUNC @@ -49,6 +50,7 @@ #include "vty.h" #include "mpls.h" #include "vxlan.h" +#include "printfrr.h" #include "zebra/zapi_msg.h" #include "zebra/zebra_ns.h" @@ -62,6 +64,7 @@ #include "zebra/zebra_mpls.h" #include "zebra/kernel_netlink.h" #include "zebra/rt_netlink.h" +#include "zebra/zebra_nhg.h" #include "zebra/zebra_mroute.h" #include "zebra/zebra_vxlan.h" #include "zebra/zebra_errors.h" @@ -72,6 +75,8 @@ static vlanid_t filter_vlan = 0; +static bool supports_nh; + struct gw_family_t { uint16_t filler; uint16_t family; @@ -186,6 +191,7 @@ static inline int zebra2proto(int proto) proto = RTPROT_OPENFABRIC; break; case ZEBRA_ROUTE_TABLE: + case ZEBRA_ROUTE_NHG: proto = RTPROT_ZEBRA; break; default: @@ -205,7 +211,7 @@ static inline int zebra2proto(int proto) return proto; } -static inline int proto2zebra(int proto, int family) +static inline int proto2zebra(int proto, int family, bool is_nexthop) { switch (proto) { case RTPROT_BABEL: @@ -249,6 +255,12 @@ static inline int proto2zebra(int proto, int 
family) case RTPROT_OPENFABRIC: proto = ZEBRA_ROUTE_OPENFABRIC; break; + case RTPROT_ZEBRA: + if (is_nexthop) { + proto = ZEBRA_ROUTE_NHG; + break; + } + /* Intentional fall thru */ default: /* * When a user adds a new protocol this will show up @@ -319,6 +331,169 @@ static int parse_encap_mpls(struct rtattr *tb, mpls_label_t *labels) return num_labels; } +static struct nexthop +parse_nexthop_unicast(ns_id_t ns_id, struct rtmsg *rtm, struct rtattr **tb, + enum blackhole_type bh_type, int index, void *prefsrc, + void *gate, afi_t afi, vrf_id_t vrf_id) +{ + struct interface *ifp = NULL; + struct nexthop nh = {0}; + mpls_label_t labels[MPLS_MAX_LABELS] = {0}; + int num_labels = 0; + + vrf_id_t nh_vrf_id = vrf_id; + size_t sz = (afi == AFI_IP) ? 4 : 16; + + if (bh_type == BLACKHOLE_UNSPEC) { + if (index && !gate) + nh.type = NEXTHOP_TYPE_IFINDEX; + else if (index && gate) + nh.type = (afi == AFI_IP) ? NEXTHOP_TYPE_IPV4_IFINDEX + : NEXTHOP_TYPE_IPV6_IFINDEX; + else if (!index && gate) + nh.type = (afi == AFI_IP) ? 
NEXTHOP_TYPE_IPV4 + : NEXTHOP_TYPE_IPV6; + else { + nh.type = NEXTHOP_TYPE_BLACKHOLE; + nh.bh_type = bh_type; + } + } else { + nh.type = NEXTHOP_TYPE_BLACKHOLE; + nh.bh_type = bh_type; + } + nh.ifindex = index; + if (prefsrc) + memcpy(&nh.src, prefsrc, sz); + if (gate) + memcpy(&nh.gate, gate, sz); + + if (index) { + ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), index); + if (ifp) + nh_vrf_id = ifp->vrf_id; + } + nh.vrf_id = nh_vrf_id; + + if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE] + && *(uint16_t *)RTA_DATA(tb[RTA_ENCAP_TYPE]) + == LWTUNNEL_ENCAP_MPLS) { + num_labels = parse_encap_mpls(tb[RTA_ENCAP], labels); + } + + if (rtm->rtm_flags & RTNH_F_ONLINK) + SET_FLAG(nh.flags, NEXTHOP_FLAG_ONLINK); + + if (num_labels) + nexthop_add_labels(&nh, ZEBRA_LSP_STATIC, num_labels, labels); + + return nh; +} + +static uint8_t parse_multipath_nexthops_unicast(ns_id_t ns_id, + struct route_entry *re, + struct rtmsg *rtm, + struct rtnexthop *rtnh, + struct rtattr **tb, + void *prefsrc, vrf_id_t vrf_id) +{ + void *gate = NULL; + struct interface *ifp = NULL; + int index = 0; + /* MPLS labels */ + mpls_label_t labels[MPLS_MAX_LABELS] = {0}; + int num_labels = 0; + struct rtattr *rtnh_tb[RTA_MAX + 1] = {}; + + int len = RTA_PAYLOAD(tb[RTA_MULTIPATH]); + vrf_id_t nh_vrf_id = vrf_id; + + re->ng = nexthop_group_new(); + + for (;;) { + struct nexthop *nh = NULL; + + if (len < (int)sizeof(*rtnh) || rtnh->rtnh_len > len) + break; + + index = rtnh->rtnh_ifindex; + if (index) { + /* + * Yes we are looking this up + * for every nexthop and just + * using the last one looked + * up right now + */ + ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), + index); + if (ifp) + nh_vrf_id = ifp->vrf_id; + else { + flog_warn( + EC_ZEBRA_UNKNOWN_INTERFACE, + "%s: Unknown interface %u specified, defaulting to VRF_DEFAULT", + __PRETTY_FUNCTION__, index); + nh_vrf_id = VRF_DEFAULT; + } + } else + nh_vrf_id = vrf_id; + + if (rtnh->rtnh_len > sizeof(*rtnh)) { + memset(rtnh_tb, 0, 
sizeof(rtnh_tb)); + + netlink_parse_rtattr(rtnh_tb, RTA_MAX, RTNH_DATA(rtnh), + rtnh->rtnh_len - sizeof(*rtnh)); + if (rtnh_tb[RTA_GATEWAY]) + gate = RTA_DATA(rtnh_tb[RTA_GATEWAY]); + if (rtnh_tb[RTA_ENCAP] && rtnh_tb[RTA_ENCAP_TYPE] + && *(uint16_t *)RTA_DATA(rtnh_tb[RTA_ENCAP_TYPE]) + == LWTUNNEL_ENCAP_MPLS) { + num_labels = parse_encap_mpls( + rtnh_tb[RTA_ENCAP], labels); + } + } + + if (gate && rtm->rtm_family == AF_INET) { + if (index) + nh = route_entry_nexthop_ipv4_ifindex_add( + re, gate, prefsrc, index, nh_vrf_id); + else + nh = route_entry_nexthop_ipv4_add( + re, gate, prefsrc, nh_vrf_id); + } else if (gate && rtm->rtm_family == AF_INET6) { + if (index) + nh = route_entry_nexthop_ipv6_ifindex_add( + re, gate, index, nh_vrf_id); + else + nh = route_entry_nexthop_ipv6_add(re, gate, + nh_vrf_id); + } else + nh = route_entry_nexthop_ifindex_add(re, index, + nh_vrf_id); + + if (nh) { + if (num_labels) + nexthop_add_labels(nh, ZEBRA_LSP_STATIC, + num_labels, labels); + + if (rtnh->rtnh_flags & RTNH_F_ONLINK) + SET_FLAG(nh->flags, NEXTHOP_FLAG_ONLINK); + } + + if (rtnh->rtnh_len == 0) + break; + + len -= NLMSG_ALIGN(rtnh->rtnh_len); + rtnh = RTNH_NEXT(rtnh); + } + + uint8_t nhop_num = nexthop_group_nexthop_num(re->ng); + + if (!nhop_num) + nexthop_group_delete(&re->ng); + + return nhop_num; +} + /* Looking up routing table by netlink interface. 
*/ static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id, int startup) @@ -340,6 +515,7 @@ static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id, uint32_t mtu = 0; uint8_t distance = 0; route_tag_t tag = 0; + uint32_t nhe_id = 0; void *dest = NULL; void *gate = NULL; @@ -347,10 +523,6 @@ static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id, void *src = NULL; /* IPv6 srcdest source prefix */ enum blackhole_type bh_type = BLACKHOLE_UNSPEC; - /* MPLS labels */ - mpls_label_t labels[MPLS_MAX_LABELS] = {0}; - int num_labels = 0; - rtm = NLMSG_DATA(h); if (startup && h->nlmsg_type != RTM_NEWROUTE) @@ -423,7 +595,7 @@ static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id, /* Route which inserted by Zebra. */ if (is_selfroute(rtm->rtm_protocol)) { flags |= ZEBRA_FLAG_SELFROUTE; - proto = proto2zebra(rtm->rtm_protocol, rtm->rtm_family); + proto = proto2zebra(rtm->rtm_protocol, rtm->rtm_family, false); } if (tb[RTA_OIF]) index = *(int *)RTA_DATA(tb[RTA_OIF]); @@ -444,6 +616,9 @@ static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id, if (tb[RTA_GATEWAY]) gate = RTA_DATA(tb[RTA_GATEWAY]); + if (tb[RTA_NH_ID]) + nhe_id = *(uint32_t *)RTA_DATA(tb[RTA_NH_ID]); + if (tb[RTA_PRIORITY]) metric = *(int *)RTA_DATA(tb[RTA_PRIORITY]); @@ -547,75 +722,24 @@ static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id, afi = AFI_IP6; if (h->nlmsg_type == RTM_NEWROUTE) { - struct interface *ifp; - vrf_id_t nh_vrf_id = vrf_id; if (!tb[RTA_MULTIPATH]) { - struct nexthop nh; - size_t sz = (afi == AFI_IP) ? 4 : 16; - - memset(&nh, 0, sizeof(nh)); - - if (bh_type == BLACKHOLE_UNSPEC) { - if (index && !gate) - nh.type = NEXTHOP_TYPE_IFINDEX; - else if (index && gate) - nh.type = - (afi == AFI_IP) - ? NEXTHOP_TYPE_IPV4_IFINDEX - : NEXTHOP_TYPE_IPV6_IFINDEX; - else if (!index && gate) - nh.type = (afi == AFI_IP) - ? 
NEXTHOP_TYPE_IPV4 - : NEXTHOP_TYPE_IPV6; - else { - nh.type = NEXTHOP_TYPE_BLACKHOLE; - nh.bh_type = bh_type; - } - } else { - nh.type = NEXTHOP_TYPE_BLACKHOLE; - nh.bh_type = bh_type; - } - nh.ifindex = index; - if (prefsrc) - memcpy(&nh.src, prefsrc, sz); - if (gate) - memcpy(&nh.gate, gate, sz); - - if (index) { - ifp = if_lookup_by_index_per_ns( - zebra_ns_lookup(ns_id), - index); - if (ifp) - nh_vrf_id = ifp->vrf_id; - } - nh.vrf_id = nh_vrf_id; + struct nexthop nh = {0}; - if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE] - && *(uint16_t *)RTA_DATA(tb[RTA_ENCAP_TYPE]) - == LWTUNNEL_ENCAP_MPLS) { - num_labels = - parse_encap_mpls(tb[RTA_ENCAP], labels); + if (!nhe_id) { + nh = parse_nexthop_unicast( + ns_id, rtm, tb, bh_type, index, prefsrc, + gate, afi, vrf_id); } - - if (rtm->rtm_flags & RTNH_F_ONLINK) - SET_FLAG(nh.flags, NEXTHOP_FLAG_ONLINK); - - if (num_labels) - nexthop_add_labels(&nh, ZEBRA_LSP_STATIC, - num_labels, labels); - rib_add(afi, SAFI_UNICAST, vrf_id, proto, 0, flags, &p, - &src_p, &nh, table, metric, mtu, distance, tag); + &src_p, &nh, nhe_id, table, metric, mtu, + distance, tag); } else { /* This is a multipath route */ - struct route_entry *re; struct rtnexthop *rtnh = (struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]); - len = RTA_PAYLOAD(tb[RTA_MULTIPATH]); - re = XCALLOC(MTYPE_RE, sizeof(struct route_entry)); re->type = proto; re->distance = distance; @@ -624,148 +748,73 @@ static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id, re->mtu = mtu; re->vrf_id = vrf_id; re->table = table; - re->nexthop_num = 0; re->uptime = monotime(NULL); re->tag = tag; + re->nhe_id = nhe_id; - for (;;) { - struct nexthop *nh = NULL; - - if (len < (int)sizeof(*rtnh) - || rtnh->rtnh_len > len) - break; - - index = rtnh->rtnh_ifindex; - if (index) { - /* - * Yes we are looking this up - * for every nexthop and just - * using the last one looked - * up right now - */ - ifp = if_lookup_by_index_per_ns( - zebra_ns_lookup(ns_id), - index); - if (ifp) - 
nh_vrf_id = ifp->vrf_id; - else { - flog_warn( - EC_ZEBRA_UNKNOWN_INTERFACE, - "%s: Unknown interface %u specified, defaulting to VRF_DEFAULT", - __PRETTY_FUNCTION__, - index); - nh_vrf_id = VRF_DEFAULT; - } - } else - nh_vrf_id = vrf_id; - - gate = 0; - if (rtnh->rtnh_len > sizeof(*rtnh)) { - memset(tb, 0, sizeof(tb)); - netlink_parse_rtattr( - tb, RTA_MAX, RTNH_DATA(rtnh), - rtnh->rtnh_len - sizeof(*rtnh)); - if (tb[RTA_GATEWAY]) - gate = RTA_DATA( - tb[RTA_GATEWAY]); - if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE] - && *(uint16_t *)RTA_DATA( - tb[RTA_ENCAP_TYPE]) - == LWTUNNEL_ENCAP_MPLS) { - num_labels = parse_encap_mpls( - tb[RTA_ENCAP], labels); - } - } - - if (gate) { - if (rtm->rtm_family == AF_INET) { - if (index) - nh = route_entry_nexthop_ipv4_ifindex_add( - re, gate, - prefsrc, index, - nh_vrf_id); - else - nh = route_entry_nexthop_ipv4_add( - re, gate, - prefsrc, - nh_vrf_id); - } else if (rtm->rtm_family - == AF_INET6) { - if (index) - nh = route_entry_nexthop_ipv6_ifindex_add( - re, gate, index, - nh_vrf_id); - else - nh = route_entry_nexthop_ipv6_add( - re, gate, - nh_vrf_id); - } - } else - nh = route_entry_nexthop_ifindex_add( - re, index, nh_vrf_id); + if (!nhe_id) { + uint8_t nhop_num = + parse_multipath_nexthops_unicast( + ns_id, re, rtm, rtnh, tb, + prefsrc, vrf_id); - if (nh && num_labels) - nexthop_add_labels(nh, ZEBRA_LSP_STATIC, - num_labels, labels); - - if (nh && (rtnh->rtnh_flags & RTNH_F_ONLINK)) - SET_FLAG(nh->flags, - NEXTHOP_FLAG_ONLINK); - - if (rtnh->rtnh_len == 0) - break; - - len -= NLMSG_ALIGN(rtnh->rtnh_len); - rtnh = RTNH_NEXT(rtnh); + zserv_nexthop_num_warn( + __func__, (const struct prefix *)&p, + nhop_num); } - zserv_nexthop_num_warn(__func__, - (const struct prefix *)&p, - re->nexthop_num); - if (re->nexthop_num == 0) - XFREE(MTYPE_RE, re); - else + if (nhe_id || re->ng) rib_add_multipath(afi, SAFI_UNICAST, &p, &src_p, re); + else + XFREE(MTYPE_RE, re); } } else { - if (!tb[RTA_MULTIPATH]) { - struct nexthop nh; - size_t sz = 
(afi == AFI_IP) ? 4 : 16; - - memset(&nh, 0, sizeof(nh)); - if (bh_type == BLACKHOLE_UNSPEC) { - if (index && !gate) - nh.type = NEXTHOP_TYPE_IFINDEX; - else if (index && gate) - nh.type = - (afi == AFI_IP) - ? NEXTHOP_TYPE_IPV4_IFINDEX - : NEXTHOP_TYPE_IPV6_IFINDEX; - else if (!index && gate) - nh.type = (afi == AFI_IP) - ? NEXTHOP_TYPE_IPV4 - : NEXTHOP_TYPE_IPV6; - else { + if (nhe_id) { + rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0, flags, + &p, &src_p, NULL, nhe_id, table, metric, + distance, true); + } else { + if (!tb[RTA_MULTIPATH]) { + struct nexthop nh; + size_t sz = (afi == AFI_IP) ? 4 : 16; + + memset(&nh, 0, sizeof(nh)); + if (bh_type == BLACKHOLE_UNSPEC) { + if (index && !gate) + nh.type = NEXTHOP_TYPE_IFINDEX; + else if (index && gate) + nh.type = + (afi == AFI_IP) + ? NEXTHOP_TYPE_IPV4_IFINDEX + : NEXTHOP_TYPE_IPV6_IFINDEX; + else if (!index && gate) + nh.type = + (afi == AFI_IP) + ? NEXTHOP_TYPE_IPV4 + : NEXTHOP_TYPE_IPV6; + else { + nh.type = + NEXTHOP_TYPE_BLACKHOLE; + nh.bh_type = BLACKHOLE_UNSPEC; + } + } else { nh.type = NEXTHOP_TYPE_BLACKHOLE; - nh.bh_type = BLACKHOLE_UNSPEC; + nh.bh_type = bh_type; } + nh.ifindex = index; + if (gate) + memcpy(&nh.gate, gate, sz); + rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0, + flags, &p, &src_p, &nh, 0, table, + metric, distance, true); } else { - nh.type = NEXTHOP_TYPE_BLACKHOLE; - nh.bh_type = bh_type; + /* XXX: need to compare the entire list of + * nexthops here for NLM_F_APPEND stupidity */ + rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0, + flags, &p, &src_p, NULL, 0, table, + metric, distance, true); } - nh.ifindex = index; - if (gate) - memcpy(&nh.gate, gate, sz); - rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0, flags, - &p, &src_p, &nh, table, metric, distance, - true); - } else { - /* XXX: need to compare the entire list of nexthops - * here for NLM_F_APPEND stupidity */ - rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0, flags, - &p, &src_p, NULL, table, metric, distance, - true); } } @@ 
-1023,6 +1072,35 @@ static void _netlink_route_rta_add_gateway_info(uint8_t route_family, } } +static int build_label_stack(struct mpls_label_stack *nh_label, + mpls_lse_t *out_lse, char *label_buf, + size_t label_buf_size) +{ + char label_buf1[20]; + int num_labels = 0; + + for (int i = 0; nh_label && i < nh_label->num_labels; i++) { + if (nh_label->label[i] == MPLS_LABEL_IMPLICIT_NULL) + continue; + + if (IS_ZEBRA_DEBUG_KERNEL) { + if (!num_labels) + sprintf(label_buf, "label %u", + nh_label->label[i]); + else { + sprintf(label_buf1, "/%u", nh_label->label[i]); + strlcat(label_buf, label_buf1, label_buf_size); + } + } + + out_lse[num_labels] = + mpls_lse_encode(nh_label->label[i], 0, 0, 0); + num_labels++; + } + + return num_labels; +} + /* This function takes a nexthop as argument and adds * the appropriate netlink attributes to an existing * netlink message. @@ -1040,10 +1118,12 @@ static void _netlink_route_build_singlepath(const char *routedesc, int bytelen, struct rtmsg *rtmsg, size_t req_size, int cmd) { - struct mpls_label_stack *nh_label; + mpls_lse_t out_lse[MPLS_MAX_LABELS]; - int num_labels = 0; char label_buf[256]; + int num_labels = 0; + + assert(nexthop); /* * label_buf is *only* currently used within debugging. 
@@ -1053,30 +1133,8 @@ static void _netlink_route_build_singlepath(const char *routedesc, int bytelen, */ label_buf[0] = '\0'; - assert(nexthop); - char label_buf1[20]; - - nh_label = nexthop->nh_label; - - for (int i = 0; nh_label && i < nh_label->num_labels; i++) { - if (nh_label->label[i] == MPLS_LABEL_IMPLICIT_NULL) - continue; - - if (IS_ZEBRA_DEBUG_KERNEL) { - if (!num_labels) - sprintf(label_buf, "label %u", - nh_label->label[i]); - else { - sprintf(label_buf1, "/%u", nh_label->label[i]); - strlcat(label_buf, label_buf1, - sizeof(label_buf)); - } - } - - out_lse[num_labels] = - mpls_lse_encode(nh_label->label[i], 0, 0, 0); - num_labels++; - } + num_labels = build_label_stack(nexthop->nh_label, out_lse, label_buf, + sizeof(label_buf)); if (num_labels) { /* Set the BoS bit */ @@ -1221,16 +1279,17 @@ static void _netlink_route_build_multipath(const char *routedesc, int bytelen, struct rtmsg *rtmsg, const union g_addr **src) { - struct mpls_label_stack *nh_label; mpls_lse_t out_lse[MPLS_MAX_LABELS]; - int num_labels = 0; char label_buf[256]; + int num_labels = 0; rtnh->rtnh_len = sizeof(*rtnh); rtnh->rtnh_flags = 0; rtnh->rtnh_hops = 0; rta->rta_len += rtnh->rtnh_len; + assert(nexthop); + /* * label_buf is *only* currently used within debugging. 
* As such when we assign it we are guarding it inside @@ -1239,30 +1298,8 @@ static void _netlink_route_build_multipath(const char *routedesc, int bytelen, */ label_buf[0] = '\0'; - assert(nexthop); - char label_buf1[20]; - - nh_label = nexthop->nh_label; - - for (int i = 0; nh_label && i < nh_label->num_labels; i++) { - if (nh_label->label[i] == MPLS_LABEL_IMPLICIT_NULL) - continue; - - if (IS_ZEBRA_DEBUG_KERNEL) { - if (!num_labels) - sprintf(label_buf, "label %u", - nh_label->label[i]); - else { - sprintf(label_buf1, "/%u", nh_label->label[i]); - strlcat(label_buf, label_buf1, - sizeof(label_buf)); - } - } - - out_lse[num_labels] = - mpls_lse_encode(nh_label->label[i], 0, 0, 0); - num_labels++; - } + num_labels = build_label_stack(nexthop->nh_label, out_lse, label_buf, + sizeof(label_buf)); if (num_labels) { /* Set the BoS bit */ @@ -1430,6 +1467,13 @@ static void _netlink_route_debug(int cmd, const struct prefix *p, } } +static void _netlink_nexthop_debug(int cmd, uint32_t id) +{ + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("netlink_nexthop(): %s, id=%u", + nl_msg_type_to_str(cmd), id); +} + static void _netlink_mpls_debug(int cmd, uint32_t label, const char *routedesc) { if (IS_ZEBRA_DEBUG_KERNEL) @@ -1595,6 +1639,13 @@ static int netlink_route_multipath(int cmd, struct zebra_dplane_ctx *ctx) RTA_PAYLOAD(rta)); } + if (supports_nh) { + /* Kernel supports nexthop objects */ + addattr32(&req.n, sizeof(req), RTA_NH_ID, + dplane_ctx_get_nhe_id(ctx)); + goto skip; + } + /* Count overall nexthops so we can decide whether to use singlepath * or multipath case. 
*/ @@ -1842,6 +1893,262 @@ int kernel_get_ipmr_sg_stats(struct zebra_vrf *zvrf, void *in) return suc; } +/* Char length to debug ID with */ +#define ID_LENGTH 10 + +static void _netlink_nexthop_build_group(struct nlmsghdr *n, size_t req_size, + uint32_t id, + const struct nh_grp *z_grp, + const uint8_t count) +{ + struct nexthop_grp grp[count]; + /* Need space for max group size, "/", and null term */ + char buf[(MULTIPATH_NUM * (ID_LENGTH + 1)) + 1]; + char buf1[ID_LENGTH + 2]; + + buf[0] = '\0'; + + memset(grp, 0, sizeof(grp)); + + if (count) { + for (int i = 0; i < count; i++) { + grp[i].id = z_grp[i].id; + grp[i].weight = z_grp[i].weight; + + if (IS_ZEBRA_DEBUG_KERNEL) { + if (i == 0) + snprintf(buf, sizeof(buf1), "group %u", + grp[i].id); + else { + snprintf(buf1, sizeof(buf1), "/%u", + grp[i].id); + strlcat(buf, buf1, sizeof(buf)); + } + } + } + addattr_l(n, req_size, NHA_GROUP, grp, count * sizeof(*grp)); + } + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: ID (%u): %s", __func__, id, buf); +} + +/** + * netlink_nexthop() - Nexthop change via the netlink interface + * + * @ctx: Dataplane ctx + * + * Return: Result status + */ +static int netlink_nexthop(int cmd, struct zebra_dplane_ctx *ctx) +{ + struct { + struct nlmsghdr n; + struct nhmsg nhm; + char buf[NL_PKT_BUF_SIZE]; + } req; + + mpls_lse_t out_lse[MPLS_MAX_LABELS]; + char label_buf[256]; + int num_labels = 0; + size_t req_size = sizeof(req); + + /* Nothing to do if the kernel doesn't support nexthop objects */ + if (!supports_nh) + return 0; + + label_buf[0] = '\0'; + + memset(&req, 0, req_size); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg)); + req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST; + + if (cmd == RTM_NEWNEXTHOP) + req.n.nlmsg_flags |= NLM_F_REPLACE; + + req.n.nlmsg_type = cmd; + req.n.nlmsg_pid = dplane_ctx_get_ns(ctx)->nls.snl.nl_pid; + + req.nhm.nh_family = AF_UNSPEC; + /* TODO: Scope? 
*/ + + uint32_t id = dplane_ctx_get_nhe_id(ctx); + + if (!id) { + flog_err( + EC_ZEBRA_NHG_FIB_UPDATE, + "Failed trying to update a nexthop group in the kernel that does not have an ID"); + return -1; + } + + addattr32(&req.n, req_size, NHA_ID, id); + + if (cmd == RTM_NEWNEXTHOP) { + if (dplane_ctx_get_nhe_nh_grp_count(ctx)) + _netlink_nexthop_build_group( + &req.n, req_size, id, + dplane_ctx_get_nhe_nh_grp(ctx), + dplane_ctx_get_nhe_nh_grp_count(ctx)); + else { + const struct nexthop *nh = + dplane_ctx_get_nhe_ng(ctx)->nexthop; + afi_t afi = dplane_ctx_get_nhe_afi(ctx); + + if (afi == AFI_IP) + req.nhm.nh_family = AF_INET; + else if (afi == AFI_IP6) + req.nhm.nh_family = AF_INET6; + + switch (nh->type) { + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + addattr_l(&req.n, req_size, NHA_GATEWAY, + &nh->gate.ipv4, IPV4_MAX_BYTELEN); + break; + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_IPV6_IFINDEX: + addattr_l(&req.n, req_size, NHA_GATEWAY, + &nh->gate.ipv6, IPV6_MAX_BYTELEN); + break; + case NEXTHOP_TYPE_BLACKHOLE: + addattr_l(&req.n, req_size, NHA_BLACKHOLE, NULL, + 0); + /* Blackhole shouldn't have anymore attributes + */ + goto nexthop_done; + case NEXTHOP_TYPE_IFINDEX: + /* Don't need anymore info for this */ + break; + } + + if (!nh->ifindex) { + flog_err( + EC_ZEBRA_NHG_FIB_UPDATE, + "Context received for kernel nexthop update without an interface"); + return -1; + } + + addattr32(&req.n, req_size, NHA_OIF, nh->ifindex); + + num_labels = + build_label_stack(nh->nh_label, out_lse, + label_buf, sizeof(label_buf)); + + if (num_labels) { + /* Set the BoS bit */ + out_lse[num_labels - 1] |= + htonl(1 << MPLS_LS_S_SHIFT); + + /* + * TODO: MPLS unsupported for now in kernel. 
+ */ + if (req.nhm.nh_family == AF_MPLS) + goto nexthop_done; +#if 0 + addattr_l(&req.n, req_size, NHA_NEWDST, + &out_lse, + num_labels + * sizeof(mpls_lse_t)); +#endif + else { + struct rtattr *nest; + uint16_t encap = LWTUNNEL_ENCAP_MPLS; + + addattr_l(&req.n, req_size, + NHA_ENCAP_TYPE, &encap, + sizeof(uint16_t)); + nest = addattr_nest(&req.n, req_size, + NHA_ENCAP); + addattr_l(&req.n, req_size, + MPLS_IPTUNNEL_DST, &out_lse, + num_labels + * sizeof(mpls_lse_t)); + addattr_nest_end(&req.n, nest); + } + } + + nexthop_done: + if (IS_ZEBRA_DEBUG_KERNEL) { + char buf[NEXTHOP_STRLEN]; + + snprintfrr(buf, sizeof(buf), "%pNHv", nh); + zlog_debug("%s: ID (%u): %s (%u) %s ", __func__, + id, buf, nh->vrf_id, label_buf); + } + } + + req.nhm.nh_protocol = zebra2proto(dplane_ctx_get_nhe_type(ctx)); + + } else if (cmd != RTM_DELNEXTHOP) { + flog_err( + EC_ZEBRA_NHG_FIB_UPDATE, + "Nexthop group kernel update command (%d) does not exist", + cmd); + return -1; + } + + _netlink_nexthop_debug(cmd, id); + + return netlink_talk_info(netlink_talk_filter, &req.n, + dplane_ctx_get_ns(ctx), 0); +} + +/** + * kernel_nexthop_update() - Update/delete a nexthop from the kernel + * + * @ctx: Dataplane context + * + * Return: Dataplane result flag + */ +enum zebra_dplane_result kernel_nexthop_update(struct zebra_dplane_ctx *ctx) +{ + int cmd = 0; + int ret = 0; + + switch (dplane_ctx_get_op(ctx)) { + case DPLANE_OP_NH_DELETE: + cmd = RTM_DELNEXTHOP; + break; + case DPLANE_OP_NH_INSTALL: + case DPLANE_OP_NH_UPDATE: + cmd = RTM_NEWNEXTHOP; + break; + case DPLANE_OP_ROUTE_INSTALL: + case DPLANE_OP_ROUTE_UPDATE: + case DPLANE_OP_ROUTE_DELETE: + case DPLANE_OP_ROUTE_NOTIFY: + case DPLANE_OP_LSP_INSTALL: + case DPLANE_OP_LSP_UPDATE: + case DPLANE_OP_LSP_DELETE: + case DPLANE_OP_LSP_NOTIFY: + case DPLANE_OP_PW_INSTALL: + case DPLANE_OP_PW_UNINSTALL: + case DPLANE_OP_SYS_ROUTE_ADD: + case DPLANE_OP_SYS_ROUTE_DELETE: + case DPLANE_OP_ADDR_INSTALL: + case DPLANE_OP_ADDR_UNINSTALL: + case 
DPLANE_OP_MAC_INSTALL: + case DPLANE_OP_MAC_DELETE: + case DPLANE_OP_NEIGH_INSTALL: + case DPLANE_OP_NEIGH_UPDATE: + case DPLANE_OP_NEIGH_DELETE: + case DPLANE_OP_VTEP_ADD: + case DPLANE_OP_VTEP_DELETE: + case DPLANE_OP_NONE: + flog_err( + EC_ZEBRA_NHG_FIB_UPDATE, + "Context received for kernel nexthop update with incorrect OP code (%u)", + dplane_ctx_get_op(ctx)); + return ZEBRA_DPLANE_REQUEST_FAILURE; + } + + ret = netlink_nexthop(cmd, ctx); + + return (ret == 0 ? ZEBRA_DPLANE_REQUEST_SUCCESS + : ZEBRA_DPLANE_REQUEST_FAILURE); +} + /* * Update or delete a prefix from the kernel, * using info from a dataplane context. @@ -1919,6 +2226,298 @@ enum zebra_dplane_result kernel_route_update(struct zebra_dplane_ctx *ctx) ZEBRA_DPLANE_REQUEST_SUCCESS : ZEBRA_DPLANE_REQUEST_FAILURE); } +/** + * netlink_nexthop_process_nh() - Parse the gatway/if info from a new nexthop + * + * @tb: Netlink RTA data + * @family: Address family in the nhmsg + * @ifp: Interface connected - this should be NULL, we fill it in + * @ns_id: Namspace id + * + * Return: New nexthop + */ +static struct nexthop netlink_nexthop_process_nh(struct rtattr **tb, + unsigned char family, + struct interface **ifp, + ns_id_t ns_id) +{ + struct nexthop nh = {}; + void *gate = NULL; + enum nexthop_types_t type = 0; + int if_index = 0; + size_t sz = 0; + + if_index = *(int *)RTA_DATA(tb[NHA_OIF]); + + + if (tb[NHA_GATEWAY]) { + switch (family) { + case AF_INET: + type = NEXTHOP_TYPE_IPV4_IFINDEX; + sz = 4; + break; + case AF_INET6: + type = NEXTHOP_TYPE_IPV6_IFINDEX; + sz = 16; + break; + default: + flog_warn( + EC_ZEBRA_BAD_NHG_MESSAGE, + "Nexthop gateway with bad address family (%d) received from kernel", + family); + return nh; + } + gate = RTA_DATA(tb[NHA_GATEWAY]); + } else + type = NEXTHOP_TYPE_IFINDEX; + + if (type) + nh.type = type; + + if (gate) + memcpy(&(nh.gate), gate, sz); + + if (if_index) + nh.ifindex = if_index; + + *ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), nh.ifindex); + if (ifp) + 
nh.vrf_id = (*ifp)->vrf_id; + else { + flog_warn( + EC_ZEBRA_UNKNOWN_INTERFACE, + "%s: Unknown nexthop interface %u received, defaulting to VRF_DEFAULT", + __PRETTY_FUNCTION__, nh.ifindex); + + nh.vrf_id = VRF_DEFAULT; + } + + if (tb[NHA_ENCAP] && tb[NHA_ENCAP_TYPE]) { + uint16_t encap_type = *(uint16_t *)RTA_DATA(tb[NHA_ENCAP_TYPE]); + int num_labels = 0; + + mpls_label_t labels[MPLS_MAX_LABELS] = {0}; + + if (encap_type == LWTUNNEL_ENCAP_MPLS) + num_labels = parse_encap_mpls(tb[NHA_ENCAP], labels); + + if (num_labels) + nexthop_add_labels(&nh, ZEBRA_LSP_STATIC, num_labels, + labels); + } + + return nh; +} + +static int netlink_nexthop_process_group(struct rtattr **tb, + struct nh_grp *z_grp, int z_grp_size) +{ + uint8_t count = 0; + /* linux/nexthop.h group struct */ + struct nexthop_grp *n_grp = NULL; + + n_grp = (struct nexthop_grp *)RTA_DATA(tb[NHA_GROUP]); + count = (RTA_PAYLOAD(tb[NHA_GROUP]) / sizeof(*n_grp)); + + if (!count || (count * sizeof(*n_grp)) != RTA_PAYLOAD(tb[NHA_GROUP])) { + flog_warn(EC_ZEBRA_BAD_NHG_MESSAGE, + "Invalid nexthop group received from the kernel"); + return count; + } + +#if 0 + // TODO: Need type for something? + zlog_debug("Nexthop group type: %d", + *((uint16_t *)RTA_DATA(tb[NHA_GROUP_TYPE]))); + +#endif + + for (int i = 0; ((i < count) && (i < z_grp_size)); i++) { + z_grp[i].id = n_grp[i].id; + z_grp[i].weight = n_grp[i].weight; + } + return count; +} + +/** + * netlink_nexthop_change() - Read in change about nexthops from the kernel + * + * @h: Netlink message header + * @ns_id: Namspace id + * @startup: Are we reading under startup conditions? 
+ * + * Return: Result status + */ +int netlink_nexthop_change(struct nlmsghdr *h, ns_id_t ns_id, int startup) +{ + int len; + /* nexthop group id */ + uint32_t id; + unsigned char family; + int type; + afi_t afi = AFI_UNSPEC; + vrf_id_t vrf_id = 0; + struct interface *ifp = NULL; + struct nhmsg *nhm = NULL; + struct nexthop nh = {}; + struct nh_grp grp[MULTIPATH_NUM] = {}; + /* Count of nexthops in group array */ + uint8_t grp_count = 0; + struct rtattr *tb[NHA_MAX + 1] = {}; + + nhm = NLMSG_DATA(h); + + if (startup && h->nlmsg_type != RTM_NEWNEXTHOP) + return 0; + + len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct nhmsg)); + if (len < 0) { + zlog_warn( + "%s: Message received from netlink is of a broken size %d %zu", + __PRETTY_FUNCTION__, h->nlmsg_len, + (size_t)NLMSG_LENGTH(sizeof(struct nhmsg))); + return -1; + } + + netlink_parse_rtattr(tb, NHA_MAX, RTM_NHA(nhm), len); + + + if (!tb[NHA_ID]) { + flog_warn( + EC_ZEBRA_BAD_NHG_MESSAGE, + "Nexthop group without an ID received from the kernel"); + return -1; + } + + /* We use the ID key'd nhg table for kernel updates */ + id = *((uint32_t *)RTA_DATA(tb[NHA_ID])); + + family = nhm->nh_family; + afi = family2afi(family); + + type = proto2zebra(nhm->nh_protocol, 0, true); + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s ID (%u) %s NS %u", + nl_msg_type_to_str(h->nlmsg_type), id, + nl_family_to_str(family), ns_id); + + + if (h->nlmsg_type == RTM_NEWNEXTHOP) { + if (tb[NHA_GROUP]) { + /** + * If this is a group message its only going to have + * an array of nexthop IDs associated with it + */ + grp_count = netlink_nexthop_process_group( + tb, grp, array_size(grp)); + } else { + if (tb[NHA_BLACKHOLE]) { + /** + * This nexthop is just for blackhole-ing + * traffic, it should not have an OIF, GATEWAY, + * or ENCAP + */ + nh.type = NEXTHOP_TYPE_BLACKHOLE; + nh.bh_type = BLACKHOLE_UNSPEC; + } else if (tb[NHA_OIF]) + /** + * This is a true new nexthop, so we need + * to parse the gateway and device info + */ + nh = 
netlink_nexthop_process_nh(tb, family, + &ifp, ns_id); + else { + + flog_warn( + EC_ZEBRA_BAD_NHG_MESSAGE, + "Invalid Nexthop message received from the kernel with ID (%u)", + id); + return -1; + } + SET_FLAG(nh.flags, NEXTHOP_FLAG_ACTIVE); + if (nhm->nh_flags & RTNH_F_ONLINK) + SET_FLAG(nh.flags, NEXTHOP_FLAG_ONLINK); + vrf_id = nh.vrf_id; + } + + if (zebra_nhg_kernel_find(id, &nh, grp, grp_count, vrf_id, afi, + type, startup)) + return -1; + + } else if (h->nlmsg_type == RTM_DELNEXTHOP) + zebra_nhg_kernel_del(id); + + return 0; +} + +#if 0 /* Force off kernel nexthop group installs for now */ +/** + * netlink_request_nexthop() - Request nextop information from the kernel + * @zns: Zebra namespace + * @family: AF_* netlink family + * @type: RTM_* route type + * + * Return: Result status + */ +static int netlink_request_nexthop(struct zebra_ns *zns, int family, int type) +{ + struct { + struct nlmsghdr n; + struct nhmsg nhm; + } req; + + /* Form the request, specifying filter (rtattr) if needed. */ + memset(&req, 0, sizeof(req)); + req.n.nlmsg_type = type; + req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST; + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg)); + req.nhm.nh_family = family; + + return netlink_request(&zns->netlink_cmd, &req.n); +} + + +/** + * netlink_nexthop_read() - Nexthop read function using netlink interface + * + * @zns: Zebra name space + * + * Return: Result status + * Only called at bootstrap time. + */ +int netlink_nexthop_read(struct zebra_ns *zns) +{ + int ret; + struct zebra_dplane_info dp_info; + + zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/); + + /* Get nexthop objects */ + ret = netlink_request_nexthop(zns, AF_UNSPEC, RTM_GETNEXTHOP); + if (ret < 0) + return ret; + ret = netlink_parse_info(netlink_nexthop_change, &zns->netlink_cmd, + &dp_info, 0, 1); + + if (!ret) + /* If we succesfully read in nexthop objects, + * this kernel must support them. 
+ */ + supports_nh = true; + else if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("Nexthop objects not supported on this kernel"); + + return ret; +} +#else +int netlink_nexthop_read(struct zebra_ns *zns) +{ + return 0; +} +#endif + + int kernel_neigh_update(int add, int ifindex, uint32_t addr, char *lla, int llalen, ns_id_t ns_id) { @@ -1951,7 +2550,7 @@ static int netlink_vxlan_flood_update_ctx(const struct zebra_dplane_ctx *ctx, req.n.nlmsg_type = cmd; req.ndm.ndm_family = PF_BRIDGE; req.ndm.ndm_state = NUD_NOARP | NUD_PERMANENT; - req.ndm.ndm_flags |= NTF_SELF; // Handle by "self", not "master" + req.ndm.ndm_flags |= NTF_SELF; /* Handle by "self", not "master" */ addattr_l(&req.n, sizeof(req), diff --git a/zebra/rt_netlink.h b/zebra/rt_netlink.h index 29e0152bb..2b4b14514 100644 --- a/zebra/rt_netlink.h +++ b/zebra/rt_netlink.h @@ -69,6 +69,10 @@ extern int netlink_mpls_multipath(int cmd, struct zebra_dplane_ctx *ctx); extern int netlink_route_change(struct nlmsghdr *h, ns_id_t ns_id, int startup); extern int netlink_route_read(struct zebra_ns *zns); +extern int netlink_nexthop_change(struct nlmsghdr *h, ns_id_t ns_id, + int startup); +extern int netlink_nexthop_read(struct zebra_ns *zns); + extern int netlink_neigh_change(struct nlmsghdr *h, ns_id_t ns_id); extern int netlink_macfdb_read(struct zebra_ns *zns); extern int netlink_macfdb_read_for_bridge(struct zebra_ns *zns, diff --git a/zebra/rt_socket.c b/zebra/rt_socket.c index 981ef7a88..73b3dd0b4 100644 --- a/zebra/rt_socket.c +++ b/zebra/rt_socket.c @@ -364,6 +364,11 @@ enum zebra_dplane_result kernel_route_update(struct zebra_dplane_ctx *ctx) return res; } +enum zebra_dplane_result kernel_nexthop_update(struct zebra_dplane_ctx *ctx) +{ + return ZEBRA_DPLANE_REQUEST_SUCCESS; +} + int kernel_neigh_update(int add, int ifindex, uint32_t addr, char *lla, int llalen, ns_id_t ns_id) { diff --git a/zebra/rtread_getmsg.c b/zebra/rtread_getmsg.c index 725bb63a0..3ba5d6ee7 100644 --- a/zebra/rtread_getmsg.c +++ 
b/zebra/rtread_getmsg.c @@ -102,7 +102,7 @@ static void handle_route_entry(mib2_ipRouteEntry_t *routeEntry) nh.gate.ipv4.s_addr = routeEntry->ipRouteNextHop; rib_add(AFI_IP, SAFI_UNICAST, VRF_DEFAULT, ZEBRA_ROUTE_KERNEL, 0, - zebra_flags, &prefix, NULL, &nh, 0, 0, 0, 0, 0); + zebra_flags, &prefix, NULL, &nh, 0, 0, 0, 0, 0, 0); } void route_read(struct zebra_ns *zns) diff --git a/zebra/subdir.am b/zebra/subdir.am index 25040a271..28847ce09 100644 --- a/zebra/subdir.am +++ b/zebra/subdir.am @@ -137,6 +137,7 @@ noinst_HEADERS += \ zebra/zebra_mpls.h \ zebra/zebra_mroute.h \ zebra/zebra_nhg.h \ + zebra/zebra_nhg_private.h \ zebra/zebra_ns.h \ zebra/zebra_pbr.h \ zebra/zebra_ptm.h \ diff --git a/zebra/zapi_msg.c b/zebra/zapi_msg.c index e61e68b7f..d6ade783c 100644 --- a/zebra/zapi_msg.c +++ b/zebra/zapi_msg.c @@ -522,7 +522,7 @@ int zsend_redistribute_route(int cmd, struct zserv *client, struct zapi_route api; struct zapi_nexthop *api_nh; struct nexthop *nexthop; - int count = 0; + uint8_t count = 0; afi_t afi; size_t stream_size = MAX(ZEBRA_MAX_PACKET_SIZ, sizeof(struct zapi_route)); @@ -559,12 +559,7 @@ int zsend_redistribute_route(int cmd, struct zserv *client, memcpy(&api.src_prefix, src_p, sizeof(api.src_prefix)); } - /* Nexthops. */ - if (re->nexthop_active_num) { - SET_FLAG(api.message, ZAPI_MESSAGE_NEXTHOP); - api.nexthop_num = re->nexthop_active_num; - } - for (nexthop = re->ng.nexthop; nexthop; nexthop = nexthop->next) { + for (nexthop = re->ng->nexthop; nexthop; nexthop = nexthop->next) { if (!CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) continue; @@ -595,6 +590,12 @@ int zsend_redistribute_route(int cmd, struct zserv *client, count++; } + /* Nexthops. */ + if (count) { + SET_FLAG(api.message, ZAPI_MESSAGE_NEXTHOP); + api.nexthop_num = count; + } + /* Attributes. */ SET_FLAG(api.message, ZAPI_MESSAGE_DISTANCE); api.distance = re->distance; @@ -665,7 +666,8 @@ static int zsend_ipv4_nexthop_lookup_mrib(struct zserv *client, * nexthop we are looking up. 
Therefore, we will just iterate * over the top chain of nexthops. */ - for (nexthop = re->ng.nexthop; nexthop; nexthop = nexthop->next) + for (nexthop = re->ng->nexthop; nexthop; + nexthop = nexthop->next) if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) num += zserv_encode_nexthop(s, nexthop); @@ -1422,6 +1424,8 @@ static void zread_route_add(ZAPI_HANDLER_ARGS) re->flags = api.flags; re->uptime = monotime(NULL); re->vrf_id = vrf_id; + re->ng = nexthop_group_new(); + if (api.tableid) re->table = api.tableid; else @@ -1433,6 +1437,8 @@ static void zread_route_add(ZAPI_HANDLER_ARGS) "%s: received a route without nexthops for prefix %pFX from client %s", __func__, &api.prefix, zebra_route_string(client->proto)); + + nexthop_group_delete(&re->ng); XFREE(MTYPE_RE, re); return; } @@ -1531,7 +1537,7 @@ static void zread_route_add(ZAPI_HANDLER_ARGS) EC_ZEBRA_NEXTHOP_CREATION_FAILED, "%s: Nexthops Specified: %d but we failed to properly create one", __PRETTY_FUNCTION__, api.nexthop_num); - nexthops_free(re->ng.nexthop); + nexthop_group_delete(&re->ng); XFREE(MTYPE_RE, re); return; } @@ -1573,7 +1579,7 @@ static void zread_route_add(ZAPI_HANDLER_ARGS) flog_warn(EC_ZEBRA_RX_SRCDEST_WRONG_AFI, "%s: Received SRC Prefix but afi is not v6", __PRETTY_FUNCTION__); - nexthops_free(re->ng.nexthop); + nexthop_group_delete(&re->ng); XFREE(MTYPE_RE, re); return; } @@ -1627,7 +1633,7 @@ static void zread_route_del(ZAPI_HANDLER_ARGS) table_id = zvrf->table_id; rib_delete(afi, api.safi, zvrf_id(zvrf), api.type, api.instance, - api.flags, &api.prefix, src_p, NULL, table_id, api.metric, + api.flags, &api.prefix, src_p, NULL, 0, table_id, api.metric, api.distance, false); /* Stats */ diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c index bf343e06e..a88b0a38d 100644 --- a/zebra/zebra_dplane.c +++ b/zebra/zebra_dplane.c @@ -67,6 +67,20 @@ const uint32_t DPLANE_DEFAULT_NEW_WORK = 100; #endif /* DPLANE_DEBUG */ /* + * Nexthop information captured for nexthop/nexthop group updates + */ 
+struct dplane_nexthop_info { + uint32_t id; + afi_t afi; + vrf_id_t vrf_id; + int type; + + struct nexthop_group ng; + struct nh_grp nh_grp[MULTIPATH_NUM]; + uint8_t nh_grp_count; +}; + +/* * Route information captured for route updates. */ struct dplane_route_info { @@ -95,6 +109,9 @@ struct dplane_route_info { uint32_t zd_mtu; uint32_t zd_nexthop_mtu; + /* Nexthop hash entry info */ + struct dplane_nexthop_info nhe; + /* Nexthops */ struct nexthop_group zd_ng; @@ -321,6 +338,9 @@ static struct zebra_dplane_globals { _Atomic uint32_t dg_route_errors; _Atomic uint32_t dg_other_errors; + _Atomic uint32_t dg_nexthops_in; + _Atomic uint32_t dg_nexthop_errors; + _Atomic uint32_t dg_lsps_in; _Atomic uint32_t dg_lsp_errors; @@ -461,6 +481,18 @@ static void dplane_ctx_free(struct zebra_dplane_ctx **pctx) break; + case DPLANE_OP_NH_INSTALL: + case DPLANE_OP_NH_UPDATE: + case DPLANE_OP_NH_DELETE: { + if ((*pctx)->u.rinfo.nhe.ng.nexthop) { + /* This deals with recursive nexthops too */ + nexthops_free((*pctx)->u.rinfo.nhe.ng.nexthop); + + (*pctx)->u.rinfo.nhe.ng.nexthop = NULL; + } + break; + } + case DPLANE_OP_LSP_INSTALL: case DPLANE_OP_LSP_UPDATE: case DPLANE_OP_LSP_DELETE: @@ -638,6 +670,17 @@ const char *dplane_op2str(enum dplane_op_e op) ret = "ROUTE_NOTIFY"; break; + /* Nexthop update */ + case DPLANE_OP_NH_INSTALL: + ret = "NH_INSTALL"; + break; + case DPLANE_OP_NH_UPDATE: + ret = "NH_UPDATE"; + break; + case DPLANE_OP_NH_DELETE: + ret = "NH_DELETE"; + break; + case DPLANE_OP_LSP_INSTALL: ret = "LSP_INSTALL"; break; @@ -1015,6 +1058,51 @@ const struct zebra_dplane_info *dplane_ctx_get_ns( return &(ctx->zd_ns_info); } +/* Accessors for nexthop information */ +uint32_t dplane_ctx_get_nhe_id(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return ctx->u.rinfo.nhe.id; +} + +afi_t dplane_ctx_get_nhe_afi(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return ctx->u.rinfo.nhe.afi; +} + +vrf_id_t dplane_ctx_get_nhe_vrf_id(const struct 
zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return ctx->u.rinfo.nhe.vrf_id; +} + +int dplane_ctx_get_nhe_type(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return ctx->u.rinfo.nhe.type; +} + +const struct nexthop_group * +dplane_ctx_get_nhe_ng(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return &(ctx->u.rinfo.nhe.ng); +} + +const struct nh_grp * +dplane_ctx_get_nhe_nh_grp(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return ctx->u.rinfo.nhe.nh_grp; +} + +uint8_t dplane_ctx_get_nhe_nh_grp_count(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return ctx->u.rinfo.nhe.nh_grp_count; +} + /* Accessors for LSP information */ mpls_label_t dplane_ctx_get_in_label(const struct zebra_dplane_ctx *ctx) @@ -1419,7 +1507,7 @@ static int dplane_ctx_route_init(struct zebra_dplane_ctx *ctx, ctx->u.rinfo.zd_safi = info->safi; /* Copy nexthops; recursive info is included too */ - copy_nexthops(&(ctx->u.rinfo.zd_ng.nexthop), re->ng.nexthop, NULL); + copy_nexthops(&(ctx->u.rinfo.zd_ng.nexthop), re->ng->nexthop, NULL); /* Ensure that the dplane's nexthops flags are clear. */ for (ALL_NEXTHOPS(ctx->u.rinfo.zd_ng, nexthop)) @@ -1437,6 +1525,29 @@ static int dplane_ctx_route_init(struct zebra_dplane_ctx *ctx, zns = zvrf->zns; dplane_ctx_ns_init(ctx, zns, (op == DPLANE_OP_ROUTE_UPDATE)); +#ifdef HAVE_NETLINK + if (re->nhe_id) { + struct nhg_hash_entry *nhe = + zebra_nhg_resolve(zebra_nhg_lookup_id(re->nhe_id)); + + ctx->u.rinfo.nhe.id = nhe->id; + /* + * Check if the nhe is installed/queued before doing anything + * with this route. + * + * If its a delete we only use the prefix anyway, so this only + * matters for INSTALL/UPDATE. 
+ */ + if (((op == DPLANE_OP_ROUTE_INSTALL) + || (op == DPLANE_OP_ROUTE_UPDATE)) + && !CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED) + && !CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_QUEUED)) { + ret = ENOENT; + goto done; + } + } +#endif /* HAVE_NETLINK */ + /* Trying out the sequence number idea, so we can try to detect * when a result is stale. */ @@ -1449,6 +1560,64 @@ done: return ret; } +/** + * dplane_ctx_nexthop_init() - Initialize a context block for a nexthop update + * + * @ctx: Dataplane context to init + * @op: Operation being performed + * @nhe: Nexthop group hash entry + * + * Return: Result status + */ +static int dplane_ctx_nexthop_init(struct zebra_dplane_ctx *ctx, + enum dplane_op_e op, + struct nhg_hash_entry *nhe) +{ + struct zebra_vrf *zvrf = NULL; + struct zebra_ns *zns = NULL; + + int ret = EINVAL; + + if (!ctx || !nhe) + goto done; + + ctx->zd_op = op; + ctx->zd_status = ZEBRA_DPLANE_REQUEST_SUCCESS; + + /* Copy over nhe info */ + ctx->u.rinfo.nhe.id = nhe->id; + ctx->u.rinfo.nhe.afi = nhe->afi; + ctx->u.rinfo.nhe.vrf_id = nhe->vrf_id; + ctx->u.rinfo.nhe.type = nhe->type; + + nexthop_group_copy(&(ctx->u.rinfo.nhe.ng), nhe->nhg); + + /* If its a group, convert it to a grp array of ids */ + if (!zebra_nhg_depends_is_empty(nhe) + && !CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_RECURSIVE)) + ctx->u.rinfo.nhe.nh_grp_count = zebra_nhg_nhe2grp( + ctx->u.rinfo.nhe.nh_grp, nhe, MULTIPATH_NUM); + + zvrf = vrf_info_lookup(nhe->vrf_id); + + /* + * Fallback to default namespace if the vrf got ripped out from under + * us. + */ + zns = zvrf ? zvrf->zns : zebra_ns_lookup(NS_DEFAULT); + + /* + * TODO: Might not need to mark this as an update, since + * it probably won't require two messages + */ + dplane_ctx_ns_init(ctx, zns, (op == DPLANE_OP_NH_UPDATE)); + + ret = AOK; + +done: + return ret; +} + /* * Capture information for an LSP update in a dplane context. 
*/ @@ -1577,7 +1746,7 @@ static int dplane_ctx_pw_init(struct zebra_dplane_ctx *ctx, if (re) copy_nexthops(&(ctx->u.pw.nhg.nexthop), - re->ng.nexthop, NULL); + re->ng->nexthop, NULL); route_unlock_node(rn); } @@ -1673,7 +1842,7 @@ dplane_route_update_internal(struct route_node *rn, * We'll need these to do per-nexthop deletes. */ copy_nexthops(&(ctx->u.rinfo.zd_old_ng.nexthop), - old_re->ng.nexthop, NULL); + old_re->ng->nexthop, NULL); #endif /* !HAVE_NETLINK */ } @@ -1688,7 +1857,53 @@ dplane_route_update_internal(struct route_node *rn, if (ret == AOK) result = ZEBRA_DPLANE_REQUEST_QUEUED; else { - atomic_fetch_add_explicit(&zdplane_info.dg_route_errors, 1, + if (ret == ENOENT) + result = ZEBRA_DPLANE_REQUEST_SUCCESS; + else + atomic_fetch_add_explicit(&zdplane_info.dg_route_errors, + 1, memory_order_relaxed); + if (ctx) + dplane_ctx_free(&ctx); + } + + return result; +} + +/** + * dplane_nexthop_update_internal() - Helper for enqueuing nexthop changes + * + * @nhe: Nexthop group hash entry where the change occured + * @op: The operation to be enqued + * + * Return: Result of the change + */ +static enum zebra_dplane_result +dplane_nexthop_update_internal(struct nhg_hash_entry *nhe, enum dplane_op_e op) +{ + enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE; + int ret = EINVAL; + struct zebra_dplane_ctx *ctx = NULL; + + /* Obtain context block */ + ctx = dplane_ctx_alloc(); + if (!ctx) { + ret = ENOMEM; + goto done; + } + + ret = dplane_ctx_nexthop_init(ctx, op, nhe); + if (ret == AOK) + ret = dplane_update_enqueue(ctx); + +done: + /* Update counter */ + atomic_fetch_add_explicit(&zdplane_info.dg_nexthops_in, 1, + memory_order_relaxed); + + if (ret == AOK) + result = ZEBRA_DPLANE_REQUEST_QUEUED; + else { + atomic_fetch_add_explicit(&zdplane_info.dg_nexthop_errors, 1, memory_order_relaxed); if (ctx) dplane_ctx_free(&ctx); @@ -1853,6 +2068,45 @@ done: } /* + * Enqueue a nexthop add for the dataplane. 
+ */ +enum zebra_dplane_result dplane_nexthop_add(struct nhg_hash_entry *nhe) +{ + enum zebra_dplane_result ret = ZEBRA_DPLANE_REQUEST_FAILURE; + + if (nhe) + ret = dplane_nexthop_update_internal(nhe, DPLANE_OP_NH_INSTALL); + return ret; +} + +/* + * Enqueue a nexthop update for the dataplane. + * + * Might not need this func since zebra's nexthop objects should be immutable? + */ +enum zebra_dplane_result dplane_nexthop_update(struct nhg_hash_entry *nhe) +{ + enum zebra_dplane_result ret = ZEBRA_DPLANE_REQUEST_FAILURE; + + if (nhe) + ret = dplane_nexthop_update_internal(nhe, DPLANE_OP_NH_UPDATE); + return ret; +} + +/* + * Enqueue a nexthop removal for the dataplane. + */ +enum zebra_dplane_result dplane_nexthop_delete(struct nhg_hash_entry *nhe) +{ + enum zebra_dplane_result ret = ZEBRA_DPLANE_REQUEST_FAILURE; + + if (nhe) + ret = dplane_nexthop_update_internal(nhe, DPLANE_OP_NH_DELETE); + + return ret; +} + +/* * Enqueue LSP add for the dataplane. */ enum zebra_dplane_result dplane_lsp_add(zebra_lsp_t *lsp) @@ -2873,6 +3127,33 @@ kernel_dplane_address_update(struct zebra_dplane_ctx *ctx) return res; } +/** + * kernel_dplane_nexthop_update() - Handler for kernel nexthop updates + * + * @ctx: Dataplane context + * + * Return: Dataplane result flag + */ +static enum zebra_dplane_result +kernel_dplane_nexthop_update(struct zebra_dplane_ctx *ctx) +{ + enum zebra_dplane_result res; + + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) { + zlog_debug("ID (%u) Dplane nexthop update ctx %p op %s", + dplane_ctx_get_nhe_id(ctx), ctx, + dplane_op2str(dplane_ctx_get_op(ctx))); + } + + res = kernel_nexthop_update(ctx); + + if (res != ZEBRA_DPLANE_REQUEST_SUCCESS) + atomic_fetch_add_explicit(&zdplane_info.dg_nexthop_errors, 1, + memory_order_relaxed); + + return res; +} + /* * Handler for kernel-facing EVPN MAC address updates */ @@ -2967,6 +3248,12 @@ static int kernel_dplane_process_func(struct zebra_dplane_provider *prov) res = kernel_dplane_route_update(ctx); break; + case 
DPLANE_OP_NH_INSTALL: + case DPLANE_OP_NH_UPDATE: + case DPLANE_OP_NH_DELETE: + res = kernel_dplane_nexthop_update(ctx); + break; + case DPLANE_OP_LSP_INSTALL: case DPLANE_OP_LSP_UPDATE: case DPLANE_OP_LSP_DELETE: diff --git a/zebra/zebra_dplane.h b/zebra/zebra_dplane.h index be945632c..fede3bfcc 100644 --- a/zebra/zebra_dplane.h +++ b/zebra/zebra_dplane.h @@ -30,6 +30,7 @@ #include "zebra/rib.h" #include "zebra/zserv.h" #include "zebra/zebra_mpls.h" +#include "zebra/zebra_nhg.h" #ifdef __cplusplus extern "C" { @@ -108,6 +109,11 @@ enum dplane_op_e { DPLANE_OP_ROUTE_DELETE, DPLANE_OP_ROUTE_NOTIFY, + /* Nexthop update */ + DPLANE_OP_NH_INSTALL, + DPLANE_OP_NH_UPDATE, + DPLANE_OP_NH_DELETE, + /* LSP update */ DPLANE_OP_LSP_INSTALL, DPLANE_OP_LSP_UPDATE, @@ -269,6 +275,17 @@ const struct nexthop_group *dplane_ctx_get_ng( const struct nexthop_group *dplane_ctx_get_old_ng( const struct zebra_dplane_ctx *ctx); +/* Accessors for nexthop information */ +uint32_t dplane_ctx_get_nhe_id(const struct zebra_dplane_ctx *ctx); +afi_t dplane_ctx_get_nhe_afi(const struct zebra_dplane_ctx *ctx); +vrf_id_t dplane_ctx_get_nhe_vrf_id(const struct zebra_dplane_ctx *ctx); +int dplane_ctx_get_nhe_type(const struct zebra_dplane_ctx *ctx); +const struct nexthop_group * +dplane_ctx_get_nhe_ng(const struct zebra_dplane_ctx *ctx); +const struct nh_grp * +dplane_ctx_get_nhe_nh_grp(const struct zebra_dplane_ctx *ctx); +uint8_t dplane_ctx_get_nhe_nh_grp_count(const struct zebra_dplane_ctx *ctx); + /* Accessors for LSP information */ mpls_label_t dplane_ctx_get_in_label(const struct zebra_dplane_ctx *ctx); void dplane_ctx_set_in_label(struct zebra_dplane_ctx *ctx, @@ -373,6 +390,16 @@ enum zebra_dplane_result dplane_route_notif_update( enum dplane_op_e op, struct zebra_dplane_ctx *ctx); + +/* Forward ref of nhg_hash_entry */ +struct nhg_hash_entry; +/* + * Enqueue a nexthop change operation for the dataplane. 
+ */ +enum zebra_dplane_result dplane_nexthop_add(struct nhg_hash_entry *nhe); +enum zebra_dplane_result dplane_nexthop_update(struct nhg_hash_entry *nhe); +enum zebra_dplane_result dplane_nexthop_delete(struct nhg_hash_entry *nhe); + /* * Enqueue LSP change operations for the dataplane. */ diff --git a/zebra/zebra_errors.c b/zebra/zebra_errors.c index a7e5147af..5a0905d59 100644 --- a/zebra/zebra_errors.c +++ b/zebra/zebra_errors.c @@ -283,6 +283,39 @@ static struct log_ref ferr_zebra_err[] = { .description = "Zebra received an event from inotify, but failed to read what it was.", .suggestion = "Notify a developer.", }, + { + .code = EC_ZEBRA_NHG_TABLE_INSERT_FAILED, + .title = + "Nexthop Group Hash Table Insert Failure", + .description = + "Zebra failed in inserting a Nexthop Group into its hash tables.", + .suggestion = + "Check to see if the entry already exists or if the netlink message was parsed incorrectly." + }, + { + .code = EC_ZEBRA_NHG_SYNC, + .title = + "Zebra's Nexthop Groups are out of sync", + .description = + "Zebra's nexthop group tables are out of sync with the nexthop groups in the fib.", + .suggestion = + "Check the current status of the kernels nexthop groups and compare it to Zebra's." + }, + { + .code = EC_ZEBRA_NHG_FIB_UPDATE, + .title = + "Zebra failed updating the fib with Nexthop Group", + .description = + "Zebra was not able to successfully install a new nexthop group into the fib", + .suggestion = + "Check to see if the nexthop group on the route you tried to install is valid." + }, + { + .code = EC_ZEBRA_IF_LOOKUP_FAILED, + .title = "Zebra interface lookup failed", + .description = "Zebra attempted to look up a interface for a particular vrf_id and interface index, but didn't find anything.", + .suggestion = "If you entered a command to trigger this error, make sure you entered the arguments correctly. Check your config file for any potential errors. 
If these look correct, seek help.", + }, /* Warnings */ { .code = EC_ZEBRAING_LM_PROTO_MISMATCH, @@ -729,6 +762,24 @@ static struct log_ref ferr_zebra_err[] = { "Check network topology to detect duplicate host IP for correctness.", }, { + .code = EC_ZEBRA_BAD_NHG_MESSAGE, + .title = + "Bad Nexthop Group Message", + .description = + "Zebra received Nexthop Group message from the kernel that it cannot process.", + .suggestion = + "Check the kernel's link states and routing table to see how it matches ours." + }, + { + .code = EC_ZEBRA_DUPLICATE_NHG_MESSAGE, + .title = + "Duplicate Nexthop Group Message", + .description = + "Zebra received Nexthop Group message from the kernel that it is identical to one it/we already have but with a different ID.", + .suggestion = + "See if the nexthop you are trying to add is already present in the fib." + }, + { .code = END_FERR, } }; diff --git a/zebra/zebra_errors.h b/zebra/zebra_errors.h index 222055dd8..f9ccc2db2 100644 --- a/zebra/zebra_errors.h +++ b/zebra/zebra_errors.h @@ -72,6 +72,10 @@ enum zebra_log_refs { EC_ZEBRA_VNI_DEL_FAILED, EC_ZEBRA_VTEP_ADD_FAILED, EC_ZEBRA_VNI_ADD_FAILED, + EC_ZEBRA_NHG_TABLE_INSERT_FAILED, + EC_ZEBRA_NHG_SYNC, + EC_ZEBRA_NHG_FIB_UPDATE, + EC_ZEBRA_IF_LOOKUP_FAILED, /* warnings */ EC_ZEBRA_NS_NOTIFY_READ, EC_ZEBRAING_LM_PROTO_MISMATCH, @@ -125,6 +129,8 @@ enum zebra_log_refs { EC_ZEBRA_DUP_MAC_DETECTED, EC_ZEBRA_DUP_IP_INHERIT_DETECTED, EC_ZEBRA_DUP_IP_DETECTED, + EC_ZEBRA_BAD_NHG_MESSAGE, + EC_ZEBRA_DUPLICATE_NHG_MESSAGE, }; void zebra_error_init(void); diff --git a/zebra/zebra_fpm_dt.c b/zebra/zebra_fpm_dt.c index e87fa0ad7..debcf60ee 100644 --- a/zebra/zebra_fpm_dt.c +++ b/zebra/zebra_fpm_dt.c @@ -90,7 +90,7 @@ static int zfpm_dt_find_route(rib_dest_t **dest_p, struct route_entry **re_p) if (!re) continue; - if (re->nexthop_active_num <= 0) + if (nexthop_group_active_nexthop_num(re->ng) == 0) continue; *dest_p = dest; diff --git a/zebra/zebra_fpm_netlink.c b/zebra/zebra_fpm_netlink.c index 
f347d3955..b54d8fbc1 100644 --- a/zebra/zebra_fpm_netlink.c +++ b/zebra/zebra_fpm_netlink.c @@ -314,7 +314,7 @@ static int netlink_route_info_fill(netlink_route_info_t *ri, int cmd, ri->rtm_type = RTN_UNICAST; ri->metric = &re->metric; - for (ALL_NEXTHOPS(re->ng, nexthop)) { + for (ALL_NEXTHOPS_PTR(re->ng, nexthop)) { if (ri->num_nhs >= zrouter.multipath_num) break; diff --git a/zebra/zebra_fpm_protobuf.c b/zebra/zebra_fpm_protobuf.c index 3054b8a34..a11517ab8 100644 --- a/zebra/zebra_fpm_protobuf.c +++ b/zebra/zebra_fpm_protobuf.c @@ -173,7 +173,7 @@ static Fpm__AddRoute *create_add_route_message(qpb_allocator_t *allocator, * Figure out the set of nexthops to be added to the message. */ num_nhs = 0; - for (ALL_NEXTHOPS(re->ng, nexthop)) { + for (ALL_NEXTHOPS_PTR(re->ng, nexthop)) { if (num_nhs >= zrouter.multipath_num) break; diff --git a/zebra/zebra_mpls.c b/zebra/zebra_mpls.c index 8088ec1bf..ef1bd0260 100644 --- a/zebra/zebra_mpls.c +++ b/zebra/zebra_mpls.c @@ -185,7 +185,7 @@ static int lsp_install(struct zebra_vrf *zvrf, mpls_label_t label, * the label advertised by the recursive nexthop (plus we don't have the * logic yet to push multiple labels). */ - for (nexthop = re->ng.nexthop; nexthop; nexthop = nexthop->next) { + for (nexthop = re->ng->nexthop; nexthop; nexthop = nexthop->next) { /* Skip inactive and recursive entries. 
*/ if (!CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) continue; @@ -635,7 +635,7 @@ static int nhlfe_nexthop_active_ipv4(zebra_nhlfe_t *nhlfe, || !CHECK_FLAG(match->flags, ZEBRA_FLAG_SELECTED)) continue; - for (match_nh = match->ng.nexthop; match_nh; + for (match_nh = match->ng->nexthop; match_nh; match_nh = match_nh->next) { if (match->type == ZEBRA_ROUTE_CONNECT || nexthop->ifindex == match_nh->ifindex) { @@ -686,10 +686,10 @@ static int nhlfe_nexthop_active_ipv6(zebra_nhlfe_t *nhlfe, break; } - if (!match || !match->ng.nexthop) + if (!match || !match->ng->nexthop) return 0; - nexthop->ifindex = match->ng.nexthop->ifindex; + nexthop->ifindex = match->ng->nexthop->ifindex; return 1; } @@ -2590,11 +2590,13 @@ int mpls_ftn_update(int add, struct zebra_vrf *zvrf, enum lsp_types_t type, struct route_node *rn; struct route_entry *re; struct nexthop *nexthop; + struct nexthop_group new_grp = {}; + struct nhg_hash_entry *nhe = NULL; bool found; + afi_t afi = family2afi(prefix->family); /* Lookup table. */ - table = zebra_vrf_table(family2afi(prefix->family), SAFI_UNICAST, - zvrf_id(zvrf)); + table = zebra_vrf_table(afi, SAFI_UNICAST, zvrf_id(zvrf)); if (!table) return -1; @@ -2610,8 +2612,15 @@ int mpls_ftn_update(int add, struct zebra_vrf *zvrf, enum lsp_types_t type, if (re == NULL) return -1; + /* + * Copy over current nexthops into a temporary group. + * We can't just change the values here since we are hashing + * on labels. 
We need to create a whole new group + */ + nexthop_group_copy(&new_grp, re->ng); + found = false; - for (nexthop = re->ng.nexthop; nexthop; nexthop = nexthop->next) { + for (nexthop = new_grp.nexthop; nexthop; nexthop = nexthop->next) { switch (nexthop->type) { case NEXTHOP_TYPE_IPV4: case NEXTHOP_TYPE_IPV4_IFINDEX: @@ -2625,7 +2634,7 @@ int mpls_ftn_update(int add, struct zebra_vrf *zvrf, enum lsp_types_t type, continue; if (!mpls_ftn_update_nexthop(add, nexthop, type, out_label)) - return 0; + break; found = true; break; case NEXTHOP_TYPE_IPV6: @@ -2640,7 +2649,7 @@ int mpls_ftn_update(int add, struct zebra_vrf *zvrf, enum lsp_types_t type, continue; if (!mpls_ftn_update_nexthop(add, nexthop, type, out_label)) - return 0; + break; found = true; break; default: @@ -2648,14 +2657,19 @@ int mpls_ftn_update(int add, struct zebra_vrf *zvrf, enum lsp_types_t type, } } - if (!found) - return -1; + if (found) { + nhe = zebra_nhg_rib_find(0, &new_grp, afi); - SET_FLAG(re->status, ROUTE_ENTRY_CHANGED); - SET_FLAG(re->status, ROUTE_ENTRY_LABELS_CHANGED); - rib_queue_add(rn); + zebra_nhg_re_update_ref(re, nhe); - return 0; + SET_FLAG(re->status, ROUTE_ENTRY_CHANGED); + SET_FLAG(re->status, ROUTE_ENTRY_LABELS_CHANGED); + rib_queue_add(rn); + } + + nexthops_free(new_grp.nexthop); + + return found ? 
0 : -1; } int mpls_ftn_uninstall(struct zebra_vrf *zvrf, enum lsp_types_t type, @@ -2684,7 +2698,7 @@ int mpls_ftn_uninstall(struct zebra_vrf *zvrf, enum lsp_types_t type, if (re == NULL) return -1; - for (nexthop = re->ng.nexthop; nexthop; nexthop = nexthop->next) + for (nexthop = re->ng->nexthop; nexthop; nexthop = nexthop->next) nexthop_del_labels(nexthop); SET_FLAG(re->status, ROUTE_ENTRY_CHANGED); @@ -2889,7 +2903,12 @@ static void mpls_ftn_uninstall_all(struct zebra_vrf *zvrf, for (rn = route_top(table); rn; rn = route_next(rn)) { update = 0; RNODE_FOREACH_RE (rn, re) { - for (nexthop = re->ng.nexthop; nexthop; + struct nexthop_group new_grp = {}; + struct nhg_hash_entry *nhe = NULL; + + nexthop_group_copy(&new_grp, re->ng); + + for (nexthop = new_grp.nexthop; nexthop; nexthop = nexthop->next) { if (nexthop->nh_label_type != lsp_type) continue; @@ -2900,6 +2919,14 @@ static void mpls_ftn_uninstall_all(struct zebra_vrf *zvrf, ROUTE_ENTRY_LABELS_CHANGED); update = 1; } + + if (CHECK_FLAG(re->status, + ROUTE_ENTRY_LABELS_CHANGED)) { + nhe = zebra_nhg_rib_find(0, &new_grp, afi); + zebra_nhg_re_update_ref(re, nhe); + } + + nexthops_free(new_grp.nexthop); } if (update) diff --git a/zebra/zebra_nhg.c b/zebra/zebra_nhg.c index 4e696b39a..2bb117b27 100644 --- a/zebra/zebra_nhg.c +++ b/zebra/zebra_nhg.c @@ -26,14 +26,1094 @@ #include "lib/nexthop_group_private.h" #include "lib/routemap.h" #include "lib/mpls.h" +#include "lib/jhash.h" +#include "lib/debug.h" #include "zebra/connected.h" #include "zebra/debug.h" #include "zebra/zebra_router.h" -#include "zebra/zebra_nhg.h" +#include "zebra/zebra_nhg_private.h" #include "zebra/zebra_rnh.h" #include "zebra/zebra_routemap.h" +#include "zebra/zebra_memory.h" +#include "zebra/zserv.h" #include "zebra/rt.h" +#include "zebra_errors.h" +#include "zebra_dplane.h" +#include "zebra/interface.h" + +DEFINE_MTYPE_STATIC(ZEBRA, NHG, "Nexthop Group Entry"); +DEFINE_MTYPE_STATIC(ZEBRA, NHG_CONNECTED, "Nexthop Group Connected"); 
+DEFINE_MTYPE_STATIC(ZEBRA, NHG_CTX, "Nexthop Group Context"); + +/* id counter to keep in sync with kernel */ +uint32_t id_counter; + +static struct nhg_hash_entry *depends_find(struct nexthop *nh, afi_t afi); +static void depends_add(struct nhg_connected_tree_head *head, + struct nhg_hash_entry *depend); +static struct nhg_hash_entry * +depends_find_add(struct nhg_connected_tree_head *head, struct nexthop *nh, + afi_t afi); +static struct nhg_hash_entry * +depends_find_id_add(struct nhg_connected_tree_head *head, uint32_t id); +static void depends_decrement_free(struct nhg_connected_tree_head *head); + + +static void nhg_connected_free(struct nhg_connected *dep) +{ + XFREE(MTYPE_NHG_CONNECTED, dep); +} + +static struct nhg_connected *nhg_connected_new(struct nhg_hash_entry *nhe) +{ + struct nhg_connected *new = NULL; + + new = XCALLOC(MTYPE_NHG_CONNECTED, sizeof(struct nhg_connected)); + new->nhe = nhe; + + return new; +} + +void nhg_connected_tree_free(struct nhg_connected_tree_head *head) +{ + struct nhg_connected *rb_node_dep = NULL; + + if (!nhg_connected_tree_is_empty(head)) { + frr_each_safe(nhg_connected_tree, head, rb_node_dep) { + nhg_connected_tree_del(head, rb_node_dep); + nhg_connected_free(rb_node_dep); + } + } +} + +bool nhg_connected_tree_is_empty(const struct nhg_connected_tree_head *head) +{ + return nhg_connected_tree_count(head) ? 
false : true; +} + +struct nhg_connected * +nhg_connected_tree_root(struct nhg_connected_tree_head *head) +{ + return nhg_connected_tree_first(head); +} + +void nhg_connected_tree_del_nhe(struct nhg_connected_tree_head *head, + struct nhg_hash_entry *depend) +{ + struct nhg_connected lookup = {}; + struct nhg_connected *remove = NULL; + + lookup.nhe = depend; + + /* Lookup to find the element, then remove it */ + remove = nhg_connected_tree_find(head, &lookup); + remove = nhg_connected_tree_del(head, remove); + + if (remove) + nhg_connected_free(remove); +} + +void nhg_connected_tree_add_nhe(struct nhg_connected_tree_head *head, + struct nhg_hash_entry *depend) +{ + struct nhg_connected *new = NULL; + + new = nhg_connected_new(depend); + + if (new) + nhg_connected_tree_add(head, new); +} + +static void +nhg_connected_tree_decrement_ref(struct nhg_connected_tree_head *head) +{ + struct nhg_connected *rb_node_dep = NULL; + + frr_each_safe(nhg_connected_tree, head, rb_node_dep) { + zebra_nhg_decrement_ref(rb_node_dep->nhe); + } +} + +static void +nhg_connected_tree_increment_ref(struct nhg_connected_tree_head *head) +{ + struct nhg_connected *rb_node_dep = NULL; + + frr_each(nhg_connected_tree, head, rb_node_dep) { + zebra_nhg_increment_ref(rb_node_dep->nhe); + } +} + +struct nhg_hash_entry *zebra_nhg_resolve(struct nhg_hash_entry *nhe) +{ + if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_RECURSIVE) + && !zebra_nhg_depends_is_empty(nhe)) { + nhe = nhg_connected_tree_root(&nhe->nhg_depends)->nhe; + return zebra_nhg_resolve(nhe); + } + + return nhe; +} + +unsigned int zebra_nhg_depends_count(const struct nhg_hash_entry *nhe) +{ + return nhg_connected_tree_count(&nhe->nhg_depends); +} + +bool zebra_nhg_depends_is_empty(const struct nhg_hash_entry *nhe) +{ + return nhg_connected_tree_is_empty(&nhe->nhg_depends); +} + +static void zebra_nhg_depends_del(struct nhg_hash_entry *from, + struct nhg_hash_entry *depend) +{ + nhg_connected_tree_del_nhe(&from->nhg_depends, depend); +} + 
+static void zebra_nhg_depends_init(struct nhg_hash_entry *nhe) +{ + nhg_connected_tree_init(&nhe->nhg_depends); +} + +unsigned int zebra_nhg_dependents_count(const struct nhg_hash_entry *nhe) +{ + return nhg_connected_tree_count(&nhe->nhg_dependents); +} + + +bool zebra_nhg_dependents_is_empty(const struct nhg_hash_entry *nhe) +{ + return nhg_connected_tree_is_empty(&nhe->nhg_dependents); +} + +static void zebra_nhg_dependents_del(struct nhg_hash_entry *from, + struct nhg_hash_entry *dependent) +{ + nhg_connected_tree_del_nhe(&from->nhg_dependents, dependent); +} + +static void zebra_nhg_dependents_add(struct nhg_hash_entry *to, + struct nhg_hash_entry *dependent) +{ + nhg_connected_tree_add_nhe(&to->nhg_dependents, dependent); +} + +static void zebra_nhg_dependents_init(struct nhg_hash_entry *nhe) +{ + nhg_connected_tree_init(&nhe->nhg_dependents); +} + +/* Release this nhe from anything depending on it */ +static void zebra_nhg_dependents_release(struct nhg_hash_entry *nhe) +{ + struct nhg_connected *rb_node_dep = NULL; + + frr_each_safe(nhg_connected_tree, &nhe->nhg_dependents, rb_node_dep) { + zebra_nhg_depends_del(rb_node_dep->nhe, nhe); + /* recheck validity of the dependent */ + zebra_nhg_check_valid(rb_node_dep->nhe); + } +} + +/* Release this nhe from anything that it depends on */ +static void zebra_nhg_depends_release(struct nhg_hash_entry *nhe) +{ + if (!zebra_nhg_depends_is_empty(nhe)) { + struct nhg_connected *rb_node_dep = NULL; + + frr_each_safe(nhg_connected_tree, &nhe->nhg_depends, + rb_node_dep) { + zebra_nhg_dependents_del(rb_node_dep->nhe, nhe); + } + } +} + + +struct nhg_hash_entry *zebra_nhg_lookup_id(uint32_t id) +{ + struct nhg_hash_entry lookup = {}; + + lookup.id = id; + return hash_lookup(zrouter.nhgs_id, &lookup); +} + +static int zebra_nhg_insert_id(struct nhg_hash_entry *nhe) +{ + if (hash_lookup(zrouter.nhgs_id, nhe)) { + flog_err( + EC_ZEBRA_NHG_TABLE_INSERT_FAILED, + "Failed inserting NHG id=%u into the ID hash table, entry 
already exists", + nhe->id); + return -1; + } + + hash_get(zrouter.nhgs_id, nhe, hash_alloc_intern); + + return 0; +} + +static void zebra_nhg_set_if(struct nhg_hash_entry *nhe, struct interface *ifp) +{ + nhe->ifp = ifp; + if_nhg_dependents_add(ifp, nhe); +} + +static void +zebra_nhg_connect_depends(struct nhg_hash_entry *nhe, + struct nhg_connected_tree_head nhg_depends) +{ + struct nhg_connected *rb_node_dep = NULL; + + /* This has been allocated higher above in the stack. Could probably + * re-allocate and free the old stuff but just using the same memory + * for now. Otherwise, their might be a time trade-off for repeated + * alloc/frees as startup. + */ + nhe->nhg_depends = nhg_depends; + + /* Attach backpointer to anything that it depends on */ + zebra_nhg_dependents_init(nhe); + if (!zebra_nhg_depends_is_empty(nhe)) { + frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) { + zebra_nhg_dependents_add(rb_node_dep->nhe, nhe); + } + } + + /* Add the ifp now if its not a group or recursive and has ifindex */ + if (zebra_nhg_depends_is_empty(nhe) && nhe->nhg->nexthop + && nhe->nhg->nexthop->ifindex) { + struct interface *ifp = NULL; + + ifp = if_lookup_by_index(nhe->nhg->nexthop->ifindex, + nhe->vrf_id); + if (ifp) + zebra_nhg_set_if(nhe, ifp); + else + flog_err( + EC_ZEBRA_IF_LOOKUP_FAILED, + "Zebra failed to lookup an interface with ifindex=%d in vrf=%u for NHE id=%u", + nhe->nhg->nexthop->ifindex, nhe->vrf_id, + nhe->id); + } +} + +static struct nhg_hash_entry *zebra_nhg_copy(struct nhg_hash_entry *copy, + uint32_t id) +{ + struct nhg_hash_entry *nhe; + + nhe = XCALLOC(MTYPE_NHG, sizeof(struct nhg_hash_entry)); + + nhe->id = id; + + nhe->nhg = nexthop_group_new(); + nexthop_group_copy(nhe->nhg, copy->nhg); + + nhe->vrf_id = copy->vrf_id; + nhe->afi = copy->afi; + nhe->type = copy->type ? 
copy->type : ZEBRA_ROUTE_NHG; + nhe->refcnt = 0; + nhe->dplane_ref = zebra_router_get_next_sequence(); + + return nhe; +} + +/* Allocation via hash handler */ +static void *zebra_nhg_hash_alloc(void *arg) +{ + struct nhg_hash_entry *nhe = NULL; + struct nhg_hash_entry *copy = arg; + + nhe = zebra_nhg_copy(copy, copy->id); + + /* Mark duplicate nexthops in a group at creation time. */ + nexthop_group_mark_duplicates(nhe->nhg); + + zebra_nhg_connect_depends(nhe, copy->nhg_depends); + zebra_nhg_insert_id(nhe); + + return nhe; +} + +uint32_t zebra_nhg_hash_key(const void *arg) +{ + const struct nhg_hash_entry *nhe = arg; + + uint32_t key = 0x5a351234; + + key = jhash_3words(nhe->vrf_id, nhe->afi, nexthop_group_hash(nhe->nhg), + key); + + return key; +} + +uint32_t zebra_nhg_id_key(const void *arg) +{ + const struct nhg_hash_entry *nhe = arg; + + return nhe->id; +} + +bool zebra_nhg_hash_equal(const void *arg1, const void *arg2) +{ + const struct nhg_hash_entry *nhe1 = arg1; + const struct nhg_hash_entry *nhe2 = arg2; + + /* No matter what if they equal IDs, assume equal */ + if (nhe1->id && nhe2->id && (nhe1->id == nhe2->id)) + return true; + + if (nhe1->vrf_id != nhe2->vrf_id) + return false; + + if (nhe1->afi != nhe2->afi) + return false; + + if (nexthop_group_active_nexthop_num_no_recurse(nhe1->nhg) + != nexthop_group_active_nexthop_num_no_recurse(nhe2->nhg)) + return false; + + if (!nexthop_group_equal_no_recurse(nhe1->nhg, nhe2->nhg)) + return false; + + return true; +} + +bool zebra_nhg_hash_id_equal(const void *arg1, const void *arg2) +{ + const struct nhg_hash_entry *nhe1 = arg1; + const struct nhg_hash_entry *nhe2 = arg2; + + return nhe1->id == nhe2->id; +} + +static int zebra_nhg_process_grp(struct nexthop_group *nhg, + struct nhg_connected_tree_head *depends, + struct nh_grp *grp, uint8_t count) +{ + nhg_connected_tree_init(depends); + + for (int i = 0; i < count; i++) { + struct nhg_hash_entry *depend = NULL; + /* We do not care about nexthop_grp.weight at 
+ * this time. But we should figure out + * how to adapt this to our code in + * the future. + */ + depend = depends_find_id_add(depends, grp[i].id); + + if (!depend) { + flog_err( + EC_ZEBRA_NHG_SYNC, + "Received Nexthop Group from the kernel with a dependent Nexthop ID (%u) which we do not have in our table", + grp[i].id); + return -1; + } + + /* + * If this is a nexthop with its own group + * dependencies, add them as well. Not sure its + * even possible to have a group within a group + * in the kernel. + */ + + copy_nexthops(&nhg->nexthop, depend->nhg->nexthop, NULL); + } + + return 0; +} + +static void handle_recursive_depend(struct nhg_connected_tree_head *nhg_depends, + struct nexthop *nh, afi_t afi) +{ + struct nhg_hash_entry *depend = NULL; + struct nexthop_group resolved_ng = {}; + + _nexthop_group_add_sorted(&resolved_ng, nh); + + depend = zebra_nhg_rib_find(0, &resolved_ng, afi); + depends_add(nhg_depends, depend); +} + +static bool zebra_nhg_find(struct nhg_hash_entry **nhe, uint32_t id, + struct nexthop_group *nhg, + struct nhg_connected_tree_head *nhg_depends, + vrf_id_t vrf_id, afi_t afi, int type) +{ + struct nhg_hash_entry lookup = {}; + + uint32_t old_id_counter = id_counter; + + bool created = false; + bool recursive = false; + + /* + * If it has an id at this point, we must have gotten it from the kernel + */ + lookup.id = id ? id : ++id_counter; + + lookup.type = type ? type : ZEBRA_ROUTE_NHG; + lookup.nhg = nhg; + + if (lookup.nhg->nexthop->next) { + /* Groups can have all vrfs and AF's in them */ + lookup.afi = AFI_UNSPEC; + lookup.vrf_id = 0; + } else { + switch (lookup.nhg->nexthop->type) { + case (NEXTHOP_TYPE_IFINDEX): + case (NEXTHOP_TYPE_BLACKHOLE): + /* + * This switch case handles setting the afi different + * for ipv4/v6 routes. Ifindex/blackhole nexthop + * objects cannot be ambiguous, they must be Address + * Family specific. 
If we get here, we will either use + * the AF of the route, or the one we got passed from + * here from the kernel. + */ + lookup.afi = afi; + break; + case (NEXTHOP_TYPE_IPV4_IFINDEX): + case (NEXTHOP_TYPE_IPV4): + lookup.afi = AFI_IP; + break; + case (NEXTHOP_TYPE_IPV6_IFINDEX): + case (NEXTHOP_TYPE_IPV6): + lookup.afi = AFI_IP6; + break; + } + + lookup.vrf_id = vrf_id; + } + + if (id) + (*nhe) = zebra_nhg_lookup_id(id); + else + (*nhe) = hash_lookup(zrouter.nhgs, &lookup); + + /* If it found an nhe in our tables, this new ID is unused */ + if (*nhe) + id_counter = old_id_counter; + + if (!(*nhe)) { + /* Only hash/lookup the depends if the first lookup + * fails to find something. This should hopefully save a + * lot of cycles for larger ecmp sizes. + */ + if (nhg_depends) + /* If you don't want to hash on each nexthop in the + * nexthop group struct you can pass the depends + * directly. Kernel-side we do this since it just looks + * them up via IDs. + */ + lookup.nhg_depends = *nhg_depends; + else { + if (nhg->nexthop->next) { + zebra_nhg_depends_init(&lookup); + + /* If its a group, create a dependency tree */ + struct nexthop *nh = NULL; + + for (nh = nhg->nexthop; nh; nh = nh->next) + depends_find_add(&lookup.nhg_depends, + nh, afi); + } else if (CHECK_FLAG(nhg->nexthop->flags, + NEXTHOP_FLAG_RECURSIVE)) { + zebra_nhg_depends_init(&lookup); + handle_recursive_depend(&lookup.nhg_depends, + nhg->nexthop->resolved, + afi); + recursive = true; + } + } + + (*nhe) = hash_get(zrouter.nhgs, &lookup, zebra_nhg_hash_alloc); + created = true; + + if (recursive) + SET_FLAG((*nhe)->flags, NEXTHOP_GROUP_RECURSIVE); + } + return created; +} + +/* Find/create a single nexthop */ +static struct nhg_hash_entry * +zebra_nhg_find_nexthop(uint32_t id, struct nexthop *nh, afi_t afi, int type) +{ + struct nhg_hash_entry *nhe = NULL; + struct nexthop_group nhg = {}; + + _nexthop_group_add_sorted(&nhg, nh); + + zebra_nhg_find(&nhe, id, &nhg, NULL, nh->vrf_id, afi, 0); + + return 
nhe; +} + +static struct nhg_ctx *nhg_ctx_new() +{ + struct nhg_ctx *new = NULL; + + new = XCALLOC(MTYPE_NHG_CTX, sizeof(struct nhg_ctx)); + + return new; +} + +static void nhg_ctx_free(struct nhg_ctx *ctx) +{ + XFREE(MTYPE_NHG_CTX, ctx); +} + +static uint32_t nhg_ctx_get_id(const struct nhg_ctx *ctx) +{ + return ctx->id; +} + +static void nhg_ctx_set_status(struct nhg_ctx *ctx, enum nhg_ctx_status status) +{ + ctx->status = status; +} + +static enum nhg_ctx_status nhg_ctx_get_status(const struct nhg_ctx *ctx) +{ + return ctx->status; +} + +static void nhg_ctx_set_op(struct nhg_ctx *ctx, enum nhg_ctx_op_e op) +{ + ctx->op = op; +} + +static enum nhg_ctx_op_e nhg_ctx_get_op(const struct nhg_ctx *ctx) +{ + return ctx->op; +} + +static vrf_id_t nhg_ctx_get_vrf_id(const struct nhg_ctx *ctx) +{ + return ctx->vrf_id; +} + +static int nhg_ctx_get_type(const struct nhg_ctx *ctx) +{ + return ctx->type; +} + +static int nhg_ctx_get_afi(const struct nhg_ctx *ctx) +{ + return ctx->afi; +} + +static struct nexthop *nhg_ctx_get_nh(struct nhg_ctx *ctx) +{ + return &ctx->u.nh; +} + +static uint8_t nhg_ctx_get_count(const struct nhg_ctx *ctx) +{ + return ctx->count; +} + +static struct nh_grp *nhg_ctx_get_grp(struct nhg_ctx *ctx) +{ + return ctx->u.grp; +} + +static struct nhg_ctx *nhg_ctx_init(uint32_t id, struct nexthop *nh, + struct nh_grp *grp, vrf_id_t vrf_id, + afi_t afi, int type, uint8_t count) +{ + struct nhg_ctx *ctx = NULL; + + ctx = nhg_ctx_new(); + + ctx->id = id; + ctx->vrf_id = vrf_id; + ctx->afi = afi; + ctx->type = type; + ctx->count = count; + + if (count) + /* Copy over the array */ + memcpy(&ctx->u.grp, grp, count * sizeof(struct nh_grp)); + else if (nh) + ctx->u.nh = *nh; + + return ctx; +} + +static bool zebra_nhg_contains_unhashable(struct nhg_hash_entry *nhe) +{ + struct nhg_connected *rb_node_dep = NULL; + + frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) { + if (CHECK_FLAG(rb_node_dep->nhe->flags, + NEXTHOP_GROUP_UNHASHABLE)) + return true; + 
} + + return false; +} + +static void zebra_nhg_set_unhashable(struct nhg_hash_entry *nhe) +{ + SET_FLAG(nhe->flags, NEXTHOP_GROUP_UNHASHABLE); + SET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED); + + flog_warn( + EC_ZEBRA_DUPLICATE_NHG_MESSAGE, + "Nexthop Group with ID (%d) is a duplicate, therefore unhashable, ignoring", + nhe->id); +} + +static void zebra_nhg_set_valid(struct nhg_hash_entry *nhe) +{ + struct nhg_connected *rb_node_dep; + + SET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID); + + frr_each(nhg_connected_tree, &nhe->nhg_dependents, rb_node_dep) + zebra_nhg_set_valid(rb_node_dep->nhe); +} + +static void zebra_nhg_set_invalid(struct nhg_hash_entry *nhe) +{ + struct nhg_connected *rb_node_dep; + + UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID); + + /* Update validity of nexthops depending on it */ + frr_each(nhg_connected_tree, &nhe->nhg_dependents, rb_node_dep) + zebra_nhg_check_valid(rb_node_dep->nhe); +} + +void zebra_nhg_check_valid(struct nhg_hash_entry *nhe) +{ + struct nhg_connected *rb_node_dep = NULL; + bool valid = false; + + /* If anthing else in the group is valid, the group is valid */ + frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) { + if (CHECK_FLAG(rb_node_dep->nhe->flags, NEXTHOP_GROUP_VALID)) { + valid = true; + goto done; + } + } + +done: + if (valid) + zebra_nhg_set_valid(nhe); + else + zebra_nhg_set_invalid(nhe); +} + + +static void zebra_nhg_release(struct nhg_hash_entry *nhe) +{ + /* Remove it from any lists it may be on */ + zebra_nhg_depends_release(nhe); + zebra_nhg_dependents_release(nhe); + if (nhe->ifp) + if_nhg_dependents_del(nhe->ifp, nhe); + + /* + * If its unhashable, we didn't store it here and have to be + * sure we don't clear one thats actually being used. 
+ */ + if (!CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_UNHASHABLE)) + hash_release(zrouter.nhgs, nhe); + + hash_release(zrouter.nhgs_id, nhe); +} + +static void zebra_nhg_handle_uninstall(struct nhg_hash_entry *nhe) +{ + zebra_nhg_release(nhe); + zebra_nhg_free(nhe); +} + +static void zebra_nhg_handle_install(struct nhg_hash_entry *nhe) +{ + /* Update validity of groups depending on it */ + struct nhg_connected *rb_node_dep; + + frr_each_safe(nhg_connected_tree, &nhe->nhg_dependents, rb_node_dep) + zebra_nhg_set_valid(rb_node_dep->nhe); +} + +/* + * The kernel/other program has changed the state of a nexthop object we are + * using. + */ +static void zebra_nhg_handle_kernel_state_change(struct nhg_hash_entry *nhe, + bool is_delete) +{ + if (nhe->refcnt) { + flog_err( + EC_ZEBRA_NHG_SYNC, + "Kernel %s a nexthop group with ID (%u) that we are still using for a route, sending it back down", + (is_delete ? "deleted" : "updated"), nhe->id); + + UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED); + zebra_nhg_install_kernel(nhe); + } else + zebra_nhg_handle_uninstall(nhe); +} + +static int nhg_ctx_process_new(struct nhg_ctx *ctx) +{ + struct nexthop_group *nhg = NULL; + struct nhg_connected_tree_head nhg_depends = {}; + struct nhg_hash_entry *lookup = NULL; + struct nhg_hash_entry *nhe = NULL; + + uint32_t id = nhg_ctx_get_id(ctx); + uint8_t count = nhg_ctx_get_count(ctx); + vrf_id_t vrf_id = nhg_ctx_get_vrf_id(ctx); + int type = nhg_ctx_get_type(ctx); + afi_t afi = nhg_ctx_get_afi(ctx); + + lookup = zebra_nhg_lookup_id(id); + + if (lookup) { + /* This is already present in our table, hence an update + * that we did not initate. 
+ */ + zebra_nhg_handle_kernel_state_change(lookup, false); + return 0; + } + + if (nhg_ctx_get_count(ctx)) { + nhg = nexthop_group_new(); + if (zebra_nhg_process_grp(nhg, &nhg_depends, + nhg_ctx_get_grp(ctx), count)) { + depends_decrement_free(&nhg_depends); + nexthop_group_delete(&nhg); + return -ENOENT; + } + + if (!zebra_nhg_find(&nhe, id, nhg, &nhg_depends, vrf_id, type, + afi)) + depends_decrement_free(&nhg_depends); + + /* These got copied over in zebra_nhg_alloc() */ + nexthop_group_delete(&nhg); + } else + nhe = zebra_nhg_find_nexthop(id, nhg_ctx_get_nh(ctx), afi, + type); + + if (nhe) { + if (id != nhe->id) { + struct nhg_hash_entry *kernel_nhe = NULL; + + /* Duplicate but with different ID from + * the kernel + */ + + /* The kernel allows duplicate nexthops + * as long as they have different IDs. + * We are ignoring those to prevent + * syncing problems with the kernel + * changes. + * + * We maintain them *ONLY* in the ID hash table to + * track them and set the flag to indicated + * their attributes are unhashable. + */ + + kernel_nhe = zebra_nhg_copy(nhe, id); + zebra_nhg_insert_id(kernel_nhe); + zebra_nhg_set_unhashable(kernel_nhe); + } else if (zebra_nhg_contains_unhashable(nhe)) { + /* The group we got contains an unhashable/duplicated + * depend, so lets mark this group as unhashable as well + * and release it from the non-ID hash. 
+ */ + hash_release(zrouter.nhgs, nhe); + zebra_nhg_set_unhashable(nhe); + } else { + /* It actually created a new nhe */ + SET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID); + SET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED); + } + } else { + flog_err( + EC_ZEBRA_TABLE_LOOKUP_FAILED, + "Zebra failed to find or create a nexthop hash entry for ID (%u)", + id); + return -1; + } + + return 0; +} + +static int nhg_ctx_process_del(struct nhg_ctx *ctx) +{ + struct nhg_hash_entry *nhe = NULL; + uint32_t id = nhg_ctx_get_id(ctx); + + nhe = zebra_nhg_lookup_id(id); + + if (!nhe) { + flog_warn( + EC_ZEBRA_BAD_NHG_MESSAGE, + "Kernel delete message received for nexthop group ID (%u) that we do not have in our ID table", + id); + return -1; + } + + zebra_nhg_handle_kernel_state_change(nhe, true); + + return 0; +} + +static void nhg_ctx_process_finish(struct nhg_ctx *ctx) +{ + struct nexthop *nh; + + /* + * Just freeing for now, maybe do something more in the future + * based on flag. + */ + + if (nhg_ctx_get_count(ctx)) + goto done; + + nh = nhg_ctx_get_nh(ctx); + + nexthop_del_labels(nh); + +done: + if (ctx) + nhg_ctx_free(ctx); +} + +static int queue_add(struct nhg_ctx *ctx) +{ + /* If its queued or already processed do nothing */ + if (nhg_ctx_get_status(ctx) == NHG_CTX_QUEUED) + return 0; + + if (rib_queue_nhg_add(ctx)) { + nhg_ctx_set_status(ctx, NHG_CTX_FAILURE); + return -1; + } + + nhg_ctx_set_status(ctx, NHG_CTX_QUEUED); + + return 0; +} + +int nhg_ctx_process(struct nhg_ctx *ctx) +{ + int ret = 0; + + switch (nhg_ctx_get_op(ctx)) { + case NHG_CTX_OP_NEW: + ret = nhg_ctx_process_new(ctx); + if (nhg_ctx_get_count(ctx) && ret == -ENOENT + && nhg_ctx_get_status(ctx) != NHG_CTX_REQUEUED) { + /** + * We have entered a situation where we are + * processing a group from the kernel + * that has a contained nexthop which + * we have not yet processed. + * + * Re-enqueue this ctx to be handled exactly one + * more time (indicated by the flag). 
+ * + * By the time we get back to it, we + * should have processed its depends. + */ + nhg_ctx_set_status(ctx, NHG_CTX_NONE); + if (queue_add(ctx) == 0) { + nhg_ctx_set_status(ctx, NHG_CTX_REQUEUED); + return 0; + } + } + break; + case NHG_CTX_OP_DEL: + ret = nhg_ctx_process_del(ctx); + case NHG_CTX_OP_NONE: + break; + } + + nhg_ctx_set_status(ctx, (ret ? NHG_CTX_FAILURE : NHG_CTX_SUCCESS)); + + nhg_ctx_process_finish(ctx); + + return ret; +} + +/* Kernel-side, you either get a single new nexthop or a array of ID's */ +int zebra_nhg_kernel_find(uint32_t id, struct nexthop *nh, struct nh_grp *grp, + uint8_t count, vrf_id_t vrf_id, afi_t afi, int type, + int startup) +{ + struct nhg_ctx *ctx = NULL; + + if (id > id_counter) + /* Increase our counter so we don't try to create + * an ID that already exists + */ + id_counter = id; + + ctx = nhg_ctx_init(id, nh, grp, vrf_id, afi, type, count); + nhg_ctx_set_op(ctx, NHG_CTX_OP_NEW); + + /* Under statup conditions, we need to handle them immediately + * like we do for routes. Otherwise, we are going to get a route + * with a nhe_id that we have not handled. 
+ */ + if (startup) + return nhg_ctx_process(ctx); + + if (queue_add(ctx)) { + nhg_ctx_process_finish(ctx); + return -1; + } + + return 0; +} + +/* Kernel-side, received delete message */ +int zebra_nhg_kernel_del(uint32_t id) +{ + struct nhg_ctx *ctx = NULL; + + ctx = nhg_ctx_init(id, NULL, NULL, 0, 0, 0, 0); + + nhg_ctx_set_op(ctx, NHG_CTX_OP_DEL); + + if (queue_add(ctx)) { + nhg_ctx_process_finish(ctx); + return -1; + } + + return 0; +} + +/* Some dependency helper functions */ +static struct nhg_hash_entry *depends_find(struct nexthop *nh, afi_t afi) +{ + struct nexthop *lookup = NULL; + struct nhg_hash_entry *nhe = NULL; + + copy_nexthops(&lookup, nh, NULL); + + /* Clear it, in case its a group */ + nexthops_free(lookup->next); + nexthops_free(lookup->prev); + lookup->next = NULL; + lookup->prev = NULL; + + nhe = zebra_nhg_find_nexthop(0, lookup, afi, 0); + + nexthops_free(lookup); + + return nhe; +} + +static void depends_add(struct nhg_connected_tree_head *head, + struct nhg_hash_entry *depend) +{ + nhg_connected_tree_add_nhe(head, depend); + zebra_nhg_increment_ref(depend); +} + +static struct nhg_hash_entry * +depends_find_add(struct nhg_connected_tree_head *head, struct nexthop *nh, + afi_t afi) +{ + struct nhg_hash_entry *depend = NULL; + + depend = depends_find(nh, afi); + + if (depend) + depends_add(head, depend); + + return depend; +} + +static struct nhg_hash_entry * +depends_find_id_add(struct nhg_connected_tree_head *head, uint32_t id) +{ + struct nhg_hash_entry *depend = NULL; + + depend = zebra_nhg_lookup_id(id); + + if (depend) + depends_add(head, depend); + + return depend; +} + +static void depends_decrement_free(struct nhg_connected_tree_head *head) +{ + nhg_connected_tree_decrement_ref(head); + nhg_connected_tree_free(head); +} + +/* Rib-side, you get a nexthop group struct */ +struct nhg_hash_entry * +zebra_nhg_rib_find(uint32_t id, struct nexthop_group *nhg, afi_t rt_afi) +{ + struct nhg_hash_entry *nhe = NULL; + + if (!(nhg && 
nhg->nexthop)) { + flog_err(EC_ZEBRA_TABLE_LOOKUP_FAILED, + "No nexthop passed to %s", __func__); + return NULL; + } + + zebra_nhg_find(&nhe, id, nhg, NULL, nhg->nexthop->vrf_id, rt_afi, 0); + + return nhe; +} + +static void zebra_nhg_free_members(struct nhg_hash_entry *nhe) +{ + nexthop_group_delete(&nhe->nhg); + /* Decrement to remove connection ref */ + nhg_connected_tree_decrement_ref(&nhe->nhg_depends); + nhg_connected_tree_free(&nhe->nhg_depends); + nhg_connected_tree_free(&nhe->nhg_dependents); +} + +void zebra_nhg_free(void *arg) +{ + struct nhg_hash_entry *nhe = NULL; + + nhe = (struct nhg_hash_entry *)arg; + + if (nhe->refcnt) + zlog_debug("nhe_id=%u hash refcnt=%d", nhe->id, nhe->refcnt); + + zebra_nhg_free_members(nhe); + + XFREE(MTYPE_NHG, nhe); +} + +void zebra_nhg_decrement_ref(struct nhg_hash_entry *nhe) +{ + nhe->refcnt--; + + if (!zebra_nhg_depends_is_empty(nhe)) + nhg_connected_tree_decrement_ref(&nhe->nhg_depends); + + if (ZEBRA_NHG_CREATED(nhe) && nhe->refcnt <= 0) + zebra_nhg_uninstall_kernel(nhe); +} + +void zebra_nhg_increment_ref(struct nhg_hash_entry *nhe) +{ + nhe->refcnt++; + + if (!zebra_nhg_depends_is_empty(nhe)) + nhg_connected_tree_increment_ref(&nhe->nhg_depends); +} static void nexthop_set_resolved(afi_t afi, const struct nexthop *newhop, struct nexthop *nexthop) @@ -152,7 +1232,8 @@ static bool nexthop_valid_resolve(const struct nexthop *nexthop, /* * Given a nexthop we need to properly recursively resolve * the route. As such, do a table lookup to find and match - * if at all possible. Set the nexthop->ifindex as appropriate + * if at all possible. 
Set the nexthop->ifindex and resolved_id + * as appropriate */ static int nexthop_active(afi_t afi, struct route_entry *re, struct nexthop *nexthop, struct route_node *top) @@ -171,6 +1252,7 @@ static int nexthop_active(afi_t afi, struct route_entry *re, || nexthop->type == NEXTHOP_TYPE_IPV6) nexthop->ifindex = 0; + UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE); nexthops_free(nexthop->resolved); nexthop->resolved = NULL; @@ -210,13 +1292,12 @@ static int nexthop_active(afi_t afi, struct route_entry *re, if (connected_is_unnumbered(ifp)) { if (if_is_operative(ifp)) return 1; - else { - if (IS_ZEBRA_DEBUG_RIB_DETAILED) - zlog_debug( - "\t%s: Onlink and interface %s is not operative", - __PRETTY_FUNCTION__, ifp->name); - return 0; - } + + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + zlog_debug( + "\t%s: Onlink and interface %s is not operative", + __PRETTY_FUNCTION__, ifp->name); + return 0; } if (!if_is_operative(ifp)) { if (IS_ZEBRA_DEBUG_RIB_DETAILED) @@ -276,7 +1357,8 @@ static int nexthop_active(afi_t afi, struct route_entry *re, /* Pick up selected route. */ /* However, do not resolve over default route unless explicitly - * allowed. */ + * allowed. + */ if (is_default_prefix(&rn->p) && !rnh_resolve_via_default(zvrf, p.family)) { if (IS_ZEBRA_DEBUG_RIB_DETAILED) @@ -294,7 +1376,8 @@ static int nexthop_active(afi_t afi, struct route_entry *re, match = dest->selected_fib; /* If there is no selected route or matched route is EGP, go up - tree. */ + * tree. + */ if (!match) { do { rn = rn->parent; @@ -307,7 +1390,7 @@ static int nexthop_active(afi_t afi, struct route_entry *re, if (match->type == ZEBRA_ROUTE_CONNECT) { /* Directly point connected route. 
*/ - newhop = match->ng.nexthop; + newhop = match->ng->nexthop; if (newhop) { if (nexthop->type == NEXTHOP_TYPE_IPV4 || nexthop->type == NEXTHOP_TYPE_IPV6) @@ -316,7 +1399,7 @@ static int nexthop_active(afi_t afi, struct route_entry *re, return 1; } else if (CHECK_FLAG(re->flags, ZEBRA_FLAG_ALLOW_RECURSION)) { resolved = 0; - for (ALL_NEXTHOPS(match->ng, newhop)) { + for (ALL_NEXTHOPS_PTR(match->ng, newhop)) { if (!CHECK_FLAG(match->status, ROUTE_ENTRY_INSTALLED)) continue; @@ -330,13 +1413,14 @@ static int nexthop_active(afi_t afi, struct route_entry *re, } if (resolved) re->nexthop_mtu = match->mtu; + if (!resolved && IS_ZEBRA_DEBUG_RIB_DETAILED) zlog_debug("\t%s: Recursion failed to find", __PRETTY_FUNCTION__); return resolved; } else if (re->type == ZEBRA_ROUTE_STATIC) { resolved = 0; - for (ALL_NEXTHOPS(match->ng, newhop)) { + for (ALL_NEXTHOPS_PTR(match->ng, newhop)) { if (!CHECK_FLAG(match->status, ROUTE_ENTRY_INSTALLED)) continue; @@ -382,6 +1466,9 @@ static int nexthop_active(afi_t afi, struct route_entry *re, * appropriately as well. An existing route map can turn * (otherwise active) nexthop into inactive, but not vice versa. * + * If it finds a nexthop recursivedly, set the resolved_id + * to match that nexthop's nhg_hash_entry ID; + * * The return value is the final value of 'ACTIVE' flag. */ static unsigned nexthop_active_check(struct route_node *rn, @@ -505,23 +1592,29 @@ static unsigned nexthop_active_check(struct route_node *rn, /* * Iterate over all nexthops of the given RIB entry and refresh their - * ACTIVE flag. re->nexthop_active_num is updated accordingly. If any - * nexthop is found to toggle the ACTIVE flag, the whole re structure - * is flagged with ROUTE_ENTRY_CHANGED. + * ACTIVE flag. If any nexthop is found to toggle the ACTIVE flag, + * the whole re structure is flagged with ROUTE_ENTRY_CHANGED. * * Return value is the new number of active nexthops. 
*/ int nexthop_active_update(struct route_node *rn, struct route_entry *re) { + struct nexthop_group new_grp = {}; struct nexthop *nexthop; union g_addr prev_src; unsigned int prev_active, new_active; ifindex_t prev_index; + uint8_t curr_active = 0; + + afi_t rt_afi = family2afi(rn->p.family); - re->nexthop_active_num = 0; UNSET_FLAG(re->status, ROUTE_ENTRY_CHANGED); - for (nexthop = re->ng.nexthop; nexthop; nexthop = nexthop->next) { + /* Copy over the nexthops in current state */ + nexthop_group_copy(&new_grp, re->ng); + + for (nexthop = new_grp.nexthop; nexthop; nexthop = nexthop->next) { + /* No protocol daemon provides src and so we're skipping * tracking it */ prev_src = nexthop->rmap_src; @@ -533,14 +1626,19 @@ int nexthop_active_update(struct route_node *rn, struct route_entry *re) * a multipath perpsective should not be a data plane * decision point. */ - new_active = nexthop_active_check(rn, re, nexthop); + new_active = + nexthop_active_check(rn, re, nexthop); + if (new_active - && re->nexthop_active_num >= zrouter.multipath_num) { + && nexthop_group_active_nexthop_num(&new_grp) + >= zrouter.multipath_num) { UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); new_active = 0; } + if (new_active) - re->nexthop_active_num++; + curr_active++; + /* Don't allow src setting on IPv6 addr for now */ if (prev_active != new_active || prev_index != nexthop->ifindex || ((nexthop->type >= NEXTHOP_TYPE_IFINDEX @@ -555,6 +1653,269 @@ int nexthop_active_update(struct route_node *rn, struct route_entry *re) SET_FLAG(re->status, ROUTE_ENTRY_CHANGED); } - return re->nexthop_active_num; + if (CHECK_FLAG(re->status, ROUTE_ENTRY_CHANGED)) { + struct nhg_hash_entry *new_nhe = NULL; + + new_nhe = zebra_nhg_rib_find(0, &new_grp, rt_afi); + + zebra_nhg_re_update_ref(re, new_nhe); + } + + if (curr_active) { + struct nhg_hash_entry *nhe = NULL; + + nhe = zebra_nhg_lookup_id(re->nhe_id); + + if (nhe) + SET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID); + else + flog_err( + 
EC_ZEBRA_TABLE_LOOKUP_FAILED, + "Active update on NHE id=%u that we do not have in our tables", + re->nhe_id); + } + + /* + * Do not need these nexthops anymore since they + * were either copied over into an nhe or not + * used at all. + */ + nexthops_free(new_grp.nexthop); + return curr_active; } +static void zebra_nhg_re_attach_ref(struct route_entry *re, + struct nhg_hash_entry *new) +{ + re->ng = new->nhg; + re->nhe_id = new->id; + + zebra_nhg_increment_ref(new); +} + +int zebra_nhg_re_update_ref(struct route_entry *re, struct nhg_hash_entry *new) +{ + struct nhg_hash_entry *old = NULL; + int ret = 0; + + if (new == NULL) { + re->ng = NULL; + goto done; + } + + if (re->nhe_id != new->id) { + old = zebra_nhg_lookup_id(re->nhe_id); + + zebra_nhg_re_attach_ref(re, new); + + if (old) + zebra_nhg_decrement_ref(old); + } else if (!re->ng) + /* This is the first time it's being attached */ + zebra_nhg_re_attach_ref(re, new); + +done: + return ret; +} + +/* Convert a nhe into a group array */ +uint8_t zebra_nhg_nhe2grp(struct nh_grp *grp, struct nhg_hash_entry *nhe, + int max_num) +{ + struct nhg_connected *rb_node_dep = NULL; + struct nhg_hash_entry *depend = NULL; + uint8_t i = 0; + + frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) { + bool duplicate = false; + + depend = rb_node_dep->nhe; + + /* + * If its recursive, use its resolved nhe in the group + */ + if (CHECK_FLAG(depend->flags, NEXTHOP_GROUP_RECURSIVE)) { + depend = zebra_nhg_resolve(depend); + if (!depend) { + flog_err( + EC_ZEBRA_NHG_FIB_UPDATE, + "Failed to recursively resolve Nexthop Hash Entry in the group id=%u", + nhe->id); + continue; + } + } + + /* Check for duplicate IDs, kernel doesn't like that */ + for (int j = 0; j < i; j++) { + if (depend->id == grp[j].id) + duplicate = true; + } + + if (!duplicate) { + grp[i].id = depend->id; + /* We aren't using weights for anything right now */ + grp[i].weight = 0; + i++; + } + + if (i >= max_num) + goto done; + } + +done: + return i; +} + 
+void zebra_nhg_install_kernel(struct nhg_hash_entry *nhe) +{ + struct nhg_connected *rb_node_dep = NULL; + + /* Resolve it first */ + nhe = zebra_nhg_resolve(nhe); + + /* Make sure all depends are installed/queued */ + frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) { + zebra_nhg_install_kernel(rb_node_dep->nhe); + } + + if (!CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED) + && !CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_QUEUED)) { + /* Change its type to us since we are installing it */ + nhe->type = ZEBRA_ROUTE_NHG; + + int ret = dplane_nexthop_add(nhe); + + switch (ret) { + case ZEBRA_DPLANE_REQUEST_QUEUED: + SET_FLAG(nhe->flags, NEXTHOP_GROUP_QUEUED); + break; + case ZEBRA_DPLANE_REQUEST_FAILURE: + flog_err( + EC_ZEBRA_DP_INSTALL_FAIL, + "Failed to install Nexthop ID (%u) into the kernel", + nhe->id); + break; + case ZEBRA_DPLANE_REQUEST_SUCCESS: + SET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED); + zebra_nhg_handle_install(nhe); + break; + } + } +} + +void zebra_nhg_uninstall_kernel(struct nhg_hash_entry *nhe) +{ + if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED)) { + int ret = dplane_nexthop_delete(nhe); + + switch (ret) { + case ZEBRA_DPLANE_REQUEST_QUEUED: + SET_FLAG(nhe->flags, NEXTHOP_GROUP_QUEUED); + break; + case ZEBRA_DPLANE_REQUEST_FAILURE: + flog_err( + EC_ZEBRA_DP_DELETE_FAIL, + "Failed to uninstall Nexthop ID (%u) from the kernel", + nhe->id); + break; + case ZEBRA_DPLANE_REQUEST_SUCCESS: + UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED); + break; + } + } + + zebra_nhg_handle_uninstall(nhe); +} + +void zebra_nhg_dplane_result(struct zebra_dplane_ctx *ctx) +{ + enum dplane_op_e op; + enum zebra_dplane_result status; + uint32_t id = 0; + struct nhg_hash_entry *nhe = NULL; + + op = dplane_ctx_get_op(ctx); + status = dplane_ctx_get_status(ctx); + + id = dplane_ctx_get_nhe_id(ctx); + + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug( + "Nexthop dplane ctx %p, op %s, nexthop ID (%u), result %s", + ctx, dplane_op2str(op), id, 
dplane_res2str(status)); + + switch (op) { + case DPLANE_OP_NH_DELETE: + if (status != ZEBRA_DPLANE_REQUEST_SUCCESS) + flog_err( + EC_ZEBRA_DP_DELETE_FAIL, + "Failed to uninstall Nexthop ID (%u) from the kernel", + id); + /* We already free'd the data, nothing to do */ + break; + case DPLANE_OP_NH_INSTALL: + case DPLANE_OP_NH_UPDATE: + nhe = zebra_nhg_lookup_id(id); + + if (!nhe) { + flog_err( + EC_ZEBRA_NHG_SYNC, + "%s operation preformed on Nexthop ID (%u) in the kernel, that we no longer have in our table", + dplane_op2str(op), id); + break; + } + + UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_QUEUED); + if (status == ZEBRA_DPLANE_REQUEST_SUCCESS) { + SET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID); + SET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED); + zebra_nhg_handle_install(nhe); + } else + flog_err( + EC_ZEBRA_DP_INSTALL_FAIL, + "Failed to install Nexthop ID (%u) into the kernel", + nhe->id); + break; + case DPLANE_OP_ROUTE_INSTALL: + case DPLANE_OP_ROUTE_UPDATE: + case DPLANE_OP_ROUTE_DELETE: + case DPLANE_OP_ROUTE_NOTIFY: + case DPLANE_OP_LSP_INSTALL: + case DPLANE_OP_LSP_UPDATE: + case DPLANE_OP_LSP_DELETE: + case DPLANE_OP_LSP_NOTIFY: + case DPLANE_OP_PW_INSTALL: + case DPLANE_OP_PW_UNINSTALL: + case DPLANE_OP_SYS_ROUTE_ADD: + case DPLANE_OP_SYS_ROUTE_DELETE: + case DPLANE_OP_ADDR_INSTALL: + case DPLANE_OP_ADDR_UNINSTALL: + case DPLANE_OP_MAC_INSTALL: + case DPLANE_OP_MAC_DELETE: + case DPLANE_OP_NEIGH_INSTALL: + case DPLANE_OP_NEIGH_UPDATE: + case DPLANE_OP_NEIGH_DELETE: + case DPLANE_OP_VTEP_ADD: + case DPLANE_OP_VTEP_DELETE: + case DPLANE_OP_NONE: + break; + } + + dplane_ctx_fini(&ctx); +} + +static void zebra_nhg_sweep_entry(struct hash_bucket *bucket, void *arg) +{ + struct nhg_hash_entry *nhe = NULL; + + nhe = (struct nhg_hash_entry *)bucket->data; + + /* If its being ref'd, just let it be uninstalled via a route removal */ + if (ZEBRA_NHG_CREATED(nhe) && nhe->refcnt <= 0) + zebra_nhg_uninstall_kernel(nhe); +} + +void zebra_nhg_sweep_table(struct hash *hash) +{ + 
+ hash_iterate(hash, zebra_nhg_sweep_entry, NULL); +} diff --git a/zebra/zebra_nhg.h b/zebra/zebra_nhg.h index ff2351c75..1f695433c 100644 --- a/zebra/zebra_nhg.h +++ b/zebra/zebra_nhg.h @@ -24,6 +24,205 @@ #define __ZEBRA_NHG_H__ #include "zebra/rib.h" +#include "lib/nexthop_group.h" +#include "zebra/zebra_dplane.h" + +/* This struct is used exclusively for dataplane + * interaction via a dataplane context. + * + * It is designed to mimic the netlink nexthop_grp + * struct in include/linux/nexthop.h + */ +struct nh_grp { + uint32_t id; + uint8_t weight; +}; + +PREDECL_RBTREE_UNIQ(nhg_connected_tree); + +/* + * Hashtables containing entries found in `zebra_router`. + */ + +struct nhg_hash_entry { + uint32_t id; + afi_t afi; + vrf_id_t vrf_id; + int type; + + struct nexthop_group *nhg; + + /* If this is not a group, it + * will be a single nexthop + * and must have an interface + * associated with it. + * Otherwise, this will be null. + */ + struct interface *ifp; + + uint32_t refcnt; + uint32_t dplane_ref; + + uint32_t flags; + + /* Dependency tree for other entries. + * For instance a group with two + * nexthops will have two dependencies + * pointing to those nhg_hash_entries. + * + * Using a rb tree here to make lookups + * faster with ID's. + */ + struct nhg_connected_tree_head nhg_depends, nhg_dependents; +/* + * Is this nexthop group valid, ie all nexthops are fully resolved. + * What is fully resolved? It's a nexthop that is either self contained + * and correct( ie no recursive pointer ) or a nexthop that is recursively + * resolved and correct. + */ +#define NEXTHOP_GROUP_VALID (1 << 0) +/* + * Has this nexthop group been installed? At this point in time, this + * means that the data-plane has been told about this nexthop group + * and its possible usage by a route entry. + */ +#define NEXTHOP_GROUP_INSTALLED (1 << 1) +/* + * Has the nexthop group been queued to be sent to the FIB? + * The NEXTHOP_GROUP_VALID flag should also be set by this point. 
+ */ +#define NEXTHOP_GROUP_QUEUED (1 << 2) +/* + * Is this a nexthop that is recursively resolved? + */ +#define NEXTHOP_GROUP_RECURSIVE (1 << 3) +/* + * This is a nexthop group we got from the kernel, it is identical to + * one we already have. (The kernel allows duplicate nexthops, we don't + * since we hash on them). We are only tracking it in our ID table, + * it is unusable by our created routes but may be used by routes we get + * from the kernel. Therefore, it is unhashable. + */ +#define NEXTHOP_GROUP_UNHASHABLE (1 << 4) +}; + +/* Was this one we created, either this session or previously? */ +#define ZEBRA_NHG_CREATED(NHE) ((NHE->type) == ZEBRA_ROUTE_NHG) + + +enum nhg_ctx_op_e { + NHG_CTX_OP_NONE = 0, + NHG_CTX_OP_NEW, + NHG_CTX_OP_DEL, +}; + +enum nhg_ctx_status { + NHG_CTX_NONE = 0, + NHG_CTX_QUEUED, + NHG_CTX_REQUEUED, + NHG_CTX_SUCCESS, + NHG_CTX_FAILURE, +}; + +/* + * Context needed to queue nhg updates on the + * work queue. + */ +struct nhg_ctx { + + /* Unique ID */ + uint32_t id; + + vrf_id_t vrf_id; + afi_t afi; + /* + * This should only ever be ZEBRA_ROUTE_NHG unless we get a kernel + * created nexthop not made by us. + */ + int type; + + /* If it's a group array, how many? */ + uint8_t count; + + /* It's either a single nexthop or an array of ID's */ + union { + struct nexthop nh; + struct nh_grp grp[MULTIPATH_NUM]; + } u; + + enum nhg_ctx_op_e op; + enum nhg_ctx_status status; +}; + + +/** + * NHE abstracted tree functions. + * Use these where possible instead of the direct access ones. 
+ */ +extern struct nhg_hash_entry *zebra_nhg_resolve(struct nhg_hash_entry *nhe); + +extern unsigned int zebra_nhg_depends_count(const struct nhg_hash_entry *nhe); +extern bool zebra_nhg_depends_is_empty(const struct nhg_hash_entry *nhe); + +extern unsigned int +zebra_nhg_dependents_count(const struct nhg_hash_entry *nhe); +extern bool zebra_nhg_dependents_is_empty(const struct nhg_hash_entry *nhe); + +/* Lookup ID, doesn't create */ +extern struct nhg_hash_entry *zebra_nhg_lookup_id(uint32_t id); + +/* Hash functions */ +extern uint32_t zebra_nhg_hash_key(const void *arg); +extern uint32_t zebra_nhg_id_key(const void *arg); + +extern bool zebra_nhg_hash_equal(const void *arg1, const void *arg2); +extern bool zebra_nhg_hash_id_equal(const void *arg1, const void *arg2); + +/* + * Process a context off of a queue. + * Specifically this should be from + * the rib meta queue. + */ +extern int nhg_ctx_process(struct nhg_ctx *ctx); + +/* Find via kernel nh creation */ +extern int zebra_nhg_kernel_find(uint32_t id, struct nexthop *nh, + struct nh_grp *grp, uint8_t count, + vrf_id_t vrf_id, afi_t afi, int type, + int startup); +/* Del via kernel */ +extern int zebra_nhg_kernel_del(uint32_t id); + +/* Find via route creation */ +extern struct nhg_hash_entry * +zebra_nhg_rib_find(uint32_t id, struct nexthop_group *nhg, afi_t rt_afi); + +/* Reference counter functions */ +extern void zebra_nhg_decrement_ref(struct nhg_hash_entry *nhe); +extern void zebra_nhg_increment_ref(struct nhg_hash_entry *nhe); +extern int zebra_nhg_re_update_ref(struct route_entry *re, + struct nhg_hash_entry *nhe); + +/* Check validity of nhe, if invalid will update dependents as well */ +extern void zebra_nhg_check_valid(struct nhg_hash_entry *nhe); + +/* Convert nhe depends to a grp context that can be passed around safely */ +extern uint8_t zebra_nhg_nhe2grp(struct nh_grp *grp, struct nhg_hash_entry *nhe, + int size); + +/* Dataplane install/uninstall */ +extern void 
zebra_nhg_install_kernel(struct nhg_hash_entry *nhe); +extern void zebra_nhg_uninstall_kernel(struct nhg_hash_entry *nhe); + +/* Forward ref of dplane update context type */ +struct zebra_dplane_ctx; +extern void zebra_nhg_dplane_result(struct zebra_dplane_ctx *ctx); + + +/* Sweep the nhg hash tables for old entries on restart */ +extern void zebra_nhg_sweep_table(struct hash *hash); + +/* Nexthop resolution processing */ extern int nexthop_active_update(struct route_node *rn, struct route_entry *re); #endif diff --git a/zebra/zebra_nhg_private.h b/zebra/zebra_nhg_private.h new file mode 100644 index 000000000..170e2357e --- /dev/null +++ b/zebra/zebra_nhg_private.h @@ -0,0 +1,62 @@ +/* + * Nexthop Group Private Functions. + * Copyright (C) 2019 Cumulus Networks, Inc. + * Stephen Worley + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * These functions should only be used internally for nhg_hash_entry + * manipulation and in certain special cases. + * + * Please use `zebra/zebra_nhg.h` for any general nhg_hash_entry api needs. 
+ */ + +#ifndef __ZEBRA_NHG_PRIVATE_H__ +#define __ZEBRA_NHG_PRIVATE_H__ + +#include "zebra/zebra_nhg.h" + +/* Abstraction for connected trees */ +struct nhg_connected { + struct nhg_connected_tree_item tree_item; + struct nhg_hash_entry *nhe; +}; + +static int nhg_connected_cmp(const struct nhg_connected *con1, + const struct nhg_connected *con2) +{ + return (con1->nhe->id - con2->nhe->id); +} + +DECLARE_RBTREE_UNIQ(nhg_connected_tree, struct nhg_connected, tree_item, + nhg_connected_cmp); + +/* nhg connected tree direct access functions */ +extern void nhg_connected_tree_init(struct nhg_connected_tree_head *head); +extern void nhg_connected_tree_free(struct nhg_connected_tree_head *head); +extern bool +nhg_connected_tree_is_empty(const struct nhg_connected_tree_head *head); +extern struct nhg_connected * +nhg_connected_tree_root(struct nhg_connected_tree_head *head); +extern void nhg_connected_tree_del_nhe(struct nhg_connected_tree_head *head, + struct nhg_hash_entry *nhe); +extern void nhg_connected_tree_add_nhe(struct nhg_connected_tree_head *head, + struct nhg_hash_entry *nhe); + +extern void zebra_nhg_free(void *arg); + +#endif /* __ZEBRA_NHG_PRIVATE_H__ */ diff --git a/zebra/zebra_pw.c b/zebra/zebra_pw.c index 09edbc9a6..3f1567a95 100644 --- a/zebra/zebra_pw.c +++ b/zebra/zebra_pw.c @@ -259,7 +259,7 @@ static int zebra_pw_check_reachability(struct zebra_pw *pw) * Need to ensure that there's a label binding for all nexthops. * Otherwise, ECMP for this route could render the pseudowire unusable. 
*/ - for (ALL_NEXTHOPS(re->ng, nexthop)) { + for (ALL_NEXTHOPS_PTR(re->ng, nexthop)) { if (!nexthop->nh_label) { if (IS_ZEBRA_DEBUG_PW) zlog_debug("%s: unlabeled route for %s", diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c index c2fa33f57..e0bf1a58f 100644 --- a/zebra/zebra_rib.c +++ b/zebra/zebra_rib.c @@ -56,7 +56,6 @@ #include "zebra/zebra_vxlan.h" #include "zebra/zapi_msg.h" #include "zebra/zebra_dplane.h" -#include "zebra/zebra_nhg.h" DEFINE_MTYPE_STATIC(ZEBRA, RIB_UPDATE_CTX, "Rib update context object"); @@ -79,34 +78,35 @@ static const struct { uint8_t distance; uint8_t meta_q_map; } route_info[ZEBRA_ROUTE_MAX] = { - [ZEBRA_ROUTE_SYSTEM] = {ZEBRA_ROUTE_SYSTEM, 0, 4}, - [ZEBRA_ROUTE_KERNEL] = {ZEBRA_ROUTE_KERNEL, 0, 0}, - [ZEBRA_ROUTE_CONNECT] = {ZEBRA_ROUTE_CONNECT, 0, 0}, - [ZEBRA_ROUTE_STATIC] = {ZEBRA_ROUTE_STATIC, 1, 1}, - [ZEBRA_ROUTE_RIP] = {ZEBRA_ROUTE_RIP, 120, 2}, - [ZEBRA_ROUTE_RIPNG] = {ZEBRA_ROUTE_RIPNG, 120, 2}, - [ZEBRA_ROUTE_OSPF] = {ZEBRA_ROUTE_OSPF, 110, 2}, - [ZEBRA_ROUTE_OSPF6] = {ZEBRA_ROUTE_OSPF6, 110, 2}, - [ZEBRA_ROUTE_ISIS] = {ZEBRA_ROUTE_ISIS, 115, 2}, - [ZEBRA_ROUTE_BGP] = {ZEBRA_ROUTE_BGP, 20 /* IBGP is 200. 
*/, 3}, - [ZEBRA_ROUTE_PIM] = {ZEBRA_ROUTE_PIM, 255, 4}, - [ZEBRA_ROUTE_EIGRP] = {ZEBRA_ROUTE_EIGRP, 90, 2}, - [ZEBRA_ROUTE_NHRP] = {ZEBRA_ROUTE_NHRP, 10, 2}, - [ZEBRA_ROUTE_HSLS] = {ZEBRA_ROUTE_HSLS, 255, 4}, - [ZEBRA_ROUTE_OLSR] = {ZEBRA_ROUTE_OLSR, 255, 4}, - [ZEBRA_ROUTE_TABLE] = {ZEBRA_ROUTE_TABLE, 150, 1}, - [ZEBRA_ROUTE_LDP] = {ZEBRA_ROUTE_LDP, 150, 4}, - [ZEBRA_ROUTE_VNC] = {ZEBRA_ROUTE_VNC, 20, 3}, - [ZEBRA_ROUTE_VNC_DIRECT] = {ZEBRA_ROUTE_VNC_DIRECT, 20, 3}, - [ZEBRA_ROUTE_VNC_DIRECT_RH] = {ZEBRA_ROUTE_VNC_DIRECT_RH, 20, 3}, - [ZEBRA_ROUTE_BGP_DIRECT] = {ZEBRA_ROUTE_BGP_DIRECT, 20, 3}, - [ZEBRA_ROUTE_BGP_DIRECT_EXT] = {ZEBRA_ROUTE_BGP_DIRECT_EXT, 20, 3}, - [ZEBRA_ROUTE_BABEL] = {ZEBRA_ROUTE_BABEL, 100, 2}, - [ZEBRA_ROUTE_SHARP] = {ZEBRA_ROUTE_SHARP, 150, 4}, - [ZEBRA_ROUTE_PBR] = {ZEBRA_ROUTE_PBR, 200, 4}, - [ZEBRA_ROUTE_BFD] = {ZEBRA_ROUTE_BFD, 255, 4}, - [ZEBRA_ROUTE_OPENFABRIC] = {ZEBRA_ROUTE_OPENFABRIC, 115, 2}, - [ZEBRA_ROUTE_VRRP] = {ZEBRA_ROUTE_VRRP, 255, 4} + [ZEBRA_ROUTE_NHG] = {ZEBRA_ROUTE_NHG, 255 /* Uneeded for nhg's */, 0}, + [ZEBRA_ROUTE_SYSTEM] = {ZEBRA_ROUTE_SYSTEM, 0, 5}, + [ZEBRA_ROUTE_KERNEL] = {ZEBRA_ROUTE_KERNEL, 0, 1}, + [ZEBRA_ROUTE_CONNECT] = {ZEBRA_ROUTE_CONNECT, 0, 1}, + [ZEBRA_ROUTE_STATIC] = {ZEBRA_ROUTE_STATIC, 1, 2}, + [ZEBRA_ROUTE_RIP] = {ZEBRA_ROUTE_RIP, 120, 3}, + [ZEBRA_ROUTE_RIPNG] = {ZEBRA_ROUTE_RIPNG, 120, 3}, + [ZEBRA_ROUTE_OSPF] = {ZEBRA_ROUTE_OSPF, 110, 3}, + [ZEBRA_ROUTE_OSPF6] = {ZEBRA_ROUTE_OSPF6, 110, 3}, + [ZEBRA_ROUTE_ISIS] = {ZEBRA_ROUTE_ISIS, 115, 3}, + [ZEBRA_ROUTE_BGP] = {ZEBRA_ROUTE_BGP, 20 /* IBGP is 200. 
*/, 4}, + [ZEBRA_ROUTE_PIM] = {ZEBRA_ROUTE_PIM, 255, 5}, + [ZEBRA_ROUTE_EIGRP] = {ZEBRA_ROUTE_EIGRP, 90, 3}, + [ZEBRA_ROUTE_NHRP] = {ZEBRA_ROUTE_NHRP, 10, 3}, + [ZEBRA_ROUTE_HSLS] = {ZEBRA_ROUTE_HSLS, 255, 5}, + [ZEBRA_ROUTE_OLSR] = {ZEBRA_ROUTE_OLSR, 255, 5}, + [ZEBRA_ROUTE_TABLE] = {ZEBRA_ROUTE_TABLE, 150, 2}, + [ZEBRA_ROUTE_LDP] = {ZEBRA_ROUTE_LDP, 150, 5}, + [ZEBRA_ROUTE_VNC] = {ZEBRA_ROUTE_VNC, 20, 4}, + [ZEBRA_ROUTE_VNC_DIRECT] = {ZEBRA_ROUTE_VNC_DIRECT, 20, 4}, + [ZEBRA_ROUTE_VNC_DIRECT_RH] = {ZEBRA_ROUTE_VNC_DIRECT_RH, 20, 4}, + [ZEBRA_ROUTE_BGP_DIRECT] = {ZEBRA_ROUTE_BGP_DIRECT, 20, 4}, + [ZEBRA_ROUTE_BGP_DIRECT_EXT] = {ZEBRA_ROUTE_BGP_DIRECT_EXT, 20, 4}, + [ZEBRA_ROUTE_BABEL] = {ZEBRA_ROUTE_BABEL, 100, 3}, + [ZEBRA_ROUTE_SHARP] = {ZEBRA_ROUTE_SHARP, 150, 5}, + [ZEBRA_ROUTE_PBR] = {ZEBRA_ROUTE_PBR, 200, 5}, + [ZEBRA_ROUTE_BFD] = {ZEBRA_ROUTE_BFD, 255, 5}, + [ZEBRA_ROUTE_OPENFABRIC] = {ZEBRA_ROUTE_OPENFABRIC, 115, 3}, + [ZEBRA_ROUTE_VRRP] = {ZEBRA_ROUTE_VRRP, 255, 5} /* Any new route type added to zebra, should be mirrored here */ /* no entry/default: 150 */ @@ -196,8 +196,7 @@ int zebra_check_addr(const struct prefix *p) /* Add nexthop to the end of a rib node's nexthop list */ void route_entry_nexthop_add(struct route_entry *re, struct nexthop *nexthop) { - _nexthop_group_add_sorted(&re->ng, nexthop); - re->nexthop_num++; + _nexthop_group_add_sorted(re->ng, nexthop); } @@ -206,10 +205,8 @@ void route_entry_nexthop_add(struct route_entry *re, struct nexthop *nexthop) */ void route_entry_copy_nexthops(struct route_entry *re, struct nexthop *nh) { - assert(!re->ng.nexthop); - copy_nexthops(&re->ng.nexthop, nh, NULL); - for (struct nexthop *nexthop = nh; nexthop; nexthop = nexthop->next) - re->nexthop_num++; + assert(!re->ng->nexthop); + copy_nexthops(&re->ng->nexthop, nh, NULL); } /* Delete specified nexthop from the list. 
*/ @@ -220,8 +217,7 @@ void route_entry_nexthop_delete(struct route_entry *re, struct nexthop *nexthop) if (nexthop->prev) nexthop->prev->next = nexthop->next; else - re->ng.nexthop = nexthop->next; - re->nexthop_num--; + re->ng->nexthop = nexthop->next; } @@ -505,7 +501,7 @@ int zebra_rib_labeled_unicast(struct route_entry *re) if (re->type != ZEBRA_ROUTE_BGP) return 0; - for (ALL_NEXTHOPS(re->ng, nexthop)) + for (ALL_NEXTHOPS_PTR(re->ng, nexthop)) if (!nexthop->nh_label || !nexthop->nh_label->num_labels) return 0; @@ -529,26 +525,17 @@ void rib_install_kernel(struct route_node *rn, struct route_entry *re, srcdest_rnode_prefixes(rn, &p, &src_p); if (info->safi != SAFI_UNICAST) { - for (ALL_NEXTHOPS(re->ng, nexthop)) + for (ALL_NEXTHOPS_PTR(re->ng, nexthop)) SET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); return; - } else { - struct nexthop *prev; - - for (ALL_NEXTHOPS(re->ng, nexthop)) { - UNSET_FLAG (nexthop->flags, NEXTHOP_FLAG_DUPLICATE); - for (ALL_NEXTHOPS(re->ng, prev)) { - if (prev == nexthop) - break; - if (nexthop_same_firsthop(nexthop, prev)) { - SET_FLAG(nexthop->flags, - NEXTHOP_FLAG_DUPLICATE); - break; - } - } - } } + + /* + * Install the resolved nexthop object first. 
+ */ + zebra_nhg_install_kernel(zebra_nhg_lookup_id(re->nhe_id)); + /* * If this is a replace to a new RE let the originator of the RE * know that they've lost @@ -586,7 +573,7 @@ void rib_install_kernel(struct route_node *rn, struct route_entry *re, if (!RIB_SYSTEM_ROUTE(old)) { /* Clear old route's FIB flags */ - for (ALL_NEXTHOPS(old->ng, nexthop)) { + for (ALL_NEXTHOPS_PTR(old->ng, nexthop)) { UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); } @@ -624,7 +611,7 @@ void rib_uninstall_kernel(struct route_node *rn, struct route_entry *re) if (info->safi != SAFI_UNICAST) { UNSET_FLAG(re->status, ROUTE_ENTRY_INSTALLED); - for (ALL_NEXTHOPS(re->ng, nexthop)) + for (ALL_NEXTHOPS_PTR(re->ng, nexthop)) UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); return; } @@ -684,7 +671,7 @@ static void rib_uninstall(struct route_node *rn, struct route_entry *re) re->fib_ng.nexthop = NULL; } - for (ALL_NEXTHOPS(re->ng, nexthop)) + for (ALL_NEXTHOPS_PTR(re->ng, nexthop)) UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); } @@ -860,7 +847,7 @@ static void rib_process_add_fib(struct zebra_vrf *zvrf, struct route_node *rn, /* Update real nexthop. This may actually determine if nexthop is active * or not. */ - if (!nexthop_group_active_nexthop_num(&new->ng)) { + if (!nexthop_group_active_nexthop_num(new->ng)) { UNSET_FLAG(new->status, ROUTE_ENTRY_CHANGED); return; } @@ -929,7 +916,7 @@ static void rib_process_update_fib(struct zebra_vrf *zvrf, /* Update the nexthop; we could determine here that nexthop is * inactive. */ - if (nexthop_group_active_nexthop_num(&new->ng)) + if (nexthop_group_active_nexthop_num(new->ng)) nh_active = 1; /* If nexthop is active, install the selected route, if @@ -1047,7 +1034,7 @@ static struct route_entry *rib_choose_best(struct route_entry *current, /* both are connected. are either loop or vrf? 
*/ struct nexthop *nexthop = NULL; - for (ALL_NEXTHOPS(alternate->ng, nexthop)) { + for (ALL_NEXTHOPS_PTR(alternate->ng, nexthop)) { struct interface *ifp = if_lookup_by_index( nexthop->ifindex, alternate->vrf_id); @@ -1055,7 +1042,7 @@ static struct route_entry *rib_choose_best(struct route_entry *current, return alternate; } - for (ALL_NEXTHOPS(current->ng, nexthop)) { + for (ALL_NEXTHOPS_PTR(current->ng, nexthop)) { struct interface *ifp = if_lookup_by_index( nexthop->ifindex, current->vrf_id); @@ -1086,6 +1073,12 @@ static struct route_entry *rib_choose_best(struct route_entry *current, return current; } +/* Core function for processing nexthop group contexts's off metaq */ +static void rib_nhg_process(struct nhg_ctx *ctx) +{ + nhg_ctx_process(ctx); +} + /* Core function for processing routing information base. */ static void rib_process(struct route_node *rn) { @@ -1380,7 +1373,7 @@ static void zebra_rib_fixup_system(struct route_node *rn) SET_FLAG(re->status, ROUTE_ENTRY_INSTALLED); UNSET_FLAG(re->status, ROUTE_ENTRY_QUEUED); - for (ALL_NEXTHOPS(re->ng, nhop)) { + for (ALL_NEXTHOPS_PTR(re->ng, nhop)) { if (CHECK_FLAG(nhop->flags, NEXTHOP_FLAG_RECURSIVE)) continue; @@ -1428,76 +1421,20 @@ static bool rib_update_re_from_ctx(struct route_entry *re, * status. */ - /* - * First check the fib nexthop-group, if it's present. The comparison - * here is quite strict: we require that the fib sets match exactly. + /* Check both fib group and notif group for equivalence. + * + * Let's assume the nexthops are ordered here to save time. 
*/ - matched = false; - do { - if (re->fib_ng.nexthop == NULL) - break; - - matched = true; - - /* First check the route's fib nexthops */ - for (ALL_NEXTHOPS(re->fib_ng, nexthop)) { - - if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) - continue; - - ctx_nexthop = NULL; - for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), - ctx_nexthop)) { - if (nexthop_same(ctx_nexthop, nexthop)) - break; - } - - if (ctx_nexthop == NULL) { - /* Nexthop not in the new installed set */ - if (IS_ZEBRA_DEBUG_RIB_DETAILED) { - nexthop2str(nexthop, nh_str, - sizeof(nh_str)); - zlog_debug("update_from_ctx: no match for fib nh %s", - nh_str); - } - - matched = false; - break; - } - } - - if (!matched) - break; - - /* Check the new installed set */ - ctx_nexthop = NULL; - for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), ctx_nexthop)) { - - if (CHECK_FLAG(ctx_nexthop->flags, - NEXTHOP_FLAG_RECURSIVE)) - continue; - - /* Compare with the current group's nexthops */ - nexthop = NULL; - for (ALL_NEXTHOPS(re->fib_ng, nexthop)) { - if (nexthop_same(nexthop, ctx_nexthop)) - break; - } - - if (nexthop == NULL) { - /* Nexthop not in the old installed set */ - if (IS_ZEBRA_DEBUG_RIB_DETAILED) { - nexthop2str(ctx_nexthop, nh_str, - sizeof(nh_str)); - zlog_debug("update_from_ctx: no fib match for notif nh %s", - nh_str); - } - matched = false; - break; - } + if (nexthop_group_equal(&re->fib_ng, dplane_ctx_get_ng(ctx)) == false) { + if (IS_ZEBRA_DEBUG_RIB_DETAILED) { + zlog_debug( + "%u:%s update_from_ctx: notif nh and fib nh mismatch", + re->vrf_id, dest_str); } - } while (0); + matched = false; + } else + matched = true; /* If the new FIB set matches the existing FIB set, we're done. */ if (matched) { @@ -1530,9 +1467,22 @@ static bool rib_update_re_from_ctx(struct route_entry *re, * walk the RIB group, looking for the 'installable' candidate * nexthops, and then check those against the set * that is actually installed. + * + * Assume nexthops are ordered here as well. 
*/ matched = true; - for (ALL_NEXTHOPS(re->ng, nexthop)) { + + ctx_nexthop = dplane_ctx_get_ng(ctx)->nexthop; + + /* Get the first `installed` one to check against. + * If the dataplane doesn't set these to be what was actually installed, + * it will just be whatever was in re->ng? + */ + if (CHECK_FLAG(ctx_nexthop->flags, NEXTHOP_FLAG_RECURSIVE) + || !CHECK_FLAG(ctx_nexthop->flags, NEXTHOP_FLAG_ACTIVE)) + ctx_nexthop = nexthop_next_active_resolved(ctx_nexthop); + + for (ALL_NEXTHOPS_PTR(re->ng, nexthop)) { if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) continue; @@ -1541,20 +1491,15 @@ static bool rib_update_re_from_ctx(struct route_entry *re, continue; /* Check for a FIB nexthop corresponding to the RIB nexthop */ - ctx_nexthop = NULL; - for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), ctx_nexthop)) { - if (nexthop_same(ctx_nexthop, nexthop)) - break; - } - - /* If the FIB doesn't know about the nexthop, - * it's not installed - */ - if (ctx_nexthop == NULL) { + if (nexthop_same(ctx_nexthop, nexthop) == false) { + /* If the FIB doesn't know about the nexthop, + * it's not installed + */ if (IS_ZEBRA_DEBUG_RIB_DETAILED) { nexthop2str(nexthop, nh_str, sizeof(nh_str)); - zlog_debug("update_from_ctx: no notif match for rib nh %s", - nh_str); + zlog_debug( + "update_from_ctx: no notif match for rib nh %s", + nh_str); } matched = false; @@ -1578,6 +1523,8 @@ static bool rib_update_re_from_ctx(struct route_entry *re, UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); } + + ctx_nexthop = nexthop_next_active_resolved(ctx_nexthop); } /* If all nexthops were processed, we're done */ @@ -2062,19 +2009,28 @@ done: dplane_ctx_fini(&ctx); } -/* Take a list of route_node structs and return 1, if there was a record - * picked from it and processed by rib_process(). Don't process more, - * than one RN record; operate only in the specified sub-queue. 
- */ -static unsigned int process_subq(struct list *subq, uint8_t qindex) +static void process_subq_nhg(struct listnode *lnode) { - struct listnode *lnode = listhead(subq); - struct route_node *rnode; - rib_dest_t *dest; - struct zebra_vrf *zvrf = NULL; + struct nhg_ctx *ctx = NULL; + uint8_t qindex = route_info[ZEBRA_ROUTE_NHG].meta_q_map; - if (!lnode) - return 0; + ctx = listgetdata(lnode); + + if (!ctx) + return; + + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + zlog_debug("NHG Context id=%u dequeued from sub-queue %u", + ctx->id, qindex); + + rib_nhg_process(ctx); +} + +static void process_subq_route(struct listnode *lnode, uint8_t qindex) +{ + struct route_node *rnode = NULL; + rib_dest_t *dest = NULL; + struct zebra_vrf *zvrf = NULL; rnode = listgetdata(lnode); dest = rib_dest_from_rnode(rnode); @@ -2104,7 +2060,26 @@ static unsigned int process_subq(struct list *subq, uint8_t qindex) } #endif route_unlock_node(rnode); +} + +/* Take a list of route_node structs and return 1, if there was a record + * picked from it and processed by rib_process(). Don't process more, + * than one RN record; operate only in the specified sub-queue. + */ +static unsigned int process_subq(struct list *subq, uint8_t qindex) +{ + struct listnode *lnode = listhead(subq); + + if (!lnode) + return 0; + + if (qindex == route_info[ZEBRA_ROUTE_NHG].meta_q_map) + process_subq_nhg(lnode); + else + process_subq_route(lnode, qindex); + list_delete_node(subq, lnode); + return 1; } @@ -2162,11 +2137,14 @@ static wq_item_status meta_queue_process(struct work_queue *dummy, void *data) * original metaqueue index value will win and we'll end up with * the route node enqueued once. 
*/ -static void rib_meta_queue_add(struct meta_queue *mq, struct route_node *rn) +static int rib_meta_queue_add(struct meta_queue *mq, void *data) { + struct route_node *rn = NULL; struct route_entry *re = NULL, *curr_re = NULL; uint8_t qindex = MQ_SIZE, curr_qindex = MQ_SIZE; + rn = (struct route_node *)data; + RNODE_FOREACH_RE (rn, curr_re) { curr_qindex = route_info[curr_re->type].meta_q_map; @@ -2177,7 +2155,7 @@ static void rib_meta_queue_add(struct meta_queue *mq, struct route_node *rn) } if (!re) - return; + return -1; /* Invariant: at this point we always have rn->info set. */ if (CHECK_FLAG(rib_dest_from_rnode(rn)->flags, @@ -2186,7 +2164,7 @@ static void rib_meta_queue_add(struct meta_queue *mq, struct route_node *rn) rnode_debug(rn, re->vrf_id, "rn %p is already queued in sub-queue %u", (void *)rn, qindex); - return; + return -1; } SET_FLAG(rib_dest_from_rnode(rn)->flags, RIB_ROUTE_QUEUED(qindex)); @@ -2197,26 +2175,37 @@ static void rib_meta_queue_add(struct meta_queue *mq, struct route_node *rn) if (IS_ZEBRA_DEBUG_RIB_DETAILED) rnode_debug(rn, re->vrf_id, "queued rn %p into sub-queue %u", (void *)rn, qindex); + + return 0; } -/* Add route_node to work queue and schedule processing */ -void rib_queue_add(struct route_node *rn) +static int rib_meta_queue_nhg_add(struct meta_queue *mq, void *data) { - assert(rn); + struct nhg_ctx *ctx = NULL; + uint8_t qindex = route_info[ZEBRA_ROUTE_NHG].meta_q_map; - /* Pointless to queue a route_node with no RIB entries to add or remove - */ - if (!rnode_to_ribs(rn)) { - zlog_debug("%s: called for route_node (%p, %d) with no ribs", - __func__, (void *)rn, rn->lock); - zlog_backtrace(LOG_DEBUG); - return; - } + ctx = (struct nhg_ctx *)data; + + if (!ctx) + return -1; + listnode_add(mq->subq[qindex], ctx); + mq->size++; + + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + zlog_debug("NHG Context id=%u queued into sub-queue %u", + ctx->id, qindex); + + return 0; +} + +static int mq_add_handler(void *data, + int (*mq_add_func)(struct 
meta_queue *mq, void *data)) +{ if (zrouter.ribq == NULL) { flog_err(EC_ZEBRA_WQ_NONEXISTENT, "%s: work_queue does not exist!", __func__); - return; + return -1; } /* @@ -2230,9 +2219,31 @@ void rib_queue_add(struct route_node *rn) if (work_queue_empty(zrouter.ribq)) work_queue_add(zrouter.ribq, zrouter.mq); - rib_meta_queue_add(zrouter.mq, rn); + return mq_add_func(zrouter.mq, data); +} - return; +/* Add route_node to work queue and schedule processing */ +int rib_queue_add(struct route_node *rn) +{ + assert(rn); + + /* Pointless to queue a route_node with no RIB entries to add or remove + */ + if (!rnode_to_ribs(rn)) { + zlog_debug("%s: called for route_node (%p, %d) with no ribs", + __func__, (void *)rn, rn->lock); + zlog_backtrace(LOG_DEBUG); + return -1; + } + + return mq_add_handler(rn, &rib_meta_queue_add); +} + +int rib_queue_nhg_add(struct nhg_ctx *ctx) +{ + assert(ctx); + + return mq_add_handler(ctx, &rib_meta_queue_nhg_add); } /* Create new meta queue. @@ -2400,6 +2411,7 @@ static void rib_addnode(struct route_node *rn, void rib_unlink(struct route_node *rn, struct route_entry *re) { rib_dest_t *dest; + struct nhg_hash_entry *nhe = NULL; assert(rn && re); @@ -2414,7 +2426,13 @@ void rib_unlink(struct route_node *rn, struct route_entry *re) if (dest->selected_fib == re) dest->selected_fib = NULL; - nexthops_free(re->ng.nexthop); + if (re->nhe_id) { + nhe = zebra_nhg_lookup_id(re->nhe_id); + if (nhe) + zebra_nhg_decrement_ref(nhe); + } else if (re->ng) + nexthop_group_delete(&re->ng); + nexthops_free(re->fib_ng.nexthop); XFREE(MTYPE_RE, re); @@ -2480,9 +2498,10 @@ void _route_entry_dump(const char *func, union prefixconstptr pp, "%s: metric == %u, mtu == %u, distance == %u, flags == %u, status == %u", straddr, re->metric, re->mtu, re->distance, re->flags, re->status); zlog_debug("%s: nexthop_num == %u, nexthop_active_num == %u", straddr, - re->nexthop_num, re->nexthop_active_num); + nexthop_group_nexthop_num(re->ng), + 
nexthop_group_active_nexthop_num(re->ng)); - for (ALL_NEXTHOPS(re->ng, nexthop)) { + for (ALL_NEXTHOPS_PTR(re->ng, nexthop)) { struct interface *ifp; struct vrf *vrf = vrf_lookup_by_id(nexthop->vrf_id); @@ -2633,6 +2652,7 @@ void rib_lookup_and_pushup(struct prefix_ipv4 *p, vrf_id_t vrf_id) int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p, struct prefix_ipv6 *src_p, struct route_entry *re) { + struct nhg_hash_entry *nhe = NULL; struct route_table *table; struct route_node *rn; struct route_entry *same = NULL; @@ -2646,10 +2666,58 @@ int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p, /* Lookup table. */ table = zebra_vrf_table_with_table_id(afi, safi, re->vrf_id, re->table); if (!table) { + if (re->ng) + nexthop_group_delete(&re->ng); XFREE(MTYPE_RE, re); return 0; } + if (re->nhe_id) { + nhe = zebra_nhg_lookup_id(re->nhe_id); + + if (!nhe) { + flog_err( + EC_ZEBRA_TABLE_LOOKUP_FAILED, + "Zebra failed to find the nexthop hash entry for id=%u in a route entry", + re->nhe_id); + XFREE(MTYPE_RE, re); + return -1; + } + } else { + nhe = zebra_nhg_rib_find(0, re->ng, afi); + + /* + * The nexthops got copied over into an nhe, + * so free them now. + */ + nexthop_group_delete(&re->ng); + + if (!nhe) { + char buf[PREFIX_STRLEN] = ""; + char buf2[PREFIX_STRLEN] = ""; + + flog_err( + EC_ZEBRA_TABLE_LOOKUP_FAILED, + "Zebra failed to find or create a nexthop hash entry for %s%s%s", + prefix2str(p, buf, sizeof(buf)), + src_p ? " from " : "", + src_p ? prefix2str(src_p, buf2, sizeof(buf2)) + : ""); + + XFREE(MTYPE_RE, re); + return -1; + } + } + + /* + * Attach the re to the nhe's nexthop group. + * + * TODO: This will need to change when we start getting IDs from upper + * level protocols, as the refcnt might be wrong, since it checks + * if old_id != new_id. + */ + zebra_nhg_re_update_ref(re, nhe); + /* Make it sure prefixlen is applied to the prefix. 
*/ apply_mask(p); if (src_p) @@ -2726,8 +2794,8 @@ int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p, void rib_delete(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type, unsigned short instance, int flags, struct prefix *p, struct prefix_ipv6 *src_p, const struct nexthop *nh, - uint32_t table_id, uint32_t metric, uint8_t distance, - bool fromkernel) + uint32_t nhe_id, uint32_t table_id, uint32_t metric, + uint8_t distance, bool fromkernel) { struct route_table *table; struct route_node *rn; @@ -2790,31 +2858,37 @@ void rib_delete(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type, if (re->type == ZEBRA_ROUTE_KERNEL && re->metric != metric) continue; - if (re->type == ZEBRA_ROUTE_CONNECT && (rtnh = re->ng.nexthop) + if (re->type == ZEBRA_ROUTE_CONNECT && (rtnh = re->ng->nexthop) && rtnh->type == NEXTHOP_TYPE_IFINDEX && nh) { if (rtnh->ifindex != nh->ifindex) continue; same = re; break; } + /* Make sure that the route found has the same gateway. */ - else { - if (nh == NULL) { + if (nhe_id && re->nhe_id == nhe_id) { + same = re; + break; + } + + if (nh == NULL) { + same = re; + break; + } + for (ALL_NEXTHOPS_PTR(re->ng, rtnh)) { + /* + * No guarantee all kernel send nh with labels + * on delete. + */ + if (nexthop_same_no_labels(rtnh, nh)) { same = re; break; } - for (ALL_NEXTHOPS(re->ng, rtnh)) - /* - * No guarantee all kernel send nh with labels - * on delete. - */ - if (nexthop_same_no_labels(rtnh, nh)) { - same = re; - break; - } - if (same) - break; } + + if (same) + break; } /* If same type of route can't be found and this message is from kernel. */ @@ -2844,7 +2918,7 @@ void rib_delete(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type, if (allow_delete) { UNSET_FLAG(fib->status, ROUTE_ENTRY_INSTALLED); /* Unset flags. 
*/ - for (rtnh = fib->ng.nexthop; rtnh; + for (rtnh = fib->ng->nexthop; rtnh; rtnh = rtnh->next) UNSET_FLAG(rtnh->flags, NEXTHOP_FLAG_FIB); @@ -2900,7 +2974,7 @@ void rib_delete(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type, if (CHECK_FLAG(flags, ZEBRA_FLAG_EVPN_ROUTE)) { struct nexthop *tmp_nh; - for (ALL_NEXTHOPS(re->ng, tmp_nh)) { + for (ALL_NEXTHOPS_PTR(re->ng, tmp_nh)) { struct ipaddr vtep_ip; memset(&vtep_ip, 0, sizeof(struct ipaddr)); @@ -2935,11 +3009,11 @@ void rib_delete(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type, int rib_add(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type, unsigned short instance, int flags, struct prefix *p, struct prefix_ipv6 *src_p, const struct nexthop *nh, - uint32_t table_id, uint32_t metric, uint32_t mtu, uint8_t distance, - route_tag_t tag) + uint32_t nhe_id, uint32_t table_id, uint32_t metric, uint32_t mtu, + uint8_t distance, route_tag_t tag) { - struct route_entry *re; - struct nexthop *nexthop; + struct route_entry *re = NULL; + struct nexthop *nexthop = NULL; /* Allocate new route_entry structure. */ re = XCALLOC(MTYPE_RE, sizeof(struct route_entry)); @@ -2951,14 +3025,18 @@ int rib_add(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type, re->mtu = mtu; re->table = table_id; re->vrf_id = vrf_id; - re->nexthop_num = 0; re->uptime = monotime(NULL); re->tag = tag; + re->nhe_id = nhe_id; - /* Add nexthop. */ - nexthop = nexthop_new(); - *nexthop = *nh; - route_entry_nexthop_add(re, nexthop); + if (!nhe_id) { + re->ng = nexthop_group_new(); + + /* Add nexthop. 
*/ + nexthop = nexthop_new(); + *nexthop = *nh; + route_entry_nexthop_add(re, nexthop); + } return rib_add_multipath(afi, safi, p, src_p, re); } @@ -3218,7 +3296,7 @@ void rib_sweep_table(struct route_table *table) * this decision needs to be revisited */ SET_FLAG(re->status, ROUTE_ENTRY_INSTALLED); - for (ALL_NEXTHOPS(re->ng, nexthop)) + for (ALL_NEXTHOPS_PTR(re->ng, nexthop)) SET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); rib_uninstall_kernel(rn, re); @@ -3242,6 +3320,7 @@ int rib_sweep_route(struct thread *t) } zebra_router_sweep_route(); + zebra_router_sweep_nhgs(); return 0; } @@ -3412,6 +3491,12 @@ static int rib_process_dplane_results(struct thread *thread) rib_process_dplane_notify(ctx); break; + case DPLANE_OP_NH_INSTALL: + case DPLANE_OP_NH_UPDATE: + case DPLANE_OP_NH_DELETE: + zebra_nhg_dplane_result(ctx); + break; + case DPLANE_OP_LSP_INSTALL: case DPLANE_OP_LSP_UPDATE: case DPLANE_OP_LSP_DELETE: diff --git a/zebra/zebra_rnh.c b/zebra/zebra_rnh.c index 5df5d94f4..60e23cc4d 100644 --- a/zebra/zebra_rnh.c +++ b/zebra/zebra_rnh.c @@ -384,7 +384,7 @@ static void zebra_rnh_clear_nexthop_rnh_filters(struct route_entry *re) struct nexthop *nexthop; if (re) { - for (nexthop = re->ng.nexthop; nexthop; + for (nexthop = re->ng->nexthop; nexthop; nexthop = nexthop->next) { UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_RNH_FILTERED); } @@ -403,7 +403,7 @@ static int zebra_rnh_apply_nht_rmap(afi_t afi, struct zebra_vrf *zvrf, route_map_result_t ret; if (prn && re) { - for (nexthop = re->ng.nexthop; nexthop; + for (nexthop = re->ng->nexthop; nexthop; nexthop = nexthop->next) { ret = zebra_nht_route_map_check( afi, proto, &prn->p, zvrf, re, nexthop); @@ -688,7 +688,7 @@ zebra_rnh_resolve_nexthop_entry(struct zebra_vrf *zvrf, afi_t afi, /* Just being SELECTED isn't quite enough - must * have an installed nexthop to be useful. 
*/ - for (ALL_NEXTHOPS(re->ng, nexthop)) { + for (ALL_NEXTHOPS_PTR(re->ng, nexthop)) { if (rnh_nexthop_valid(re, nexthop)) break; } @@ -707,7 +707,7 @@ zebra_rnh_resolve_nexthop_entry(struct zebra_vrf *zvrf, afi_t afi, break; if (re->type == ZEBRA_ROUTE_NHRP) { - for (nexthop = re->ng.nexthop; nexthop; + for (nexthop = re->ng->nexthop; nexthop; nexthop = nexthop->next) if (nexthop->type == NEXTHOP_TYPE_IFINDEX) @@ -940,7 +940,7 @@ static void free_state(vrf_id_t vrf_id, struct route_entry *re, return; /* free RE and nexthops */ - nexthops_free(re->ng.nexthop); + nexthop_group_delete(&re->ng); XFREE(MTYPE_RE, re); } @@ -963,8 +963,9 @@ static void copy_state(struct rnh *rnh, struct route_entry *re, state->metric = re->metric; state->vrf_id = re->vrf_id; state->status = re->status; + state->ng = nexthop_group_new(); - route_entry_copy_nexthops(state, re->ng.nexthop); + route_entry_copy_nexthops(state, re->ng->nexthop); rnh->state = state; } @@ -982,10 +983,11 @@ static int compare_state(struct route_entry *r1, struct route_entry *r2) if (r1->metric != r2->metric) return 1; - if (r1->nexthop_num != r2->nexthop_num) + if (nexthop_group_nexthop_num(r1->ng) + != nexthop_group_nexthop_num(r2->ng)) return 1; - if (nexthop_group_hash(&r1->ng) != nexthop_group_hash(&r2->ng)) + if (nexthop_group_hash(r1->ng) != nexthop_group_hash(r2->ng)) return 1; return 0; @@ -1035,7 +1037,7 @@ static int send_client(struct rnh *rnh, struct zserv *client, rnh_type_t type, num = 0; nump = stream_get_endp(s); stream_putc(s, 0); - for (ALL_NEXTHOPS(re->ng, nh)) + for (ALL_NEXTHOPS_PTR(re->ng, nh)) if (rnh_nexthop_valid(re, nh)) { stream_putl(s, nh->vrf_id); stream_putc(s, nh->type); @@ -1135,7 +1137,7 @@ static void print_rnh(struct route_node *rn, struct vty *vty) if (rnh->state) { vty_out(vty, " resolved via %s\n", zebra_route_string(rnh->state->type)); - for (nexthop = rnh->state->ng.nexthop; nexthop; + for (nexthop = rnh->state->ng->nexthop; nexthop; nexthop = nexthop->next) 
print_nh(nexthop, vty); } else diff --git a/zebra/zebra_router.c b/zebra/zebra_router.c index 1e9f9e4ec..e5319c64a 100644 --- a/zebra/zebra_router.c +++ b/zebra/zebra_router.c @@ -29,7 +29,7 @@ #include "zebra_pbr.h" #include "zebra_vxlan.h" #include "zebra_mlag.h" -#include "zebra_nhg.h" +#include "zebra_nhg_private.h" #include "debug.h" DEFINE_MTYPE_STATIC(ZEBRA, RIB_TABLE_INFO, "RIB table info") @@ -154,6 +154,11 @@ void zebra_router_sweep_route(void) } } +void zebra_router_sweep_nhgs(void) +{ + zebra_nhg_sweep_table(zrouter.nhgs_id); +} + static void zebra_router_free_table(struct zebra_router_table *zrt) { void *table_info; @@ -218,6 +223,11 @@ void zebra_router_terminate(void) zebra_vxlan_disable(); zebra_mlag_terminate(); + hash_clean(zrouter.nhgs, zebra_nhg_free); + hash_free(zrouter.nhgs); + hash_clean(zrouter.nhgs_id, NULL); + hash_free(zrouter.nhgs_id); + hash_clean(zrouter.rules_hash, zebra_pbr_rules_free); hash_free(zrouter.rules_hash); @@ -253,4 +263,11 @@ void zebra_router_init(void) zrouter.iptable_hash = hash_create_size(8, zebra_pbr_iptable_hash_key, zebra_pbr_iptable_hash_equal, "IPtable Hash Entry"); + + zrouter.nhgs = + hash_create_size(8, zebra_nhg_hash_key, zebra_nhg_hash_equal, + "Zebra Router Nexthop Groups"); + zrouter.nhgs_id = + hash_create_size(8, zebra_nhg_id_key, zebra_nhg_hash_id_equal, + "Zebra Router Nexthop Groups ID index"); } diff --git a/zebra/zebra_router.h b/zebra/zebra_router.h index 25a7adac1..ac4c96147 100644 --- a/zebra/zebra_router.h +++ b/zebra/zebra_router.h @@ -132,6 +132,12 @@ struct zebra_router { * Time for when we sweep the rib from old routes */ time_t startup_time; + + /* + * The hash of nexthop groups associated with this router + */ + struct hash *nhgs; + struct hash *nhgs_id; }; #define GRACEFUL_RESTART_TIME 60 @@ -139,6 +145,7 @@ struct zebra_router { extern struct zebra_router zrouter; extern void zebra_router_init(void); +extern void zebra_router_cleanup(void); extern void zebra_router_terminate(void); 
extern struct route_table *zebra_router_find_table(struct zebra_vrf *zvrf, @@ -153,6 +160,7 @@ extern void zebra_router_release_table(struct zebra_vrf *zvrf, uint32_t tableid, extern int zebra_router_config_write(struct vty *vty); extern void zebra_router_sweep_route(void); +extern void zebra_router_sweep_nhgs(void); extern void zebra_router_show_table_summary(struct vty *vty); diff --git a/zebra/zebra_snmp.c b/zebra/zebra_snmp.c index 74eab765c..56c766432 100644 --- a/zebra/zebra_snmp.c +++ b/zebra/zebra_snmp.c @@ -285,8 +285,8 @@ static void check_replace(struct route_node *np2, struct route_entry *re2, return; } - if (in_addr_cmp((uint8_t *)&(*re)->ng.nexthop->gate.ipv4, - (uint8_t *)&re2->ng.nexthop->gate.ipv4) + if (in_addr_cmp((uint8_t *)&(*re)->ng->nexthop->gate.ipv4, + (uint8_t *)&re2->ng->nexthop->gate.ipv4) <= 0) return; @@ -372,7 +372,7 @@ static void get_fwtable_route_node(struct variable *v, oid objid[], (uint8_t *)&dest)) { RNODE_FOREACH_RE (*np, *re) { if (!in_addr_cmp((uint8_t *)&(*re) - ->ng.nexthop + ->ng->nexthop ->gate.ipv4, (uint8_t *)&nexthop)) if (proto @@ -406,7 +406,7 @@ static void get_fwtable_route_node(struct variable *v, oid objid[], || ((policy == policy2) && (proto < proto2)) || ((policy == policy2) && (proto == proto2) && (in_addr_cmp( - (uint8_t *)&re2->ng.nexthop + (uint8_t *)&re2->ng->nexthop ->gate.ipv4, (uint8_t *)&nexthop) >= 0))) @@ -432,7 +432,7 @@ static void get_fwtable_route_node(struct variable *v, oid objid[], { struct nexthop *nexthop; - nexthop = (*re)->ng.nexthop; + nexthop = (*re)->ng->nexthop; if (nexthop) { pnt = (uint8_t *)&nexthop->gate.ipv4; for (i = 0; i < 4; i++) @@ -462,7 +462,7 @@ static uint8_t *ipFwTable(struct variable *v, oid objid[], size_t *objid_len, if (!np) return NULL; - nexthop = re->ng.nexthop; + nexthop = re->ng->nexthop; if (!nexthop) return NULL; diff --git a/zebra/zebra_vty.c b/zebra/zebra_vty.c index 92f8dd1ec..9d1745473 100644 --- a/zebra/zebra_vty.c +++ b/zebra/zebra_vty.c @@ -52,6 +52,8 
@@ #include "zebra/ipforward.h" #include "zebra/zebra_vxlan_private.h" #include "zebra/zebra_pbr.h" +#include "zebra/zebra_nhg.h" +#include "zebra/interface.h" extern int allow_delete; @@ -62,7 +64,7 @@ static int do_show_ip_route(struct vty *vty, const char *vrf_name, afi_t afi, bool supernets_only, int type, unsigned short ospf_instance_id); static void vty_show_ip_route_detail(struct vty *vty, struct route_node *rn, - int mcast, bool use_fib); + int mcast, bool use_fib, bool show_ng); static void vty_show_ip_route_summary(struct vty *vty, struct route_table *table); static void vty_show_ip_route_summary_prefix(struct vty *vty, @@ -154,7 +156,7 @@ DEFUN (show_ip_rpf_addr, re = rib_match_ipv4_multicast(VRF_DEFAULT, addr, &rn); if (re) - vty_show_ip_route_detail(vty, rn, 1, false); + vty_show_ip_route_detail(vty, rn, 1, false, false); else vty_out(vty, "%% No match for RPF lookup\n"); @@ -186,7 +188,7 @@ static char re_status_output_char(struct route_entry *re, struct nexthop *nhop) /* New RIB. Detailed information for IPv4 route. 
*/ static void vty_show_ip_route_detail(struct vty *vty, struct route_node *rn, - int mcast, bool use_fib) + int mcast, bool use_fib, bool show_ng) { struct route_entry *re; struct nexthop *nexthop; @@ -258,7 +260,10 @@ static void vty_show_ip_route_detail(struct vty *vty, struct route_node *rn, tm->tm_hour); vty_out(vty, " ago\n"); - for (ALL_NEXTHOPS(re->ng, nexthop)) { + if (show_ng) + vty_out(vty, " Nexthop Group ID: %u\n", re->nhe_id); + + for (ALL_NEXTHOPS_PTR(re->ng, nexthop)) { char addrstr[32]; vty_out(vty, " %c%s", @@ -408,7 +413,7 @@ static void vty_show_ip_route(struct vty *vty, struct route_node *rn, if (is_fib) nhg = rib_active_nhg(re); else - nhg = &(re->ng); + nhg = re->ng; if (json) { json_route = json_object_new_object(); @@ -461,9 +466,9 @@ static void vty_show_ip_route(struct vty *vty, struct route_node *rn, json_object_int_add(json_route, "internalFlags", re->flags); json_object_int_add(json_route, "internalNextHopNum", - re->nexthop_num); + nexthop_group_nexthop_num(re->ng)); json_object_int_add(json_route, "internalNextHopActiveNum", - re->nexthop_active_num); + nexthop_group_active_nexthop_num(re->ng)); if (uptime < ONE_DAY_SECOND) sprintf(buf, "%02d:%02d:%02d", tm->tm_hour, tm->tm_min, tm->tm_sec); @@ -1101,6 +1106,295 @@ DEFUN (ip_nht_default_route, return CMD_SUCCESS; } +static void show_nexthop_group_out(struct vty *vty, struct nhg_hash_entry *nhe) +{ + struct nexthop *nexthop = NULL; + struct nhg_connected *rb_node_dep = NULL; + char buf[SRCDEST2STR_BUFFER]; + + struct vrf *nhe_vrf = vrf_lookup_by_id(nhe->vrf_id); + + vty_out(vty, "ID: %u\n", nhe->id); + vty_out(vty, " RefCnt: %d\n", nhe->refcnt); + + if (nhe_vrf) + vty_out(vty, " VRF: %s\n", nhe_vrf->name); + else + vty_out(vty, " VRF: UNKNOWN\n"); + + if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_UNHASHABLE)) + vty_out(vty, " Duplicate - from kernel not hashable\n"); + + if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_VALID)) { + vty_out(vty, " Valid"); + if (CHECK_FLAG(nhe->flags, 
NEXTHOP_GROUP_INSTALLED)) + vty_out(vty, ", Installed"); + vty_out(vty, "\n"); + } + if (nhe->ifp) + vty_out(vty, " Interface Index: %d\n", nhe->ifp->ifindex); + + if (!zebra_nhg_depends_is_empty(nhe)) { + vty_out(vty, " Depends:"); + frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) { + vty_out(vty, " (%u)", rb_node_dep->nhe->id); + } + vty_out(vty, "\n"); + } + + for (ALL_NEXTHOPS_PTR(nhe->nhg, nexthop)) { + if (!CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) + vty_out(vty, " "); + else + /* Make recursive nexthops a bit more clear */ + vty_out(vty, " "); + + switch (nexthop->type) { + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + vty_out(vty, " %s", inet_ntoa(nexthop->gate.ipv4)); + if (nexthop->ifindex) + vty_out(vty, ", %s", + ifindex2ifname(nexthop->ifindex, + nexthop->vrf_id)); + break; + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_IPV6_IFINDEX: + vty_out(vty, " %s", + inet_ntop(AF_INET6, &nexthop->gate.ipv6, buf, + sizeof(buf))); + if (nexthop->ifindex) + vty_out(vty, ", %s", + ifindex2ifname(nexthop->ifindex, + nexthop->vrf_id)); + break; + + case NEXTHOP_TYPE_IFINDEX: + vty_out(vty, " directly connected %s", + ifindex2ifname(nexthop->ifindex, + nexthop->vrf_id)); + break; + case NEXTHOP_TYPE_BLACKHOLE: + vty_out(vty, " unreachable"); + switch (nexthop->bh_type) { + case BLACKHOLE_REJECT: + vty_out(vty, " (ICMP unreachable)"); + break; + case BLACKHOLE_ADMINPROHIB: + vty_out(vty, " (ICMP admin-prohibited)"); + break; + case BLACKHOLE_NULL: + vty_out(vty, " (blackhole)"); + break; + case BLACKHOLE_UNSPEC: + break; + } + break; + default: + break; + } + + struct vrf *vrf = vrf_lookup_by_id(nexthop->vrf_id); + + if (vrf) + vty_out(vty, " (vrf %s)", vrf->name); + else + vty_out(vty, " (vrf UNKNOWN)"); + + if (!CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) + vty_out(vty, " inactive"); + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK)) + vty_out(vty, " onlink"); + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) + 
vty_out(vty, " (recursive)"); + + switch (nexthop->type) { + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + if (nexthop->src.ipv4.s_addr) { + if (inet_ntop(AF_INET, &nexthop->src.ipv4, buf, + sizeof(buf))) + vty_out(vty, ", src %s", buf); + } + break; + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_IPV6_IFINDEX: + if (!IPV6_ADDR_SAME(&nexthop->src.ipv6, &in6addr_any)) { + if (inet_ntop(AF_INET6, &nexthop->src.ipv6, buf, + sizeof(buf))) + vty_out(vty, ", src %s", buf); + } + break; + default: + break; + } + + /* Label information */ + if (nexthop->nh_label && nexthop->nh_label->num_labels) { + vty_out(vty, ", label %s", + mpls_label2str(nexthop->nh_label->num_labels, + nexthop->nh_label->label, buf, + sizeof(buf), 1)); + } + + vty_out(vty, "\n"); + } + + if (!zebra_nhg_dependents_is_empty(nhe)) { + vty_out(vty, " Dependents:"); + frr_each(nhg_connected_tree, &nhe->nhg_dependents, + rb_node_dep) { + vty_out(vty, " (%u)", rb_node_dep->nhe->id); + } + vty_out(vty, "\n"); + } + +} + +static int show_nexthop_group_id_cmd_helper(struct vty *vty, uint32_t id) +{ + struct nhg_hash_entry *nhe = NULL; + + nhe = zebra_nhg_lookup_id(id); + + if (nhe) + show_nexthop_group_out(vty, nhe); + else { + vty_out(vty, "Nexthop Group ID: %u does not exist\n", id); + return CMD_WARNING; + } + return CMD_SUCCESS; +} + +static void show_nexthop_group_cmd_helper(struct vty *vty, + struct zebra_vrf *zvrf, afi_t afi) +{ + struct list *list = hash_to_list(zrouter.nhgs); + struct nhg_hash_entry *nhe = NULL; + struct listnode *node = NULL; + + for (ALL_LIST_ELEMENTS_RO(list, node, nhe)) { + + if (afi && nhe->afi != afi) + continue; + + if (nhe->vrf_id != zvrf->vrf->vrf_id) + continue; + + show_nexthop_group_out(vty, nhe); + } + + list_delete(&list); +} + +static void if_nexthop_group_dump_vty(struct vty *vty, struct interface *ifp) +{ + struct zebra_if *zebra_if = NULL; + struct nhg_connected *rb_node_dep = NULL; + + zebra_if = ifp->info; + + if (!if_nhg_dependents_is_empty(ifp)) { + 
vty_out(vty, "Interface %s:\n", ifp->name); + + frr_each(nhg_connected_tree, &zebra_if->nhg_dependents, + rb_node_dep) { + vty_out(vty, " "); + show_nexthop_group_out(vty, rb_node_dep->nhe); + } + } +} + +DEFPY (show_interface_nexthop_group, + show_interface_nexthop_group_cmd, + "show interface [IFNAME$if_name] nexthop-group", + SHOW_STR + "Interface status and configuration\n" + "Interface name\n" + "Show Nexthop Groups\n") +{ + struct vrf *vrf = NULL; + struct interface *ifp = NULL; + bool found = false; + + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { + if (if_name) { + ifp = if_lookup_by_name(if_name, vrf->vrf_id); + if (ifp) { + if_nexthop_group_dump_vty(vty, ifp); + found = true; + } + } else { + FOR_ALL_INTERFACES (vrf, ifp) + if_nexthop_group_dump_vty(vty, ifp); + found = true; + } + } + + if (!found) { + vty_out(vty, "%% Can't find interface %s\n", if_name); + return CMD_WARNING; + } + + return CMD_SUCCESS; +} + +DEFPY (show_nexthop_group, + show_nexthop_group_cmd, + "show nexthop-group <(0-4294967295)$id|[<ip$v4|ipv6$v6>] [vrf <NAME$vrf_name|all$vrf_all>]>", + SHOW_STR + "Show Nexthop Groups\n" + "Nexthop Group ID\n" + IP_STR + IP6_STR + VRF_FULL_CMD_HELP_STR) +{ + + struct zebra_vrf *zvrf = NULL; + afi_t afi = 0; + + if (id) + return show_nexthop_group_id_cmd_helper(vty, id); + + if (v4) + afi = AFI_IP; + else if (v6) + afi = AFI_IP6; + + if (vrf_all) { + struct vrf *vrf; + + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { + struct zebra_vrf *zvrf; + + zvrf = vrf->info; + if (!zvrf) + continue; + + vty_out(vty, "VRF: %s\n", vrf->name); + show_nexthop_group_cmd_helper(vty, zvrf, afi); + } + + return CMD_SUCCESS; + } + + if (vrf_name) + zvrf = zebra_vrf_lookup_by_name(vrf_name); + else + zvrf = zebra_vrf_lookup_by_name(VRF_DEFAULT_NAME); + + if (!zvrf) { + vty_out(vty, "VRF %s specified does not exist", vrf_name); + return CMD_WARNING; + } + + show_nexthop_group_cmd_helper(vty, zvrf, afi); + + return CMD_SUCCESS; +} + DEFUN (no_ip_nht_default_route, 
no_ip_nht_default_route_cmd, "no ip nht resolve-via-default", @@ -1265,7 +1559,7 @@ DEFPY (show_route_detail, |X:X::X:X/M$prefix\ >\ >\ - [json$json]", + [json$json] [nexthop-group$ng]", SHOW_STR IP_STR "IPv6 forwarding table\n" @@ -1279,7 +1573,8 @@ DEFPY (show_route_detail, VRF_FULL_CMD_HELP_STR "IPv6 Address\n" "IPv6 prefix\n" - JSON_STR) + JSON_STR + "Nexthop Group Information\n") { afi_t afi = ipv4 ? AFI_IP : AFI_IP6; struct route_table *table; @@ -1288,6 +1583,7 @@ DEFPY (show_route_detail, bool use_fib = !!fib; rib_dest_t *dest; bool network_found = false; + bool show_ng = !!ng; if (address_str) prefix_str = address_str; @@ -1321,10 +1617,10 @@ DEFPY (show_route_detail, network_found = true; if (json) - vty_show_ip_route_detail_json(vty, rn, - use_fib); + vty_show_ip_route_detail_json(vty, rn, use_fib); else - vty_show_ip_route_detail(vty, rn, 0, use_fib); + vty_show_ip_route_detail(vty, rn, 0, use_fib, + show_ng); route_unlock_node(rn); } @@ -1376,7 +1672,7 @@ DEFPY (show_route_detail, if (json) vty_show_ip_route_detail_json(vty, rn, use_fib); else - vty_show_ip_route_detail(vty, rn, 0, use_fib); + vty_show_ip_route_detail(vty, rn, 0, use_fib, show_ng); route_unlock_node(rn); } @@ -1539,7 +1835,7 @@ static void vty_show_ip_route_summary_prefix(struct vty *vty, fib_cnt[ZEBRA_ROUTE_TOTAL]++; fib_cnt[re->type]++; } - for (nexthop = re->ng.nexthop; (!cnt && nexthop); + for (nexthop = re->ng->nexthop; (!cnt && nexthop); nexthop = nexthop->next) { cnt++; rib_cnt[ZEBRA_ROUTE_TOTAL]++; @@ -3033,6 +3329,9 @@ void zebra_vty_init(void) install_element(CONFIG_NODE, &zebra_packet_process_cmd); install_element(CONFIG_NODE, &no_zebra_packet_process_cmd); + install_element(VIEW_NODE, &show_nexthop_group_cmd); + install_element(VIEW_NODE, &show_interface_nexthop_group_cmd); + install_element(VIEW_NODE, &show_vrf_cmd); install_element(VIEW_NODE, &show_vrf_vni_cmd); install_element(VIEW_NODE, &show_route_cmd); |