-rw-r--r--   bgpd/bgp_attr.c               3
-rw-r--r--   bgpd/bgp_evpn_vty.c         168
-rw-r--r--   bgpd/bgp_lcommunity.c         6
-rw-r--r--   bgpd/bgp_route.c             64
-rw-r--r--   bgpd/bgp_routemap.c          49
-rw-r--r--   bgpd/bgp_vpn.c                4
-rw-r--r--   bgpd/bgpd.c                  58
-rw-r--r--   bgpd/bgpd.h                   4
-rwxr-xr-x   configure.ac                 14
-rw-r--r--   doc/developer/library.rst     1
-rw-r--r--   doc/developer/rcu.rst       269
-rw-r--r--   doc/developer/subdir.am       1
-rw-r--r--   lib/frr_pthread.c            15
-rw-r--r--   lib/frr_pthread.h             3
-rw-r--r--   lib/frrcu.c                 527
-rw-r--r--   lib/frrcu.h                 172
-rw-r--r--   lib/libfrr.c                  2
-rw-r--r--   lib/log.c                     4
-rw-r--r--   lib/seqlock.c               190
-rw-r--r--   lib/seqlock.h                40
-rw-r--r--   lib/sigevent.c               42
-rw-r--r--   lib/stream.c                 14
-rw-r--r--   lib/stream.h                  3
-rw-r--r--   lib/subdir.am                 2
-rw-r--r--   lib/thread.c                  6
-rw-r--r--   lib/zebra.h                   4
-rw-r--r--   ospfd/ospf_packet.c          30
-rw-r--r--   pimd/pim_neighbor.c           5
-rw-r--r--   pimd/pim_nht.c               10
-rw-r--r--   ripd/ripd.c                  30
-rw-r--r--   tests/lib/test_atomlist.c    10
-rw-r--r--   tests/lib/test_seqlock.c     34
-rw-r--r--   zebra/zebra_dplane.c          2
-rw-r--r--   zebra/zebra_fpm_netlink.c     2
-rw-r--r--   zebra/zebra_vxlan.c          35
35 files changed, 1567 insertions, 256 deletions
diff --git a/bgpd/bgp_attr.c b/bgpd/bgp_attr.c
index c64d153f1..e21c84355 100644
--- a/bgpd/bgp_attr.c
+++ b/bgpd/bgp_attr.c
@@ -2956,7 +2956,8 @@ void bgp_packet_mpattr_prefix(struct stream *s, afi_t afi, safi_t safi,
addpath_encode, addpath_tx_id);
} else if (safi == SAFI_LABELED_UNICAST) {
/* Prefix write with label. */
- stream_put_labeled_prefix(s, p, label);
+ stream_put_labeled_prefix(s, p, label, addpath_encode,
+ addpath_tx_id);
} else if (safi == SAFI_FLOWSPEC) {
if (PSIZE (p->prefixlen)+2 < FLOWSPEC_NLRI_SIZELIMIT)
stream_putc(s, PSIZE (p->prefixlen)+2);
diff --git a/bgpd/bgp_evpn_vty.c b/bgpd/bgp_evpn_vty.c
index a22082c07..d031d34f1 100644
--- a/bgpd/bgp_evpn_vty.c
+++ b/bgpd/bgp_evpn_vty.c
@@ -1069,11 +1069,9 @@ static int bgp_show_ethernet_vpn(struct vty *vty, struct prefix_rd *prd,
pi = pi->next) {
total_count++;
if (type == bgp_show_type_neighbor) {
- union sockunion *su = output_arg;
+ struct peer *peer = output_arg;
- if (pi->peer->su_remote == NULL
- || !sockunion_same(
- pi->peer->su_remote, su))
+ if (peer_cmp(peer, pi->peer) != 0)
continue;
}
if (header) {
@@ -1292,29 +1290,41 @@ DEFUN(show_ip_bgp_l2vpn_evpn_rd_tags,
0);
}
-DEFUN(show_ip_bgp_l2vpn_evpn_all_neighbor_routes,
- show_ip_bgp_l2vpn_evpn_all_neighbor_routes_cmd,
- "show [ip] bgp l2vpn evpn all neighbors A.B.C.D routes [json]",
+DEFUN(show_ip_bgp_l2vpn_evpn_neighbor_routes,
+ show_ip_bgp_l2vpn_evpn_neighbor_routes_cmd,
+ "show [ip] bgp l2vpn evpn neighbors <A.B.C.D|X:X::X:X|WORD> routes [json]",
SHOW_STR
IP_STR
BGP_STR
L2VPN_HELP_STR
EVPN_HELP_STR
- "Display information about all EVPN NLRIs\n"
"Detailed information on TCP and BGP neighbor connections\n"
- "Neighbor to display information about\n"
+ "IPv4 Neighbor to display information about\n"
+ "IPv6 Neighbor to display information about\n"
+ "Neighbor on BGP configured interface\n"
"Display routes learned from neighbor\n" JSON_STR)
{
- int idx_ipv4 = 0;
- union sockunion su;
+ int idx = 0;
struct peer *peer;
- int ret;
+ char *peerstr = NULL;
bool uj = use_json(argc, argv);
+ afi_t afi = AFI_L2VPN;
+ safi_t safi = SAFI_EVPN;
+ struct bgp *bgp = NULL;
- argv_find(argv, argc, "A.B.C.D", &idx_ipv4);
+ bgp_vty_find_and_parse_afi_safi_bgp(vty, argv, argc, &idx, &afi, &safi,
+ &bgp, uj);
+ if (!idx) {
+ vty_out(vty, "No index\n");
+ return CMD_WARNING;
+ }
+
+ /* neighbors <A.B.C.D|X:X::X:X|WORD> */
+ argv_find(argv, argc, "neighbors", &idx);
+ peerstr = argv[++idx]->arg;
- ret = str2sockunion(argv[idx_ipv4]->arg, &su);
- if (ret < 0) {
+ peer = peer_lookup_in_view(vty, bgp, peerstr, uj);
+ if (!peer) {
if (uj) {
json_object *json_no = NULL;
json_no = json_object_new_object();
@@ -1325,11 +1335,9 @@ DEFUN(show_ip_bgp_l2vpn_evpn_all_neighbor_routes,
json_object_free(json_no);
} else
vty_out(vty, "Malformed address: %s\n",
- argv[idx_ipv4]->arg);
+ argv[idx]->arg);
return CMD_WARNING;
}
-
- peer = peer_lookup(NULL, &su);
if (!peer || !peer->afc[AFI_L2VPN][SAFI_EVPN]) {
if (uj) {
json_object *json_no = NULL;
@@ -1345,13 +1353,13 @@ DEFUN(show_ip_bgp_l2vpn_evpn_all_neighbor_routes,
return CMD_WARNING;
}
- return bgp_show_ethernet_vpn(vty, NULL, bgp_show_type_neighbor, &su, 0,
+ return bgp_show_ethernet_vpn(vty, NULL, bgp_show_type_neighbor, peer, 0,
uj);
}
DEFUN(show_ip_bgp_l2vpn_evpn_rd_neighbor_routes,
show_ip_bgp_l2vpn_evpn_rd_neighbor_routes_cmd,
- "show [ip] bgp l2vpn evpn rd ASN:NN_OR_IP-ADDRESS:NN neighbors A.B.C.D routes [json]",
+ "show [ip] bgp l2vpn evpn rd ASN:NN_OR_IP-ADDRESS:NN neighbors <A.B.C.D|X:X::X:X|WORD> routes [json]",
SHOW_STR
IP_STR
BGP_STR
@@ -1360,20 +1368,30 @@ DEFUN(show_ip_bgp_l2vpn_evpn_rd_neighbor_routes,
"Display information for a route distinguisher\n"
"VPN Route Distinguisher\n"
"Detailed information on TCP and BGP neighbor connections\n"
- "Neighbor to display information about\n"
+ "IPv4 Neighbor to display information about\n"
+ "IPv6 Neighbor to display information about\n"
+ "Neighbor on BGP configured interface\n"
"Display routes learned from neighbor\n" JSON_STR)
{
int idx_ext_community = 0;
- int idx_ipv4 = 0;
+ int idx = 0;
int ret;
- union sockunion su;
struct peer *peer;
+ char *peerstr = NULL;
struct prefix_rd prd;
bool uj = use_json(argc, argv);
+ afi_t afi = AFI_L2VPN;
+ safi_t safi = SAFI_EVPN;
+ struct bgp *bgp = NULL;
- argv_find(argv, argc, "ASN:NN_OR_IP-ADDRESS:NN", &idx_ext_community);
- argv_find(argv, argc, "A.B.C.D", &idx_ipv4);
+ bgp_vty_find_and_parse_afi_safi_bgp(vty, argv, argc, &idx, &afi, &safi,
+ &bgp, uj);
+ if (!idx) {
+ vty_out(vty, "No index\n");
+ return CMD_WARNING;
+ }
+ argv_find(argv, argc, "ASN:NN_OR_IP-ADDRESS:NN", &idx_ext_community);
ret = str2prefix_rd(argv[idx_ext_community]->arg, &prd);
if (!ret) {
if (uj) {
@@ -1389,8 +1407,12 @@ DEFUN(show_ip_bgp_l2vpn_evpn_rd_neighbor_routes,
return CMD_WARNING;
}
- ret = str2sockunion(argv[idx_ipv4]->arg, &su);
- if (ret < 0) {
+ /* neighbors <A.B.C.D|X:X::X:X|WORD> */
+ argv_find(argv, argc, "neighbors", &idx);
+ peerstr = argv[++idx]->arg;
+
+ peer = peer_lookup_in_view(vty, bgp, peerstr, uj);
+ if (!peer) {
if (uj) {
json_object *json_no = NULL;
json_no = json_object_new_object();
@@ -1401,11 +1423,9 @@ DEFUN(show_ip_bgp_l2vpn_evpn_rd_neighbor_routes,
json_object_free(json_no);
} else
vty_out(vty, "Malformed address: %s\n",
- argv[idx_ext_community]->arg);
+ argv[idx]->arg);
return CMD_WARNING;
}
-
- peer = peer_lookup(NULL, &su);
if (!peer || !peer->afc[AFI_L2VPN][SAFI_EVPN]) {
if (uj) {
json_object *json_no = NULL;
@@ -1421,33 +1441,48 @@ DEFUN(show_ip_bgp_l2vpn_evpn_rd_neighbor_routes,
return CMD_WARNING;
}
- return bgp_show_ethernet_vpn(vty, &prd, bgp_show_type_neighbor, &su, 0,
+ return bgp_show_ethernet_vpn(vty, &prd, bgp_show_type_neighbor, peer, 0,
uj);
}
-DEFUN(show_ip_bgp_l2vpn_evpn_all_neighbor_advertised_routes,
- show_ip_bgp_l2vpn_evpn_all_neighbor_advertised_routes_cmd,
- "show [ip] bgp l2vpn evpn all neighbors A.B.C.D advertised-routes [json]",
+DEFUN(show_ip_bgp_l2vpn_evpn_neighbor_advertised_routes,
+ show_ip_bgp_l2vpn_evpn_neighbor_advertised_routes_cmd,
+ "show [ip] bgp l2vpn evpn neighbors <A.B.C.D|X:X::X:X|WORD> advertised-routes [json]",
SHOW_STR
IP_STR
BGP_STR
L2VPN_HELP_STR
EVPN_HELP_STR
- "Display information about all EVPN NLRIs\n"
"Detailed information on TCP and BGP neighbor connections\n"
- "Neighbor to display information about\n"
+ "IPv4 Neighbor to display information about\n"
+ "IPv6 Neighbor to display information about\n"
+ "Neighbor on BGP configured interface\n"
"Display the routes advertised to a BGP neighbor\n" JSON_STR)
{
- int idx_ipv4 = 0;
- int ret;
+ int idx = 0;
struct peer *peer;
- union sockunion su;
bool uj = use_json(argc, argv);
+ struct bgp *bgp = NULL;
+ afi_t afi = AFI_L2VPN;
+ safi_t safi = SAFI_EVPN;
+ char *peerstr = NULL;
+
+ if (uj)
+ argc--;
- argv_find(argv, argc, "A.B.C.D", &idx_ipv4);
+ bgp_vty_find_and_parse_afi_safi_bgp(vty, argv, argc, &idx, &afi, &safi,
+ &bgp, uj);
+ if (!idx) {
+ vty_out(vty, "No index\n");
+ return CMD_WARNING;
+ }
+
+ /* neighbors <A.B.C.D|X:X::X:X|WORD> */
+ argv_find(argv, argc, "neighbors", &idx);
+ peerstr = argv[++idx]->arg;
- ret = str2sockunion(argv[idx_ipv4]->arg, &su);
- if (ret < 0) {
+ peer = peer_lookup_in_view(vty, bgp, peerstr, uj);
+ if (!peer) {
if (uj) {
json_object *json_no = NULL;
json_no = json_object_new_object();
@@ -1458,10 +1493,9 @@ DEFUN(show_ip_bgp_l2vpn_evpn_all_neighbor_advertised_routes,
json_object_free(json_no);
} else
vty_out(vty, "Malformed address: %s\n",
- argv[idx_ipv4]->arg);
+ argv[idx]->arg);
return CMD_WARNING;
}
- peer = peer_lookup(NULL, &su);
if (!peer || !peer->afc[AFI_L2VPN][SAFI_EVPN]) {
if (uj) {
json_object *json_no = NULL;
@@ -1482,7 +1516,7 @@ DEFUN(show_ip_bgp_l2vpn_evpn_all_neighbor_advertised_routes,
DEFUN(show_ip_bgp_l2vpn_evpn_rd_neighbor_advertised_routes,
show_ip_bgp_l2vpn_evpn_rd_neighbor_advertised_routes_cmd,
- "show [ip] bgp l2vpn evpn rd ASN:NN_OR_IP-ADDRESS:NN neighbors A.B.C.D advertised-routes [json]",
+ "show [ip] bgp l2vpn evpn rd ASN:NN_OR_IP-ADDRESS:NN neighbors <A.B.C.D|X:X::X:X|WORD> advertised-routes [json]",
SHOW_STR
IP_STR
BGP_STR
@@ -1491,22 +1525,43 @@ DEFUN(show_ip_bgp_l2vpn_evpn_rd_neighbor_advertised_routes,
"Display information for a route distinguisher\n"
"VPN Route Distinguisher\n"
"Detailed information on TCP and BGP neighbor connections\n"
- "Neighbor to display information about\n"
+ "IPv4 Neighbor to display information about\n"
+ "IPv6 Neighbor to display information about\n"
+ "Neighbor on BGP configured interface\n"
"Display the routes advertised to a BGP neighbor\n" JSON_STR)
{
int idx_ext_community = 0;
- int idx_ipv4 = 0;
+ int idx = 0;
int ret;
struct peer *peer;
struct prefix_rd prd;
- union sockunion su;
+ struct bgp *bgp = NULL;
bool uj = use_json(argc, argv);
+ char *peerstr = NULL;
+ afi_t afi = AFI_L2VPN;
+ safi_t safi = SAFI_EVPN;
+
+ if (uj)
+ argc--;
+
+ bgp_vty_find_and_parse_afi_safi_bgp(vty, argv, argc, &idx, &afi, &safi,
+ &bgp, uj);
+ if (!idx) {
+ vty_out(vty, "No index\n");
+ return CMD_WARNING;
+ }
argv_find(argv, argc, "ASN:NN_OR_IP-ADDRESS:NN", &idx_ext_community);
- argv_find(argv, argc, "A.B.C.D", &idx_ipv4);
- ret = str2sockunion(argv[idx_ipv4]->arg, &su);
- if (ret < 0) {
+ /* neighbors <A.B.C.D|X:X::X:X|WORD> */
+ argv_find(argv, argc, "neighbors", &idx);
+ peerstr = argv[++idx]->arg;
+
+ peer = peer_lookup_in_view(vty, bgp, peerstr, uj);
+ if (!peer) {
if (uj) {
json_object *json_no = NULL;
json_no = json_object_new_object();
@@ -1517,10 +1572,9 @@ DEFUN(show_ip_bgp_l2vpn_evpn_rd_neighbor_advertised_routes,
json_object_free(json_no);
} else
vty_out(vty, "Malformed address: %s\n",
- argv[idx_ext_community]->arg);
+ argv[idx]->arg);
return CMD_WARNING;
}
- peer = peer_lookup(NULL, &su);
if (!peer || !peer->afc[AFI_L2VPN][SAFI_EVPN]) {
if (uj) {
json_object *json_no = NULL;
@@ -1599,7 +1653,7 @@ DEFUN(show_ip_bgp_evpn_rd_overlay,
use_json(argc, argv));
}
-/* For testing purpose, static route of MPLS-VPN. */
+/* For testing purpose, static route of EVPN RT-5. */
DEFUN(evpnrt5_network,
evpnrt5_network_cmd,
"network <A.B.C.D/M|X:X::X:X/M> rd ASN:NN_OR_IP-ADDRESS:NN ethtag WORD label WORD esi WORD gwip <A.B.C.D|X:X::X:X> routermac WORD [route-map WORD]",
@@ -1638,7 +1692,7 @@ DEFUN(evpnrt5_network,
argv[idx_routermac]->arg);
}
-/* For testing purpose, static route of MPLS-VPN. */
+/* For testing purpose, static route of EVPN RT-5. */
DEFUN(no_evpnrt5_network,
no_evpnrt5_network_cmd,
"no network <A.B.C.D/M|X:X::X:X/M> rd ASN:NN_OR_IP-ADDRESS:NN ethtag WORD label WORD esi WORD gwip <A.B.C.D|X:X::X:X>",
@@ -5323,12 +5377,12 @@ void bgp_ethernetvpn_init(void)
install_element(VIEW_NODE, &show_ip_bgp_l2vpn_evpn_all_tags_cmd);
install_element(VIEW_NODE, &show_ip_bgp_l2vpn_evpn_rd_tags_cmd);
install_element(VIEW_NODE,
- &show_ip_bgp_l2vpn_evpn_all_neighbor_routes_cmd);
+ &show_ip_bgp_l2vpn_evpn_neighbor_routes_cmd);
install_element(VIEW_NODE,
&show_ip_bgp_l2vpn_evpn_rd_neighbor_routes_cmd);
install_element(
VIEW_NODE,
- &show_ip_bgp_l2vpn_evpn_all_neighbor_advertised_routes_cmd);
+ &show_ip_bgp_l2vpn_evpn_neighbor_advertised_routes_cmd);
install_element(
VIEW_NODE,
&show_ip_bgp_l2vpn_evpn_rd_neighbor_advertised_routes_cmd);
diff --git a/bgpd/bgp_lcommunity.c b/bgpd/bgp_lcommunity.c
index 2b09a2954..aeb290719 100644
--- a/bgpd/bgp_lcommunity.c
+++ b/bgpd/bgp_lcommunity.c
@@ -439,7 +439,8 @@ struct lcommunity *lcommunity_str2com(const char *str)
enum lcommunity_token token = lcommunity_token_unknown;
struct lcommunity_val lval;
- while ((str = lcommunity_gettoken(str, &lval, &token))) {
+ do {
+ str = lcommunity_gettoken(str, &lval, &token);
switch (token) {
case lcommunity_token_val:
if (lcom == NULL)
@@ -452,7 +453,8 @@ struct lcommunity *lcommunity_str2com(const char *str)
lcommunity_free(&lcom);
return NULL;
}
- }
+ } while (str);
+
return lcom;
}
diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c
index a5591e29f..a37256837 100644
--- a/bgpd/bgp_route.c
+++ b/bgpd/bgp_route.c
@@ -5332,6 +5332,13 @@ static void bgp_purge_af_static_redist_routes(struct bgp *bgp, afi_t afi,
struct bgp_node *rn;
struct bgp_path_info *pi;
+ /* Do not install the aggregate route if BGP is in the
+ * process of termination.
+ */
+ if (bgp_flag_check(bgp, BGP_FLAG_DELETE_IN_PROGRESS) ||
+ (bgp->peer_self == NULL))
+ return;
+
table = bgp->rib[afi][safi];
for (rn = bgp_table_top(table); rn; rn = bgp_route_next(rn)) {
for (pi = bgp_node_get_bgp_path_info(rn); pi; pi = pi->next) {
@@ -10493,63 +10500,6 @@ static int bgp_show_prefix_longer(struct vty *vty, struct bgp *bgp,
return ret;
}
-static struct peer *peer_lookup_in_view(struct vty *vty, struct bgp *bgp,
- const char *ip_str, bool use_json)
-{
- int ret;
- struct peer *peer;
- union sockunion su;
-
- /* Get peer sockunion. */
- ret = str2sockunion(ip_str, &su);
- if (ret < 0) {
- peer = peer_lookup_by_conf_if(bgp, ip_str);
- if (!peer) {
- peer = peer_lookup_by_hostname(bgp, ip_str);
-
- if (!peer) {
- if (use_json) {
- json_object *json_no = NULL;
- json_no = json_object_new_object();
- json_object_string_add(
- json_no,
- "malformedAddressOrName",
- ip_str);
- vty_out(vty, "%s\n",
- json_object_to_json_string_ext(
- json_no,
- JSON_C_TO_STRING_PRETTY));
- json_object_free(json_no);
- } else
- vty_out(vty,
- "%% Malformed address or name: %s\n",
- ip_str);
- return NULL;
- }
- }
- return peer;
- }
-
- /* Peer structure lookup. */
- peer = peer_lookup(bgp, &su);
- if (!peer) {
- if (use_json) {
- json_object *json_no = NULL;
- json_no = json_object_new_object();
- json_object_string_add(json_no, "warning",
- "No such neighbor in this view/vrf");
- vty_out(vty, "%s\n",
- json_object_to_json_string_ext(
- json_no, JSON_C_TO_STRING_PRETTY));
- json_object_free(json_no);
- } else
- vty_out(vty, "No such neighbor in this view/vrf\n");
- return NULL;
- }
-
- return peer;
-}
-
enum bgp_stats {
BGP_STATS_MAXBITLEN = 0,
BGP_STATS_RIB,
diff --git a/bgpd/bgp_routemap.c b/bgpd/bgp_routemap.c
index 5ffc416dc..7f1a9b71c 100644
--- a/bgpd/bgp_routemap.c
+++ b/bgpd/bgp_routemap.c
@@ -3406,31 +3406,34 @@ int bgp_route_map_update_timer(struct thread *thread)
static void bgp_route_map_mark_update(const char *rmap_name)
{
- if (bm->t_rmap_update == NULL) {
- struct listnode *node, *nnode;
- struct bgp *bgp;
-
- /* rmap_update_timer of 0 means don't do route updates */
- if (bm->rmap_update_timer) {
- bm->t_rmap_update = NULL;
- thread_add_timer(bm->master, bgp_route_map_update_timer,
- NULL, bm->rmap_update_timer,
- &bm->t_rmap_update);
-
- /* Signal the groups that a route-map update event has
- * started */
- for (ALL_LIST_ELEMENTS(bm->bgp, node, nnode, bgp))
- update_group_policy_update(bgp,
- BGP_POLICY_ROUTE_MAP,
- rmap_name, 1, 1);
- } else {
- for (ALL_LIST_ELEMENTS(bm->bgp, node, nnode, bgp))
- bgp_route_map_process_update(bgp, rmap_name, 0);
+ struct listnode *node, *nnode;
+ struct bgp *bgp;
+
+ /* If new update is received before the current timer timed out,
+ * turn it off and start a new timer.
+ */
+ if (bm->t_rmap_update != NULL)
+ THREAD_OFF(bm->t_rmap_update);
+
+ /* rmap_update_timer of 0 means don't do route updates */
+ if (bm->rmap_update_timer) {
+ thread_add_timer(bm->master, bgp_route_map_update_timer,
+ NULL, bm->rmap_update_timer,
+ &bm->t_rmap_update);
+
+ /* Signal the groups that a route-map update event has
+ * started */
+ for (ALL_LIST_ELEMENTS(bm->bgp, node, nnode, bgp))
+ update_group_policy_update(bgp,
+ BGP_POLICY_ROUTE_MAP,
+ rmap_name, 1, 1);
+ } else {
+ for (ALL_LIST_ELEMENTS(bm->bgp, node, nnode, bgp))
+ bgp_route_map_process_update(bgp, rmap_name, 0);
#if ENABLE_BGP_VNC
- zlog_debug("%s: calling vnc_routemap_update", __func__);
- vnc_routemap_update(bgp, __func__);
+ zlog_debug("%s: calling vnc_routemap_update", __func__);
+ vnc_routemap_update(bgp, __func__);
#endif
- }
}
}
diff --git a/bgpd/bgp_vpn.c b/bgpd/bgp_vpn.c
index 54ca980ca..09b1cb429 100644
--- a/bgpd/bgp_vpn.c
+++ b/bgpd/bgp_vpn.c
@@ -74,7 +74,7 @@ int show_adj_route_vpn(struct vty *vty, struct peer *peer,
json_object_string_add(json_ocode, "incomplete", "?");
}
- for (rn = bgp_table_top(bgp->rib[afi][SAFI_MPLS_VPN]); rn;
+ for (rn = bgp_table_top(bgp->rib[afi][safi]); rn;
rn = bgp_route_next(rn)) {
if (prd && memcmp(rn->p.u.val, prd->val, 8) != 0)
continue;
@@ -200,7 +200,7 @@ int show_adj_route_vpn(struct vty *vty, struct peer *peer,
json_array);
} else {
route_vty_out_tmp(vty, &rm->p, path->attr,
- SAFI_MPLS_VPN, use_json,
+ safi, use_json,
json_array);
}
}
diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c
index d79a68dca..b5f267cc3 100644
--- a/bgpd/bgpd.c
+++ b/bgpd/bgpd.c
@@ -8047,3 +8047,61 @@ void bgp_terminate(void)
bgp_mac_finish();
}
+
+struct peer *peer_lookup_in_view(struct vty *vty, struct bgp *bgp,
+ const char *ip_str, bool use_json)
+{
+ int ret;
+ struct peer *peer;
+ union sockunion su;
+
+ /* Get peer sockunion. */
+ ret = str2sockunion(ip_str, &su);
+ if (ret < 0) {
+ peer = peer_lookup_by_conf_if(bgp, ip_str);
+ if (!peer) {
+ peer = peer_lookup_by_hostname(bgp, ip_str);
+
+ if (!peer) {
+ if (use_json) {
+ json_object *json_no = NULL;
+ json_no = json_object_new_object();
+ json_object_string_add(
+ json_no,
+ "malformedAddressOrName",
+ ip_str);
+ vty_out(vty, "%s\n",
+ json_object_to_json_string_ext(
+ json_no,
+ JSON_C_TO_STRING_PRETTY));
+ json_object_free(json_no);
+ } else
+ vty_out(vty,
+ "%% Malformed address or name: %s\n",
+ ip_str);
+ return NULL;
+ }
+ }
+ return peer;
+ }
+
+ /* Peer structure lookup. */
+ peer = peer_lookup(bgp, &su);
+ if (!peer) {
+ if (use_json) {
+ json_object *json_no = NULL;
+ json_no = json_object_new_object();
+ json_object_string_add(json_no, "warning",
+ "No such neighbor in this view/vrf");
+ vty_out(vty, "%s\n",
+ json_object_to_json_string_ext(
+ json_no, JSON_C_TO_STRING_PRETTY));
+ json_object_free(json_no);
+ } else
+ vty_out(vty, "No such neighbor in this view/vrf\n");
+ return NULL;
+ }
+
+ return peer;
+}
+
diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h
index 777db0ce2..9e05fd562 100644
--- a/bgpd/bgpd.h
+++ b/bgpd/bgpd.h
@@ -1927,4 +1927,8 @@ extern void bgp_unset_redist_vrf_bitmaps(struct bgp *, vrf_id_t);
/* For benefit of rfapi */
extern struct peer *peer_new(struct bgp *bgp);
+
+extern struct peer *peer_lookup_in_view(struct vty *vty, struct bgp *bgp,
+ const char *ip_str, bool use_json);
+
#endif /* _QUAGGA_BGPD_H */
diff --git a/configure.ac b/configure.ac
index 961336fbd..134c8692d 100755
--- a/configure.ac
+++ b/configure.ac
@@ -572,6 +572,20 @@ AC_ARG_ENABLE([thread-sanitizer],
AS_HELP_STRING([--enable-thread-sanitizer], [enable ThreadSanitizer support for detecting data races]))
AC_ARG_ENABLE([memory-sanitizer],
AS_HELP_STRING([--enable-memory-sanitizer], [enable MemorySanitizer support for detecting uninitialized memory reads]))
+AC_ARG_WITH([crypto],
+  AS_HELP_STRING([--with-crypto=<internal|openssl>], [choose between different implementations of cryptographic functions (default value is --with-crypto=internal)]))
+
+#if openssl, else use the internal
+AS_IF([test x"${with_crypto}" = x"openssl"], [
+AC_CHECK_LIB([crypto], [EVP_DigestInit], [LIBS="$LIBS -lcrypto"], [], [])
+if test $ac_cv_lib_crypto_EVP_DigestInit = no; then
+ AC_MSG_ERROR([build with openssl has been specified but openssl library was not found on your system])
+else
+ AC_DEFINE([CRYPTO_OPENSSL], [1], [Compile with openssl support])
+fi
+], [test x"${with_crypto}" = x"internal" || test x"${with_crypto}" = x"" ], [AC_DEFINE([CRYPTO_INTERNAL], [1], [Compile with internal cryptographic implementation])
+], [AC_MSG_ERROR([Unknown value for --with-crypto])]
+)
AS_IF([test "${enable_clippy_only}" != "yes"], [
AC_CHECK_HEADERS([json-c/json.h])
diff --git a/doc/developer/library.rst b/doc/developer/library.rst
index 4ba0c0ebc..7cd493ccc 100644
--- a/doc/developer/library.rst
+++ b/doc/developer/library.rst
@@ -8,6 +8,7 @@ Library Facilities (libfrr)
:maxdepth: 2
memtypes
+ rcu
lists
logging
hooks
diff --git a/doc/developer/rcu.rst b/doc/developer/rcu.rst
new file mode 100644
index 000000000..c2ddf93f5
--- /dev/null
+++ b/doc/developer/rcu.rst
@@ -0,0 +1,269 @@
+.. highlight:: c
+
+RCU
+===
+
+Introduction
+------------
+
+RCU (Read-Copy-Update) is, fundamentally, a paradigm of multithreaded
+operation (and not a set of APIs). The core ideas are:
+
+* longer, complicated updates to structures are made only on private,
+ "invisible" copies. Other threads, when they access the structure, see an
+ older (but consistent) copy.
+
+* once done, the updated copy is swapped in with a single atomic operation so
+  that other threads see either the old or the new data, but never an
+  inconsistent state in between.
+
+* the old instance is only released after making sure that no other thread
+  can possibly still be reading it.
+
+For more information, please search for general or Linux kernel RCU
+documentation; there is no way this doc can be comprehensive in explaining the
+interactions:
+
+* https://en.wikipedia.org/wiki/Read-copy-update
+* https://www.kernel.org/doc/html/latest/kernel-hacking/locking.html#avoiding-locks-read-copy-update
+* https://lwn.net/Articles/262464/
+* http://www.rdrop.com/users/paulmck/RCU/rclock_OLS.2001.05.01c.pdf
+* http://lse.sourceforge.net/locking/rcupdate.html
+
+RCU, the TL;DR
+^^^^^^^^^^^^^^
+
+#. data structures are always consistent for reading. That's the "R" part.
+#. reading never blocks / takes a lock.
+#. rcu_read_lock is not a lock in the traditional sense. Think of it as a
+   "reservation"; it records the *oldest* state the thread might still be
+   seeing, which therefore can't be deleted yet.
+#. you create some object, finish it up, and then publish it.
+#. publishing is an ``atomic_*`` call with ``memory_order_release``, which
+ tells the compiler to make sure prior memory writes have completed before
+ doing the atomic op.
+#. ``ATOMLIST_*`` ``add`` operations do the ``memory_order_release`` for you.
+#. you can't touch the object after it is published, except with atomic ops.
+#. because you can't touch it, if you want to change it you make a new copy,
+ work on that, and then publish the new copy. That's the "CU" part.
+#. deleting the object is also an atomic op.
+#. other threads that started working before you published / deleted an object
+ might not see the new object / still see the deleted object.
+#. because other threads may still see deleted objects, the ``free()`` needs
+ to be delayed. That's what :c:func:`rcu_free()` is for.
+
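+Putting the pieces together, a minimal sketch of the pattern might look like
+this -- ``struct cfg``, ``MTYPE_CFG`` and ``global_cfg`` are made up for
+illustration and assume the usual ``frrcu.h``/``frratomic.h``/``memory.h``
+includes; only the embedded ``rcu_head`` and the ``rcu_*``/``atomic_*`` calls
+are the actual requirements::
+
+   struct cfg {
+      uint32_t some_value;
+      struct rcu_head rcu_head;   /* needed for rcu_free() */
+   };
+
+   /* the published copy, only ever accessed through atomic ops */
+   static struct cfg *_Atomic global_cfg;
+
+   void reader(void)
+   {
+      struct cfg *c;
+
+      rcu_read_lock();
+      c = atomic_load_explicit(&global_cfg, memory_order_acquire);
+      /* ... use c; it stays valid until rcu_read_unlock() ... */
+      rcu_read_unlock();
+   }
+
+   void update(uint32_t new_value)
+   {
+      struct cfg *new = XCALLOC(MTYPE_CFG, sizeof(*new)), *old;
+
+      new->some_value = new_value;   /* prepare the private copy */
+
+      /* rcu_free() (via rcu_enqueue()) must be called with RCU held */
+      rcu_read_lock();
+      old = atomic_exchange_explicit(&global_cfg, new,
+                                     memory_order_acq_rel);
+      if (old)
+         rcu_free(MTYPE_CFG, old, rcu_head);
+      rcu_read_unlock();
+   }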
+
+When (not) to use RCU
+^^^^^^^^^^^^^^^^^^^^^
+
+RCU is designed for read-heavy workloads where objects are updated relatively
+rarely, but frequently accessed. Do *not* indiscriminately replace locking by
+RCU patterns.
+
+The "copy" part of RCU implies that, while updating, several copies of a given
+object exist in parallel. Even after the updated copy is swapped in, the old
+object remains queued for freeing until all other threads are guaranteed to
+not be accessing it anymore, due to passing a sequence point. In addition to
+the increased memory usage, there may be some bursted (due to batching) malloc
+contention when the RCU cleanup thread does its thing and frees memory.
+
+Other useful patterns
+^^^^^^^^^^^^^^^^^^^^^
+
+In addition to the full "copy object, apply changes, atomically update"
+approach, there are 2 "reduced" usage cases that can be done:
+
+* atomically updating single pieces of a particular object, e.g. some flags
+ or configuration piece
+
+* straight up read-only / immutable objects
+
+Both of these cases can be considered RCU "subsets". For example, if the items
+on an atomic list only carry a single integer value that needs to be updated,
+that value can be updated atomically without copying the entire object.
+However, the object still needs to be
+free'd through :c:func:`rcu_free()` since reading/updating and deleting might
+be happening concurrently. The same applies for immutable objects; deletion
+might still race with reading so they need to be free'd through RCU.
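+
+As a sketch of the first case, an item that only ever needs one of its fields
+changed can keep that field ``_Atomic`` and skip the copy entirely, while
+deletion still goes through :c:func:`rcu_free()`. ``struct counter_item`` and
+``MTYPE_COUNTER`` are made up for illustration::
+
+   struct counter_item {
+      _Atomic uint32_t packets;   /* updated in place, no copy needed */
+      struct rcu_head rcu_head;
+   };
+
+   void counter_item_bump(struct counter_item *item)
+   {
+      /* any thread, any time */
+      atomic_fetch_add_explicit(&item->packets, 1, memory_order_relaxed);
+   }
+
+   void counter_item_delete(struct counter_item *item)
+   {
+      /* deletion still needs RCU: readers/updaters may race with it */
+      rcu_read_lock();
+      rcu_free(MTYPE_COUNTER, item, rcu_head);
+      rcu_read_unlock();
+   }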
+
+FRR API
+-------
+
+Before diving into detail on the provided functions, it is important to note
+that the FRR RCU API covers the **cleanup part of RCU, not the read-copy-update
+paradigm itself**. These parts are handled by standard C11 atomic operations,
+and by extension through the atomic data structures (ATOMLIST, ATOMSORT & co.)
+
+The ``rcu_*`` functions only make sense in conjunction with these RCU access
+patterns. If you're calling the RCU API but not using these, something is
+wrong. The other way around is not necessarily true; it is possible to use
+atomic ops & datastructures with other types of locking, e.g. rwlocks.
+
+.. c:function:: void rcu_read_lock()
+.. c:function:: void rcu_read_unlock()
+
+ These functions acquire / release the RCU read-side lock. All access to
+ RCU-guarded data must be inside a block guarded by these. Any number of
+ threads may hold the RCU read-side lock at a given point in time, including
+ both no threads at all and all threads.
+
+ The functions implement a depth counter, i.e. can be nested. The nested
+ calls are cheap, since they only increment/decrement the counter.
+ Therefore, any place that uses RCU data and doesn't have a guarantee that
+ the caller holds RCU (e.g. ``lib/`` code) should just have its own
+ rcu_read_lock/rcu_read_unlock pair.
+
+   At the "root" level (i.e. un-nested), these calls can incur the cost of one
+   syscall (to ``futex()``). That puts them at about the same cost as a
+   mutex lock/unlock.
+
+   The ``thread_master`` code currently always holds RCU everywhere, except
+   while doing the actual ``poll()`` syscall. This is both an optimization and
+   an "easement" into getting RCU going. The current implementation contract
+   is that any ``struct thread *`` callback is called with an RCU holding
+   depth of 1, and that this is owned by the thread, so it may (should) drop
+   and reacquire it when doing some longer-running work.
+
+ .. warning::
+
+ The RCU read-side lock must be held **continuously** for the entire time
+ any piece of RCU data is used. This includes any access to RCU data
+ after the initial ``atomic_load``. If the RCU read-side lock is
+ released, any RCU-protected pointers as well as the data they refer to
+ become invalid, as another thread may have called :c:func:`rcu_free` on
+ them.
+
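+   As a hypothetical sketch, a ``lib/`` helper that cannot rely on its caller
+   already holding RCU takes its own lock/unlock pair; thanks to the nesting
+   counter this is cheap if the caller does hold it. ``struct entry`` and
+   ``entry_table`` are made up for illustration::
+
+      /* hypothetical: struct entry { char name[32]; ... };
+       *               struct entry *_Atomic entry_table[MAX_ID];
+       */
+      bool lookup_name(uint32_t id, char *buf, size_t len)
+      {
+         struct entry *e;
+         bool found = false;
+
+         rcu_read_lock();
+         e = atomic_load_explicit(&entry_table[id], memory_order_acquire);
+         if (e) {
+            /* copy out while still holding RCU; per the warning above,
+             * e may be freed as soon as rcu_read_unlock() is reached
+             */
+            strlcpy(buf, e->name, len);
+            found = true;
+         }
+         rcu_read_unlock();
+         return found;
+      }
+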
+.. c:type:: struct rcu_head
+.. c:type:: struct rcu_head_close
+.. c:type:: struct rcu_action
+
+ The ``rcu_head`` structures are small (16-byte) bits that contain the
+ queueing machinery for the RCU sweeper/cleanup mechanisms.
+
+ Any piece of data that is cleaned up by RCU needs to have a matching
+ ``rcu_head`` embedded in it. If there is more than one cleanup operation
+ to be done (e.g. closing a file descriptor), more than one ``rcu_head`` may
+ be embedded.
+
+ .. warning::
+
+ It is not possible to reuse a ``rcu_head``. It is owned by the RCU code
+ as soon as ``rcu_*`` is called on it.
+
+ The ``_close`` variant carries an extra ``int fd`` field to store the fd to
+ be closed.
+
+ To minimize the amount of memory used for ``rcu_head``, details about the
+ RCU operation to be performed are moved into the ``rcu_action`` structure.
+ It contains e.g. the MTYPE for :c:func:`rcu_free` calls. The pointer to be
+ freed is stored as an offset relative to the ``rcu_head``, which means it
+ must be embedded as a struct field so the offset is constant.
+
+ The ``rcu_action`` structure is an implementation detail. Using
+ ``rcu_free`` or ``rcu_close`` will set it up correctly without further
+ code needed.
+
+   The ``rcu_head`` may be put in a union with other data if the other data
+   is only used during the "life" of the data, since the ``rcu_head`` is used only
+ for the "death" of data. But note that other threads may still be reading
+ a piece of data while a thread is working to free it.
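+
+   As a hypothetical sketch, a struct that may need both a delayed ``free()``
+   and a delayed ``close()`` embeds one head per pending operation::
+
+      struct conn {
+         int fd;
+         char name[64];
+
+         struct rcu_head rcu_head;          /* used by rcu_free() */
+         struct rcu_head_close rcu_close;   /* used by rcu_close() */
+      };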
+
+.. c:function:: void rcu_free(struct memtype *mtype, struct X *ptr, field)
+
+ Free a block of memory after RCU has ensured no other thread can be
+ accessing it anymore. The pointer remains valid for any other thread that
+ has called :c:func:`rcu_read_lock` before the ``rcu_free`` call.
+
+ .. warning::
+
+ In some other RCU implementations, the pointer remains valid to the
+ *calling* thread if it is holding the RCU read-side lock. This is not
+ the case in FRR, particularly when running single-threaded. Enforcing
+ this rule also allows static analysis to find use-after-free issues.
+
+ ``mtype`` is the libfrr ``MTYPE_FOO`` allocation type to pass to
+ :c:func:`XFREE`.
+
+ ``field`` must be the name of a ``struct rcu_head`` member field in ``ptr``.
+ The offset of this field (which must be constant) is used to reduce the
+ memory size of ``struct rcu_head``.
+
+ .. note::
+
+ ``rcu_free`` (and ``rcu_close``) calls are more efficient if they are
+ put close to each other. When freeing several RCU'd resources, try to
+ move the calls next to each other (even if the data structures do not
+      directly point to each other).
+
+ Having the calls bundled reduces the cost of adding the ``rcu_head`` to
+ the RCU queue; the RCU queue is an atomic data structure whose usage
+ will require the CPU to acquire an exclusive hold on relevant cache
+ lines.
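+
+   Using the ``struct conn`` example above (an fd plus embedded ``rcu_head``
+   and ``rcu_head_close``), a hypothetical teardown that bundles the delayed
+   cleanup calls might look like this (``MTYPE_CONN`` is made up for
+   illustration)::
+
+      void conn_delete(struct conn *conn)
+      {
+         /* remove conn from whatever atomic list/table made it visible,
+          * then queue both delayed operations back to back
+          */
+         rcu_read_lock();
+         rcu_close(&conn->rcu_close, conn->fd);
+         rcu_free(MTYPE_CONN, conn, rcu_head);
+         rcu_read_unlock();
+      }
+
+   After this returns, the calling thread must not touch ``conn`` anymore;
+   other threads that already hold RCU may still be reading it until they
+   release their read-side lock.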
+
+.. c:function:: void rcu_close(struct rcu_head_close *head, int fd)
+
+ Close a file descriptor after ensuring no other thread might be using it
+ anymore. Same as :c:func:`rcu_free`, except it calls ``close`` instead of
+ ``free``.
+
+Internals
+^^^^^^^^^
+
+.. c:type:: struct rcu_thread
+
+ Per-thread state maintained by the RCU code, set up by the following
+ functions. A pointer to a thread's own ``rcu_thread`` is saved in
+ thread-local storage.
+
+.. c:function:: struct rcu_thread *rcu_thread_prepare(void)
+.. c:function:: void rcu_thread_unprepare(struct rcu_thread *rcu_thread)
+.. c:function:: void rcu_thread_start(struct rcu_thread *rcu_thread)
+
+ Since the RCU code needs to have a list of all active threads, these
+ functions are used by the ``frr_pthread`` code to set up threads. Teardown
+ is automatic. It should not be necessary to call these functions.
+
+ Any thread that accesses RCU-protected data needs to be registered with
+ these functions. Threads that do not access RCU-protected data may call
+ these functions but do not need to.
+
+ Note that passing a pointer to RCU-protected data to some library which
+ accesses that pointer makes the library "access RCU-protected data". In
+ that case, either all of the library's threads must be registered for RCU,
+ or the code must instead pass a (non-RCU) copy of the data to the library.
+
+.. c:function:: void rcu_shutdown(void)
+
+ Stop the RCU sweeper thread and make sure all cleanup has finished.
+
+ This function is called on daemon exit by the libfrr code to ensure pending
+ RCU operations are completed. This is mostly to get a clean exit without
+ memory leaks from queued RCU operations. It should not be necessary to
+ call this function as libfrr handles this.
+
+FRR specifics and implementation details
+----------------------------------------
+
+The FRR RCU infrastructure has the following characteristics:
+
+* it is Epoch-based with a 32-bit wrapping counter. (This is somewhat
+ different from other Epoch-based approaches which may be designed to only
+ use 3 counter values, but works out to a simple implementation.)
+
+* instead of tracking CPUs as the Linux kernel does, threads are tracked. This
+  has exactly zero semantic impact; RCU just cares about "threads of
+  execution", which the kernel can optimize to CPUs but we can't. It
+  really boils down to the same thing.
+
+* there are no ``rcu_dereference`` and ``rcu_assign_pointer`` - use
+ ``atomic_load`` and ``atomic_store`` instead. (These didn't exist when the
+ Linux RCU code was created.)
+
+* there is no ``synchronize_rcu``; this is a design choice but may be revisited
+ at a later point. ``synchronize_rcu`` blocks a thread until it is guaranteed
+ that no other threads might still be accessing data structures that they may
+ have access to at the beginning of the function call. This is a blocking
+  design and probably not appropriate for FRR. Instead, ``rcu_call`` can be
+  used to have the RCU sweeper thread make a callback once the same constraint
+  is fulfilled, in an asynchronous way (see the sketch below). Most needs
+  should be covered by ``rcu_free`` and ``rcu_close``.
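+
+To illustrate the last point, a hypothetical sketch of ``rcu_call``: the
+callback runs on the RCU sweeper thread once no other thread can still be
+holding a reference, so it should be short and must not block.
+``struct session`` (with an embedded ``struct rcu_head rcu_head``),
+``MTYPE_SESSION`` and the list member are made up for illustration::
+
+   static void session_cleanup(struct session *session)
+   {
+      /* runs on the RCU sweeper thread, after the grace period */
+      list_delete(&session->log_entries);
+      XFREE(MTYPE_SESSION, session);
+   }
+
+   void session_delete(struct session *session)
+   {
+      rcu_read_lock();   /* rcu_call(), like rcu_free(), needs RCU held */
+      rcu_call(session_cleanup, session, rcu_head);
+      rcu_read_unlock();
+   }
+
+In most cases a plain :c:func:`rcu_free()` is all that is needed; reserve
+``rcu_call`` for cleanup that genuinely has to run code.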
diff --git a/doc/developer/subdir.am b/doc/developer/subdir.am
index 996f12d47..1fc593e56 100644
--- a/doc/developer/subdir.am
+++ b/doc/developer/subdir.am
@@ -42,6 +42,7 @@ dev_RSTFILES = \
doc/developer/packaging-debian.rst \
doc/developer/packaging-redhat.rst \
doc/developer/packaging.rst \
+ doc/developer/rcu.rst \
doc/developer/testing.rst \
doc/developer/topotests-snippets.rst \
doc/developer/topotests.rst \
diff --git a/lib/frr_pthread.c b/lib/frr_pthread.c
index e588571c0..bdb6c2a39 100644
--- a/lib/frr_pthread.c
+++ b/lib/frr_pthread.c
@@ -133,18 +133,29 @@ int frr_pthread_set_name(struct frr_pthread *fpt)
return ret;
}
+static void *frr_pthread_inner(void *arg)
+{
+ struct frr_pthread *fpt = arg;
+
+ rcu_thread_start(fpt->rcu_thread);
+ return fpt->attr.start(fpt);
+}
+
int frr_pthread_run(struct frr_pthread *fpt, const pthread_attr_t *attr)
{
int ret;
- ret = pthread_create(&fpt->thread, attr, fpt->attr.start, fpt);
+ fpt->rcu_thread = rcu_thread_prepare();
+ ret = pthread_create(&fpt->thread, attr, frr_pthread_inner, fpt);
/*
* Per pthread_create(3), the contents of fpt->thread are undefined if
* pthread_create() did not succeed. Reset this value to zero.
*/
- if (ret < 0)
+ if (ret < 0) {
+ rcu_thread_unprepare(fpt->rcu_thread);
memset(&fpt->thread, 0x00, sizeof(fpt->thread));
+ }
return ret;
}
diff --git a/lib/frr_pthread.h b/lib/frr_pthread.h
index 3afe7ba96..6096a5037 100644
--- a/lib/frr_pthread.h
+++ b/lib/frr_pthread.h
@@ -23,6 +23,7 @@
#include <pthread.h>
#include "frratomic.h"
#include "memory.h"
+#include "frrcu.h"
#include "thread.h"
#ifdef __cplusplus
@@ -50,6 +51,8 @@ struct frr_pthread {
/* pthread id */
pthread_t thread;
+ struct rcu_thread *rcu_thread;
+
/* thread master for this pthread's thread.c event loop */
struct thread_master *master;
diff --git a/lib/frrcu.c b/lib/frrcu.c
new file mode 100644
index 000000000..7e6475b64
--- /dev/null
+++ b/lib/frrcu.c
@@ -0,0 +1,527 @@
+/*
+ * Copyright (c) 2017-19 David Lamparter, for NetDEF, Inc.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* implementation notes: this is an epoch-based RCU implementation. rcu_seq
+ * (global variable) counts the current epoch. Threads hold a specific epoch
+ * in rcu_read_lock(). This is the oldest epoch a thread might be accessing
+ * data from.
+ *
+ * The rcu_seq global is only pushed forward on rcu_read_lock() and
+ * rcu_read_unlock() calls. This makes things a tad more efficient since
+ * those are the only places it matters:
+ * - on rcu_read_lock, we don't want to hold an old epoch pointlessly
+ * - on rcu_read_unlock, we want to make sure we're not stuck on an old epoch
+ * when heading into a long idle period where no thread holds RCU
+ *
+ * rcu_thread structures themselves are RCU-free'd.
+ *
+ * rcu_head structures are the most iffy; normally for an ATOMLIST we would
+ * need to make sure we use rcu_free or pthread_rwlock to deallocate old items
+ * to prevent ABA or use-after-free problems. However, our ATOMLIST code
+ * guarantees that if the list remains non-empty in all cases, we only need
+ * the "last" pointer to do an "add_tail()", i.e. we can't run into ABA/UAF
+ * issues - but we do need to keep at least 1 item on the list.
+ *
+ * (Search the atomlist code for all uses of "last")
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <pthread.h>
+#ifdef HAVE_PTHREAD_NP_H
+#include <pthread_np.h>
+#endif
+#include <string.h>
+#include <unistd.h>
+#include <signal.h>
+
+#include "frrcu.h"
+#include "seqlock.h"
+#include "atomlist.h"
+
+DEFINE_MTYPE_STATIC(LIB, RCU_THREAD, "RCU thread")
+DEFINE_MTYPE_STATIC(LIB, RCU_NEXT, "RCU sequence barrier")
+
+DECLARE_ATOMLIST(rcu_heads, struct rcu_head, head)
+
+PREDECL_ATOMLIST(rcu_threads)
+struct rcu_thread {
+ struct rcu_threads_item head;
+
+ struct rcu_head rcu_head;
+
+ struct seqlock rcu;
+
+ /* only accessed by thread itself, not atomic */
+ unsigned depth;
+};
+DECLARE_ATOMLIST(rcu_threads, struct rcu_thread, head)
+
+static const struct rcu_action rcua_next = { .type = RCUA_NEXT };
+static const struct rcu_action rcua_end = { .type = RCUA_END };
+static const struct rcu_action rcua_close = { .type = RCUA_CLOSE };
+
+struct rcu_next {
+ struct rcu_head head_free;
+ struct rcu_head head_next;
+};
+
+#define rcu_free_internal(mtype, ptr, field) \
+ do { \
+ typeof(ptr) _ptr = (ptr); \
+ struct rcu_head *_rcu_head = &_ptr->field; \
+ static const struct rcu_action _rcu_action = { \
+ .type = RCUA_FREE, \
+ .u.free = { \
+ .mt = mtype, \
+ .offset = offsetof(typeof(*_ptr), field), \
+ }, \
+ }; \
+ _rcu_head->action = &_rcu_action; \
+ rcu_heads_add_tail(&rcu_heads, _rcu_head); \
+ } while (0)
+
+/* primary global RCU position */
+static struct seqlock rcu_seq;
+/* this is set to rcu_seq whenever something is added on the RCU queue.
+ * rcu_read_lock() and rcu_read_unlock() will then bump rcu_seq up one step.
+ */
+static _Atomic seqlock_val_t rcu_dirty;
+
+static struct rcu_threads_head rcu_threads;
+static struct rcu_heads_head rcu_heads;
+
+/* main thread & RCU sweeper have pre-setup rcu_thread structures. The
+ * reasons are different:
+ *
+ * - rcu_thread_main is there because the main thread isn't started like
+ * other threads, it's implicitly created when the program is started. So
+ * rcu_thread_main matches up implicitly.
+ *
+ * - rcu_thread_rcu isn't actually put on the rcu_threads list (makes no
+ * sense really), it only exists so we can call RCU-using functions from
+ * the RCU thread without special handling in rcu_read_lock/unlock.
+ */
+static struct rcu_thread rcu_thread_main;
+static struct rcu_thread rcu_thread_rcu;
+
+static pthread_t rcu_pthread;
+static pthread_key_t rcu_thread_key;
+static bool rcu_active;
+
+static void rcu_start(void);
+static void rcu_bump(void);
+
+/*
+ * preinitialization for main thread
+ */
+static void rcu_thread_end(void *rcu_thread);
+
+static void rcu_preinit(void) __attribute__((constructor));
+static void rcu_preinit(void)
+{
+ struct rcu_thread *rt;
+
+ rt = &rcu_thread_main;
+ rt->depth = 1;
+ seqlock_init(&rt->rcu);
+ seqlock_acquire_val(&rt->rcu, SEQLOCK_STARTVAL);
+
+ pthread_key_create(&rcu_thread_key, rcu_thread_end);
+ pthread_setspecific(rcu_thread_key, rt);
+
+ rcu_threads_add_tail(&rcu_threads, rt);
+
+ /* RCU sweeper's rcu_thread is a dummy, NOT added to rcu_threads */
+ rt = &rcu_thread_rcu;
+ rt->depth = 1;
+
+ seqlock_init(&rcu_seq);
+ seqlock_acquire_val(&rcu_seq, SEQLOCK_STARTVAL);
+}
+
+static struct rcu_thread *rcu_self(void)
+{
+ return (struct rcu_thread *)pthread_getspecific(rcu_thread_key);
+}
+
+/*
+ * thread management (for the non-main thread)
+ */
+struct rcu_thread *rcu_thread_prepare(void)
+{
+ struct rcu_thread *rt, *cur;
+
+ rcu_assert_read_locked();
+
+ if (!rcu_active)
+ rcu_start();
+
+ cur = rcu_self();
+ assert(cur->depth);
+
+ /* new thread always starts with rcu_read_lock held at depth 1, and
+ * holding the same epoch as the parent (this makes it possible to
+ * use RCU for things passed into the thread through its arg)
+ */
+ rt = XCALLOC(MTYPE_RCU_THREAD, sizeof(*rt));
+ rt->depth = 1;
+
+ seqlock_init(&rt->rcu);
+ seqlock_acquire(&rt->rcu, &cur->rcu);
+
+ rcu_threads_add_tail(&rcu_threads, rt);
+
+ return rt;
+}
+
+void rcu_thread_start(struct rcu_thread *rt)
+{
+ pthread_setspecific(rcu_thread_key, rt);
+}
+
+void rcu_thread_unprepare(struct rcu_thread *rt)
+{
+ if (rt == &rcu_thread_rcu)
+ return;
+
+ rt->depth = 1;
+ seqlock_acquire(&rt->rcu, &rcu_seq);
+
+ rcu_bump();
+ if (rt != &rcu_thread_main)
+ /* this free() happens after seqlock_release() below */
+ rcu_free_internal(MTYPE_RCU_THREAD, rt, rcu_head);
+
+ rcu_threads_del(&rcu_threads, rt);
+ seqlock_release(&rt->rcu);
+}
+
+static void rcu_thread_end(void *rtvoid)
+{
+ struct rcu_thread *rt = rtvoid;
+ rcu_thread_unprepare(rt);
+}
+
+/*
+ * main RCU control aspects
+ */
+
+static void rcu_bump(void)
+{
+ struct rcu_next *rn;
+
+ rn = XMALLOC(MTYPE_RCU_NEXT, sizeof(*rn));
+
+ /* note: each RCUA_NEXT item corresponds to exactly one seqno bump.
+ * This means we don't need to communicate which seqno is which
+ * RCUA_NEXT, since we really don't care.
+ */
+
+ /*
+ * Important race condition: while rcu_heads_add_tail is executing,
+ * there is an intermediate point where the rcu_heads "last" pointer
+ * already points to rn->head_next, but rn->head_next isn't added to
+ * the list yet. That means any other "add_tail" calls append to this
+ * item, which isn't fully on the list yet. Freeze this thread at
+ * that point and look at another thread doing a rcu_bump. It adds
+ * these two items and then does a seqlock_bump. But the rcu_heads
+ * list is still "interrupted" and there's no RCUA_NEXT on the list
+ * yet (from either the frozen thread or the second thread). So
+ * rcu_main() might actually hit the end of the list at the
+ * "interrupt".
+ *
+ * This situation is prevented by requiring that rcu_read_lock is held
+ * for any calls to rcu_bump, since if we're holding the current RCU
+ * epoch, that means rcu_main can't be chewing on rcu_heads and hit
+ * that interruption point. Only by the time the thread has continued
+ * to rcu_read_unlock() - and therefore completed the add_tail - the
+ * RCU sweeper gobbles up the epoch and can be sure to find at least
+ * the RCUA_NEXT and RCUA_FREE items on rcu_heads.
+ */
+ rn->head_next.action = &rcua_next;
+ rcu_heads_add_tail(&rcu_heads, &rn->head_next);
+
+ /* free rn that we allocated above.
+ *
+ * This is INTENTIONALLY not built into the RCUA_NEXT action. This
+ * ensures that after the action above is popped off the queue, there
+ * is still at least 1 item on the RCU queue. This means we never
+ * delete the last item, which is extremely important since it keeps
+ * the atomlist ->last pointer alive and well.
+ *
+ * If we were to "run dry" on the RCU queue, add_tail may run into the
+ * "last item is being deleted - start over" case, and then we may end
+ * up accessing old RCU queue items that are already free'd.
+ */
+ rcu_free_internal(MTYPE_RCU_NEXT, rn, head_free);
+
+ /* Only allow the RCU sweeper to run after these 2 items are queued.
+ *
+ * If another thread enqueues some RCU action in the intermediate
+ * window here, nothing bad happens - the queued action is associated
+ * with a larger seq# than strictly necessary. Thus, it might get
+ * executed a bit later, but that's not a problem.
+ *
+ * If another thread acquires the read lock in this window, it holds
+ * the previous epoch, but its RCU queue actions will be in the next
+ * epoch. This isn't a problem either, just a tad inefficient.
+ */
+ seqlock_bump(&rcu_seq);
+}
+
+static void rcu_bump_maybe(void)
+{
+ seqlock_val_t dirty;
+
+ dirty = atomic_load_explicit(&rcu_dirty, memory_order_relaxed);
+ /* no problem if we race here and multiple threads bump rcu_seq;
+ * bumping too much causes no issues while not bumping enough will
+ * result in delayed cleanup
+ */
+ if (dirty == seqlock_cur(&rcu_seq))
+ rcu_bump();
+}
+
+void rcu_read_lock(void)
+{
+ struct rcu_thread *rt = rcu_self();
+
+ assert(rt);
+ if (rt->depth++ > 0)
+ return;
+
+ seqlock_acquire(&rt->rcu, &rcu_seq);
+ /* need to hold RCU for bump ... */
+ rcu_bump_maybe();
+ /* ... but no point in holding the old epoch if we just bumped */
+ seqlock_acquire(&rt->rcu, &rcu_seq);
+}
+
+void rcu_read_unlock(void)
+{
+ struct rcu_thread *rt = rcu_self();
+
+ assert(rt && rt->depth);
+ if (--rt->depth > 0)
+ return;
+ rcu_bump_maybe();
+ seqlock_release(&rt->rcu);
+}
+
+void rcu_assert_read_locked(void)
+{
+ struct rcu_thread *rt = rcu_self();
+ assert(rt && rt->depth && seqlock_held(&rt->rcu));
+}
+
+void rcu_assert_read_unlocked(void)
+{
+ struct rcu_thread *rt = rcu_self();
+ assert(rt && !rt->depth && !seqlock_held(&rt->rcu));
+}
+
+/*
+ * RCU resource-release thread
+ */
+
+static void *rcu_main(void *arg);
+
+static void rcu_start(void)
+{
+ /* ensure we never handle signals on the RCU thread by blocking
+ * everything here (new thread inherits signal mask)
+ */
+ sigset_t oldsigs, blocksigs;
+
+ sigfillset(&blocksigs);
+ pthread_sigmask(SIG_BLOCK, &blocksigs, &oldsigs);
+
+ rcu_active = true;
+
+ assert(!pthread_create(&rcu_pthread, NULL, rcu_main, NULL));
+
+ pthread_sigmask(SIG_SETMASK, &oldsigs, NULL);
+
+#ifdef HAVE_PTHREAD_SETNAME_NP
+# ifdef GNU_LINUX
+ pthread_setname_np(rcu_pthread, "RCU sweeper");
+# elif defined(__NetBSD__)
+ pthread_setname_np(rcu_pthread, "RCU sweeper", NULL);
+# endif
+#elif defined(HAVE_PTHREAD_SET_NAME_NP)
+ pthread_set_name_np(rcu_pthread, "RCU sweeper");
+#endif
+}
+
+static void rcu_do(struct rcu_head *rh)
+{
+ struct rcu_head_close *rhc;
+ void *p;
+
+ switch (rh->action->type) {
+ case RCUA_FREE:
+ p = (char *)rh - rh->action->u.free.offset;
+ if (rh->action->u.free.mt)
+ qfree(rh->action->u.free.mt, p);
+ else
+ free(p);
+ break;
+ case RCUA_CLOSE:
+ rhc = container_of(rh, struct rcu_head_close,
+ rcu_head);
+ close(rhc->fd);
+ break;
+ case RCUA_CALL:
+ p = (char *)rh - rh->action->u.call.offset;
+ rh->action->u.call.fptr(p);
+ break;
+
+ case RCUA_INVALID:
+ case RCUA_NEXT:
+ case RCUA_END:
+ default:
+ assert(0);
+ }
+}
+
+static void rcu_watchdog(struct rcu_thread *rt)
+{
+#if 0
+ /* future work: print a backtrace for the thread that's holding up
+ * RCU. The only (good) way of doing that is to send a signal to the
+ * other thread, save away the backtrace in the signal handler, and
+ * block here until the signal is done processing.
+ *
+ * Just haven't implemented that yet.
+ */
+ fprintf(stderr, "RCU watchdog %p\n", rt);
+#endif
+}
+
+static void *rcu_main(void *arg)
+{
+ struct rcu_thread *rt;
+ struct rcu_head *rh = NULL;
+ bool end = false;
+ struct timespec maxwait;
+
+ seqlock_val_t rcuval = SEQLOCK_STARTVAL;
+
+ pthread_setspecific(rcu_thread_key, &rcu_thread_rcu);
+
+ while (!end) {
+ seqlock_wait(&rcu_seq, rcuval);
+
+ /* RCU watchdog timeout, TODO: configurable value */
+ clock_gettime(CLOCK_MONOTONIC, &maxwait);
+ maxwait.tv_nsec += 100 * 1000 * 1000;
+ if (maxwait.tv_nsec >= 1000000000) {
+ maxwait.tv_sec++;
+ maxwait.tv_nsec -= 1000000000;
+ }
+
+ frr_each (rcu_threads, &rcu_threads, rt)
+ if (!seqlock_timedwait(&rt->rcu, rcuval, &maxwait)) {
+ rcu_watchdog(rt);
+ seqlock_wait(&rt->rcu, rcuval);
+ }
+
+ while ((rh = rcu_heads_pop(&rcu_heads))) {
+ if (rh->action->type == RCUA_NEXT)
+ break;
+ else if (rh->action->type == RCUA_END)
+ end = true;
+ else
+ rcu_do(rh);
+ }
+
+ rcuval += SEQLOCK_INCR;
+ }
+
+ /* rcu_shutdown can only be called singlethreaded, and it does a
+ * pthread_join, so it should be impossible that anything ended up
+ * on the queue after RCUA_END
+ */
+#if 1
+ assert(!rcu_heads_first(&rcu_heads));
+#else
+ while ((rh = rcu_heads_pop(&rcu_heads)))
+ if (rh->action->type >= RCUA_FREE)
+ rcu_do(rh);
+#endif
+ return NULL;
+}
+
+void rcu_shutdown(void)
+{
+ static struct rcu_head rcu_head_end;
+ struct rcu_thread *rt = rcu_self();
+ void *retval;
+
+ if (!rcu_active)
+ return;
+
+ rcu_assert_read_locked();
+ assert(rcu_threads_count(&rcu_threads) == 1);
+
+ rcu_enqueue(&rcu_head_end, &rcua_end);
+
+ rt->depth = 0;
+ seqlock_release(&rt->rcu);
+ seqlock_release(&rcu_seq);
+ rcu_active = false;
+
+ /* clearing rcu_active is before pthread_join in case we hang in
+ * pthread_join & get a SIGTERM or something - in that case, just
+ * ignore the maybe-still-running RCU thread
+ */
+ if (pthread_join(rcu_pthread, &retval) == 0) {
+ seqlock_acquire_val(&rcu_seq, SEQLOCK_STARTVAL);
+ seqlock_acquire_val(&rt->rcu, SEQLOCK_STARTVAL);
+ rt->depth = 1;
+ }
+}
+
+/*
+ * RCU'd free functions
+ */
+
+void rcu_enqueue(struct rcu_head *rh, const struct rcu_action *action)
+{
+ /* refer to rcu_bump() for why we need to hold RCU when adding items
+ * to rcu_heads
+ */
+ rcu_assert_read_locked();
+
+ rh->action = action;
+
+ if (!rcu_active) {
+ rcu_do(rh);
+ return;
+ }
+ rcu_heads_add_tail(&rcu_heads, rh);
+ atomic_store_explicit(&rcu_dirty, seqlock_cur(&rcu_seq),
+ memory_order_relaxed);
+}
+
+void rcu_close(struct rcu_head_close *rhc, int fd)
+{
+ rhc->fd = fd;
+ rcu_enqueue(&rhc->rcu_head, &rcua_close);
+}
diff --git a/lib/frrcu.h b/lib/frrcu.h
new file mode 100644
index 000000000..8f789303c
--- /dev/null
+++ b/lib/frrcu.h
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2017-19 David Lamparter, for NetDEF, Inc.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _FRRCU_H
+#define _FRRCU_H
+
+#include "memory.h"
+#include "atomlist.h"
+#include "seqlock.h"
+
+/* quick RCU primer:
+ * There's a global sequence counter. Whenever a thread does a
+ * rcu_read_lock(), it is marked as holding the current sequence counter.
+ * When something is cleaned with RCU, the global sequence counter is
+ * increased and the item is queued for cleanup - *after* all threads are
+ * at a more recent sequence counter (or no sequence counter / unheld).
+ *
+ * So, by delaying resource cleanup, RCU ensures that things don't go away
+ * while another thread may hold a (stale) reference.
+ *
+ * Note that even if a thread is in rcu_read_lock(), it is invalid for that
+ * thread to access bits after rcu_free() & co on them. This is a design
+ * choice to allow no-op'ing out the entire RCU mechanism if we're running
+ * singlethreaded. (Also allows some optimization on the counter bumping.)
+ *
+ * differences from Linux Kernel RCU:
+ * - there's no rcu_synchronize(), if you really need to defer something
+ * use rcu_call() (and double check it's really necessary)
+ * - rcu_dereference() and rcu_assign_pointer() don't exist, use atomic_*
+ * instead (ATOM* list structures do the right thing)
+ */
+
+/* opaque */
+struct rcu_thread;
+
+/* called before new thread creation, sets up rcu thread info for new thread
+ * before it actually exists. This ensures possible RCU references are held
+ * for thread startup.
+ *
+ * return value must be passed into the new thread's call to rcu_thread_start()
+ */
+extern struct rcu_thread *rcu_thread_prepare(void);
+
+/* cleanup in case pthread_create() fails */
+extern void rcu_thread_unprepare(struct rcu_thread *rcu_thread);
+
+/* called early in the new thread, with the return value from the above.
+ * NB: new thread is initially in RCU-held state! (at depth 1)
+ *
+ * TBD: maybe inherit RCU state from rcu_thread_prepare()?
+ */
+extern void rcu_thread_start(struct rcu_thread *rcu_thread);
+
+/* thread exit is handled through pthread_key_create's destructor function */
+
+/* global RCU shutdown - must be called with only 1 active thread left. waits
+ * until remaining RCU actions are done & RCU thread has exited.
+ *
+ * This is mostly here to get a clean exit without memleaks.
+ */
+extern void rcu_shutdown(void);
+
+/* enter / exit RCU-held state. counter-based, so can be called nested. */
+extern void rcu_read_lock(void);
+extern void rcu_read_unlock(void);
+
+/* for debugging / safety checks */
+extern void rcu_assert_read_locked(void);
+extern void rcu_assert_read_unlocked(void);
+
+enum rcu_action_type {
+ RCUA_INVALID = 0,
+ /* used internally by the RCU code, shouldn't ever show up outside */
+ RCUA_NEXT,
+ RCUA_END,
+ /* normal RCU actions, for outside use */
+ RCUA_FREE,
+ RCUA_CLOSE,
+ RCUA_CALL,
+};
+
+/* since rcu_head is intended to be embedded into structs which may exist
+ * with lots of copies, rcu_head is shrunk down to its absolute minimum -
+ * the atomlist pointer + a pointer to this action struct.
+ */
+struct rcu_action {
+ enum rcu_action_type type;
+
+ union {
+ struct {
+ struct memtype *mt;
+ ptrdiff_t offset;
+ } free;
+
+ struct {
+ void (*fptr)(void *arg);
+ ptrdiff_t offset;
+ } call;
+ } u;
+};
+
+/* RCU cleanup function queue item */
+PREDECL_ATOMLIST(rcu_heads)
+struct rcu_head {
+ struct rcu_heads_item head;
+ const struct rcu_action *action;
+};
+
+/* special RCU head for delayed fd-close */
+struct rcu_head_close {
+ struct rcu_head rcu_head;
+ int fd;
+};
+
+/* enqueue RCU action - use the macros below to get the rcu_action set up */
+extern void rcu_enqueue(struct rcu_head *head, const struct rcu_action *action);
+
+/* RCU free() and file close() operations.
+ *
+ * freed memory / closed fds become _immediately_ unavailable to the calling
+ * thread, but will remain available for other threads until they have passed
+ * into RCU-released state.
+ */
+
+/* may be called with NULL mt to do non-MTYPE free() */
+#define rcu_free(mtype, ptr, field) \
+ do { \
+ typeof(ptr) _ptr = (ptr); \
+ struct rcu_head *_rcu_head = &_ptr->field; \
+ static const struct rcu_action _rcu_action = { \
+ .type = RCUA_FREE, \
+ .u.free = { \
+ .mt = mtype, \
+ .offset = offsetof(typeof(*_ptr), field), \
+ }, \
+ }; \
+ rcu_enqueue(_rcu_head, &_rcu_action); \
+ } while (0)
+
+/* use this sparingly, it runs on (and blocks) the RCU thread */
+#define rcu_call(func, ptr, field) \
+ do { \
+ typeof(ptr) _ptr = (ptr); \
+		void (*_fptype)(typeof(ptr)); \
+ struct rcu_head *_rcu_head = &_ptr->field; \
+ static const struct rcu_action _rcu_action = { \
+ .type = RCUA_CALL, \
+ .u.call = { \
+ .fptr = (void *)func, \
+ .offset = offsetof(typeof(*_ptr), field), \
+ }, \
+ }; \
+ (void)(_fptype = func); \
+ rcu_enqueue(_rcu_head, &_rcu_action); \
+ } while (0)
+
+extern void rcu_close(struct rcu_head_close *head, int fd);
+
+#endif /* _FRRCU_H */
diff --git a/lib/libfrr.c b/lib/libfrr.c
index 0fc321d6e..35c609214 100644
--- a/lib/libfrr.c
+++ b/lib/libfrr.c
@@ -41,6 +41,7 @@
#include "northbound_cli.h"
#include "northbound_db.h"
#include "debug.h"
+#include "frrcu.h"
DEFINE_HOOK(frr_late_init, (struct thread_master * tm), (tm))
DEFINE_KOOH(frr_early_fini, (), ())
@@ -1081,6 +1082,7 @@ void frr_fini(void)
master = NULL;
closezlog();
/* frrmod_init -> nothing needed / hooks */
+ rcu_shutdown();
if (!debug_memstats_at_exit)
return;
diff --git a/lib/log.c b/lib/log.c
index c57723990..51a0ddd6b 100644
--- a/lib/log.c
+++ b/lib/log.c
@@ -559,9 +559,7 @@ static void crash_write(struct fbuf *fb, char *msgstart)
void zlog_signal(int signo, const char *action, void *siginfo_v,
void *program_counter)
{
-#ifdef SA_SIGINFO
siginfo_t *siginfo = siginfo_v;
-#endif
time_t now;
char buf[sizeof("DEFAULT: Received signal S at T (si_addr 0xP, PC 0xP); aborting...")
+ 100];
@@ -575,7 +573,6 @@ void zlog_signal(int signo, const char *action, void *siginfo_v,
msgstart = fb.pos;
bprintfrr(&fb, "Received signal %d at %lld", signo, (long long)now);
-#ifdef SA_SIGINFO
if (program_counter)
bprintfrr(&fb, " (si_addr 0x%tx, PC 0x%tx)",
(ptrdiff_t)siginfo->si_addr,
@@ -583,7 +580,6 @@ void zlog_signal(int signo, const char *action, void *siginfo_v,
else
bprintfrr(&fb, " (si_addr 0x%tx)",
(ptrdiff_t)siginfo->si_addr);
-#endif /* SA_SIGINFO */
bprintfrr(&fb, "; %s\n", action);
crash_write(&fb, msgstart);
diff --git a/lib/seqlock.c b/lib/seqlock.c
index 223d14952..c05ec19db 100644
--- a/lib/seqlock.c
+++ b/lib/seqlock.c
@@ -25,6 +25,7 @@
#include "config.h"
#endif
+#include <string.h>
#include <unistd.h>
#include <limits.h>
#include <errno.h>
@@ -35,44 +36,75 @@
#include "seqlock.h"
+/****************************************
+ * OS specific synchronization wrappers *
+ ****************************************/
+
+/*
+ * Linux: sys_futex()
+ */
#ifdef HAVE_SYNC_LINUX_FUTEX
-/* Linux-specific - sys_futex() */
#include <sys/syscall.h>
#include <linux/futex.h>
-static long sys_futex(void *addr1, int op, int val1, struct timespec *timeout,
- void *addr2, int val3)
+static long sys_futex(void *addr1, int op, int val1,
+ const struct timespec *timeout, void *addr2, int val3)
{
return syscall(SYS_futex, addr1, op, val1, timeout, addr2, val3);
}
#define wait_once(sqlo, val) \
sys_futex((int *)&sqlo->pos, FUTEX_WAIT, (int)val, NULL, NULL, 0)
+#define wait_time(sqlo, val, time, reltime) \
+ sys_futex((int *)&sqlo->pos, FUTEX_WAIT_BITSET, (int)val, time, \
+ NULL, ~0U)
#define wait_poke(sqlo) \
sys_futex((int *)&sqlo->pos, FUTEX_WAKE, INT_MAX, NULL, NULL, 0)
+/*
+ * OpenBSD: futex(), almost the same as on Linux
+ */
#elif defined(HAVE_SYNC_OPENBSD_FUTEX)
-/* OpenBSD variant of the above. untested, not upstream in OpenBSD. */
#include <sys/syscall.h>
#include <sys/futex.h>
+#define TIME_RELATIVE 1
+
#define wait_once(sqlo, val) \
futex((int *)&sqlo->pos, FUTEX_WAIT, (int)val, NULL, NULL, 0)
+#define wait_time(sqlo, val, time, reltime) \
+ futex((int *)&sqlo->pos, FUTEX_WAIT, (int)val, reltime, NULL, 0)
#define wait_poke(sqlo) \
futex((int *)&sqlo->pos, FUTEX_WAKE, INT_MAX, NULL, NULL, 0)
+/*
+ * FreeBSD: _umtx_op()
+ */
#elif defined(HAVE_SYNC_UMTX_OP)
-/* FreeBSD-specific: umtx_op() */
#include <sys/umtx.h>
#define wait_once(sqlo, val) \
_umtx_op((void *)&sqlo->pos, UMTX_OP_WAIT_UINT, val, NULL, NULL)
+static int wait_time(struct seqlock *sqlo, uint32_t val,
+ const struct timespec *abstime,
+ const struct timespec *reltime)
+{
+ struct _umtx_time t;
+ t._flags = UMTX_ABSTIME;
+ t._clockid = CLOCK_MONOTONIC;
+ memcpy(&t._timeout, abstime, sizeof(t._timeout));
+ return _umtx_op((void *)&sqlo->pos, UMTX_OP_WAIT_UINT, val,
+ (void *)(uintptr_t) sizeof(t), &t);
+}
#define wait_poke(sqlo) \
_umtx_op((void *)&sqlo->pos, UMTX_OP_WAKE, INT_MAX, NULL, NULL)
-#else
-/* generic version. used on *BSD, Solaris and OSX.
+/*
+ * generic fallback (pthread mutex + condvar). used on NetBSD, Solaris and OSX.
*/
+#else
+
+#define TIME_ABS_REALTIME 1
#define wait_init(sqlo) do { \
pthread_mutex_init(&sqlo->lock, NULL); \
@@ -80,6 +112,9 @@ static long sys_futex(void *addr1, int op, int val1, struct timespec *timeout,
} while (0)
#define wait_prep(sqlo) pthread_mutex_lock(&sqlo->lock)
#define wait_once(sqlo, val) pthread_cond_wait(&sqlo->wake, &sqlo->lock)
+#define wait_time(sqlo, val, time, reltime) \
+ pthread_cond_timedwait(&sqlo->wake, \
+ &sqlo->lock, time)
#define wait_done(sqlo) pthread_mutex_unlock(&sqlo->lock)
#define wait_poke(sqlo) do { \
pthread_mutex_lock(&sqlo->lock); \
@@ -103,18 +138,112 @@ void seqlock_wait(struct seqlock *sqlo, seqlock_val_t val)
seqlock_assert_valid(val);
wait_prep(sqlo);
- while (1) {
- cur = atomic_load_explicit(&sqlo->pos, memory_order_acquire);
- if (!(cur & 1))
+ cur = atomic_load_explicit(&sqlo->pos, memory_order_relaxed);
+
+ while (cur & SEQLOCK_HELD) {
+ cal = SEQLOCK_VAL(cur) - val - 1;
+ assert(cal < 0x40000000 || cal > 0xc0000000);
+ if (cal < 0x80000000)
break;
- cal = cur - val - 1;
+
+ if ((cur & SEQLOCK_WAITERS)
+ || atomic_compare_exchange_weak_explicit(
+ &sqlo->pos, &cur, cur | SEQLOCK_WAITERS,
+ memory_order_relaxed, memory_order_relaxed)) {
+ wait_once(sqlo, cur | SEQLOCK_WAITERS);
+ cur = atomic_load_explicit(&sqlo->pos,
+ memory_order_relaxed);
+ }
+ /* else: we failed to swap in cur because it just changed */
+ }
+ wait_done(sqlo);
+}
+
+bool seqlock_timedwait(struct seqlock *sqlo, seqlock_val_t val,
+ const struct timespec *abs_monotime_limit)
+{
+/*
+ * ABS_REALTIME - used on NetBSD, Solaris and OSX
+ */
+#if TIME_ABS_REALTIME
+#define time_arg1 &abs_rt
+#define time_arg2 NULL
+#define time_prep
+ struct timespec curmono, abs_rt;
+
+ clock_gettime(CLOCK_MONOTONIC, &curmono);
+ clock_gettime(CLOCK_REALTIME, &abs_rt);
+
+ abs_rt.tv_nsec += abs_monotime_limit->tv_nsec - curmono.tv_nsec;
+ if (abs_rt.tv_nsec < 0) {
+ abs_rt.tv_sec--;
+ abs_rt.tv_nsec += 1000000000;
+ } else if (abs_rt.tv_nsec >= 1000000000) {
+ abs_rt.tv_sec++;
+ abs_rt.tv_nsec -= 1000000000;
+ }
+ abs_rt.tv_sec += abs_monotime_limit->tv_sec - curmono.tv_sec;
+
+/*
+ * RELATIVE - used on OpenBSD (might get a patch to get absolute monotime)
+ */
+#elif TIME_RELATIVE
+ struct timespec reltime;
+
+#define time_arg1 abs_monotime_limit
+#define time_arg2 &reltime
+#define time_prep \
+ clock_gettime(CLOCK_MONOTONIC, &reltime); \
+ reltime.tv_sec = abs_monotime_limit->tv_sec - reltime.tv_sec; \
+ reltime.tv_nsec = abs_monotime_limit->tv_nsec - reltime.tv_nsec; \
+ if (reltime.tv_nsec < 0) { \
+ reltime.tv_sec--; \
+ reltime.tv_nsec += 1000000000; \
+ }
+/*
+ * FreeBSD & Linux: absolute time re. CLOCK_MONOTONIC
+ */
+#else
+#define time_arg1 abs_monotime_limit
+#define time_arg2 NULL
+#define time_prep
+#endif
+
+ bool ret = true;
+ seqlock_val_t cur, cal;
+
+ seqlock_assert_valid(val);
+
+ wait_prep(sqlo);
+ cur = atomic_load_explicit(&sqlo->pos, memory_order_relaxed);
+
+ while (cur & SEQLOCK_HELD) {
+ cal = SEQLOCK_VAL(cur) - val - 1;
assert(cal < 0x40000000 || cal > 0xc0000000);
if (cal < 0x80000000)
break;
- wait_once(sqlo, cur);
+ if ((cur & SEQLOCK_WAITERS)
+ || atomic_compare_exchange_weak_explicit(
+ &sqlo->pos, &cur, cur | SEQLOCK_WAITERS,
+ memory_order_relaxed, memory_order_relaxed)) {
+ int rv;
+
+ time_prep
+
+ rv = wait_time(sqlo, cur | SEQLOCK_WAITERS, time_arg1,
+ time_arg2);
+ if (rv) {
+ ret = false;
+ break;
+ }
+ cur = atomic_load_explicit(&sqlo->pos,
+ memory_order_relaxed);
+ }
}
wait_done(sqlo);
+
+ return ret;
}
bool seqlock_check(struct seqlock *sqlo, seqlock_val_t val)
@@ -123,26 +252,32 @@ bool seqlock_check(struct seqlock *sqlo, seqlock_val_t val)
seqlock_assert_valid(val);
- cur = atomic_load_explicit(&sqlo->pos, memory_order_acquire);
- if (!(cur & 1))
+ cur = atomic_load_explicit(&sqlo->pos, memory_order_relaxed);
+ if (!(cur & SEQLOCK_HELD))
return 1;
- cur -= val;
+ cur = SEQLOCK_VAL(cur) - val - 1;
assert(cur < 0x40000000 || cur > 0xc0000000);
return cur < 0x80000000;
}
void seqlock_acquire_val(struct seqlock *sqlo, seqlock_val_t val)
{
+ seqlock_val_t prev;
+
seqlock_assert_valid(val);
- atomic_store_explicit(&sqlo->pos, val, memory_order_release);
- wait_poke(sqlo);
+ prev = atomic_exchange_explicit(&sqlo->pos, val, memory_order_relaxed);
+ if (prev & SEQLOCK_WAITERS)
+ wait_poke(sqlo);
}
void seqlock_release(struct seqlock *sqlo)
{
- atomic_store_explicit(&sqlo->pos, 0, memory_order_release);
- wait_poke(sqlo);
+ seqlock_val_t prev;
+
+ prev = atomic_exchange_explicit(&sqlo->pos, 0, memory_order_relaxed);
+ if (prev & SEQLOCK_WAITERS)
+ wait_poke(sqlo);
}
void seqlock_init(struct seqlock *sqlo)
@@ -154,14 +289,23 @@ void seqlock_init(struct seqlock *sqlo)
seqlock_val_t seqlock_cur(struct seqlock *sqlo)
{
- return atomic_load_explicit(&sqlo->pos, memory_order_acquire);
+ return SEQLOCK_VAL(atomic_load_explicit(&sqlo->pos,
+ memory_order_relaxed));
}
seqlock_val_t seqlock_bump(struct seqlock *sqlo)
{
- seqlock_val_t val;
+ seqlock_val_t val, cur;
+
+ cur = atomic_load_explicit(&sqlo->pos, memory_order_relaxed);
+ seqlock_assert_valid(cur);
+
+ do {
+ val = SEQLOCK_VAL(cur) + SEQLOCK_INCR;
+ } while (!atomic_compare_exchange_weak_explicit(&sqlo->pos, &cur, val,
+ memory_order_relaxed, memory_order_relaxed));
- val = atomic_fetch_add_explicit(&sqlo->pos, 2, memory_order_release);
- wait_poke(sqlo);
+ if (cur & SEQLOCK_WAITERS)
+ wait_poke(sqlo);
return val;
}
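
For reference, the "SEQLOCK_VAL(cur) - val - 1" test in seqlock_wait() and seqlock_timedwait() above decides whether the position has already moved past val using unsigned wraparound arithmetic; the example values below are arbitrary.

	/* positions count 1, 5, 9, ... (SEQLOCK_INCR == 4):
	 *
	 *   cur = 9, val = 5:  9 - 5 - 1 = 3           < 0x80000000, passed val, stop waiting
	 *   cur = 5, val = 5:  5 - 5 - 1 = 0xffffffff  >= 0x80000000, keep waiting
	 *   cur = 5, val = 9:  5 - 9 - 1 = 0xfffffffb  >= 0x80000000, keep waiting
	 *
	 * the assert() rejects differences in the ambiguous band around
	 * 0x80000000, i.e. positions too far apart for the comparison to be
	 * meaningful after wraparound.
	 */
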
diff --git a/lib/seqlock.h b/lib/seqlock.h
index eef05a430..b551e3ffc 100644
--- a/lib/seqlock.h
+++ b/lib/seqlock.h
@@ -54,12 +54,28 @@
*/
/* use sequentially increasing "ticket numbers". lowest bit will always
- * be 1 to have a 'cleared' indication (i.e., counts 1,3,5,7,etc. )
+ * be 1 to have a 'cleared' indication (i.e., counts 1,5,9,13,etc. )
+ * 2nd lowest bit is used to indicate we have waiters.
*/
typedef _Atomic uint32_t seqlock_ctr_t;
typedef uint32_t seqlock_val_t;
-#define seqlock_assert_valid(val) assert(val & 1)
+#define seqlock_assert_valid(val) assert((val) & SEQLOCK_HELD)
+/* NB: SEQLOCK_WAITERS is only allowed if SEQLOCK_HELD is also set; can't
+ * have waiters on an unheld seqlock
+ */
+#define SEQLOCK_HELD (1U << 0)
+#define SEQLOCK_WAITERS (1U << 1)
+#define SEQLOCK_VAL(n) ((n) & ~SEQLOCK_WAITERS)
+#define SEQLOCK_STARTVAL 1U
+#define SEQLOCK_INCR 4U
+
+/* TODO: originally, this was using "atomic_fetch_add", which is the reason
+ * bit 0 is used to indicate held state. With SEQLOCK_WAITERS added, there's
+ * no fetch_add anymore (cmpxchg loop instead), so we don't need to use bit 0
+ * for this anymore & can just special-case the value 0 for it and skip it in
+ * counting.
+ */
struct seqlock {
/* always used */
@@ -74,8 +90,16 @@ struct seqlock {
extern void seqlock_init(struct seqlock *sqlo);
-/* while (sqlo <= val) - wait until seqlock->pos > val, or seqlock unheld */
+/* basically: "while (sqlo <= val) wait();"
+ * returns when sqlo > val || !seqlock_held(sqlo)
+ */
extern void seqlock_wait(struct seqlock *sqlo, seqlock_val_t val);
+
+/* same, but time-limited (limit is an absolute CLOCK_MONOTONIC value) */
+extern bool seqlock_timedwait(struct seqlock *sqlo, seqlock_val_t val,
+ const struct timespec *abs_monotime_limit);
+
+/* one-shot test, returns true if seqlock_wait would return immediately */
extern bool seqlock_check(struct seqlock *sqlo, seqlock_val_t val);
static inline bool seqlock_held(struct seqlock *sqlo)
@@ -85,12 +109,20 @@ static inline bool seqlock_held(struct seqlock *sqlo)
/* sqlo - get seqlock position -- for the "counter" seqlock */
extern seqlock_val_t seqlock_cur(struct seqlock *sqlo);
-/* sqlo++ - note: like x++, returns previous value, before bumping */
+
+/* ++sqlo (but atomic & wakes waiters) - returns value that we bumped to.
+ *
+ * guarantees:
+ * - each seqlock_bump call bumps the position by exactly one SEQLOCK_INCR.
+ * There are no skipped/missed or multiple increments.
+ * - each return value is only returned from one seqlock_bump() call
+ */
extern seqlock_val_t seqlock_bump(struct seqlock *sqlo);
/* sqlo = val - can be used on held seqlock. */
extern void seqlock_acquire_val(struct seqlock *sqlo, seqlock_val_t val);
+
/* sqlo = ref - standard pattern: acquire relative to other seqlock */
static inline void seqlock_acquire(struct seqlock *sqlo, struct seqlock *ref)
{
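
A minimal two-thread sketch of the counter usage (a made-up demo in the spirit of tests/lib/test_seqlock.c, not part of this patch):

	#include <pthread.h>
	#include <stdio.h>
	#include "seqlock.h"

	static struct seqlock pos;

	static void *waiter(void *arg)
	{
		(void)arg;
		seqlock_wait(&pos, 5);	/* returns once pos > 5, or pos unheld */
		printf("writer passed position 5\n");
		return NULL;
	}

	int main(void)
	{
		pthread_t thr;

		seqlock_init(&pos);
		seqlock_acquire_val(&pos, SEQLOCK_STARTVAL);	/* pos = 1 */
		pthread_create(&thr, NULL, waiter, NULL);

		seqlock_bump(&pos);	/* 1 -> 5: still <= 5, a waiter keeps waiting */
		seqlock_bump(&pos);	/* 5 -> 9: now > 5, wait(5) returns */

		pthread_join(&thr, NULL);
		seqlock_release(&pos);	/* back to 0 = not held */
		return 0;
	}
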
diff --git a/lib/sigevent.c b/lib/sigevent.c
index d02b07422..fcd85d0d4 100644
--- a/lib/sigevent.c
+++ b/lib/sigevent.c
@@ -24,7 +24,6 @@
#include <memory.h>
#include <lib_errors.h>
-#ifdef SA_SIGINFO
#ifdef HAVE_UCONTEXT_H
#ifdef GNU_LINUX
/* get REG_EIP from ucontext.h */
@@ -34,7 +33,6 @@
#endif /* GNU_LINUX */
#include <ucontext.h>
#endif /* HAVE_UCONTEXT_H */
-#endif /* SA_SIGINFO */
/* master signals descriptor struct */
@@ -158,8 +156,6 @@ static int signal_set(int signo)
return 0;
}
-#ifdef SA_SIGINFO
-
/* XXX This function should be enhanced to support more platforms
(it currently works only on Linux/x86). */
static void *program_counter(void *context)
@@ -199,41 +195,19 @@ static void *program_counter(void *context)
return NULL;
}
-#endif /* SA_SIGINFO */
-
static void __attribute__((noreturn))
-exit_handler(int signo
-#ifdef SA_SIGINFO
- ,
- siginfo_t *siginfo, void *context
-#endif
- )
+exit_handler(int signo, siginfo_t *siginfo, void *context)
{
-#ifndef SA_SIGINFO
- void *siginfo = NULL;
- void *pc = NULL;
-#else
void *pc = program_counter(context);
-#endif
zlog_signal(signo, "exiting...", siginfo, pc);
_exit(128 + signo);
}
static void __attribute__((noreturn))
-core_handler(int signo
-#ifdef SA_SIGINFO
- ,
- siginfo_t *siginfo, void *context
-#endif
- )
+core_handler(int signo, siginfo_t *siginfo, void *context)
{
-#ifndef SA_SIGINFO
- void *siginfo = NULL;
- void *pc = NULL;
-#else
void *pc = program_counter(context);
-#endif
/* make sure we don't hang in here. default for SIGALRM is terminate.
* - if we're in backtrace for more than a second, abort. */
@@ -290,12 +264,7 @@ static void trap_default_signals(void)
static const struct {
const int *sigs;
unsigned int nsigs;
- void (*handler)(int signo
-#ifdef SA_SIGINFO
- ,
- siginfo_t *info, void *context
-#endif
- );
+ void (*handler)(int signo, siginfo_t *info, void *context);
} sigmap[] = {
{core_signals, array_size(core_signals), core_handler},
{exit_signals, array_size(exit_signals), exit_handler},
@@ -316,15 +285,10 @@ static void trap_default_signals(void)
act.sa_handler = SIG_IGN;
act.sa_flags = 0;
} else {
-#ifdef SA_SIGINFO
/* Request extra arguments to signal
* handler. */
act.sa_sigaction = sigmap[i].handler;
act.sa_flags = SA_SIGINFO;
-#else
- act.sa_handler = sigmap[i].handler;
- act.sa_flags = 0;
-#endif
#ifdef SA_RESETHAND
/* don't try to print backtraces
* recursively */
diff --git a/lib/stream.c b/lib/stream.c
index 6c187bd35..c67bc3c99 100644
--- a/lib/stream.c
+++ b/lib/stream.c
@@ -904,20 +904,30 @@ int stream_put_prefix(struct stream *s, struct prefix *p)
/* Put NLRI with label */
int stream_put_labeled_prefix(struct stream *s, struct prefix *p,
- mpls_label_t *label)
+ mpls_label_t *label, int addpath_encode,
+ uint32_t addpath_tx_id)
{
size_t psize;
+ size_t psize_with_addpath;
uint8_t *label_pnt = (uint8_t *)label;
STREAM_VERIFY_SANE(s);
psize = PSIZE(p->prefixlen);
+ psize_with_addpath = psize + (addpath_encode ? 4 : 0);
- if (STREAM_WRITEABLE(s) < (psize + 3)) {
+ if (STREAM_WRITEABLE(s) < (psize_with_addpath + 3)) {
STREAM_BOUND_WARN(s, "put");
return 0;
}
+ if (addpath_encode) {
+ s->data[s->endp++] = (uint8_t)(addpath_tx_id >> 24);
+ s->data[s->endp++] = (uint8_t)(addpath_tx_id >> 16);
+ s->data[s->endp++] = (uint8_t)(addpath_tx_id >> 8);
+ s->data[s->endp++] = (uint8_t)addpath_tx_id;
+ }
+
stream_putc(s, (p->prefixlen + 24));
stream_putc(s, label_pnt[0]);
stream_putc(s, label_pnt[1]);
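
For reference, with addpath negotiated the labeled-unicast NLRI written here gains a 4-byte Path Identifier (RFC 7911) in front of the usual length/label/prefix encoding; the concrete values below are made up.

	/* addpath_tx_id = 1, label 16 (bottom of stack), prefix 10.1.1.0/24:
	 *
	 *   00 00 00 01    Path Identifier (only when addpath_encode is set)
	 *   30             prefix length 24, plus 24 bits of label = 48
	 *   00 01 01       label 16 << 4, with the S (bottom-of-stack) bit set
	 *   0a 01 01       three prefix bytes for a /24
	 */
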
diff --git a/lib/stream.h b/lib/stream.h
index 5341bfa40..a903ec0ea 100644
--- a/lib/stream.h
+++ b/lib/stream.h
@@ -199,7 +199,8 @@ extern int stream_put_prefix_addpath(struct stream *, struct prefix *,
uint32_t addpath_tx_id);
extern int stream_put_prefix(struct stream *, struct prefix *);
extern int stream_put_labeled_prefix(struct stream *, struct prefix *,
- mpls_label_t *);
+ mpls_label_t *, int addpath_encode,
+ uint32_t addpath_tx_id);
extern void stream_get(void *, struct stream *, size_t);
extern bool stream_get2(void *data, struct stream *s, size_t size);
extern void stream_get_from(void *, struct stream *, size_t, size_t);
diff --git a/lib/subdir.am b/lib/subdir.am
index 3754d270a..2be7537bc 100644
--- a/lib/subdir.am
+++ b/lib/subdir.am
@@ -21,6 +21,7 @@ lib_libfrr_la_SOURCES = \
lib/distribute.c \
lib/ferr.c \
lib/filter.c \
+ lib/frrcu.c \
lib/frrlua.c \
lib/frr_pthread.c \
lib/frrstr.c \
@@ -160,6 +161,7 @@ pkginclude_HEADERS += \
lib/frrlua.h \
lib/frr_pthread.h \
lib/frratomic.h \
+ lib/frrcu.h \
lib/frrstr.h \
lib/getopt.h \
lib/graph.h \
diff --git a/lib/thread.c b/lib/thread.c
index f862ce5eb..5756ebc1f 100644
--- a/lib/thread.c
+++ b/lib/thread.c
@@ -25,6 +25,7 @@
#include "thread.h"
#include "memory.h"
+#include "frrcu.h"
#include "log.h"
#include "hash.h"
#include "command.h"
@@ -737,6 +738,9 @@ static int fd_poll(struct thread_master *m, struct pollfd *pfds, nfds_t pfdsize,
< 0) // effect a poll (return immediately)
timeout = 0;
+ rcu_read_unlock();
+ rcu_assert_read_unlocked();
+
/* add poll pipe poker */
assert(count + 1 < pfdsize);
pfds[count].fd = m->io_pipe[0];
@@ -750,6 +754,8 @@ static int fd_poll(struct thread_master *m, struct pollfd *pfds, nfds_t pfdsize,
while (read(m->io_pipe[0], &trash, sizeof(trash)) > 0)
;
+ rcu_read_lock();
+
return num;
}
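
The apparent intent of this hunk: the event loop drops its RCU read lock only for the time it is actually blocked in poll(), so an idle thread never stalls rcu_free()/rcu_close() processing. Code that blocks for a long stretch would follow the same shape (fds/nfds/timeout/ret are placeholders):

	rcu_read_unlock();			/* let delayed frees proceed */
	ret = poll(fds, nfds, timeout);		/* may block for a long time */
	rcu_read_lock();			/* back under RCU protection
						 * before touching shared data */
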
diff --git a/lib/zebra.h b/lib/zebra.h
index 22239f8e6..352887eca 100644
--- a/lib/zebra.h
+++ b/lib/zebra.h
@@ -134,6 +134,10 @@ typedef unsigned char uint8_t;
#endif
#endif
+#ifdef CRYPTO_OPENSSL
+#include <openssl/evp.h>
+#endif
+
#include "openbsd-tree.h"
#include <netinet/in.h>
diff --git a/ospfd/ospf_packet.c b/ospfd/ospf_packet.c
index 50c30a6fa..62b044479 100644
--- a/ospfd/ospf_packet.c
+++ b/ospfd/ospf_packet.c
@@ -33,7 +33,9 @@
#include "log.h"
#include "sockopt.h"
#include "checksum.h"
+#ifdef CRYPTO_INTERNAL
#include "md5.h"
+#endif
#include "vrf.h"
#include "lib_errors.h"
@@ -332,7 +334,11 @@ static unsigned int ospf_packet_max(struct ospf_interface *oi)
static int ospf_check_md5_digest(struct ospf_interface *oi,
struct ospf_header *ospfh)
{
+#ifdef CRYPTO_OPENSSL
+ EVP_MD_CTX *ctx;
+#elif CRYPTO_INTERNAL
MD5_CTX ctx;
+#endif
unsigned char digest[OSPF_AUTH_MD5_SIZE];
struct crypt_key *ck;
struct ospf_neighbor *nbr;
@@ -361,11 +367,21 @@ static int ospf_check_md5_digest(struct ospf_interface *oi,
}
/* Generate a digest for the ospf packet - their digest + our digest. */
+#ifdef CRYPTO_OPENSSL
+ unsigned int md5_size = OSPF_AUTH_MD5_SIZE;
+ ctx = EVP_MD_CTX_new();
+ EVP_DigestInit(ctx, EVP_md5());
+ EVP_DigestUpdate(ctx, ospfh, length);
+ EVP_DigestUpdate(ctx, ck->auth_key, OSPF_AUTH_MD5_SIZE);
+ EVP_DigestFinal(ctx, digest, &md5_size);
+ EVP_MD_CTX_free(ctx);
+#elif CRYPTO_INTERNAL
memset(&ctx, 0, sizeof(ctx));
MD5Init(&ctx);
MD5Update(&ctx, ospfh, length);
MD5Update(&ctx, ck->auth_key, OSPF_AUTH_MD5_SIZE);
MD5Final(digest, &ctx);
+#endif
/* compare the two */
if (memcmp((caddr_t)ospfh + length, digest, OSPF_AUTH_MD5_SIZE)) {
@@ -389,7 +405,11 @@ static int ospf_make_md5_digest(struct ospf_interface *oi,
{
struct ospf_header *ospfh;
unsigned char digest[OSPF_AUTH_MD5_SIZE] = {0};
+#ifdef CRYPTO_OPENSSL
+ EVP_MD_CTX *ctx;
+#elif CRYPTO_INTERNAL
MD5_CTX ctx;
+#endif
void *ibuf;
uint32_t t;
struct crypt_key *ck;
@@ -422,11 +442,21 @@ static int ospf_make_md5_digest(struct ospf_interface *oi,
}
/* Generate a digest for the entire packet + our secret key. */
+#ifdef CRYPTO_OPENSSL
+ unsigned int md5_size = OSPF_AUTH_MD5_SIZE;
+ ctx = EVP_MD_CTX_new();
+ EVP_DigestInit(ctx, EVP_md5());
+ EVP_DigestUpdate(ctx, ibuf, ntohs(ospfh->length));
+ EVP_DigestUpdate(ctx, auth_key, OSPF_AUTH_MD5_SIZE);
+ EVP_DigestFinal(ctx, digest, &md5_size);
+ EVP_MD_CTX_free(ctx);
+#elif CRYPTO_INTERNAL
memset(&ctx, 0, sizeof(ctx));
MD5Init(&ctx);
MD5Update(&ctx, ibuf, ntohs(ospfh->length));
MD5Update(&ctx, auth_key, OSPF_AUTH_MD5_SIZE);
MD5Final(digest, &ctx);
+#endif
/* Append md5 digest to the end of the stream. */
stream_put(op->s, digest, OSPF_AUTH_MD5_SIZE);
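
The same EVP calls recur in the RIP changes below; as a self-contained reference for the CRYPTO_OPENSSL path (assuming OpenSSL 1.1 or newer for EVP_MD_CTX_new()/EVP_MD_CTX_free(); md5_data_key is just an illustration name), the pattern is:

	#include <stddef.h>
	#include <openssl/evp.h>

	/* MD5(data || key) -> digest[16], as used above for keyed-MD5
	 * packet authentication
	 */
	static void md5_data_key(const void *data, size_t datalen,
				 const void *key, size_t keylen,
				 unsigned char digest[16])
	{
		unsigned int md5_size = 16;
		EVP_MD_CTX *ctx = EVP_MD_CTX_new();

		EVP_DigestInit(ctx, EVP_md5());
		EVP_DigestUpdate(ctx, data, datalen);
		EVP_DigestUpdate(ctx, key, keylen);
		EVP_DigestFinal(ctx, digest, &md5_size);
		EVP_MD_CTX_free(ctx);
	}
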
diff --git a/pimd/pim_neighbor.c b/pimd/pim_neighbor.c
index a63b09fc1..722ecb2a7 100644
--- a/pimd/pim_neighbor.c
+++ b/pimd/pim_neighbor.c
@@ -424,10 +424,11 @@ struct pim_neighbor *pim_neighbor_find_by_secondary(struct interface *ifp,
struct pim_neighbor *neigh;
struct prefix *p;
- pim_ifp = ifp->info;
- if (!pim_ifp)
+ if (!ifp || !ifp->info)
return NULL;
+ pim_ifp = ifp->info;
+
for (ALL_LIST_ELEMENTS_RO(pim_ifp->pim_neighbor_list, node, neigh)) {
for (ALL_LIST_ELEMENTS_RO(neigh->prefix_list, pnode, p)) {
if (prefix_same(p, src))
diff --git a/pimd/pim_nht.c b/pimd/pim_nht.c
index 65ea858cb..39dc8ad2f 100644
--- a/pimd/pim_nht.c
+++ b/pimd/pim_nht.c
@@ -842,6 +842,14 @@ int pim_parse_nexthop_update(ZAPI_CALLBACK_ARGS)
}
if (!ifp->info) {
+ /*
+ * Even though multicast is not enabled on this
+ * interface, store the nexthop in the database;
+ * otherwise we could miss this update. This does
+ * not cause any issue, because interfaces that
+ * are not multicast enabled are omitted when
+ * choosing the path.
+ */
if (PIM_DEBUG_PIM_NHT) {
char buf[NEXTHOP_STRLEN];
@@ -853,8 +861,6 @@ int pim_parse_nexthop_update(ZAPI_CALLBACK_ARGS)
nexthop2str(nexthop, buf,
sizeof(buf)));
}
- nexthop_free(nexthop);
- continue;
}
if (nhlist_tail) {
diff --git a/ripd/ripd.c b/ripd/ripd.c
index e0ff0430f..561fbcb52 100644
--- a/ripd/ripd.c
+++ b/ripd/ripd.c
@@ -37,7 +37,9 @@
#include "if_rmap.h"
#include "plist.h"
#include "distribute.h"
+#ifdef CRYPTO_INTERNAL
#include "md5.h"
+#endif
#include "keychain.h"
#include "privs.h"
#include "lib_errors.h"
@@ -870,7 +872,11 @@ static int rip_auth_md5(struct rip_packet *packet, struct sockaddr_in *from,
struct rip_md5_data *md5data;
struct keychain *keychain;
struct key *key;
+#ifdef CRYPTO_OPENSSL
+ EVP_MD_CTX *ctx;
+#elif CRYPTO_INTERNAL
MD5_CTX ctx;
+#endif
uint8_t digest[RIP_AUTH_MD5_SIZE];
uint16_t packet_len;
char auth_str[RIP_AUTH_MD5_SIZE] = {};
@@ -934,11 +940,21 @@ static int rip_auth_md5(struct rip_packet *packet, struct sockaddr_in *from,
return 0;
/* MD5 digest authentication. */
+#ifdef CRYPTO_OPENSSL
+ unsigned int md5_size = RIP_AUTH_MD5_SIZE;
+ ctx = EVP_MD_CTX_new();
+ EVP_DigestInit(ctx, EVP_md5());
+ EVP_DigestUpdate(ctx, packet, packet_len + RIP_HEADER_SIZE);
+ EVP_DigestUpdate(ctx, auth_str, RIP_AUTH_MD5_SIZE);
+ EVP_DigestFinal(ctx, digest, &md5_size);
+ EVP_MD_CTX_free(ctx);
+#elif CRYPTO_INTERNAL
memset(&ctx, 0, sizeof(ctx));
MD5Init(&ctx);
MD5Update(&ctx, packet, packet_len + RIP_HEADER_SIZE);
MD5Update(&ctx, auth_str, RIP_AUTH_MD5_SIZE);
MD5Final(digest, &ctx);
+#endif
if (memcmp(md5data->digest, digest, RIP_AUTH_MD5_SIZE) == 0)
return packet_len;
@@ -1063,7 +1079,11 @@ static void rip_auth_md5_set(struct stream *s, struct rip_interface *ri,
size_t doff, char *auth_str, int authlen)
{
unsigned long len;
+#ifdef CRYPTO_OPENSSL
+ EVP_MD_CTX *ctx;
+#elif CRYPTO_INTERNAL
MD5_CTX ctx;
+#endif
unsigned char digest[RIP_AUTH_MD5_SIZE];
/* Make it sure this interface is configured as MD5
@@ -1092,11 +1112,21 @@ static void rip_auth_md5_set(struct stream *s, struct rip_interface *ri,
stream_putw(s, RIP_AUTH_DATA);
/* Generate a digest for the RIP packet. */
+#ifdef CRYPTO_OPENSSL
+ unsigned int md5_size = RIP_AUTH_MD5_SIZE;
+ ctx = EVP_MD_CTX_new();
+ EVP_DigestInit(ctx, EVP_md5());
+ EVP_DigestUpdate(ctx, STREAM_DATA(s), stream_get_endp(s));
+ EVP_DigestUpdate(ctx, auth_str, RIP_AUTH_MD5_SIZE);
+ EVP_DigestFinal(ctx, digest, &md5_size);
+ EVP_MD_CTX_free(ctx);
+#elif CRYPTO_INTERNAL
memset(&ctx, 0, sizeof(ctx));
MD5Init(&ctx);
MD5Update(&ctx, STREAM_DATA(s), stream_get_endp(s));
MD5Update(&ctx, auth_str, RIP_AUTH_MD5_SIZE);
MD5Final(digest, &ctx);
+#endif
/* Copy the digest to the packet. */
stream_write(s, digest, RIP_AUTH_MD5_SIZE);
diff --git a/tests/lib/test_atomlist.c b/tests/lib/test_atomlist.c
index 249fff8ed..238ee9539 100644
--- a/tests/lib/test_atomlist.c
+++ b/tests/lib/test_atomlist.c
@@ -253,7 +253,7 @@ static void *thr1func(void *arg)
struct testrun *tr;
for (tr = runs; tr; tr = tr->next) {
- sv = seqlock_bump(&p->sqlo);
+ sv = seqlock_bump(&p->sqlo) - SEQLOCK_INCR;
seqlock_wait(&sqlo, sv);
tr->func(offset);
@@ -288,14 +288,14 @@ static void run_tr(struct testrun *tr)
size_t c = 0, s = 0, n = 0;
struct item *item, *prev, dummy;
- printf("[%02u] %35s %s\n", seqlock_cur(&sqlo) >> 1, "", desc);
+ printf("[%02u] %35s %s\n", seqlock_cur(&sqlo) >> 2, "", desc);
fflush(stdout);
if (tr->prefill != NOCLEAR)
clear_list(tr->prefill);
monotime(&tv);
- sv = seqlock_bump(&sqlo);
+ sv = seqlock_bump(&sqlo) - SEQLOCK_INCR;
for (size_t i = 0; i < NTHREADS; i++) {
seqlock_wait(&thr[i].sqlo, seqlock_cur(&sqlo));
s += thr[i].counter;
@@ -325,7 +325,7 @@ static void run_tr(struct testrun *tr)
assert(c == alist_count(&ahead));
}
printf("\033[1A[%02u] %9"PRId64"us c=%5zu s=%5zu n=%5zu %s\n",
- sv >> 1, delta, c, s, n, desc);
+ sv >> 2, delta, c, s, n, desc);
}
#ifdef BASIC_TESTS
@@ -381,7 +381,7 @@ int main(int argc, char **argv)
basic_tests();
seqlock_init(&sqlo);
- seqlock_acquire_val(&sqlo, 1);
+ seqlock_acquire_val(&sqlo, SEQLOCK_STARTVAL);
for (i = 0; i < NTHREADS; i++) {
seqlock_init(&thr[i].sqlo);
diff --git a/tests/lib/test_seqlock.c b/tests/lib/test_seqlock.c
index 9cc6f8070..639c2bdc2 100644
--- a/tests/lib/test_seqlock.c
+++ b/tests/lib/test_seqlock.c
@@ -66,20 +66,20 @@ static void *thr1func(void *arg)
seqlock_wait(&sqlo, 1);
writestr("thr1 @1\n");
- seqlock_wait(&sqlo, 3);
- writestr("thr1 @3\n");
-
seqlock_wait(&sqlo, 5);
writestr("thr1 @5\n");
- seqlock_wait(&sqlo, 7);
- writestr("thr1 @7\n");
-
seqlock_wait(&sqlo, 9);
writestr("thr1 @9\n");
- seqlock_wait(&sqlo, 11);
- writestr("thr1 @11\n");
+ seqlock_wait(&sqlo, 13);
+ writestr("thr1 @13\n");
+
+ seqlock_wait(&sqlo, 17);
+ writestr("thr1 @17\n");
+
+ seqlock_wait(&sqlo, 21);
+ writestr("thr1 @21\n");
return NULL;
}
@@ -95,11 +95,11 @@ int main(int argc, char **argv)
assert(seqlock_cur(&sqlo) == 1);
- assert(seqlock_bump(&sqlo) == 1);
- assert(seqlock_cur(&sqlo) == 3);
- assert(seqlock_bump(&sqlo) == 3);
- assert(seqlock_bump(&sqlo) == 5);
- assert(seqlock_bump(&sqlo) == 7);
- assert(seqlock_cur(&sqlo) == 9);
+ assert(seqlock_bump(&sqlo) == 5);
+ assert(seqlock_cur(&sqlo) == 5);
+ assert(seqlock_bump(&sqlo) == 9);
+ assert(seqlock_bump(&sqlo) == 13);
+ assert(seqlock_bump(&sqlo) == 17);
+ assert(seqlock_cur(&sqlo) == 17);
assert(seqlock_held(&sqlo));
seqlock_release(&sqlo);
@@ -108,16 +108,16 @@ int main(int argc, char **argv)
pthread_create(&thr1, NULL, thr1func, NULL);
sleep(1);
- writestr("main @3\n");
- seqlock_acquire_val(&sqlo, 3);
+ writestr("main @5\n");
+ seqlock_acquire_val(&sqlo, 5);
sleep(2);
- writestr("main @5\n");
+ writestr("main @9\n");
seqlock_bump(&sqlo);
sleep(1);
- writestr("main @9\n");
- seqlock_acquire_val(&sqlo, 9);
+ writestr("main @17\n");
+ seqlock_acquire_val(&sqlo, 17);
sleep(1);
writestr("main @release\n");
diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c
index f93562b31..c90e027f0 100644
--- a/zebra/zebra_dplane.c
+++ b/zebra/zebra_dplane.c
@@ -3023,7 +3023,7 @@ void zebra_dplane_start(void)
/* Start dataplane pthread */
zdplane_info.dg_pthread = frr_pthread_new(&pattr, "Zebra dplane thread",
- "Zebra dplane");
+ "zebra_dplane");
zdplane_info.dg_master = zdplane_info.dg_pthread->master;
diff --git a/zebra/zebra_fpm_netlink.c b/zebra/zebra_fpm_netlink.c
index 822def318..f347d3955 100644
--- a/zebra/zebra_fpm_netlink.c
+++ b/zebra/zebra_fpm_netlink.c
@@ -238,7 +238,7 @@ static int netlink_route_info_add_nh(netlink_route_info_t *ri,
if (re && CHECK_FLAG(re->flags, ZEBRA_FLAG_EVPN_ROUTE)) {
nhi.encap_info.encap_type = FPM_NH_ENCAP_VXLAN;
- zl3vni = zl3vni_from_vrf(ri->rtm_table);
+ zl3vni = zl3vni_from_vrf(nexthop->vrf_id);
if (zl3vni && is_l3vni_oper_up(zl3vni)) {
/* Add VNI to VxLAN encap info */
diff --git a/zebra/zebra_vxlan.c b/zebra/zebra_vxlan.c
index 2e8c81bdd..1450072aa 100644
--- a/zebra/zebra_vxlan.c
+++ b/zebra/zebra_vxlan.c
@@ -2787,27 +2787,40 @@ static int zvni_gw_macip_add(struct interface *ifp, zebra_vni_t *zvni,
/* Set "local" forwarding info. */
SET_FLAG(n->flags, ZEBRA_NEIGH_LOCAL);
- SET_FLAG(n->flags, ZEBRA_NEIGH_DEF_GW);
ZEBRA_NEIGH_SET_ACTIVE(n);
- /* Set Router flag (R-bit) */
- if (ip->ipa_type == IPADDR_V6)
- SET_FLAG(n->flags, ZEBRA_NEIGH_ROUTER_FLAG);
memcpy(&n->emac, macaddr, ETH_ALEN);
n->ifindex = ifp->ifindex;
/* Only advertise in BGP if the knob is enabled */
- if (!advertise_gw_macip_enabled(zvni))
- return 0;
+ if (advertise_gw_macip_enabled(zvni)) {
- if (IS_ZEBRA_DEBUG_VXLAN)
- zlog_debug(
+ SET_FLAG(mac->flags, ZEBRA_MAC_DEF_GW);
+ SET_FLAG(n->flags, ZEBRA_NEIGH_DEF_GW);
+ /* Set Router flag (R-bit) */
+ if (ip->ipa_type == IPADDR_V6)
+ SET_FLAG(n->flags, ZEBRA_NEIGH_ROUTER_FLAG);
+
+ if (IS_ZEBRA_DEBUG_VXLAN)
+ zlog_debug(
"SVI %s(%u) L2-VNI %u, sending GW MAC %s IP %s add to BGP with flags 0x%x",
ifp->name, ifp->ifindex, zvni->vni,
prefix_mac2str(macaddr, buf, sizeof(buf)),
ipaddr2str(ip, buf2, sizeof(buf2)), n->flags);
- zvni_neigh_send_add_to_client(zvni->vni, ip, macaddr,
- n->flags, n->loc_seq);
+ zvni_neigh_send_add_to_client(zvni->vni, ip, macaddr,
+ n->flags, n->loc_seq);
+ } else if (advertise_svi_macip_enabled(zvni)) {
+
+ if (IS_ZEBRA_DEBUG_VXLAN)
+ zlog_debug(
+ "SVI %s(%u) L2-VNI %u, sending SVI MAC %s IP %s add to BGP with flags 0x%x",
+ ifp->name, ifp->ifindex, zvni->vni,
+ prefix_mac2str(macaddr, buf, sizeof(buf)),
+ ipaddr2str(ip, buf2, sizeof(buf2)), n->flags);
+
+ zvni_neigh_send_add_to_client(zvni->vni, ip, macaddr,
+ n->flags, n->loc_seq);
+ }
return 0;
}
@@ -9015,7 +9028,7 @@ void zebra_vxlan_advertise_svi_macip(ZAPI_HANDLER_ARGS)
if (IS_ZEBRA_DEBUG_VXLAN)
zlog_debug("EVPN SVI-MACIP Adv %s, currently %s",
advertise ? "enabled" : "disabled",
- advertise_gw_macip_enabled(NULL)
+ advertise_svi_macip_enabled(NULL)
? "enabled"
: "disabled");