diff options
author | Jafar Al-Gharaibeh <Jafaral@users.noreply.github.com> | 2019-04-22 18:44:52 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-04-22 18:44:52 +0200 |
commit | c6f637132fc482b782d47f943849a3272f07c0da (patch) | |
tree | 448d010b2a4d0044a77f6ebf7f30bd8b596b0994 /pimd | |
parent | Merge pull request #4057 from mjstapp/fix_privs_even_more (diff) | |
parent | pimd: fix macro backslash alignment (diff) | |
download | frr-c6f637132fc482b782d47f943849a3272f07c0da.tar.xz frr-c6f637132fc482b782d47f943849a3272f07c0da.zip |
Merge pull request #4025 from AnuradhaKaruppiah/pim-evpn
pim-evpn: Forwarding overlay BUM traffic via multicast VxLAN tunnels in the underlay
Diffstat (limited to 'pimd')
-rw-r--r-- | pimd/pim_cmd.c | 425 | ||||
-rw-r--r-- | pimd/pim_cmd.h | 1 | ||||
-rw-r--r-- | pimd/pim_iface.c | 23 | ||||
-rw-r--r-- | pimd/pim_iface.h | 4 | ||||
-rw-r--r-- | pimd/pim_instance.c | 3 | ||||
-rw-r--r-- | pimd/pim_instance.h | 2 | ||||
-rw-r--r-- | pimd/pim_memory.c | 1 | ||||
-rw-r--r-- | pimd/pim_memory.h | 1 | ||||
-rw-r--r-- | pimd/pim_mroute.c | 46 | ||||
-rw-r--r-- | pimd/pim_oil.c | 10 | ||||
-rw-r--r-- | pimd/pim_oil.h | 8 | ||||
-rw-r--r-- | pimd/pim_register.c | 52 | ||||
-rw-r--r-- | pimd/pim_register.h | 1 | ||||
-rw-r--r-- | pimd/pim_rpf.c | 3 | ||||
-rw-r--r-- | pimd/pim_str.c | 37 | ||||
-rw-r--r-- | pimd/pim_str.h | 5 | ||||
-rw-r--r-- | pimd/pim_upstream.c | 83 | ||||
-rw-r--r-- | pimd/pim_upstream.h | 69 | ||||
-rw-r--r-- | pimd/pim_vty.c | 8 | ||||
-rw-r--r-- | pimd/pim_vxlan.c | 1025 | ||||
-rw-r--r-- | pimd/pim_vxlan.h | 139 | ||||
-rw-r--r-- | pimd/pim_vxlan_instance.h | 45 | ||||
-rw-r--r-- | pimd/pim_zebra.c | 50 | ||||
-rw-r--r-- | pimd/pimd.h | 4 | ||||
-rw-r--r-- | pimd/subdir.am | 3 |
25 files changed, 1964 insertions, 84 deletions
diff --git a/pimd/pim_cmd.c b/pimd/pim_cmd.c index 110408001..e1dfb0006 100644 --- a/pimd/pim_cmd.c +++ b/pimd/pim_cmd.c @@ -60,6 +60,7 @@ #include "pim_ssm.h" #include "pim_nht.h" #include "pim_bfd.h" +#include "pim_vxlan.h" #include "bfd.h" #ifndef VTYSH_EXTRACT_PL @@ -1963,7 +1964,7 @@ static void pim_show_state(struct pim_instance *pim, struct vty *vty, json = json_object_new_object(); } else { vty_out(vty, - "Codes: J -> Pim Join, I -> IGMP Report, S -> Source, * -> Inherited from (*,G)"); + "Codes: J -> Pim Join, I -> IGMP Report, S -> Source, * -> Inherited from (*,G), V -> VxLAN"); vty_out(vty, "\nInstalled Source Group IIF OIL\n"); } @@ -2092,7 +2093,7 @@ static void pim_show_state(struct pim_instance *pim, struct vty *vty, } else { if (first_oif) { first_oif = 0; - vty_out(vty, "%s(%c%c%c%c)", out_ifname, + vty_out(vty, "%s(%c%c%c%c%c)", out_ifname, (c_oil->oif_flags[oif_vif_index] & PIM_OIF_FLAG_PROTO_IGMP) ? 'I' @@ -2102,6 +2103,10 @@ static void pim_show_state(struct pim_instance *pim, struct vty *vty, ? 'J' : ' ', (c_oil->oif_flags[oif_vif_index] + & PIM_OIF_FLAG_PROTO_VXLAN) + ? 'V' + : ' ', + (c_oil->oif_flags[oif_vif_index] & PIM_OIF_FLAG_PROTO_SOURCE) ? 'S' : ' ', @@ -2110,7 +2115,7 @@ static void pim_show_state(struct pim_instance *pim, struct vty *vty, ? '*' : ' '); } else - vty_out(vty, ", %s(%c%c%c%c)", + vty_out(vty, ", %s(%c%c%c%c%c)", out_ifname, (c_oil->oif_flags[oif_vif_index] & PIM_OIF_FLAG_PROTO_IGMP) @@ -2121,6 +2126,10 @@ static void pim_show_state(struct pim_instance *pim, struct vty *vty, ? 'J' : ' ', (c_oil->oif_flags[oif_vif_index] + & PIM_OIF_FLAG_PROTO_VXLAN) + ? 'V' + : ' ', + (c_oil->oif_flags[oif_vif_index] & PIM_OIF_FLAG_PROTO_SOURCE) ? 'S' : ' ', @@ -4672,6 +4681,11 @@ static void show_mroute(struct pim_instance *pim, struct vty *vty, bool fill, json_ifp_out, "protocolIgmp"); if (c_oil->oif_flags[oif_vif_index] + & PIM_OIF_FLAG_PROTO_VXLAN) + json_object_boolean_true_add( + json_ifp_out, "protocolVxlan"); + + if (c_oil->oif_flags[oif_vif_index] & PIM_OIF_FLAG_PROTO_SOURCE) json_object_boolean_true_add( json_ifp_out, "protocolSource"); @@ -4714,6 +4728,11 @@ static void show_mroute(struct pim_instance *pim, struct vty *vty, bool fill, } if (c_oil->oif_flags[oif_vif_index] + & PIM_OIF_FLAG_PROTO_VXLAN) { + strcpy(proto, "VxLAN"); + } + + if (c_oil->oif_flags[oif_vif_index] & PIM_OIF_FLAG_PROTO_SOURCE) { strcpy(proto, "SRC"); } @@ -5825,7 +5844,8 @@ static int pim_cmd_igmp_start(struct vty *vty, struct interface *ifp) pim_ifp = ifp->info; if (!pim_ifp) { - pim_ifp = pim_if_new(ifp, true, false, false); + pim_ifp = pim_if_new(ifp, true, false, false, + false /*vxlan_term*/); if (!pim_ifp) { vty_out(vty, "Could not enable IGMP on interface %s\n", ifp->name); @@ -6436,7 +6456,8 @@ static int pim_cmd_interface_add(struct interface *ifp) struct pim_interface *pim_ifp = ifp->info; if (!pim_ifp) { - pim_ifp = pim_if_new(ifp, false, true, false); + pim_ifp = pim_if_new(ifp, false, true, false, + false /*vxlan_term*/); if (!pim_ifp) { return 0; } @@ -7435,6 +7456,29 @@ DEFUN (no_debug_pim_zebra, return CMD_SUCCESS; } +DEFUN (debug_pim_vxlan, + debug_pim_vxlan_cmd, + "debug pim vxlan", + DEBUG_STR + DEBUG_PIM_STR + DEBUG_PIM_VXLAN_STR) +{ + PIM_DO_DEBUG_VXLAN; + return CMD_SUCCESS; +} + +DEFUN (no_debug_pim_vxlan, + no_debug_pim_vxlan_cmd, + "no debug pim vxlan", + NO_STR + DEBUG_STR + DEBUG_PIM_STR + DEBUG_PIM_VXLAN_STR) +{ + PIM_DONT_DEBUG_VXLAN; + return CMD_SUCCESS; +} + DEFUN (debug_msdp, debug_msdp_cmd, "debug msdp", @@ -8736,6 +8780,369 @@ DEFUN (show_ip_msdp_sa_sg_vrf_all, return CMD_SUCCESS; } +struct pim_sg_cache_walk_data { + struct vty *vty; + json_object *json; + json_object *json_group; + struct in_addr addr; + bool addr_match; +}; + +static void pim_show_vxlan_sg_entry(struct pim_vxlan_sg *vxlan_sg, + struct pim_sg_cache_walk_data *cwd) +{ + struct vty *vty = cwd->vty; + json_object *json = cwd->json; + char src_str[INET_ADDRSTRLEN]; + char grp_str[INET_ADDRSTRLEN]; + json_object *json_row; + bool installed = (vxlan_sg->up)?TRUE:FALSE; + const char *iif_name = vxlan_sg->iif?vxlan_sg->iif->name:"-"; + const char *oif_name; + + if (pim_vxlan_is_orig_mroute(vxlan_sg)) + oif_name = vxlan_sg->orig_oif?vxlan_sg->orig_oif->name:""; + else + oif_name = vxlan_sg->term_oif?vxlan_sg->term_oif->name:""; + + if (cwd->addr_match && (vxlan_sg->sg.src.s_addr != cwd->addr.s_addr) && + (vxlan_sg->sg.grp.s_addr != cwd->addr.s_addr)) { + return; + } + pim_inet4_dump("<src?>", vxlan_sg->sg.src, src_str, sizeof(src_str)); + pim_inet4_dump("<grp?>", vxlan_sg->sg.grp, grp_str, sizeof(grp_str)); + if (json) { + json_object_object_get_ex(json, grp_str, &cwd->json_group); + + if (!cwd->json_group) { + cwd->json_group = json_object_new_object(); + json_object_object_add(json, grp_str, + cwd->json_group); + } + + json_row = json_object_new_object(); + json_object_string_add(json_row, "source", src_str); + json_object_string_add(json_row, "group", grp_str); + json_object_string_add(json_row, "input", iif_name); + json_object_string_add(json_row, "output", oif_name); + if (installed) + json_object_boolean_true_add(json_row, "installed"); + else + json_object_boolean_false_add(json_row, "installed"); + json_object_object_add(cwd->json_group, src_str, json_row); + } else { + vty_out(vty, "%-15s %-15s %-15s %-15s %-5s\n", + src_str, grp_str, iif_name, oif_name, + installed?"I":""); + } +} + +static void pim_show_vxlan_sg_hash_entry(struct hash_backet *backet, void *arg) +{ + pim_show_vxlan_sg_entry((struct pim_vxlan_sg *)backet->data, + (struct pim_sg_cache_walk_data *)arg); +} + +static void pim_show_vxlan_sg(struct pim_instance *pim, + struct vty *vty, bool uj) +{ + json_object *json = NULL; + struct pim_sg_cache_walk_data cwd; + + if (uj) { + json = json_object_new_object(); + } else { + vty_out(vty, "Codes: I -> installed\n"); + vty_out(vty, + "Source Group Input Output Flags\n"); + } + + memset(&cwd, 0, sizeof(cwd)); + cwd.vty = vty; + cwd.json = json; + hash_iterate(pim->vxlan.sg_hash, pim_show_vxlan_sg_hash_entry, &cwd); + + if (uj) { + vty_out(vty, "%s\n", json_object_to_json_string_ext( + json, JSON_C_TO_STRING_PRETTY)); + json_object_free(json); + } +} + +static void pim_show_vxlan_sg_match_addr(struct pim_instance *pim, + struct vty *vty, char *addr_str, bool uj) +{ + json_object *json = NULL; + struct pim_sg_cache_walk_data cwd; + int result = 0; + + memset(&cwd, 0, sizeof(cwd)); + result = inet_pton(AF_INET, addr_str, &cwd.addr); + if (result <= 0) { + vty_out(vty, "Bad address %s: errno=%d: %s\n", addr_str, + errno, safe_strerror(errno)); + return; + } + + if (uj) { + json = json_object_new_object(); + } else { + vty_out(vty, "Codes: I -> installed\n"); + vty_out(vty, + "Source Group Input Output Flags\n"); + } + + cwd.vty = vty; + cwd.json = json; + cwd.addr_match = TRUE; + hash_iterate(pim->vxlan.sg_hash, pim_show_vxlan_sg_hash_entry, &cwd); + + if (uj) { + vty_out(vty, "%s\n", json_object_to_json_string_ext( + json, JSON_C_TO_STRING_PRETTY)); + json_object_free(json); + } +} + +static void pim_show_vxlan_sg_one(struct pim_instance *pim, + struct vty *vty, char *src_str, char *grp_str, bool uj) +{ + json_object *json = NULL; + struct prefix_sg sg; + int result = 0; + struct pim_vxlan_sg *vxlan_sg; + const char *iif_name; + bool installed; + const char *oif_name; + + result = inet_pton(AF_INET, src_str, &sg.src); + if (result <= 0) { + vty_out(vty, "Bad src address %s: errno=%d: %s\n", src_str, + errno, safe_strerror(errno)); + return; + } + result = inet_pton(AF_INET, grp_str, &sg.grp); + if (result <= 0) { + vty_out(vty, "Bad grp address %s: errno=%d: %s\n", grp_str, + errno, safe_strerror(errno)); + return; + } + + sg.family = AF_INET; + sg.prefixlen = IPV4_MAX_BITLEN; + if (uj) + json = json_object_new_object(); + + vxlan_sg = pim_vxlan_sg_find(pim, &sg); + if (vxlan_sg) { + installed = (vxlan_sg->up)?TRUE:FALSE; + iif_name = vxlan_sg->iif?vxlan_sg->iif->name:"-"; + + if (pim_vxlan_is_orig_mroute(vxlan_sg)) + oif_name = + vxlan_sg->orig_oif?vxlan_sg->orig_oif->name:""; + else + oif_name = + vxlan_sg->term_oif?vxlan_sg->term_oif->name:""; + + if (uj) { + json_object_string_add(json, "source", src_str); + json_object_string_add(json, "group", grp_str); + json_object_string_add(json, "input", iif_name); + json_object_string_add(json, "output", oif_name); + if (installed) + json_object_boolean_true_add(json, "installed"); + else + json_object_boolean_false_add(json, + "installed"); + } else { + vty_out(vty, "SG : %s\n", vxlan_sg->sg_str); + vty_out(vty, " Input : %s\n", iif_name); + vty_out(vty, " Output : %s\n", oif_name); + vty_out(vty, " installed : %s\n", + installed?"yes":"no"); + } + } + + if (uj) { + vty_out(vty, "%s\n", json_object_to_json_string_ext( + json, JSON_C_TO_STRING_PRETTY)); + json_object_free(json); + } +} + +DEFUN (show_ip_pim_vxlan_sg, + show_ip_pim_vxlan_sg_cmd, + "show ip pim [vrf NAME] vxlan-groups [A.B.C.D [A.B.C.D]] [json]", + SHOW_STR + IP_STR + PIM_STR + VRF_CMD_HELP_STR + "VxLAN BUM groups\n" + "source or group ip\n" + "group ip\n" + JSON_STR) +{ + bool uj = use_json(argc, argv); + struct vrf *vrf; + int idx = 2; + + vrf = pim_cmd_lookup_vrf(vty, argv, argc, &idx); + + if (!vrf) + return CMD_WARNING; + + char *src_ip = argv_find(argv, argc, "A.B.C.D", &idx) ? + argv[idx++]->arg:NULL; + char *grp_ip = idx < argc && argv_find(argv, argc, "A.B.C.D", &idx) ? + argv[idx]->arg:NULL; + + if (src_ip && grp_ip) + pim_show_vxlan_sg_one(vrf->info, vty, src_ip, grp_ip, uj); + else if (src_ip) + pim_show_vxlan_sg_match_addr(vrf->info, vty, src_ip, uj); + else + pim_show_vxlan_sg(vrf->info, vty, uj); + + return CMD_SUCCESS; +} + +static void pim_show_vxlan_sg_work(struct pim_instance *pim, + struct vty *vty, bool uj) +{ + json_object *json = NULL; + struct pim_sg_cache_walk_data cwd; + struct listnode *node; + struct pim_vxlan_sg *vxlan_sg; + + if (uj) { + json = json_object_new_object(); + } else { + vty_out(vty, "Codes: I -> installed\n"); + vty_out(vty, + "Source Group Input Flags\n"); + } + + memset(&cwd, 0, sizeof(cwd)); + cwd.vty = vty; + cwd.json = json; + for (ALL_LIST_ELEMENTS_RO(pim_vxlan_p->work_list, node, vxlan_sg)) + pim_show_vxlan_sg_entry(vxlan_sg, &cwd); + + if (uj) { + vty_out(vty, "%s\n", json_object_to_json_string_ext( + json, JSON_C_TO_STRING_PRETTY)); + json_object_free(json); + } +} + +DEFUN_HIDDEN (show_ip_pim_vxlan_sg_work, + show_ip_pim_vxlan_sg_work_cmd, + "show ip pim [vrf NAME] vxlan-work [json]", + SHOW_STR + IP_STR + PIM_STR + VRF_CMD_HELP_STR + "VxLAN work list\n" + JSON_STR) +{ + bool uj = use_json(argc, argv); + struct vrf *vrf; + int idx = 2; + + vrf = pim_cmd_lookup_vrf(vty, argv, argc, &idx); + + if (!vrf) + return CMD_WARNING; + + pim_show_vxlan_sg_work(vrf->info, vty, uj); + + return CMD_SUCCESS; +} + +DEFUN_HIDDEN (no_ip_pim_mlag, + no_ip_pim_mlag_cmd, + "no ip pim mlag", + NO_STR + IP_STR + PIM_STR + "MLAG\n") +{ + struct in_addr addr; + + addr.s_addr = 0; + pim_vxlan_mlag_update(TRUE /*mlag_enable*/, + FALSE /*peer_state*/, PIM_VXLAN_MLAG_ROLE_SECONDARY, + NULL/*peerlink*/, &addr); + + return CMD_SUCCESS; +} + +DEFUN_HIDDEN (ip_pim_mlag, + ip_pim_mlag_cmd, + "ip pim mlag INTERFACE role [primary|secondary] state [up|down] addr A.B.C.D", + IP_STR + PIM_STR + "MLAG\n" + "peerlink sub interface\n" + "MLAG role\n" + "MLAG role primary\n" + "MLAG role secondary\n" + "peer session state\n" + "peer session state up\n" + "peer session state down\n" + "configure PIP\n" + "unique ip address\n") +{ + struct interface *ifp; + const char *peerlink; + uint32_t role; + int idx; + bool peer_state; + int result; + struct in_addr reg_addr; + + idx = 3; + peerlink = argv[idx]->arg; + ifp = if_lookup_by_name(peerlink, VRF_DEFAULT); + if (!ifp) { + vty_out(vty, "No such interface name %s\n", peerlink); + return CMD_WARNING; + } + + idx += 2; + if (!strcmp(argv[idx]->arg, "primary")) { + role = PIM_VXLAN_MLAG_ROLE_PRIMARY; + } else if (!strcmp(argv[idx]->arg, "secondary")) { + role = PIM_VXLAN_MLAG_ROLE_SECONDARY; + } else { + vty_out(vty, "unknown MLAG role %s\n", argv[idx]->arg); + return CMD_WARNING; + } + + idx += 2; + if (!strcmp(argv[idx]->arg, "up")) { + peer_state = TRUE; + } else if (strcmp(argv[idx]->arg, "down")) { + peer_state = FALSE; + } else { + vty_out(vty, "unknown MLAG state %s\n", argv[idx]->arg); + return CMD_WARNING; + } + + idx += 2; + result = inet_pton(AF_INET, argv[idx]->arg, ®_addr); + if (result <= 0) { + vty_out(vty, "%% Bad reg address %s: errno=%d: %s\n", + argv[idx]->arg, + errno, safe_strerror(errno)); + return CMD_WARNING_CONFIG_FAILED; + } + pim_vxlan_mlag_update(TRUE, peer_state, role, ifp, ®_addr); + + return CMD_SUCCESS; +} + void pim_cmd_init(void) { install_node(&interface_node, @@ -8809,6 +9216,8 @@ void pim_cmd_init(void) install_element(VRF_NODE, &ip_pim_ecmp_rebalance_cmd); install_element(CONFIG_NODE, &no_ip_pim_ecmp_rebalance_cmd); install_element(VRF_NODE, &no_ip_pim_ecmp_rebalance_cmd); + install_element(CONFIG_NODE, &ip_pim_mlag_cmd); + install_element(CONFIG_NODE, &no_ip_pim_mlag_cmd); install_element(INTERFACE_NODE, &interface_ip_igmp_cmd); install_element(INTERFACE_NODE, &interface_no_ip_igmp_cmd); @@ -8935,6 +9344,8 @@ void pim_cmd_init(void) install_element(ENABLE_NODE, &no_debug_ssmpingd_cmd); install_element(ENABLE_NODE, &debug_pim_zebra_cmd); install_element(ENABLE_NODE, &no_debug_pim_zebra_cmd); + install_element(ENABLE_NODE, &debug_pim_vxlan_cmd); + install_element(ENABLE_NODE, &no_debug_pim_vxlan_cmd); install_element(ENABLE_NODE, &debug_msdp_cmd); install_element(ENABLE_NODE, &no_debug_msdp_cmd); install_element(ENABLE_NODE, &debug_msdp_events_cmd); @@ -8976,6 +9387,8 @@ void pim_cmd_init(void) install_element(CONFIG_NODE, &no_debug_ssmpingd_cmd); install_element(CONFIG_NODE, &debug_pim_zebra_cmd); install_element(CONFIG_NODE, &no_debug_pim_zebra_cmd); + install_element(CONFIG_NODE, &debug_pim_vxlan_cmd); + install_element(CONFIG_NODE, &no_debug_pim_vxlan_cmd); install_element(CONFIG_NODE, &debug_msdp_cmd); install_element(CONFIG_NODE, &no_debug_msdp_cmd); install_element(CONFIG_NODE, &debug_msdp_events_cmd); @@ -9003,6 +9416,8 @@ void pim_cmd_init(void) install_element(VIEW_NODE, &show_ip_msdp_mesh_group_vrf_all_cmd); install_element(VIEW_NODE, &show_ip_pim_ssm_range_cmd); install_element(VIEW_NODE, &show_ip_pim_group_type_cmd); + install_element(VIEW_NODE, &show_ip_pim_vxlan_sg_cmd); + install_element(VIEW_NODE, &show_ip_pim_vxlan_sg_work_cmd); install_element(INTERFACE_NODE, &interface_pim_use_source_cmd); install_element(INTERFACE_NODE, &interface_no_pim_use_source_cmd); /* Install BFD command */ diff --git a/pimd/pim_cmd.h b/pimd/pim_cmd.h index b58da30bd..67d6e43c3 100644 --- a/pimd/pim_cmd.h +++ b/pimd/pim_cmd.h @@ -52,6 +52,7 @@ #define DEBUG_PIM_PACKETDUMP_RECV_STR "Dump received packets\n" #define DEBUG_PIM_TRACE_STR "PIM internal daemon activity\n" #define DEBUG_PIM_ZEBRA_STR "ZEBRA protocol activity\n" +#define DEBUG_PIM_VXLAN_STR "PIM VxLAN events\n" #define DEBUG_SSMPINGD_STR "ssmpingd activity\n" #define CLEAR_IP_IGMP_STR "IGMP clear commands\n" #define CLEAR_IP_PIM_STR "PIM clear commands\n" diff --git a/pimd/pim_iface.c b/pimd/pim_iface.c index 92d21cf42..0fb7f176c 100644 --- a/pimd/pim_iface.c +++ b/pimd/pim_iface.c @@ -47,6 +47,7 @@ #include "pim_nht.h" #include "pim_jp_agg.h" #include "pim_igmp_join.h" +#include "pim_vxlan.h" static void pim_if_igmp_join_del_all(struct interface *ifp); static int igmp_join_sock(const char *ifname, ifindex_t ifindex, @@ -109,7 +110,7 @@ static int pim_sec_addr_comp(const void *p1, const void *p2) } struct pim_interface *pim_if_new(struct interface *ifp, bool igmp, bool pim, - bool ispimreg) + bool ispimreg, bool is_vxlan_term) { struct pim_interface *pim_ifp; @@ -178,7 +179,7 @@ struct pim_interface *pim_if_new(struct interface *ifp, bool igmp, bool pim, pim_sock_reset(ifp); - pim_if_add_vif(ifp, ispimreg); + pim_if_add_vif(ifp, ispimreg, is_vxlan_term); return pim_ifp; } @@ -628,7 +629,7 @@ void pim_if_addr_add(struct connected *ifc) address assigned, then try to create a vif_index. */ if (pim_ifp->mroute_vif_index < 0) { - pim_if_add_vif(ifp, false); + pim_if_add_vif(ifp, false, false /*vxlan_term*/); } pim_ifchannel_scan_forward_start(ifp); } @@ -761,7 +762,7 @@ void pim_if_addr_add_all(struct interface *ifp) * address assigned, then try to create a vif_index. */ if (pim_ifp->mroute_vif_index < 0) { - pim_if_add_vif(ifp, false); + pim_if_add_vif(ifp, false, false /*vxlan_term*/); } pim_ifchannel_scan_forward_start(ifp); @@ -926,7 +927,7 @@ static int pim_iface_next_vif_index(struct interface *ifp) see also pim_if_find_vifindex_by_ifindex() */ -int pim_if_add_vif(struct interface *ifp, bool ispimreg) +int pim_if_add_vif(struct interface *ifp, bool ispimreg, bool is_vxlan_term) { struct pim_interface *pim_ifp = ifp->info; struct in_addr ifaddr; @@ -948,7 +949,7 @@ int pim_if_add_vif(struct interface *ifp, bool ispimreg) } ifaddr = pim_ifp->primary_address; - if (!ispimreg && PIM_INADDR_IS_ANY(ifaddr)) { + if (!ispimreg && !is_vxlan_term && PIM_INADDR_IS_ANY(ifaddr)) { zlog_warn( "%s: could not get address for interface %s ifindex=%d", __PRETTY_FUNCTION__, ifp->name, ifp->ifindex); @@ -977,6 +978,10 @@ int pim_if_add_vif(struct interface *ifp, bool ispimreg) } pim_ifp->pim->iface_vif_index[pim_ifp->mroute_vif_index] = 1; + + /* if the device qualifies as pim_vxlan iif/oif update vxlan entries */ + pim_vxlan_add_vif(ifp); + return 0; } @@ -991,6 +996,9 @@ int pim_if_del_vif(struct interface *ifp) return -1; } + /* if the device was a pim_vxlan iif/oif update vxlan mroute entries */ + pim_vxlan_del_vif(ifp); + pim_mroute_del_vif(ifp); /* @@ -1469,7 +1477,8 @@ void pim_if_create_pimreg(struct pim_instance *pim) pim->regiface = if_create(pimreg_name, pim->vrf_id); pim->regiface->ifindex = PIM_OIF_PIM_REGISTER_VIF; - pim_if_new(pim->regiface, false, false, true); + pim_if_new(pim->regiface, false, false, true, + false /*vxlan_term*/); } } diff --git a/pimd/pim_iface.h b/pimd/pim_iface.h index 5066998cb..fe96c0775 100644 --- a/pimd/pim_iface.h +++ b/pimd/pim_iface.h @@ -158,7 +158,7 @@ void pim_if_init(struct pim_instance *pim); void pim_if_terminate(struct pim_instance *pim); struct pim_interface *pim_if_new(struct interface *ifp, bool igmp, bool pim, - bool ispimreg); + bool ispimreg, bool is_vxlan_term); void pim_if_delete(struct interface *ifp); void pim_if_addr_add(struct connected *ifc); void pim_if_addr_del(struct connected *ifc, int force_prim_as_any); @@ -167,7 +167,7 @@ void pim_if_addr_del_all(struct interface *ifp); void pim_if_addr_del_all_igmp(struct interface *ifp); void pim_if_addr_del_all_pim(struct interface *ifp); -int pim_if_add_vif(struct interface *ifp, bool ispimreg); +int pim_if_add_vif(struct interface *ifp, bool ispimreg, bool is_vxlan_term); int pim_if_del_vif(struct interface *ifp); void pim_if_add_vif_all(struct pim_instance *pim); void pim_if_del_vif_all(struct pim_instance *pim); diff --git a/pimd/pim_instance.c b/pimd/pim_instance.c index 092a2d76f..a2bf3d278 100644 --- a/pimd/pim_instance.c +++ b/pimd/pim_instance.c @@ -36,6 +36,8 @@ static void pim_instance_terminate(struct pim_instance *pim) { + pim_vxlan_exit(pim); + if (pim->ssm_info) { pim_ssm_terminate(pim->ssm_info); pim->ssm_info = NULL; @@ -86,6 +88,7 @@ static struct pim_instance *pim_instance_init(struct vrf *vrf) pim->spt.plist = NULL; pim_msdp_init(pim, router->master); + pim_vxlan_init(pim); snprintf(hash_name, 64, "PIM %s RPF Hash", vrf->name); pim->rpf_hash = hash_create_size(256, pim_rpf_hash_key, pim_rpf_equal, diff --git a/pimd/pim_instance.h b/pimd/pim_instance.h index e651356bf..1740bcc79 100644 --- a/pimd/pim_instance.h +++ b/pimd/pim_instance.h @@ -26,6 +26,7 @@ #include "pim_str.h" #include "pim_msdp.h" #include "pim_assert.h" +#include "pim_vxlan_instance.h" #if defined(HAVE_LINUX_MROUTE_H) #include <linux/mroute.h> @@ -110,6 +111,7 @@ struct pim_instance { struct hash *channel_oil_hash; struct pim_msdp msdp; + struct pim_vxlan_instance vxlan; struct list *ssmpingd_list; struct in_addr ssmpingd_group_addr; diff --git a/pimd/pim_memory.c b/pimd/pim_memory.c index dff16c416..2bbab67e4 100644 --- a/pimd/pim_memory.c +++ b/pimd/pim_memory.c @@ -52,3 +52,4 @@ DEFINE_MTYPE(PIMD, PIM_PIM_INSTANCE, "PIM global state") DEFINE_MTYPE(PIMD, PIM_NEXTHOP_CACHE, "PIM nexthop cache state") DEFINE_MTYPE(PIMD, PIM_SSM_INFO, "PIM SSM configuration") DEFINE_MTYPE(PIMD, PIM_SPT_PLIST_NAME, "PIM SPT Prefix List Name") +DEFINE_MTYPE(PIMD, PIM_VXLAN_SG, "PIM VxLAN mroute cache") diff --git a/pimd/pim_memory.h b/pimd/pim_memory.h index 01189aca7..e5ca57a15 100644 --- a/pimd/pim_memory.h +++ b/pimd/pim_memory.h @@ -51,5 +51,6 @@ DECLARE_MTYPE(PIM_PIM_INSTANCE) DECLARE_MTYPE(PIM_NEXTHOP_CACHE) DECLARE_MTYPE(PIM_SSM_INFO) DECLARE_MTYPE(PIM_SPT_PLIST_NAME); +DECLARE_MTYPE(PIM_VXLAN_SG) #endif /* _QUAGGA_PIM_MEMORY_H */ diff --git a/pimd/pim_mroute.c b/pimd/pim_mroute.c index a2d50aba5..866a19fc9 100644 --- a/pimd/pim_mroute.c +++ b/pimd/pim_mroute.c @@ -42,6 +42,7 @@ #include "pim_zlookup.h" #include "pim_ssm.h" #include "pim_sock.h" +#include "pim_vxlan.h" static void mroute_read_on(struct pim_instance *pim); @@ -896,6 +897,12 @@ int pim_mroute_add(struct channel_oil *c_oil, const char *name) int err; int orig = 0; int orig_iif_vif = 0; + struct pim_interface *pim_reg_ifp; + int orig_pimreg_ttl; + bool pimreg_ttl_reset = false; + struct pim_interface *vxlan_ifp; + int orig_term_ttl; + bool orig_term_ttl_reset = false; pim->mroute_add_last = pim_time_monotonic_sec(); ++pim->mroute_add_events; @@ -921,6 +928,37 @@ int pim_mroute_add(struct channel_oil *c_oil, const char *name) c_oil->oil.mfcc_ttls[c_oil->oil.mfcc_parent] = 1; } + if (c_oil->up) { + /* suppress pimreg in the OIL if the mroute is not supposed to + * trigger register encapsulated data + */ + if (PIM_UPSTREAM_FLAG_TEST_NO_PIMREG_DATA(c_oil->up->flags)) { + pim_reg_ifp = pim->regiface->info; + orig_pimreg_ttl = + c_oil->oil.mfcc_ttls[pim_reg_ifp->mroute_vif_index]; + c_oil->oil.mfcc_ttls[pim_reg_ifp->mroute_vif_index] = 0; + /* remember to flip it back after MFC programming */ + pimreg_ttl_reset = true; + } + + vxlan_ifp = pim_vxlan_get_term_ifp(pim); + /* 1. vxlan termination device must never be added to the + * origination mroute (and that can actually happen because + * of XG inheritance from the termination mroute) otherwise + * traffic will end up looping. + * 2. vxlan termination device should be removed from the non-DF + * to prevent duplicates to the overlay rxer + */ + if (vxlan_ifp && + (PIM_UPSTREAM_FLAG_TEST_SRC_VXLAN_ORIG(c_oil->up->flags) || + PIM_UPSTREAM_FLAG_TEST_MLAG_NON_DF(c_oil->up->flags))) { + orig_term_ttl_reset = true; + orig_term_ttl = + c_oil->oil.mfcc_ttls[vxlan_ifp->mroute_vif_index]; + c_oil->oil.mfcc_ttls[vxlan_ifp->mroute_vif_index] = 0; + } + } + /* * If we have an unresolved cache entry for the S,G * it is owned by the pimreg for the incoming IIF @@ -947,6 +985,14 @@ int pim_mroute_add(struct channel_oil *c_oil, const char *name) if (c_oil->oil.mfcc_origin.s_addr == INADDR_ANY) c_oil->oil.mfcc_ttls[c_oil->oil.mfcc_parent] = orig; + if (pimreg_ttl_reset) + c_oil->oil.mfcc_ttls[pim_reg_ifp->mroute_vif_index] = + orig_pimreg_ttl; + + if (orig_term_ttl_reset) + c_oil->oil.mfcc_ttls[vxlan_ifp->mroute_vif_index] = + orig_term_ttl; + if (err) { zlog_warn( "%s %s: failure: setsockopt(fd=%d,IPPROTO_IP,MRT_ADD_MFC): errno=%d: %s", diff --git a/pimd/pim_oil.c b/pimd/pim_oil.c index 55d26113f..5945bc55f 100644 --- a/pimd/pim_oil.c +++ b/pimd/pim_oil.c @@ -320,6 +320,7 @@ int pim_channel_add_oif(struct channel_oil *channel_oil, struct interface *oif, { struct pim_interface *pim_ifp; int old_ttl; + bool allow_iif_in_oil = false; /* * If we've gotten here we've gone bad, but let's @@ -344,7 +345,14 @@ int pim_channel_add_oif(struct channel_oil *channel_oil, struct interface *oif, by both source and receiver attached to the same interface. See TODO T22. */ - if (pim_ifp->mroute_vif_index == channel_oil->oil.mfcc_parent) { + if (channel_oil->up && + PIM_UPSTREAM_FLAG_TEST_ALLOW_IIF_IN_OIL( + channel_oil->up->flags)) { + allow_iif_in_oil = true; + } + + if (!allow_iif_in_oil && + pim_ifp->mroute_vif_index == channel_oil->oil.mfcc_parent) { channel_oil->oil_inherited_rescan = 1; if (PIM_DEBUG_MROUTE) { char group_str[INET_ADDRSTRLEN]; diff --git a/pimd/pim_oil.h b/pimd/pim_oil.h index 8b9532414..c5106d01c 100644 --- a/pimd/pim_oil.h +++ b/pimd/pim_oil.h @@ -34,9 +34,11 @@ #define PIM_OIF_FLAG_PROTO_PIM (1 << 1) #define PIM_OIF_FLAG_PROTO_SOURCE (1 << 2) #define PIM_OIF_FLAG_PROTO_STAR (1 << 3) -#define PIM_OIF_FLAG_PROTO_ANY \ - (PIM_OIF_FLAG_PROTO_IGMP | PIM_OIF_FLAG_PROTO_PIM \ - | PIM_OIF_FLAG_PROTO_SOURCE | PIM_OIF_FLAG_PROTO_STAR) +#define PIM_OIF_FLAG_PROTO_VXLAN (1 << 4) +#define PIM_OIF_FLAG_PROTO_ANY \ + (PIM_OIF_FLAG_PROTO_IGMP | PIM_OIF_FLAG_PROTO_PIM \ + | PIM_OIF_FLAG_PROTO_SOURCE | PIM_OIF_FLAG_PROTO_STAR \ + | PIM_OIF_FLAG_PROTO_VXLAN) /* * We need a pimreg vif id from the kernel. diff --git a/pimd/pim_register.c b/pimd/pim_register.c index 386ed1d42..431236eeb 100644 --- a/pimd/pim_register.c +++ b/pimd/pim_register.c @@ -43,6 +43,7 @@ #include "pim_join.h" #include "pim_util.h" #include "pim_ssm.h" +#include "pim_vxlan.h" struct thread *send_test_packet_timer = NULL; @@ -60,6 +61,7 @@ void pim_register_join(struct pim_upstream *up) pim_channel_add_oif(up->channel_oil, pim->regiface, PIM_OIF_FLAG_PROTO_PIM); up->reg_state = PIM_REG_JOIN; + pim_vxlan_update_sg_reg_state(pim, up, TRUE /*reg_join*/); } void pim_register_stop_send(struct interface *ifp, struct prefix_sg *sg, @@ -145,6 +147,8 @@ int pim_register_stop_recv(struct interface *ifp, uint8_t *buf, int buf_size) pim_channel_del_oif(upstream->channel_oil, pim->regiface, PIM_OIF_FLAG_PROTO_PIM); pim_upstream_start_register_stop_timer(upstream, 0); + pim_vxlan_update_sg_reg_state(pim, upstream, + FALSE /*reg_join*/); break; case PIM_REG_JOIN_PENDING: upstream->reg_state = PIM_REG_PRUNE; @@ -219,6 +223,54 @@ void pim_register_send(const uint8_t *buf, int buf_size, struct in_addr src, } } +void pim_null_register_send(struct pim_upstream *up) +{ + struct ip ip_hdr; + struct pim_interface *pim_ifp; + struct pim_rpf *rpg; + struct in_addr src; + + pim_ifp = up->rpf.source_nexthop.interface->info; + if (!pim_ifp) { + if (PIM_DEBUG_TRACE) + zlog_debug( + "%s: Cannot send null-register for %s no valid iif", + __PRETTY_FUNCTION__, up->sg_str); + return; + } + + rpg = RP(pim_ifp->pim, up->sg.grp); + if (!rpg) { + if (PIM_DEBUG_TRACE) + zlog_debug( + "%s: Cannot send null-register for %s no RPF to the RP", + __PRETTY_FUNCTION__, up->sg_str); + return; + } + + memset(&ip_hdr, 0, sizeof(struct ip)); + ip_hdr.ip_p = PIM_IP_PROTO_PIM; + ip_hdr.ip_hl = 5; + ip_hdr.ip_v = 4; + ip_hdr.ip_src = up->sg.src; + ip_hdr.ip_dst = up->sg.grp; + ip_hdr.ip_len = htons(20); + + /* checksum is broken */ + src = pim_ifp->primary_address; + if (PIM_UPSTREAM_FLAG_TEST_SRC_VXLAN_ORIG(up->flags)) { + if (!pim_vxlan_get_register_src(pim_ifp->pim, up, &src)) { + if (PIM_DEBUG_TRACE) + zlog_debug( + "%s: Cannot send null-register for %s vxlan-aa PIP unavailable", + __PRETTY_FUNCTION__, up->sg_str); + return; + } + } + pim_register_send((uint8_t *)&ip_hdr, sizeof(struct ip), + src, rpg, 1, up); +} + /* * 4.4.2 Receiving Register Messages at the RP * diff --git a/pimd/pim_register.h b/pimd/pim_register.h index 906d093bb..c5a28fee4 100644 --- a/pimd/pim_register.h +++ b/pimd/pim_register.h @@ -42,5 +42,6 @@ void pim_register_send(const uint8_t *buf, int buf_size, struct in_addr src, void pim_register_stop_send(struct interface *ifp, struct prefix_sg *sg, struct in_addr src, struct in_addr originator); void pim_register_join(struct pim_upstream *up); +void pim_null_register_send(struct pim_upstream *up); #endif diff --git a/pimd/pim_rpf.c b/pimd/pim_rpf.c index 8cc8b7481..afe3886aa 100644 --- a/pimd/pim_rpf.c +++ b/pimd/pim_rpf.c @@ -204,6 +204,9 @@ enum pim_rpf_result pim_rpf_update(struct pim_instance *pim, struct prefix src, grp; bool neigh_needed = true; + if (PIM_UPSTREAM_FLAG_TEST_STATIC_IIF(up->flags)) + return PIM_RPF_OK; + if (up->upstream_addr.s_addr == INADDR_ANY) { zlog_debug("%s: RP is not configured yet for %s", __PRETTY_FUNCTION__, up->sg_str); diff --git a/pimd/pim_str.c b/pimd/pim_str.c index fa1a6e624..f6acd0873 100644 --- a/pimd/pim_str.c +++ b/pimd/pim_str.c @@ -42,47 +42,12 @@ void pim_addr_dump(const char *onfail, struct prefix *p, char *buf, errno = save_errno; } -void pim_inet4_dump(const char *onfail, struct in_addr addr, char *buf, - int buf_size) -{ - int save_errno = errno; - - if (addr.s_addr == INADDR_ANY) - strcpy(buf, "*"); - else { - if (!inet_ntop(AF_INET, &addr, buf, buf_size)) { - zlog_warn( - "pim_inet4_dump: inet_ntop(AF_INET,buf_size=%d): errno=%d: %s", - buf_size, errno, safe_strerror(errno)); - if (onfail) - snprintf(buf, buf_size, "%s", onfail); - } - } - - errno = save_errno; -} - char *pim_str_sg_dump(const struct prefix_sg *sg) { - char src_str[INET_ADDRSTRLEN]; - char grp_str[INET_ADDRSTRLEN]; static char sg_str[PIM_SG_LEN]; - pim_inet4_dump("<src?>", sg->src, src_str, sizeof(src_str)); - pim_inet4_dump("<grp?>", sg->grp, grp_str, sizeof(grp_str)); - snprintf(sg_str, PIM_SG_LEN, "(%s,%s)", src_str, grp_str); + pim_str_sg_set(sg, sg_str); return sg_str; } -char *pim_str_sg_set(const struct prefix_sg *sg, char *sg_str) -{ - char src_str[INET_ADDRSTRLEN]; - char grp_str[INET_ADDRSTRLEN]; - - pim_inet4_dump("<src?>", sg->src, src_str, sizeof(src_str)); - pim_inet4_dump("<grp?>", sg->grp, grp_str, sizeof(grp_str)); - snprintf(sg_str, PIM_SG_LEN, "(%s,%s)", src_str, grp_str); - - return sg_str; -} diff --git a/pimd/pim_str.h b/pimd/pim_str.h index 12a33a810..94ba32415 100644 --- a/pimd/pim_str.h +++ b/pimd/pim_str.h @@ -33,13 +33,14 @@ * NULL Character at end = 1 * (123.123.123.123,123,123,123,123) */ -#define PIM_SG_LEN 36 +#define PIM_SG_LEN PREFIX_SG_STR_LEN +#define pim_inet4_dump prefix_mcast_inet4_dump +#define pim_str_sg_set prefix_sg2str void pim_addr_dump(const char *onfail, struct prefix *p, char *buf, int buf_size); void pim_inet4_dump(const char *onfail, struct in_addr addr, char *buf, int buf_size); char *pim_str_sg_dump(const struct prefix_sg *sg); -char *pim_str_sg_set(const struct prefix_sg *sg, char *sg_str); #endif diff --git a/pimd/pim_upstream.c b/pimd/pim_upstream.c index 04137b0b7..7de2092a5 100644 --- a/pimd/pim_upstream.c +++ b/pimd/pim_upstream.c @@ -51,6 +51,7 @@ #include "pim_jp_agg.h" #include "pim_nht.h" #include "pim_ssm.h" +#include "pim_vxlan.h" static void join_timer_stop(struct pim_upstream *up); static void @@ -483,6 +484,13 @@ static int pim_upstream_could_register(struct pim_upstream *up) { struct pim_interface *pim_ifp = NULL; + /* FORCE_PIMREG is a generic flag to let an app like VxLAN-AA register + * a source on an upstream entry even if the source is not directly + * connected on the IIF. + */ + if (PIM_UPSTREAM_FLAG_TEST_FORCE_PIMREG(up->flags)) + return 1; + if (up->rpf.source_nexthop.interface) pim_ifp = up->rpf.source_nexthop.interface->info; else { @@ -647,6 +655,23 @@ int pim_upstream_compare(void *arg1, void *arg2) return 0; } +void pim_upstream_fill_static_iif(struct pim_upstream *up, + struct interface *incoming) +{ + up->rpf.source_nexthop.interface = incoming; + + /* reset other parameters to matched a connected incoming interface */ + up->rpf.source_nexthop.mrib_nexthop_addr.family = AF_INET; + up->rpf.source_nexthop.mrib_nexthop_addr.u.prefix4.s_addr = + PIM_NET_INADDR_ANY; + up->rpf.source_nexthop.mrib_metric_preference = + ZEBRA_CONNECT_DISTANCE_DEFAULT; + up->rpf.source_nexthop.mrib_route_metric = 0; + up->rpf.rpf_addr.family = AF_INET; + up->rpf.rpf_addr.u.prefix4.s_addr = PIM_NET_INADDR_ANY; + +} + static struct pim_upstream *pim_upstream_new(struct pim_instance *pim, struct prefix_sg *sg, struct interface *incoming, @@ -712,13 +737,19 @@ static struct pim_upstream *pim_upstream_new(struct pim_instance *pim, if (up->sg.src.s_addr != INADDR_ANY) wheel_add_item(pim->upstream_sg_wheel, up); - if (up->upstream_addr.s_addr == INADDR_ANY) + if (PIM_UPSTREAM_FLAG_TEST_STATIC_IIF(up->flags)) { + pim_upstream_fill_static_iif(up, incoming); + pim_ifp = up->rpf.source_nexthop.interface->info; + assert(pim_ifp); + up->channel_oil = pim_channel_oil_add(pim, + &up->sg, pim_ifp->mroute_vif_index); + } else if (up->upstream_addr.s_addr == INADDR_ANY) { /* Create a dummmy channel oil with incoming ineterface MAXVIFS, * since RP is not configured */ up->channel_oil = pim_channel_oil_add(pim, &up->sg, MAXVIFS); - else { + } else { rpf_result = pim_rpf_update(pim, up, NULL, 1); if (rpf_result == PIM_RPF_FAILURE) { if (PIM_DEBUG_TRACE) @@ -1117,14 +1148,21 @@ static void pim_upstream_fhr_kat_start(struct pim_upstream *up) * KAT expiry indicates that flow is inactive. If the flow was created or * maintained by activity now is the time to deref it. */ -static int pim_upstream_keep_alive_timer(struct thread *t) +struct pim_upstream *pim_upstream_keep_alive_timer_proc( + struct pim_upstream *up) { - struct pim_upstream *up; struct pim_instance *pim; - up = THREAD_ARG(t); pim = up->channel_oil->pim; + if (PIM_UPSTREAM_FLAG_TEST_DISABLE_KAT_EXPIRY(up->flags)) { + /* if the router is a PIM vxlan encapsulator we prevent expiry + * of KAT as the mroute is pre-setup without any traffic + */ + pim_upstream_keep_alive_timer_start(up, pim->keep_alive_time); + return up; + } + if (I_am_RP(pim, up->sg.grp)) { pim_br_clear_pmbr(&up->sg); /* @@ -1144,12 +1182,12 @@ static int pim_upstream_keep_alive_timer(struct thread *t) "kat expired on %s[%s]; remove stream reference", up->sg_str, pim->vrf->name); PIM_UPSTREAM_FLAG_UNSET_SRC_STREAM(up->flags); - pim_upstream_del(pim, up, __PRETTY_FUNCTION__); + up = pim_upstream_del(pim, up, __PRETTY_FUNCTION__); } else if (PIM_UPSTREAM_FLAG_TEST_SRC_LHR(up->flags)) { struct pim_upstream *parent = up->parent; PIM_UPSTREAM_FLAG_UNSET_SRC_LHR(up->flags); - pim_upstream_del(pim, up, __PRETTY_FUNCTION__); + up = pim_upstream_del(pim, up, __PRETTY_FUNCTION__); if (parent) { pim_jp_agg_single_upstream_send(&parent->rpf, parent, @@ -1157,6 +1195,15 @@ static int pim_upstream_keep_alive_timer(struct thread *t) } } + return up; +} +static int pim_upstream_keep_alive_timer(struct thread *t) +{ + struct pim_upstream *up; + + up = THREAD_ARG(t); + + pim_upstream_keep_alive_timer_proc(up); return 0; } @@ -1371,8 +1418,6 @@ static int pim_upstream_register_stop_timer(struct thread *t) struct pim_interface *pim_ifp; struct pim_instance *pim; struct pim_upstream *up; - struct pim_rpf *rpg; - struct ip ip_hdr; up = THREAD_ARG(t); pim = up->channel_oil->pim; @@ -1388,6 +1433,7 @@ static int pim_upstream_register_stop_timer(struct thread *t) up->reg_state = PIM_REG_JOIN; pim_channel_add_oif(up->channel_oil, pim->regiface, PIM_OIF_FLAG_PROTO_PIM); + pim_vxlan_update_sg_reg_state(pim, up, TRUE /*reg_join*/); break; case PIM_REG_JOIN: break; @@ -1420,24 +1466,7 @@ static int pim_upstream_register_stop_timer(struct thread *t) __PRETTY_FUNCTION__); return 0; } - rpg = RP(pim_ifp->pim, up->sg.grp); - if (!rpg) { - if (PIM_DEBUG_TRACE) - zlog_debug( - "%s: Cannot send register for %s no RPF to the RP", - __PRETTY_FUNCTION__, up->sg_str); - return 0; - } - memset(&ip_hdr, 0, sizeof(struct ip)); - ip_hdr.ip_p = PIM_IP_PROTO_PIM; - ip_hdr.ip_hl = 5; - ip_hdr.ip_v = 4; - ip_hdr.ip_src = up->sg.src; - ip_hdr.ip_dst = up->sg.grp; - ip_hdr.ip_len = htons(20); - // checksum is broken - pim_register_send((uint8_t *)&ip_hdr, sizeof(struct ip), - pim_ifp->primary_address, rpg, 1, up); + pim_null_register_send(up); break; default: break; diff --git a/pimd/pim_upstream.h b/pimd/pim_upstream.h index 70e70140d..13a3dcdf8 100644 --- a/pimd/pim_upstream.h +++ b/pimd/pim_upstream.h @@ -37,6 +37,43 @@ #define PIM_UPSTREAM_FLAG_MASK_SRC_MSDP (1 << 6) #define PIM_UPSTREAM_FLAG_MASK_SEND_SG_RPT_PRUNE (1 << 7) #define PIM_UPSTREAM_FLAG_MASK_SRC_LHR (1 << 8) +/* In the case of pim vxlan we prime the pump by registering the + * vxlan source and keeping the SPT (FHR-RP) alive by sending periodic + * NULL registers. So we need to prevent KAT expiry because of the + * lack of BUM traffic. + */ +#define PIM_UPSTREAM_FLAG_MASK_DISABLE_KAT_EXPIRY (1 << 9) +/* for pim vxlan we need to pin the IIF to lo or MLAG-ISL on the + * originating VTEP. This flag allows that by setting IIF to the + * value specified and preventing next-hop-tracking on the entry + */ +#define PIM_UPSTREAM_FLAG_MASK_STATIC_IIF (1 << 10) +#define PIM_UPSTREAM_FLAG_MASK_ALLOW_IIF_IN_OIL (1 << 11) +/* Disable pimreg encasulation for a flow */ +#define PIM_UPSTREAM_FLAG_MASK_NO_PIMREG_DATA (1 << 12) +/* For some MDTs we need to register the router as a source even + * if the not DR or directly connected on the IIF. This is typically + * needed on a VxLAN-AA (MLAG) setup. + */ +#define PIM_UPSTREAM_FLAG_MASK_FORCE_PIMREG (1 << 13) +/* VxLAN origination mroute - SG was registered by EVPN where S is the + * local VTEP IP and G is the BUM multicast group address + */ +#define PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_ORIG (1 << 14) +/* VxLAN termination mroute - *G entry where G is the BUM multicast group + * address + */ +#define PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM (1 << 15) +/* MLAG mroute - synced to the MLAG peer and subject to DF (designated + * forwarder) election + */ +#define PIM_UPSTREAM_FLAG_MASK_MLAG_VXLAN (1 << 16) +/* MLAG mroute that lost the DF election with peer and is installed in + * a dormant state i.e. MLAG OIFs are removed from the MFC. + * In most cases the OIL is empty (but not not always) simply + * blackholing the traffic pulled down to the LHR. + */ +#define PIM_UPSTREAM_FLAG_MASK_MLAG_NON_DF (1 << 17) #define PIM_UPSTREAM_FLAG_ALL 0xFFFFFFFF #define PIM_UPSTREAM_FLAG_TEST_DR_JOIN_DESIRED(flags) ((flags) & PIM_UPSTREAM_FLAG_MASK_DR_JOIN_DESIRED) @@ -48,6 +85,16 @@ #define PIM_UPSTREAM_FLAG_TEST_SRC_MSDP(flags) ((flags) & PIM_UPSTREAM_FLAG_MASK_SRC_MSDP) #define PIM_UPSTREAM_FLAG_TEST_SEND_SG_RPT_PRUNE(flags) ((flags) & PIM_UPSTREAM_FLAG_MASK_SEND_SG_RPT_PRUNE) #define PIM_UPSTREAM_FLAG_TEST_SRC_LHR(flags) ((flags) & PIM_UPSTREAM_FLAG_MASK_SRC_LHR) +#define PIM_UPSTREAM_FLAG_TEST_DISABLE_KAT_EXPIRY(flags) ((flags) & PIM_UPSTREAM_FLAG_MASK_DISABLE_KAT_EXPIRY) +#define PIM_UPSTREAM_FLAG_TEST_STATIC_IIF(flags) ((flags) & PIM_UPSTREAM_FLAG_MASK_STATIC_IIF) +#define PIM_UPSTREAM_FLAG_TEST_ALLOW_IIF_IN_OIL(flags) ((flags) & PIM_UPSTREAM_FLAG_MASK_ALLOW_IIF_IN_OIL) +#define PIM_UPSTREAM_FLAG_TEST_NO_PIMREG_DATA(flags) ((flags) & PIM_UPSTREAM_FLAG_MASK_NO_PIMREG_DATA) +#define PIM_UPSTREAM_FLAG_TEST_FORCE_PIMREG(flags) ((flags) & PIM_UPSTREAM_FLAG_MASK_FORCE_PIMREG) +#define PIM_UPSTREAM_FLAG_TEST_SRC_VXLAN_ORIG(flags) ((flags) & PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_ORIG) +#define PIM_UPSTREAM_FLAG_TEST_SRC_VXLAN_TERM(flags) ((flags) & PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM) +#define PIM_UPSTREAM_FLAG_TEST_SRC_VXLAN(flags) ((flags) & (PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_ORIG | PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM)) +#define PIM_UPSTREAM_FLAG_TEST_MLAG_VXLAN(flags) ((flags) & PIM_UPSTREAM_FLAG_MASK_MLAG_VXLAN) +#define PIM_UPSTREAM_FLAG_TEST_MLAG_NON_DF(flags) ((flags) & PIM_UPSTREAM_FLAG_MASK_MLAG_NON_DF) #define PIM_UPSTREAM_FLAG_SET_DR_JOIN_DESIRED(flags) ((flags) |= PIM_UPSTREAM_FLAG_MASK_DR_JOIN_DESIRED) #define PIM_UPSTREAM_FLAG_SET_DR_JOIN_DESIRED_UPDATED(flags) ((flags) |= PIM_UPSTREAM_FLAG_MASK_DR_JOIN_DESIRED_UPDATED) @@ -58,6 +105,15 @@ #define PIM_UPSTREAM_FLAG_SET_SRC_MSDP(flags) ((flags) |= PIM_UPSTREAM_FLAG_MASK_SRC_MSDP) #define PIM_UPSTREAM_FLAG_SET_SEND_SG_RPT_PRUNE(flags) ((flags) |= PIM_UPSTREAM_FLAG_MASK_SEND_SG_RPT_PRUNE) #define PIM_UPSTREAM_FLAG_SET_SRC_LHR(flags) ((flags) |= PIM_UPSTREAM_FLAG_MASK_SRC_LHR) +#define PIM_UPSTREAM_FLAG_SET_DISABLE_KAT_EXPIRY(flags) ((flags) |= PIM_UPSTREAM_FLAG_MASK_DISABLE_KAT_EXPIRY) +#define PIM_UPSTREAM_FLAG_SET_STATIC_IIF(flags) ((flags) |= PIM_UPSTREAM_FLAG_MASK_STATIC_IIF) +#define PIM_UPSTREAM_FLAG_SET_ALLOW_IIF_IN_OIL(flags) ((flags) |= PIM_UPSTREAM_FLAG_MASK_ALLOW_IIF_IN_OIL) +#define PIM_UPSTREAM_FLAG_SET_NO_PIMREG_DATA(flags) ((flags) |= PIM_UPSTREAM_FLAG_MASK_NO_PIMREG_DATA) +#define PIM_UPSTREAM_FLAG_SET_FORCE_PIMREG(flags) ((flags) |= PIM_UPSTREAM_FLAG_MASK_FORCE_PIMREG) +#define PIM_UPSTREAM_FLAG_SET_SRC_VXLAN_ORIG(flags) ((flags) |= PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_ORIG) +#define PIM_UPSTREAM_FLAG_SET_SRC_VXLAN_TERM(flags) ((flags) |= PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM) +#define PIM_UPSTREAM_FLAG_SET_MLAG_VXLAN(flags) ((flags) |= PIM_UPSTREAM_FLAG_MASK_MLAG_VXLAN) +#define PIM_UPSTREAM_FLAG_SET_MLAG_NON_DF(flags) ((flags) |= PIM_UPSTREAM_FLAG_MASK_MLAG_NON_DF) #define PIM_UPSTREAM_FLAG_UNSET_DR_JOIN_DESIRED(flags) ((flags) &= ~PIM_UPSTREAM_FLAG_MASK_DR_JOIN_DESIRED) #define PIM_UPSTREAM_FLAG_UNSET_DR_JOIN_DESIRED_UPDATED(flags) ((flags) &= ~PIM_UPSTREAM_FLAG_MASK_DR_JOIN_DESIRED_UPDATED) @@ -68,6 +124,15 @@ #define PIM_UPSTREAM_FLAG_UNSET_SRC_MSDP(flags) ((flags) &= ~PIM_UPSTREAM_FLAG_MASK_SRC_MSDP) #define PIM_UPSTREAM_FLAG_UNSET_SEND_SG_RPT_PRUNE(flags) ((flags) &= ~PIM_UPSTREAM_FLAG_MASK_SEND_SG_RPT_PRUNE) #define PIM_UPSTREAM_FLAG_UNSET_SRC_LHR(flags) ((flags) &= ~PIM_UPSTREAM_FLAG_MASK_SRC_LHR) +#define PIM_UPSTREAM_FLAG_UNSET_DISABLE_KAT_EXPIRY(flags) ((flags) &= ~PIM_UPSTREAM_FLAG_MASK_DISABLE_KAT_EXPIRY) +#define PIM_UPSTREAM_FLAG_UNSET_STATIC_IIF(flags) ((flags) &= ~PIM_UPSTREAM_FLAG_MASK_STATIC_IIF) +#define PIM_UPSTREAM_FLAG_UNSET_ALLOW_IIF_IN_OIL(flags) ((flags) &= ~PIM_UPSTREAM_FLAG_MASK_ALLOW_IIF_IN_OIL) +#define PIM_UPSTREAM_FLAG_UNSET_NO_PIMREG_DATA(flags) ((flags) &= ~PIM_UPSTREAM_FLAG_MASK_NO_PIMREG_DATA) +#define PIM_UPSTREAM_FLAG_UNSET_FORCE_PIMREG(flags) ((flags) &= ~PIM_UPSTREAM_FLAG_MASK_FORCE_PIMREG) +#define PIM_UPSTREAM_FLAG_UNSET_SRC_VXLAN_ORIG(flags) ((flags) &= ~PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_ORIG) +#define PIM_UPSTREAM_FLAG_UNSET_SRC_VXLAN_TERM(flags) ((flags) &= ~PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM) +#define PIM_UPSTREAM_FLAG_UNSET_MLAG_VXLAN(flags) ((flags) &= ~PIM_UPSTREAM_FLAG_MASK_MLAG_VXLAN) +#define PIM_UPSTREAM_FLAG_UNSET_MLAG_NON_DF(flags) ((flags) &= ~PIM_UPSTREAM_FLAG_MASK_MLAG_NON_DF) enum pim_upstream_state { PIM_UPSTREAM_NOTJOINED, @@ -245,4 +310,8 @@ void pim_upstream_spt_prefix_list_update(struct pim_instance *pim, unsigned int pim_upstream_hash_key(void *arg); bool pim_upstream_equal(const void *arg1, const void *arg2); +struct pim_upstream *pim_upstream_keep_alive_timer_proc( + struct pim_upstream *up); +void pim_upstream_fill_static_iif(struct pim_upstream *up, + struct interface *incoming); #endif /* PIM_UPSTREAM_H */ diff --git a/pimd/pim_vty.c b/pimd/pim_vty.c index 649578874..2654ebc58 100644 --- a/pimd/pim_vty.c +++ b/pimd/pim_vty.c @@ -39,6 +39,7 @@ #include "pim_msdp.h" #include "pim_ssm.h" #include "pim_bfd.h" +#include "pim_vxlan.h" int pim_debug_config_write(struct vty *vty) { @@ -119,6 +120,11 @@ int pim_debug_config_write(struct vty *vty) ++writes; } + if (PIM_DEBUG_VXLAN) { + vty_out(vty, "debug pim vxlan\n"); + ++writes; + } + if (PIM_DEBUG_SSMPINGD) { vty_out(vty, "debug ssmpingd\n"); ++writes; @@ -234,6 +240,8 @@ int pim_global_config_write_worker(struct pim_instance *pim, struct vty *vty) } } + pim_vxlan_config_write(vty, spaces, &writes); + return writes; } diff --git a/pimd/pim_vxlan.c b/pimd/pim_vxlan.c new file mode 100644 index 000000000..af76c6d73 --- /dev/null +++ b/pimd/pim_vxlan.c @@ -0,0 +1,1025 @@ +/* PIM support for VxLAN BUM flooding + * + * Copyright (C) 2019 Cumulus Networks, Inc. + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <zebra.h> + +#include <hash.h> +#include <jhash.h> +#include <log.h> +#include <prefix.h> +#include <vrf.h> + +#include "pimd.h" +#include "pim_iface.h" +#include "pim_memory.h" +#include "pim_oil.h" +#include "pim_register.h" +#include "pim_str.h" +#include "pim_upstream.h" +#include "pim_ifchannel.h" +#include "pim_nht.h" +#include "pim_zebra.h" +#include "pim_vxlan.h" + +/* pim-vxlan global info */ +struct pim_vxlan vxlan_info, *pim_vxlan_p = &vxlan_info; + +static void pim_vxlan_work_timer_setup(bool start); +static void pim_vxlan_set_peerlink_rif(struct pim_instance *pim, + struct interface *ifp); + +/*************************** vxlan work list ********************************** + * A work list is maintained for staggered generation of pim null register + * messages for vxlan SG entries that are in a reg_join state. + * + * A max of 500 NULL registers are generated at one shot. If paused reg + * generation continues on the next second and so on till all register + * messages have been sent out. And the process is restarted every 60s. + * + * purpose of this null register generation is to setup the SPT and maintain + * independent of the presence of overlay BUM traffic. + ****************************************************************************/ +static void pim_vxlan_do_reg_work(void) +{ + struct listnode *listnode; + int work_cnt = 0; + struct pim_vxlan_sg *vxlan_sg; + static int sec_count; + + ++sec_count; + + if (sec_count > PIM_VXLAN_NULL_REG_INTERVAL) { + sec_count = 0; + listnode = vxlan_info.next_work ? + vxlan_info.next_work : + vxlan_info.work_list->head; + if (PIM_DEBUG_VXLAN && listnode) + zlog_debug("vxlan SG work %s", + vxlan_info.next_work ? "continues" : "starts"); + } else { + listnode = vxlan_info.next_work; + } + + for (; listnode; listnode = listnode->next) { + vxlan_sg = (struct pim_vxlan_sg *)listnode->data; + if (vxlan_sg->up && (vxlan_sg->up->reg_state == PIM_REG_JOIN)) { + if (PIM_DEBUG_VXLAN) + zlog_debug("vxlan SG %s periodic NULL register", + vxlan_sg->sg_str); + pim_null_register_send(vxlan_sg->up); + ++work_cnt; + } + + if (work_cnt > vxlan_info.max_work_cnt) { + vxlan_info.next_work = listnode->next; + if (PIM_DEBUG_VXLAN) + zlog_debug("vxlan SG %d work items proc and pause", + work_cnt); + return; + } + } + + if (work_cnt) { + if (PIM_DEBUG_VXLAN) + zlog_debug("vxlan SG %d work items proc", work_cnt); + } + vxlan_info.next_work = NULL; +} + +/* Staggered work related info is initialized when the first work comes + * along + */ +static void pim_vxlan_init_work(void) +{ + if (vxlan_info.flags & PIM_VXLANF_WORK_INITED) + return; + + vxlan_info.max_work_cnt = PIM_VXLAN_WORK_MAX; + vxlan_info.flags |= PIM_VXLANF_WORK_INITED; + vxlan_info.work_list = list_new(); + pim_vxlan_work_timer_setup(TRUE /* start */); +} + +static void pim_vxlan_add_work(struct pim_vxlan_sg *vxlan_sg) +{ + if (vxlan_sg->flags & PIM_VXLAN_SGF_DEL_IN_PROG) { + if (PIM_DEBUG_VXLAN) + zlog_debug("vxlan SG %s skip work list; del-in-prog", + vxlan_sg->sg_str); + return; + } + + pim_vxlan_init_work(); + + /* already a part of the work list */ + if (vxlan_sg->work_node) + return; + + if (PIM_DEBUG_VXLAN) + zlog_debug("vxlan SG %s work list add", + vxlan_sg->sg_str); + vxlan_sg->work_node = listnode_add(vxlan_info.work_list, vxlan_sg); + /* XXX: adjust max_work_cnt if needed */ +} + +static void pim_vxlan_del_work(struct pim_vxlan_sg *vxlan_sg) +{ + if (!vxlan_sg->work_node) + return; + + if (PIM_DEBUG_VXLAN) + zlog_debug("vxlan SG %s work list del", + vxlan_sg->sg_str); + + if (vxlan_sg->work_node == vxlan_info.next_work) + vxlan_info.next_work = vxlan_sg->work_node->next; + + list_delete_node(vxlan_info.work_list, vxlan_sg->work_node); + vxlan_sg->work_node = NULL; +} + +void pim_vxlan_update_sg_reg_state(struct pim_instance *pim, + struct pim_upstream *up, bool reg_join) +{ + struct pim_vxlan_sg *vxlan_sg; + + vxlan_sg = pim_vxlan_sg_find(pim, &up->sg); + if (!vxlan_sg) + return; + + /* add the vxlan sg entry to a work list for periodic reg joins. + * the entry will stay in the list as long as the register state is + * PIM_REG_JOIN + */ + if (reg_join) + pim_vxlan_add_work(vxlan_sg); + else + pim_vxlan_del_work(vxlan_sg); +} + +static int pim_vxlan_work_timer_cb(struct thread *t) +{ + pim_vxlan_do_reg_work(); + pim_vxlan_work_timer_setup(true /* start */); + return 0; +} + +/* global 1second timer used for periodic processing */ +static void pim_vxlan_work_timer_setup(bool start) +{ + THREAD_OFF(vxlan_info.work_timer); + if (start) + thread_add_timer(router->master, pim_vxlan_work_timer_cb, NULL, + PIM_VXLAN_WORK_TIME, &vxlan_info.work_timer); +} + +/**************************** vxlan origination mroutes *********************** + * For every (local-vtep-ip, bum-mcast-grp) registered by evpn an origination + * mroute is setup by pimd. The purpose of this mroute is to forward vxlan + * encapsulated BUM (broadcast, unknown-unicast and unknown-multicast packets + * over the underlay.) + * + * Sample mroute (single VTEP): + * (27.0.0.7, 239.1.1.100) Iif: lo Oifs: uplink-1 + * + * Sample mroute (anycast VTEP): + * (36.0.0.9, 239.1.1.100) Iif: peerlink-3.4094\ + * Oifs: peerlink-3.4094 uplink-1 + ***************************************************************************/ +static void pim_vxlan_orig_mr_up_del(struct pim_vxlan_sg *vxlan_sg) +{ + struct pim_upstream *up = vxlan_sg->up; + + if (!up) + return; + + if (PIM_DEBUG_VXLAN) + zlog_debug("vxlan SG %s orig mroute-up del", + vxlan_sg->sg_str); + + vxlan_sg->up = NULL; + if (up->flags & PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_ORIG) { + /* clear out all the vxlan properties */ + up->flags &= ~(PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_ORIG | + PIM_UPSTREAM_FLAG_MASK_STATIC_IIF | + PIM_UPSTREAM_FLAG_MASK_DISABLE_KAT_EXPIRY | + PIM_UPSTREAM_FLAG_MASK_FORCE_PIMREG | + PIM_UPSTREAM_FLAG_MASK_NO_PIMREG_DATA | + PIM_UPSTREAM_FLAG_MASK_ALLOW_IIF_IN_OIL); + + /* We bring things to a grinding halt by force expirying + * the kat. Doing this will also remove the reference we + * created as a "vxlan" source and delete the upstream entry + * if there are no other references. + */ + if (PIM_UPSTREAM_FLAG_TEST_SRC_STREAM(up->flags)) { + THREAD_OFF(up->t_ka_timer); + up = pim_upstream_keep_alive_timer_proc(up); + } else { + /* this is really unexpected as we force vxlan + * origination mroutes active sources but just in + * case + */ + up = pim_upstream_del(vxlan_sg->pim, up, + __PRETTY_FUNCTION__); + } + /* if there are other references register the source + * for nht + */ + if (up) + pim_rpf_update(vxlan_sg->pim, up, NULL, 1 /* is_new */); + } +} + +static void pim_vxlan_orig_mr_up_iif_update(struct pim_vxlan_sg *vxlan_sg) +{ + int vif_index; + + /* update MFC with the new IIF */ + pim_upstream_fill_static_iif(vxlan_sg->up, vxlan_sg->iif); + vif_index = pim_if_find_vifindex_by_ifindex(vxlan_sg->pim, + vxlan_sg->iif->ifindex); + if (vif_index > 0) + pim_scan_individual_oil(vxlan_sg->up->channel_oil, + vif_index); + + if (PIM_DEBUG_VXLAN) + zlog_debug("vxlan SG %s orig mroute-up updated with iif %s vifi %d", + vxlan_sg->sg_str, + vxlan_sg->iif?vxlan_sg->iif->name:"-", vif_index); + +} + +/* For every VxLAN BUM multicast group we setup a SG-up that has the following + * "forced properties" - + * 1. Directly connected on a DR interface i.e. we must act as an FHR + * 2. We prime the pump i.e. no multicast data is needed to register this + * source with the FHR. To do that we send periodic null registers if + * the SG entry is in a register-join state. We also prevent expiry of + * KAT. + * 3. As this SG is setup without data there is no need to register encapsulate + * data traffic. This encapsulation is explicitly skipped for the following + * reasons - + * a) Many levels of encapsulation are needed creating MTU disc challenges. + * Overlay BUM is encapsulated in a vxlan/UDP/IP header and then + * encapsulated again in a pim-register header. + * b) On a vxlan-aa setup both switches rx a copy of each BUM packet. if + * they both reg encapsulated traffic the RP will accept the duplicates + * as there are no RPF checks for this encapsulated data. + * a), b) can be workarounded if needed, but there is really no need because + * of (2) i.e. the pump is primed without data. + */ +static void pim_vxlan_orig_mr_up_add(struct pim_vxlan_sg *vxlan_sg) +{ + struct pim_upstream *up; + int flags = 0; + struct prefix nht_p; + + if (vxlan_sg->up) { + /* nothing to do */ + return; + } + + if (PIM_DEBUG_VXLAN) + zlog_debug("vxlan SG %s orig mroute-up add with iif %s", + vxlan_sg->sg_str, + vxlan_sg->iif?vxlan_sg->iif->name:"-"); + + PIM_UPSTREAM_FLAG_SET_SRC_VXLAN_ORIG(flags); + /* pin the IIF to lo or peerlink-subinterface and disable NHT */ + PIM_UPSTREAM_FLAG_SET_STATIC_IIF(flags); + /* Fake traffic by setting SRC_STREAM and starting KAT */ + /* We intentionally skip updating ref count for SRC_STREAM/FHR. + * Setting SRC_VXLAN should have already created a reference + * preventing the entry from being deleted + */ + PIM_UPSTREAM_FLAG_SET_FHR(flags); + PIM_UPSTREAM_FLAG_SET_SRC_STREAM(flags); + /* Force pimreg even if non-DR. This is needed on a MLAG setup for + * VxLAN AA + */ + PIM_UPSTREAM_FLAG_SET_FORCE_PIMREG(flags); + /* prevent KAT expiry. we want the MDT setup even if there is no BUM + * traffic + */ + PIM_UPSTREAM_FLAG_SET_DISABLE_KAT_EXPIRY(flags); + /* SPT for vxlan BUM groups is primed and maintained via NULL + * registers so there is no need to reg-encapsulate + * vxlan-encapsulated overlay data traffic + */ + PIM_UPSTREAM_FLAG_SET_NO_PIMREG_DATA(flags); + /* On a MLAG setup we force a copy to the MLAG peer while also + * accepting traffic from the peer. To do this we set peerlink-rif as + * the IIF and also add it to the OIL + */ + PIM_UPSTREAM_FLAG_SET_ALLOW_IIF_IN_OIL(flags); + + /* XXX: todo: defer pim_upstream add if pim is not enabled on the iif */ + up = pim_upstream_find(vxlan_sg->pim, &vxlan_sg->sg); + if (up) { + /* if the iif is set to something other than the vxlan_sg->iif + * we must dereg the old nexthop and force to new "static" + * iif + */ + if (!PIM_UPSTREAM_FLAG_TEST_STATIC_IIF(up->flags)) { + nht_p.family = AF_INET; + nht_p.prefixlen = IPV4_MAX_BITLEN; + nht_p.u.prefix4 = up->upstream_addr; + pim_delete_tracked_nexthop(vxlan_sg->pim, + &nht_p, up, NULL); + } + pim_upstream_ref(up, flags, __PRETTY_FUNCTION__); + vxlan_sg->up = up; + pim_vxlan_orig_mr_up_iif_update(vxlan_sg); + } else { + up = pim_upstream_add(vxlan_sg->pim, &vxlan_sg->sg, + vxlan_sg->iif, flags, + __PRETTY_FUNCTION__, NULL); + vxlan_sg->up = up; + } + + if (!up) { + if (PIM_DEBUG_VXLAN) + zlog_debug("vxlan SG %s orig mroute-up add failed", + vxlan_sg->sg_str); + return; + } + + pim_upstream_keep_alive_timer_start(up, vxlan_sg->pim->keep_alive_time); + + /* register the source with the RP */ + if (up->reg_state == PIM_REG_NOINFO) { + pim_register_join(up); + pim_null_register_send(up); + } + + /* update the inherited OIL */ + pim_upstream_inherited_olist(vxlan_sg->pim, up); +} + +static void pim_vxlan_orig_mr_oif_add(struct pim_vxlan_sg *vxlan_sg) +{ + if (!vxlan_sg->up || !vxlan_sg->orig_oif) + return; + + if (PIM_DEBUG_VXLAN) + zlog_debug("vxlan SG %s oif %s add", + vxlan_sg->sg_str, vxlan_sg->orig_oif->name); + + vxlan_sg->flags |= PIM_VXLAN_SGF_OIF_INSTALLED; + pim_channel_add_oif(vxlan_sg->up->channel_oil, + vxlan_sg->orig_oif, PIM_OIF_FLAG_PROTO_VXLAN); +} + +static void pim_vxlan_orig_mr_oif_del(struct pim_vxlan_sg *vxlan_sg) +{ + struct interface *orig_oif; + + orig_oif = vxlan_sg->orig_oif; + vxlan_sg->orig_oif = NULL; + + if (!(vxlan_sg->flags & PIM_VXLAN_SGF_OIF_INSTALLED)) + return; + + if (PIM_DEBUG_VXLAN) + zlog_debug("vxlan SG %s oif %s del", + vxlan_sg->sg_str, orig_oif->name); + + vxlan_sg->flags &= ~PIM_VXLAN_SGF_OIF_INSTALLED; + pim_channel_del_oif(vxlan_sg->up->channel_oil, + orig_oif, PIM_OIF_FLAG_PROTO_VXLAN); +} + +static inline struct interface *pim_vxlan_orig_mr_oif_get( + struct pim_instance *pim) +{ + return (vxlan_mlag.flags & PIM_VXLAN_MLAGF_ENABLED) ? + pim->vxlan.peerlink_rif : NULL; +} + +/* Single VTEPs: IIF for the vxlan-origination-mroutes is lo or vrf-dev (if + * the mroute is in a non-default vrf). + * Anycast VTEPs: IIF is the MLAG ISL/peerlink. + */ +static inline struct interface *pim_vxlan_orig_mr_iif_get( + struct pim_instance *pim) +{ + return ((vxlan_mlag.flags & PIM_VXLAN_MLAGF_ENABLED) && + pim->vxlan.peerlink_rif) ? + pim->vxlan.peerlink_rif : pim->vxlan.default_iif; +} + +static bool pim_vxlan_orig_mr_add_is_ok(struct pim_vxlan_sg *vxlan_sg) +{ + struct pim_interface *pim_ifp; + + vxlan_sg->iif = pim_vxlan_orig_mr_iif_get(vxlan_sg->pim); + if (!vxlan_sg->iif) + return false; + + pim_ifp = (struct pim_interface *)vxlan_sg->iif->info; + if (!pim_ifp || (pim_ifp->mroute_vif_index < 0)) + return false; + + return true; +} + +static void pim_vxlan_orig_mr_install(struct pim_vxlan_sg *vxlan_sg) +{ + pim_vxlan_orig_mr_up_add(vxlan_sg); + + vxlan_sg->orig_oif = pim_vxlan_orig_mr_oif_get(vxlan_sg->pim); + pim_vxlan_orig_mr_oif_add(vxlan_sg); +} + +static void pim_vxlan_orig_mr_add(struct pim_vxlan_sg *vxlan_sg) +{ + if (!pim_vxlan_orig_mr_add_is_ok(vxlan_sg)) + return; + + if (PIM_DEBUG_VXLAN) + zlog_debug("vxlan SG %s orig-mr add", vxlan_sg->sg_str); + + pim_vxlan_orig_mr_install(vxlan_sg); +} + +static void pim_vxlan_orig_mr_del(struct pim_vxlan_sg *vxlan_sg) +{ + if (PIM_DEBUG_VXLAN) + zlog_debug("vxlan SG %s orig-mr del", vxlan_sg->sg_str); + + pim_vxlan_orig_mr_oif_del(vxlan_sg); + pim_vxlan_orig_mr_up_del(vxlan_sg); +} + +static void pim_vxlan_orig_mr_iif_update(struct hash_backet *backet, void *arg) +{ + struct interface *ifp = (struct interface *)arg; + struct pim_vxlan_sg *vxlan_sg = (struct pim_vxlan_sg *)backet->data; + struct interface *old_iif = vxlan_sg->iif; + + if (!pim_vxlan_is_orig_mroute(vxlan_sg)) + return; + + if (PIM_DEBUG_VXLAN) + zlog_debug("vxlan SG %s iif changed from %s to %s", + vxlan_sg->sg_str, + old_iif ? old_iif->name : "-", + ifp ? ifp->name : "-"); + + if (pim_vxlan_orig_mr_add_is_ok(vxlan_sg)) { + if (vxlan_sg->up) { + /* upstream exists but iif changed */ + pim_vxlan_orig_mr_up_iif_update(vxlan_sg); + } else { + /* install mroute */ + pim_vxlan_orig_mr_install(vxlan_sg); + } + } else { + pim_vxlan_orig_mr_del(vxlan_sg); + } +} + +/**************************** vxlan termination mroutes *********************** + * For every bum-mcast-grp registered by evpn a *G termination + * mroute is setup by pimd. The purpose of this mroute is to pull down vxlan + * packets with the bum-mcast-grp dip from the underlay and terminate the + * tunnel. This is done by including the vxlan termination device (ipmr-lo) in + * its OIL. The vxlan de-capsulated packets are subject to subsequent overlay + * bridging. + * + * Sample mroute: + * (0.0.0.0, 239.1.1.100) Iif: uplink-1 Oifs: ipmr-lo, uplink-1 + *****************************************************************************/ +struct pim_interface *pim_vxlan_get_term_ifp(struct pim_instance *pim) +{ + return pim->vxlan.term_if ? + (struct pim_interface *)pim->vxlan.term_if->info : NULL; +} + +static void pim_vxlan_term_mr_oif_add(struct pim_vxlan_sg *vxlan_sg) +{ + if (vxlan_sg->flags & PIM_VXLAN_SGF_OIF_INSTALLED) + return; + + if (PIM_DEBUG_VXLAN) + zlog_debug("vxlan SG %s term-oif %s add", + vxlan_sg->sg_str, vxlan_sg->term_oif->name); + + if (pim_ifchannel_local_membership_add(vxlan_sg->term_oif, + &vxlan_sg->sg)) { + vxlan_sg->flags |= PIM_VXLAN_SGF_OIF_INSTALLED; + } else { + zlog_warn("vxlan SG %s term-oif %s add failed", + vxlan_sg->sg_str, vxlan_sg->term_oif->name); + } +} + +static void pim_vxlan_term_mr_oif_del(struct pim_vxlan_sg *vxlan_sg) +{ + if (!(vxlan_sg->flags & PIM_VXLAN_SGF_OIF_INSTALLED)) + return; + + if (PIM_DEBUG_VXLAN) + zlog_debug("vxlan SG %s oif %s del", + vxlan_sg->sg_str, vxlan_sg->term_oif->name); + + vxlan_sg->flags &= ~PIM_VXLAN_SGF_OIF_INSTALLED; + pim_ifchannel_local_membership_del(vxlan_sg->term_oif, &vxlan_sg->sg); +} + +static void pim_vxlan_term_mr_up_add(struct pim_vxlan_sg *vxlan_sg) +{ + struct pim_upstream *up; + int flags = 0; + + if (vxlan_sg->up) { + /* nothing to do */ + return; + } + + if (PIM_DEBUG_VXLAN) + zlog_debug("vxlan SG %s term mroute-up add", + vxlan_sg->sg_str); + + PIM_UPSTREAM_FLAG_SET_SRC_VXLAN_TERM(flags); + /* enable MLAG designated-forwarder election on termination mroutes */ + PIM_UPSTREAM_FLAG_SET_MLAG_VXLAN(flags); + + up = pim_upstream_add(vxlan_sg->pim, &vxlan_sg->sg, + NULL /* iif */, flags, + __PRETTY_FUNCTION__, NULL); + vxlan_sg->up = up; + + if (!up) { + zlog_warn("vxlan SG %s term mroute-up add failed", + vxlan_sg->sg_str); + } +} + +static void pim_vxlan_term_mr_up_del(struct pim_vxlan_sg *vxlan_sg) +{ + struct pim_upstream *up = vxlan_sg->up; + + if (!up) + return; + + if (PIM_DEBUG_VXLAN) + zlog_debug("vxlan SG %s term mroute-up del", + vxlan_sg->sg_str); + vxlan_sg->up = NULL; + if (up->flags & PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM) { + /* clear out all the vxlan related flags */ + up->flags &= ~(PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM | + PIM_UPSTREAM_FLAG_MASK_MLAG_VXLAN); + + pim_upstream_del(vxlan_sg->pim, up, + __PRETTY_FUNCTION__); + } +} + +static void pim_vxlan_term_mr_add(struct pim_vxlan_sg *vxlan_sg) +{ + if (PIM_DEBUG_VXLAN) + zlog_debug("vxlan SG %s term mroute add", vxlan_sg->sg_str); + + vxlan_sg->term_oif = vxlan_sg->pim->vxlan.term_if; + if (!vxlan_sg->term_oif) + /* defer termination mroute till we have a termination device */ + return; + + pim_vxlan_term_mr_up_add(vxlan_sg); + /* set up local membership for the term-oif */ + pim_vxlan_term_mr_oif_add(vxlan_sg); +} + +static void pim_vxlan_term_mr_del(struct pim_vxlan_sg *vxlan_sg) +{ + if (PIM_DEBUG_VXLAN) + zlog_debug("vxlan SG %s term mroute del", vxlan_sg->sg_str); + + /* remove local membership associated with the term oif */ + pim_vxlan_term_mr_oif_del(vxlan_sg); + /* remove references to the upstream entry */ + pim_vxlan_term_mr_up_del(vxlan_sg); +} + +/************************** vxlan SG cache management ************************/ +static unsigned int pim_vxlan_sg_hash_key_make(void *p) +{ + struct pim_vxlan_sg *vxlan_sg = p; + + return (jhash_2words(vxlan_sg->sg.src.s_addr, + vxlan_sg->sg.grp.s_addr, 0)); +} + +static bool pim_vxlan_sg_hash_eq(const void *p1, const void *p2) +{ + const struct pim_vxlan_sg *sg1 = p1; + const struct pim_vxlan_sg *sg2 = p2; + + return ((sg1->sg.src.s_addr == sg2->sg.src.s_addr) + && (sg1->sg.grp.s_addr == sg2->sg.grp.s_addr)); +} + +static struct pim_vxlan_sg *pim_vxlan_sg_new(struct pim_instance *pim, + struct prefix_sg *sg) +{ + struct pim_vxlan_sg *vxlan_sg; + + vxlan_sg = XCALLOC(MTYPE_PIM_VXLAN_SG, sizeof(*vxlan_sg)); + + vxlan_sg->pim = pim; + vxlan_sg->sg = *sg; + pim_str_sg_set(sg, vxlan_sg->sg_str); + + if (PIM_DEBUG_VXLAN) + zlog_debug("vxlan SG %s alloc", vxlan_sg->sg_str); + + vxlan_sg = hash_get(pim->vxlan.sg_hash, vxlan_sg, hash_alloc_intern); + + return vxlan_sg; +} + +struct pim_vxlan_sg *pim_vxlan_sg_find(struct pim_instance *pim, + struct prefix_sg *sg) +{ + struct pim_vxlan_sg lookup; + + lookup.sg = *sg; + return hash_lookup(pim->vxlan.sg_hash, &lookup); +} + +struct pim_vxlan_sg *pim_vxlan_sg_add(struct pim_instance *pim, + struct prefix_sg *sg) +{ + struct pim_vxlan_sg *vxlan_sg; + + vxlan_sg = pim_vxlan_sg_find(pim, sg); + if (vxlan_sg) + return vxlan_sg; + + vxlan_sg = pim_vxlan_sg_new(pim, sg); + + if (pim_vxlan_is_orig_mroute(vxlan_sg)) + pim_vxlan_orig_mr_add(vxlan_sg); + else + pim_vxlan_term_mr_add(vxlan_sg); + + return vxlan_sg; +} + +void pim_vxlan_sg_del(struct pim_instance *pim, struct prefix_sg *sg) +{ + struct pim_vxlan_sg *vxlan_sg; + + vxlan_sg = pim_vxlan_sg_find(pim, sg); + if (!vxlan_sg) + return; + + vxlan_sg->flags |= PIM_VXLAN_SGF_DEL_IN_PROG; + + pim_vxlan_del_work(vxlan_sg); + + if (pim_vxlan_is_orig_mroute(vxlan_sg)) + pim_vxlan_orig_mr_del(vxlan_sg); + else + pim_vxlan_term_mr_del(vxlan_sg); + + hash_release(vxlan_sg->pim->vxlan.sg_hash, vxlan_sg); + + if (PIM_DEBUG_VXLAN) + zlog_debug("vxlan SG %s free", vxlan_sg->sg_str); + + XFREE(MTYPE_PIM_VXLAN_SG, vxlan_sg); +} + +/******************************* MLAG handling *******************************/ +/* The peerlink sub-interface is added as an OIF to the origination-mroute. + * This is done to send a copy of the multicast-vxlan encapsulated traffic + * to the MLAG peer which may mroute it over the underlay if there are any + * interested receivers. + */ +static void pim_vxlan_sg_peerlink_update(struct hash_backet *backet, void *arg) +{ + struct interface *new_oif = (struct interface *)arg; + struct pim_vxlan_sg *vxlan_sg = (struct pim_vxlan_sg *)backet->data; + + if (!pim_vxlan_is_orig_mroute(vxlan_sg)) + return; + + if (vxlan_sg->orig_oif == new_oif) + return; + + pim_vxlan_orig_mr_oif_del(vxlan_sg); + + vxlan_sg->orig_oif = new_oif; + pim_vxlan_orig_mr_oif_add(vxlan_sg); +} + +/* In the case of anycast VTEPs the VTEP-PIP must be used as the + * register source. + */ +bool pim_vxlan_get_register_src(struct pim_instance *pim, + struct pim_upstream *up, struct in_addr *src_p) +{ + if (!(vxlan_mlag.flags & PIM_VXLAN_MLAGF_ENABLED)) + return true; + + /* if address is not available suppress the pim-register */ + if (vxlan_mlag.reg_addr.s_addr == INADDR_ANY) + return false; + + *src_p = vxlan_mlag.reg_addr; + return true; +} + +void pim_vxlan_mlag_update(bool enable, bool peer_state, uint32_t role, + struct interface *peerlink_rif, + struct in_addr *reg_addr) +{ + struct pim_instance *pim; + struct interface *old_oif; + struct interface *new_oif; + char addr_buf[INET_ADDRSTRLEN]; + struct pim_interface *pim_ifp = NULL; + + if (PIM_DEBUG_VXLAN) { + inet_ntop(AF_INET, reg_addr, + addr_buf, INET_ADDRSTRLEN); + zlog_debug("vxlan MLAG update %s state %s role %d rif %s addr %s", + enable ? "enable" : "disable", + peer_state ? "up" : "down", + role, + peerlink_rif ? peerlink_rif->name : "-", + addr_buf); + } + + /* XXX: for now vxlan termination is only possible in the default VRF + * when that changes this will need to change to iterate all VRFs + */ + pim = pim_get_pim_instance(VRF_DEFAULT); + + old_oif = pim_vxlan_orig_mr_oif_get(pim); + + if (enable) + vxlan_mlag.flags |= PIM_VXLAN_MLAGF_ENABLED; + else + vxlan_mlag.flags &= ~PIM_VXLAN_MLAGF_ENABLED; + + if (vxlan_mlag.peerlink_rif != peerlink_rif) + vxlan_mlag.peerlink_rif = peerlink_rif; + + vxlan_mlag.reg_addr = *reg_addr; + vxlan_mlag.peer_state = peer_state; + vxlan_mlag.role = role; + + /* process changes */ + if (vxlan_mlag.peerlink_rif) + pim_ifp = (struct pim_interface *)vxlan_mlag.peerlink_rif->info; + if ((vxlan_mlag.flags & PIM_VXLAN_MLAGF_ENABLED) && + pim_ifp && (pim_ifp->mroute_vif_index > 0)) + pim_vxlan_set_peerlink_rif(pim, peerlink_rif); + else + pim_vxlan_set_peerlink_rif(pim, NULL); + + new_oif = pim_vxlan_orig_mr_oif_get(pim); + if (old_oif != new_oif) + hash_iterate(pim->vxlan.sg_hash, pim_vxlan_sg_peerlink_update, + new_oif); +} + +/****************************** misc callbacks *******************************/ +void pim_vxlan_config_write(struct vty *vty, char *spaces, int *writes) +{ + char addr_buf[INET_ADDRSTRLEN]; + + if ((vxlan_mlag.flags & PIM_VXLAN_MLAGF_ENABLED) && + vxlan_mlag.peerlink_rif) { + + inet_ntop(AF_INET, &vxlan_mlag.reg_addr, + addr_buf, sizeof(addr_buf)); + vty_out(vty, + "%sip pim mlag %s role %s state %s addr %s\n", + spaces, + vxlan_mlag.peerlink_rif->name, + (vxlan_mlag.role == PIM_VXLAN_MLAG_ROLE_PRIMARY) ? + "primary":"secondary", + vxlan_mlag.peer_state ? "up" : "down", + addr_buf); + *writes += 1; + } +} + +static void pim_vxlan_set_default_iif(struct pim_instance *pim, + struct interface *ifp) +{ + struct interface *old_iif; + + if (pim->vxlan.default_iif == ifp) + return; + + old_iif = pim->vxlan.default_iif; + if (PIM_DEBUG_VXLAN) + zlog_debug("%s: vxlan default iif changed from %s to %s", + __PRETTY_FUNCTION__, + old_iif ? old_iif->name : "-", + ifp ? ifp->name : "-"); + + old_iif = pim_vxlan_orig_mr_iif_get(pim); + pim->vxlan.default_iif = ifp; + ifp = pim_vxlan_orig_mr_iif_get(pim); + if (old_iif == ifp) + return; + + if (PIM_DEBUG_VXLAN) + zlog_debug("%s: vxlan orig iif changed from %s to %s", + __PRETTY_FUNCTION__, old_iif ? old_iif->name : "-", + ifp ? ifp->name : "-"); + + /* add/del upstream entries for the existing vxlan SG when the + * interface becomes available + */ + if (pim->vxlan.sg_hash) + hash_iterate(pim->vxlan.sg_hash, + pim_vxlan_orig_mr_iif_update, ifp); +} + +static void pim_vxlan_set_peerlink_rif(struct pim_instance *pim, + struct interface *ifp) +{ + struct interface *old_iif; + + if (pim->vxlan.peerlink_rif == ifp) + return; + + old_iif = pim->vxlan.peerlink_rif; + if (PIM_DEBUG_VXLAN) + zlog_debug("%s: vxlan peerlink_rif changed from %s to %s", + __PRETTY_FUNCTION__, old_iif ? old_iif->name : "-", + ifp ? ifp->name : "-"); + + old_iif = pim_vxlan_orig_mr_iif_get(pim); + pim->vxlan.peerlink_rif = ifp; + ifp = pim_vxlan_orig_mr_iif_get(pim); + if (old_iif == ifp) + return; + + if (PIM_DEBUG_VXLAN) + zlog_debug("%s: vxlan orig iif changed from %s to %s", + __PRETTY_FUNCTION__, old_iif ? old_iif->name : "-", + ifp ? ifp->name : "-"); + + /* add/del upstream entries for the existing vxlan SG when the + * interface becomes available + */ + if (pim->vxlan.sg_hash) + hash_iterate(pim->vxlan.sg_hash, + pim_vxlan_orig_mr_iif_update, ifp); +} + +void pim_vxlan_add_vif(struct interface *ifp) +{ + struct pim_interface *pim_ifp = ifp->info; + struct pim_instance *pim = pim_ifp->pim; + + if (pim->vrf_id != VRF_DEFAULT) + return; + + if (if_is_loopback_or_vrf(ifp)) + pim_vxlan_set_default_iif(pim, ifp); + + if (vxlan_mlag.flags & PIM_VXLAN_MLAGF_ENABLED && + (ifp == vxlan_mlag.peerlink_rif)) + pim_vxlan_set_peerlink_rif(pim, ifp); +} + +void pim_vxlan_del_vif(struct interface *ifp) +{ + struct pim_interface *pim_ifp = ifp->info; + struct pim_instance *pim = pim_ifp->pim; + + if (pim->vrf_id != VRF_DEFAULT) + return; + + if (pim->vxlan.default_iif == ifp) + pim_vxlan_set_default_iif(pim, NULL); + + if (pim->vxlan.peerlink_rif == ifp) + pim_vxlan_set_peerlink_rif(pim, NULL); +} + +static void pim_vxlan_term_mr_oif_update(struct hash_backet *backet, void *arg) +{ + struct interface *ifp = (struct interface *)arg; + struct pim_vxlan_sg *vxlan_sg = (struct pim_vxlan_sg *)backet->data; + + if (pim_vxlan_is_orig_mroute(vxlan_sg)) + return; + + if (vxlan_sg->term_oif == ifp) + return; + + if (PIM_DEBUG_VXLAN) + zlog_debug("vxlan SG %s term oif changed from %s to %s", + vxlan_sg->sg_str, + vxlan_sg->term_oif ? vxlan_sg->term_oif->name : "-", + ifp ? ifp->name : "-"); + + pim_vxlan_term_mr_del(vxlan_sg); + vxlan_sg->term_oif = ifp; + pim_vxlan_term_mr_add(vxlan_sg); +} + +void pim_vxlan_add_term_dev(struct pim_instance *pim, + struct interface *ifp) +{ + struct pim_interface *pim_ifp; + + if (pim->vxlan.term_if == ifp) + return; + + if (PIM_DEBUG_VXLAN) + zlog_debug("vxlan term oif changed from %s to %s", + pim->vxlan.term_if ? pim->vxlan.term_if->name : "-", + ifp->name); + + /* enable pim on the term ifp */ + pim_ifp = (struct pim_interface *)ifp->info; + if (pim_ifp) { + PIM_IF_DO_PIM(pim_ifp->options); + } else { + pim_ifp = pim_if_new(ifp, false /*igmp*/, true /*pim*/, + false /*pimreg*/, true /*vxlan_term*/); + /* ensure that pimreg existss before using the newly created + * vxlan termination device + */ + pim_if_create_pimreg(pim); + } + + pim->vxlan.term_if = ifp; + + if (pim->vxlan.sg_hash) + hash_iterate(pim_ifp->pim->vxlan.sg_hash, + pim_vxlan_term_mr_oif_update, ifp); +} + +void pim_vxlan_del_term_dev(struct pim_instance *pim) +{ + struct interface *ifp = pim->vxlan.term_if; + struct pim_interface *pim_ifp; + + if (PIM_DEBUG_VXLAN) + zlog_debug("vxlan term oif changed from %s to -", ifp->name); + + pim->vxlan.term_if = NULL; + + if (pim->vxlan.sg_hash) + hash_iterate(pim->vxlan.sg_hash, + pim_vxlan_term_mr_oif_update, NULL); + + pim_ifp = (struct pim_interface *)ifp->info; + if (pim_ifp) { + PIM_IF_DONT_PIM(pim_ifp->options); + if (!PIM_IF_TEST_IGMP(pim_ifp->options)) + pim_if_delete(ifp); + } + +} + +void pim_vxlan_init(struct pim_instance *pim) +{ + char hash_name[64]; + + snprintf(hash_name, sizeof(hash_name), + "PIM %s vxlan SG hash", pim->vrf->name); + pim->vxlan.sg_hash = hash_create(pim_vxlan_sg_hash_key_make, + pim_vxlan_sg_hash_eq, hash_name); +} + +void pim_vxlan_exit(struct pim_instance *pim) +{ + if (pim->vxlan.sg_hash) { + hash_clean(pim->vxlan.sg_hash, NULL); + hash_free(pim->vxlan.sg_hash); + pim->vxlan.sg_hash = NULL; + } +} diff --git a/pimd/pim_vxlan.h b/pimd/pim_vxlan.h new file mode 100644 index 000000000..f0a66e6b7 --- /dev/null +++ b/pimd/pim_vxlan.h @@ -0,0 +1,139 @@ +/* PIM support for VxLAN BUM flooding + * + * Copyright (C) 2019 Cumulus Networks, Inc. + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef PIM_VXLAN_H +#define PIM_VXLAN_H + +/* global timer used for miscellaneous staggered processing */ +#define PIM_VXLAN_WORK_TIME 1 +/* number of SG entries processed at one shot */ +#define PIM_VXLAN_WORK_MAX 500 +/* frequency of periodic NULL registers */ +#define PIM_VXLAN_NULL_REG_INTERVAL 60 /* seconds */ + +#define vxlan_mlag (vxlan_info.mlag) + +enum pim_vxlan_sg_flags { + PIM_VXLAN_SGF_NONE = 0, + PIM_VXLAN_SGF_DEL_IN_PROG = (1 << 0), + PIM_VXLAN_SGF_OIF_INSTALLED = (1 << 1) +}; + +struct pim_vxlan_sg { + struct pim_instance *pim; + + /* key */ + struct prefix_sg sg; + char sg_str[PIM_SG_LEN]; + + enum pim_vxlan_sg_flags flags; + struct pim_upstream *up; + struct listnode *work_node; /* to pim_vxlan.work_list */ + + /* termination info (only applicable to termination XG mroutes) + * term_if - termination device ipmr-lo is added to the OIL + * as local/IGMP membership to allow termination of vxlan traffic + */ + struct interface *term_oif; + + /* origination info + * iif - lo/vrf or peerlink (on MLAG setups) + * peerlink_oif - added to the OIL to send encapsulated BUM traffic to + * the MLAG peer switch + */ + struct interface *iif; + /* on a MLAG setup the peerlink is added as a static OIF */ + struct interface *orig_oif; +}; + +enum pim_vxlan_mlag_flags { + PIM_VXLAN_MLAGF_NONE = 0, + PIM_VXLAN_MLAGF_ENABLED = (1 << 0) +}; + +enum pim_vxlan_mlag_role { + PIM_VXLAN_MLAG_ROLE_SECONDARY = 0, + PIM_VXLAN_MLAG_ROLE_PRIMARY +}; + +struct pim_vxlan_mlag { + enum pim_vxlan_mlag_flags flags; + enum pim_vxlan_mlag_role role; + bool peer_state; + /* routed interface setup on top of MLAG peerlink */ + struct interface *peerlink_rif; + struct in_addr reg_addr; +}; + +enum pim_vxlan_flags { + PIM_VXLANF_NONE = 0, + PIM_VXLANF_WORK_INITED = (1 << 0) +}; + +struct pim_vxlan { + enum pim_vxlan_flags flags; + + struct thread *work_timer; + struct list *work_list; + struct listnode *next_work; + int max_work_cnt; + + struct pim_vxlan_mlag mlag; +}; + +/* zebra adds- + * 1. one (S, G) entry where S=local-VTEP-IP and G==BUM-mcast-grp for + * each BUM MDT. This is the origination entry. + * 2. and one (*, G) entry each MDT. This is the termination place holder. + * + * Note: This doesn't mean that only (*, G) mroutes are used for tunnel + * termination. (S, G) mroutes with ipmr-lo in the OIL can also be + * used for tunnel termiation if SPT switchover happens; however such + * SG entries are created by traffic and will NOT be a part of the vxlan SG + * database. + */ +static inline bool pim_vxlan_is_orig_mroute(struct pim_vxlan_sg *vxlan_sg) +{ + return (vxlan_sg->sg.src.s_addr != 0); +} + +extern struct pim_vxlan *pim_vxlan_p; +extern struct pim_vxlan_sg *pim_vxlan_sg_find(struct pim_instance *pim, + struct prefix_sg *sg); +extern struct pim_vxlan_sg *pim_vxlan_sg_add(struct pim_instance *pim, + struct prefix_sg *sg); +extern void pim_vxlan_sg_del(struct pim_instance *pim, struct prefix_sg *sg); +extern void pim_vxlan_update_sg_reg_state(struct pim_instance *pim, + struct pim_upstream *up, bool reg_join); +extern struct pim_interface *pim_vxlan_get_term_ifp(struct pim_instance *pim); +extern void pim_vxlan_add_vif(struct interface *ifp); +extern void pim_vxlan_del_vif(struct interface *ifp); +extern void pim_vxlan_add_term_dev(struct pim_instance *pim, + struct interface *ifp); +extern void pim_vxlan_del_term_dev(struct pim_instance *pim); +extern bool pim_vxlan_get_register_src(struct pim_instance *pim, + struct pim_upstream *up, struct in_addr *src_p); +extern void pim_vxlan_mlag_update(bool enable, bool peer_state, uint32_t role, + struct interface *peerlink_rif, + struct in_addr *reg_addr); +extern void pim_vxlan_config_write(struct vty *vty, char *spaces, int *writes); + +#endif /* PIM_VXLAN_H */ diff --git a/pimd/pim_vxlan_instance.h b/pimd/pim_vxlan_instance.h new file mode 100644 index 000000000..3f99483fb --- /dev/null +++ b/pimd/pim_vxlan_instance.h @@ -0,0 +1,45 @@ +/* PIM support for VxLAN BUM flooding + * + * Copyright (C) 2019 Cumulus Networks, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +#ifndef PIM_VXLAN_INSTANCE_H +#define PIM_VXLAN_INSTANCE_H + +/* pim termination device is expected to include the substring ipmr-lo */ +#define PIM_VXLAN_TERM_DEV_NAME "ipmr-lo" + +struct pim_vxlan_instance { + struct hash *sg_hash; + + /* this is lo for default instance and vrf-dev for non-default + * instances + */ + struct interface *default_iif; + + /* In a MLAG/VxLAN-AA setup the peerlink sub-interface (ISL-rif) is + * used as the IIF in + */ + struct interface *peerlink_rif; + + /* device used by the dataplane to terminate multicast encapsulated + * vxlan traffic + */ + struct interface *term_if; +}; + +extern void pim_vxlan_init(struct pim_instance *pim); +extern void pim_vxlan_exit(struct pim_instance *pim); + +#endif /* PIM_VXLAN_INSTANCE_H */ diff --git a/pimd/pim_zebra.c b/pimd/pim_zebra.c index b20f31082..aeaea7d69 100644 --- a/pimd/pim_zebra.c +++ b/pimd/pim_zebra.c @@ -45,6 +45,7 @@ #include "pim_jp_agg.h" #include "pim_nht.h" #include "pim_ssm.h" +#include "pim_vxlan.h" #undef PIM_DEBUG_IFADDR_DUMP #define PIM_DEBUG_IFADDR_DUMP @@ -110,13 +111,18 @@ static int pim_zebra_if_add(int command, struct zclient *zclient, struct pim_interface *pim_ifp; if (!ifp->info) { - pim_ifp = pim_if_new(ifp, false, false, false); + pim_ifp = pim_if_new(ifp, false, false, false, + false /*vxlan_term*/); ifp->info = pim_ifp; } pim_sock_add(ifp); } + if (!strncmp(ifp->name, PIM_VXLAN_TERM_DEV_NAME, + sizeof(PIM_VXLAN_TERM_DEV_NAME))) + pim_vxlan_add_term_dev(pim, ifp); + return 0; } @@ -124,6 +130,7 @@ static int pim_zebra_if_del(int command, struct zclient *zclient, zebra_size_t length, vrf_id_t vrf_id) { struct interface *ifp; + struct pim_instance *pim; /* zebra api adds/dels interfaces using the same call @@ -152,6 +159,10 @@ static int pim_zebra_if_del(int command, struct zclient *zclient, if_set_index(ifp, IFINDEX_INTERNAL); + pim = pim_get_pim_instance(vrf_id); + if (pim && pim->vxlan.term_if == ifp) + pim_vxlan_del_term_dev(pim); + return 0; } @@ -543,6 +554,41 @@ void pim_zebra_upstream_rpf_changed(struct pim_instance *pim, pim_upstream_update_join_desired(pim, up); } +static int pim_zebra_vxlan_sg_proc(int command, struct zclient *zclient, + zebra_size_t length, vrf_id_t vrf_id) +{ + struct stream *s; + struct pim_instance *pim; + struct prefix_sg sg; + + pim = pim_get_pim_instance(vrf_id); + if (!pim) + return 0; + + s = zclient->ibuf; + + sg.family = AF_INET; + sg.prefixlen = stream_getl(s); + stream_get(&sg.src.s_addr, s, sg.prefixlen); + stream_get(&sg.grp.s_addr, s, sg.prefixlen); + + if (PIM_DEBUG_ZEBRA) { + char sg_str[PIM_SG_LEN]; + + pim_str_sg_set(&sg, sg_str); + zlog_debug("%u:recv SG %s %s", vrf_id, + (command == ZEBRA_VXLAN_SG_ADD)?"add":"del", + sg_str); + } + + if (command == ZEBRA_VXLAN_SG_ADD) + pim_vxlan_sg_add(pim, &sg); + else + pim_vxlan_sg_del(pim, &sg); + + return 0; +} + void pim_scan_individual_oil(struct channel_oil *c_oil, int in_vif_index) { struct in_addr vif_source; @@ -769,6 +815,8 @@ void pim_zebra_init(void) zclient->interface_address_delete = pim_zebra_if_address_del; zclient->interface_vrf_update = pim_zebra_interface_vrf_update; zclient->nexthop_update = pim_parse_nexthop_update; + zclient->vxlan_sg_add = pim_zebra_vxlan_sg_proc; + zclient->vxlan_sg_del = pim_zebra_vxlan_sg_proc; zclient_init(zclient, ZEBRA_ROUTE_PIM, 0, &pimd_privs); if (PIM_DEBUG_PIM_TRACE) { diff --git a/pimd/pimd.h b/pimd/pimd.h index 50c19658d..2f2a87037 100644 --- a/pimd/pimd.h +++ b/pimd/pimd.h @@ -113,6 +113,7 @@ #define PIM_MASK_PIM_NHT_DETAIL (1 << 23) #define PIM_MASK_PIM_NHT_RP (1 << 24) #define PIM_MASK_MTRACE (1 << 25) +#define PIM_MASK_VXLAN (1 << 26) /* Remember 32 bits!!! */ /* PIM error codes */ @@ -180,6 +181,7 @@ extern uint8_t qpim_ecmp_rebalance_enable; #define PIM_DEBUG_PIM_NHT_DETAIL (router->debugs & PIM_MASK_PIM_NHT_DETAIL) #define PIM_DEBUG_PIM_NHT_RP (router->debugs & PIM_MASK_PIM_NHT_RP) #define PIM_DEBUG_MTRACE (router->debugs & PIM_MASK_MTRACE) +#define PIM_DEBUG_VXLAN (router->debugs & PIM_MASK_VXLAN) #define PIM_DEBUG_EVENTS \ (router->debugs \ @@ -220,6 +222,7 @@ extern uint8_t qpim_ecmp_rebalance_enable; #define PIM_DO_DEBUG_PIM_NHT (router->debugs |= PIM_MASK_PIM_NHT) #define PIM_DO_DEBUG_PIM_NHT_RP (router->debugs |= PIM_MASK_PIM_NHT_RP) #define PIM_DO_DEBUG_MTRACE (router->debugs |= PIM_MASK_MTRACE) +#define PIM_DO_DEBUG_VXLAN (router->debugs |= PIM_MASK_VXLAN) #define PIM_DONT_DEBUG_PIM_EVENTS (router->debugs &= ~PIM_MASK_PIM_EVENTS) #define PIM_DONT_DEBUG_PIM_PACKETS (router->debugs &= ~PIM_MASK_PIM_PACKETS) @@ -249,6 +252,7 @@ extern uint8_t qpim_ecmp_rebalance_enable; #define PIM_DONT_DEBUG_PIM_NHT (router->debugs &= ~PIM_MASK_PIM_NHT) #define PIM_DONT_DEBUG_PIM_NHT_RP (router->debugs &= ~PIM_MASK_PIM_NHT_RP) #define PIM_DONT_DEBUG_MTRACE (router->debugs &= ~PIM_MASK_MTRACE) +#define PIM_DONT_DEBUG_VXLAN (router->debugs &= ~PIM_MASK_VXLAN) void pim_router_init(void); void pim_router_terminate(void); diff --git a/pimd/subdir.am b/pimd/subdir.am index 7d8df7d10..7f4810722 100644 --- a/pimd/subdir.am +++ b/pimd/subdir.am @@ -60,6 +60,7 @@ pimd_libpim_a_SOURCES = \ pimd/pim_vty.c \ pimd/pim_zebra.c \ pimd/pim_zlookup.c \ + pimd/pim_vxlan.c \ pimd/pimd.c \ # end @@ -110,6 +111,8 @@ noinst_HEADERS += \ pimd/pim_vty.h \ pimd/pim_zebra.h \ pimd/pim_zlookup.h \ + pimd/pim_vxlan.h \ + pimd/pim_vxlan_instance.h \ pimd/pimd.h \ pimd/mtracebis_netlink.h \ pimd/mtracebis_routeget.h \ |