diff options
author | Stephen Worley <sworley@nvidia.com> | 2022-08-16 17:21:04 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-08-16 17:21:04 +0200 |
commit | d30d63f4f719168e179e34e1efa66ba86b33b493 (patch) | |
tree | bf19088b80fd2ad85f31be0814480e8e6c794c8e /zebra | |
parent | Merge pull request #11805 from opensourcerouting/fix/treat_as_withdraw_if_as_set (diff) | |
parent | zebra: add empty placeholders for tc via BSD socket (diff) | |
download | frr-d30d63f4f719168e179e34e1efa66ba86b33b493.tar.xz frr-d30d63f4f719168e179e34e1efa66ba86b33b493.zip |
Merge pull request #11694 from sigeryang/master
zebra: add basic traffic control API
Diffstat (limited to 'zebra')
-rw-r--r-- | zebra/debug_nl.c | 35 | ||||
-rw-r--r-- | zebra/dplane_fpm_nl.c | 3 | ||||
-rw-r--r-- | zebra/interface.c | 3 | ||||
-rw-r--r-- | zebra/kernel_netlink.c | 15 | ||||
-rw-r--r-- | zebra/kernel_socket.c | 6 | ||||
-rw-r--r-- | zebra/rt.h | 1 | ||||
-rw-r--r-- | zebra/subdir.am | 3 | ||||
-rw-r--r-- | zebra/tc_netlink.c | 468 | ||||
-rw-r--r-- | zebra/tc_netlink.h | 62 | ||||
-rw-r--r-- | zebra/tc_socket.c | 41 | ||||
-rw-r--r-- | zebra/zebra_dplane.c | 156 | ||||
-rw-r--r-- | zebra/zebra_dplane.h | 25 | ||||
-rw-r--r-- | zebra/zebra_nhg.c | 3 | ||||
-rw-r--r-- | zebra/zebra_rib.c | 5 |
14 files changed, 826 insertions, 0 deletions
diff --git a/zebra/debug_nl.c b/zebra/debug_nl.c index a16d44252..afefab667 100644 --- a/zebra/debug_nl.c +++ b/zebra/debug_nl.c @@ -1536,6 +1536,24 @@ next_rta: goto next_rta; } +static const char *tcm_nltype2str(int nltype) +{ + switch (nltype) { + case RTM_NEWQDISC: + case RTM_DELQDISC: + return "qdisc"; + case RTM_NEWTCLASS: + case RTM_DELTCLASS: + return "tclass"; + case RTM_NEWTFILTER: + case RTM_DELTFILTER: + return "tfilter"; + default: + /* should never hit */ + return "unknown"; + } +} + static void nlncm_dump(const struct netconfmsg *ncm, size_t msglen) { const struct rtattr *rta; @@ -1595,6 +1613,8 @@ void nl_dump(void *msg, size_t msglen) struct ifinfomsg *ifi; struct tunnel_msg *tnlm; struct fib_rule_hdr *frh; + struct tcmsg *tcm; + char fbuf[128]; char ibuf[128]; @@ -1730,6 +1750,21 @@ next_header: nlncm_dump(ncm, nlmsg->nlmsg_len - NLMSG_LENGTH(sizeof(*ncm))); break; + case RTM_NEWQDISC: + case RTM_DELQDISC: + case RTM_NEWTCLASS: + case RTM_DELTCLASS: + case RTM_NEWTFILTER: + case RTM_DELTFILTER: + tcm = NLMSG_DATA(nlmsg); + zlog_debug( + " tcm [type=%s family=%s (%d) ifindex=%d handle=%04x:%04x]", + tcm_nltype2str(nlmsg->nlmsg_type), + af_type2str(tcm->tcm_family), tcm->tcm_family, + tcm->tcm_ifindex, tcm->tcm_handle >> 16, + tcm->tcm_handle & 0xffff); + break; + default: break; } diff --git a/zebra/dplane_fpm_nl.c b/zebra/dplane_fpm_nl.c index ec4ea372f..d07c4c633 100644 --- a/zebra/dplane_fpm_nl.c +++ b/zebra/dplane_fpm_nl.c @@ -815,6 +815,9 @@ static int fpm_nl_enqueue(struct fpm_nl_ctx *fnc, struct zebra_dplane_ctx *ctx) case DPLANE_OP_INTF_INSTALL: case DPLANE_OP_INTF_UPDATE: case DPLANE_OP_INTF_DELETE: + case DPLANE_OP_TC_INSTALL: + case DPLANE_OP_TC_UPDATE: + case DPLANE_OP_TC_DELETE: case DPLANE_OP_NONE: break; diff --git a/zebra/interface.c b/zebra/interface.c index 205fa8829..c674b499a 100644 --- a/zebra/interface.c +++ b/zebra/interface.c @@ -1573,6 +1573,9 @@ void zebra_if_dplane_result(struct zebra_dplane_ctx *ctx) case 
DPLANE_OP_IPSET_ENTRY_DELETE: case DPLANE_OP_NEIGH_TABLE_UPDATE: case DPLANE_OP_GRE_SET: + case DPLANE_OP_TC_INSTALL: + case DPLANE_OP_TC_UPDATE: + case DPLANE_OP_TC_DELETE: break; /* should never hit here */ } } diff --git a/zebra/kernel_netlink.c b/zebra/kernel_netlink.c index 396ccb34b..45a372f88 100644 --- a/zebra/kernel_netlink.c +++ b/zebra/kernel_netlink.c @@ -47,6 +47,7 @@ #include "zebra/rt_netlink.h" #include "zebra/if_netlink.h" #include "zebra/rule_netlink.h" +#include "zebra/tc_netlink.h" #include "zebra/netconf_netlink.h" #include "zebra/zebra_errors.h" @@ -114,6 +115,15 @@ static const struct message nlmsg_str[] = {{RTM_NEWROUTE, "RTM_NEWROUTE"}, {RTM_NEWTUNNEL, "RTM_NEWTUNNEL"}, {RTM_DELTUNNEL, "RTM_DELTUNNEL"}, {RTM_GETTUNNEL, "RTM_GETTUNNEL"}, + {RTM_NEWQDISC, "RTM_NEWQDISC"}, + {RTM_DELQDISC, "RTM_DELQDISC"}, + {RTM_GETQDISC, "RTM_GETQDISC"}, + {RTM_NEWTCLASS, "RTM_NEWTCLASS"}, + {RTM_DELTCLASS, "RTM_DELTCLASS"}, + {RTM_GETTCLASS, "RTM_GETTCLASS"}, + {RTM_NEWTFILTER, "RTM_NEWTFILTER"}, + {RTM_DELTFILTER, "RTM_DELTFILTER"}, + {RTM_GETTFILTER, "RTM_GETTFILTER"}, {0}}; static const struct message rtproto_str[] = { @@ -1623,6 +1633,11 @@ static enum netlink_msg_status nl_put_msg(struct nl_batch *bth, case DPLANE_OP_INTF_UPDATE: case DPLANE_OP_INTF_DELETE: return netlink_put_intf_update_msg(bth, ctx); + + case DPLANE_OP_TC_INSTALL: + case DPLANE_OP_TC_UPDATE: + case DPLANE_OP_TC_DELETE: + return netlink_put_tc_update_msg(bth, ctx); } return FRR_NETLINK_ERROR; diff --git a/zebra/kernel_socket.c b/zebra/kernel_socket.c index 076e9c4df..cb549339a 100644 --- a/zebra/kernel_socket.c +++ b/zebra/kernel_socket.c @@ -1603,6 +1603,12 @@ void kernel_update_multi(struct dplane_ctx_q *ctx_list) res = kernel_intf_update(ctx); break; + case DPLANE_OP_TC_INSTALL: + case DPLANE_OP_TC_UPDATE: + case DPLANE_OP_TC_DELETE: + res = kernel_tc_update(ctx); + break; + /* Ignore 'notifications' - no-op */ case DPLANE_OP_SYS_ROUTE_ADD: case DPLANE_OP_SYS_ROUTE_DELETE: diff 
--git a/zebra/rt.h b/zebra/rt.h index 0a86a2897..d8a22d2cf 100644 --- a/zebra/rt.h +++ b/zebra/rt.h @@ -71,6 +71,7 @@ kernel_intf_update(struct zebra_dplane_ctx *ctx); extern enum zebra_dplane_result kernel_intf_netconf_update(struct zebra_dplane_ctx *ctx); +extern enum zebra_dplane_result kernel_tc_update(struct zebra_dplane_ctx *ctx); #endif /* !HAVE_NETLINK */ diff --git a/zebra/subdir.am b/zebra/subdir.am index a926c14ad..298b71598 100644 --- a/zebra/subdir.am +++ b/zebra/subdir.am @@ -82,6 +82,8 @@ zebra_zebra_SOURCES = \ zebra/rule_netlink.c \ zebra/rule_socket.c \ zebra/table_manager.c \ + zebra/tc_netlink.c \ + zebra/tc_socket.c \ zebra/zapi_msg.c \ zebra/zebra_dplane.c \ zebra/zebra_errors.c \ @@ -163,6 +165,7 @@ noinst_HEADERS += \ zebra/rtadv.h \ zebra/rule_netlink.h \ zebra/table_manager.h \ + zebra/tc_netlink.h \ zebra/zapi_msg.h \ zebra/zebra_dplane.h \ zebra/zebra_errors.h \ diff --git a/zebra/tc_netlink.c b/zebra/tc_netlink.c new file mode 100644 index 000000000..89ce07545 --- /dev/null +++ b/zebra/tc_netlink.c @@ -0,0 +1,468 @@ +/* + * Zebra Traffic Control (TC) interaction with the kernel using netlink. + * + * Copyright (C) 2022 Shichu Yang + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. 
+ */ + +#include <zebra.h> + +#ifdef HAVE_NETLINK + +#include <linux/if_ether.h> +#include <sys/socket.h> + +#include "if.h" +#include "prefix.h" +#include "vrf.h" + +#include <linux/fib_rules.h> +#include <linux/pkt_cls.h> +#include <linux/pkt_sched.h> +#include "zebra/zserv.h" +#include "zebra/zebra_ns.h" +#include "zebra/zebra_vrf.h" +#include "zebra/rt.h" +#include "zebra/interface.h" +#include "zebra/debug.h" +#include "zebra/rtadv.h" +#include "zebra/kernel_netlink.h" +#include "zebra/tc_netlink.h" +#include "zebra/zebra_errors.h" +#include "zebra/zebra_dplane.h" +#include "zebra/zebra_trace.h" + +/* TODO: move these bitflags to zebra_tc.h */ +#define TC_FILTER_SRC_IP (1 << 0) +#define TC_FILTER_DST_IP (1 << 1) +#define TC_FILTER_IP_PROTOCOL (1 << 9) + +#define TC_FREQ_DEFAULT (100) + +#define TC_MAJOR_BASE (0x1000u) +#define TC_MINOR_NOCLASS (0xffffu) + +#define TC_FILTER_MASK (0x8000u) + +#define TIME_UNITS_PER_SEC (1000000) +#define xmittime(r, s) (TIME_UNITS_PER_SEC * ((double)(s) / (double)(r))) + +static uint32_t tc_get_freq(void) +{ + int freq = 0; + FILE *fp = fopen("/proc/net/psched", "r"); + + if (fp) { + uint32_t nom, denom; + + if (fscanf(fp, "%*08x%*08x%08x%08x", &nom, &denom) == 2) { + if (nom == 1000000) + freq = denom; + } + fclose(fp); + } + + return freq == 0 ? 
TC_FREQ_DEFAULT : freq; +} + +static inline uint32_t tc_make_handle(uint16_t major, uint16_t minor) +{ + return (major) << 16 | (minor); +} + +static inline uint32_t tc_get_handle(struct zebra_dplane_ctx *ctx, + uint16_t minor) +{ + uint16_t major = TC_MAJOR_BASE + (uint16_t)dplane_ctx_get_ifindex(ctx); + + return tc_make_handle(major, minor); +} + +static void tc_calc_rate_table(struct tc_ratespec *ratespec, uint32_t *table, + uint32_t mtu) +{ + if (mtu == 0) + mtu = 2047; + + int cell_log = -1; + + if (cell_log < 0) { + cell_log = 0; + while ((mtu >> cell_log) > 255) + cell_log++; + } + + for (int i = 0; i < 256; i++) + table[i] = xmittime(ratespec->rate, (i + 1) << cell_log); + + ratespec->cell_align = -1; + ratespec->cell_log = cell_log; + ratespec->linklayer = TC_LINKLAYER_ETHERNET; +} + +static int tc_flower_get_inet_prefix(const struct prefix *prefix, + struct inet_prefix *addr) +{ + addr->family = prefix->family; + + if (addr->family == AF_INET) { + addr->bytelen = 4; + addr->bitlen = prefix->prefixlen; + addr->flags = 0; + addr->flags |= PREFIXLEN_SPECIFIED; + addr->flags |= ADDRTYPE_INET; + memcpy(addr->data, prefix->u.val32, sizeof(prefix->u.val32)); + } else if (addr->family == AF_INET6) { + addr->bytelen = 16; + addr->bitlen = prefix->prefixlen; + addr->flags = 0; + addr->flags |= PREFIXLEN_SPECIFIED; + addr->flags |= ADDRTYPE_INET; + memcpy(addr->data, prefix->u.val, sizeof(prefix->u.val)); + } else { + return -1; + } + + return 0; +} + +static int tc_flower_get_inet_mask(const struct prefix *prefix, + struct inet_prefix *addr) +{ + addr->family = prefix->family; + + if (addr->family == AF_INET) { + addr->bytelen = 4; + addr->bitlen = prefix->prefixlen; + addr->flags = 0; + addr->flags |= PREFIXLEN_SPECIFIED; + addr->flags |= ADDRTYPE_INET; + } else if (addr->family == AF_INET6) { + addr->bytelen = 16; + addr->bitlen = prefix->prefixlen; + addr->flags = 0; + addr->flags |= PREFIXLEN_SPECIFIED; + addr->flags |= ADDRTYPE_INET; + } else { + return -1; + 
} + + memset(addr->data, 0xff, addr->bytelen); + + int rest = prefix->prefixlen; + + for (int i = 0; i < addr->bytelen / 4; i++) { + if (!rest) { + addr->data[i] = 0; + } else if (rest / 32 >= 1) { + rest -= 32; + } else { + addr->data[i] <<= 32 - rest; + addr->data[i] = htonl(addr->data[i]); + rest = 0; + } + } + + return 0; +} + +/* + * Traffic control queue discipline encoding (only "htb" supported) + */ +static ssize_t netlink_qdisc_msg_encode(int cmd, struct zebra_dplane_ctx *ctx, + void *data, size_t datalen) +{ + struct nlsock *nl; + + const char *kind = "htb"; + + struct tc_htb_glob htb_glob = { + .rate2quantum = 10, .version = 3, .defcls = TC_MINOR_NOCLASS}; + + struct rtattr *nest; + + struct { + struct nlmsghdr n; + struct tcmsg t; + char buf[0]; + } *req = (void *)data; + + if (datalen < sizeof(*req)) + return 0; + + nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx)); + + memset(req, 0, sizeof(*req)); + + req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST; + + req->n.nlmsg_flags |= NLM_F_REPLACE; + + req->n.nlmsg_type = cmd; + + req->n.nlmsg_pid = nl->snl.nl_pid; + + req->t.tcm_family = AF_UNSPEC; + req->t.tcm_ifindex = dplane_ctx_get_ifindex(ctx); + req->t.tcm_handle = tc_get_handle(ctx, 0); + req->t.tcm_parent = TC_H_ROOT; + + nl_attr_put(&req->n, datalen, TCA_KIND, kind, strlen(kind) + 1); + + nest = nl_attr_nest(&req->n, datalen, TCA_OPTIONS); + + nl_attr_put(&req->n, datalen, TCA_HTB_INIT, &htb_glob, + sizeof(htb_glob)); + nl_attr_nest_end(&req->n, nest); + + return NLMSG_ALIGN(req->n.nlmsg_len); +} + +/* + * Traffic control class encoding + */ +static ssize_t netlink_tclass_msg_encode(int cmd, struct zebra_dplane_ctx *ctx, + void *data, size_t datalen) +{ + struct nlsock *nl; + struct tc_htb_opt htb_opt = {}; + + uint64_t rate, ceil; + uint64_t buffer, cbuffer; + + /* TODO: fetch mtu from interface */ + uint32_t mtu = 0; + + uint32_t rtab[256]; + uint32_t ctab[256]; + + struct 
rtattr *nest; + + struct { + struct nlmsghdr n; + struct tcmsg t; + char buf[0]; + } *req = (void *)data; + + if (datalen < sizeof(*req)) + return 0; + + nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx)); + + memset(req, 0, sizeof(*req)); + + req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST; + + req->n.nlmsg_type = cmd; + + req->n.nlmsg_pid = nl->snl.nl_pid; + + req->t.tcm_family = AF_UNSPEC; + req->t.tcm_ifindex = dplane_ctx_get_ifindex(ctx); + req->t.tcm_handle = tc_get_handle(ctx, 1); + req->t.tcm_parent = tc_get_handle(ctx, 0); + + rate = dplane_ctx_tc_get_rate(ctx); + ceil = dplane_ctx_tc_get_ceil(ctx); + + ceil = ceil < rate ? rate : ceil; + + htb_opt.rate.rate = (rate >> 32 != 0) ? ~0U : rate; + htb_opt.ceil.rate = (ceil >> 32 != 0) ? ~0U : ceil; + + buffer = rate / tc_get_freq(), cbuffer = ceil / tc_get_freq(); + + htb_opt.buffer = buffer; + htb_opt.cbuffer = cbuffer; + + /* + * Each rate spec gets its own table: rtab for "rate", ctab for + * "ceil". Both tables are sent to the kernel below, so ctab must + * be filled here rather than left as uninitialized stack memory. + */ + tc_calc_rate_table(&htb_opt.rate, rtab, mtu); + tc_calc_rate_table(&htb_opt.ceil, ctab, mtu); + + htb_opt.ceil.mpu = htb_opt.rate.mpu = 0; + htb_opt.ceil.overhead = htb_opt.rate.overhead = 0; + + nest = nl_attr_nest(&req->n, datalen, TCA_OPTIONS); + + /* + * The 32-bit fields in htb_opt saturate at ~0U; values that do not + * fit are carried in the matching 64-bit attribute (RATE64 for + * rate, CEIL64 for ceil). + */ + if (rate >> 32 != 0) { + nl_attr_put(&req->n, datalen, TCA_HTB_RATE64, &rate, + sizeof(rate)); + } + + if (ceil >> 32 != 0) { + nl_attr_put(&req->n, datalen, TCA_HTB_CEIL64, &ceil, + sizeof(ceil)); + } + + nl_attr_put(&req->n, datalen, TCA_HTB_PARMS, &htb_opt, sizeof(htb_opt)); + + nl_attr_put(&req->n, datalen, TCA_HTB_RTAB, rtab, sizeof(rtab)); + nl_attr_put(&req->n, datalen, TCA_HTB_CTAB, ctab, sizeof(ctab)); + nl_attr_nest_end(&req->n, nest); + + return NLMSG_ALIGN(req->n.nlmsg_len); +} + +/* + * Traffic control filter encoding (only "flower" supported) + */ +static ssize_t netlink_tfilter_msg_encode(int cmd, struct zebra_dplane_ctx *ctx, + void *data, size_t datalen) +{ + struct nlsock *nl; + struct rtattr *nest; + + const char *kind = "flower"; + + uint16_t priority; 
+ uint16_t protocol; + uint32_t classid; + uint32_t filter_bm; + uint32_t flags = 0; + + struct inet_prefix addr; + + struct { + struct nlmsghdr n; + struct tcmsg t; + char buf[0]; + } *req = (void *)data; + + if (datalen < sizeof(*req)) + return 0; + + nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx)); + + memset(req, 0, sizeof(*req)); + + req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST; + + req->n.nlmsg_flags |= NLM_F_EXCL; + + req->n.nlmsg_type = cmd; + + req->n.nlmsg_pid = nl->snl.nl_pid; + + req->t.tcm_family = AF_UNSPEC; + req->t.tcm_ifindex = dplane_ctx_get_ifindex(ctx); + + /* TODO: priority and layer-3 protocol support */ + priority = 0; + protocol = htons(ETH_P_IP); + classid = tc_get_handle(ctx, 1); + filter_bm = dplane_ctx_tc_get_filter_bm(ctx); + + req->t.tcm_info = tc_make_handle(priority, protocol); + + req->t.tcm_handle = 1; + req->t.tcm_parent = tc_get_handle(ctx, 0); + + nl_attr_put(&req->n, datalen, TCA_KIND, kind, strlen(kind) + 1); + nest = nl_attr_nest(&req->n, datalen, TCA_OPTIONS); + + nl_attr_put(&req->n, datalen, TCA_FLOWER_CLASSID, &classid, + sizeof(classid)); + + if (filter_bm & TC_FILTER_SRC_IP) { + const struct prefix *src_p = dplane_ctx_tc_get_src_ip(ctx); + + if (tc_flower_get_inet_prefix(src_p, &addr) != 0) + return 0; + + nl_attr_put(&req->n, datalen, + (addr.family == AF_INET) ? TCA_FLOWER_KEY_IPV4_SRC + : TCA_FLOWER_KEY_IPV6_SRC, + addr.data, addr.bytelen); + + if (tc_flower_get_inet_mask(src_p, &addr) != 0) + return 0; + + nl_attr_put(&req->n, datalen, + (addr.family == AF_INET) + ? TCA_FLOWER_KEY_IPV4_SRC_MASK + : TCA_FLOWER_KEY_IPV6_SRC_MASK, + addr.data, addr.bytelen); + } + + if (filter_bm & TC_FILTER_DST_IP) { + const struct prefix *dst_p = dplane_ctx_tc_get_dst_ip(ctx); + + if (tc_flower_get_inet_prefix(dst_p, &addr) != 0) + return 0; + + nl_attr_put(&req->n, datalen, + (addr.family == AF_INET) ? 
TCA_FLOWER_KEY_IPV4_DST + : TCA_FLOWER_KEY_IPV6_DST, + addr.data, addr.bytelen); + + if (tc_flower_get_inet_mask(dst_p, &addr) != 0) + return 0; + + nl_attr_put(&req->n, datalen, + (addr.family == AF_INET) + ? TCA_FLOWER_KEY_IPV4_DST_MASK + : TCA_FLOWER_KEY_IPV6_DST_MASK, + addr.data, addr.bytelen); + } + + if (filter_bm & TC_FILTER_IP_PROTOCOL) { + nl_attr_put8(&req->n, datalen, TCA_FLOWER_KEY_IP_PROTO, + dplane_ctx_tc_get_ip_proto(ctx)); + } + + nl_attr_put32(&req->n, datalen, TCA_FLOWER_FLAGS, flags); + + nl_attr_put16(&req->n, datalen, TCA_FLOWER_KEY_ETH_TYPE, protocol); + nl_attr_nest_end(&req->n, nest); + + return NLMSG_ALIGN(req->n.nlmsg_len); +} + +static ssize_t netlink_newqdisc_msg_encoder(struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen) +{ + return netlink_qdisc_msg_encode(RTM_NEWQDISC, ctx, buf, buflen); +} + +static ssize_t netlink_newtclass_msg_encoder(struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen) +{ + return netlink_tclass_msg_encode(RTM_NEWTCLASS, ctx, buf, buflen); +} + +static ssize_t netlink_newtfilter_msg_encoder(struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen) +{ + return netlink_tfilter_msg_encode(RTM_NEWTFILTER, ctx, buf, buflen); +} + +enum netlink_msg_status netlink_put_tc_update_msg(struct nl_batch *bth, + struct zebra_dplane_ctx *ctx) +{ + /* TODO: error handling and other actions (delete, replace, ...) */ + + netlink_batch_add_msg(bth, ctx, netlink_newqdisc_msg_encoder, false); + netlink_batch_add_msg(bth, ctx, netlink_newtclass_msg_encoder, false); + return netlink_batch_add_msg(bth, ctx, netlink_newtfilter_msg_encoder, + false); +} + +#endif /* HAVE_NETLINK */ diff --git a/zebra/tc_netlink.h b/zebra/tc_netlink.h new file mode 100644 index 000000000..2190bca4f --- /dev/null +++ b/zebra/tc_netlink.h @@ -0,0 +1,62 @@ +/* + * Zebra Traffic Control (TC) interaction with the kernel using netlink. + * + * Copyright (C) 2022 Shichu Yang + * + * This file is part of FRR. 
+ * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#ifndef _ZEBRA_TC_NETLINK_H +#define _ZEBRA_TC_NETLINK_H + +#ifdef HAVE_NETLINK + +#ifdef __cplusplus +extern "C" { +#endif + +/* Represent a prefixed address in flower filter */ + +struct inet_prefix { + uint16_t flags; + uint16_t bytelen; + uint16_t bitlen; + uint16_t family; + uint32_t data[64]; +}; + +enum { + PREFIXLEN_SPECIFIED = (1 << 0), + ADDRTYPE_INET = (1 << 1), + ADDRTYPE_UNSPEC = (1 << 2), + ADDRTYPE_MULTI = (1 << 3), + + ADDRTYPE_INET_UNSPEC = ADDRTYPE_INET | ADDRTYPE_UNSPEC, + ADDRTYPE_INET_MULTI = ADDRTYPE_INET | ADDRTYPE_MULTI +}; + +extern enum netlink_msg_status +netlink_put_tc_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx); + +#ifdef __cplusplus +} +#endif + +#endif /* HAVE_NETLINK */ + +#endif /* _ZEBRA_TC_NETLINK_H */ diff --git a/zebra/tc_socket.c b/zebra/tc_socket.c new file mode 100644 index 000000000..0bf9e487b --- /dev/null +++ b/zebra/tc_socket.c @@ -0,0 +1,41 @@ +/* + * Zebra Traffic Control (TC) interaction with the kernel using socket. + * + * Copyright (C) 2022 Shichu Yang + * + * This file is part of FRR. 
+ * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#include <zebra.h> + +#ifndef HAVE_NETLINK + +#include "lib_errors.h" + +#include "zebra/rt.h" +#include "zebra/zebra_dplane.h" +#include "zebra/zebra_errors.h" + +enum zebra_dplane_result kernel_tc_update(struct zebra_dplane_ctx *ctx) +{ + flog_err(EC_LIB_UNAVAILABLE, "%s not Implemented for this platform", + __func__); + return ZEBRA_DPLANE_REQUEST_FAILURE; +} + +#endif /* !HAVE_NETLINK */ diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c index fd170450f..763c92ebb 100644 --- a/zebra/zebra_dplane.c +++ b/zebra/zebra_dplane.c @@ -314,6 +314,25 @@ struct dplane_netconf_info { }; /* + * Traffic control contexts for the dplane + */ +struct dplane_tc_info { + /* Rate spec (unit: Bytes/s) */ + uint64_t rate; + uint64_t ceil; + + /* TODO: custom burst */ + + /* Filter components for "tfilter" */ + uint32_t filter_bm; + struct prefix src_ip; + struct prefix dst_ip; + uint8_t ip_proto; + + /* TODO: more filter components */ +}; + +/* * The context block used to exchange info about route updates across * the boundary between the zebra main context (and pthread) and the * dataplane layer (and pthread). 
@@ -362,6 +381,7 @@ struct zebra_dplane_ctx { struct dplane_mac_info macinfo; struct dplane_neigh_info neigh; struct dplane_rule_info rule; + struct dplane_tc_info tc; struct zebra_pbr_iptable iptable; struct zebra_pbr_ipset ipset; struct { @@ -540,6 +560,9 @@ static struct zebra_dplane_globals { _Atomic uint32_t dg_intfs_in; _Atomic uint32_t dg_intf_errors; + _Atomic uint32_t dg_tcs_in; + _Atomic uint32_t dg_tcs_errors; + /* Dataplane pthread */ struct frr_pthread *dg_pthread; @@ -777,6 +800,9 @@ static void dplane_ctx_free_internal(struct zebra_dplane_ctx *ctx) case DPLANE_OP_INTF_INSTALL: case DPLANE_OP_INTF_UPDATE: case DPLANE_OP_INTF_DELETE: + case DPLANE_OP_TC_INSTALL: + case DPLANE_OP_TC_UPDATE: + case DPLANE_OP_TC_DELETE: break; case DPLANE_OP_IPSET_ENTRY_ADD: @@ -1100,6 +1126,16 @@ const char *dplane_op2str(enum dplane_op_e op) case DPLANE_OP_INTF_DELETE: ret = "INTF_DELETE"; break; + + case DPLANE_OP_TC_INSTALL: + ret = "TC_INSTALL"; + break; + case DPLANE_OP_TC_UPDATE: + ret = "TC_UPDATE"; + break; + case DPLANE_OP_TC_DELETE: + ret = "TC_DELETE"; + break; } return ret; @@ -1419,6 +1455,50 @@ uint8_t dplane_ctx_get_old_distance(const struct zebra_dplane_ctx *ctx) return ctx->u.rinfo.zd_old_distance; } +uint64_t dplane_ctx_tc_get_rate(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.tc.rate; +} + +uint64_t dplane_ctx_tc_get_ceil(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.tc.ceil; +} + +uint32_t dplane_ctx_tc_get_filter_bm(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.tc.filter_bm; +} + +const struct prefix * +dplane_ctx_tc_get_src_ip(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return &(ctx->u.tc.src_ip); +} + +const struct prefix * +dplane_ctx_tc_get_dst_ip(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return &(ctx->u.tc.dst_ip); +} + +uint8_t dplane_ctx_tc_get_ip_proto(const struct zebra_dplane_ctx *ctx) 
+{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.tc.ip_proto; +} + /* * Set the nexthops associated with a context: note that processing code * may well expect that nexthops are in canonical (sorted) order, so we @@ -2691,6 +2771,26 @@ done: return ret; } +int dplane_ctx_tc_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op) +{ + int ret = EINVAL; + + struct zebra_vrf *zvrf = NULL; + struct zebra_ns *zns = NULL; + + ctx->zd_op = op; + ctx->zd_status = ZEBRA_DPLANE_REQUEST_SUCCESS; + + /* TODO: init traffic control qdisc */ + zns = zvrf ? zvrf->zns : zebra_ns_lookup(NS_DEFAULT); + + dplane_ctx_ns_init(ctx, zns, true); + + ret = AOK; + + return ret; +} + /** * dplane_ctx_nexthop_init() - Initialize a context block for a nexthop update * @@ -3410,6 +3510,47 @@ dplane_route_update_internal(struct route_node *rn, return result; } +static enum zebra_dplane_result dplane_tc_update_internal(enum dplane_op_e op) +{ + enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE; + int ret = EINVAL; + struct zebra_dplane_ctx *ctx = NULL; + + /* Obtain context block */ + ctx = dplane_ctx_alloc(); + + if (!ctx) { + ret = ENOMEM; + goto done; + } + + /* Init context with info from zebra data structs */ + ret = dplane_ctx_tc_init(ctx, op); + + if (ret == AOK) + ret = dplane_update_enqueue(ctx); + +done: + /* Update counter */ + atomic_fetch_add_explicit(&zdplane_info.dg_tcs_in, 1, + memory_order_relaxed); + if (ret == AOK) { + result = ZEBRA_DPLANE_REQUEST_QUEUED; + } else { + atomic_fetch_add_explicit(&zdplane_info.dg_tcs_errors, 1, + memory_order_relaxed); + if (ctx) + dplane_ctx_free(&ctx); + } + + return result; +} + +enum zebra_dplane_result dplane_tc_update(void) +{ + return dplane_tc_update_internal(DPLANE_OP_TC_UPDATE); +} + /** * dplane_nexthop_update_internal() - Helper for enqueuing nexthop changes * @@ -5591,6 +5732,13 @@ static void kernel_dplane_log_detail(struct zebra_dplane_ctx *ctx) dplane_ctx_get_ifindex(ctx), dplane_ctx_intf_is_protodown(ctx)); break; + + /* 
TODO: more detailed log */ + case DPLANE_OP_TC_INSTALL: + case DPLANE_OP_TC_UPDATE: + case DPLANE_OP_TC_DELETE: + zlog_debug("Dplane tc ifidx %u", dplane_ctx_get_ifindex(ctx)); + break; } } @@ -5734,6 +5882,14 @@ static void kernel_dplane_handle_result(struct zebra_dplane_ctx *ctx) 1, memory_order_relaxed); break; + case DPLANE_OP_TC_INSTALL: + case DPLANE_OP_TC_UPDATE: + case DPLANE_OP_TC_DELETE: + if (res != ZEBRA_DPLANE_REQUEST_SUCCESS) + atomic_fetch_add_explicit(&zdplane_info.dg_tcs_errors, + 1, memory_order_relaxed); + break; + /* Ignore 'notifications' - no-op */ case DPLANE_OP_SYS_ROUTE_ADD: case DPLANE_OP_SYS_ROUTE_DELETE: diff --git a/zebra/zebra_dplane.h b/zebra/zebra_dplane.h index c96ea4009..8b239a9ba 100644 --- a/zebra/zebra_dplane.h +++ b/zebra/zebra_dplane.h @@ -193,6 +193,11 @@ enum dplane_op_e { DPLANE_OP_INTF_INSTALL, DPLANE_OP_INTF_UPDATE, DPLANE_OP_INTF_DELETE, + + /* Traffic control */ + DPLANE_OP_TC_INSTALL, + DPLANE_OP_TC_UPDATE, + DPLANE_OP_TC_DELETE, }; /* @@ -378,6 +383,16 @@ uint8_t dplane_ctx_get_distance(const struct zebra_dplane_ctx *ctx); void dplane_ctx_set_distance(struct zebra_dplane_ctx *ctx, uint8_t distance); uint8_t dplane_ctx_get_old_distance(const struct zebra_dplane_ctx *ctx); +/* Accessors for traffic control context */ +uint64_t dplane_ctx_tc_get_rate(const struct zebra_dplane_ctx *ctx); +uint64_t dplane_ctx_tc_get_ceil(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_tc_get_filter_bm(const struct zebra_dplane_ctx *ctx); +const struct prefix * +dplane_ctx_tc_get_src_ip(const struct zebra_dplane_ctx *ctx); +const struct prefix * +dplane_ctx_tc_get_dst_ip(const struct zebra_dplane_ctx *ctx); +uint8_t dplane_ctx_tc_get_ip_proto(const struct zebra_dplane_ctx *ctx); + void dplane_ctx_set_nexthops(struct zebra_dplane_ctx *ctx, struct nexthop *nh); void dplane_ctx_set_backup_nhg(struct zebra_dplane_ctx *ctx, const struct nexthop_group *nhg); @@ -708,6 +723,13 @@ enum zebra_dplane_result dplane_intf_update(const struct 
interface *ifp); enum zebra_dplane_result dplane_intf_delete(const struct interface *ifp); /* + * Enqueue traffic control changes for the dataplane. + */ +enum zebra_dplane_result dplane_tc_add(void); +enum zebra_dplane_result dplane_tc_update(void); +enum zebra_dplane_result dplane_tc_delete(void); + +/* * Link layer operations for the dataplane. */ enum zebra_dplane_result dplane_neigh_ip_update(enum dplane_op_e op, @@ -849,6 +871,9 @@ int dplane_ctx_nexthop_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op, int dplane_ctx_intf_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op, const struct interface *ifp); +/* Encode traffic control information into data plane context. */ +int dplane_ctx_tc_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op); + /* Retrieve the limit on the number of pending, unprocessed updates. */ uint32_t dplane_get_in_queue_limit(void); diff --git a/zebra/zebra_nhg.c b/zebra/zebra_nhg.c index c5b533fc2..1964c763c 100644 --- a/zebra/zebra_nhg.c +++ b/zebra/zebra_nhg.c @@ -3125,6 +3125,9 @@ void zebra_nhg_dplane_result(struct zebra_dplane_ctx *ctx) case DPLANE_OP_INTF_INSTALL: case DPLANE_OP_INTF_UPDATE: case DPLANE_OP_INTF_DELETE: + case DPLANE_OP_TC_INSTALL: + case DPLANE_OP_TC_UPDATE: + case DPLANE_OP_TC_DELETE: break; } } diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c index 79eb99ddf..03bda8cc3 100644 --- a/zebra/zebra_rib.c +++ b/zebra/zebra_rib.c @@ -4391,6 +4391,11 @@ static void rib_process_dplane_results(struct thread *thread) zebra_if_dplane_result(ctx); break; + case DPLANE_OP_TC_INSTALL: + case DPLANE_OP_TC_UPDATE: + case DPLANE_OP_TC_DELETE: + break; + /* Some op codes not handled here */ case DPLANE_OP_ADDR_INSTALL: case DPLANE_OP_ADDR_UNINSTALL: |