summaryrefslogtreecommitdiffstats
path: root/zebra
diff options
context:
space:
mode:
authorSiger Yang <siger.yang@outlook.com>2022-07-29 15:59:19 +0200
committerSiger Yang <siger.yang@outlook.com>2022-08-10 20:32:43 +0200
commit449a30edf6c91045f1419536672608519daec1b9 (patch)
tree29d4be814dd3401a3c71952bd22f9d2ad3de6c0c /zebra
parentinclude: add linux header pkt_cls.h (diff)
downloadfrr-449a30edf6c91045f1419536672608519daec1b9.tar.xz
frr-449a30edf6c91045f1419536672608519daec1b9.zip
zebra: add tc netlink and dplane ops
This commit implements necessary netlink encoders for traffic control including QDISC, TCLASS and TFILTER, and adds basic dplane operations. Co-authored-by: Stephen Worley <sworley@nvidia.com> Signed-off-by: Siger Yang <siger.yang@outlook.com>
Diffstat (limited to 'zebra')
-rw-r--r--zebra/debug_nl.c35
-rw-r--r--zebra/dplane_fpm_nl.c3
-rw-r--r--zebra/interface.c3
-rw-r--r--zebra/kernel_netlink.c15
-rw-r--r--zebra/rt.h1
-rw-r--r--zebra/subdir.am2
-rw-r--r--zebra/tc_netlink.c468
-rw-r--r--zebra/tc_netlink.h62
-rw-r--r--zebra/zebra_dplane.c156
-rw-r--r--zebra/zebra_dplane.h25
-rw-r--r--zebra/zebra_nhg.c3
-rw-r--r--zebra/zebra_rib.c5
12 files changed, 778 insertions, 0 deletions
diff --git a/zebra/debug_nl.c b/zebra/debug_nl.c
index a16d44252..afefab667 100644
--- a/zebra/debug_nl.c
+++ b/zebra/debug_nl.c
@@ -1536,6 +1536,24 @@ next_rta:
goto next_rta;
}
+static const char *tcm_nltype2str(int nltype)
+{
+ switch (nltype) {
+ case RTM_NEWQDISC:
+ case RTM_DELQDISC:
+ return "qdisc";
+ case RTM_NEWTCLASS:
+ case RTM_DELTCLASS:
+ return "tclass";
+ case RTM_NEWTFILTER:
+ case RTM_DELTFILTER:
+ return "tfilter";
+ default:
+ /* should never hit */
+ return "unknown";
+ }
+}
+
static void nlncm_dump(const struct netconfmsg *ncm, size_t msglen)
{
const struct rtattr *rta;
@@ -1595,6 +1613,8 @@ void nl_dump(void *msg, size_t msglen)
struct ifinfomsg *ifi;
struct tunnel_msg *tnlm;
struct fib_rule_hdr *frh;
+ struct tcmsg *tcm;
+
char fbuf[128];
char ibuf[128];
@@ -1730,6 +1750,21 @@ next_header:
nlncm_dump(ncm, nlmsg->nlmsg_len - NLMSG_LENGTH(sizeof(*ncm)));
break;
+ case RTM_NEWQDISC:
+ case RTM_DELQDISC:
+ case RTM_NEWTCLASS:
+ case RTM_DELTCLASS:
+ case RTM_NEWTFILTER:
+ case RTM_DELTFILTER:
+ tcm = NLMSG_DATA(nlmsg);
+ zlog_debug(
+ " tcm [type=%s family=%s (%d) ifindex=%d handle=%04x:%04x]",
+ tcm_nltype2str(nlmsg->nlmsg_type),
+ af_type2str(tcm->tcm_family), tcm->tcm_family,
+ tcm->tcm_ifindex, tcm->tcm_handle >> 16,
+ tcm->tcm_handle & 0xffff);
+ break;
+
default:
break;
}
diff --git a/zebra/dplane_fpm_nl.c b/zebra/dplane_fpm_nl.c
index ec4ea372f..d07c4c633 100644
--- a/zebra/dplane_fpm_nl.c
+++ b/zebra/dplane_fpm_nl.c
@@ -815,6 +815,9 @@ static int fpm_nl_enqueue(struct fpm_nl_ctx *fnc, struct zebra_dplane_ctx *ctx)
case DPLANE_OP_INTF_INSTALL:
case DPLANE_OP_INTF_UPDATE:
case DPLANE_OP_INTF_DELETE:
+ case DPLANE_OP_TC_INSTALL:
+ case DPLANE_OP_TC_UPDATE:
+ case DPLANE_OP_TC_DELETE:
case DPLANE_OP_NONE:
break;
diff --git a/zebra/interface.c b/zebra/interface.c
index 205fa8829..c674b499a 100644
--- a/zebra/interface.c
+++ b/zebra/interface.c
@@ -1573,6 +1573,9 @@ void zebra_if_dplane_result(struct zebra_dplane_ctx *ctx)
case DPLANE_OP_IPSET_ENTRY_DELETE:
case DPLANE_OP_NEIGH_TABLE_UPDATE:
case DPLANE_OP_GRE_SET:
+ case DPLANE_OP_TC_INSTALL:
+ case DPLANE_OP_TC_UPDATE:
+ case DPLANE_OP_TC_DELETE:
break; /* should never hit here */
}
}
diff --git a/zebra/kernel_netlink.c b/zebra/kernel_netlink.c
index 396ccb34b..45a372f88 100644
--- a/zebra/kernel_netlink.c
+++ b/zebra/kernel_netlink.c
@@ -47,6 +47,7 @@
#include "zebra/rt_netlink.h"
#include "zebra/if_netlink.h"
#include "zebra/rule_netlink.h"
+#include "zebra/tc_netlink.h"
#include "zebra/netconf_netlink.h"
#include "zebra/zebra_errors.h"
@@ -114,6 +115,15 @@ static const struct message nlmsg_str[] = {{RTM_NEWROUTE, "RTM_NEWROUTE"},
{RTM_NEWTUNNEL, "RTM_NEWTUNNEL"},
{RTM_DELTUNNEL, "RTM_DELTUNNEL"},
{RTM_GETTUNNEL, "RTM_GETTUNNEL"},
+ {RTM_NEWQDISC, "RTM_NEWQDISC"},
+ {RTM_DELQDISC, "RTM_DELQDISC"},
+ {RTM_GETQDISC, "RTM_GETQDISC"},
+ {RTM_NEWTCLASS, "RTM_NEWTCLASS"},
+ {RTM_DELTCLASS, "RTM_DELTCLASS"},
+ {RTM_GETTCLASS, "RTM_GETTCLASS"},
+ {RTM_NEWTFILTER, "RTM_NEWTFILTER"},
+ {RTM_DELTFILTER, "RTM_DELTFILTER"},
+ {RTM_GETTFILTER, "RTM_GETTFILTER"},
{0}};
static const struct message rtproto_str[] = {
@@ -1623,6 +1633,11 @@ static enum netlink_msg_status nl_put_msg(struct nl_batch *bth,
case DPLANE_OP_INTF_UPDATE:
case DPLANE_OP_INTF_DELETE:
return netlink_put_intf_update_msg(bth, ctx);
+
+ case DPLANE_OP_TC_INSTALL:
+ case DPLANE_OP_TC_UPDATE:
+ case DPLANE_OP_TC_DELETE:
+ return netlink_put_tc_update_msg(bth, ctx);
}
return FRR_NETLINK_ERROR;
diff --git a/zebra/rt.h b/zebra/rt.h
index 0a86a2897..d8a22d2cf 100644
--- a/zebra/rt.h
+++ b/zebra/rt.h
@@ -71,6 +71,7 @@ kernel_intf_update(struct zebra_dplane_ctx *ctx);
extern enum zebra_dplane_result
kernel_intf_netconf_update(struct zebra_dplane_ctx *ctx);
+extern enum zebra_dplane_result kernel_tc_update(struct zebra_dplane_ctx *ctx);
#endif /* !HAVE_NETLINK */
diff --git a/zebra/subdir.am b/zebra/subdir.am
index a926c14ad..6fd3d20d3 100644
--- a/zebra/subdir.am
+++ b/zebra/subdir.am
@@ -82,6 +82,7 @@ zebra_zebra_SOURCES = \
zebra/rule_netlink.c \
zebra/rule_socket.c \
zebra/table_manager.c \
+ zebra/tc_netlink.c \
zebra/zapi_msg.c \
zebra/zebra_dplane.c \
zebra/zebra_errors.c \
@@ -163,6 +164,7 @@ noinst_HEADERS += \
zebra/rtadv.h \
zebra/rule_netlink.h \
zebra/table_manager.h \
+ zebra/tc_netlink.h \
zebra/zapi_msg.h \
zebra/zebra_dplane.h \
zebra/zebra_errors.h \
diff --git a/zebra/tc_netlink.c b/zebra/tc_netlink.c
new file mode 100644
index 000000000..89ce07545
--- /dev/null
+++ b/zebra/tc_netlink.c
@@ -0,0 +1,468 @@
+/*
+ * Zebra Traffic Control (TC) interaction with the kernel using netlink.
+ *
+ * Copyright (C) 2022 Shichu Yang
+ *
+ * This file is part of FRR.
+ *
+ * FRR is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * FRR is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with FRR; see the file COPYING. If not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include <zebra.h>
+
+#ifdef HAVE_NETLINK
+
+#include <linux/if_ether.h>
+#include <sys/socket.h>
+
+#include "if.h"
+#include "prefix.h"
+#include "vrf.h"
+
+#include <linux/fib_rules.h>
+#include <linux/pkt_cls.h>
+#include <linux/pkt_sched.h>
+#include "zebra/zserv.h"
+#include "zebra/zebra_ns.h"
+#include "zebra/zebra_vrf.h"
+#include "zebra/rt.h"
+#include "zebra/interface.h"
+#include "zebra/debug.h"
+#include "zebra/rtadv.h"
+#include "zebra/kernel_netlink.h"
+#include "zebra/tc_netlink.h"
+#include "zebra/zebra_errors.h"
+#include "zebra/zebra_dplane.h"
+#include "zebra/zebra_trace.h"
+
+/* TODO: move these bitflags to zebra_tc.h */
+#define TC_FILTER_SRC_IP (1 << 0)
+#define TC_FILTER_DST_IP (1 << 1)
+#define TC_FILTER_IP_PROTOCOL (1 << 9)
+
+#define TC_FREQ_DEFAULT (100)
+
+#define TC_MAJOR_BASE (0x1000u)
+#define TC_MINOR_NOCLASS (0xffffu)
+
+#define TC_FILTER_MASK (0x8000u)
+
+#define TIME_UNITS_PER_SEC (1000000)
+#define xmittime(r, s) (TIME_UNITS_PER_SEC * ((double)(s) / (double)(r)))
+
+static uint32_t tc_get_freq(void)
+{
+ int freq = 0;
+ FILE *fp = fopen("/proc/net/psched", "r");
+
+ if (fp) {
+ uint32_t nom, denom;
+
+ if (fscanf(fp, "%*08x%*08x%08x%08x", &nom, &denom) == 2) {
+ if (nom == 1000000)
+ freq = denom;
+ }
+ fclose(fp);
+ }
+
+ return freq == 0 ? TC_FREQ_DEFAULT : freq;
+}
+
+static inline uint32_t tc_make_handle(uint16_t major, uint16_t minor)
+{
+ return (major) << 16 | (minor);
+}
+
+static inline uint32_t tc_get_handle(struct zebra_dplane_ctx *ctx,
+ uint16_t minor)
+{
+ uint16_t major = TC_MAJOR_BASE + (uint16_t)dplane_ctx_get_ifindex(ctx);
+
+ return tc_make_handle(major, minor);
+}
+
+static void tc_calc_rate_table(struct tc_ratespec *ratespec, uint32_t *table,
+ uint32_t mtu)
+{
+ if (mtu == 0)
+ mtu = 2047;
+
+ int cell_log = -1;
+
+ if (cell_log < 0) {
+ cell_log = 0;
+ while ((mtu >> cell_log) > 255)
+ cell_log++;
+ }
+
+ for (int i = 0; i < 256; i++)
+ table[i] = xmittime(ratespec->rate, (i + 1) << cell_log);
+
+ ratespec->cell_align = -1;
+ ratespec->cell_log = cell_log;
+ ratespec->linklayer = TC_LINKLAYER_ETHERNET;
+}
+
+static int tc_flower_get_inet_prefix(const struct prefix *prefix,
+ struct inet_prefix *addr)
+{
+ addr->family = prefix->family;
+
+ if (addr->family == AF_INET) {
+ addr->bytelen = 4;
+ addr->bitlen = prefix->prefixlen;
+ addr->flags = 0;
+ addr->flags |= PREFIXLEN_SPECIFIED;
+ addr->flags |= ADDRTYPE_INET;
+ memcpy(addr->data, prefix->u.val32, sizeof(prefix->u.val32));
+ } else if (addr->family == AF_INET6) {
+ addr->bytelen = 16;
+ addr->bitlen = prefix->prefixlen;
+ addr->flags = 0;
+ addr->flags |= PREFIXLEN_SPECIFIED;
+ addr->flags |= ADDRTYPE_INET;
+ memcpy(addr->data, prefix->u.val, sizeof(prefix->u.val));
+ } else {
+ return -1;
+ }
+
+ return 0;
+}
+
+static int tc_flower_get_inet_mask(const struct prefix *prefix,
+ struct inet_prefix *addr)
+{
+ addr->family = prefix->family;
+
+ if (addr->family == AF_INET) {
+ addr->bytelen = 4;
+ addr->bitlen = prefix->prefixlen;
+ addr->flags = 0;
+ addr->flags |= PREFIXLEN_SPECIFIED;
+ addr->flags |= ADDRTYPE_INET;
+ } else if (addr->family == AF_INET6) {
+ addr->bytelen = 16;
+ addr->bitlen = prefix->prefixlen;
+ addr->flags = 0;
+ addr->flags |= PREFIXLEN_SPECIFIED;
+ addr->flags |= ADDRTYPE_INET;
+ } else {
+ return -1;
+ }
+
+ memset(addr->data, 0xff, addr->bytelen);
+
+ int rest = prefix->prefixlen;
+
+ for (int i = 0; i < addr->bytelen / 4; i++) {
+ if (!rest) {
+ addr->data[i] = 0;
+ } else if (rest / 32 >= 1) {
+ rest -= 32;
+ } else {
+ addr->data[i] <<= 32 - rest;
+ addr->data[i] = htonl(addr->data[i]);
+ rest = 0;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Traffic control queue discipline encoding (only "htb" supported)
+ */
+static ssize_t netlink_qdisc_msg_encode(int cmd, struct zebra_dplane_ctx *ctx,
+ void *data, size_t datalen)
+{
+ struct nlsock *nl;
+
+ const char *kind = "htb";
+
+ struct tc_htb_glob htb_glob = {
+ .rate2quantum = 10, .version = 3, .defcls = TC_MINOR_NOCLASS};
+
+ struct rtattr *nest;
+
+ struct {
+ struct nlmsghdr n;
+ struct tcmsg t;
+ char buf[0];
+ } *req = (void *)data;
+
+ if (datalen < sizeof(*req))
+ return 0;
+
+ nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
+
+ memset(req, 0, sizeof(*req));
+
+ req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
+ req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
+
+ req->n.nlmsg_flags |= NLM_F_REPLACE;
+
+ req->n.nlmsg_type = cmd;
+
+ req->n.nlmsg_pid = nl->snl.nl_pid;
+
+ req->t.tcm_family = AF_UNSPEC;
+ req->t.tcm_ifindex = dplane_ctx_get_ifindex(ctx);
+ req->t.tcm_handle = tc_get_handle(ctx, 0);
+ req->t.tcm_parent = TC_H_ROOT;
+
+ nl_attr_put(&req->n, datalen, TCA_KIND, kind, strlen(kind) + 1);
+
+ nest = nl_attr_nest(&req->n, datalen, TCA_OPTIONS);
+
+ nl_attr_put(&req->n, datalen, TCA_HTB_INIT, &htb_glob,
+ sizeof(htb_glob));
+ nl_attr_nest_end(&req->n, nest);
+
+ return NLMSG_ALIGN(req->n.nlmsg_len);
+}
+
+/*
+ * Traffic control class encoding
+ */
+static ssize_t netlink_tclass_msg_encode(int cmd, struct zebra_dplane_ctx *ctx,
+ void *data, size_t datalen)
+{
+ struct nlsock *nl;
+ struct tc_htb_opt htb_opt = {};
+
+ uint64_t rate, ceil;
+ uint64_t buffer, cbuffer;
+
+ /* TODO: fetch mtu from interface */
+ uint32_t mtu = 0;
+
+ uint32_t rtab[256];
+ uint32_t ctab[256];
+
+ struct rtattr *nest;
+
+ struct {
+ struct nlmsghdr n;
+ struct tcmsg t;
+ char buf[0];
+ } *req = (void *)data;
+
+ if (datalen < sizeof(*req))
+ return 0;
+
+ nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
+
+ memset(req, 0, sizeof(*req));
+
+ req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
+ req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
+
+ req->n.nlmsg_type = cmd;
+
+ req->n.nlmsg_pid = nl->snl.nl_pid;
+
+ req->t.tcm_family = AF_UNSPEC;
+ req->t.tcm_ifindex = dplane_ctx_get_ifindex(ctx);
+ req->t.tcm_handle = tc_get_handle(ctx, 1);
+ req->t.tcm_parent = tc_get_handle(ctx, 0);
+
+ rate = dplane_ctx_tc_get_rate(ctx);
+ ceil = dplane_ctx_tc_get_ceil(ctx);
+
+ ceil = ceil < rate ? rate : ceil;
+
+ htb_opt.rate.rate = (rate >> 32 != 0) ? ~0U : rate;
+ htb_opt.ceil.rate = (ceil >> 32 != 0) ? ~0U : ceil;
+
+ buffer = rate / tc_get_freq(), cbuffer = ceil / tc_get_freq();
+
+ htb_opt.buffer = buffer;
+ htb_opt.cbuffer = cbuffer;
+
+ tc_calc_rate_table(&htb_opt.rate, rtab, mtu);
+ tc_calc_rate_table(&htb_opt.ceil, rtab, mtu);
+
+ htb_opt.ceil.mpu = htb_opt.rate.mpu = 0;
+ htb_opt.ceil.overhead = htb_opt.rate.overhead = 0;
+
+ nest = nl_attr_nest(&req->n, datalen, TCA_OPTIONS);
+
+ if (rate >> 32 != 0) {
+ nl_attr_put(&req->n, datalen, TCA_HTB_CEIL64, &rate,
+ sizeof(rate));
+ }
+
+ if (ceil >> 32 != 0) {
+ nl_attr_put(&req->n, datalen, TCA_HTB_CEIL64, &ceil,
+ sizeof(ceil));
+ }
+
+ nl_attr_put(&req->n, datalen, TCA_HTB_PARMS, &htb_opt, sizeof(htb_opt));
+
+ nl_attr_put(&req->n, datalen, TCA_HTB_RTAB, rtab, sizeof(rtab));
+ nl_attr_put(&req->n, datalen, TCA_HTB_CTAB, ctab, sizeof(ctab));
+ nl_attr_nest_end(&req->n, nest);
+
+ return NLMSG_ALIGN(req->n.nlmsg_len);
+}
+
+/*
+ * Traffic control filter encoding (only "flower" supported)
+ */
+static ssize_t netlink_tfilter_msg_encode(int cmd, struct zebra_dplane_ctx *ctx,
+ void *data, size_t datalen)
+{
+ struct nlsock *nl;
+ struct rtattr *nest;
+
+ const char *kind = "flower";
+
+ uint16_t priority;
+ uint16_t protocol;
+ uint32_t classid;
+ uint32_t filter_bm;
+ uint32_t flags = 0;
+
+ struct inet_prefix addr;
+
+ struct {
+ struct nlmsghdr n;
+ struct tcmsg t;
+ char buf[0];
+ } *req = (void *)data;
+
+ if (datalen < sizeof(*req))
+ return 0;
+
+ nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
+
+ memset(req, 0, sizeof(*req));
+
+ req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
+ req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
+
+ req->n.nlmsg_flags |= NLM_F_EXCL;
+
+ req->n.nlmsg_type = cmd;
+
+ req->n.nlmsg_pid = nl->snl.nl_pid;
+
+ req->t.tcm_family = AF_UNSPEC;
+ req->t.tcm_ifindex = dplane_ctx_get_ifindex(ctx);
+
+ /* TODO: priority and layer-3 protocol support */
+ priority = 0;
+ protocol = htons(ETH_P_IP);
+ classid = tc_get_handle(ctx, 1);
+ filter_bm = dplane_ctx_tc_get_filter_bm(ctx);
+
+ req->t.tcm_info = tc_make_handle(priority, protocol);
+
+ req->t.tcm_handle = 1;
+ req->t.tcm_parent = tc_get_handle(ctx, 0);
+
+ nl_attr_put(&req->n, datalen, TCA_KIND, kind, strlen(kind) + 1);
+ nest = nl_attr_nest(&req->n, datalen, TCA_OPTIONS);
+
+ nl_attr_put(&req->n, datalen, TCA_FLOWER_CLASSID, &classid,
+ sizeof(classid));
+
+ if (filter_bm & TC_FILTER_SRC_IP) {
+ const struct prefix *src_p = dplane_ctx_tc_get_src_ip(ctx);
+
+ if (tc_flower_get_inet_prefix(src_p, &addr) != 0)
+ return 0;
+
+ nl_attr_put(&req->n, datalen,
+ (addr.family == AF_INET) ? TCA_FLOWER_KEY_IPV4_SRC
+ : TCA_FLOWER_KEY_IPV6_SRC,
+ addr.data, addr.bytelen);
+
+ if (tc_flower_get_inet_mask(src_p, &addr) != 0)
+ return 0;
+
+ nl_attr_put(&req->n, datalen,
+ (addr.family == AF_INET)
+ ? TCA_FLOWER_KEY_IPV4_SRC_MASK
+ : TCA_FLOWER_KEY_IPV6_SRC_MASK,
+ addr.data, addr.bytelen);
+ }
+
+ if (filter_bm & TC_FILTER_DST_IP) {
+ const struct prefix *dst_p = dplane_ctx_tc_get_dst_ip(ctx);
+
+ if (tc_flower_get_inet_prefix(dst_p, &addr) != 0)
+ return 0;
+
+ nl_attr_put(&req->n, datalen,
+ (addr.family == AF_INET) ? TCA_FLOWER_KEY_IPV4_DST
+ : TCA_FLOWER_KEY_IPV6_DST,
+ addr.data, addr.bytelen);
+
+ if (tc_flower_get_inet_mask(dst_p, &addr) != 0)
+ return 0;
+
+ nl_attr_put(&req->n, datalen,
+ (addr.family == AF_INET)
+ ? TCA_FLOWER_KEY_IPV4_DST_MASK
+ : TCA_FLOWER_KEY_IPV6_DST_MASK,
+ addr.data, addr.bytelen);
+ }
+
+ if (filter_bm & TC_FILTER_IP_PROTOCOL) {
+ nl_attr_put8(&req->n, datalen, TCA_FLOWER_KEY_IP_PROTO,
+ dplane_ctx_tc_get_ip_proto(ctx));
+ }
+
+ nl_attr_put32(&req->n, datalen, TCA_FLOWER_FLAGS, flags);
+
+ nl_attr_put16(&req->n, datalen, TCA_FLOWER_KEY_ETH_TYPE, protocol);
+ nl_attr_nest_end(&req->n, nest);
+
+ return NLMSG_ALIGN(req->n.nlmsg_len);
+}
+
+static ssize_t netlink_newqdisc_msg_encoder(struct zebra_dplane_ctx *ctx,
+ void *buf, size_t buflen)
+{
+ return netlink_qdisc_msg_encode(RTM_NEWQDISC, ctx, buf, buflen);
+}
+
+static ssize_t netlink_newtclass_msg_encoder(struct zebra_dplane_ctx *ctx,
+ void *buf, size_t buflen)
+{
+ return netlink_tclass_msg_encode(RTM_NEWTCLASS, ctx, buf, buflen);
+}
+
+static ssize_t netlink_newtfilter_msg_encoder(struct zebra_dplane_ctx *ctx,
+ void *buf, size_t buflen)
+{
+ return netlink_tfilter_msg_encode(RTM_NEWTFILTER, ctx, buf, buflen);
+}
+
+enum netlink_msg_status netlink_put_tc_update_msg(struct nl_batch *bth,
+ struct zebra_dplane_ctx *ctx)
+{
+ /* TODO: error handling and other actions (delete, replace, ...) */
+
+ netlink_batch_add_msg(bth, ctx, netlink_newqdisc_msg_encoder, false);
+ netlink_batch_add_msg(bth, ctx, netlink_newtclass_msg_encoder, false);
+ return netlink_batch_add_msg(bth, ctx, netlink_newtfilter_msg_encoder,
+ false);
+}
+
+#endif /* HAVE_NETLINK */
diff --git a/zebra/tc_netlink.h b/zebra/tc_netlink.h
new file mode 100644
index 000000000..2190bca4f
--- /dev/null
+++ b/zebra/tc_netlink.h
@@ -0,0 +1,62 @@
+/*
+ * Zebra Traffic Control (TC) interaction with the kernel using netlink.
+ *
+ * Copyright (C) 2022 Shichu Yang
+ *
+ * This file is part of FRR.
+ *
+ * FRR is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * FRR is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with FRR; see the file COPYING. If not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#ifndef _ZEBRA_TC_NETLINK_H
+#define _ZEBRA_TC_NETLINK_H
+
+#ifdef HAVE_NETLINK
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Represent a prefixed address in flower filter */
+
+struct inet_prefix {
+ uint16_t flags;
+ uint16_t bytelen;
+ uint16_t bitlen;
+ uint16_t family;
+ uint32_t data[64];
+};
+
+enum {
+ PREFIXLEN_SPECIFIED = (1 << 0),
+ ADDRTYPE_INET = (1 << 1),
+ ADDRTYPE_UNSPEC = (1 << 2),
+ ADDRTYPE_MULTI = (1 << 3),
+
+ ADDRTYPE_INET_UNSPEC = ADDRTYPE_INET | ADDRTYPE_UNSPEC,
+ ADDRTYPE_INET_MULTI = ADDRTYPE_INET | ADDRTYPE_MULTI
+};
+
+extern enum netlink_msg_status
+netlink_put_tc_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HAVE_NETLINK */
+
+#endif /* _ZEBRA_TC_NETLINK_H */
diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c
index 4c7838198..2fc1fad8b 100644
--- a/zebra/zebra_dplane.c
+++ b/zebra/zebra_dplane.c
@@ -314,6 +314,25 @@ struct dplane_netconf_info {
};
/*
+ * Traffic control contexts for the dplane
+ */
+struct dplane_tc_info {
+ /* Rate spec (unit: Bytes/s) */
+ uint64_t rate;
+ uint64_t ceil;
+
+ /* TODO: custom burst */
+
+ /* Filter components for "tfilter" */
+ uint32_t filter_bm;
+ struct prefix src_ip;
+ struct prefix dst_ip;
+ uint8_t ip_proto;
+
+ /* TODO: more filter components */
+};
+
+/*
* The context block used to exchange info about route updates across
* the boundary between the zebra main context (and pthread) and the
* dataplane layer (and pthread).
@@ -362,6 +381,7 @@ struct zebra_dplane_ctx {
struct dplane_mac_info macinfo;
struct dplane_neigh_info neigh;
struct dplane_rule_info rule;
+ struct dplane_tc_info tc;
struct zebra_pbr_iptable iptable;
struct zebra_pbr_ipset ipset;
struct {
@@ -540,6 +560,9 @@ static struct zebra_dplane_globals {
_Atomic uint32_t dg_intfs_in;
_Atomic uint32_t dg_intf_errors;
+ _Atomic uint32_t dg_tcs_in;
+ _Atomic uint32_t dg_tcs_errors;
+
/* Dataplane pthread */
struct frr_pthread *dg_pthread;
@@ -777,6 +800,9 @@ static void dplane_ctx_free_internal(struct zebra_dplane_ctx *ctx)
case DPLANE_OP_INTF_INSTALL:
case DPLANE_OP_INTF_UPDATE:
case DPLANE_OP_INTF_DELETE:
+ case DPLANE_OP_TC_INSTALL:
+ case DPLANE_OP_TC_UPDATE:
+ case DPLANE_OP_TC_DELETE:
break;
case DPLANE_OP_IPSET_ENTRY_ADD:
@@ -1100,6 +1126,16 @@ const char *dplane_op2str(enum dplane_op_e op)
case DPLANE_OP_INTF_DELETE:
ret = "INTF_DELETE";
break;
+
+ case DPLANE_OP_TC_INSTALL:
+ ret = "TC_INSTALL";
+ break;
+ case DPLANE_OP_TC_UPDATE:
+ ret = "TC_UPDATE";
+ break;
+ case DPLANE_OP_TC_DELETE:
+ ret = "TC_DELETE";
+ break;
}
return ret;
@@ -1419,6 +1455,50 @@ uint8_t dplane_ctx_get_old_distance(const struct zebra_dplane_ctx *ctx)
return ctx->u.rinfo.zd_old_distance;
}
+uint64_t dplane_ctx_tc_get_rate(const struct zebra_dplane_ctx *ctx)
+{
+ DPLANE_CTX_VALID(ctx);
+
+ return ctx->u.tc.rate;
+}
+
+uint64_t dplane_ctx_tc_get_ceil(const struct zebra_dplane_ctx *ctx)
+{
+ DPLANE_CTX_VALID(ctx);
+
+ return ctx->u.tc.ceil;
+}
+
+uint32_t dplane_ctx_tc_get_filter_bm(const struct zebra_dplane_ctx *ctx)
+{
+ DPLANE_CTX_VALID(ctx);
+
+ return ctx->u.tc.filter_bm;
+}
+
+const struct prefix *
+dplane_ctx_tc_get_src_ip(const struct zebra_dplane_ctx *ctx)
+{
+ DPLANE_CTX_VALID(ctx);
+
+ return &(ctx->u.tc.src_ip);
+}
+
+const struct prefix *
+dplane_ctx_tc_get_dst_ip(const struct zebra_dplane_ctx *ctx)
+{
+ DPLANE_CTX_VALID(ctx);
+
+ return &(ctx->u.tc.dst_ip);
+}
+
+uint8_t dplane_ctx_tc_get_ip_proto(const struct zebra_dplane_ctx *ctx)
+{
+ DPLANE_CTX_VALID(ctx);
+
+ return ctx->u.tc.ip_proto;
+}
+
/*
* Set the nexthops associated with a context: note that processing code
* may well expect that nexthops are in canonical (sorted) order, so we
@@ -2691,6 +2771,26 @@ done:
return ret;
}
+int dplane_ctx_tc_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op)
+{
+ int ret = EINVAL;
+
+ struct zebra_vrf *zvrf = NULL;
+ struct zebra_ns *zns = NULL;
+
+ ctx->zd_op = op;
+ ctx->zd_status = ZEBRA_DPLANE_REQUEST_SUCCESS;
+
+ /* TODO: init traffic control qdisc */
+ zns = zvrf ? zvrf->zns : zebra_ns_lookup(NS_DEFAULT);
+
+ dplane_ctx_ns_init(ctx, zns, true);
+
+ ret = AOK;
+
+ return ret;
+}
+
/**
* dplane_ctx_nexthop_init() - Initialize a context block for a nexthop update
*
@@ -3410,6 +3510,47 @@ dplane_route_update_internal(struct route_node *rn,
return result;
}
+static enum zebra_dplane_result dplane_tc_update_internal(enum dplane_op_e op)
+{
+ enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE;
+ int ret = EINVAL;
+ struct zebra_dplane_ctx *ctx = NULL;
+
+ /* Obtain context block */
+ ctx = dplane_ctx_alloc();
+
+ if (!ctx) {
+ ret = ENOMEM;
+ goto done;
+ }
+
+ /* Init context with info from zebra data structs */
+ ret = dplane_ctx_tc_init(ctx, op);
+
+ if (ret == AOK)
+ ret = dplane_update_enqueue(ctx);
+
+done:
+ /* Update counter */
+ atomic_fetch_add_explicit(&zdplane_info.dg_tcs_in, 1,
+ memory_order_relaxed);
+ if (ret == AOK) {
+ result = ZEBRA_DPLANE_REQUEST_QUEUED;
+ } else {
+ atomic_fetch_add_explicit(&zdplane_info.dg_tcs_errors, 1,
+ memory_order_relaxed);
+ if (ctx)
+ dplane_ctx_free(&ctx);
+ }
+
+ return result;
+}
+
+enum zebra_dplane_result dplane_tc_update(void)
+{
+ return dplane_tc_update_internal(DPLANE_OP_TC_UPDATE);
+}
+
/**
* dplane_nexthop_update_internal() - Helper for enqueuing nexthop changes
*
@@ -5591,6 +5732,13 @@ static void kernel_dplane_log_detail(struct zebra_dplane_ctx *ctx)
dplane_ctx_get_ifindex(ctx),
dplane_ctx_intf_is_protodown(ctx));
break;
+
+ /* TODO: more detailed log */
+ case DPLANE_OP_TC_INSTALL:
+ case DPLANE_OP_TC_UPDATE:
+ case DPLANE_OP_TC_DELETE:
+ zlog_debug("Dplane tc ifidx %u", dplane_ctx_get_ifindex(ctx));
+ break;
}
}
@@ -5734,6 +5882,14 @@ static void kernel_dplane_handle_result(struct zebra_dplane_ctx *ctx)
1, memory_order_relaxed);
break;
+ case DPLANE_OP_TC_INSTALL:
+ case DPLANE_OP_TC_UPDATE:
+ case DPLANE_OP_TC_DELETE:
+ if (res != ZEBRA_DPLANE_REQUEST_SUCCESS)
+ atomic_fetch_add_explicit(&zdplane_info.dg_tcs_errors,
+ 1, memory_order_relaxed);
+ break;
+
/* Ignore 'notifications' - no-op */
case DPLANE_OP_SYS_ROUTE_ADD:
case DPLANE_OP_SYS_ROUTE_DELETE:
diff --git a/zebra/zebra_dplane.h b/zebra/zebra_dplane.h
index c96ea4009..8b239a9ba 100644
--- a/zebra/zebra_dplane.h
+++ b/zebra/zebra_dplane.h
@@ -193,6 +193,11 @@ enum dplane_op_e {
DPLANE_OP_INTF_INSTALL,
DPLANE_OP_INTF_UPDATE,
DPLANE_OP_INTF_DELETE,
+
+ /* Traffic control */
+ DPLANE_OP_TC_INSTALL,
+ DPLANE_OP_TC_UPDATE,
+ DPLANE_OP_TC_DELETE,
};
/*
@@ -378,6 +383,16 @@ uint8_t dplane_ctx_get_distance(const struct zebra_dplane_ctx *ctx);
void dplane_ctx_set_distance(struct zebra_dplane_ctx *ctx, uint8_t distance);
uint8_t dplane_ctx_get_old_distance(const struct zebra_dplane_ctx *ctx);
+/* Accessors for traffic control context */
+uint64_t dplane_ctx_tc_get_rate(const struct zebra_dplane_ctx *ctx);
+uint64_t dplane_ctx_tc_get_ceil(const struct zebra_dplane_ctx *ctx);
+uint32_t dplane_ctx_tc_get_filter_bm(const struct zebra_dplane_ctx *ctx);
+const struct prefix *
+dplane_ctx_tc_get_src_ip(const struct zebra_dplane_ctx *ctx);
+const struct prefix *
+dplane_ctx_tc_get_dst_ip(const struct zebra_dplane_ctx *ctx);
+uint8_t dplane_ctx_tc_get_ip_proto(const struct zebra_dplane_ctx *ctx);
+
void dplane_ctx_set_nexthops(struct zebra_dplane_ctx *ctx, struct nexthop *nh);
void dplane_ctx_set_backup_nhg(struct zebra_dplane_ctx *ctx,
const struct nexthop_group *nhg);
@@ -708,6 +723,13 @@ enum zebra_dplane_result dplane_intf_update(const struct interface *ifp);
enum zebra_dplane_result dplane_intf_delete(const struct interface *ifp);
/*
+ * Enqueue interface link changes for the dataplane.
+ */
+enum zebra_dplane_result dplane_tc_add(void);
+enum zebra_dplane_result dplane_tc_update(void);
+enum zebra_dplane_result dplane_tc_delete(void);
+
+/*
* Link layer operations for the dataplane.
*/
enum zebra_dplane_result dplane_neigh_ip_update(enum dplane_op_e op,
@@ -849,6 +871,9 @@ int dplane_ctx_nexthop_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op,
int dplane_ctx_intf_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op,
const struct interface *ifp);
+/* Encode traffic control information into data plane context. */
+int dplane_ctx_tc_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op);
+
/* Retrieve the limit on the number of pending, unprocessed updates. */
uint32_t dplane_get_in_queue_limit(void);
diff --git a/zebra/zebra_nhg.c b/zebra/zebra_nhg.c
index c5b533fc2..1964c763c 100644
--- a/zebra/zebra_nhg.c
+++ b/zebra/zebra_nhg.c
@@ -3125,6 +3125,9 @@ void zebra_nhg_dplane_result(struct zebra_dplane_ctx *ctx)
case DPLANE_OP_INTF_INSTALL:
case DPLANE_OP_INTF_UPDATE:
case DPLANE_OP_INTF_DELETE:
+ case DPLANE_OP_TC_INSTALL:
+ case DPLANE_OP_TC_UPDATE:
+ case DPLANE_OP_TC_DELETE:
break;
}
}
diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c
index 79eb99ddf..03bda8cc3 100644
--- a/zebra/zebra_rib.c
+++ b/zebra/zebra_rib.c
@@ -4391,6 +4391,11 @@ static void rib_process_dplane_results(struct thread *thread)
zebra_if_dplane_result(ctx);
break;
+ case DPLANE_OP_TC_INSTALL:
+ case DPLANE_OP_TC_UPDATE:
+ case DPLANE_OP_TC_DELETE:
+ break;
+
/* Some op codes not handled here */
case DPLANE_OP_ADDR_INSTALL:
case DPLANE_OP_ADDR_UNINSTALL: