summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2018-07-30 18:31:14 +0200
committerDavid S. Miller <davem@davemloft.net>2018-07-30 18:31:14 +0200
commit8f3f6500c74935bfe5a9067e3106b806f336facf (patch)
treee0120e70e86ad96977d6a92b59aebbb1e06af3eb
parentliquidio: remove redundant function cn23xx_dump_iq_regs (diff)
parentact_mirred: use TC_ACT_REINSERT when possible (diff)
downloadlinux-8f3f6500c74935bfe5a9067e3106b806f336facf.tar.xz
linux-8f3f6500c74935bfe5a9067e3106b806f336facf.zip
Merge branch 'TC-refactor-act_mirred-packets-re-injection'
Paolo Abeni says: ==================== TC: refactor act_mirred packets re-injection This series is aimed at improving the act_mirred redirect performances. Such action is used by OVS to represent TC S/W flows, and it's current largest bottle-neck is the need for a skb_clone() for each packet. The first 2 patches introduce some cleanup and safeguards to allow extending tca_result - we will use it to store RCU protected redirect information - and introduce a clear separation between user-space accessible tcfa_action values and internal values accessible only by the kernel. Then a new tcfa_action value is introduced: TC_ACT_REINJECT, similar to TC_ACT_REDIRECT, but preserving the mirred semantic. Such value is not accessible from user-space. The last patch exploits the newly introduced infrastructure in the act_mirred action, to avoid a skb_clone, when possible. Overall this the above gives a ~10% performance improvement in forwarding tput, when using the TC S/W datapath. v1 -> v2: - preserve the rcu lock in act_bpf - add and use a new action value to reinject the packets, preserving the mirred semantic v2 -> v3: - renamed to new action as TC_ACT_REINJECT - TC_ACT_REINJECT is not exposed to user-space v3 -> v4: - dropped the TC_ACT_REDIRECT patch - report failure via extack, too - rename the new action as TC_ACT_REINSERT - skip clone only if the control action don't touch tcf_result v4 -> v5: - fix a couple of build issue reported by kbuild bot - dont split messages ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/act_api.h2
-rw-r--r--include/net/pkt_cls.h3
-rw-r--r--include/net/sch_generic.h30
-rw-r--r--include/uapi/linux/pkt_cls.h6
-rw-r--r--net/core/dev.c6
-rw-r--r--net/sched/act_api.c14
-rw-r--r--net/sched/act_csum.c12
-rw-r--r--net/sched/act_ife.c5
-rw-r--r--net/sched/act_mirred.c57
-rw-r--r--net/sched/act_sample.c4
-rw-r--r--net/sched/act_skbedit.c10
-rw-r--r--net/sched/act_skbmod.c21
-rw-r--r--net/sched/act_tunnel_key.c6
-rw-r--r--net/sched/act_vlan.c19
14 files changed, 126 insertions, 69 deletions
diff --git a/include/net/act_api.h b/include/net/act_api.h
index 683ce41053d9..8c9bc02d05e1 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -85,7 +85,7 @@ struct tc_action_ops {
size_t size;
struct module *owner;
int (*act)(struct sk_buff *, const struct tc_action *,
- struct tcf_result *);
+ struct tcf_result *); /* called under RCU BH lock*/
int (*dump)(struct sk_buff *, struct tc_action *, int, int);
void (*cleanup)(struct tc_action *);
int (*lookup)(struct net *net, struct tc_action **a, u32 index,
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 6d02f31abba8..22bfc3a13c25 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -7,6 +7,9 @@
#include <net/sch_generic.h>
#include <net/act_api.h>
+/* TC action not accessible from user space */
+#define TC_ACT_REINSERT (TC_ACT_VALUE_MAX + 1)
+
/* Basic packet classifier frontend definitions. */
struct tcf_walker {
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index c5432362dc26..a6d00093f35e 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -235,6 +235,12 @@ struct tcf_result {
u32 classid;
};
const struct tcf_proto *goto_tp;
+
+ /* used by the TC_ACT_REINSERT action */
+ struct {
+ bool ingress;
+ struct gnet_stats_queue *qstats;
+ };
};
};
@@ -285,6 +291,8 @@ struct tcf_proto {
/* Fast access part */
struct tcf_proto __rcu *next;
void __rcu *root;
+
+ /* called under RCU BH lock*/
int (*classify)(struct sk_buff *,
const struct tcf_proto *,
struct tcf_result *);
@@ -567,6 +575,15 @@ static inline void skb_reset_tc(struct sk_buff *skb)
#endif
}
+static inline bool skb_is_tc_redirected(const struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_CLS_ACT
+ return skb->tc_redirected;
+#else
+ return false;
+#endif
+}
+
static inline bool skb_at_tc_ingress(const struct sk_buff *skb)
{
#ifdef CONFIG_NET_CLS_ACT
@@ -1106,4 +1123,17 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
struct mini_Qdisc __rcu **p_miniq);
+static inline void skb_tc_reinsert(struct sk_buff *skb, struct tcf_result *res)
+{
+ struct gnet_stats_queue *stats = res->qstats;
+ int ret;
+
+ if (res->ingress)
+ ret = netif_receive_skb(skb);
+ else
+ ret = dev_queue_xmit(skb);
+ if (ret && stats)
+ qstats_overlimit_inc(res->qstats);
+}
+
#endif
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index b4512254036b..48e5b5d49a34 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -45,6 +45,7 @@ enum {
* the skb and act like everything
* is alright.
*/
+#define TC_ACT_VALUE_MAX TC_ACT_TRAP
/* There is a special kind of actions called "extended actions",
* which need a value parameter. These have a local opcode located in
@@ -55,11 +56,12 @@ enum {
#define __TC_ACT_EXT_SHIFT 28
#define __TC_ACT_EXT(local) ((local) << __TC_ACT_EXT_SHIFT)
#define TC_ACT_EXT_VAL_MASK ((1 << __TC_ACT_EXT_SHIFT) - 1)
-#define TC_ACT_EXT_CMP(combined, opcode) \
- (((combined) & (~TC_ACT_EXT_VAL_MASK)) == opcode)
+#define TC_ACT_EXT_OPCODE(combined) ((combined) & (~TC_ACT_EXT_VAL_MASK))
+#define TC_ACT_EXT_CMP(combined, opcode) (TC_ACT_EXT_OPCODE(combined) == opcode)
#define TC_ACT_JUMP __TC_ACT_EXT(1)
#define TC_ACT_GOTO_CHAIN __TC_ACT_EXT(2)
+#define TC_ACT_EXT_OPCODE_MAX TC_ACT_GOTO_CHAIN
/* Action type identifiers*/
enum {
diff --git a/net/core/dev.c b/net/core/dev.c
index 89031b5fef9f..38b0c414d780 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4252,7 +4252,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
/* Reinjected packets coming from act_mirred or similar should
* not get XDP generic processing.
*/
- if (skb_cloned(skb))
+ if (skb_cloned(skb) || skb_is_tc_redirected(skb))
return XDP_PASS;
/* XDP packets must be linear and must have sufficient headroom
@@ -4602,6 +4602,10 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
__skb_push(skb, skb->mac_len);
skb_do_redirect(skb);
return NULL;
+ case TC_ACT_REINSERT:
+ /* this does not scrub the packet, and updates stats on error */
+ skb_tc_reinsert(skb, &cl_res);
+ return NULL;
default:
break;
}
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index b43df1e25c6d..229d63c99be2 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -786,6 +786,15 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
return c;
}
+static bool tcf_action_valid(int action)
+{
+ int opcode = TC_ACT_EXT_OPCODE(action);
+
+ if (!opcode)
+ return action <= TC_ACT_VALUE_MAX;
+ return opcode <= TC_ACT_EXT_OPCODE_MAX || action == TC_ACT_UNSPEC;
+}
+
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
struct nlattr *nla, struct nlattr *est,
char *name, int ovr, int bind,
@@ -895,6 +904,11 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
}
}
+ if (!tcf_action_valid(a->tcfa_action)) {
+ NL_SET_ERR_MSG(extack, "invalid action value, using TC_ACT_UNSPEC instead");
+ a->tcfa_action = TC_ACT_UNSPEC;
+ }
+
return a;
err_mod:
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 4e8c383f379e..648a3a35b720 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -561,15 +561,14 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a,
u32 update_flags;
int action;
- rcu_read_lock();
- params = rcu_dereference(p->params);
+ params = rcu_dereference_bh(p->params);
tcf_lastuse_update(&p->tcf_tm);
bstats_cpu_update(this_cpu_ptr(p->common.cpu_bstats), skb);
action = READ_ONCE(p->tcf_action);
if (unlikely(action == TC_ACT_SHOT))
- goto drop_stats;
+ goto drop;
update_flags = params->update_flags;
switch (tc_skb_protocol(skb)) {
@@ -583,16 +582,11 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a,
break;
}
-unlock:
- rcu_read_unlock();
return action;
drop:
- action = TC_ACT_SHOT;
-
-drop_stats:
qstats_drop_inc(this_cpu_ptr(p->common.cpu_qstats));
- goto unlock;
+ return TC_ACT_SHOT;
}
static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 3d6e265758c0..df4060e32d43 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -820,14 +820,11 @@ static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_ife_params *p;
int ret;
- rcu_read_lock();
- p = rcu_dereference(ife->params);
+ p = rcu_dereference_bh(ife->params);
if (p->flags & IFE_ENCODE) {
ret = tcf_ife_encode(skb, a, res, p);
- rcu_read_unlock();
return ret;
}
- rcu_read_unlock();
return tcf_ife_decode(skb, a, res);
}
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 6afd89a36c69..b26d060da08e 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -25,6 +25,7 @@
#include <net/net_namespace.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <linux/tc_act/tc_mirred.h>
#include <net/tc_act/tc_mirred.h>
@@ -49,6 +50,18 @@ static bool tcf_mirred_act_wants_ingress(int action)
}
}
+static bool tcf_mirred_can_reinsert(int action)
+{
+ switch (action) {
+ case TC_ACT_SHOT:
+ case TC_ACT_STOLEN:
+ case TC_ACT_QUEUED:
+ case TC_ACT_TRAP:
+ return true;
+ }
+ return false;
+}
+
static void tcf_mirred_release(struct tc_action *a)
{
struct tcf_mirred *m = to_mirred(a);
@@ -171,21 +184,23 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_mirred *m = to_mirred(a);
+ struct sk_buff *skb2 = skb;
bool m_mac_header_xmit;
struct net_device *dev;
- struct sk_buff *skb2;
int retval, err = 0;
+ bool use_reinsert;
+ bool want_ingress;
+ bool is_redirect;
int m_eaction;
int mac_len;
tcf_lastuse_update(&m->tcf_tm);
bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
- rcu_read_lock();
m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit);
m_eaction = READ_ONCE(m->tcfm_eaction);
retval = READ_ONCE(m->tcf_action);
- dev = rcu_dereference(m->tcfm_dev);
+ dev = rcu_dereference_bh(m->tcfm_dev);
if (unlikely(!dev)) {
pr_notice_once("tc mirred: target device is gone\n");
goto out;
@@ -197,16 +212,25 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
goto out;
}
- skb2 = skb_clone(skb, GFP_ATOMIC);
- if (!skb2)
- goto out;
+ /* we could easily avoid the clone only if called by ingress and clsact;
+ * since we can't easily detect the clsact caller, skip clone only for
+ * ingress - that covers the TC S/W datapath.
+ */
+ is_redirect = tcf_mirred_is_act_redirect(m_eaction);
+ use_reinsert = skb_at_tc_ingress(skb) && is_redirect &&
+ tcf_mirred_can_reinsert(retval);
+ if (!use_reinsert) {
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (!skb2)
+ goto out;
+ }
/* If action's target direction differs than filter's direction,
* and devices expect a mac header on xmit, then mac push/pull is
* needed.
*/
- if (skb_at_tc_ingress(skb) != tcf_mirred_act_wants_ingress(m_eaction) &&
- m_mac_header_xmit) {
+ want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
+ if (skb_at_tc_ingress(skb) != want_ingress && m_mac_header_xmit) {
if (!skb_at_tc_ingress(skb)) {
/* caught at egress, act ingress: pull mac */
mac_len = skb_network_header(skb) - skb_mac_header(skb);
@@ -217,15 +241,23 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
}
}
+ skb2->skb_iif = skb->dev->ifindex;
+ skb2->dev = dev;
+
/* mirror is always swallowed */
- if (tcf_mirred_is_act_redirect(m_eaction)) {
+ if (is_redirect) {
skb2->tc_redirected = 1;
skb2->tc_from_ingress = skb2->tc_at_ingress;
+
+ /* let's the caller reinsert the packet, if possible */
+ if (use_reinsert) {
+ res->ingress = want_ingress;
+ res->qstats = this_cpu_ptr(m->common.cpu_qstats);
+ return TC_ACT_REINSERT;
+ }
}
- skb2->skb_iif = skb->dev->ifindex;
- skb2->dev = dev;
- if (!tcf_mirred_act_wants_ingress(m_eaction))
+ if (!want_ingress)
err = dev_queue_xmit(skb2);
else
err = netif_receive_skb(skb2);
@@ -236,7 +268,6 @@ out:
if (tcf_mirred_is_act_redirect(m_eaction))
retval = TC_ACT_SHOT;
}
- rcu_read_unlock();
return retval;
}
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 3079e7be5bde..2608ccc83e5e 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -140,8 +140,7 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
bstats_cpu_update(this_cpu_ptr(s->common.cpu_bstats), skb);
retval = READ_ONCE(s->tcf_action);
- rcu_read_lock();
- psample_group = rcu_dereference(s->psample_group);
+ psample_group = rcu_dereference_bh(s->psample_group);
/* randomly sample packets according to rate */
if (psample_group && (prandom_u32() % s->rate == 0)) {
@@ -165,7 +164,6 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
skb_pull(skb, skb->mac_len);
}
- rcu_read_unlock();
return retval;
}
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index da56e6938c9e..a6db47ebec11 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -43,8 +43,7 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
tcf_lastuse_update(&d->tcf_tm);
bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
- rcu_read_lock();
- params = rcu_dereference(d->params);
+ params = rcu_dereference_bh(d->params);
action = READ_ONCE(d->tcf_action);
if (params->flags & SKBEDIT_F_PRIORITY)
@@ -77,14 +76,11 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
}
if (params->flags & SKBEDIT_F_PTYPE)
skb->pkt_type = params->ptype;
-
-unlock:
- rcu_read_unlock();
return action;
+
err:
qstats_drop_inc(this_cpu_ptr(d->common.cpu_qstats));
- action = TC_ACT_SHOT;
- goto unlock;
+ return TC_ACT_SHOT;
}
static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index cdc6bacfb190..c437c6d51a71 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -41,20 +41,14 @@ static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a,
* then MAX_EDIT_LEN needs to change appropriately
*/
err = skb_ensure_writable(skb, MAX_EDIT_LEN);
- if (unlikely(err)) { /* best policy is to drop on the floor */
- qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats));
- return TC_ACT_SHOT;
- }
+ if (unlikely(err)) /* best policy is to drop on the floor */
+ goto drop;
- rcu_read_lock();
action = READ_ONCE(d->tcf_action);
- if (unlikely(action == TC_ACT_SHOT)) {
- qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats));
- rcu_read_unlock();
- return action;
- }
+ if (unlikely(action == TC_ACT_SHOT))
+ goto drop;
- p = rcu_dereference(d->skbmod_p);
+ p = rcu_dereference_bh(d->skbmod_p);
flags = p->flags;
if (flags & SKBMOD_F_DMAC)
ether_addr_copy(eth_hdr(skb)->h_dest, p->eth_dst);
@@ -62,7 +56,6 @@ static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a,
ether_addr_copy(eth_hdr(skb)->h_source, p->eth_src);
if (flags & SKBMOD_F_ETYPE)
eth_hdr(skb)->h_proto = p->eth_type;
- rcu_read_unlock();
if (flags & SKBMOD_F_SWAPMAC) {
u16 tmpaddr[ETH_ALEN / 2]; /* ether_addr_copy() requirement */
@@ -73,6 +66,10 @@ static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a,
}
return action;
+
+drop:
+ qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats));
+ return TC_ACT_SHOT;
}
static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index f811850fd1d0..d42d9e112789 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -31,9 +31,7 @@ static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_tunnel_key_params *params;
int action;
- rcu_read_lock();
-
- params = rcu_dereference(t->params);
+ params = rcu_dereference_bh(t->params);
tcf_lastuse_update(&t->tcf_tm);
bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb);
@@ -53,8 +51,6 @@ static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
break;
}
- rcu_read_unlock();
-
return action;
}
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index ad37f308175a..15a0ee214c9c 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -40,11 +40,9 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
if (skb_at_tc_ingress(skb))
skb_push_rcsum(skb, skb->mac_len);
- rcu_read_lock();
-
action = READ_ONCE(v->tcf_action);
- p = rcu_dereference(v->vlan_p);
+ p = rcu_dereference_bh(v->vlan_p);
switch (p->tcfv_action) {
case TCA_VLAN_ACT_POP:
@@ -61,7 +59,7 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
case TCA_VLAN_ACT_MODIFY:
/* No-op if no vlan tag (either hw-accel or in-payload) */
if (!skb_vlan_tagged(skb))
- goto unlock;
+ goto out;
/* extract existing tag (and guarantee no hw-accel tag) */
if (skb_vlan_tag_present(skb)) {
tci = skb_vlan_tag_get(skb);
@@ -86,18 +84,15 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
BUG();
}
- goto unlock;
-
-drop:
- action = TC_ACT_SHOT;
- qstats_drop_inc(this_cpu_ptr(v->common.cpu_qstats));
-
-unlock:
- rcu_read_unlock();
+out:
if (skb_at_tc_ingress(skb))
skb_pull_rcsum(skb, skb->mac_len);
return action;
+
+drop:
+ qstats_drop_inc(this_cpu_ptr(v->common.cpu_qstats));
+ return TC_ACT_SHOT;
}
static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {