summaryrefslogtreecommitdiffstats
path: root/net/openvswitch
diff options
context:
space:
mode:
Diffstat (limited to 'net/openvswitch')
-rw-r--r--net/openvswitch/actions.c45
-rw-r--r--net/openvswitch/conntrack.c143
-rw-r--r--net/openvswitch/conntrack.h9
-rw-r--r--net/openvswitch/datapath.c8
-rw-r--r--net/openvswitch/datapath.h1
-rw-r--r--net/openvswitch/flow.c4
-rw-r--r--net/openvswitch/flow.h3
-rw-r--r--net/openvswitch/flow_netlink.c171
-rw-r--r--net/openvswitch/flow_netlink.h6
-rw-r--r--net/openvswitch/flow_table.c5
-rw-r--r--net/openvswitch/vport-geneve.c15
-rw-r--r--net/openvswitch/vport-gre.c10
-rw-r--r--net/openvswitch/vport-internal_dev.c54
-rw-r--r--net/openvswitch/vport-netdev.c33
-rw-r--r--net/openvswitch/vport-netdev.h1
-rw-r--r--net/openvswitch/vport-vxlan.c21
-rw-r--r--net/openvswitch/vport.c121
-rw-r--r--net/openvswitch/vport.h23
18 files changed, 339 insertions, 334 deletions
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 315f5330b6e5..c88d0f2d3e01 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -620,7 +620,7 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
return 0;
}
-static int ovs_vport_output(struct sock *sock, struct sk_buff *skb)
+static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct ovs_frag_data *data = this_cpu_ptr(&ovs_frag_data_storage);
struct vport *vport = data->vport;
@@ -679,12 +679,12 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb)
skb_pull(skb, hlen);
}
-static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru,
- __be16 ethertype)
+static void ovs_fragment(struct net *net, struct vport *vport,
+ struct sk_buff *skb, u16 mru, __be16 ethertype)
{
if (skb_network_offset(skb) > MAX_L2_LEN) {
OVS_NLERR(1, "L2 header too long to fragment");
- return;
+ goto err;
}
if (ethertype == htons(ETH_P_IP)) {
@@ -700,7 +700,7 @@ static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru,
skb_dst_set_noref(skb, &ovs_dst);
IPCB(skb)->frag_max_size = mru;
- ip_do_fragment(skb->sk, skb, ovs_vport_output);
+ ip_do_fragment(net, skb->sk, skb, ovs_vport_output);
refdst_drop(orig_dst);
} else if (ethertype == htons(ETH_P_IPV6)) {
const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
@@ -708,8 +708,7 @@ static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru,
struct rt6_info ovs_rt;
if (!v6ops) {
- kfree_skb(skb);
- return;
+ goto err;
}
prepare_frag(vport, skb);
@@ -722,14 +721,18 @@ static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru,
skb_dst_set_noref(skb, &ovs_rt.dst);
IP6CB(skb)->frag_max_size = mru;
- v6ops->fragment(skb->sk, skb, ovs_vport_output);
+ v6ops->fragment(net, skb->sk, skb, ovs_vport_output);
refdst_drop(orig_dst);
} else {
WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.",
ovs_vport_name(vport), ntohs(ethertype), mru,
vport->dev->mtu);
- kfree_skb(skb);
+ goto err;
}
+
+ return;
+err:
+ kfree_skb(skb);
}
static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
@@ -743,6 +746,7 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
if (likely(!mru || (skb->len <= mru + ETH_HLEN))) {
ovs_vport_send(vport, skb);
} else if (mru <= vport->dev->mtu) {
+ struct net *net = read_pnet(&dp->net);
__be16 ethertype = key->eth.type;
if (!is_flow_key_valid(key)) {
@@ -752,7 +756,7 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
ethertype = vlan_get_protocol(skb);
}
- ovs_fragment(vport, skb, mru, ethertype);
+ ovs_fragment(net, vport, skb, mru, ethertype);
} else {
kfree_skb(skb);
}
@@ -765,7 +769,6 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key, const struct nlattr *attr,
const struct nlattr *actions, int actions_len)
{
- struct ip_tunnel_info info;
struct dp_upcall_info upcall;
const struct nlattr *a;
int rem;
@@ -793,11 +796,9 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
if (vport) {
int err;
- upcall.egress_tun_info = &info;
- err = ovs_vport_get_egress_tun_info(vport, skb,
- &upcall);
- if (err)
- upcall.egress_tun_info = NULL;
+ err = dev_fill_metadata_dst(vport->dev, skb);
+ if (!err)
+ upcall.egress_tun_info = skb_tunnel_info(skb);
}
break;
@@ -968,7 +969,7 @@ static int execute_masked_set_action(struct sk_buff *skb,
case OVS_KEY_ATTR_CT_STATE:
case OVS_KEY_ATTR_CT_ZONE:
case OVS_KEY_ATTR_CT_MARK:
- case OVS_KEY_ATTR_CT_LABEL:
+ case OVS_KEY_ATTR_CT_LABELS:
err = -EINVAL;
break;
}
@@ -1099,12 +1100,18 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
break;
case OVS_ACTION_ATTR_CT:
+ if (!is_flow_key_valid(key)) {
+ err = ovs_flow_key_update(skb, key);
+ if (err)
+ return err;
+ }
+
err = ovs_ct_execute(ovs_dp_get_net(dp), skb, key,
nla_data(a));
/* Hide stolen IP fragments from user space. */
- if (err == -EINPROGRESS)
- return 0;
+ if (err)
+ return err == -EINPROGRESS ? 0 : err;
break;
}
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 002a755fa07e..c2cc11168fd5 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -37,9 +37,9 @@ struct md_mark {
};
/* Metadata label for masked write to conntrack label. */
-struct md_label {
- struct ovs_key_ct_label value;
- struct ovs_key_ct_label mask;
+struct md_labels {
+ struct ovs_key_ct_labels value;
+ struct ovs_key_ct_labels mask;
};
/* Conntrack action context for execution. */
@@ -47,10 +47,10 @@ struct ovs_conntrack_info {
struct nf_conntrack_helper *helper;
struct nf_conntrack_zone zone;
struct nf_conn *ct;
- u32 flags;
+ u8 commit : 1;
u16 family;
struct md_mark mark;
- struct md_label label;
+ struct md_labels labels;
};
static u16 key_to_nfproto(const struct sw_flow_key *key)
@@ -109,21 +109,21 @@ static u32 ovs_ct_get_mark(const struct nf_conn *ct)
#endif
}
-static void ovs_ct_get_label(const struct nf_conn *ct,
- struct ovs_key_ct_label *label)
+static void ovs_ct_get_labels(const struct nf_conn *ct,
+ struct ovs_key_ct_labels *labels)
{
struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL;
if (cl) {
size_t len = cl->words * sizeof(long);
- if (len > OVS_CT_LABEL_LEN)
- len = OVS_CT_LABEL_LEN;
- else if (len < OVS_CT_LABEL_LEN)
- memset(label, 0, OVS_CT_LABEL_LEN);
- memcpy(label, cl->bits, len);
+ if (len > OVS_CT_LABELS_LEN)
+ len = OVS_CT_LABELS_LEN;
+ else if (len < OVS_CT_LABELS_LEN)
+ memset(labels, 0, OVS_CT_LABELS_LEN);
+ memcpy(labels, cl->bits, len);
} else {
- memset(label, 0, OVS_CT_LABEL_LEN);
+ memset(labels, 0, OVS_CT_LABELS_LEN);
}
}
@@ -134,7 +134,7 @@ static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,
key->ct.state = state;
key->ct.zone = zone->id;
key->ct.mark = ovs_ct_get_mark(ct);
- ovs_ct_get_label(ct, &key->ct.label);
+ ovs_ct_get_labels(ct, &key->ct.labels);
}
/* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has
@@ -151,6 +151,8 @@ static void ovs_ct_update_key(const struct sk_buff *skb,
ct = nf_ct_get(skb, &ctinfo);
if (ct) {
state = ovs_ct_get_state(ctinfo);
+ if (!nf_ct_is_confirmed(ct))
+ state |= OVS_CS_F_NEW;
if (ct->master)
state |= OVS_CS_F_RELATED;
zone = nf_ct_zone(ct);
@@ -167,7 +169,7 @@ void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key)
int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb)
{
- if (nla_put_u8(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state))
+ if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state))
return -EMSGSIZE;
if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
@@ -179,8 +181,8 @@ int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb)
return -EMSGSIZE;
if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
- nla_put(skb, OVS_KEY_ATTR_CT_LABEL, sizeof(key->ct.label),
- &key->ct.label))
+ nla_put(skb, OVS_KEY_ATTR_CT_LABELS, sizeof(key->ct.labels),
+ &key->ct.labels))
return -EMSGSIZE;
return 0;
@@ -213,18 +215,15 @@ static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key,
#endif
}
-static int ovs_ct_set_label(struct sk_buff *skb, struct sw_flow_key *key,
- const struct ovs_key_ct_label *label,
- const struct ovs_key_ct_label *mask)
+static int ovs_ct_set_labels(struct sk_buff *skb, struct sw_flow_key *key,
+ const struct ovs_key_ct_labels *labels,
+ const struct ovs_key_ct_labels *mask)
{
enum ip_conntrack_info ctinfo;
struct nf_conn_labels *cl;
struct nf_conn *ct;
int err;
- if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS))
- return -ENOTSUPP;
-
/* The connection could be invalid, in which case set_label is no-op.*/
ct = nf_ct_get(skb, &ctinfo);
if (!ct)
@@ -235,15 +234,15 @@ static int ovs_ct_set_label(struct sk_buff *skb, struct sw_flow_key *key,
nf_ct_labels_ext_add(ct);
cl = nf_ct_labels_find(ct);
}
- if (!cl || cl->words * sizeof(long) < OVS_CT_LABEL_LEN)
+ if (!cl || cl->words * sizeof(long) < OVS_CT_LABELS_LEN)
return -ENOSPC;
- err = nf_connlabels_replace(ct, (u32 *)label, (u32 *)mask,
- OVS_CT_LABEL_LEN / sizeof(u32));
+ err = nf_connlabels_replace(ct, (u32 *)labels, (u32 *)mask,
+ OVS_CT_LABELS_LEN / sizeof(u32));
if (err)
return err;
- ovs_ct_get_label(ct, &key->ct.label);
+ ovs_ct_get_labels(ct, &key->ct.labels);
return 0;
}
@@ -294,6 +293,9 @@ static int ovs_ct_helper(struct sk_buff *skb, u16 proto)
return helper->help(skb, protoff, ct, ctinfo);
}
+/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
+ * value if 'skb' is freed.
+ */
static int handle_fragments(struct net *net, struct sw_flow_key *key,
u16 zone, struct sk_buff *skb)
{
@@ -304,32 +306,40 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key,
int err;
memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
- err = ip_defrag(skb, user);
+ err = ip_defrag(net, skb, user);
if (err)
return err;
ovs_cb.mru = IPCB(skb)->frag_max_size;
- } else if (key->eth.type == htons(ETH_P_IPV6)) {
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
+ } else if (key->eth.type == htons(ETH_P_IPV6)) {
enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
struct sk_buff *reasm;
memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
- reasm = nf_ct_frag6_gather(skb, user);
+ reasm = nf_ct_frag6_gather(net, skb, user);
if (!reasm)
return -EINPROGRESS;
- if (skb == reasm)
+ if (skb == reasm) {
+ kfree_skb(skb);
return -EINVAL;
+ }
+
+ /* Don't free 'skb' even though it is one of the original
+ * fragments, as we're going to morph it into the head.
+ */
+ skb_get(skb);
+ nf_ct_frag6_consume_orig(reasm);
key->ip.proto = ipv6_hdr(reasm)->nexthdr;
skb_morph(skb, reasm);
+ skb->next = reasm->next;
consume_skb(reasm);
ovs_cb.mru = IP6CB(skb)->frag_max_size;
-#else
- return -EPFNOSUPPORT;
#endif
} else {
+ kfree_skb(skb);
return -EPFNOSUPPORT;
}
@@ -347,7 +357,7 @@ ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,
{
struct nf_conntrack_tuple tuple;
- if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, &tuple))
+ if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple))
return NULL;
return __nf_ct_expect_find(net, zone, &tuple);
}
@@ -377,7 +387,7 @@ static bool skb_nfct_cached(const struct net *net, const struct sk_buff *skb,
return true;
}
-static int __ovs_ct_lookup(struct net *net, const struct sw_flow_key *key,
+static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
const struct ovs_conntrack_info *info,
struct sk_buff *skb)
{
@@ -408,6 +418,8 @@ static int __ovs_ct_lookup(struct net *net, const struct sw_flow_key *key,
}
}
+ ovs_ct_update_key(skb, key, true);
+
return 0;
}
@@ -430,8 +442,6 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
err = __ovs_ct_lookup(net, key, info, skb);
if (err)
return err;
-
- ovs_ct_update_key(skb, key, true);
}
return 0;
@@ -460,22 +470,23 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
if (nf_conntrack_confirm(skb) != NF_ACCEPT)
return -EINVAL;
- ovs_ct_update_key(skb, key, true);
-
return 0;
}
-static bool label_nonzero(const struct ovs_key_ct_label *label)
+static bool labels_nonzero(const struct ovs_key_ct_labels *labels)
{
size_t i;
- for (i = 0; i < sizeof(*label); i++)
- if (label->ct_label[i])
+ for (i = 0; i < sizeof(*labels); i++)
+ if (labels->ct_labels[i])
return true;
return false;
}
+/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
+ * value if 'skb' is freed.
+ */
int ovs_ct_execute(struct net *net, struct sk_buff *skb,
struct sw_flow_key *key,
const struct ovs_conntrack_info *info)
@@ -493,7 +504,7 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
return err;
}
- if (info->flags & OVS_CT_F_COMMIT)
+ if (info->commit)
err = ovs_ct_commit(net, key, info, skb);
else
err = ovs_ct_lookup(net, key, info, skb);
@@ -506,11 +517,13 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
if (err)
goto err;
}
- if (label_nonzero(&info->label.mask))
- err = ovs_ct_set_label(skb, key, &info->label.value,
- &info->label.mask);
+ if (labels_nonzero(&info->labels.mask))
+ err = ovs_ct_set_labels(skb, key, &info->labels.value,
+ &info->labels.mask);
err:
skb_push(skb, nh_ofs);
+ if (err)
+ kfree_skb(skb);
return err;
}
@@ -539,14 +552,13 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
}
static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = {
- [OVS_CT_ATTR_FLAGS] = { .minlen = sizeof(u32),
- .maxlen = sizeof(u32) },
+ [OVS_CT_ATTR_COMMIT] = { .minlen = 0, .maxlen = 0 },
[OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16),
.maxlen = sizeof(u16) },
[OVS_CT_ATTR_MARK] = { .minlen = sizeof(struct md_mark),
.maxlen = sizeof(struct md_mark) },
- [OVS_CT_ATTR_LABEL] = { .minlen = sizeof(struct md_label),
- .maxlen = sizeof(struct md_label) },
+ [OVS_CT_ATTR_LABELS] = { .minlen = sizeof(struct md_labels),
+ .maxlen = sizeof(struct md_labels) },
[OVS_CT_ATTR_HELPER] = { .minlen = 1,
.maxlen = NF_CT_HELPER_NAME_LEN }
};
@@ -576,8 +588,8 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
}
switch (type) {
- case OVS_CT_ATTR_FLAGS:
- info->flags = nla_get_u32(a);
+ case OVS_CT_ATTR_COMMIT:
+ info->commit = true;
break;
#ifdef CONFIG_NF_CONNTRACK_ZONES
case OVS_CT_ATTR_ZONE:
@@ -588,15 +600,23 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
case OVS_CT_ATTR_MARK: {
struct md_mark *mark = nla_data(a);
+ if (!mark->mask) {
+ OVS_NLERR(log, "ct_mark mask cannot be 0");
+ return -EINVAL;
+ }
info->mark = *mark;
break;
}
#endif
#ifdef CONFIG_NF_CONNTRACK_LABELS
- case OVS_CT_ATTR_LABEL: {
- struct md_label *label = nla_data(a);
+ case OVS_CT_ATTR_LABELS: {
+ struct md_labels *labels = nla_data(a);
- info->label = *label;
+ if (!labels_nonzero(&labels->mask)) {
+ OVS_NLERR(log, "ct_labels mask cannot be 0");
+ return -EINVAL;
+ }
+ info->labels = *labels;
break;
}
#endif
@@ -633,7 +653,7 @@ bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)
attr == OVS_KEY_ATTR_CT_MARK)
return true;
if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
- attr == OVS_KEY_ATTR_CT_LABEL) {
+ attr == OVS_KEY_ATTR_CT_LABELS) {
struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
return ovs_net->xt_label;
@@ -701,18 +721,19 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info,
if (!start)
return -EMSGSIZE;
- if (nla_put_u32(skb, OVS_CT_ATTR_FLAGS, ct_info->flags))
+ if (ct_info->commit && nla_put_flag(skb, OVS_CT_ATTR_COMMIT))
return -EMSGSIZE;
if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id))
return -EMSGSIZE;
- if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
+ if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && ct_info->mark.mask &&
nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark),
&ct_info->mark))
return -EMSGSIZE;
if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
- nla_put(skb, OVS_CT_ATTR_LABEL, sizeof(ct_info->label),
- &ct_info->label))
+ labels_nonzero(&ct_info->labels.mask) &&
+ nla_put(skb, OVS_CT_ATTR_LABELS, sizeof(ct_info->labels),
+ &ct_info->labels))
return -EMSGSIZE;
if (ct_info->helper) {
if (nla_put_string(skb, OVS_CT_ATTR_HELPER,
@@ -737,7 +758,7 @@ void ovs_ct_free_action(const struct nlattr *a)
void ovs_ct_init(struct net *net)
{
- unsigned int n_bits = sizeof(struct ovs_key_ct_label) * BITS_PER_BYTE;
+ unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
if (nf_connlabels_get(net, n_bits)) {
diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h
index 43f5dd7a5577..a7544f405c16 100644
--- a/net/openvswitch/conntrack.h
+++ b/net/openvswitch/conntrack.h
@@ -34,6 +34,10 @@ int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *,
void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key);
int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb);
void ovs_ct_free_action(const struct nlattr *a);
+
+#define CT_SUPPORTED_MASK (OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | \
+ OVS_CS_F_RELATED | OVS_CS_F_REPLY_DIR | \
+ OVS_CS_F_INVALID | OVS_CS_F_TRACKED)
#else
#include <linux/errno.h>
@@ -63,6 +67,7 @@ static inline int ovs_ct_execute(struct net *net, struct sk_buff *skb,
struct sw_flow_key *key,
const struct ovs_conntrack_info *info)
{
+ kfree_skb(skb);
return -ENOTSUPP;
}
@@ -72,7 +77,7 @@ static inline void ovs_ct_fill_key(const struct sk_buff *skb,
key->ct.state = 0;
key->ct.zone = 0;
key->ct.mark = 0;
- memset(&key->ct.label, 0, sizeof(key->ct.label));
+ memset(&key->ct.labels, 0, sizeof(key->ct.labels));
}
static inline int ovs_ct_put_key(const struct sw_flow_key *key,
@@ -82,5 +87,7 @@ static inline int ovs_ct_put_key(const struct sw_flow_key *key,
}
static inline void ovs_ct_free_action(const struct nlattr *a) { }
+
+#define CT_SUPPORTED_MASK 0
#endif /* CONFIG_NF_CONNTRACK */
#endif /* ovs_conntrack.h */
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index b816ff871528..5633172b791a 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -91,8 +91,7 @@ static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
static void ovs_notify(struct genl_family *family,
struct sk_buff *skb, struct genl_info *info)
{
- genl_notify(family, skb, genl_info_net(info), info->snd_portid,
- 0, info->nlhdr, GFP_KERNEL);
+ genl_notify(family, skb, info, 0, GFP_KERNEL);
}
/**
@@ -490,9 +489,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
if (upcall_info->egress_tun_info) {
nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
- err = ovs_nla_put_egress_tunnel_key(user_skb,
- upcall_info->egress_tun_info,
- upcall_info->egress_tun_opts);
+ err = ovs_nla_put_tunnel_info(user_skb,
+ upcall_info->egress_tun_info);
BUG_ON(err);
nla_nest_end(user_skb, nla);
}
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index f88038a99f44..67bdecd9fdc1 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -117,7 +117,6 @@ struct ovs_skb_cb {
*/
struct dp_upcall_info {
struct ip_tunnel_info *egress_tun_info;
- const void *egress_tun_opts;
const struct nlattr *userdata;
const struct nlattr *actions;
int actions_len;
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index c8db44ab2ee7..0ea128eeeab2 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -698,8 +698,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
{
/* Extract metadata from packet. */
if (tun_info) {
- if (ip_tunnel_info_af(tun_info) != AF_INET)
- return -EINVAL;
+ key->tun_proto = ip_tunnel_info_af(tun_info);
memcpy(&key->tun_key, &tun_info->key, sizeof(key->tun_key));
if (tun_info->options_len) {
@@ -714,6 +713,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
key->tun_opts_len = 0;
}
} else {
+ key->tun_proto = 0;
key->tun_opts_len = 0;
memset(&key->tun_key, 0, sizeof(key->tun_key));
}
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index fe527d2dd4b7..1d055c559eaf 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -63,6 +63,7 @@ struct sw_flow_key {
u32 skb_mark; /* SKB mark. */
u16 in_port; /* Input switch port (or DP_MAX_PORTS). */
} __packed phy; /* Safe when right after 'tun_key'. */
+ u8 tun_proto; /* Protocol of encapsulating tunnel. */
u32 ovs_flow_hash; /* Datapath computed hash value. */
u32 recirc_id; /* Recirculation ID. */
struct {
@@ -116,7 +117,7 @@ struct sw_flow_key {
u16 zone;
u32 mark;
u8 state;
- struct ovs_key_ct_label label;
+ struct ovs_key_ct_labels labels;
} ct;
} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 5c030a4d7338..907d6fd28ede 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -262,8 +262,8 @@ size_t ovs_tun_key_attr_size(void)
* updating this function.
*/
return nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */
- + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
- + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
+ + nla_total_size(16) /* OVS_TUNNEL_KEY_ATTR_IPV[46]_SRC */
+ + nla_total_size(16) /* OVS_TUNNEL_KEY_ATTR_IPV[46]_DST */
+ nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */
+ nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
@@ -291,10 +291,10 @@ size_t ovs_key_attr_size(void)
+ nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
+ nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */
+ nla_total_size(4) /* OVS_KEY_ATTR_RECIRC_ID */
- + nla_total_size(1) /* OVS_KEY_ATTR_CT_STATE */
+ + nla_total_size(4) /* OVS_KEY_ATTR_CT_STATE */
+ nla_total_size(2) /* OVS_KEY_ATTR_CT_ZONE */
+ nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */
- + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABEL */
+ + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */
+ nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
+ nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
+ nla_total_size(4) /* OVS_KEY_ATTR_VLAN */
@@ -323,6 +323,8 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1]
[OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_VARIABLE },
[OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED,
.next = ovs_vxlan_ext_key_lens },
+ [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
+ [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) },
};
/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
@@ -349,10 +351,10 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
[OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED,
.next = ovs_tunnel_key_lens, },
[OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) },
- [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u8) },
+ [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u32) },
[OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) },
[OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) },
- [OVS_KEY_ATTR_CT_LABEL] = { .len = sizeof(struct ovs_key_ct_label) },
+ [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) },
};
static bool check_attr_len(unsigned int attr_len, unsigned int expected_len)
@@ -542,15 +544,15 @@ static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
return 0;
}
-static int ipv4_tun_from_nlattr(const struct nlattr *attr,
- struct sw_flow_match *match, bool is_mask,
- bool log)
+static int ip_tun_from_nlattr(const struct nlattr *attr,
+ struct sw_flow_match *match, bool is_mask,
+ bool log)
{
- struct nlattr *a;
- int rem;
- bool ttl = false;
+ bool ttl = false, ipv4 = false, ipv6 = false;
__be16 tun_flags = 0;
int opts_type = 0;
+ struct nlattr *a;
+ int rem;
nla_for_each_nested(a, attr, rem) {
int type = nla_type(a);
@@ -578,10 +580,22 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src,
nla_get_in_addr(a), is_mask);
+ ipv4 = true;
break;
case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.dst,
nla_get_in_addr(a), is_mask);
+ ipv4 = true;
+ break;
+ case OVS_TUNNEL_KEY_ATTR_IPV6_SRC:
+ SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst,
+ nla_get_in6_addr(a), is_mask);
+ ipv6 = true;
+ break;
+ case OVS_TUNNEL_KEY_ATTR_IPV6_DST:
+ SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst,
+ nla_get_in6_addr(a), is_mask);
+ ipv6 = true;
break;
case OVS_TUNNEL_KEY_ATTR_TOS:
SW_FLOW_KEY_PUT(match, tun_key.tos,
@@ -636,28 +650,46 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
opts_type = type;
break;
default:
- OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d",
+ OVS_NLERR(log, "Unknown IP tunnel attribute %d",
type);
return -EINVAL;
}
}
SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
+ if (is_mask)
+ SW_FLOW_KEY_MEMSET_FIELD(match, tun_proto, 0xff, true);
+ else
+ SW_FLOW_KEY_PUT(match, tun_proto, ipv6 ? AF_INET6 : AF_INET,
+ false);
if (rem > 0) {
- OVS_NLERR(log, "IPv4 tunnel attribute has %d unknown bytes.",
+ OVS_NLERR(log, "IP tunnel attribute has %d unknown bytes.",
rem);
return -EINVAL;
}
+ if (ipv4 && ipv6) {
+ OVS_NLERR(log, "Mixed IPv4 and IPv6 tunnel attributes");
+ return -EINVAL;
+ }
+
if (!is_mask) {
- if (!match->key->tun_key.u.ipv4.dst) {
+ if (!ipv4 && !ipv6) {
+ OVS_NLERR(log, "IP tunnel dst address not specified");
+ return -EINVAL;
+ }
+ if (ipv4 && !match->key->tun_key.u.ipv4.dst) {
OVS_NLERR(log, "IPv4 tunnel dst address is zero");
return -EINVAL;
}
+ if (ipv6 && ipv6_addr_any(&match->key->tun_key.u.ipv6.dst)) {
+ OVS_NLERR(log, "IPv6 tunnel dst address is zero");
+ return -EINVAL;
+ }
if (!ttl) {
- OVS_NLERR(log, "IPv4 tunnel TTL not specified.");
+ OVS_NLERR(log, "IP tunnel TTL not specified.");
return -EINVAL;
}
}
@@ -682,21 +714,36 @@ static int vxlan_opt_to_nlattr(struct sk_buff *skb,
return 0;
}
-static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
- const struct ip_tunnel_key *output,
- const void *tun_opts, int swkey_tun_opts_len)
+static int __ip_tun_to_nlattr(struct sk_buff *skb,
+ const struct ip_tunnel_key *output,
+ const void *tun_opts, int swkey_tun_opts_len,
+ unsigned short tun_proto)
{
if (output->tun_flags & TUNNEL_KEY &&
nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
return -EMSGSIZE;
- if (output->u.ipv4.src &&
- nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
- output->u.ipv4.src))
- return -EMSGSIZE;
- if (output->u.ipv4.dst &&
- nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
- output->u.ipv4.dst))
- return -EMSGSIZE;
+ switch (tun_proto) {
+ case AF_INET:
+ if (output->u.ipv4.src &&
+ nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
+ output->u.ipv4.src))
+ return -EMSGSIZE;
+ if (output->u.ipv4.dst &&
+ nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
+ output->u.ipv4.dst))
+ return -EMSGSIZE;
+ break;
+ case AF_INET6:
+ if (!ipv6_addr_any(&output->u.ipv6.src) &&
+ nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_SRC,
+ &output->u.ipv6.src))
+ return -EMSGSIZE;
+ if (!ipv6_addr_any(&output->u.ipv6.dst) &&
+ nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_DST,
+ &output->u.ipv6.dst))
+ return -EMSGSIZE;
+ break;
+ }
if (output->tos &&
nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->tos))
return -EMSGSIZE;
@@ -717,7 +764,7 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
if ((output->tun_flags & TUNNEL_OAM) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
return -EMSGSIZE;
- if (tun_opts) {
+ if (swkey_tun_opts_len) {
if (output->tun_flags & TUNNEL_GENEVE_OPT &&
nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
swkey_tun_opts_len, tun_opts))
@@ -730,9 +777,10 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
return 0;
}
-static int ipv4_tun_to_nlattr(struct sk_buff *skb,
- const struct ip_tunnel_key *output,
- const void *tun_opts, int swkey_tun_opts_len)
+static int ip_tun_to_nlattr(struct sk_buff *skb,
+ const struct ip_tunnel_key *output,
+ const void *tun_opts, int swkey_tun_opts_len,
+ unsigned short tun_proto)
{
struct nlattr *nla;
int err;
@@ -741,7 +789,8 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb,
if (!nla)
return -EMSGSIZE;
- err = __ipv4_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len);
+ err = __ip_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len,
+ tun_proto);
if (err)
return err;
@@ -749,13 +798,13 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb,
return 0;
}
-int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb,
- const struct ip_tunnel_info *egress_tun_info,
- const void *egress_tun_opts)
+int ovs_nla_put_tunnel_info(struct sk_buff *skb,
+ struct ip_tunnel_info *tun_info)
{
- return __ipv4_tun_to_nlattr(skb, &egress_tun_info->key,
- egress_tun_opts,
- egress_tun_info->options_len);
+ return __ip_tun_to_nlattr(skb, &tun_info->key,
+ ip_tunnel_info_opts(tun_info),
+ tun_info->options_len,
+ ip_tunnel_info_af(tun_info));
}
static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
@@ -806,15 +855,21 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
*attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
}
if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
- if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
- is_mask, log) < 0)
+ if (ip_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
+ is_mask, log) < 0)
return -EINVAL;
*attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
}
if (*attrs & (1 << OVS_KEY_ATTR_CT_STATE) &&
ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) {
- u8 ct_state = nla_get_u8(a[OVS_KEY_ATTR_CT_STATE]);
+ u32 ct_state = nla_get_u32(a[OVS_KEY_ATTR_CT_STATE]);
+
+ if (ct_state & ~CT_SUPPORTED_MASK) {
+ OVS_NLERR(log, "ct_state flags %08x unsupported",
+ ct_state);
+ return -EINVAL;
+ }
SW_FLOW_KEY_PUT(match, ct.state, ct_state, is_mask);
*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE);
@@ -833,14 +888,14 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
SW_FLOW_KEY_PUT(match, ct.mark, mark, is_mask);
*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_MARK);
}
- if (*attrs & (1 << OVS_KEY_ATTR_CT_LABEL) &&
- ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABEL)) {
- const struct ovs_key_ct_label *cl;
+ if (*attrs & (1 << OVS_KEY_ATTR_CT_LABELS) &&
+ ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABELS)) {
+ const struct ovs_key_ct_labels *cl;
- cl = nla_data(a[OVS_KEY_ATTR_CT_LABEL]);
- SW_FLOW_KEY_MEMCPY(match, ct.label, cl->ct_label,
+ cl = nla_data(a[OVS_KEY_ATTR_CT_LABELS]);
+ SW_FLOW_KEY_MEMCPY(match, ct.labels, cl->ct_labels,
sizeof(*cl), is_mask);
- *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABEL);
+ *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS);
}
return 0;
}
@@ -1093,6 +1148,9 @@ static void nlattr_set(struct nlattr *attr, u8 val,
} else {
memset(nla_data(nla), val, nla_len(nla));
}
+
+ if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE)
+ *(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK;
}
}
@@ -1194,7 +1252,7 @@ int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,
/* The userspace does not send tunnel attributes that
* are 0, but we should not wildcard them nonetheless.
*/
- if (match->key->tun_key.u.ipv4.dst)
+ if (match->key->tun_proto)
SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,
0xff, true);
@@ -1367,14 +1425,14 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
goto nla_put_failure;
- if ((swkey->tun_key.u.ipv4.dst || is_mask)) {
+ if ((swkey->tun_proto || is_mask)) {
const void *opts = NULL;
if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len);
- if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts,
- swkey->tun_opts_len))
+ if (ip_tun_to_nlattr(skb, &output->tun_key, opts,
+ swkey->tun_opts_len, swkey->tun_proto))
goto nla_put_failure;
}
@@ -1877,7 +1935,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
int err = 0, start, opts_type;
ovs_match_init(&match, &key, NULL);
- opts_type = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log);
+ opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log);
if (opts_type < 0)
return opts_type;
@@ -1913,6 +1971,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
tun_info = &tun_dst->u.tun_info;
tun_info->mode = IP_TUNNEL_INFO_TX;
+ if (key.tun_proto == AF_INET6)
+ tun_info->mode |= IP_TUNNEL_INFO_IPV6;
tun_info->key = key.tun_key;
/* We need to store the options in the action itself since
@@ -1973,7 +2033,7 @@ static int validate_set(const struct nlattr *a,
case OVS_KEY_ATTR_PRIORITY:
case OVS_KEY_ATTR_SKB_MARK:
case OVS_KEY_ATTR_CT_MARK:
- case OVS_KEY_ATTR_CT_LABEL:
+ case OVS_KEY_ATTR_CT_LABELS:
case OVS_KEY_ATTR_ETHERNET:
break;
@@ -2374,10 +2434,7 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
if (!start)
return -EMSGSIZE;
- err = ipv4_tun_to_nlattr(skb, &tun_info->key,
- tun_info->options_len ?
- ip_tunnel_info_opts(tun_info) : NULL,
- tun_info->options_len);
+ err = ovs_nla_put_tunnel_info(skb, tun_info);
if (err)
return err;
nla_nest_end(skb, start);
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index 6ca3f0baf449..47dd142eca1c 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -55,9 +55,9 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb);
int ovs_nla_get_match(struct net *, struct sw_flow_match *,
const struct nlattr *key, const struct nlattr *mask,
bool log);
-int ovs_nla_put_egress_tunnel_key(struct sk_buff *,
- const struct ip_tunnel_info *,
- const void *egress_tun_opts);
+
+int ovs_nla_put_tunnel_info(struct sk_buff *skb,
+ struct ip_tunnel_info *tun_info);
bool ovs_nla_get_ufid(struct sw_flow_id *, const struct nlattr *, bool log);
int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index f2ea83ba4763..d073fff82fdb 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -93,7 +93,8 @@ struct sw_flow *ovs_flow_alloc(void)
/* Initialize the default stat node. */
stats = kmem_cache_alloc_node(flow_stats_cache,
- GFP_KERNEL | __GFP_ZERO, 0);
+ GFP_KERNEL | __GFP_ZERO,
+ node_online(0) ? 0 : NUMA_NO_NODE);
if (!stats)
goto err;
@@ -427,7 +428,7 @@ static u32 flow_hash(const struct sw_flow_key *key,
static int flow_key_start(const struct sw_flow_key *key)
{
- if (key->tun_key.u.ipv4.dst)
+ if (key->tun_proto)
return 0;
else
return rounddown(offsetof(struct sw_flow_key, phy),
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index 2735e9c4a3b8..efb736bb6855 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -52,18 +52,6 @@ static int geneve_get_options(const struct vport *vport,
return 0;
}
-static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
- struct dp_upcall_info *upcall)
-{
- struct geneve_port *geneve_port = geneve_vport(vport);
- struct net *net = ovs_dp_get_net(vport->dp);
- __be16 dport = htons(geneve_port->port_no);
- __be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
-
- return ovs_tunnel_get_egress_info(upcall, ovs_dp_get_net(vport->dp),
- skb, IPPROTO_UDP, sport, dport);
-}
-
static struct vport *geneve_tnl_create(const struct vport_parms *parms)
{
struct net *net = ovs_dp_get_net(parms->dp);
@@ -128,9 +116,8 @@ static struct vport_ops ovs_geneve_vport_ops = {
.create = geneve_create,
.destroy = ovs_netdev_tunnel_destroy,
.get_options = geneve_get_options,
- .send = ovs_netdev_send,
+ .send = dev_queue_xmit,
.owner = THIS_MODULE,
- .get_egress_tun_info = geneve_get_egress_tun_info,
};
static int __init ovs_geneve_tnl_init(void)
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index 4d24481669c9..c3257d78d3d2 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -84,18 +84,10 @@ static struct vport *gre_create(const struct vport_parms *parms)
return ovs_netdev_link(vport, parms->name);
}
-static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
- struct dp_upcall_info *upcall)
-{
- return ovs_tunnel_get_egress_info(upcall, ovs_dp_get_net(vport->dp),
- skb, IPPROTO_GRE, 0, 0);
-}
-
static struct vport_ops ovs_gre_vport_ops = {
.type = OVS_VPORT_TYPE_GRE,
.create = gre_create,
- .send = ovs_netdev_send,
- .get_egress_tun_info = gre_get_egress_tun_info,
+ .send = dev_queue_xmit,
.destroy = ovs_netdev_tunnel_destroy,
.owner = THIS_MODULE,
};
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 388b8a6bf112..ec76398a792f 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -106,12 +106,45 @@ static void internal_dev_destructor(struct net_device *dev)
free_netdev(dev);
}
+static struct rtnl_link_stats64 *
+internal_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
+{
+ int i;
+
+ memset(stats, 0, sizeof(*stats));
+ stats->rx_errors = dev->stats.rx_errors;
+ stats->tx_errors = dev->stats.tx_errors;
+ stats->tx_dropped = dev->stats.tx_dropped;
+ stats->rx_dropped = dev->stats.rx_dropped;
+
+ for_each_possible_cpu(i) {
+ const struct pcpu_sw_netstats *percpu_stats;
+ struct pcpu_sw_netstats local_stats;
+ unsigned int start;
+
+ percpu_stats = per_cpu_ptr(dev->tstats, i);
+
+ do {
+ start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
+ local_stats = *percpu_stats;
+ } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));
+
+ stats->rx_bytes += local_stats.rx_bytes;
+ stats->rx_packets += local_stats.rx_packets;
+ stats->tx_bytes += local_stats.tx_bytes;
+ stats->tx_packets += local_stats.tx_packets;
+ }
+
+ return stats;
+}
+
static const struct net_device_ops internal_dev_netdev_ops = {
.ndo_open = internal_dev_open,
.ndo_stop = internal_dev_stop,
.ndo_start_xmit = internal_dev_xmit,
.ndo_set_mac_address = eth_mac_addr,
.ndo_change_mtu = internal_dev_change_mtu,
+ .ndo_get_stats64 = internal_get_stats,
};
static struct rtnl_link_ops internal_dev_link_ops __read_mostly = {
@@ -161,6 +194,11 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
err = -ENOMEM;
goto error_free_vport;
}
+ vport->dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+ if (!vport->dev->tstats) {
+ err = -ENOMEM;
+ goto error_free_netdev;
+ }
dev_net_set(vport->dev, ovs_dp_get_net(vport->dp));
internal_dev = internal_dev_priv(vport->dev);
@@ -173,7 +211,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
rtnl_lock();
err = register_netdevice(vport->dev);
if (err)
- goto error_free_netdev;
+ goto error_unlock;
dev_set_promiscuity(vport->dev, 1);
rtnl_unlock();
@@ -181,8 +219,10 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
return vport;
-error_free_netdev:
+error_unlock:
rtnl_unlock();
+ free_percpu(vport->dev->tstats);
+error_free_netdev:
free_netdev(vport->dev);
error_free_vport:
ovs_vport_free(vport);
@@ -198,26 +238,25 @@ static void internal_dev_destroy(struct vport *vport)
/* unregister_netdevice() waits for an RCU grace period. */
unregister_netdevice(vport->dev);
-
+ free_percpu(vport->dev->tstats);
rtnl_unlock();
}
-static void internal_dev_recv(struct vport *vport, struct sk_buff *skb)
+static netdev_tx_t internal_dev_recv(struct sk_buff *skb)
{
- struct net_device *netdev = vport->dev;
+ struct net_device *netdev = skb->dev;
struct pcpu_sw_netstats *stats;
if (unlikely(!(netdev->flags & IFF_UP))) {
kfree_skb(skb);
netdev->stats.rx_dropped++;
- return;
+ return NETDEV_TX_OK;
}
skb_dst_drop(skb);
nf_reset(skb);
secpath_reset(skb);
- skb->dev = netdev;
skb->pkt_type = PACKET_HOST;
skb->protocol = eth_type_trans(skb, netdev);
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
@@ -229,6 +268,7 @@ static void internal_dev_recv(struct vport *vport, struct sk_buff *skb)
u64_stats_update_end(&stats->syncp);
netif_rx(skb);
+ return NETDEV_TX_OK;
}
static struct vport_ops ovs_internal_vport_ops = {
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index f7e8dcce7ada..b327368a3848 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -190,37 +190,6 @@ void ovs_netdev_tunnel_destroy(struct vport *vport)
}
EXPORT_SYMBOL_GPL(ovs_netdev_tunnel_destroy);
-static unsigned int packet_length(const struct sk_buff *skb)
-{
- unsigned int length = skb->len - ETH_HLEN;
-
- if (skb->protocol == htons(ETH_P_8021Q))
- length -= VLAN_HLEN;
-
- return length;
-}
-
-void ovs_netdev_send(struct vport *vport, struct sk_buff *skb)
-{
- int mtu = vport->dev->mtu;
-
- if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) {
- net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
- vport->dev->name,
- packet_length(skb), mtu);
- vport->dev->stats.tx_errors++;
- goto drop;
- }
-
- skb->dev = vport->dev;
- dev_queue_xmit(skb);
- return;
-
-drop:
- kfree_skb(skb);
-}
-EXPORT_SYMBOL_GPL(ovs_netdev_send);
-
/* Returns null if this device is not attached to a datapath. */
struct vport *ovs_netdev_get_vport(struct net_device *dev)
{
@@ -235,7 +204,7 @@ static struct vport_ops ovs_netdev_vport_ops = {
.type = OVS_VPORT_TYPE_NETDEV,
.create = netdev_create,
.destroy = netdev_destroy,
- .send = ovs_netdev_send,
+ .send = dev_queue_xmit,
};
int __init ovs_netdev_init(void)
diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h
index bf22fcedbc69..19e29c12adcc 100644
--- a/net/openvswitch/vport-netdev.h
+++ b/net/openvswitch/vport-netdev.h
@@ -27,7 +27,6 @@
struct vport *ovs_netdev_get_vport(struct net_device *dev);
struct vport *ovs_netdev_link(struct vport *vport, const char *name);
-void ovs_netdev_send(struct vport *vport, struct sk_buff *skb);
void ovs_netdev_detach_dev(struct vport *);
int __init ovs_netdev_init(void);
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index c11413d5075f..1605691d9414 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -146,31 +146,12 @@ static struct vport *vxlan_create(const struct vport_parms *parms)
return ovs_netdev_link(vport, parms->name);
}
-static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
- struct dp_upcall_info *upcall)
-{
- struct vxlan_dev *vxlan = netdev_priv(vport->dev);
- struct net *net = ovs_dp_get_net(vport->dp);
- __be16 dst_port = vxlan_dev_dst_port(vxlan);
- __be16 src_port;
- int port_min;
- int port_max;
-
- inet_get_local_port_range(net, &port_min, &port_max);
- src_port = udp_flow_src_port(net, skb, 0, 0, true);
-
- return ovs_tunnel_get_egress_info(upcall, net,
- skb, IPPROTO_UDP,
- src_port, dst_port);
-}
-
static struct vport_ops ovs_vxlan_netdev_vport_ops = {
.type = OVS_VPORT_TYPE_VXLAN,
.create = vxlan_create,
.destroy = ovs_netdev_tunnel_destroy,
.get_options = vxlan_get_options,
- .send = ovs_netdev_send,
- .get_egress_tun_info = vxlan_get_egress_tun_info,
+ .send = dev_queue_xmit,
};
static int __init ovs_vxlan_tnl_init(void)
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index dc81dc619aa2..0ac0fd004d7e 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -280,35 +280,19 @@ void ovs_vport_del(struct vport *vport)
*/
void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
{
- struct net_device *dev = vport->dev;
- int i;
-
- memset(stats, 0, sizeof(*stats));
- stats->rx_errors = dev->stats.rx_errors;
- stats->tx_errors = dev->stats.tx_errors;
- stats->tx_dropped = dev->stats.tx_dropped;
- stats->rx_dropped = dev->stats.rx_dropped;
-
- stats->rx_dropped += atomic_long_read(&dev->rx_dropped);
- stats->tx_dropped += atomic_long_read(&dev->tx_dropped);
-
- for_each_possible_cpu(i) {
- const struct pcpu_sw_netstats *percpu_stats;
- struct pcpu_sw_netstats local_stats;
- unsigned int start;
-
- percpu_stats = per_cpu_ptr(dev->tstats, i);
-
- do {
- start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
- local_stats = *percpu_stats;
- } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));
-
- stats->rx_bytes += local_stats.rx_bytes;
- stats->rx_packets += local_stats.rx_packets;
- stats->tx_bytes += local_stats.tx_bytes;
- stats->tx_packets += local_stats.tx_packets;
- }
+ const struct rtnl_link_stats64 *dev_stats;
+ struct rtnl_link_stats64 temp;
+
+ dev_stats = dev_get_stats(vport->dev, &temp);
+ stats->rx_errors = dev_stats->rx_errors;
+ stats->tx_errors = dev_stats->tx_errors;
+ stats->tx_dropped = dev_stats->tx_dropped;
+ stats->rx_dropped = dev_stats->rx_dropped;
+
+ stats->rx_bytes = dev_stats->rx_bytes;
+ stats->rx_packets = dev_stats->rx_packets;
+ stats->tx_bytes = dev_stats->tx_bytes;
+ stats->tx_packets = dev_stats->tx_packets;
}
/**
@@ -460,6 +444,15 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
OVS_CB(skb)->input_vport = vport;
OVS_CB(skb)->mru = 0;
+ if (unlikely(dev_net(skb->dev) != ovs_dp_get_net(vport->dp))) {
+ u32 mark;
+
+ mark = skb->mark;
+ skb_scrub_packet(skb, true);
+ skb->mark = mark;
+ tun_info = NULL;
+ }
+
/* Extract flow from 'skb' into 'key'. */
error = ovs_flow_key_extract(tun_info, skb, &key);
if (unlikely(error)) {
@@ -487,60 +480,32 @@ void ovs_vport_deferred_free(struct vport *vport)
}
EXPORT_SYMBOL_GPL(ovs_vport_deferred_free);
-int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall,
- struct net *net,
- struct sk_buff *skb,
- u8 ipproto,
- __be16 tp_src,
- __be16 tp_dst)
+static unsigned int packet_length(const struct sk_buff *skb)
{
- struct ip_tunnel_info *egress_tun_info = upcall->egress_tun_info;
- const struct ip_tunnel_info *tun_info = skb_tunnel_info(skb);
- const struct ip_tunnel_key *tun_key;
- u32 skb_mark = skb->mark;
- struct rtable *rt;
- struct flowi4 fl;
-
- if (unlikely(!tun_info))
- return -EINVAL;
- if (ip_tunnel_info_af(tun_info) != AF_INET)
- return -EINVAL;
-
- tun_key = &tun_info->key;
+ unsigned int length = skb->len - ETH_HLEN;
- /* Route lookup to get srouce IP address.
- * The process may need to be changed if the corresponding process
- * in vports ops changed.
- */
- rt = ovs_tunnel_route_lookup(net, tun_key, skb_mark, &fl, ipproto);
- if (IS_ERR(rt))
- return PTR_ERR(rt);
-
- ip_rt_put(rt);
+ if (skb->protocol == htons(ETH_P_8021Q))
+ length -= VLAN_HLEN;
- /* Generate egress_tun_info based on tun_info,
- * saddr, tp_src and tp_dst
- */
- ip_tunnel_key_init(&egress_tun_info->key,
- fl.saddr, tun_key->u.ipv4.dst,
- tun_key->tos,
- tun_key->ttl,
- tp_src, tp_dst,
- tun_key->tun_id,
- tun_key->tun_flags);
- egress_tun_info->options_len = tun_info->options_len;
- egress_tun_info->mode = tun_info->mode;
- upcall->egress_tun_opts = ip_tunnel_info_opts(egress_tun_info);
- return 0;
+ return length;
}
-EXPORT_SYMBOL_GPL(ovs_tunnel_get_egress_info);
-int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
- struct dp_upcall_info *upcall)
+void ovs_vport_send(struct vport *vport, struct sk_buff *skb)
{
- /* get_egress_tun_info() is only implemented on tunnel ports. */
- if (unlikely(!vport->ops->get_egress_tun_info))
- return -EINVAL;
+ int mtu = vport->dev->mtu;
+
+ if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) {
+ net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
+ vport->dev->name,
+ packet_length(skb), mtu);
+ vport->dev->stats.tx_errors++;
+ goto drop;
+ }
+
+ skb->dev = vport->dev;
+ vport->ops->send(skb);
+ return;
- return vport->ops->get_egress_tun_info(vport, skb, upcall);
+drop:
+ kfree_skb(skb);
}
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index a413f3ae6a7b..bdfd82a7c064 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -27,7 +27,6 @@
#include <linux/skbuff.h>
#include <linux/spinlock.h>
#include <linux/u64_stats_sync.h>
-#include <net/route.h>
#include "datapath.h"
@@ -53,16 +52,6 @@ int ovs_vport_set_upcall_portids(struct vport *, const struct nlattr *pids);
int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *);
u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *);
-int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall,
- struct net *net,
- struct sk_buff *,
- u8 ipproto,
- __be16 tp_src,
- __be16 tp_dst);
-
-int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
- struct dp_upcall_info *upcall);
-
/**
* struct vport_portids - array of netlink portids of a vport.
* must be protected by rcu.
@@ -140,8 +129,6 @@ struct vport_parms {
* have any configuration.
* @send: Send a packet on the device.
* zero for dropped packets or negative for error.
- * @get_egress_tun_info: Get the egress tunnel 5-tuple and other info for
- * a packet.
*/
struct vport_ops {
enum ovs_vport_type type;
@@ -153,10 +140,7 @@ struct vport_ops {
int (*set_options)(struct vport *, struct nlattr *);
int (*get_options)(const struct vport *, struct sk_buff *);
- void (*send)(struct vport *, struct sk_buff *);
- int (*get_egress_tun_info)(struct vport *, struct sk_buff *,
- struct dp_upcall_info *upcall);
-
+ netdev_tx_t (*send) (struct sk_buff *skb);
struct module *owner;
struct list_head list;
};
@@ -234,9 +218,6 @@ static inline struct rtable *ovs_tunnel_route_lookup(struct net *net,
return rt;
}
-static inline void ovs_vport_send(struct vport *vport, struct sk_buff *skb)
-{
- vport->ops->send(vport, skb);
-}
+void ovs_vport_send(struct vport *vport, struct sk_buff *skb);
#endif /* vport.h */