diff options
Diffstat (limited to 'net/openvswitch')
-rw-r--r-- | net/openvswitch/actions.c | 45 | ||||
-rw-r--r-- | net/openvswitch/conntrack.c | 143 | ||||
-rw-r--r-- | net/openvswitch/conntrack.h | 9 | ||||
-rw-r--r-- | net/openvswitch/datapath.c | 8 | ||||
-rw-r--r-- | net/openvswitch/datapath.h | 1 | ||||
-rw-r--r-- | net/openvswitch/flow.c | 4 | ||||
-rw-r--r-- | net/openvswitch/flow.h | 3 | ||||
-rw-r--r-- | net/openvswitch/flow_netlink.c | 171 | ||||
-rw-r--r-- | net/openvswitch/flow_netlink.h | 6 | ||||
-rw-r--r-- | net/openvswitch/flow_table.c | 5 | ||||
-rw-r--r-- | net/openvswitch/vport-geneve.c | 15 | ||||
-rw-r--r-- | net/openvswitch/vport-gre.c | 10 | ||||
-rw-r--r-- | net/openvswitch/vport-internal_dev.c | 54 | ||||
-rw-r--r-- | net/openvswitch/vport-netdev.c | 33 | ||||
-rw-r--r-- | net/openvswitch/vport-netdev.h | 1 | ||||
-rw-r--r-- | net/openvswitch/vport-vxlan.c | 21 | ||||
-rw-r--r-- | net/openvswitch/vport.c | 121 | ||||
-rw-r--r-- | net/openvswitch/vport.h | 23 |
18 files changed, 339 insertions, 334 deletions
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 315f5330b6e5..c88d0f2d3e01 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -620,7 +620,7 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key, return 0; } -static int ovs_vport_output(struct sock *sock, struct sk_buff *skb) +static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct ovs_frag_data *data = this_cpu_ptr(&ovs_frag_data_storage); struct vport *vport = data->vport; @@ -679,12 +679,12 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb) skb_pull(skb, hlen); } -static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru, - __be16 ethertype) +static void ovs_fragment(struct net *net, struct vport *vport, + struct sk_buff *skb, u16 mru, __be16 ethertype) { if (skb_network_offset(skb) > MAX_L2_LEN) { OVS_NLERR(1, "L2 header too long to fragment"); - return; + goto err; } if (ethertype == htons(ETH_P_IP)) { @@ -700,7 +700,7 @@ static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru, skb_dst_set_noref(skb, &ovs_dst); IPCB(skb)->frag_max_size = mru; - ip_do_fragment(skb->sk, skb, ovs_vport_output); + ip_do_fragment(net, skb->sk, skb, ovs_vport_output); refdst_drop(orig_dst); } else if (ethertype == htons(ETH_P_IPV6)) { const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); @@ -708,8 +708,7 @@ static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru, struct rt6_info ovs_rt; if (!v6ops) { - kfree_skb(skb); - return; + goto err; } prepare_frag(vport, skb); @@ -722,14 +721,18 @@ static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru, skb_dst_set_noref(skb, &ovs_rt.dst); IP6CB(skb)->frag_max_size = mru; - v6ops->fragment(skb->sk, skb, ovs_vport_output); + v6ops->fragment(net, skb->sk, skb, ovs_vport_output); refdst_drop(orig_dst); } else { WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.", ovs_vport_name(vport), ntohs(ethertype), mru, vport->dev->mtu); - kfree_skb(skb); + goto err; } + + return; +err: + kfree_skb(skb); } static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port, @@ -743,6 +746,7 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port, if (likely(!mru || (skb->len <= mru + ETH_HLEN))) { ovs_vport_send(vport, skb); } else if (mru <= vport->dev->mtu) { + struct net *net = read_pnet(&dp->net); __be16 ethertype = key->eth.type; if (!is_flow_key_valid(key)) { @@ -752,7 +756,7 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port, ethertype = vlan_get_protocol(skb); } - ovs_fragment(vport, skb, mru, ethertype); + ovs_fragment(net, vport, skb, mru, ethertype); } else { kfree_skb(skb); } @@ -765,7 +769,6 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, const struct nlattr *attr, const struct nlattr *actions, int actions_len) { - struct ip_tunnel_info info; struct dp_upcall_info upcall; const struct nlattr *a; int rem; @@ -793,11 +796,9 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, if (vport) { int err; - upcall.egress_tun_info = &info; - err = ovs_vport_get_egress_tun_info(vport, skb, - &upcall); - if (err) - upcall.egress_tun_info = NULL; + err = dev_fill_metadata_dst(vport->dev, skb); + if (!err) + upcall.egress_tun_info = skb_tunnel_info(skb); } break; @@ -968,7 +969,7 @@ static int execute_masked_set_action(struct sk_buff *skb, case OVS_KEY_ATTR_CT_STATE: case OVS_KEY_ATTR_CT_ZONE: case OVS_KEY_ATTR_CT_MARK: - case OVS_KEY_ATTR_CT_LABEL: + case OVS_KEY_ATTR_CT_LABELS: err = -EINVAL; break; } @@ -1099,12 +1100,18 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, break; case OVS_ACTION_ATTR_CT: + if (!is_flow_key_valid(key)) { + err = ovs_flow_key_update(skb, key); + if (err) + return err; + } + err = ovs_ct_execute(ovs_dp_get_net(dp), skb, key, nla_data(a)); /* Hide stolen IP fragments from user space. */ - if (err == -EINPROGRESS) - return 0; + if (err) + return err == -EINPROGRESS ? 0 : err; break; } diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 002a755fa07e..c2cc11168fd5 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -37,9 +37,9 @@ struct md_mark { }; /* Metadata label for masked write to conntrack label. */ -struct md_label { - struct ovs_key_ct_label value; - struct ovs_key_ct_label mask; +struct md_labels { + struct ovs_key_ct_labels value; + struct ovs_key_ct_labels mask; }; /* Conntrack action context for execution. */ @@ -47,10 +47,10 @@ struct ovs_conntrack_info { struct nf_conntrack_helper *helper; struct nf_conntrack_zone zone; struct nf_conn *ct; - u32 flags; + u8 commit : 1; u16 family; struct md_mark mark; - struct md_label label; + struct md_labels labels; }; static u16 key_to_nfproto(const struct sw_flow_key *key) @@ -109,21 +109,21 @@ static u32 ovs_ct_get_mark(const struct nf_conn *ct) #endif } -static void ovs_ct_get_label(const struct nf_conn *ct, - struct ovs_key_ct_label *label) +static void ovs_ct_get_labels(const struct nf_conn *ct, + struct ovs_key_ct_labels *labels) { struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL; if (cl) { size_t len = cl->words * sizeof(long); - if (len > OVS_CT_LABEL_LEN) - len = OVS_CT_LABEL_LEN; - else if (len < OVS_CT_LABEL_LEN) - memset(label, 0, OVS_CT_LABEL_LEN); - memcpy(label, cl->bits, len); + if (len > OVS_CT_LABELS_LEN) + len = OVS_CT_LABELS_LEN; + else if (len < OVS_CT_LABELS_LEN) + memset(labels, 0, OVS_CT_LABELS_LEN); + memcpy(labels, cl->bits, len); } else { - memset(label, 0, OVS_CT_LABEL_LEN); + memset(labels, 0, OVS_CT_LABELS_LEN); } } @@ -134,7 +134,7 @@ static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state, key->ct.state = state; key->ct.zone = zone->id; key->ct.mark = ovs_ct_get_mark(ct); - ovs_ct_get_label(ct, &key->ct.label); + ovs_ct_get_labels(ct, &key->ct.labels); } /* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has @@ -151,6 +151,8 @@ static void ovs_ct_update_key(const struct sk_buff *skb, ct = nf_ct_get(skb, &ctinfo); if (ct) { state = ovs_ct_get_state(ctinfo); + if (!nf_ct_is_confirmed(ct)) + state |= OVS_CS_F_NEW; if (ct->master) state |= OVS_CS_F_RELATED; zone = nf_ct_zone(ct); @@ -167,7 +169,7 @@ void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key) int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb) { - if (nla_put_u8(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state)) + if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state)) return -EMSGSIZE; if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && @@ -179,8 +181,8 @@ int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb) return -EMSGSIZE; if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && - nla_put(skb, OVS_KEY_ATTR_CT_LABEL, sizeof(key->ct.label), - &key->ct.label)) + nla_put(skb, OVS_KEY_ATTR_CT_LABELS, sizeof(key->ct.labels), + &key->ct.labels)) return -EMSGSIZE; return 0; @@ -213,18 +215,15 @@ static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key, #endif } -static int ovs_ct_set_label(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_ct_label *label, - const struct ovs_key_ct_label *mask) +static int ovs_ct_set_labels(struct sk_buff *skb, struct sw_flow_key *key, + const struct ovs_key_ct_labels *labels, + const struct ovs_key_ct_labels *mask) { enum ip_conntrack_info ctinfo; struct nf_conn_labels *cl; struct nf_conn *ct; int err; - if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)) - return -ENOTSUPP; - /* The connection could be invalid, in which case set_label is no-op.*/ ct = nf_ct_get(skb, &ctinfo); if (!ct) @@ -235,15 +234,15 @@ static int ovs_ct_set_label(struct sk_buff *skb, struct sw_flow_key *key, nf_ct_labels_ext_add(ct); cl = nf_ct_labels_find(ct); } - if (!cl || cl->words * sizeof(long) < OVS_CT_LABEL_LEN) + if (!cl || cl->words * sizeof(long) < OVS_CT_LABELS_LEN) return -ENOSPC; - err = nf_connlabels_replace(ct, (u32 *)label, (u32 *)mask, - OVS_CT_LABEL_LEN / sizeof(u32)); + err = nf_connlabels_replace(ct, (u32 *)labels, (u32 *)mask, + OVS_CT_LABELS_LEN / sizeof(u32)); if (err) return err; - ovs_ct_get_label(ct, &key->ct.label); + ovs_ct_get_labels(ct, &key->ct.labels); return 0; } @@ -294,6 +293,9 @@ static int ovs_ct_helper(struct sk_buff *skb, u16 proto) return helper->help(skb, protoff, ct, ctinfo); } +/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero + * value if 'skb' is freed. + */ static int handle_fragments(struct net *net, struct sw_flow_key *key, u16 zone, struct sk_buff *skb) { @@ -304,32 +306,40 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, int err; memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); - err = ip_defrag(skb, user); + err = ip_defrag(net, skb, user); if (err) return err; ovs_cb.mru = IPCB(skb)->frag_max_size; - } else if (key->eth.type == htons(ETH_P_IPV6)) { #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) + } else if (key->eth.type == htons(ETH_P_IPV6)) { enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; struct sk_buff *reasm; memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); - reasm = nf_ct_frag6_gather(skb, user); + reasm = nf_ct_frag6_gather(net, skb, user); if (!reasm) return -EINPROGRESS; - if (skb == reasm) + if (skb == reasm) { + kfree_skb(skb); return -EINVAL; + } + + /* Don't free 'skb' even though it is one of the original + * fragments, as we're going to morph it into the head. + */ + skb_get(skb); + nf_ct_frag6_consume_orig(reasm); key->ip.proto = ipv6_hdr(reasm)->nexthdr; skb_morph(skb, reasm); + skb->next = reasm->next; consume_skb(reasm); ovs_cb.mru = IP6CB(skb)->frag_max_size; -#else - return -EPFNOSUPPORT; #endif } else { + kfree_skb(skb); return -EPFNOSUPPORT; } @@ -347,7 +357,7 @@ ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone, { struct nf_conntrack_tuple tuple; - if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, &tuple)) + if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple)) return NULL; return __nf_ct_expect_find(net, zone, &tuple); } @@ -377,7 +387,7 @@ static bool skb_nfct_cached(const struct net *net, const struct sk_buff *skb, return true; } -static int __ovs_ct_lookup(struct net *net, const struct sw_flow_key *key, +static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, const struct ovs_conntrack_info *info, struct sk_buff *skb) { @@ -408,6 +418,8 @@ static int __ovs_ct_lookup(struct net *net, const struct sw_flow_key *key, } } + ovs_ct_update_key(skb, key, true); + return 0; } @@ -430,8 +442,6 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key, err = __ovs_ct_lookup(net, key, info, skb); if (err) return err; - - ovs_ct_update_key(skb, key, true); } return 0; @@ -460,22 +470,23 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, if (nf_conntrack_confirm(skb) != NF_ACCEPT) return -EINVAL; - ovs_ct_update_key(skb, key, true); - return 0; } -static bool label_nonzero(const struct ovs_key_ct_label *label) +static bool labels_nonzero(const struct ovs_key_ct_labels *labels) { size_t i; - for (i = 0; i < sizeof(*label); i++) - if (label->ct_label[i]) + for (i = 0; i < sizeof(*labels); i++) + if (labels->ct_labels[i]) return true; return false; } +/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero + * value if 'skb' is freed. + */ int ovs_ct_execute(struct net *net, struct sk_buff *skb, struct sw_flow_key *key, const struct ovs_conntrack_info *info) @@ -493,7 +504,7 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb, return err; } - if (info->flags & OVS_CT_F_COMMIT) + if (info->commit) err = ovs_ct_commit(net, key, info, skb); else err = ovs_ct_lookup(net, key, info, skb); @@ -506,11 +517,13 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb, if (err) goto err; } - if (label_nonzero(&info->label.mask)) - err = ovs_ct_set_label(skb, key, &info->label.value, - &info->label.mask); + if (labels_nonzero(&info->labels.mask)) + err = ovs_ct_set_labels(skb, key, &info->labels.value, + &info->labels.mask); err: skb_push(skb, nh_ofs); + if (err) + kfree_skb(skb); return err; } @@ -539,14 +552,13 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name, } static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { - [OVS_CT_ATTR_FLAGS] = { .minlen = sizeof(u32), - .maxlen = sizeof(u32) }, + [OVS_CT_ATTR_COMMIT] = { .minlen = 0, .maxlen = 0 }, [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16), .maxlen = sizeof(u16) }, [OVS_CT_ATTR_MARK] = { .minlen = sizeof(struct md_mark), .maxlen = sizeof(struct md_mark) }, - [OVS_CT_ATTR_LABEL] = { .minlen = sizeof(struct md_label), - .maxlen = sizeof(struct md_label) }, + [OVS_CT_ATTR_LABELS] = { .minlen = sizeof(struct md_labels), + .maxlen = sizeof(struct md_labels) }, [OVS_CT_ATTR_HELPER] = { .minlen = 1, .maxlen = NF_CT_HELPER_NAME_LEN } }; @@ -576,8 +588,8 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, } switch (type) { - case OVS_CT_ATTR_FLAGS: - info->flags = nla_get_u32(a); + case OVS_CT_ATTR_COMMIT: + info->commit = true; break; #ifdef CONFIG_NF_CONNTRACK_ZONES case OVS_CT_ATTR_ZONE: @@ -588,15 +600,23 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, case OVS_CT_ATTR_MARK: { struct md_mark *mark = nla_data(a); + if (!mark->mask) { + OVS_NLERR(log, "ct_mark mask cannot be 0"); + return -EINVAL; + } info->mark = *mark; break; } #endif #ifdef CONFIG_NF_CONNTRACK_LABELS - case OVS_CT_ATTR_LABEL: { - struct md_label *label = nla_data(a); + case OVS_CT_ATTR_LABELS: { + struct md_labels *labels = nla_data(a); - info->label = *label; + if (!labels_nonzero(&labels->mask)) { + OVS_NLERR(log, "ct_labels mask cannot be 0"); + return -EINVAL; + } + info->labels = *labels; break; } #endif @@ -633,7 +653,7 @@ bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr) attr == OVS_KEY_ATTR_CT_MARK) return true; if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && - attr == OVS_KEY_ATTR_CT_LABEL) { + attr == OVS_KEY_ATTR_CT_LABELS) { struct ovs_net *ovs_net = net_generic(net, ovs_net_id); return ovs_net->xt_label; @@ -701,18 +721,19 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, if (!start) return -EMSGSIZE; - if (nla_put_u32(skb, OVS_CT_ATTR_FLAGS, ct_info->flags)) + if (ct_info->commit && nla_put_flag(skb, OVS_CT_ATTR_COMMIT)) return -EMSGSIZE; if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id)) return -EMSGSIZE; - if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && + if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && ct_info->mark.mask && nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark), &ct_info->mark)) return -EMSGSIZE; if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && - nla_put(skb, OVS_CT_ATTR_LABEL, sizeof(ct_info->label), - &ct_info->label)) + labels_nonzero(&ct_info->labels.mask) && + nla_put(skb, OVS_CT_ATTR_LABELS, sizeof(ct_info->labels), + &ct_info->labels)) return -EMSGSIZE; if (ct_info->helper) { if (nla_put_string(skb, OVS_CT_ATTR_HELPER, @@ -737,7 +758,7 @@ void ovs_ct_free_action(const struct nlattr *a) void ovs_ct_init(struct net *net) { - unsigned int n_bits = sizeof(struct ovs_key_ct_label) * BITS_PER_BYTE; + unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE; struct ovs_net *ovs_net = net_generic(net, ovs_net_id); if (nf_connlabels_get(net, n_bits)) { diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h index 43f5dd7a5577..a7544f405c16 100644 --- a/net/openvswitch/conntrack.h +++ b/net/openvswitch/conntrack.h @@ -34,6 +34,10 @@ int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *, void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key); int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb); void ovs_ct_free_action(const struct nlattr *a); + +#define CT_SUPPORTED_MASK (OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | \ + OVS_CS_F_RELATED | OVS_CS_F_REPLY_DIR | \ + OVS_CS_F_INVALID | OVS_CS_F_TRACKED) #else #include <linux/errno.h> @@ -63,6 +67,7 @@ static inline int ovs_ct_execute(struct net *net, struct sk_buff *skb, struct sw_flow_key *key, const struct ovs_conntrack_info *info) { + kfree_skb(skb); return -ENOTSUPP; } @@ -72,7 +77,7 @@ static inline void ovs_ct_fill_key(const struct sk_buff *skb, key->ct.state = 0; key->ct.zone = 0; key->ct.mark = 0; - memset(&key->ct.label, 0, sizeof(key->ct.label)); + memset(&key->ct.labels, 0, sizeof(key->ct.labels)); } static inline int ovs_ct_put_key(const struct sw_flow_key *key, @@ -82,5 +87,7 @@ static inline int ovs_ct_put_key(const struct sw_flow_key *key, } static inline void ovs_ct_free_action(const struct nlattr *a) { } + +#define CT_SUPPORTED_MASK 0 #endif /* CONFIG_NF_CONNTRACK */ #endif /* ovs_conntrack.h */ diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index b816ff871528..5633172b791a 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -91,8 +91,7 @@ static bool ovs_must_notify(struct genl_family *family, struct genl_info *info, static void ovs_notify(struct genl_family *family, struct sk_buff *skb, struct genl_info *info) { - genl_notify(family, skb, genl_info_net(info), info->snd_portid, - 0, info->nlhdr, GFP_KERNEL); + genl_notify(family, skb, info, 0, GFP_KERNEL); } /** @@ -490,9 +489,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, if (upcall_info->egress_tun_info) { nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY); - err = ovs_nla_put_egress_tunnel_key(user_skb, - upcall_info->egress_tun_info, - upcall_info->egress_tun_opts); + err = ovs_nla_put_tunnel_info(user_skb, + upcall_info->egress_tun_info); BUG_ON(err); nla_nest_end(user_skb, nla); } diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index f88038a99f44..67bdecd9fdc1 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -117,7 +117,6 @@ struct ovs_skb_cb { */ struct dp_upcall_info { struct ip_tunnel_info *egress_tun_info; - const void *egress_tun_opts; const struct nlattr *userdata; const struct nlattr *actions; int actions_len; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index c8db44ab2ee7..0ea128eeeab2 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -698,8 +698,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, { /* Extract metadata from packet. */ if (tun_info) { - if (ip_tunnel_info_af(tun_info) != AF_INET) - return -EINVAL; + key->tun_proto = ip_tunnel_info_af(tun_info); memcpy(&key->tun_key, &tun_info->key, sizeof(key->tun_key)); if (tun_info->options_len) { @@ -714,6 +713,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, key->tun_opts_len = 0; } } else { + key->tun_proto = 0; key->tun_opts_len = 0; memset(&key->tun_key, 0, sizeof(key->tun_key)); } diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index fe527d2dd4b7..1d055c559eaf 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -63,6 +63,7 @@ struct sw_flow_key { u32 skb_mark; /* SKB mark. */ u16 in_port; /* Input switch port (or DP_MAX_PORTS). */ } __packed phy; /* Safe when right after 'tun_key'. */ + u8 tun_proto; /* Protocol of encapsulating tunnel. */ u32 ovs_flow_hash; /* Datapath computed hash value. */ u32 recirc_id; /* Recirculation ID. */ struct { @@ -116,7 +117,7 @@ struct sw_flow_key { u16 zone; u32 mark; u8 state; - struct ovs_key_ct_label label; + struct ovs_key_ct_labels labels; } ct; } __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */ diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 5c030a4d7338..907d6fd28ede 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -262,8 +262,8 @@ size_t ovs_tun_key_attr_size(void) * updating this function. */ return nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */ - + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */ - + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */ + + nla_total_size(16) /* OVS_TUNNEL_KEY_ATTR_IPV[46]_SRC */ + + nla_total_size(16) /* OVS_TUNNEL_KEY_ATTR_IPV[46]_DST */ + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */ + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ @@ -291,10 +291,10 @@ size_t ovs_key_attr_size(void) + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ + nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */ + nla_total_size(4) /* OVS_KEY_ATTR_RECIRC_ID */ - + nla_total_size(1) /* OVS_KEY_ATTR_CT_STATE */ + + nla_total_size(4) /* OVS_KEY_ATTR_CT_STATE */ + nla_total_size(2) /* OVS_KEY_ATTR_CT_ZONE */ + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */ - + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABEL */ + + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */ + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */ @@ -323,6 +323,8 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_VARIABLE }, [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED, .next = ovs_vxlan_ext_key_lens }, + [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, + [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) }, }; /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ @@ -349,10 +351,10 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, .next = ovs_tunnel_key_lens, }, [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, - [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u8) }, + [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u32) }, [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, - [OVS_KEY_ATTR_CT_LABEL] = { .len = sizeof(struct ovs_key_ct_label) }, + [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) }, }; static bool check_attr_len(unsigned int attr_len, unsigned int expected_len) @@ -542,15 +544,15 @@ static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr, return 0; } -static int ipv4_tun_from_nlattr(const struct nlattr *attr, - struct sw_flow_match *match, bool is_mask, - bool log) +static int ip_tun_from_nlattr(const struct nlattr *attr, + struct sw_flow_match *match, bool is_mask, + bool log) { - struct nlattr *a; - int rem; - bool ttl = false; + bool ttl = false, ipv4 = false, ipv6 = false; __be16 tun_flags = 0; int opts_type = 0; + struct nlattr *a; + int rem; nla_for_each_nested(a, attr, rem) { int type = nla_type(a); @@ -578,10 +580,22 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src, nla_get_in_addr(a), is_mask); + ipv4 = true; break; case OVS_TUNNEL_KEY_ATTR_IPV4_DST: SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.dst, nla_get_in_addr(a), is_mask); + ipv4 = true; + break; + case OVS_TUNNEL_KEY_ATTR_IPV6_SRC: + SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst, + nla_get_in6_addr(a), is_mask); + ipv6 = true; + break; + case OVS_TUNNEL_KEY_ATTR_IPV6_DST: + SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst, + nla_get_in6_addr(a), is_mask); + ipv6 = true; break; case OVS_TUNNEL_KEY_ATTR_TOS: SW_FLOW_KEY_PUT(match, tun_key.tos, @@ -636,28 +650,46 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, opts_type = type; break; default: - OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d", + OVS_NLERR(log, "Unknown IP tunnel attribute %d", type); return -EINVAL; } } SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask); + if (is_mask) + SW_FLOW_KEY_MEMSET_FIELD(match, tun_proto, 0xff, true); + else + SW_FLOW_KEY_PUT(match, tun_proto, ipv6 ? AF_INET6 : AF_INET, + false); if (rem > 0) { - OVS_NLERR(log, "IPv4 tunnel attribute has %d unknown bytes.", + OVS_NLERR(log, "IP tunnel attribute has %d unknown bytes.", rem); return -EINVAL; } + if (ipv4 && ipv6) { + OVS_NLERR(log, "Mixed IPv4 and IPv6 tunnel attributes"); + return -EINVAL; + } + if (!is_mask) { - if (!match->key->tun_key.u.ipv4.dst) { + if (!ipv4 && !ipv6) { + OVS_NLERR(log, "IP tunnel dst address not specified"); + return -EINVAL; + } + if (ipv4 && !match->key->tun_key.u.ipv4.dst) { OVS_NLERR(log, "IPv4 tunnel dst address is zero"); return -EINVAL; } + if (ipv6 && ipv6_addr_any(&match->key->tun_key.u.ipv6.dst)) { + OVS_NLERR(log, "IPv6 tunnel dst address is zero"); + return -EINVAL; + } if (!ttl) { - OVS_NLERR(log, "IPv4 tunnel TTL not specified."); + OVS_NLERR(log, "IP tunnel TTL not specified."); return -EINVAL; } } @@ -682,21 +714,36 @@ static int vxlan_opt_to_nlattr(struct sk_buff *skb, return 0; } -static int __ipv4_tun_to_nlattr(struct sk_buff *skb, - const struct ip_tunnel_key *output, - const void *tun_opts, int swkey_tun_opts_len) +static int __ip_tun_to_nlattr(struct sk_buff *skb, + const struct ip_tunnel_key *output, + const void *tun_opts, int swkey_tun_opts_len, + unsigned short tun_proto) { if (output->tun_flags & TUNNEL_KEY && nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) return -EMSGSIZE; - if (output->u.ipv4.src && - nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, - output->u.ipv4.src)) - return -EMSGSIZE; - if (output->u.ipv4.dst && - nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, - output->u.ipv4.dst)) - return -EMSGSIZE; + switch (tun_proto) { + case AF_INET: + if (output->u.ipv4.src && + nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, + output->u.ipv4.src)) + return -EMSGSIZE; + if (output->u.ipv4.dst && + nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, + output->u.ipv4.dst)) + return -EMSGSIZE; + break; + case AF_INET6: + if (!ipv6_addr_any(&output->u.ipv6.src) && + nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_SRC, + &output->u.ipv6.src)) + return -EMSGSIZE; + if (!ipv6_addr_any(&output->u.ipv6.dst) && + nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_DST, + &output->u.ipv6.dst)) + return -EMSGSIZE; + break; + } if (output->tos && nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->tos)) return -EMSGSIZE; @@ -717,7 +764,7 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb, if ((output->tun_flags & TUNNEL_OAM) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) return -EMSGSIZE; - if (tun_opts) { + if (swkey_tun_opts_len) { if (output->tun_flags & TUNNEL_GENEVE_OPT && nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, swkey_tun_opts_len, tun_opts)) @@ -730,9 +777,10 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb, return 0; } -static int ipv4_tun_to_nlattr(struct sk_buff *skb, - const struct ip_tunnel_key *output, - const void *tun_opts, int swkey_tun_opts_len) +static int ip_tun_to_nlattr(struct sk_buff *skb, + const struct ip_tunnel_key *output, + const void *tun_opts, int swkey_tun_opts_len, + unsigned short tun_proto) { struct nlattr *nla; int err; @@ -741,7 +789,8 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb, if (!nla) return -EMSGSIZE; - err = __ipv4_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len); + err = __ip_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len, + tun_proto); if (err) return err; @@ -749,13 +798,13 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb, return 0; } -int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb, - const struct ip_tunnel_info *egress_tun_info, - const void *egress_tun_opts) +int ovs_nla_put_tunnel_info(struct sk_buff *skb, + struct ip_tunnel_info *tun_info) { - return __ipv4_tun_to_nlattr(skb, &egress_tun_info->key, - egress_tun_opts, - egress_tun_info->options_len); + return __ip_tun_to_nlattr(skb, &tun_info->key, + ip_tunnel_info_opts(tun_info), + tun_info->options_len, + ip_tunnel_info_af(tun_info)); } static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, @@ -806,15 +855,21 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK); } if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { - if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, - is_mask, log) < 0) + if (ip_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, + is_mask, log) < 0) return -EINVAL; *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); } if (*attrs & (1 << OVS_KEY_ATTR_CT_STATE) && ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) { - u8 ct_state = nla_get_u8(a[OVS_KEY_ATTR_CT_STATE]); + u32 ct_state = nla_get_u32(a[OVS_KEY_ATTR_CT_STATE]); + + if (ct_state & ~CT_SUPPORTED_MASK) { + OVS_NLERR(log, "ct_state flags %08x unsupported", + ct_state); + return -EINVAL; + } SW_FLOW_KEY_PUT(match, ct.state, ct_state, is_mask); *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE); @@ -833,14 +888,14 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, SW_FLOW_KEY_PUT(match, ct.mark, mark, is_mask); *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_MARK); } - if (*attrs & (1 << OVS_KEY_ATTR_CT_LABEL) && - ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABEL)) { - const struct ovs_key_ct_label *cl; + if (*attrs & (1 << OVS_KEY_ATTR_CT_LABELS) && + ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABELS)) { + const struct ovs_key_ct_labels *cl; - cl = nla_data(a[OVS_KEY_ATTR_CT_LABEL]); - SW_FLOW_KEY_MEMCPY(match, ct.label, cl->ct_label, + cl = nla_data(a[OVS_KEY_ATTR_CT_LABELS]); + SW_FLOW_KEY_MEMCPY(match, ct.labels, cl->ct_labels, sizeof(*cl), is_mask); - *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABEL); + *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS); } return 0; } @@ -1093,6 +1148,9 @@ static void nlattr_set(struct nlattr *attr, u8 val, } else { memset(nla_data(nla), val, nla_len(nla)); } + + if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE) + *(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK; } } @@ -1194,7 +1252,7 @@ int ovs_nla_get_match(struct net *net, struct sw_flow_match *match, /* The userspace does not send tunnel attributes that * are 0, but we should not wildcard them nonetheless. */ - if (match->key->tun_key.u.ipv4.dst) + if (match->key->tun_proto) SW_FLOW_KEY_MEMSET_FIELD(match, tun_key, 0xff, true); @@ -1367,14 +1425,14 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority)) goto nla_put_failure; - if ((swkey->tun_key.u.ipv4.dst || is_mask)) { + if ((swkey->tun_proto || is_mask)) { const void *opts = NULL; if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT) opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len); - if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts, - swkey->tun_opts_len)) + if (ip_tun_to_nlattr(skb, &output->tun_key, opts, + swkey->tun_opts_len, swkey->tun_proto)) goto nla_put_failure; } @@ -1877,7 +1935,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, int err = 0, start, opts_type; ovs_match_init(&match, &key, NULL); - opts_type = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log); + opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log); if (opts_type < 0) return opts_type; @@ -1913,6 +1971,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, tun_info = &tun_dst->u.tun_info; tun_info->mode = IP_TUNNEL_INFO_TX; + if (key.tun_proto == AF_INET6) + tun_info->mode |= IP_TUNNEL_INFO_IPV6; tun_info->key = key.tun_key; /* We need to store the options in the action itself since @@ -1973,7 +2033,7 @@ static int validate_set(const struct nlattr *a, case OVS_KEY_ATTR_PRIORITY: case OVS_KEY_ATTR_SKB_MARK: case OVS_KEY_ATTR_CT_MARK: - case OVS_KEY_ATTR_CT_LABEL: + case OVS_KEY_ATTR_CT_LABELS: case OVS_KEY_ATTR_ETHERNET: break; @@ -2374,10 +2434,7 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) if (!start) return -EMSGSIZE; - err = ipv4_tun_to_nlattr(skb, &tun_info->key, - tun_info->options_len ? - ip_tunnel_info_opts(tun_info) : NULL, - tun_info->options_len); + err = ovs_nla_put_tunnel_info(skb, tun_info); if (err) return err; nla_nest_end(skb, start); diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 6ca3f0baf449..47dd142eca1c 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -55,9 +55,9 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb); int ovs_nla_get_match(struct net *, struct sw_flow_match *, const struct nlattr *key, const struct nlattr *mask, bool log); -int ovs_nla_put_egress_tunnel_key(struct sk_buff *, - const struct ip_tunnel_info *, - const void *egress_tun_opts); + +int ovs_nla_put_tunnel_info(struct sk_buff *skb, + struct ip_tunnel_info *tun_info); bool ovs_nla_get_ufid(struct sw_flow_id *, const struct nlattr *, bool log); int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid, diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index f2ea83ba4763..d073fff82fdb 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -93,7 +93,8 @@ struct sw_flow *ovs_flow_alloc(void) /* Initialize the default stat node. */ stats = kmem_cache_alloc_node(flow_stats_cache, - GFP_KERNEL | __GFP_ZERO, 0); + GFP_KERNEL | __GFP_ZERO, + node_online(0) ? 0 : NUMA_NO_NODE); if (!stats) goto err; @@ -427,7 +428,7 @@ static u32 flow_hash(const struct sw_flow_key *key, static int flow_key_start(const struct sw_flow_key *key) { - if (key->tun_key.u.ipv4.dst) + if (key->tun_proto) return 0; else return rounddown(offsetof(struct sw_flow_key, phy), diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index 2735e9c4a3b8..efb736bb6855 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -52,18 +52,6 @@ static int geneve_get_options(const struct vport *vport, return 0; } -static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct dp_upcall_info *upcall) -{ - struct geneve_port *geneve_port = geneve_vport(vport); - struct net *net = ovs_dp_get_net(vport->dp); - __be16 dport = htons(geneve_port->port_no); - __be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true); - - return ovs_tunnel_get_egress_info(upcall, ovs_dp_get_net(vport->dp), - skb, IPPROTO_UDP, sport, dport); -} - static struct vport *geneve_tnl_create(const struct vport_parms *parms) { struct net *net = ovs_dp_get_net(parms->dp); @@ -128,9 +116,8 @@ static struct vport_ops ovs_geneve_vport_ops = { .create = geneve_create, .destroy = ovs_netdev_tunnel_destroy, .get_options = geneve_get_options, - .send = ovs_netdev_send, + .send = dev_queue_xmit, .owner = THIS_MODULE, - .get_egress_tun_info = geneve_get_egress_tun_info, }; static int __init ovs_geneve_tnl_init(void) diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index 4d24481669c9..c3257d78d3d2 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -84,18 +84,10 @@ static struct vport *gre_create(const struct vport_parms *parms) return ovs_netdev_link(vport, parms->name); } -static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct dp_upcall_info *upcall) -{ - return ovs_tunnel_get_egress_info(upcall, ovs_dp_get_net(vport->dp), - skb, IPPROTO_GRE, 0, 0); -} - static struct vport_ops ovs_gre_vport_ops = { .type = OVS_VPORT_TYPE_GRE, .create = gre_create, - .send = ovs_netdev_send, - .get_egress_tun_info = gre_get_egress_tun_info, + .send = dev_queue_xmit, .destroy = ovs_netdev_tunnel_destroy, .owner = THIS_MODULE, }; diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 388b8a6bf112..ec76398a792f 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -106,12 +106,45 @@ static void internal_dev_destructor(struct net_device *dev) free_netdev(dev); } +static struct rtnl_link_stats64 * +internal_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) +{ + int i; + + memset(stats, 0, sizeof(*stats)); + stats->rx_errors = dev->stats.rx_errors; + stats->tx_errors = dev->stats.tx_errors; + stats->tx_dropped = dev->stats.tx_dropped; + stats->rx_dropped = dev->stats.rx_dropped; + + for_each_possible_cpu(i) { + const struct pcpu_sw_netstats *percpu_stats; + struct pcpu_sw_netstats local_stats; + unsigned int start; + + percpu_stats = per_cpu_ptr(dev->tstats, i); + + do { + start = u64_stats_fetch_begin_irq(&percpu_stats->syncp); + local_stats = *percpu_stats; + } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start)); + + stats->rx_bytes += local_stats.rx_bytes; + stats->rx_packets += local_stats.rx_packets; + stats->tx_bytes += local_stats.tx_bytes; + stats->tx_packets += local_stats.tx_packets; + } + + return stats; +} + static const struct net_device_ops internal_dev_netdev_ops = { .ndo_open = internal_dev_open, .ndo_stop = internal_dev_stop, .ndo_start_xmit = internal_dev_xmit, .ndo_set_mac_address = eth_mac_addr, .ndo_change_mtu = internal_dev_change_mtu, + .ndo_get_stats64 = internal_get_stats, }; static struct rtnl_link_ops internal_dev_link_ops __read_mostly = { @@ -161,6 +194,11 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) err = -ENOMEM; goto error_free_vport; } + vport->dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!vport->dev->tstats) { + err = -ENOMEM; + goto error_free_netdev; + } dev_net_set(vport->dev, ovs_dp_get_net(vport->dp)); internal_dev = internal_dev_priv(vport->dev); @@ -173,7 +211,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) rtnl_lock(); err = register_netdevice(vport->dev); if (err) - goto error_free_netdev; + goto error_unlock; dev_set_promiscuity(vport->dev, 1); rtnl_unlock(); @@ -181,8 +219,10 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) return vport; -error_free_netdev: +error_unlock: rtnl_unlock(); + free_percpu(vport->dev->tstats); +error_free_netdev: free_netdev(vport->dev); error_free_vport: ovs_vport_free(vport); @@ -198,26 +238,25 @@ static void internal_dev_destroy(struct vport *vport) /* unregister_netdevice() waits for an RCU grace period. */ unregister_netdevice(vport->dev); - + free_percpu(vport->dev->tstats); rtnl_unlock(); } -static void internal_dev_recv(struct vport *vport, struct sk_buff *skb) +static netdev_tx_t internal_dev_recv(struct sk_buff *skb) { - struct net_device *netdev = vport->dev; + struct net_device *netdev = skb->dev; struct pcpu_sw_netstats *stats; if (unlikely(!(netdev->flags & IFF_UP))) { kfree_skb(skb); netdev->stats.rx_dropped++; - return; + return NETDEV_TX_OK; } skb_dst_drop(skb); nf_reset(skb); secpath_reset(skb); - skb->dev = netdev; skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, netdev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); @@ -229,6 +268,7 @@ static void internal_dev_recv(struct vport *vport, struct sk_buff *skb) u64_stats_update_end(&stats->syncp); netif_rx(skb); + return NETDEV_TX_OK; } static struct vport_ops ovs_internal_vport_ops = { diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index f7e8dcce7ada..b327368a3848 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -190,37 +190,6 @@ void ovs_netdev_tunnel_destroy(struct vport *vport) } EXPORT_SYMBOL_GPL(ovs_netdev_tunnel_destroy); -static unsigned int packet_length(const struct sk_buff *skb) -{ - unsigned int length = skb->len - ETH_HLEN; - - if (skb->protocol == htons(ETH_P_8021Q)) - length -= VLAN_HLEN; - - return length; -} - -void ovs_netdev_send(struct vport *vport, struct sk_buff *skb) -{ - int mtu = vport->dev->mtu; - - if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) { - net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n", - vport->dev->name, - packet_length(skb), mtu); - vport->dev->stats.tx_errors++; - goto drop; - } - - skb->dev = vport->dev; - dev_queue_xmit(skb); - return; - -drop: - kfree_skb(skb); -} -EXPORT_SYMBOL_GPL(ovs_netdev_send); - /* Returns null if this device is not attached to a datapath. */ struct vport *ovs_netdev_get_vport(struct net_device *dev) { @@ -235,7 +204,7 @@ static struct vport_ops ovs_netdev_vport_ops = { .type = OVS_VPORT_TYPE_NETDEV, .create = netdev_create, .destroy = netdev_destroy, - .send = ovs_netdev_send, + .send = dev_queue_xmit, }; int __init ovs_netdev_init(void) diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h index bf22fcedbc69..19e29c12adcc 100644 --- a/net/openvswitch/vport-netdev.h +++ b/net/openvswitch/vport-netdev.h @@ -27,7 +27,6 @@ struct vport *ovs_netdev_get_vport(struct net_device *dev); struct vport *ovs_netdev_link(struct vport *vport, const char *name); -void ovs_netdev_send(struct vport *vport, struct sk_buff *skb); void ovs_netdev_detach_dev(struct vport *); int __init ovs_netdev_init(void); diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index c11413d5075f..1605691d9414 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -146,31 +146,12 @@ static struct vport *vxlan_create(const struct vport_parms *parms) return ovs_netdev_link(vport, parms->name); } -static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct dp_upcall_info *upcall) -{ - struct vxlan_dev *vxlan = netdev_priv(vport->dev); - struct net *net = ovs_dp_get_net(vport->dp); - __be16 dst_port = vxlan_dev_dst_port(vxlan); - __be16 src_port; - int port_min; - int port_max; - - inet_get_local_port_range(net, &port_min, &port_max); - src_port = udp_flow_src_port(net, skb, 0, 0, true); - - return ovs_tunnel_get_egress_info(upcall, net, - skb, IPPROTO_UDP, - src_port, dst_port); -} - static struct vport_ops ovs_vxlan_netdev_vport_ops = { .type = OVS_VPORT_TYPE_VXLAN, .create = vxlan_create, .destroy = ovs_netdev_tunnel_destroy, .get_options = vxlan_get_options, - .send = ovs_netdev_send, - .get_egress_tun_info = vxlan_get_egress_tun_info, + .send = dev_queue_xmit, }; static int __init ovs_vxlan_tnl_init(void) diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index dc81dc619aa2..0ac0fd004d7e 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -280,35 +280,19 @@ void ovs_vport_del(struct vport *vport) */ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats) { - struct net_device *dev = vport->dev; - int i; - - memset(stats, 0, sizeof(*stats)); - stats->rx_errors = dev->stats.rx_errors; - stats->tx_errors = dev->stats.tx_errors; - stats->tx_dropped = dev->stats.tx_dropped; - stats->rx_dropped = dev->stats.rx_dropped; - - stats->rx_dropped += atomic_long_read(&dev->rx_dropped); - stats->tx_dropped += atomic_long_read(&dev->tx_dropped); - - for_each_possible_cpu(i) { - const struct pcpu_sw_netstats *percpu_stats; - struct pcpu_sw_netstats local_stats; - unsigned int start; - - percpu_stats = per_cpu_ptr(dev->tstats, i); - - do { - start = u64_stats_fetch_begin_irq(&percpu_stats->syncp); - local_stats = *percpu_stats; - } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start)); - - stats->rx_bytes += local_stats.rx_bytes; - stats->rx_packets += local_stats.rx_packets; - stats->tx_bytes += local_stats.tx_bytes; - stats->tx_packets += local_stats.tx_packets; - } + const struct rtnl_link_stats64 *dev_stats; + struct rtnl_link_stats64 temp; + + dev_stats = dev_get_stats(vport->dev, &temp); + stats->rx_errors = dev_stats->rx_errors; + stats->tx_errors = dev_stats->tx_errors; + stats->tx_dropped = dev_stats->tx_dropped; + stats->rx_dropped = dev_stats->rx_dropped; + + stats->rx_bytes = dev_stats->rx_bytes; + stats->rx_packets = dev_stats->rx_packets; + stats->tx_bytes = dev_stats->tx_bytes; + stats->tx_packets = dev_stats->tx_packets; } /** @@ -460,6 +444,15 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb, OVS_CB(skb)->input_vport = vport; OVS_CB(skb)->mru = 0; + if (unlikely(dev_net(skb->dev) != ovs_dp_get_net(vport->dp))) { + u32 mark; + + mark = skb->mark; + skb_scrub_packet(skb, true); + skb->mark = mark; + tun_info = NULL; + } + /* Extract flow from 'skb' into 'key'. */ error = ovs_flow_key_extract(tun_info, skb, &key); if (unlikely(error)) { @@ -487,60 +480,32 @@ void ovs_vport_deferred_free(struct vport *vport) } EXPORT_SYMBOL_GPL(ovs_vport_deferred_free); -int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall, - struct net *net, - struct sk_buff *skb, - u8 ipproto, - __be16 tp_src, - __be16 tp_dst) +static unsigned int packet_length(const struct sk_buff *skb) { - struct ip_tunnel_info *egress_tun_info = upcall->egress_tun_info; - const struct ip_tunnel_info *tun_info = skb_tunnel_info(skb); - const struct ip_tunnel_key *tun_key; - u32 skb_mark = skb->mark; - struct rtable *rt; - struct flowi4 fl; - - if (unlikely(!tun_info)) - return -EINVAL; - if (ip_tunnel_info_af(tun_info) != AF_INET) - return -EINVAL; - - tun_key = &tun_info->key; + unsigned int length = skb->len - ETH_HLEN; - /* Route lookup to get srouce IP address. - * The process may need to be changed if the corresponding process - * in vports ops changed. - */ - rt = ovs_tunnel_route_lookup(net, tun_key, skb_mark, &fl, ipproto); - if (IS_ERR(rt)) - return PTR_ERR(rt); - - ip_rt_put(rt); + if (skb->protocol == htons(ETH_P_8021Q)) + length -= VLAN_HLEN; - /* Generate egress_tun_info based on tun_info, - * saddr, tp_src and tp_dst - */ - ip_tunnel_key_init(&egress_tun_info->key, - fl.saddr, tun_key->u.ipv4.dst, - tun_key->tos, - tun_key->ttl, - tp_src, tp_dst, - tun_key->tun_id, - tun_key->tun_flags); - egress_tun_info->options_len = tun_info->options_len; - egress_tun_info->mode = tun_info->mode; - upcall->egress_tun_opts = ip_tunnel_info_opts(egress_tun_info); - return 0; + return length; } -EXPORT_SYMBOL_GPL(ovs_tunnel_get_egress_info); -int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct dp_upcall_info *upcall) +void ovs_vport_send(struct vport *vport, struct sk_buff *skb) { - /* get_egress_tun_info() is only implemented on tunnel ports. */ - if (unlikely(!vport->ops->get_egress_tun_info)) - return -EINVAL; + int mtu = vport->dev->mtu; + + if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) { + net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n", + vport->dev->name, + packet_length(skb), mtu); + vport->dev->stats.tx_errors++; + goto drop; + } + + skb->dev = vport->dev; + vport->ops->send(skb); + return; - return vport->ops->get_egress_tun_info(vport, skb, upcall); +drop: + kfree_skb(skb); } diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index a413f3ae6a7b..bdfd82a7c064 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -27,7 +27,6 @@ #include <linux/skbuff.h> #include <linux/spinlock.h> #include <linux/u64_stats_sync.h> -#include <net/route.h> #include "datapath.h" @@ -53,16 +52,6 @@ int ovs_vport_set_upcall_portids(struct vport *, const struct nlattr *pids); int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *); u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *); -int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall, - struct net *net, - struct sk_buff *, - u8 ipproto, - __be16 tp_src, - __be16 tp_dst); - -int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct dp_upcall_info *upcall); - /** * struct vport_portids - array of netlink portids of a vport. * must be protected by rcu. @@ -140,8 +129,6 @@ struct vport_parms { * have any configuration. * @send: Send a packet on the device. * zero for dropped packets or negative for error. - * @get_egress_tun_info: Get the egress tunnel 5-tuple and other info for - * a packet. */ struct vport_ops { enum ovs_vport_type type; @@ -153,10 +140,7 @@ struct vport_ops { int (*set_options)(struct vport *, struct nlattr *); int (*get_options)(const struct vport *, struct sk_buff *); - void (*send)(struct vport *, struct sk_buff *); - int (*get_egress_tun_info)(struct vport *, struct sk_buff *, - struct dp_upcall_info *upcall); - + netdev_tx_t (*send) (struct sk_buff *skb); struct module *owner; struct list_head list; }; @@ -234,9 +218,6 @@ static inline struct rtable *ovs_tunnel_route_lookup(struct net *net, return rt; } -static inline void ovs_vport_send(struct vport *vport, struct sk_buff *skb) -{ - vport->ops->send(vport, skb); -} +void ovs_vport_send(struct vport *vport, struct sk_buff *skb); #endif /* vport.h */ |