From 72564b59ffc438ea103b0727a921aaddce766728 Mon Sep 17 00:00:00 2001 From: David Wragg Date: Wed, 10 Feb 2016 00:05:55 +0000 Subject: vxlan: Relax MTU constraints Allow the MTU of vxlan devices without an underlying device to be set to larger values (up to a maximum based on IP packet limits and vxlan overhead). Previously, their MTUs could not be set to higher than the conventional ethernet value of 1500. This is a very arbitrary value in the context of vxlan, and prevented vxlan devices from being able to take advantage of jumbo frames etc. The default MTU remains 1500, for compatibility. Signed-off-by: David Wragg Acked-by: Roopa Prabhu Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) (limited to 'drivers/net/vxlan.c') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 65439188c582..e992c6a05f86 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2367,29 +2367,43 @@ static void vxlan_set_multicast_list(struct net_device *dev) { } -static int vxlan_change_mtu(struct net_device *dev, int new_mtu) +static int __vxlan_change_mtu(struct net_device *dev, + struct net_device *lowerdev, + struct vxlan_rdst *dst, int new_mtu, bool strict) { - struct vxlan_dev *vxlan = netdev_priv(dev); - struct vxlan_rdst *dst = &vxlan->default_dst; - struct net_device *lowerdev; - int max_mtu; + int max_mtu = IP_MAX_MTU; - lowerdev = __dev_get_by_index(vxlan->net, dst->remote_ifindex); - if (lowerdev == NULL) - return eth_change_mtu(dev, new_mtu); + if (lowerdev) + max_mtu = lowerdev->mtu; if (dst->remote_ip.sa.sa_family == AF_INET6) - max_mtu = lowerdev->mtu - VXLAN6_HEADROOM; + max_mtu -= VXLAN6_HEADROOM; else - max_mtu = lowerdev->mtu - VXLAN_HEADROOM; + max_mtu -= VXLAN_HEADROOM; - if (new_mtu < 68 || new_mtu > max_mtu) + if (new_mtu < 68) return -EINVAL; + if (new_mtu > max_mtu) { + if (strict) + return -EINVAL; + + new_mtu = max_mtu; + } + dev->mtu = new_mtu; return 0; } +static int vxlan_change_mtu(struct net_device *dev, int new_mtu) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_rdst *dst = &vxlan->default_dst; + struct net_device *lowerdev = __dev_get_by_index(vxlan->net, + dst->remote_ifindex); + return __vxlan_change_mtu(dev, lowerdev, dst, new_mtu, true); +} + static int egress_ipv4_tun_info(struct net_device *dev, struct sk_buff *skb, struct ip_tunnel_info *info, __be16 sport, __be16 dport) -- cgit v1.2.3 From 7e059158d57b79159eaf1f504825d19866ef2c42 Mon Sep 17 00:00:00 2001 From: David Wragg Date: Wed, 10 Feb 2016 00:05:58 +0000 Subject: vxlan, gre, geneve: Set a large MTU on ovs-created tunnel devices Prior to 4.3, openvswitch tunnel vports (vxlan, gre and geneve) could transmit vxlan packets of any size, constrained only by the ability to send out the resulting packets. 4.3 introduced netdevs corresponding to tunnel vports. These netdevs have an MTU, which limits the size of a packet that can be successfully encapsulated. The default MTU values are low (1500 or less), which is awkwardly small in the context of physical networks supporting jumbo frames, and leads to a conspicuous change in behaviour for userspace. Instead, set the MTU on openvswitch-created netdevs to be the relevant maximum (i.e. the maximum IP packet size minus any relevant overhead), effectively restoring the behaviour prior to 4.3. Signed-off-by: David Wragg Signed-off-by: David S. Miller --- drivers/net/geneve.c | 18 ++++++++++++++---- drivers/net/vxlan.c | 11 ++++++++--- include/net/ip_tunnels.h | 1 + net/ipv4/ip_gre.c | 8 ++++++++ net/ipv4/ip_tunnel.c | 20 +++++++++++++++++--- net/openvswitch/vport-vxlan.c | 2 ++ 6 files changed, 50 insertions(+), 10 deletions(-) (limited to 'drivers/net/vxlan.c') diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index d2031ce8baea..028e3873c310 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1453,11 +1453,21 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name, err = geneve_configure(net, dev, &geneve_remote_unspec, 0, 0, 0, htons(dst_port), true, 0); - if (err) { - free_netdev(dev); - return ERR_PTR(err); - } + if (err) + goto err; + + /* openvswitch users expect packet sizes to be unrestricted, + * so set the largest MTU we can. + */ + err = geneve_change_mtu(dev, IP_MAX_MTU); + if (err) + goto err; + return dev; + + err: + free_netdev(dev); + return ERR_PTR(err); } EXPORT_SYMBOL_GPL(geneve_dev_create_fb); diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index e992c6a05f86..a31cd954b308 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2779,6 +2779,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, int err; bool use_ipv6 = false; __be16 default_port = vxlan->cfg.dst_port; + struct net_device *lowerdev = NULL; vxlan->net = src_net; @@ -2799,9 +2800,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, } if (conf->remote_ifindex) { - struct net_device *lowerdev - = __dev_get_by_index(src_net, conf->remote_ifindex); - + lowerdev = __dev_get_by_index(src_net, conf->remote_ifindex); dst->remote_ifindex = conf->remote_ifindex; if (!lowerdev) { @@ -2825,6 +2824,12 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, needed_headroom = lowerdev->hard_header_len; } + if (conf->mtu) { + err = __vxlan_change_mtu(dev, lowerdev, dst, conf->mtu, false); + if (err) + return err; + } + if (use_ipv6 || conf->flags & VXLAN_F_COLLECT_METADATA) needed_headroom += VXLAN6_HEADROOM; else diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 6db96ea0144f..dda9abf6b89c 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -230,6 +230,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t, u8 *protocol, struct flowi4 *fl4); +int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 7c51c4e1661f..56fdf4e0dce4 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1240,6 +1240,14 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, err = ipgre_newlink(net, dev, tb, NULL); if (err < 0) goto out; + + /* openvswitch users expect packet sizes to be unrestricted, + * so set the largest MTU we can. + */ + err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false); + if (err) + goto out; + return dev; out: free_netdev(dev); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index c7bd72e9b544..89e8861e05fc 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -943,17 +943,31 @@ done: } EXPORT_SYMBOL_GPL(ip_tunnel_ioctl); -int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu) +int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict) { struct ip_tunnel *tunnel = netdev_priv(dev); int t_hlen = tunnel->hlen + sizeof(struct iphdr); + int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen; - if (new_mtu < 68 || - new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen) + if (new_mtu < 68) return -EINVAL; + + if (new_mtu > max_mtu) { + if (strict) + return -EINVAL; + + new_mtu = max_mtu; + } + dev->mtu = new_mtu; return 0; } +EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu); + +int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu) +{ + return __ip_tunnel_change_mtu(dev, new_mtu, true); +} EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu); static void ip_tunnel_dev_free(struct net_device *dev) diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 1605691d9414..de9cb19efb6a 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -91,6 +91,8 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) struct vxlan_config conf = { .no_share = true, .flags = VXLAN_F_COLLECT_METADATA, + /* Don't restrict the packets that can be sent by MTU */ + .mtu = IP_MAX_MTU, }; if (!options) { -- cgit v1.2.3 From 82a0f6b4aba95a21729f56ed0cbe57f2701b4872 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Tue, 16 Feb 2016 22:16:53 +0100 Subject: vxlan: clear IFF_TX_SKB_SHARING ether_setup sets IFF_TX_SKB_SHARING but this is not supported by vxlan as it modifies the skb on xmit. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/net/vxlan.c') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index a31cd954b308..db96f3a16f6c 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2537,6 +2537,7 @@ static void vxlan_setup(struct net_device *dev) dev->hw_features |= NETIF_F_GSO_SOFTWARE; dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; netif_keep_dst(dev); + dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; INIT_LIST_HEAD(&vxlan->next); -- cgit v1.2.3 From f468a729a2ddb1a26f8c4b98a82050e4030fe458 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Tue, 16 Feb 2016 22:18:26 +0100 Subject: vxlan: do not use fdb in metadata mode In metadata mode, the vxlan interface is not supposed to use the fdb control plane but an external one (openvswitch or static routes). With the current code, packets may leak into the fdb handling code which usually causes them to be dropped anyway but may have strange side effects. Just drop the packets directly when in metadata mode if the destination data are not correctly provided on egress. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/net/vxlan.c') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index db96f3a16f6c..e6944b29588e 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2171,9 +2171,11 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) #endif } - if (vxlan->flags & VXLAN_F_COLLECT_METADATA && - info && info->mode & IP_TUNNEL_INFO_TX) { - vxlan_xmit_one(skb, dev, NULL, false); + if (vxlan->flags & VXLAN_F_COLLECT_METADATA) { + if (info && info->mode & IP_TUNNEL_INFO_TX) + vxlan_xmit_one(skb, dev, NULL, false); + else + kfree_skb(skb); return NETDEV_TX_OK; } -- cgit v1.2.3