diff options
Diffstat (limited to 'drivers/net/vxlan.c')
-rw-r--r-- | drivers/net/vxlan.c | 511 |
1 files changed, 318 insertions, 193 deletions
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 328b4712683c..96aa7e6cf214 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -59,6 +59,8 @@ static const u8 all_zeros_mac[ETH_ALEN + 2]; static int vxlan_sock_add(struct vxlan_dev *vxlan); +static void vxlan_vs_del_dev(struct vxlan_dev *vxlan); + /* per-network namespace private data for this module */ struct vxlan_net { struct list_head vxlan_list; @@ -224,26 +226,37 @@ static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family, return NULL; } -static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, __be32 vni) +static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, int ifindex, + __be32 vni) { - struct vxlan_dev *vxlan; + struct vxlan_dev_node *node; /* For flow based devices, map all packets to VNI 0 */ if (vs->flags & VXLAN_F_COLLECT_METADATA) vni = 0; - hlist_for_each_entry_rcu(vxlan, vni_head(vs, vni), hlist) { - if (vxlan->default_dst.remote_vni == vni) - return vxlan; + hlist_for_each_entry_rcu(node, vni_head(vs, vni), hlist) { + if (node->vxlan->default_dst.remote_vni != vni) + continue; + + if (IS_ENABLED(CONFIG_IPV6)) { + const struct vxlan_config *cfg = &node->vxlan->cfg; + + if ((cfg->flags & VXLAN_F_IPV6_LINKLOCAL) && + cfg->remote_ifindex != ifindex) + continue; + } + + return node->vxlan; } return NULL; } /* Look up VNI in a per net namespace table */ -static struct vxlan_dev *vxlan_find_vni(struct net *net, __be32 vni, - sa_family_t family, __be16 port, - u32 flags) +static struct vxlan_dev *vxlan_find_vni(struct net *net, int ifindex, + __be32 vni, sa_family_t family, + __be16 port, u32 flags) { struct vxlan_sock *vs; @@ -251,7 +264,7 @@ static struct vxlan_dev *vxlan_find_vni(struct net *net, __be32 vni, if (!vs) return NULL; - return vxlan_vs_find_vni(vs, vni); + return vxlan_vs_find_vni(vs, ifindex, vni); } /* Fill in neighbour message in skbuff. */ @@ -303,7 +316,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, if (rdst->remote_vni != vxlan->default_dst.remote_vni && nla_put_u32(skb, NDA_VNI, be32_to_cpu(rdst->remote_vni))) goto nla_put_failure; - if ((vxlan->flags & VXLAN_F_COLLECT_METADATA) && fdb->vni && + if ((vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) && fdb->vni && nla_put_u32(skb, NDA_SRC_VNI, be32_to_cpu(fdb->vni))) goto nla_put_failure; @@ -417,7 +430,7 @@ static u32 eth_vni_hash(const unsigned char *addr, __be32 vni) static inline struct hlist_head *vxlan_fdb_head(struct vxlan_dev *vxlan, const u8 *mac, __be32 vni) { - if (vxlan->flags & VXLAN_F_COLLECT_METADATA) + if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) return &vxlan->fdb_head[eth_vni_hash(mac, vni)]; else return &vxlan->fdb_head[eth_hash(mac)]; @@ -432,7 +445,7 @@ static struct vxlan_fdb *__vxlan_find_mac(struct vxlan_dev *vxlan, hlist_for_each_entry_rcu(f, head, hlist) { if (ether_addr_equal(mac, f->eth_addr)) { - if (vxlan->flags & VXLAN_F_COLLECT_METADATA) { + if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) { if (vni == f->vni) return f; } else { @@ -740,6 +753,22 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f) call_rcu(&f->rcu, vxlan_fdb_free); } +static void vxlan_dst_free(struct rcu_head *head) +{ + struct vxlan_rdst *rd = container_of(head, struct vxlan_rdst, rcu); + + dst_cache_destroy(&rd->dst_cache); + kfree(rd); +} + +static void vxlan_fdb_dst_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f, + struct vxlan_rdst *rd) +{ + list_del_rcu(&rd->list); + vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH); + call_rcu(&rd->rcu, vxlan_dst_free); +} + static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan, union vxlan_addr *ip, __be16 *port, __be32 *src_vni, __be32 *vni, u32 *ifindex) @@ -864,9 +893,7 @@ static int __vxlan_fdb_delete(struct vxlan_dev *vxlan, * otherwise destroy the fdb entry */ if (rd && !list_is_singular(&f->remotes)) { - list_del_rcu(&rd->list); - vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH); - kfree_rcu(rd, rcu); + vxlan_fdb_dst_destroy(vxlan, f, rd); goto out; } @@ -941,20 +968,28 @@ out: */ static bool vxlan_snoop(struct net_device *dev, union vxlan_addr *src_ip, const u8 *src_mac, - __be32 vni) + u32 src_ifindex, __be32 vni) { struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_fdb *f; + u32 ifindex = 0; + +#if IS_ENABLED(CONFIG_IPV6) + if (src_ip->sa.sa_family == AF_INET6 && + (ipv6_addr_type(&src_ip->sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)) + ifindex = src_ifindex; +#endif f = vxlan_find_mac(vxlan, src_mac, vni); if (likely(f)) { struct vxlan_rdst *rdst = first_remote_rcu(f); - if (likely(vxlan_addr_equal(&rdst->remote_ip, src_ip))) + if (likely(vxlan_addr_equal(&rdst->remote_ip, src_ip) && + rdst->remote_ifindex == ifindex)) return false; /* Don't migrate static entries, drop packets */ - if (f->state & NUD_NOARP) + if (f->state & (NUD_PERMANENT | NUD_NOARP)) return true; if (net_ratelimit()) @@ -977,7 +1012,7 @@ static bool vxlan_snoop(struct net_device *dev, vxlan->cfg.dst_port, vni, vxlan->default_dst.remote_vni, - 0, NTF_SELF); + ifindex, NTF_SELF); spin_unlock(&vxlan->hash_lock); } @@ -999,11 +1034,11 @@ static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev) /* The vxlan_sock is only used by dev, leaving group has * no effect on other vxlan devices. */ - if (family == AF_INET && sock4 && atomic_read(&sock4->refcnt) == 1) + if (family == AF_INET && sock4 && refcount_read(&sock4->refcnt) == 1) return false; #if IS_ENABLED(CONFIG_IPV6) sock6 = rtnl_dereference(dev->vn6_sock); - if (family == AF_INET6 && sock6 && atomic_read(&sock6->refcnt) == 1) + if (family == AF_INET6 && sock6 && refcount_read(&sock6->refcnt) == 1) return false; #endif @@ -1040,7 +1075,7 @@ static bool __vxlan_sock_release_prep(struct vxlan_sock *vs) if (!vs) return false; - if (!atomic_dec_and_test(&vs->refcnt)) + if (!refcount_dec_and_test(&vs->refcnt)) return false; vn = net_generic(sock_net(vs->sock->sk), vxlan_net_id); @@ -1061,12 +1096,14 @@ static void vxlan_sock_release(struct vxlan_dev *vxlan) #if IS_ENABLED(CONFIG_IPV6) struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock); - rcu_assign_pointer(vxlan->vn6_sock, NULL); + RCU_INIT_POINTER(vxlan->vn6_sock, NULL); #endif - rcu_assign_pointer(vxlan->vn4_sock, NULL); + RCU_INIT_POINTER(vxlan->vn4_sock, NULL); synchronize_net(); + vxlan_vs_del_dev(vxlan); + if (__vxlan_sock_release_prep(sock4)) { udp_tunnel_sock_release(sock4->sock); kfree(sock4); @@ -1246,6 +1283,7 @@ static bool vxlan_set_mac(struct vxlan_dev *vxlan, struct sk_buff *skb, __be32 vni) { union vxlan_addr saddr; + u32 ifindex = skb->dev->ifindex; skb_reset_mac_header(skb); skb->protocol = eth_type_trans(skb, vxlan->dev); @@ -1266,8 +1304,8 @@ static bool vxlan_set_mac(struct vxlan_dev *vxlan, #endif } - if ((vxlan->flags & VXLAN_F_LEARN) && - vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source, vni)) + if ((vxlan->cfg.flags & VXLAN_F_LEARN) && + vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source, ifindex, vni)) return false; return true; @@ -1333,7 +1371,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) vni = vxlan_vni(vxlan_hdr(skb)->vx_vni); - vxlan = vxlan_vs_find_vni(vs, vni); + vxlan = vxlan_vs_find_vni(vs, skb->dev->ifindex, vni); if (!vxlan) goto drop; @@ -1489,7 +1527,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) if (netif_rx_ni(reply) == NET_RX_DROP) dev->stats.rx_dropped++; - } else if (vxlan->flags & VXLAN_F_L3MISS) { + } else if (vxlan->cfg.flags & VXLAN_F_L3MISS) { union vxlan_addr ipa = { .sin.sin_addr.s_addr = tip, .sin.sin_family = AF_INET, @@ -1566,10 +1604,8 @@ static struct sk_buff *vxlan_na_create(struct sk_buff *request, skb_pull(reply, sizeof(struct ipv6hdr)); skb_reset_transport_header(reply); - na = (struct nd_msg *)skb_put(reply, sizeof(*na) + na_olen); - /* Neighbor Advertisement */ - memset(na, 0, sizeof(*na)+na_olen); + na = skb_put_zero(reply, sizeof(*na) + na_olen); na->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT; na->icmph.icmp6_router = isrouter; na->icmph.icmp6_override = 1; @@ -1649,7 +1685,7 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) if (netif_rx_ni(reply) == NET_RX_DROP) dev->stats.rx_dropped++; - } else if (vxlan->flags & VXLAN_F_L3MISS) { + } else if (vxlan->cfg.flags & VXLAN_F_L3MISS) { union vxlan_addr ipa = { .sin6.sin6_addr = msg->target, .sin6.sin6_family = AF_INET6, @@ -1682,7 +1718,7 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) return false; pip = ip_hdr(skb); n = neigh_lookup(&arp_tbl, &pip->daddr, dev); - if (!n && (vxlan->flags & VXLAN_F_L3MISS)) { + if (!n && (vxlan->cfg.flags & VXLAN_F_L3MISS)) { union vxlan_addr ipa = { .sin.sin_addr.s_addr = pip->daddr, .sin.sin_family = AF_INET, @@ -1703,7 +1739,7 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) return false; pip6 = ipv6_hdr(skb); n = neigh_lookup(ipv6_stub->nd_tbl, &pip6->daddr, dev); - if (!n && (vxlan->flags & VXLAN_F_L3MISS)) { + if (!n && (vxlan->cfg.flags & VXLAN_F_L3MISS)) { union vxlan_addr ipa = { .sin6.sin6_addr = pip6->daddr, .sin6.sin6_family = AF_INET6, @@ -1811,7 +1847,7 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst, if (err) return err; - vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); + vxh = __skb_push(skb, sizeof(*vxh)); vxh->vx_flags = VXLAN_HF_VNI; vxh->vx_vni = vxlan_vni_field(vni); @@ -1977,8 +2013,9 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, #endif } - if (dst_vxlan->flags & VXLAN_F_LEARN) - vxlan_snoop(skb->dev, &loopback, eth_hdr(skb)->h_source, vni); + if (dst_vxlan->cfg.flags & VXLAN_F_LEARN) + vxlan_snoop(skb->dev, &loopback, eth_hdr(skb)->h_source, 0, + vni); u64_stats_update_begin(&tx_stats->syncp); tx_stats->tx_packets++; @@ -1996,8 +2033,10 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, } static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev, - struct vxlan_dev *vxlan, union vxlan_addr *daddr, - __be16 dst_port, __be32 vni, struct dst_entry *dst, + struct vxlan_dev *vxlan, + union vxlan_addr *daddr, + __be16 dst_port, int dst_ifindex, __be32 vni, + struct dst_entry *dst, u32 rt_flags) { #if IS_ENABLED(CONFIG_IPV6) @@ -2013,9 +2052,9 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev, struct vxlan_dev *dst_vxlan; dst_release(dst); - dst_vxlan = vxlan_find_vni(vxlan->net, vni, + dst_vxlan = vxlan_find_vni(vxlan->net, dst_ifindex, vni, daddr->sa.sa_family, dst_port, - vxlan->flags); + vxlan->cfg.flags); if (!dst_vxlan) { dev->stats.tx_errors++; kfree_skb(skb); @@ -2045,8 +2084,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, struct dst_entry *ndst = NULL; __be32 vni, label; __u8 tos, ttl; + int ifindex; int err; - u32 flags = vxlan->flags; + u32 flags = vxlan->cfg.flags; bool udp_sum = false; bool xnet = !net_eq(vxlan->net, dev_net(vxlan->dev)); @@ -2065,6 +2105,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port; vni = (rdst->remote_vni) ? : default_vni; + ifindex = rdst->remote_ifindex; local_ip = vxlan->cfg.saddr; dst_cache = &rdst->dst_cache; md->gbp = skb->mark; @@ -2098,6 +2139,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, dst = &remote_ip; dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port; vni = tunnel_id_to_key32(info->key.tun_id); + ifindex = 0; dst_cache = &info->dst_cache; if (info->options_len) md = ip_tunnel_info_opts(info); @@ -2115,8 +2157,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, struct rtable *rt; __be16 df = 0; - rt = vxlan_get_route(vxlan, dev, sock4, skb, - rdst ? rdst->remote_ifindex : 0, tos, + rt = vxlan_get_route(vxlan, dev, sock4, skb, ifindex, tos, dst->sin.sin_addr.s_addr, &local_ip.sin.sin_addr.s_addr, dst_port, src_port, @@ -2129,8 +2170,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, /* Bypass encapsulation if the destination is local */ if (!info) { err = encap_bypass_if_local(skb, dev, vxlan, dst, - dst_port, vni, &rt->dst, - rt->rt_flags); + dst_port, ifindex, vni, + &rt->dst, rt->rt_flags); if (err) goto out_unlock; } else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) { @@ -2152,8 +2193,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } else { struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock); - ndst = vxlan6_get_route(vxlan, dev, sock6, skb, - rdst ? rdst->remote_ifindex : 0, tos, + ndst = vxlan6_get_route(vxlan, dev, sock6, skb, ifindex, tos, label, &dst->sin6.sin6_addr, &local_ip.sin6.sin6_addr, dst_port, src_port, @@ -2168,8 +2208,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, u32 rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags; err = encap_bypass_if_local(skb, dev, vxlan, dst, - dst_port, vni, ndst, - rt6i_flags); + dst_port, ifindex, vni, + ndst, rt6i_flags); if (err) goto out_unlock; } @@ -2228,7 +2268,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) skb_reset_mac_header(skb); - if (vxlan->flags & VXLAN_F_COLLECT_METADATA) { + if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) { if (info && info->mode & IP_TUNNEL_INFO_BRIDGE && info->mode & IP_TUNNEL_INFO_TX) { vni = tunnel_id_to_key32(info->key.tun_id); @@ -2241,7 +2281,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) } } - if (vxlan->flags & VXLAN_F_PROXY) { + if (vxlan->cfg.flags & VXLAN_F_PROXY) { eth = eth_hdr(skb); if (ntohs(eth->h_proto) == ETH_P_ARP) return arp_reduce(dev, skb, vni); @@ -2261,7 +2301,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) f = vxlan_find_mac(vxlan, eth->h_dest, vni); did_rsc = false; - if (f && (f->flags & NTF_ROUTER) && (vxlan->flags & VXLAN_F_RSC) && + if (f && (f->flags & NTF_ROUTER) && (vxlan->cfg.flags & VXLAN_F_RSC) && (ntohs(eth->h_proto) == ETH_P_IP || ntohs(eth->h_proto) == ETH_P_IPV6)) { did_rsc = route_shortcircuit(dev, skb); @@ -2272,7 +2312,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) if (f == NULL) { f = vxlan_find_mac(vxlan, all_zeros_mac, vni); if (f == NULL) { - if ((vxlan->flags & VXLAN_F_L2MISS) && + if ((vxlan->cfg.flags & VXLAN_F_L2MISS) && !is_multicast_ether_addr(eth->h_dest)) vxlan_fdb_miss(vxlan, eth->h_dest); @@ -2342,13 +2382,27 @@ static void vxlan_cleanup(unsigned long arg) mod_timer(&vxlan->age_timer, next_timer); } -static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan) +static void vxlan_vs_del_dev(struct vxlan_dev *vxlan) +{ + struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); + + spin_lock(&vn->sock_lock); + hlist_del_init_rcu(&vxlan->hlist4.hlist); +#if IS_ENABLED(CONFIG_IPV6) + hlist_del_init_rcu(&vxlan->hlist6.hlist); +#endif + spin_unlock(&vn->sock_lock); +} + +static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan, + struct vxlan_dev_node *node) { struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); __be32 vni = vxlan->default_dst.remote_vni; + node->vxlan = vxlan; spin_lock(&vn->sock_lock); - hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni)); + hlist_add_head_rcu(&node->hlist, vni_head(vs, vni)); spin_unlock(&vn->sock_lock); } @@ -2459,10 +2513,7 @@ static int vxlan_change_mtu(struct net_device *dev, int new_mtu) struct vxlan_rdst *dst = &vxlan->default_dst; struct net_device *lowerdev = __dev_get_by_index(vxlan->net, dst->remote_ifindex); - bool use_ipv6 = false; - - if (dst->remote_ip.sa.sa_family == AF_INET6) - use_ipv6 = true; + bool use_ipv6 = !!(vxlan->cfg.flags & VXLAN_F_IPV6); /* This check is different than dev->max_mtu, because it looks at * the lowerdev->mtu, rather than the static dev->max_mtu @@ -2584,7 +2635,7 @@ static void vxlan_setup(struct net_device *dev) eth_hw_addr_random(dev); ether_setup(dev); - dev->destructor = free_netdev; + dev->needs_free_netdev = true; SET_NETDEV_DEVTYPE(dev, &vxlan_type); dev->features |= NETIF_F_LLTX; @@ -2598,6 +2649,10 @@ static void vxlan_setup(struct net_device *dev) netif_keep_dst(dev); dev->priv_flags |= IFF_NO_QUEUE; + /* MTU range: 68 - 65535 */ + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = ETH_MAX_MTU; + INIT_LIST_HEAD(&vxlan->next); spin_lock_init(&vxlan->hash_lock); @@ -2605,8 +2660,6 @@ static void vxlan_setup(struct net_device *dev) vxlan->age_timer.function = vxlan_cleanup; vxlan->age_timer.data = (unsigned long) vxlan; - vxlan->cfg.dst_port = htons(vxlan_port); - vxlan->dev = dev; gro_cells_init(&vxlan->gro_cells, dev); @@ -2662,7 +2715,8 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { [IFLA_VXLAN_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG }, }; -static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[]) +static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) { if (tb[IFLA_ADDRESS]) { if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) { @@ -2676,11 +2730,19 @@ static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[]) } } + if (tb[IFLA_MTU]) { + u32 mtu = nla_get_u32(tb[IFLA_MTU]); + + if (mtu < ETH_MIN_MTU || mtu > ETH_MAX_MTU) + return -EINVAL; + } + if (!data) return -EINVAL; if (data[IFLA_VXLAN_ID]) { - __u32 id = nla_get_u32(data[IFLA_VXLAN_ID]); + u32 id = nla_get_u32(data[IFLA_VXLAN_ID]); + if (id >= VXLAN_N_VID) return -ERANGE; } @@ -2763,7 +2825,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6, } vs->sock = sock; - atomic_set(&vs->refcnt, 1); + refcount_set(&vs->refcnt, 1); vs->flags = (flags & VXLAN_F_RCV_FLAGS); spin_lock(&vn->sock_lock); @@ -2792,12 +2854,13 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6) { struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); struct vxlan_sock *vs = NULL; + struct vxlan_dev_node *node; if (!vxlan->cfg.no_share) { spin_lock(&vn->sock_lock); vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET, - vxlan->cfg.dst_port, vxlan->flags); - if (vs && !atomic_add_unless(&vs->refcnt, 1, 0)) { + vxlan->cfg.dst_port, vxlan->cfg.flags); + if (vs && !refcount_inc_not_zero(&vs->refcnt)) { spin_unlock(&vn->sock_lock); return -EBUSY; } @@ -2805,23 +2868,27 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6) } if (!vs) vs = vxlan_socket_create(vxlan->net, ipv6, - vxlan->cfg.dst_port, vxlan->flags); + vxlan->cfg.dst_port, vxlan->cfg.flags); if (IS_ERR(vs)) return PTR_ERR(vs); #if IS_ENABLED(CONFIG_IPV6) - if (ipv6) + if (ipv6) { rcu_assign_pointer(vxlan->vn6_sock, vs); - else + node = &vxlan->hlist6; + } else #endif + { rcu_assign_pointer(vxlan->vn4_sock, vs); - vxlan_vs_add_dev(vs, vxlan); + node = &vxlan->hlist4; + } + vxlan_vs_add_dev(vs, vxlan, node); return 0; } static int vxlan_sock_add(struct vxlan_dev *vxlan) { - bool metadata = vxlan->flags & VXLAN_F_COLLECT_METADATA; - bool ipv6 = vxlan->flags & VXLAN_F_IPV6 || metadata; + bool metadata = vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA; + bool ipv6 = vxlan->cfg.flags & VXLAN_F_IPV6 || metadata; bool ipv4 = !ipv6 || metadata; int ret = 0; @@ -2841,116 +2908,176 @@ static int vxlan_sock_add(struct vxlan_dev *vxlan) return ret; } -static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, - struct vxlan_config *conf, - bool changelink) +static int vxlan_config_validate(struct net *src_net, struct vxlan_config *conf, + struct net_device **lower, + struct vxlan_dev *old) { struct vxlan_net *vn = net_generic(src_net, vxlan_net_id); - struct vxlan_dev *vxlan = netdev_priv(dev), *tmp; - struct vxlan_rdst *dst = &vxlan->default_dst; - unsigned short needed_headroom = ETH_HLEN; + struct vxlan_dev *tmp; bool use_ipv6 = false; - __be16 default_port = vxlan->cfg.dst_port; - struct net_device *lowerdev = NULL; - if (!changelink) { - if (conf->flags & VXLAN_F_GPE) { - /* For now, allow GPE only together with - * COLLECT_METADATA. This can be relaxed later; in such - * case, the other side of the PtP link will have to be - * provided. - */ - if ((conf->flags & ~VXLAN_F_ALLOWED_GPE) || - !(conf->flags & VXLAN_F_COLLECT_METADATA)) { - pr_info("unsupported combination of extensions\n"); - return -EINVAL; - } - vxlan_raw_setup(dev); - } else { - vxlan_ether_setup(dev); + if (conf->flags & VXLAN_F_GPE) { + /* For now, allow GPE only together with + * COLLECT_METADATA. This can be relaxed later; in such + * case, the other side of the PtP link will have to be + * provided. + */ + if ((conf->flags & ~VXLAN_F_ALLOWED_GPE) || + !(conf->flags & VXLAN_F_COLLECT_METADATA)) { + return -EINVAL; } - - /* MTU range: 68 - 65535 */ - dev->min_mtu = ETH_MIN_MTU; - dev->max_mtu = ETH_MAX_MTU; - vxlan->net = src_net; } - dst->remote_vni = conf->vni; + if (!conf->remote_ip.sa.sa_family && !conf->saddr.sa.sa_family) { + /* Unless IPv6 is explicitly requested, assume IPv4 */ + conf->remote_ip.sa.sa_family = AF_INET; + conf->saddr.sa.sa_family = AF_INET; + } else if (!conf->remote_ip.sa.sa_family) { + conf->remote_ip.sa.sa_family = conf->saddr.sa.sa_family; + } else if (!conf->saddr.sa.sa_family) { + conf->saddr.sa.sa_family = conf->remote_ip.sa.sa_family; + } - memcpy(&dst->remote_ip, &conf->remote_ip, sizeof(conf->remote_ip)); + if (conf->saddr.sa.sa_family != conf->remote_ip.sa.sa_family) + return -EINVAL; - /* Unless IPv6 is explicitly requested, assume IPv4 */ - if (!dst->remote_ip.sa.sa_family) - dst->remote_ip.sa.sa_family = AF_INET; + if (vxlan_addr_multicast(&conf->saddr)) + return -EINVAL; - if (dst->remote_ip.sa.sa_family == AF_INET6 || - vxlan->cfg.saddr.sa.sa_family == AF_INET6) { + if (conf->saddr.sa.sa_family == AF_INET6) { if (!IS_ENABLED(CONFIG_IPV6)) return -EPFNOSUPPORT; use_ipv6 = true; - vxlan->flags |= VXLAN_F_IPV6; + conf->flags |= VXLAN_F_IPV6; + + if (!(conf->flags & VXLAN_F_COLLECT_METADATA)) { + int local_type = + ipv6_addr_type(&conf->saddr.sin6.sin6_addr); + int remote_type = + ipv6_addr_type(&conf->remote_ip.sin6.sin6_addr); + + if (local_type & IPV6_ADDR_LINKLOCAL) { + if (!(remote_type & IPV6_ADDR_LINKLOCAL) && + (remote_type != IPV6_ADDR_ANY)) + return -EINVAL; + + conf->flags |= VXLAN_F_IPV6_LINKLOCAL; + } else { + if (remote_type == + (IPV6_ADDR_UNICAST | IPV6_ADDR_LINKLOCAL)) + return -EINVAL; + + conf->flags &= ~VXLAN_F_IPV6_LINKLOCAL; + } + } } - if (conf->label && !use_ipv6) { - pr_info("label only supported in use with IPv6\n"); + if (conf->label && !use_ipv6) return -EINVAL; - } - if (conf->remote_ifindex && - conf->remote_ifindex != vxlan->cfg.remote_ifindex) { - lowerdev = __dev_get_by_index(src_net, conf->remote_ifindex); - dst->remote_ifindex = conf->remote_ifindex; + if (conf->remote_ifindex) { + struct net_device *lowerdev; - if (!lowerdev) { - pr_info("ifindex %d does not exist\n", - dst->remote_ifindex); + lowerdev = __dev_get_by_index(src_net, conf->remote_ifindex); + if (!lowerdev) return -ENODEV; - } #if IS_ENABLED(CONFIG_IPV6) if (use_ipv6) { struct inet6_dev *idev = __in6_dev_get(lowerdev); - if (idev && idev->cnf.disable_ipv6) { - pr_info("IPv6 is disabled via sysctl\n"); + if (idev && idev->cnf.disable_ipv6) return -EPERM; - } } #endif - if (!conf->mtu) - dev->mtu = lowerdev->mtu - - (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM); + *lower = lowerdev; + } else { + if (vxlan_addr_multicast(&conf->remote_ip)) + return -EINVAL; - needed_headroom = lowerdev->hard_header_len; - } else if (!conf->remote_ifindex && - vxlan_addr_multicast(&dst->remote_ip)) { - pr_info("multicast destination requires interface to be specified\n"); - return -EINVAL; +#if IS_ENABLED(CONFIG_IPV6) + if (conf->flags & VXLAN_F_IPV6_LINKLOCAL) + return -EINVAL; +#endif + + *lower = NULL; } - if (lowerdev) { - dev->gso_max_size = lowerdev->gso_max_size; - dev->gso_max_segs = lowerdev->gso_max_segs; + if (!conf->dst_port) { + if (conf->flags & VXLAN_F_GPE) + conf->dst_port = htons(4790); /* IANA VXLAN-GPE port */ + else + conf->dst_port = htons(vxlan_port); } - if (conf->mtu) { - int max_mtu = ETH_MAX_MTU; + if (!conf->age_interval) + conf->age_interval = FDB_AGE_DEFAULT; - if (lowerdev) - max_mtu = lowerdev->mtu; + list_for_each_entry(tmp, &vn->vxlan_list, next) { + if (tmp == old) + continue; - max_mtu -= (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM); + if (tmp->cfg.vni != conf->vni) + continue; + if (tmp->cfg.dst_port != conf->dst_port) + continue; + if ((tmp->cfg.flags & (VXLAN_F_RCV_FLAGS | VXLAN_F_IPV6)) != + (conf->flags & (VXLAN_F_RCV_FLAGS | VXLAN_F_IPV6))) + continue; - if (conf->mtu < dev->min_mtu || conf->mtu > dev->max_mtu) - return -EINVAL; + if ((conf->flags & VXLAN_F_IPV6_LINKLOCAL) && + tmp->cfg.remote_ifindex != conf->remote_ifindex) + continue; + + return -EEXIST; + } + + return 0; +} - dev->mtu = conf->mtu; +static void vxlan_config_apply(struct net_device *dev, + struct vxlan_config *conf, + struct net_device *lowerdev, + struct net *src_net, + bool changelink) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_rdst *dst = &vxlan->default_dst; + unsigned short needed_headroom = ETH_HLEN; + bool use_ipv6 = !!(conf->flags & VXLAN_F_IPV6); + int max_mtu = ETH_MAX_MTU; - if (conf->mtu > max_mtu) - dev->mtu = max_mtu; + if (!changelink) { + if (conf->flags & VXLAN_F_GPE) + vxlan_raw_setup(dev); + else + vxlan_ether_setup(dev); + + if (conf->mtu) + dev->mtu = conf->mtu; + + vxlan->net = src_net; } + dst->remote_vni = conf->vni; + + memcpy(&dst->remote_ip, &conf->remote_ip, sizeof(conf->remote_ip)); + + if (lowerdev) { + dst->remote_ifindex = conf->remote_ifindex; + + dev->gso_max_size = lowerdev->gso_max_size; + dev->gso_max_segs = lowerdev->gso_max_segs; + + needed_headroom = lowerdev->hard_header_len; + + max_mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM : + VXLAN_HEADROOM); + } + + if (dev->mtu > max_mtu) + dev->mtu = max_mtu; + if (use_ipv6 || conf->flags & VXLAN_F_COLLECT_METADATA) needed_headroom += VXLAN6_HEADROOM; else @@ -2958,31 +3085,21 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, dev->needed_headroom = needed_headroom; memcpy(&vxlan->cfg, conf, sizeof(*conf)); - if (!vxlan->cfg.dst_port) { - if (conf->flags & VXLAN_F_GPE) - vxlan->cfg.dst_port = htons(4790); /* IANA VXLAN-GPE port */ - else - vxlan->cfg.dst_port = default_port; - } - vxlan->flags |= conf->flags; +} - if (!vxlan->cfg.age_interval) - vxlan->cfg.age_interval = FDB_AGE_DEFAULT; +static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, + struct vxlan_config *conf, + bool changelink) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct net_device *lowerdev; + int ret; - if (changelink) - return 0; + ret = vxlan_config_validate(src_net, conf, &lowerdev, vxlan); + if (ret) + return ret; - list_for_each_entry(tmp, &vn->vxlan_list, next) { - if (tmp->cfg.vni == conf->vni && - (tmp->default_dst.remote_ip.sa.sa_family == AF_INET6 || - tmp->cfg.saddr.sa.sa_family == AF_INET6) == use_ipv6 && - tmp->cfg.dst_port == vxlan->cfg.dst_port && - (tmp->flags & VXLAN_F_RCV_FLAGS) == - (vxlan->flags & VXLAN_F_RCV_FLAGS)) { - pr_info("duplicate VNI %u\n", be32_to_cpu(conf->vni)); - return -EEXIST; - } - } + vxlan_config_apply(dev, conf, lowerdev, src_net, changelink); return 0; } @@ -3046,22 +3163,35 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[], } if (data[IFLA_VXLAN_GROUP]) { + if (changelink && (conf->remote_ip.sa.sa_family != AF_INET)) + return -EOPNOTSUPP; + conf->remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]); + conf->remote_ip.sa.sa_family = AF_INET; } else if (data[IFLA_VXLAN_GROUP6]) { if (!IS_ENABLED(CONFIG_IPV6)) return -EPFNOSUPPORT; + if (changelink && (conf->remote_ip.sa.sa_family != AF_INET6)) + return -EOPNOTSUPP; + conf->remote_ip.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_GROUP6]); conf->remote_ip.sa.sa_family = AF_INET6; } if (data[IFLA_VXLAN_LOCAL]) { + if (changelink && (conf->saddr.sa.sa_family != AF_INET)) + return -EOPNOTSUPP; + conf->saddr.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_LOCAL]); conf->saddr.sa.sa_family = AF_INET; } else if (data[IFLA_VXLAN_LOCAL6]) { if (!IS_ENABLED(CONFIG_IPV6)) return -EPFNOSUPPORT; + if (changelink && (conf->saddr.sa.sa_family != AF_INET6)) + return -EOPNOTSUPP; + /* TODO: respect scope id */ conf->saddr.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_LOCAL6]); conf->saddr.sa.sa_family = AF_INET6; @@ -3081,12 +3211,10 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[], IPV6_FLOWLABEL_MASK; if (data[IFLA_VXLAN_LEARNING]) { - if (nla_get_u8(data[IFLA_VXLAN_LEARNING])) { + if (nla_get_u8(data[IFLA_VXLAN_LEARNING])) conf->flags |= VXLAN_F_LEARN; - } else { + else conf->flags &= ~VXLAN_F_LEARN; - vxlan->flags &= ~VXLAN_F_LEARN; - } } else if (!changelink) { /* default to learn on a new device */ conf->flags |= VXLAN_F_LEARN; @@ -3219,7 +3347,8 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[], } static int vxlan_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) { struct vxlan_config conf; int err; @@ -3232,7 +3361,8 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev, } static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], - struct nlattr *data[]) + struct nlattr *data[], + struct netlink_ext_ack *extack) { struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_rdst *dst = &vxlan->default_dst; @@ -3286,15 +3416,9 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], static void vxlan_dellink(struct net_device *dev, struct list_head *head) { struct vxlan_dev *vxlan = netdev_priv(dev); - struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); vxlan_flush(vxlan, true); - spin_lock(&vn->sock_lock); - if (!hlist_unhashed(&vxlan->hlist)) - hlist_del_rcu(&vxlan->hlist); - spin_unlock(&vn->sock_lock); - gro_cells_destroy(&vxlan->gro_cells); list_del(&vxlan->next); unregister_netdevice_queue(dev, head); @@ -3375,43 +3499,44 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) || nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) || nla_put_u8(skb, IFLA_VXLAN_LEARNING, - !!(vxlan->flags & VXLAN_F_LEARN)) || + !!(vxlan->cfg.flags & VXLAN_F_LEARN)) || nla_put_u8(skb, IFLA_VXLAN_PROXY, - !!(vxlan->flags & VXLAN_F_PROXY)) || - nla_put_u8(skb, IFLA_VXLAN_RSC, !!(vxlan->flags & VXLAN_F_RSC)) || + !!(vxlan->cfg.flags & VXLAN_F_PROXY)) || + nla_put_u8(skb, IFLA_VXLAN_RSC, + !!(vxlan->cfg.flags & VXLAN_F_RSC)) || nla_put_u8(skb, IFLA_VXLAN_L2MISS, - !!(vxlan->flags & VXLAN_F_L2MISS)) || + !!(vxlan->cfg.flags & VXLAN_F_L2MISS)) || nla_put_u8(skb, IFLA_VXLAN_L3MISS, - !!(vxlan->flags & VXLAN_F_L3MISS)) || + !!(vxlan->cfg.flags & VXLAN_F_L3MISS)) || nla_put_u8(skb, IFLA_VXLAN_COLLECT_METADATA, - !!(vxlan->flags & VXLAN_F_COLLECT_METADATA)) || + !!(vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA)) || nla_put_u32(skb, IFLA_VXLAN_AGEING, vxlan->cfg.age_interval) || nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->cfg.addrmax) || nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->cfg.dst_port) || nla_put_u8(skb, IFLA_VXLAN_UDP_CSUM, - !(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM_TX)) || + !(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM_TX)) || nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_TX, - !!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) || + !!(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) || nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_RX, - !!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_RX)) || + !!(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM6_RX)) || nla_put_u8(skb, IFLA_VXLAN_REMCSUM_TX, - !!(vxlan->flags & VXLAN_F_REMCSUM_TX)) || + !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_TX)) || nla_put_u8(skb, IFLA_VXLAN_REMCSUM_RX, - !!(vxlan->flags & VXLAN_F_REMCSUM_RX))) + !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_RX))) goto nla_put_failure; if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports)) goto nla_put_failure; - if (vxlan->flags & VXLAN_F_GBP && + if (vxlan->cfg.flags & VXLAN_F_GBP && nla_put_flag(skb, IFLA_VXLAN_GBP)) goto nla_put_failure; - if (vxlan->flags & VXLAN_F_GPE && + if (vxlan->cfg.flags & VXLAN_F_GPE && nla_put_flag(skb, IFLA_VXLAN_GPE)) goto nla_put_failure; - if (vxlan->flags & VXLAN_F_REMCSUM_NOPARTIAL && + if (vxlan->cfg.flags & VXLAN_F_REMCSUM_NOPARTIAL && nla_put_flag(skb, IFLA_VXLAN_REMCSUM_NOPARTIAL)) goto nla_put_failure; |