diff options
Diffstat (limited to 'drivers/net/vrf.c')
-rw-r--r-- | drivers/net/vrf.c | 331 |
1 files changed, 317 insertions, 14 deletions
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 64f2ab663ffe..92fa3e1ea65c 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -30,17 +30,19 @@ #include <net/arp.h> #include <net/ip.h> #include <net/ip_fib.h> +#include <net/ip6_fib.h> #include <net/ip6_route.h> #include <net/rtnetlink.h> #include <net/route.h> #include <net/addrconf.h> #include <net/l3mdev.h> +#define RT_FL_TOS(oldflp4) \ + ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)) + #define DRV_NAME "vrf" #define DRV_VERSION "1.0" -#define vrf_is_slave(dev) ((dev)->flags & IFF_SLAVE) - #define vrf_master_get_rcu(dev) \ ((struct net_device *)rcu_dereference(dev->rx_handler_data)) @@ -56,6 +58,7 @@ struct slave_queue { struct net_vrf { struct slave_queue queue; struct rtable *rth; + struct rt6_info *rt6; u32 tb_id; }; @@ -73,9 +76,9 @@ static struct dst_entry *vrf_ip_check(struct dst_entry *dst, u32 cookie) return dst; } -static int vrf_ip_local_out(struct sk_buff *skb) +static int vrf_ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) { - return ip_local_out(skb); + return ip_local_out(net, sk, skb); } static unsigned int vrf_v4_mtu(const struct dst_entry *dst) @@ -103,12 +106,56 @@ static struct dst_ops vrf_dst_ops = { .default_advmss = vrf_default_advmss, }; +/* neighbor handling is done with actual device; do not want + * to flip skb->dev for those ndisc packets. This really fails + * for multiple next protocols (e.g., NEXTHDR_HOP). But it is + * a start. + */ +#if IS_ENABLED(CONFIG_IPV6) +static bool check_ipv6_frame(const struct sk_buff *skb) +{ + const struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb->data; + size_t hlen = sizeof(*ipv6h); + bool rc = true; + + if (skb->len < hlen) + goto out; + + if (ipv6h->nexthdr == NEXTHDR_ICMP) { + const struct icmp6hdr *icmph; + + if (skb->len < hlen + sizeof(*icmph)) + goto out; + + icmph = (struct icmp6hdr *)(skb->data + sizeof(*ipv6h)); + switch (icmph->icmp6_type) { + case NDISC_ROUTER_SOLICITATION: + case NDISC_ROUTER_ADVERTISEMENT: + case NDISC_NEIGHBOUR_SOLICITATION: + case NDISC_NEIGHBOUR_ADVERTISEMENT: + case NDISC_REDIRECT: + rc = false; + break; + } + } + +out: + return rc; +} +#else +static bool check_ipv6_frame(const struct sk_buff *skb) +{ + return false; +} +#endif + static bool is_ip_rx_frame(struct sk_buff *skb) { switch (skb->protocol) { case htons(ETH_P_IP): - case htons(ETH_P_IPV6): return true; + case htons(ETH_P_IPV6): + return check_ipv6_frame(skb); } return false; } @@ -168,12 +215,53 @@ static struct rtnl_link_stats64 *vrf_get_stats64(struct net_device *dev, return stats; } +#if IS_ENABLED(CONFIG_IPV6) +static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, + struct net_device *dev) +{ + const struct ipv6hdr *iph = ipv6_hdr(skb); + struct net *net = dev_net(skb->dev); + struct flowi6 fl6 = { + /* needed to match OIF rule */ + .flowi6_oif = dev->ifindex, + .flowi6_iif = LOOPBACK_IFINDEX, + .daddr = iph->daddr, + .saddr = iph->saddr, + .flowlabel = ip6_flowinfo(iph), + .flowi6_mark = skb->mark, + .flowi6_proto = iph->nexthdr, + .flowi6_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF, + }; + int ret = NET_XMIT_DROP; + struct dst_entry *dst; + struct dst_entry *dst_null = &net->ipv6.ip6_null_entry->dst; + + dst = ip6_route_output(net, NULL, &fl6); + if (dst == dst_null) + goto err; + + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + ret = ip6_local_out(net, skb->sk, skb); + if (unlikely(net_xmit_eval(ret))) + dev->stats.tx_errors++; + else + ret = NET_XMIT_SUCCESS; + + return ret; +err: + vrf_tx_error(dev, skb); + return NET_XMIT_DROP; +} +#else static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, struct net_device *dev) { vrf_tx_error(dev, skb); return NET_XMIT_DROP; } +#endif static int vrf_send_v4_prep(struct sk_buff *skb, struct flowi4 *fl4, struct net_device *vrf_dev) @@ -208,7 +296,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, .flowi4_oif = vrf_dev->ifindex, .flowi4_iif = LOOPBACK_IFINDEX, .flowi4_tos = RT_TOS(ip4h->tos), - .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_VRFSRC | + .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF, .daddr = ip4h->daddr, }; @@ -221,7 +309,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, RT_SCOPE_LINK); } - ret = ip_local_out(skb); + ret = ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); if (unlikely(net_xmit_eval(ret))) vrf_dev->stats.tx_errors++; else @@ -268,6 +356,157 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) return ret; } +#if IS_ENABLED(CONFIG_IPV6) +static struct dst_entry *vrf_ip6_check(struct dst_entry *dst, u32 cookie) +{ + return dst; +} + +static struct dst_ops vrf_dst_ops6 = { + .family = AF_INET6, + .local_out = ip6_local_out, + .check = vrf_ip6_check, + .mtu = vrf_v4_mtu, + .destroy = vrf_dst_destroy, + .default_advmss = vrf_default_advmss, +}; + +static int init_dst_ops6_kmem_cachep(void) +{ + vrf_dst_ops6.kmem_cachep = kmem_cache_create("vrf_ip6_dst_cache", + sizeof(struct rt6_info), + 0, + SLAB_HWCACHE_ALIGN, + NULL); + + if (!vrf_dst_ops6.kmem_cachep) + return -ENOMEM; + + return 0; +} + +static void free_dst_ops6_kmem_cachep(void) +{ + kmem_cache_destroy(vrf_dst_ops6.kmem_cachep); +} + +static int vrf_input6(struct sk_buff *skb) +{ + skb->dev->stats.rx_errors++; + kfree_skb(skb); + return 0; +} + +/* modelled after ip6_finish_output2 */ +static int vrf_finish_output6(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct net_device *dev = dst->dev; + struct neighbour *neigh; + struct in6_addr *nexthop; + int ret; + + skb->protocol = htons(ETH_P_IPV6); + skb->dev = dev; + + rcu_read_lock_bh(); + nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); + neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); + if (unlikely(!neigh)) + neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); + if (!IS_ERR(neigh)) { + ret = dst_neigh_output(dst, neigh, skb); + rcu_read_unlock_bh(); + return ret; + } + rcu_read_unlock_bh(); + + IP6_INC_STATS(dev_net(dst->dev), + ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + kfree_skb(skb); + return -EINVAL; +} + +/* modelled after ip6_output */ +static int vrf_output6(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, + net, sk, skb, NULL, skb_dst(skb)->dev, + vrf_finish_output6, + !(IP6CB(skb)->flags & IP6SKB_REROUTED)); +} + +static void vrf_rt6_destroy(struct net_vrf *vrf) +{ + dst_destroy(&vrf->rt6->dst); + free_percpu(vrf->rt6->rt6i_pcpu); + vrf->rt6 = NULL; +} + +static int vrf_rt6_create(struct net_device *dev) +{ + struct net_vrf *vrf = netdev_priv(dev); + struct dst_entry *dst; + struct rt6_info *rt6; + int cpu; + int rc = -ENOMEM; + + rt6 = dst_alloc(&vrf_dst_ops6, dev, 0, + DST_OBSOLETE_NONE, + (DST_HOST | DST_NOPOLICY | DST_NOXFRM)); + if (!rt6) + goto out; + + dst = &rt6->dst; + + rt6->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_KERNEL); + if (!rt6->rt6i_pcpu) { + dst_destroy(dst); + goto out; + } + for_each_possible_cpu(cpu) { + struct rt6_info **p = per_cpu_ptr(rt6->rt6i_pcpu, cpu); + *p = NULL; + } + + memset(dst + 1, 0, sizeof(*rt6) - sizeof(*dst)); + + INIT_LIST_HEAD(&rt6->rt6i_siblings); + INIT_LIST_HEAD(&rt6->rt6i_uncached); + + rt6->dst.input = vrf_input6; + rt6->dst.output = vrf_output6; + + rt6->rt6i_table = fib6_get_table(dev_net(dev), vrf->tb_id); + + atomic_set(&rt6->dst.__refcnt, 2); + + vrf->rt6 = rt6; + rc = 0; +out: + return rc; +} +#else +static int init_dst_ops6_kmem_cachep(void) +{ + return 0; +} + +static void free_dst_ops6_kmem_cachep(void) +{ +} + +static void vrf_rt6_destroy(struct net_vrf *vrf) +{ +} + +static int vrf_rt6_create(struct net_device *dev) +{ + return 0; +} +#endif + /* modelled after ip_finish_output2 */ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -311,10 +550,9 @@ err: return ret; } -static int vrf_output(struct sock *sk, struct sk_buff *skb) +static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb_dst(skb)->dev; - struct net *net = dev_net(dev); IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len); @@ -433,7 +671,7 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev) if (ret < 0) goto out_unregister; - port_dev->flags |= IFF_SLAVE; + port_dev->priv_flags |= IFF_L3MDEV_SLAVE; __vrf_insert_slave(queue, slave); cycle_netdev(port_dev); @@ -448,7 +686,7 @@ out_fail: static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev) { - if (netif_is_l3_master(port_dev) || vrf_is_slave(port_dev)) + if (netif_is_l3_master(port_dev) || netif_is_l3_slave(port_dev)) return -EINVAL; return do_vrf_add_slave(dev, port_dev); @@ -462,7 +700,7 @@ static int do_vrf_del_slave(struct net_device *dev, struct net_device *port_dev) struct slave *slave; netdev_upper_dev_unlink(port_dev, dev); - port_dev->flags &= ~IFF_SLAVE; + port_dev->priv_flags &= ~IFF_L3MDEV_SLAVE; netdev_rx_handler_unregister(port_dev); @@ -490,6 +728,7 @@ static void vrf_dev_uninit(struct net_device *dev) struct slave *slave, *next; vrf_rtable_destroy(vrf); + vrf_rt6_destroy(vrf); list_for_each_entry_safe(slave, next, head, list) vrf_del_slave(dev, slave->dev); @@ -513,10 +752,15 @@ static int vrf_dev_init(struct net_device *dev) if (!vrf->rth) goto out_stats; + if (vrf_rt6_create(dev) != 0) + goto out_rth; + dev->flags = IFF_MASTER | IFF_NOARP; return 0; +out_rth: + vrf_rtable_destroy(vrf); out_stats: free_percpu(dev->dstats); dev->dstats = NULL; @@ -545,7 +789,7 @@ static struct rtable *vrf_get_rtable(const struct net_device *dev, { struct rtable *rth = NULL; - if (!(fl4->flowi4_flags & FLOWI_FLAG_VRFSRC)) { + if (!(fl4->flowi4_flags & FLOWI_FLAG_L3MDEV_SRC)) { struct net_vrf *vrf = netdev_priv(dev); rth = vrf->rth; @@ -555,9 +799,61 @@ static struct rtable *vrf_get_rtable(const struct net_device *dev, return rth; } +/* called under rcu_read_lock */ +static void vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4) +{ + struct fib_result res = { .tclassid = 0 }; + struct net *net = dev_net(dev); + u32 orig_tos = fl4->flowi4_tos; + u8 flags = fl4->flowi4_flags; + u8 scope = fl4->flowi4_scope; + u8 tos = RT_FL_TOS(fl4); + + if (unlikely(!fl4->daddr)) + return; + + fl4->flowi4_flags |= FLOWI_FLAG_SKIP_NH_OIF; + fl4->flowi4_iif = LOOPBACK_IFINDEX; + fl4->flowi4_tos = tos & IPTOS_RT_MASK; + fl4->flowi4_scope = ((tos & RTO_ONLINK) ? + RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); + + if (!fib_lookup(net, fl4, &res, 0)) { + if (res.type == RTN_LOCAL) + fl4->saddr = res.fi->fib_prefsrc ? : fl4->daddr; + else + fib_select_path(net, &res, fl4, -1); + } + + fl4->flowi4_flags = flags; + fl4->flowi4_tos = orig_tos; + fl4->flowi4_scope = scope; +} + +#if IS_ENABLED(CONFIG_IPV6) +static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev, + const struct flowi6 *fl6) +{ + struct rt6_info *rt = NULL; + + if (!(fl6->flowi6_flags & FLOWI_FLAG_L3MDEV_SRC)) { + struct net_vrf *vrf = netdev_priv(dev); + + rt = vrf->rt6; + atomic_inc(&rt->dst.__refcnt); + } + + return (struct dst_entry *)rt; +} +#endif + static const struct l3mdev_ops vrf_l3mdev_ops = { .l3mdev_fib_table = vrf_fib_table, .l3mdev_get_rtable = vrf_get_rtable, + .l3mdev_get_saddr = vrf_get_saddr, +#if IS_ENABLED(CONFIG_IPV6) + .l3mdev_get_rt6_dst = vrf_get_rt6_dst, +#endif }; static void vrf_get_drvinfo(struct net_device *dev, @@ -672,7 +968,7 @@ static int vrf_device_event(struct notifier_block *unused, if (event == NETDEV_UNREGISTER) { struct net_device *vrf_dev; - if (netif_is_l3_master(dev)) + if (!netif_is_l3_slave(dev)) goto out; vrf_dev = netdev_master_upper_dev_get(dev); @@ -699,6 +995,10 @@ static int __init vrf_init_module(void) if (!vrf_dst_ops.kmem_cachep) return -ENOMEM; + rc = init_dst_ops6_kmem_cachep(); + if (rc != 0) + goto error2; + register_netdevice_notifier(&vrf_notifier_block); rc = rtnl_link_register(&vrf_link_ops); @@ -709,6 +1009,8 @@ static int __init vrf_init_module(void) error: unregister_netdevice_notifier(&vrf_notifier_block); + free_dst_ops6_kmem_cachep(); +error2: kmem_cache_destroy(vrf_dst_ops.kmem_cachep); return rc; } @@ -718,6 +1020,7 @@ static void __exit vrf_cleanup_module(void) rtnl_link_unregister(&vrf_link_ops); unregister_netdevice_notifier(&vrf_notifier_block); kmem_cache_destroy(vrf_dst_ops.kmem_cachep); + free_dst_ops6_kmem_cachep(); } module_init(vrf_init_module); |