diff options
Diffstat (limited to 'net/mpls')
-rw-r--r-- | net/mpls/Kconfig | 8 | ||||
-rw-r--r-- | net/mpls/Makefile | 1 | ||||
-rw-r--r-- | net/mpls/af_mpls.c | 201 | ||||
-rw-r--r-- | net/mpls/internal.h | 9 | ||||
-rw-r--r-- | net/mpls/mpls_iptunnel.c | 231 |
5 files changed, 403 insertions, 47 deletions
diff --git a/net/mpls/Kconfig b/net/mpls/Kconfig index 17bde799c854..5c467ef97311 100644 --- a/net/mpls/Kconfig +++ b/net/mpls/Kconfig @@ -24,7 +24,13 @@ config NET_MPLS_GSO config MPLS_ROUTING tristate "MPLS: routing support" - help + ---help--- Add support for forwarding of mpls packets. +config MPLS_IPTUNNEL + tristate "MPLS: IP over MPLS tunnel support" + depends on LWTUNNEL && MPLS_ROUTING + ---help--- + mpls ip tunnel support. + endif # MPLS diff --git a/net/mpls/Makefile b/net/mpls/Makefile index 65bbe68c72e6..9ca923625016 100644 --- a/net/mpls/Makefile +++ b/net/mpls/Makefile @@ -3,5 +3,6 @@ # obj-$(CONFIG_NET_MPLS_GSO) += mpls_gso.o obj-$(CONFIG_MPLS_ROUTING) += mpls_router.o +obj-$(CONFIG_MPLS_IPTUNNEL) += mpls_iptunnel.o mpls_router-y := af_mpls.o diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 1f93a5978f2a..bb185a28de98 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -15,6 +15,10 @@ #include <net/ip_fib.h> #include <net/netevent.h> #include <net/netns/generic.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ipv6.h> +#include <net/addrconf.h> +#endif #include "internal.h" #define LABEL_NOT_SPECIFIED (1<<20) @@ -23,11 +27,23 @@ /* This maximum ha length copied from the definition of struct neighbour */ #define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))) +enum mpls_payload_type { + MPT_UNSPEC, /* IPv4 or IPv6 */ + MPT_IPV4 = 4, + MPT_IPV6 = 6, + + /* Other types not implemented: + * - Pseudo-wire with or without control word (RFC4385) + * - GAL (RFC5586) + */ +}; + struct mpls_route { /* next hop label forwarding entry */ struct net_device __rcu *rt_dev; struct rcu_head rt_rcu; u32 rt_label[MAX_NEW_LABELS]; u8 rt_protocol; /* routing protocol that set this entry */ + u8 rt_payload_type; u8 rt_labels; u8 rt_via_alen; u8 rt_via_table; @@ -58,10 +74,11 @@ static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev) return rcu_dereference_rtnl(dev->mpls_ptr); } -static bool mpls_output_possible(const struct net_device *dev) +bool mpls_output_possible(const struct net_device *dev) { return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev); } +EXPORT_SYMBOL_GPL(mpls_output_possible); static unsigned int mpls_rt_header_size(const struct mpls_route *rt) { @@ -69,13 +86,14 @@ static unsigned int mpls_rt_header_size(const struct mpls_route *rt) return rt->rt_labels * sizeof(struct mpls_shim_hdr); } -static unsigned int mpls_dev_mtu(const struct net_device *dev) +unsigned int mpls_dev_mtu(const struct net_device *dev) { /* The amount of data the layer 2 frame can hold */ return dev->mtu; } +EXPORT_SYMBOL_GPL(mpls_dev_mtu); -static bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) +bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) { if (skb->len <= mtu) return false; @@ -85,20 +103,13 @@ static bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) return true; } +EXPORT_SYMBOL_GPL(mpls_pkt_too_big); static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb, struct mpls_entry_decoded dec) { - /* RFC4385 and RFC5586 encode other packets in mpls such that - * they don't conflict with the ip version number, making - * decoding by examining the ip version correct in everything - * except for the strangest cases. - * - * The strange cases if we choose to support them will require - * manual configuration. - */ - struct iphdr *hdr4; - bool success = true; + enum mpls_payload_type payload_type; + bool success = false; /* The IPv4 code below accesses through the IPv4 header * checksum, which is 12 bytes into the packet. @@ -113,23 +124,32 @@ static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb, if (!pskb_may_pull(skb, 12)) return false; - /* Use ip_hdr to find the ip protocol version */ - hdr4 = ip_hdr(skb); - if (hdr4->version == 4) { + payload_type = rt->rt_payload_type; + if (payload_type == MPT_UNSPEC) + payload_type = ip_hdr(skb)->version; + + switch (payload_type) { + case MPT_IPV4: { + struct iphdr *hdr4 = ip_hdr(skb); skb->protocol = htons(ETH_P_IP); csum_replace2(&hdr4->check, htons(hdr4->ttl << 8), htons(dec.ttl << 8)); hdr4->ttl = dec.ttl; + success = true; + break; } - else if (hdr4->version == 6) { + case MPT_IPV6: { struct ipv6hdr *hdr6 = ipv6_hdr(skb); skb->protocol = htons(ETH_P_IPV6); hdr6->hop_limit = dec.ttl; + success = true; + break; + } + case MPT_UNSPEC: + break; } - else - /* version 0 and version 1 are used by pseudo wires */ - success = false; + return success; } @@ -248,16 +268,17 @@ static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = { }; struct mpls_route_config { - u32 rc_protocol; - u32 rc_ifindex; - u16 rc_via_table; - u16 rc_via_alen; - u8 rc_via[MAX_VIA_ALEN]; - u32 rc_label; - u32 rc_output_labels; - u32 rc_output_label[MAX_NEW_LABELS]; - u32 rc_nlflags; - struct nl_info rc_nlinfo; + u32 rc_protocol; + u32 rc_ifindex; + u16 rc_via_table; + u16 rc_via_alen; + u8 rc_via[MAX_VIA_ALEN]; + u32 rc_label; + u32 rc_output_labels; + u32 rc_output_label[MAX_NEW_LABELS]; + u32 rc_nlflags; + enum mpls_payload_type rc_payload_type; + struct nl_info rc_nlinfo; }; static struct mpls_route *mpls_rt_alloc(size_t alen) @@ -286,7 +307,7 @@ static void mpls_notify_route(struct net *net, unsigned index, struct mpls_route *rt = new ? new : old; unsigned nlm_flags = (old && new) ? NLM_F_REPLACE : 0; /* Ignore reserved labels for now */ - if (rt && (index >= 16)) + if (rt && (index >= MPLS_LABEL_FIRST_UNRESERVED)) rtmsg_lfib(event, index, rt, nlh, net, portid, nlm_flags); } @@ -320,13 +341,96 @@ static unsigned find_free_label(struct net *net) platform_label = rtnl_dereference(net->mpls.platform_label); platform_labels = net->mpls.platform_labels; - for (index = 16; index < platform_labels; index++) { + for (index = MPLS_LABEL_FIRST_UNRESERVED; index < platform_labels; + index++) { if (!rtnl_dereference(platform_label[index])) return index; } return LABEL_NOT_SPECIFIED; } +#if IS_ENABLED(CONFIG_INET) +static struct net_device *inet_fib_lookup_dev(struct net *net, void *addr) +{ + struct net_device *dev; + struct rtable *rt; + struct in_addr daddr; + + memcpy(&daddr, addr, sizeof(struct in_addr)); + rt = ip_route_output(net, daddr.s_addr, 0, 0, 0); + if (IS_ERR(rt)) + return ERR_CAST(rt); + + dev = rt->dst.dev; + dev_hold(dev); + + ip_rt_put(rt); + + return dev; +} +#else +static struct net_device *inet_fib_lookup_dev(struct net *net, void *addr) +{ + return ERR_PTR(-EAFNOSUPPORT); +} +#endif + +#if IS_ENABLED(CONFIG_IPV6) +static struct net_device *inet6_fib_lookup_dev(struct net *net, void *addr) +{ + struct net_device *dev; + struct dst_entry *dst; + struct flowi6 fl6; + int err; + + if (!ipv6_stub) + return ERR_PTR(-EAFNOSUPPORT); + + memset(&fl6, 0, sizeof(fl6)); + memcpy(&fl6.daddr, addr, sizeof(struct in6_addr)); + err = ipv6_stub->ipv6_dst_lookup(net, NULL, &dst, &fl6); + if (err) + return ERR_PTR(err); + + dev = dst->dev; + dev_hold(dev); + dst_release(dst); + + return dev; +} +#else +static struct net_device *inet6_fib_lookup_dev(struct net *net, void *addr) +{ + return ERR_PTR(-EAFNOSUPPORT); +} +#endif + +static struct net_device *find_outdev(struct net *net, + struct mpls_route_config *cfg) +{ + struct net_device *dev = NULL; + + if (!cfg->rc_ifindex) { + switch (cfg->rc_via_table) { + case NEIGH_ARP_TABLE: + dev = inet_fib_lookup_dev(net, cfg->rc_via); + break; + case NEIGH_ND_TABLE: + dev = inet6_fib_lookup_dev(net, cfg->rc_via); + break; + case NEIGH_LINK_TABLE: + break; + } + } else { + dev = dev_get_by_index(net, cfg->rc_ifindex); + } + + if (!dev) + return ERR_PTR(-ENODEV); + + return dev; +} + static int mpls_route_add(struct mpls_route_config *cfg) { struct mpls_route __rcu **platform_label; @@ -345,8 +449,8 @@ static int mpls_route_add(struct mpls_route_config *cfg) index = find_free_label(net); } - /* The first 16 labels are reserved, and may not be set */ - if (index < 16) + /* Reserved labels may not be set */ + if (index < MPLS_LABEL_FIRST_UNRESERVED) goto errout; /* The full 20 bit range may not be supported. */ @@ -357,10 +461,12 @@ static int mpls_route_add(struct mpls_route_config *cfg) if (cfg->rc_output_labels > MAX_NEW_LABELS) goto errout; - err = -ENODEV; - dev = dev_get_by_index(net, cfg->rc_ifindex); - if (!dev) + dev = find_outdev(net, cfg); + if (IS_ERR(dev)) { + err = PTR_ERR(dev); + dev = NULL; goto errout; + } /* Ensure this is a supported device */ err = -EINVAL; @@ -401,6 +507,7 @@ static int mpls_route_add(struct mpls_route_config *cfg) rt->rt_label[i] = cfg->rc_output_label[i]; rt->rt_protocol = cfg->rc_protocol; RCU_INIT_POINTER(rt->rt_dev, dev); + rt->rt_payload_type = cfg->rc_payload_type; rt->rt_via_table = cfg->rc_via_table; memcpy(rt->rt_via, cfg->rc_via, cfg->rc_via_alen); @@ -423,8 +530,8 @@ static int mpls_route_del(struct mpls_route_config *cfg) index = cfg->rc_label; - /* The first 16 labels are reserved, and may not be removed */ - if (index < 16) + /* Reserved labels may not be removed */ + if (index < MPLS_LABEL_FIRST_UNRESERVED) goto errout; /* The full 20 bit range may not be supported */ @@ -626,6 +733,7 @@ int nla_put_labels(struct sk_buff *skb, int attrtype, return 0; } +EXPORT_SYMBOL_GPL(nla_put_labels); int nla_get_labels(const struct nlattr *nla, u32 max_labels, u32 *labels, u32 label[]) @@ -671,6 +779,7 @@ int nla_get_labels(const struct nlattr *nla, *labels = nla_labels; return 0; } +EXPORT_SYMBOL_GPL(nla_get_labels); static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, struct mpls_route_config *cfg) @@ -740,8 +849,8 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, &cfg->rc_label)) goto errout; - /* The first 16 labels are reserved, and may not be set */ - if (cfg->rc_label < 16) + /* Reserved labels may not be set */ + if (cfg->rc_label < MPLS_LABEL_FIRST_UNRESERVED) goto errout; break; @@ -866,8 +975,8 @@ static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb) ASSERT_RTNL(); index = cb->args[0]; - if (index < 16) - index = 16; + if (index < MPLS_LABEL_FIRST_UNRESERVED) + index = MPLS_LABEL_FIRST_UNRESERVED; platform_label = rtnl_dereference(net->mpls.platform_label); platform_labels = net->mpls.platform_labels; @@ -953,6 +1062,7 @@ static int resize_platform_label_table(struct net *net, size_t limit) goto nort0; RCU_INIT_POINTER(rt0->rt_dev, lo); rt0->rt_protocol = RTPROT_KERNEL; + rt0->rt_payload_type = MPT_IPV4; rt0->rt_via_table = NEIGH_LINK_TABLE; memcpy(rt0->rt_via, lo->dev_addr, lo->addr_len); } @@ -963,6 +1073,7 @@ static int resize_platform_label_table(struct net *net, size_t limit) goto nort2; RCU_INIT_POINTER(rt2->rt_dev, lo); rt2->rt_protocol = RTPROT_KERNEL; + rt2->rt_payload_type = MPT_IPV6; rt2->rt_via_table = NEIGH_LINK_TABLE; memcpy(rt2->rt_via, lo->dev_addr, lo->addr_len); } @@ -1066,8 +1177,10 @@ static int mpls_net_init(struct net *net) table[0].data = net; net->mpls.ctl = register_net_sysctl(net, "net/mpls", table); - if (net->mpls.ctl == NULL) + if (net->mpls.ctl == NULL) { + kfree(table); return -ENOMEM; + } return 0; } diff --git a/net/mpls/internal.h b/net/mpls/internal.h index 8cabeb5a1cb9..2681a4ba6c37 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -50,7 +50,12 @@ static inline struct mpls_entry_decoded mpls_entry_decode(struct mpls_shim_hdr * return result; } -int nla_put_labels(struct sk_buff *skb, int attrtype, u8 labels, const u32 label[]); -int nla_get_labels(const struct nlattr *nla, u32 max_labels, u32 *labels, u32 label[]); +int nla_put_labels(struct sk_buff *skb, int attrtype, u8 labels, + const u32 label[]); +int nla_get_labels(const struct nlattr *nla, u32 max_labels, u32 *labels, + u32 label[]); +bool mpls_output_possible(const struct net_device *dev); +unsigned int mpls_dev_mtu(const struct net_device *dev); +bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu); #endif /* MPLS_INTERNAL_H */ diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c new file mode 100644 index 000000000000..21e70bc9af98 --- /dev/null +++ b/net/mpls/mpls_iptunnel.c @@ -0,0 +1,231 @@ +/* + * mpls tunnels An implementation mpls tunnels using the light weight tunnel + * infrastructure + * + * Authors: Roopa Prabhu, <roopa@cumulusnetworks.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ +#include <linux/types.h> +#include <linux/skbuff.h> +#include <linux/net.h> +#include <linux/module.h> +#include <linux/mpls.h> +#include <linux/vmalloc.h> +#include <net/ip.h> +#include <net/dst.h> +#include <net/lwtunnel.h> +#include <net/netevent.h> +#include <net/netns/generic.h> +#include <net/ip6_fib.h> +#include <net/route.h> +#include <net/mpls_iptunnel.h> +#include <linux/mpls_iptunnel.h> +#include "internal.h" + +static const struct nla_policy mpls_iptunnel_policy[MPLS_IPTUNNEL_MAX + 1] = { + [MPLS_IPTUNNEL_DST] = { .type = NLA_U32 }, +}; + +static unsigned int mpls_encap_size(struct mpls_iptunnel_encap *en) +{ + /* The size of the layer 2.5 labels to be added for this route */ + return en->labels * sizeof(struct mpls_shim_hdr); +} + +int mpls_output(struct sock *sk, struct sk_buff *skb) +{ + struct mpls_iptunnel_encap *tun_encap_info; + struct mpls_shim_hdr *hdr; + struct net_device *out_dev; + unsigned int hh_len; + unsigned int new_header_size; + unsigned int mtu; + struct dst_entry *dst = skb_dst(skb); + struct rtable *rt = NULL; + struct rt6_info *rt6 = NULL; + int err = 0; + bool bos; + int i; + unsigned int ttl; + + /* Obtain the ttl */ + if (skb->protocol == htons(ETH_P_IP)) { + ttl = ip_hdr(skb)->ttl; + rt = (struct rtable *)dst; + } else if (skb->protocol == htons(ETH_P_IPV6)) { + ttl = ipv6_hdr(skb)->hop_limit; + rt6 = (struct rt6_info *)dst; + } else { + goto drop; + } + + skb_orphan(skb); + + /* Find the output device */ + out_dev = dst->dev; + if (!mpls_output_possible(out_dev) || + !dst->lwtstate || skb_warn_if_lro(skb)) + goto drop; + + skb_forward_csum(skb); + + tun_encap_info = mpls_lwtunnel_encap(dst->lwtstate); + + /* Verify the destination can hold the packet */ + new_header_size = mpls_encap_size(tun_encap_info); + mtu = mpls_dev_mtu(out_dev); + if (mpls_pkt_too_big(skb, mtu - new_header_size)) + goto drop; + + hh_len = LL_RESERVED_SPACE(out_dev); + if (!out_dev->header_ops) + hh_len = 0; + + /* Ensure there is enough space for the headers in the skb */ + if (skb_cow(skb, hh_len + new_header_size)) + goto drop; + + skb_push(skb, new_header_size); + skb_reset_network_header(skb); + + skb->dev = out_dev; + skb->protocol = htons(ETH_P_MPLS_UC); + + /* Push the new labels */ + hdr = mpls_hdr(skb); + bos = true; + for (i = tun_encap_info->labels - 1; i >= 0; i--) { + hdr[i] = mpls_entry_encode(tun_encap_info->label[i], + ttl, 0, bos); + bos = false; + } + + if (rt) + err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt->rt_gateway, + skb); + else if (rt6) + err = neigh_xmit(NEIGH_ND_TABLE, out_dev, &rt6->rt6i_gateway, + skb); + if (err) + net_dbg_ratelimited("%s: packet transmission failed: %d\n", + __func__, err); + + return 0; + +drop: + kfree_skb(skb); + return -EINVAL; +} + +static int mpls_build_state(struct net_device *dev, struct nlattr *nla, + unsigned int family, const void *cfg, + struct lwtunnel_state **ts) +{ + struct mpls_iptunnel_encap *tun_encap_info; + struct nlattr *tb[MPLS_IPTUNNEL_MAX + 1]; + struct lwtunnel_state *newts; + int tun_encap_info_len; + int ret; + + ret = nla_parse_nested(tb, MPLS_IPTUNNEL_MAX, nla, + mpls_iptunnel_policy); + if (ret < 0) + return ret; + + if (!tb[MPLS_IPTUNNEL_DST]) + return -EINVAL; + + tun_encap_info_len = sizeof(*tun_encap_info); + + newts = lwtunnel_state_alloc(tun_encap_info_len); + if (!newts) + return -ENOMEM; + + newts->len = tun_encap_info_len; + tun_encap_info = mpls_lwtunnel_encap(newts); + ret = nla_get_labels(tb[MPLS_IPTUNNEL_DST], MAX_NEW_LABELS, + &tun_encap_info->labels, tun_encap_info->label); + if (ret) + goto errout; + newts->type = LWTUNNEL_ENCAP_MPLS; + newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT; + + *ts = newts; + + return 0; + +errout: + kfree(newts); + *ts = NULL; + + return ret; +} + +static int mpls_fill_encap_info(struct sk_buff *skb, + struct lwtunnel_state *lwtstate) +{ + struct mpls_iptunnel_encap *tun_encap_info; + + tun_encap_info = mpls_lwtunnel_encap(lwtstate); + + if (nla_put_labels(skb, MPLS_IPTUNNEL_DST, tun_encap_info->labels, + tun_encap_info->label)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static int mpls_encap_nlsize(struct lwtunnel_state *lwtstate) +{ + struct mpls_iptunnel_encap *tun_encap_info; + + tun_encap_info = mpls_lwtunnel_encap(lwtstate); + + return nla_total_size(tun_encap_info->labels * 4); +} + +static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) +{ + struct mpls_iptunnel_encap *a_hdr = mpls_lwtunnel_encap(a); + struct mpls_iptunnel_encap *b_hdr = mpls_lwtunnel_encap(b); + int l; + + if (a_hdr->labels != b_hdr->labels) + return 1; + + for (l = 0; l < MAX_NEW_LABELS; l++) + if (a_hdr->label[l] != b_hdr->label[l]) + return 1; + return 0; +} + +static const struct lwtunnel_encap_ops mpls_iptun_ops = { + .build_state = mpls_build_state, + .output = mpls_output, + .fill_encap = mpls_fill_encap_info, + .get_encap_size = mpls_encap_nlsize, + .cmp_encap = mpls_encap_cmp, +}; + +static int __init mpls_iptunnel_init(void) +{ + return lwtunnel_encap_add_ops(&mpls_iptun_ops, LWTUNNEL_ENCAP_MPLS); +} +module_init(mpls_iptunnel_init); + +static void __exit mpls_iptunnel_exit(void) +{ + lwtunnel_encap_del_ops(&mpls_iptun_ops, LWTUNNEL_ENCAP_MPLS); +} +module_exit(mpls_iptunnel_exit); + +MODULE_DESCRIPTION("MultiProtocol Label Switching IP Tunnels"); +MODULE_LICENSE("GPL v2"); |