diff options
author | David S. Miller <davem@davemloft.net> | 2019-07-06 00:01:15 +0200 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2019-07-06 00:01:15 +0200 |
commit | e3b60ffbc16feeb007d07b2b7d1da4304e98c1a3 (patch) | |
tree | 2de159d2ed3f637bc5423e82a31b7e34da032e76 /net/xfrm | |
parent | Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next (diff) | |
parent | xfrm: remove get_mtu indirection from xfrm_type (diff) | |
download | linux-e3b60ffbc16feeb007d07b2b7d1da4304e98c1a3.tar.xz linux-e3b60ffbc16feeb007d07b2b7d1da4304e98c1a3.zip |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next
Steffen Klassert says:
====================
pull request (net-next): ipsec-next 2019-07-05
1) A lot of work to remove indirections from the xfrm code.
From Florian Westphal.
2) Fix a WARN_ON with ipv6 that triggered because of a
forgotten break statement. From Florian Westphal.
3) Remove xfrmi_init_net, it is not needed.
From Li RongQing.
Please pull or let me know if there are problems.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/xfrm')
-rw-r--r-- | net/xfrm/xfrm_device.c | 5 | ||||
-rw-r--r-- | net/xfrm/xfrm_input.c | 25 | ||||
-rw-r--r-- | net/xfrm/xfrm_interface.c | 6 | ||||
-rw-r--r-- | net/xfrm/xfrm_policy.c | 2 | ||||
-rw-r--r-- | net/xfrm/xfrm_state.c | 437 |
5 files changed, 345 insertions, 130 deletions
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index ff654306d836..189ef15acbbc 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -271,9 +271,8 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x) return false; if ((!dev || (dev == xfrm_dst_path(dst)->dev)) && - (!xdst->child->xfrm && x->type->get_mtu)) { - mtu = x->type->get_mtu(x, xdst->child_mtu_cached); - + (!xdst->child->xfrm)) { + mtu = xfrm_state_mtu(x, xdst->child_mtu_cached); if (skb->len <= mtu) goto ok; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 314973aaa414..6088bc2dc11e 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -359,28 +359,29 @@ static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family); if (likely(afinfo)) err = afinfo->extract_input(x, skb); + rcu_read_unlock(); - if (err) { - rcu_read_unlock(); + if (err) return err; - } if (x->sel.family == AF_UNSPEC) { inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); - if (!inner_mode) { - rcu_read_unlock(); + if (!inner_mode) return -EAFNOSUPPORT; - } } - afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family); - if (unlikely(!afinfo)) { - rcu_read_unlock(); - return -EAFNOSUPPORT; + switch (inner_mode->family) { + case AF_INET: + skb->protocol = htons(ETH_P_IP); + break; + case AF_INET6: + skb->protocol = htons(ETH_P_IPV6); + break; + default: + WARN_ON_ONCE(1); + break; } - skb->protocol = afinfo->eth_proto; - rcu_read_unlock(); return xfrm_inner_mode_encap_remove(x, inner_mode, skb); } diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index ad3a2555c517..f8eb9e342173 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -793,11 +793,6 @@ static void __net_exit xfrmi_destroy_interfaces(struct xfrmi_net *xfrmn) unregister_netdevice_many(&list); } -static int __net_init xfrmi_init_net(struct net *net) -{ - return 0; -} - static void __net_exit xfrmi_exit_net(struct net *net) { struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); @@ -808,7 +803,6 @@ static void __net_exit xfrmi_exit_net(struct net *net) } static struct pernet_operations xfrmi_net_ops = { - .init = xfrmi_init_net, .exit = xfrmi_exit_net, .id = &xfrmi_net_id, .size = sizeof(struct xfrmi_net), diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index b1694d5d15d3..1070dfece76b 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3628,7 +3628,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, } xfrm_nr = ti; if (npols > 1) { - xfrm_tmpl_sort(stp, tpp, xfrm_nr, family, net); + xfrm_tmpl_sort(stp, tpp, xfrm_nr, family); tpp = stp; } diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 50621d982970..c6f3c4a1bd99 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -27,6 +27,8 @@ #include <linux/interrupt.h> #include <linux/kernel.h> +#include <crypto/aead.h> + #include "xfrm_hash.h" #define xfrm_state_deref_prot(table, net) \ @@ -177,63 +179,132 @@ int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol) static bool km_is_alive(const struct km_event *c); void km_state_expired(struct xfrm_state *x, int hard, u32 portid); -static DEFINE_SPINLOCK(xfrm_type_lock); int xfrm_register_type(const struct xfrm_type *type, unsigned short family) { struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); - const struct xfrm_type **typemap; int err = 0; - if (unlikely(afinfo == NULL)) + if (!afinfo) return -EAFNOSUPPORT; - typemap = afinfo->type_map; - spin_lock_bh(&xfrm_type_lock); - if (likely(typemap[type->proto] == NULL)) - typemap[type->proto] = type; - else - err = -EEXIST; - spin_unlock_bh(&xfrm_type_lock); +#define X(afi, T, name) do { \ + WARN_ON((afi)->type_ ## name); \ + (afi)->type_ ## name = (T); \ + } while (0) + + switch (type->proto) { + case IPPROTO_COMP: + X(afinfo, type, comp); + break; + case IPPROTO_AH: + X(afinfo, type, ah); + break; + case IPPROTO_ESP: + X(afinfo, type, esp); + break; + case IPPROTO_IPIP: + X(afinfo, type, ipip); + break; + case IPPROTO_DSTOPTS: + X(afinfo, type, dstopts); + break; + case IPPROTO_ROUTING: + X(afinfo, type, routing); + break; + case IPPROTO_IPV6: + X(afinfo, type, ipip6); + break; + default: + WARN_ON(1); + err = -EPROTONOSUPPORT; + break; + } +#undef X rcu_read_unlock(); return err; } EXPORT_SYMBOL(xfrm_register_type); -int xfrm_unregister_type(const struct xfrm_type *type, unsigned short family) +void xfrm_unregister_type(const struct xfrm_type *type, unsigned short family) { struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); - const struct xfrm_type **typemap; - int err = 0; if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; - typemap = afinfo->type_map; - spin_lock_bh(&xfrm_type_lock); + return; - if (unlikely(typemap[type->proto] != type)) - err = -ENOENT; - else - typemap[type->proto] = NULL; - spin_unlock_bh(&xfrm_type_lock); +#define X(afi, T, name) do { \ + WARN_ON((afi)->type_ ## name != (T)); \ + (afi)->type_ ## name = NULL; \ + } while (0) + + switch (type->proto) { + case IPPROTO_COMP: + X(afinfo, type, comp); + break; + case IPPROTO_AH: + X(afinfo, type, ah); + break; + case IPPROTO_ESP: + X(afinfo, type, esp); + break; + case IPPROTO_IPIP: + X(afinfo, type, ipip); + break; + case IPPROTO_DSTOPTS: + X(afinfo, type, dstopts); + break; + case IPPROTO_ROUTING: + X(afinfo, type, routing); + break; + case IPPROTO_IPV6: + X(afinfo, type, ipip6); + break; + default: + WARN_ON(1); + break; + } +#undef X rcu_read_unlock(); - return err; } EXPORT_SYMBOL(xfrm_unregister_type); static const struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family) { + const struct xfrm_type *type = NULL; struct xfrm_state_afinfo *afinfo; - const struct xfrm_type **typemap; - const struct xfrm_type *type; int modload_attempted = 0; retry: afinfo = xfrm_state_get_afinfo(family); if (unlikely(afinfo == NULL)) return NULL; - typemap = afinfo->type_map; - type = READ_ONCE(typemap[proto]); + switch (proto) { + case IPPROTO_COMP: + type = afinfo->type_comp; + break; + case IPPROTO_AH: + type = afinfo->type_ah; + break; + case IPPROTO_ESP: + type = afinfo->type_esp; + break; + case IPPROTO_IPIP: + type = afinfo->type_ipip; + break; + case IPPROTO_DSTOPTS: + type = afinfo->type_dstopts; + break; + case IPPROTO_ROUTING: + type = afinfo->type_routing; + break; + case IPPROTO_IPV6: + type = afinfo->type_ipip6; + break; + default: + break; + } + if (unlikely(type && !try_module_get(type->owner))) type = NULL; @@ -253,65 +324,71 @@ static void xfrm_put_type(const struct xfrm_type *type) module_put(type->owner); } -static DEFINE_SPINLOCK(xfrm_type_offload_lock); int xfrm_register_type_offload(const struct xfrm_type_offload *type, unsigned short family) { struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); - const struct xfrm_type_offload **typemap; int err = 0; if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; - typemap = afinfo->type_offload_map; - spin_lock_bh(&xfrm_type_offload_lock); - if (likely(typemap[type->proto] == NULL)) - typemap[type->proto] = type; - else - err = -EEXIST; - spin_unlock_bh(&xfrm_type_offload_lock); + switch (type->proto) { + case IPPROTO_ESP: + WARN_ON(afinfo->type_offload_esp); + afinfo->type_offload_esp = type; + break; + default: + WARN_ON(1); + err = -EPROTONOSUPPORT; + break; + } + rcu_read_unlock(); return err; } EXPORT_SYMBOL(xfrm_register_type_offload); -int xfrm_unregister_type_offload(const struct xfrm_type_offload *type, - unsigned short family) +void xfrm_unregister_type_offload(const struct xfrm_type_offload *type, + unsigned short family) { struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); - const struct xfrm_type_offload **typemap; - int err = 0; if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; - typemap = afinfo->type_offload_map; - spin_lock_bh(&xfrm_type_offload_lock); + return; - if (unlikely(typemap[type->proto] != type)) - err = -ENOENT; - else - typemap[type->proto] = NULL; - spin_unlock_bh(&xfrm_type_offload_lock); + switch (type->proto) { + case IPPROTO_ESP: + WARN_ON(afinfo->type_offload_esp != type); + afinfo->type_offload_esp = NULL; + break; + default: + WARN_ON(1); + break; + } rcu_read_unlock(); - return err; } EXPORT_SYMBOL(xfrm_unregister_type_offload); static const struct xfrm_type_offload * xfrm_get_type_offload(u8 proto, unsigned short family, bool try_load) { + const struct xfrm_type_offload *type = NULL; struct xfrm_state_afinfo *afinfo; - const struct xfrm_type_offload **typemap; - const struct xfrm_type_offload *type; retry: afinfo = xfrm_state_get_afinfo(family); if (unlikely(afinfo == NULL)) return NULL; - typemap = afinfo->type_offload_map; - type = typemap[proto]; + switch (proto) { + case IPPROTO_ESP: + type = afinfo->type_offload_esp; + break; + default: + break; + } + if ((type && !try_module_get(type->owner))) type = NULL; @@ -770,24 +847,79 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si) EXPORT_SYMBOL(xfrm_sad_getinfo); static void +__xfrm4_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl) +{ + const struct flowi4 *fl4 = &fl->u.ip4; + + sel->daddr.a4 = fl4->daddr; + sel->saddr.a4 = fl4->saddr; + sel->dport = xfrm_flowi_dport(fl, &fl4->uli); + sel->dport_mask = htons(0xffff); + sel->sport = xfrm_flowi_sport(fl, &fl4->uli); + sel->sport_mask = htons(0xffff); + sel->family = AF_INET; + sel->prefixlen_d = 32; + sel->prefixlen_s = 32; + sel->proto = fl4->flowi4_proto; + sel->ifindex = fl4->flowi4_oif; +} + +static void +__xfrm6_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl) +{ + const struct flowi6 *fl6 = &fl->u.ip6; + + /* Initialize temporary selector matching only to current session. */ + *(struct in6_addr *)&sel->daddr = fl6->daddr; + *(struct in6_addr *)&sel->saddr = fl6->saddr; + sel->dport = xfrm_flowi_dport(fl, &fl6->uli); + sel->dport_mask = htons(0xffff); + sel->sport = xfrm_flowi_sport(fl, &fl6->uli); + sel->sport_mask = htons(0xffff); + sel->family = AF_INET6; + sel->prefixlen_d = 128; + sel->prefixlen_s = 128; + sel->proto = fl6->flowi6_proto; + sel->ifindex = fl6->flowi6_oif; +} + +static void xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl, const struct xfrm_tmpl *tmpl, const xfrm_address_t *daddr, const xfrm_address_t *saddr, unsigned short family) { - struct xfrm_state_afinfo *afinfo = xfrm_state_afinfo_get_rcu(family); - - if (!afinfo) - return; + switch (family) { + case AF_INET: + __xfrm4_init_tempsel(&x->sel, fl); + break; + case AF_INET6: + __xfrm6_init_tempsel(&x->sel, fl); + break; + } - afinfo->init_tempsel(&x->sel, fl); + x->id = tmpl->id; - if (family != tmpl->encap_family) { - afinfo = xfrm_state_afinfo_get_rcu(tmpl->encap_family); - if (!afinfo) - return; + switch (tmpl->encap_family) { + case AF_INET: + if (x->id.daddr.a4 == 0) + x->id.daddr.a4 = daddr->a4; + x->props.saddr = tmpl->saddr; + if (x->props.saddr.a4 == 0) + x->props.saddr.a4 = saddr->a4; + break; + case AF_INET6: + if (ipv6_addr_any((struct in6_addr *)&x->id.daddr)) + memcpy(&x->id.daddr, daddr, sizeof(x->sel.daddr)); + memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr)); + if (ipv6_addr_any((struct in6_addr *)&x->props.saddr)) + memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr)); + break; } - afinfo->init_temprop(x, tmpl, daddr, saddr); + + x->props.mode = tmpl->mode; + x->props.reqid = tmpl->reqid; + x->props.family = tmpl->encap_family; } static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, @@ -1633,51 +1765,129 @@ xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid, EXPORT_SYMBOL(xfrm_find_acq); #ifdef CONFIG_XFRM_SUB_POLICY -int +#if IS_ENABLED(CONFIG_IPV6) +/* distribution counting sort function for xfrm_state and xfrm_tmpl */ +static void +__xfrm6_sort(void **dst, void **src, int n, + int (*cmp)(const void *p), int maxclass) +{ + int count[XFRM_MAX_DEPTH] = { }; + int class[XFRM_MAX_DEPTH]; + int i; + + for (i = 0; i < n; i++) { + int c = cmp(src[i]); + + class[i] = c; + count[c]++; + } + + for (i = 2; i < maxclass; i++) + count[i] += count[i - 1]; + + for (i = 0; i < n; i++) { + dst[count[class[i] - 1]++] = src[i]; + src[i] = NULL; + } +} + +/* Rule for xfrm_state: + * + * rule 1: select IPsec transport except AH + * rule 2: select MIPv6 RO or inbound trigger + * rule 3: select IPsec transport AH + * rule 4: select IPsec tunnel + * rule 5: others + */ +static int __xfrm6_state_sort_cmp(const void *p) +{ + const struct xfrm_state *v = p; + + switch (v->props.mode) { + case XFRM_MODE_TRANSPORT: + if (v->id.proto != IPPROTO_AH) + return 1; + else + return 3; +#if IS_ENABLED(CONFIG_IPV6_MIP6) + case XFRM_MODE_ROUTEOPTIMIZATION: + case XFRM_MODE_IN_TRIGGER: + return 2; +#endif + case XFRM_MODE_TUNNEL: + case XFRM_MODE_BEET: + return 4; + } + return 5; +} + +/* Rule for xfrm_tmpl: + * + * rule 1: select IPsec transport + * rule 2: select MIPv6 RO or inbound trigger + * rule 3: select IPsec tunnel + * rule 4: others + */ +static int __xfrm6_tmpl_sort_cmp(const void *p) +{ + const struct xfrm_tmpl *v = p; + + switch (v->mode) { + case XFRM_MODE_TRANSPORT: + return 1; +#if IS_ENABLED(CONFIG_IPV6_MIP6) + case XFRM_MODE_ROUTEOPTIMIZATION: + case XFRM_MODE_IN_TRIGGER: + return 2; +#endif + case XFRM_MODE_TUNNEL: + case XFRM_MODE_BEET: + return 3; + } + return 4; +} +#else +static inline int __xfrm6_state_sort_cmp(const void *p) { return 5; } +static inline int __xfrm6_tmpl_sort_cmp(const void *p) { return 4; } + +static inline void +__xfrm6_sort(void **dst, void **src, int n, + int (*cmp)(const void *p), int maxclass) +{ + int i; + + for (i = 0; i < n; i++) + dst[i] = src[i]; +} +#endif /* CONFIG_IPV6 */ + +void xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n, - unsigned short family, struct net *net) + unsigned short family) { int i; - int err = 0; - struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); - if (!afinfo) - return -EAFNOSUPPORT; - spin_lock_bh(&net->xfrm.xfrm_state_lock); /*FIXME*/ - if (afinfo->tmpl_sort) - err = afinfo->tmpl_sort(dst, src, n); + if (family == AF_INET6) + __xfrm6_sort((void **)dst, (void **)src, n, + __xfrm6_tmpl_sort_cmp, 5); else for (i = 0; i < n; i++) dst[i] = src[i]; - spin_unlock_bh(&net->xfrm.xfrm_state_lock); - rcu_read_unlock(); - return err; } -EXPORT_SYMBOL(xfrm_tmpl_sort); -int +void xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n, unsigned short family) { int i; - int err = 0; - struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); - struct net *net = xs_net(*src); - - if (!afinfo) - return -EAFNOSUPPORT; - spin_lock_bh(&net->xfrm.xfrm_state_lock); - if (afinfo->state_sort) - err = afinfo->state_sort(dst, src, n); + if (family == AF_INET6) + __xfrm6_sort((void **)dst, (void **)src, n, + __xfrm6_state_sort_cmp, 6); else for (i = 0; i < n; i++) dst[i] = src[i]; - spin_unlock_bh(&net->xfrm.xfrm_state_lock); - rcu_read_unlock(); - return err; } -EXPORT_SYMBOL(xfrm_state_sort); #endif /* Silly enough, but I'm lazy to build resolution list */ @@ -2195,38 +2405,49 @@ void xfrm_state_delete_tunnel(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_delete_tunnel); -int xfrm_state_mtu(struct xfrm_state *x, int mtu) +u32 xfrm_state_mtu(struct xfrm_state *x, int mtu) { const struct xfrm_type *type = READ_ONCE(x->type); + struct crypto_aead *aead; + u32 blksize, net_adj = 0; + + if (x->km.state != XFRM_STATE_VALID || + !type || type->proto != IPPROTO_ESP) + return mtu - x->props.header_len; + + aead = x->data; + blksize = ALIGN(crypto_aead_blocksize(aead), 4); - if (x->km.state == XFRM_STATE_VALID && - type && type->get_mtu) - return type->get_mtu(x, mtu); + switch (x->props.mode) { + case XFRM_MODE_TRANSPORT: + case XFRM_MODE_BEET: + if (x->props.family == AF_INET) + net_adj = sizeof(struct iphdr); + else if (x->props.family == AF_INET6) + net_adj = sizeof(struct ipv6hdr); + break; + case XFRM_MODE_TUNNEL: + break; + default: + WARN_ON_ONCE(1); + break; + } - return mtu - x->props.header_len; + return ((mtu - x->props.header_len - crypto_aead_authsize(aead) - + net_adj) & ~(blksize - 1)) + net_adj - 2; } +EXPORT_SYMBOL_GPL(xfrm_state_mtu); int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) { - const struct xfrm_state_afinfo *afinfo; const struct xfrm_mode *inner_mode; const struct xfrm_mode *outer_mode; int family = x->props.family; int err; - err = -EAFNOSUPPORT; - afinfo = xfrm_state_get_afinfo(family); - if (!afinfo) - goto error; - - err = 0; - if (afinfo->init_flags) - err = afinfo->init_flags(x); - - rcu_read_unlock(); - - if (err) - goto error; + if (family == AF_INET && + xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc) + x->props.flags |= XFRM_STATE_NOPMTUDISC; err = -EPROTONOSUPPORT; |