summaryrefslogtreecommitdiffstats
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/Kconfig35
-rw-r--r--net/ipv6/Makefile4
-rw-r--r--net/ipv6/addrconf.c65
-rw-r--r--net/ipv6/addrconf_core.c40
-rw-r--r--net/ipv6/addrlabel.c14
-rw-r--r--net/ipv6/af_inet6.c24
-rw-r--r--net/ipv6/esp6_offload.c48
-rw-r--r--net/ipv6/fib6_rules.c53
-rw-r--r--net/ipv6/icmp.c59
-rw-r--r--net/ipv6/ila/ila_lwt.c3
-rw-r--r--net/ipv6/ila/ila_main.c9
-rw-r--r--net/ipv6/ila/ila_xlat.c1
-rw-r--r--net/ipv6/ip6_fib.c54
-rw-r--r--net/ipv6/ip6_flowlabel.c22
-rw-r--r--net/ipv6/ip6_gre.c20
-rw-r--r--net/ipv6/ip6_input.c12
-rw-r--r--net/ipv6/ip6_output.c6
-rw-r--r--net/ipv6/ip6_tunnel.c4
-rw-r--r--net/ipv6/ip6_vti.c6
-rw-r--r--net/ipv6/ip6mr.c1
-rw-r--r--net/ipv6/ndisc.c25
-rw-r--r--net/ipv6/netfilter/Kconfig19
-rw-r--r--net/ipv6/netfilter/Makefile2
-rw-r--r--net/ipv6/netfilter/ip6t_MASQUERADE.c81
-rw-r--r--net/ipv6/netfilter/ip6t_srh.c6
-rw-r--r--net/ipv6/netfilter/nft_chain_route_ipv6.c91
-rw-r--r--net/ipv6/output_core.c30
-rw-r--r--net/ipv6/raw.c1
-rw-r--r--net/ipv6/route.c1237
-rw-r--r--net/ipv6/seg6.c9
-rw-r--r--net/ipv6/seg6_iptunnel.c4
-rw-r--r--net/ipv6/seg6_local.c11
-rw-r--r--net/ipv6/sit.c6
-rw-r--r--net/ipv6/tcp_ipv6.c70
-rw-r--r--net/ipv6/udp.c17
-rw-r--r--net/ipv6/xfrm6_mode_beet.c131
-rw-r--r--net/ipv6/xfrm6_mode_ro.c85
-rw-r--r--net/ipv6/xfrm6_mode_transport.c121
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c151
-rw-r--r--net/ipv6/xfrm6_output.c36
-rw-r--r--net/ipv6/xfrm6_policy.c126
-rw-r--r--net/ipv6/xfrm6_protocol.c3
-rw-r--r--net/ipv6/xfrm6_tunnel.c6
43 files changed, 1067 insertions, 1681 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 613282c65a10..cd915e332c98 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -135,44 +135,11 @@ config INET6_TUNNEL
tristate
default n
-config INET6_XFRM_MODE_TRANSPORT
- tristate "IPv6: IPsec transport mode"
- default IPV6
- select XFRM
- ---help---
- Support for IPsec transport mode.
-
- If unsure, say Y.
-
-config INET6_XFRM_MODE_TUNNEL
- tristate "IPv6: IPsec tunnel mode"
- default IPV6
- select XFRM
- ---help---
- Support for IPsec tunnel mode.
-
- If unsure, say Y.
-
-config INET6_XFRM_MODE_BEET
- tristate "IPv6: IPsec BEET mode"
- default IPV6
- select XFRM
- ---help---
- Support for IPsec BEET mode.
-
- If unsure, say Y.
-
-config INET6_XFRM_MODE_ROUTEOPTIMIZATION
- tristate "IPv6: MIPv6 route optimization mode"
- select XFRM
- ---help---
- Support for MIPv6 route optimization mode.
-
config IPV6_VTI
tristate "Virtual (secure) IPv6: tunneling"
select IPV6_TUNNEL
select NET_IP_TUNNEL
- depends on INET6_XFRM_MODE_TUNNEL
+ select XFRM
---help---
Tunneling means encapsulating data of one protocol type within
another protocol and sending it over a channel that understands the
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index e0026fa1261b..8ccf35514015 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -35,10 +35,6 @@ obj-$(CONFIG_INET6_ESP_OFFLOAD) += esp6_offload.o
obj-$(CONFIG_INET6_IPCOMP) += ipcomp6.o
obj-$(CONFIG_INET6_XFRM_TUNNEL) += xfrm6_tunnel.o
obj-$(CONFIG_INET6_TUNNEL) += tunnel6.o
-obj-$(CONFIG_INET6_XFRM_MODE_TRANSPORT) += xfrm6_mode_transport.o
-obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o
-obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
-obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o
obj-$(CONFIG_IPV6_MIP6) += mip6.o
obj-$(CONFIG_IPV6_ILA) += ila/
obj-$(CONFIG_NETFILTER) += netfilter/
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4ae17a966ae3..f96d1de79509 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -173,7 +173,8 @@ static int addrconf_ifdown(struct net_device *dev, int how);
static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
int plen,
const struct net_device *dev,
- u32 flags, u32 noflags);
+ u32 flags, u32 noflags,
+ bool no_gw);
static void addrconf_dad_start(struct inet6_ifaddr *ifp);
static void addrconf_dad_work(struct work_struct *w);
@@ -610,11 +611,13 @@ static int inet6_netconf_valid_get_req(struct sk_buff *skb,
}
if (!netlink_strict_get_check(skb))
- return nlmsg_parse(nlh, sizeof(struct netconfmsg), tb,
- NETCONFA_MAX, devconf_ipv6_policy, extack);
+ return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
+ tb, NETCONFA_MAX,
+ devconf_ipv6_policy, extack);
- err = nlmsg_parse_strict(nlh, sizeof(struct netconfmsg), tb,
- NETCONFA_MAX, devconf_ipv6_policy, extack);
+ err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
+ tb, NETCONFA_MAX,
+ devconf_ipv6_policy, extack);
if (err)
return err;
@@ -1230,10 +1233,8 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_r
{
struct fib6_info *f6i;
- f6i = addrconf_get_prefix_route(&ifp->addr,
- ifp->prefix_len,
- ifp->idev->dev,
- 0, RTF_GATEWAY | RTF_DEFAULT);
+ f6i = addrconf_get_prefix_route(&ifp->addr, ifp->prefix_len,
+ ifp->idev->dev, 0, RTF_DEFAULT, true);
if (f6i) {
if (del_rt)
ip6_del_rt(dev_net(ifp->idev->dev), f6i);
@@ -2402,7 +2403,8 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, u32 metric,
static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
int plen,
const struct net_device *dev,
- u32 flags, u32 noflags)
+ u32 flags, u32 noflags,
+ bool no_gw)
{
struct fib6_node *fn;
struct fib6_info *rt = NULL;
@@ -2419,7 +2421,9 @@ static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
goto out;
for_each_fib6_node_rt_rcu(fn) {
- if (rt->fib6_nh.nh_dev->ifindex != dev->ifindex)
+ if (rt->fib6_nh.fib_nh_dev->ifindex != dev->ifindex)
+ continue;
+ if (no_gw && rt->fib6_nh.fib_nh_gw_family)
continue;
if ((rt->fib6_flags & flags) != flags)
continue;
@@ -2717,7 +2721,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
pinfo->prefix_len,
dev,
RTF_ADDRCONF | RTF_PREFIX_RT,
- RTF_GATEWAY | RTF_DEFAULT);
+ RTF_DEFAULT, true);
if (rt) {
/* Autoconf prefix route */
@@ -4563,8 +4567,8 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
u32 ifa_flags;
int err;
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy,
- extack);
+ err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
+ ifa_ipv6_policy, extack);
if (err < 0)
return err;
@@ -4588,10 +4592,8 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
struct fib6_info *f6i;
u32 prio;
- f6i = addrconf_get_prefix_route(&ifp->addr,
- ifp->prefix_len,
- ifp->idev->dev,
- 0, RTF_GATEWAY | RTF_DEFAULT);
+ f6i = addrconf_get_prefix_route(&ifp->addr, ifp->prefix_len,
+ ifp->idev->dev, 0, RTF_DEFAULT, true);
if (!f6i)
return -ENOENT;
@@ -4729,8 +4731,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
struct ifa6_config cfg;
int err;
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy,
- extack);
+ err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
+ ifa_ipv6_policy, extack);
if (err < 0)
return err;
@@ -5086,8 +5088,8 @@ static int inet6_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
fillargs->flags |= NLM_F_DUMP_FILTERED;
}
- err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
- ifa_ipv6_policy, extack);
+ err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
+ ifa_ipv6_policy, extack);
if (err < 0)
return err;
@@ -5237,11 +5239,11 @@ static int inet6_rtm_valid_getaddr_req(struct sk_buff *skb,
}
if (!netlink_strict_get_check(skb))
- return nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX,
- ifa_ipv6_policy, extack);
+ return nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
+ ifa_ipv6_policy, extack);
- err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
- ifa_ipv6_policy, extack);
+ err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
+ ifa_ipv6_policy, extack);
if (err)
return err;
@@ -5667,8 +5669,8 @@ static int inet6_validate_link_af(const struct net_device *dev,
if (dev && !__in6_dev_get(dev))
return -EAFNOSUPPORT;
- return nla_parse_nested(tb, IFLA_INET6_MAX, nla, inet6_af_policy,
- NULL);
+ return nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla,
+ inet6_af_policy, NULL);
}
static int check_addr_gen_mode(int mode)
@@ -5700,7 +5702,7 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla)
if (!idev)
return -EAFNOSUPPORT;
- if (nla_parse_nested(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0)
+ if (nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0)
BUG();
if (tb[IFLA_INET6_TOKEN]) {
@@ -5752,7 +5754,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
nla_put_u8(skb, IFLA_OPERSTATE,
netif_running(dev) ? dev->operstate : IF_OPER_DOWN))
goto nla_put_failure;
- protoinfo = nla_nest_start(skb, IFLA_PROTINFO);
+ protoinfo = nla_nest_start_noflag(skb, IFLA_PROTINFO);
if (!protoinfo)
goto nla_put_failure;
@@ -5972,7 +5974,8 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
struct fib6_info *rt;
rt = addrconf_get_prefix_route(&ifp->peer_addr, 128,
- ifp->idev->dev, 0, 0);
+ ifp->idev->dev, 0, 0,
+ false);
if (rt)
ip6_del_rt(net, rt);
}
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 6c79af056d9b..763a947e0d14 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -5,7 +5,7 @@
#include <linux/export.h>
#include <net/ipv6.h>
-#include <net/addrconf.h>
+#include <net/ipv6_stubs.h>
#include <net/ip.h>
/* if ipv6 module registers this function is used by xfrm to force all
@@ -144,43 +144,53 @@ static struct fib6_table *eafnosupport_fib6_get_table(struct net *net, u32 id)
return NULL;
}
-static struct fib6_info *
+static int
eafnosupport_fib6_table_lookup(struct net *net, struct fib6_table *table,
- int oif, struct flowi6 *fl6, int flags)
+ int oif, struct flowi6 *fl6,
+ struct fib6_result *res, int flags)
{
- return NULL;
+ return -EAFNOSUPPORT;
}
-static struct fib6_info *
+static int
eafnosupport_fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
- int flags)
+ struct fib6_result *res, int flags)
{
- return NULL;
+ return -EAFNOSUPPORT;
}
-static struct fib6_info *
-eafnosupport_fib6_multipath_select(const struct net *net, struct fib6_info *f6i,
- struct flowi6 *fl6, int oif,
- const struct sk_buff *skb, int strict)
+static void
+eafnosupport_fib6_select_path(const struct net *net, struct fib6_result *res,
+ struct flowi6 *fl6, int oif, bool have_oif_match,
+ const struct sk_buff *skb, int strict)
{
- return f6i;
}
static u32
-eafnosupport_ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
- struct in6_addr *saddr)
+eafnosupport_ip6_mtu_from_fib6(const struct fib6_result *res,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
{
return 0;
}
+static int eafnosupport_fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
+ struct fib6_config *cfg, gfp_t gfp_flags,
+ struct netlink_ext_ack *extack)
+{
+ NL_SET_ERR_MSG(extack, "IPv6 support not enabled in kernel");
+ return -EAFNOSUPPORT;
+}
+
const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
.ipv6_dst_lookup = eafnosupport_ipv6_dst_lookup,
.ipv6_route_input = eafnosupport_ipv6_route_input,
.fib6_get_table = eafnosupport_fib6_get_table,
.fib6_table_lookup = eafnosupport_fib6_table_lookup,
.fib6_lookup = eafnosupport_fib6_lookup,
- .fib6_multipath_select = eafnosupport_fib6_multipath_select,
+ .fib6_select_path = eafnosupport_fib6_select_path,
.ip6_mtu_from_fib6 = eafnosupport_ip6_mtu_from_fib6,
+ .fib6_nh_init = eafnosupport_fib6_nh_init,
};
EXPORT_SYMBOL_GPL(ipv6_stub);
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index d43d076c98f5..642fc6ac13d2 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -383,8 +383,8 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
u32 label;
int err = 0;
- err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy,
- extack);
+ err = nlmsg_parse_deprecated(nlh, sizeof(*ifal), tb, IFAL_MAX,
+ ifal_policy, extack);
if (err < 0)
return err;
@@ -476,7 +476,7 @@ static int ip6addrlbl_valid_dump_req(const struct nlmsghdr *nlh,
}
if (nlmsg_attrlen(nlh, sizeof(*ifal))) {
- NL_SET_ERR_MSG_MOD(extack, "Invalid data after header for address label dump requewst");
+ NL_SET_ERR_MSG_MOD(extack, "Invalid data after header for address label dump request");
return -EINVAL;
}
@@ -537,8 +537,8 @@ static int ip6addrlbl_valid_get_req(struct sk_buff *skb,
}
if (!netlink_strict_get_check(skb))
- return nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX,
- ifal_policy, extack);
+ return nlmsg_parse_deprecated(nlh, sizeof(*ifal), tb,
+ IFAL_MAX, ifal_policy, extack);
ifal = nlmsg_data(nlh);
if (ifal->__ifal_reserved || ifal->ifal_flags || ifal->ifal_seq) {
@@ -546,8 +546,8 @@ static int ip6addrlbl_valid_get_req(struct sk_buff *skb,
return -EINVAL;
}
- err = nlmsg_parse_strict(nlh, sizeof(*ifal), tb, IFAL_MAX,
- ifal_policy, extack);
+ err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifal), tb, IFAL_MAX,
+ ifal_policy, extack);
if (err)
return err;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 2f45d2a3e3a3..c04ae282f4e4 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -56,6 +56,7 @@
#include <net/transp_v6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
+#include <net/ipv6_stubs.h>
#include <net/ndisc.h>
#ifdef CONFIG_IPV6_TUNNEL
#include <net/ip6_tunnel.h>
@@ -546,12 +547,6 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
struct net *net = sock_net(sk);
switch (cmd) {
- case SIOCGSTAMP:
- return sock_get_timestamp(sk, (struct timeval __user *)arg);
-
- case SIOCGSTAMPNS:
- return sock_get_timestampns(sk, (struct timespec __user *)arg);
-
case SIOCADDRT:
case SIOCDELRT:
@@ -584,6 +579,7 @@ const struct proto_ops inet6_stream_ops = {
.getname = inet6_getname,
.poll = tcp_poll, /* ok */
.ioctl = inet6_ioctl, /* must change */
+ .gettstamp = sock_gettstamp,
.listen = inet_listen, /* ok */
.shutdown = inet_shutdown, /* ok */
.setsockopt = sock_common_setsockopt, /* ok */
@@ -617,6 +613,7 @@ const struct proto_ops inet6_dgram_ops = {
.getname = inet6_getname,
.poll = udp_poll, /* ok */
.ioctl = inet6_ioctl, /* must change */
+ .gettstamp = sock_gettstamp,
.listen = sock_no_listen, /* ok */
.shutdown = inet_shutdown, /* ok */
.setsockopt = sock_common_setsockopt, /* ok */
@@ -847,6 +844,17 @@ static int __net_init inet6_net_init(struct net *net)
net->ipv6.sysctl.bindv6only = 0;
net->ipv6.sysctl.icmpv6_time = 1*HZ;
net->ipv6.sysctl.icmpv6_echo_ignore_all = 0;
+ net->ipv6.sysctl.icmpv6_echo_ignore_multicast = 0;
+ net->ipv6.sysctl.icmpv6_echo_ignore_anycast = 0;
+
+ /* By default, rate limit error messages.
+ * Except for pmtu discovery, it would break it.
+ * proc_do_large_bitmap needs pointer to the bitmap.
+ */
+ bitmap_set(net->ipv6.sysctl.icmpv6_ratemask, 0, ICMPV6_ERRMSG_MAX + 1);
+ bitmap_clear(net->ipv6.sysctl.icmpv6_ratemask, ICMPV6_PKT_TOOBIG, 1);
+ net->ipv6.sysctl.icmpv6_ratemask_ptr = net->ipv6.sysctl.icmpv6_ratemask;
+
net->ipv6.sysctl.flowlabel_consistency = 1;
net->ipv6.sysctl.auto_flowlabels = IP6_DEFAULT_AUTO_FLOW_LABELS;
net->ipv6.sysctl.idgen_retries = 3;
@@ -914,8 +922,10 @@ static const struct ipv6_stub ipv6_stub_impl = {
.fib6_get_table = fib6_get_table,
.fib6_table_lookup = fib6_table_lookup,
.fib6_lookup = fib6_lookup,
- .fib6_multipath_select = fib6_multipath_select,
+ .fib6_select_path = fib6_select_path,
.ip6_mtu_from_fib6 = ip6_mtu_from_fib6,
+ .fib6_nh_init = fib6_nh_init,
+ .fib6_nh_release = fib6_nh_release,
.udpv6_encap_enable = udpv6_encap_enable,
.ndisc_send_na = ndisc_send_na,
.nd_tbl = &nd_tbl,
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index d46b4eb645c2..d453cf417b03 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -74,13 +74,13 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
goto out;
if (sp->len == XFRM_MAX_DEPTH)
- goto out;
+ goto out_reset;
x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
(xfrm_address_t *)&ipv6_hdr(skb)->daddr,
spi, IPPROTO_ESP, AF_INET6);
if (!x)
- goto out;
+ goto out_reset;
sp->xvec[sp->len++] = x;
sp->olen++;
@@ -88,7 +88,7 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
xo = xfrm_offload(skb);
if (!xo) {
xfrm_state_put(x);
- goto out;
+ goto out_reset;
}
}
@@ -109,6 +109,8 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
xfrm_input(skb, IPPROTO_ESP, spi, -2);
return ERR_PTR(-EINPROGRESS);
+out_reset:
+ secpath_reset(skb);
out:
skb_push(skb, offset);
NAPI_GRO_CB(skb)->same_flow = 0;
@@ -134,6 +136,44 @@ static void esp6_gso_encap(struct xfrm_state *x, struct sk_buff *skb)
xo->proto = proto;
}
+static struct sk_buff *xfrm6_tunnel_gso_segment(struct xfrm_state *x,
+ struct sk_buff *skb,
+ netdev_features_t features)
+{
+ __skb_push(skb, skb->mac_len);
+ return skb_mac_gso_segment(skb, features);
+}
+
+static struct sk_buff *xfrm6_transport_gso_segment(struct xfrm_state *x,
+ struct sk_buff *skb,
+ netdev_features_t features)
+{
+ const struct net_offload *ops;
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ struct xfrm_offload *xo = xfrm_offload(skb);
+
+ skb->transport_header += x->props.header_len;
+ ops = rcu_dereference(inet6_offloads[xo->proto]);
+ if (likely(ops && ops->callbacks.gso_segment))
+ segs = ops->callbacks.gso_segment(skb, features);
+
+ return segs;
+}
+
+static struct sk_buff *xfrm6_outer_mode_gso_segment(struct xfrm_state *x,
+ struct sk_buff *skb,
+ netdev_features_t features)
+{
+ switch (x->outer_mode.encap) {
+ case XFRM_MODE_TUNNEL:
+ return xfrm6_tunnel_gso_segment(x, skb, features);
+ case XFRM_MODE_TRANSPORT:
+ return xfrm6_transport_gso_segment(x, skb, features);
+ }
+
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
static struct sk_buff *esp6_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
@@ -172,7 +212,7 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb,
xo->flags |= XFRM_GSO_SEGMENT;
- return x->outer_mode->gso_segment(x, skb, esp_features);
+ return xfrm6_outer_mode_gso_segment(x, skb, esp_features);
}
static int esp6_input_tail(struct xfrm_state *x, struct sk_buff *skb)
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index f590446595d8..06d1b7763600 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -61,16 +61,16 @@ unsigned int fib6_rules_seq_read(struct net *net)
}
/* called with rcu lock held; no reference taken on fib6_info */
-struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
- int flags)
+int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
+ struct fib6_result *res, int flags)
{
- struct fib6_info *f6i;
int err;
if (net->ipv6.fib6_has_custom_rules) {
struct fib_lookup_arg arg = {
.lookup_ptr = fib6_table_lookup,
.lookup_data = &oif,
+ .result = res,
.flags = FIB_LOOKUP_NOREF,
};
@@ -78,19 +78,15 @@ struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
err = fib_rules_lookup(net->ipv6.fib6_rules_ops,
flowi6_to_flowi(fl6), flags, &arg);
- if (err)
- return ERR_PTR(err);
-
- f6i = arg.result ? : net->ipv6.fib6_null_entry;
} else {
- f6i = fib6_table_lookup(net, net->ipv6.fib6_local_tbl,
- oif, fl6, flags);
- if (!f6i || f6i == net->ipv6.fib6_null_entry)
- f6i = fib6_table_lookup(net, net->ipv6.fib6_main_tbl,
- oif, fl6, flags);
+ err = fib6_table_lookup(net, net->ipv6.fib6_local_tbl, oif,
+ fl6, res, flags);
+ if (err || res->f6i == net->ipv6.fib6_null_entry)
+ err = fib6_table_lookup(net, net->ipv6.fib6_main_tbl,
+ oif, fl6, res, flags);
}
- return f6i;
+ return err;
}
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
@@ -98,9 +94,11 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
int flags, pol_lookup_t lookup)
{
if (net->ipv6.fib6_has_custom_rules) {
+ struct fib6_result res = {};
struct fib_lookup_arg arg = {
.lookup_ptr = lookup,
.lookup_data = skb,
+ .result = &res,
.flags = FIB_LOOKUP_NOREF,
};
@@ -110,8 +108,8 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
fib_rules_lookup(net->ipv6.fib6_rules_ops,
flowi6_to_flowi(fl6), flags, &arg);
- if (arg.result)
- return arg.result;
+ if (res.rt6)
+ return &res.rt6->dst;
} else {
struct rt6_info *rt;
@@ -157,11 +155,11 @@ static int fib6_rule_saddr(struct net *net, struct fib_rule *rule, int flags,
static int fib6_rule_action_alt(struct fib_rule *rule, struct flowi *flp,
int flags, struct fib_lookup_arg *arg)
{
+ struct fib6_result *res = arg->result;
struct flowi6 *flp6 = &flp->u.ip6;
struct net *net = rule->fr_net;
struct fib6_table *table;
- struct fib6_info *f6i;
- int err = -EAGAIN, *oif;
+ int err, *oif;
u32 tb_id;
switch (rule->action) {
@@ -182,14 +180,12 @@ static int fib6_rule_action_alt(struct fib_rule *rule, struct flowi *flp,
return -EAGAIN;
oif = (int *)arg->lookup_data;
- f6i = fib6_table_lookup(net, table, *oif, flp6, flags);
- if (f6i != net->ipv6.fib6_null_entry) {
+ err = fib6_table_lookup(net, table, *oif, flp6, res, flags);
+ if (!err && res->f6i != net->ipv6.fib6_null_entry)
err = fib6_rule_saddr(net, rule, flags, flp6,
- fib6_info_nh_dev(f6i));
-
- if (likely(!err))
- arg->result = f6i;
- }
+ res->nh->fib_nh_dev);
+ else
+ err = -EAGAIN;
return err;
}
@@ -197,6 +193,7 @@ static int fib6_rule_action_alt(struct fib_rule *rule, struct flowi *flp,
static int __fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
int flags, struct fib_lookup_arg *arg)
{
+ struct fib6_result *res = arg->result;
struct flowi6 *flp6 = &flp->u.ip6;
struct rt6_info *rt = NULL;
struct fib6_table *table;
@@ -251,7 +248,7 @@ again:
discard_pkt:
dst_hold(&rt->dst);
out:
- arg->result = rt;
+ res->rt6 = rt;
return err;
}
@@ -266,9 +263,13 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg)
{
- struct rt6_info *rt = (struct rt6_info *) arg->result;
+ struct fib6_result *res = arg->result;
+ struct rt6_info *rt = res->rt6;
struct net_device *dev = NULL;
+ if (!rt)
+ return false;
+
if (rt->rt6i_idev)
dev = rt->rt6i_idev->dev;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 802faa2fcc0e..afb915807cd0 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -168,22 +168,21 @@ static bool is_ineligible(const struct sk_buff *skb)
return false;
}
-static bool icmpv6_mask_allow(int type)
+static bool icmpv6_mask_allow(struct net *net, int type)
{
- /* Informational messages are not limited. */
- if (type & ICMPV6_INFOMSG_MASK)
+ if (type > ICMPV6_MSG_MAX)
return true;
- /* Do not limit pmtu discovery, it would break it. */
- if (type == ICMPV6_PKT_TOOBIG)
+ /* Limit if icmp type is set in ratemask. */
+ if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
return true;
return false;
}
-static bool icmpv6_global_allow(int type)
+static bool icmpv6_global_allow(struct net *net, int type)
{
- if (icmpv6_mask_allow(type))
+ if (icmpv6_mask_allow(net, type))
return true;
if (icmp_global_allow())
@@ -202,7 +201,7 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
struct dst_entry *dst;
bool res = false;
- if (icmpv6_mask_allow(type))
+ if (icmpv6_mask_allow(net, type))
return true;
/*
@@ -511,7 +510,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
local_bh_disable();
/* Check global sysctl_icmp_msgs_per_sec ratelimit */
- if (!(skb->dev->flags&IFF_LOOPBACK) && !icmpv6_global_allow(type))
+ if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
goto out_bh_enable;
mip6_addr_swap(skb);
@@ -683,12 +682,20 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
struct dst_entry *dst;
struct ipcm6_cookie ipc6;
u32 mark = IP6_REPLY_MARK(net, skb->mark);
+ bool acast;
+
+ if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
+ net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
+ return;
saddr = &ipv6_hdr(skb)->daddr;
+ acast = ipv6_anycast_destination(skb_dst(skb), saddr);
+ if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
+ return;
+
if (!ipv6_unicast_destination(skb) &&
- !(net->ipv6.sysctl.anycast_src_echo_reply &&
- ipv6_anycast_destination(skb_dst(skb), saddr)))
+ !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
saddr = NULL;
memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
@@ -723,6 +730,11 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
if (IS_ERR(dst))
goto out;
+ /* Check the ratelimit */
+ if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
+ !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
+ goto out_dst_release;
+
idev = __in6_dev_get(skb->dev);
msg.skb = skb;
@@ -743,6 +755,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
skb->len + sizeof(struct icmp6hdr));
}
+out_dst_release:
dst_release(dst);
out:
icmpv6_xmit_unlock(sk);
@@ -1115,6 +1128,27 @@ static struct ctl_table ipv6_icmp_table_template[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "echo_ignore_multicast",
+ .data = &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "echo_ignore_anycast",
+ .data = &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "ratemask",
+ .data = &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
+ .maxlen = ICMPV6_MSG_MAX + 1,
+ .mode = 0644,
+ .proc_handler = proc_do_large_bitmap,
+ },
{ },
};
@@ -1129,6 +1163,9 @@ struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
if (table) {
table[0].data = &net->ipv6.sysctl.icmpv6_time;
table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
+ table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
+ table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
+ table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
}
return table;
}
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index 3d56a2fb6f86..422dcc691f71 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -146,7 +146,8 @@ static int ila_build_state(struct nlattr *nla,
if (family != AF_INET6)
return -EINVAL;
- ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, extack);
+ ret = nla_parse_nested_deprecated(tb, ILA_ATTR_MAX, nla,
+ ila_nl_policy, extack);
if (ret < 0)
return ret;
diff --git a/net/ipv6/ila/ila_main.c b/net/ipv6/ila/ila_main.c
index 18fac76b9520..257d2b681246 100644
--- a/net/ipv6/ila/ila_main.c
+++ b/net/ipv6/ila/ila_main.c
@@ -16,29 +16,29 @@ static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
static const struct genl_ops ila_nl_ops[] = {
{
.cmd = ILA_CMD_ADD,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = ila_xlat_nl_cmd_add_mapping,
- .policy = ila_nl_policy,
.flags = GENL_ADMIN_PERM,
},
{
.cmd = ILA_CMD_DEL,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = ila_xlat_nl_cmd_del_mapping,
- .policy = ila_nl_policy,
.flags = GENL_ADMIN_PERM,
},
{
.cmd = ILA_CMD_FLUSH,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = ila_xlat_nl_cmd_flush,
- .policy = ila_nl_policy,
.flags = GENL_ADMIN_PERM,
},
{
.cmd = ILA_CMD_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = ila_xlat_nl_cmd_get_mapping,
.start = ila_xlat_nl_dump_start,
.dumpit = ila_xlat_nl_dump,
.done = ila_xlat_nl_dump_done,
- .policy = ila_nl_policy,
},
};
@@ -49,6 +49,7 @@ struct genl_family ila_nl_family __ro_after_init = {
.name = ILA_GENL_NAME,
.version = ILA_GENL_VERSION,
.maxattr = ILA_ATTR_MAX,
+ .policy = ila_nl_policy,
.netnsok = true,
.parallel_ops = true,
.module = THIS_MODULE,
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 79d2e43c05c5..5fc1f4e0c0cf 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -417,6 +417,7 @@ int ila_xlat_nl_cmd_flush(struct sk_buff *skb, struct genl_info *info)
done:
rhashtable_walk_stop(&iter);
+ rhashtable_walk_exit(&iter);
return ret;
}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 6613d8dbb0e5..08e0390e001c 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -162,7 +162,7 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags)
}
INIT_LIST_HEAD(&f6i->fib6_siblings);
- atomic_inc(&f6i->fib6_ref);
+ refcount_set(&f6i->fib6_ref, 1);
return f6i;
}
@@ -175,10 +175,7 @@ void fib6_info_destroy_rcu(struct rcu_head *head)
WARN_ON(f6i->fib6_node);
bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket, 1);
- if (bucket) {
- f6i->rt6i_exception_bucket = NULL;
- kfree(bucket);
- }
+ kfree(bucket);
if (f6i->rt6i_pcpu) {
int cpu;
@@ -199,10 +196,7 @@ void fib6_info_destroy_rcu(struct rcu_head *head)
free_percpu(f6i->rt6i_pcpu);
}
- lwtstate_put(f6i->fib6_nh.nh_lwtstate);
-
- if (f6i->fib6_nh.nh_dev)
- dev_put(f6i->fib6_nh.nh_dev);
+ fib6_nh_release(&f6i->fib6_nh);
ip_fib_metrics_put(f6i->fib6_metrics);
@@ -357,10 +351,11 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
}
/* called with rcu lock held; no reference taken on fib6_info */
-struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
- int flags)
+int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
+ struct fib6_result *res, int flags)
{
- return fib6_table_lookup(net, net->ipv6.fib6_main_tbl, oif, fl6, flags);
+ return fib6_table_lookup(net, net->ipv6.fib6_main_tbl, oif, fl6,
+ res, flags);
}
static void __net_init fib6_tables_init(struct net *net)
@@ -851,8 +846,8 @@ insert_above:
RCU_INIT_POINTER(in->parent, pn);
in->leaf = fn->leaf;
- atomic_inc(&rcu_dereference_protected(in->leaf,
- lockdep_is_held(&table->tb6_lock))->fib6_ref);
+ fib6_info_hold(rcu_dereference_protected(in->leaf,
+ lockdep_is_held(&table->tb6_lock)));
/* update parent pointer */
if (dir)
@@ -921,9 +916,7 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i,
if (pcpu_rt) {
struct fib6_info *from;
- from = rcu_dereference_protected(pcpu_rt->from,
- lockdep_is_held(&table->tb6_lock));
- rcu_assign_pointer(pcpu_rt->from, NULL);
+ from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
fib6_info_release(from);
}
}
@@ -934,7 +927,7 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
{
struct fib6_table *table = rt->fib6_table;
- if (atomic_read(&rt->fib6_ref) != 1) {
+ if (refcount_read(&rt->fib6_ref) != 1) {
/* This route is used as dummy address holder in some split
* nodes. It is not leaked, but it still holds other resources,
* which must be released in time. So, scan ascendant nodes
@@ -947,7 +940,7 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
struct fib6_info *new_leaf;
if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) {
new_leaf = fib6_find_prefix(net, table, fn);
- atomic_inc(&new_leaf->fib6_ref);
+ fib6_info_hold(new_leaf);
rcu_assign_pointer(fn->leaf, new_leaf);
fib6_info_release(rt);
@@ -1113,7 +1106,7 @@ add:
return err;
rcu_assign_pointer(rt->fib6_next, iter);
- atomic_inc(&rt->fib6_ref);
+ fib6_info_hold(rt);
rcu_assign_pointer(rt->fib6_node, fn);
rcu_assign_pointer(*ins, rt);
if (!info->skip_notify)
@@ -1141,7 +1134,7 @@ add:
if (err)
return err;
- atomic_inc(&rt->fib6_ref);
+ fib6_info_hold(rt);
rcu_assign_pointer(rt->fib6_node, fn);
rt->fib6_next = iter->fib6_next;
rcu_assign_pointer(*ins, rt);
@@ -1283,7 +1276,7 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
if (!sfn)
goto failure;
- atomic_inc(&info->nl_net->ipv6.fib6_null_entry->fib6_ref);
+ fib6_info_hold(info->nl_net->ipv6.fib6_null_entry);
rcu_assign_pointer(sfn->leaf,
info->nl_net->ipv6.fib6_null_entry);
sfn->fn_flags = RTN_ROOT;
@@ -1326,7 +1319,7 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
rcu_assign_pointer(fn->leaf,
info->nl_net->ipv6.fib6_null_entry);
} else {
- atomic_inc(&rt->fib6_ref);
+ fib6_info_hold(rt);
rcu_assign_pointer(fn->leaf, rt);
}
}
@@ -2297,6 +2290,7 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v)
{
struct fib6_info *rt = v;
struct ipv6_route_iter *iter = seq->private;
+ unsigned int flags = rt->fib6_flags;
const struct net_device *dev;
seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);
@@ -2306,15 +2300,17 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v)
#else
seq_puts(seq, "00000000000000000000000000000000 00 ");
#endif
- if (rt->fib6_flags & RTF_GATEWAY)
- seq_printf(seq, "%pi6", &rt->fib6_nh.nh_gw);
- else
+ if (rt->fib6_nh.fib_nh_gw_family) {
+ flags |= RTF_GATEWAY;
+ seq_printf(seq, "%pi6", &rt->fib6_nh.fib_nh_gw6);
+ } else {
seq_puts(seq, "00000000000000000000000000000000");
+ }
- dev = rt->fib6_nh.nh_dev;
+ dev = rt->fib6_nh.fib_nh_dev;
seq_printf(seq, " %08x %08x %08x %08x %8s\n",
- rt->fib6_metric, atomic_read(&rt->fib6_ref), 0,
- rt->fib6_flags, dev ? dev->name : "");
+ rt->fib6_metric, refcount_read(&rt->fib6_ref), 0,
+ flags, dev ? dev->name : "");
iter->w.leaf = NULL;
return 0;
}
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index cb54a8a3c273..be5f3d7ceb96 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -94,15 +94,21 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label)
return fl;
}
+static void fl_free_rcu(struct rcu_head *head)
+{
+ struct ip6_flowlabel *fl = container_of(head, struct ip6_flowlabel, rcu);
+
+ if (fl->share == IPV6_FL_S_PROCESS)
+ put_pid(fl->owner.pid);
+ kfree(fl->opt);
+ kfree(fl);
+}
+
static void fl_free(struct ip6_flowlabel *fl)
{
- if (fl) {
- if (fl->share == IPV6_FL_S_PROCESS)
- put_pid(fl->owner.pid);
- kfree(fl->opt);
- kfree_rcu(fl, rcu);
- }
+ if (fl)
+ call_rcu(&fl->rcu, fl_free_rcu);
}
static void fl_release(struct ip6_flowlabel *fl)
@@ -633,9 +639,9 @@ recheck:
if (fl1->share == IPV6_FL_S_EXCL ||
fl1->share != fl->share ||
((fl1->share == IPV6_FL_S_PROCESS) &&
- (fl1->owner.pid == fl->owner.pid)) ||
+ (fl1->owner.pid != fl->owner.pid)) ||
((fl1->share == IPV6_FL_S_USER) &&
- uid_eq(fl1->owner.uid, fl->owner.uid)))
+ !uid_eq(fl1->owner.uid, fl->owner.uid)))
goto release;
err = -ENOMEM;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index b32c95f02128..655e46b227f9 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -525,10 +525,10 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
}
static int ip6erspan_rcv(struct sk_buff *skb,
- struct tnl_ptk_info *tpi)
+ struct tnl_ptk_info *tpi,
+ int gre_hdr_len)
{
struct erspan_base_hdr *ershdr;
- struct erspan_metadata *pkt_md;
const struct ipv6hdr *ipv6h;
struct erspan_md2 *md2;
struct ip6_tnl *tunnel;
@@ -547,18 +547,16 @@ static int ip6erspan_rcv(struct sk_buff *skb,
if (unlikely(!pskb_may_pull(skb, len)))
return PACKET_REJECT;
- ershdr = (struct erspan_base_hdr *)skb->data;
- pkt_md = (struct erspan_metadata *)(ershdr + 1);
-
if (__iptunnel_pull_header(skb, len,
htons(ETH_P_TEB),
false, false) < 0)
return PACKET_REJECT;
if (tunnel->parms.collect_md) {
+ struct erspan_metadata *pkt_md, *md;
struct metadata_dst *tun_dst;
struct ip_tunnel_info *info;
- struct erspan_metadata *md;
+ unsigned char *gh;
__be64 tun_id;
__be16 flags;
@@ -571,6 +569,14 @@ static int ip6erspan_rcv(struct sk_buff *skb,
if (!tun_dst)
return PACKET_REJECT;
+ /* skb can be uncloned in __iptunnel_pull_header, so
+ * old pkt_md is no longer valid and we need to reset
+ * it
+ */
+ gh = skb_network_header(skb) +
+ skb_network_header_len(skb);
+ pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
+ sizeof(*ershdr));
info = &tun_dst->u.tun_info;
md = ip_tunnel_info_opts(info);
md->version = ver;
@@ -607,7 +613,7 @@ static int gre_rcv(struct sk_buff *skb)
if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
tpi.proto == htons(ETH_P_ERSPAN2))) {
- if (ip6erspan_rcv(skb, &tpi) == PACKET_RCVD)
+ if (ip6erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
return 0;
goto out;
}
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index c7ed2b6d5a1d..b50b1af1f530 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -29,6 +29,7 @@
#include <linux/icmpv6.h>
#include <linux/mroute6.h>
#include <linux/slab.h>
+#include <linux/indirect_call_wrapper.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
@@ -47,6 +48,8 @@
#include <net/inet_ecn.h>
#include <net/dst_metadata.h>
+INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *));
static void ip6_rcv_finish_core(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
@@ -57,7 +60,8 @@ static void ip6_rcv_finish_core(struct net *net, struct sock *sk,
ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
if (ipprot && (edemux = READ_ONCE(ipprot->early_demux)))
- edemux(skb);
+ INDIRECT_CALL_2(edemux, tcp_v6_early_demux,
+ udp_v6_early_demux, skb);
}
if (!skb_valid_dst(skb))
ip6_route_input(skb);
@@ -316,6 +320,9 @@ void ipv6_list_rcv(struct list_head *head, struct packet_type *pt,
ip6_sublist_rcv(&sublist, curr_dev, curr_net);
}
+INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *));
+
/*
* Deliver the packet to the host
*/
@@ -391,7 +398,8 @@ resubmit_final:
!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard;
- ret = ipprot->handler(skb);
+ ret = INDIRECT_CALL_2(ipprot->handler, tcp_v6_rcv, udpv6_rcv,
+ skb);
if (ret > 0) {
if (ipprot->flags & INET6_PROTO_FINAL) {
/* Not an extension header, most likely UDP
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index edbd12067170..adef2236abe2 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -117,7 +117,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
if (!IS_ERR(neigh)) {
sock_confirm_neigh(skb, neigh);
- ret = neigh_output(neigh, skb);
+ ret = neigh_output(neigh, skb, false);
rcu_read_unlock_bh();
return ret;
}
@@ -601,7 +601,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
inet6_sk(skb->sk) : NULL;
struct ipv6hdr *tmp_hdr;
struct frag_hdr *fh;
- unsigned int mtu, hlen, left, len;
+ unsigned int mtu, hlen, left, len, nexthdr_offset;
int hroom, troom;
__be32 frag_id;
int ptr, offset = 0, err = 0;
@@ -612,6 +612,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
goto fail;
hlen = err;
nexthdr = *prevhdr;
+ nexthdr_offset = prevhdr - skb_network_header(skb);
mtu = ip6_skb_dst_mtu(skb);
@@ -646,6 +647,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
(err = skb_checksum_help(skb)))
goto fail;
+ prevhdr = skb_network_header(skb) + nexthdr_offset;
hroom = LL_RESERVED_SPACE(rt->dst.dev);
if (skb_has_frag_list(skb)) {
unsigned int first_len = skb_pagelen(skb);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 0c6403cf8b52..ade1390c6348 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -627,7 +627,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
eiph->daddr, eiph->saddr, 0, 0,
IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
- if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL) {
+ if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL6) {
if (!IS_ERR(rt))
ip_rt_put(rt);
goto out;
@@ -636,7 +636,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
} else {
if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
skb2->dev) ||
- skb_dst(skb2)->dev->type != ARPHRD_TUNNEL)
+ skb_dst(skb2)->dev->type != ARPHRD_TUNNEL6)
goto out;
}
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 8b6eefff2f7e..218a0dedc8f4 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -342,7 +342,7 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err)
struct net_device *dev;
struct pcpu_sw_netstats *tstats;
struct xfrm_state *x;
- struct xfrm_mode *inner_mode;
+ const struct xfrm_mode *inner_mode;
struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6;
u32 orig_mark = skb->mark;
int ret;
@@ -361,7 +361,7 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err)
x = xfrm_input_state(skb);
- inner_mode = x->inner_mode;
+ inner_mode = &x->inner_mode;
if (x->sel.family == AF_UNSPEC) {
inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
@@ -372,7 +372,7 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err)
}
}
- family = inner_mode->afinfo->family;
+ family = inner_mode->family;
skb->mark = be32_to_cpu(t->parms.i_key);
ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index e4dd57976737..4e69847ed5be 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -355,7 +355,6 @@ static const struct rhashtable_params ip6mr_rht_params = {
.key_offset = offsetof(struct mfc6_cache, cmparg),
.key_len = sizeof(struct mfc6_cache_cmp_arg),
.nelem_hint = 3,
- .locks_mul = 1,
.obj_cmpfn = ip6mr_hash_cmp,
.automatic_shrinking = true,
};
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 659ecf4e4b3c..4c8e2ea8bf19 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -77,6 +77,8 @@ static u32 ndisc_hash(const void *pkey,
const struct net_device *dev,
__u32 *hash_rnd);
static bool ndisc_key_eq(const struct neighbour *neigh, const void *pkey);
+static bool ndisc_allow_add(const struct net_device *dev,
+ struct netlink_ext_ack *extack);
static int ndisc_constructor(struct neighbour *neigh);
static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb);
static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
@@ -117,6 +119,7 @@ struct neigh_table nd_tbl = {
.pconstructor = pndisc_constructor,
.pdestructor = pndisc_destructor,
.proxy_redo = pndisc_redo,
+ .allow_add = ndisc_allow_add,
.id = "ndisc_cache",
.parms = {
.tbl = &nd_tbl,
@@ -392,6 +395,20 @@ static void pndisc_destructor(struct pneigh_entry *n)
ipv6_dev_mc_dec(dev, &maddr);
}
+/* called with rtnl held */
+static bool ndisc_allow_add(const struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ struct inet6_dev *idev = __in6_dev_get(dev);
+
+ if (!idev || idev->cnf.disable_ipv6) {
+ NL_SET_ERR_MSG(extack, "IPv6 is disabled on this device");
+ return false;
+ }
+
+ return true;
+}
+
static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
int len)
{
@@ -1276,8 +1293,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev);
if (rt) {
- neigh = ip6_neigh_lookup(&rt->fib6_nh.nh_gw,
- rt->fib6_nh.nh_dev, NULL,
+ neigh = ip6_neigh_lookup(&rt->fib6_nh.fib_nh_gw6,
+ rt->fib6_nh.fib_nh_dev, NULL,
&ipv6_hdr(skb)->saddr);
if (!neigh) {
ND_PRINTK(0, err,
@@ -1306,8 +1323,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
return;
}
- neigh = ip6_neigh_lookup(&rt->fib6_nh.nh_gw,
- rt->fib6_nh.nh_dev, NULL,
+ neigh = ip6_neigh_lookup(&rt->fib6_nh.fib_nh_gw6,
+ rt->fib6_nh.fib_nh_dev, NULL,
&ipv6_hdr(skb)->saddr);
if (!neigh) {
ND_PRINTK(0, err,
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index ddc99a1653aa..086fc669279e 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -23,14 +23,6 @@ config NF_TABLES_IPV6
if NF_TABLES_IPV6
-config NFT_CHAIN_ROUTE_IPV6
- tristate "IPv6 nf_tables route chain support"
- help
- This option enables the "route" chain for IPv6 in nf_tables. This
- chain type is used to force packet re-routing after mangling header
- fields such as the source, destination, flowlabel, hop-limit and
- the packet mark.
-
config NFT_REJECT_IPV6
select NF_REJECT_IPV6
default NFT_REJECT
@@ -278,15 +270,10 @@ if IP6_NF_NAT
config IP6_NF_TARGET_MASQUERADE
tristate "MASQUERADE target support"
- select NF_NAT_MASQUERADE
+ select NETFILTER_XT_TARGET_MASQUERADE
help
- Masquerading is a special case of NAT: all outgoing connections are
- changed to seem to come from a particular interface's address, and
- if the interface goes down, those connections are lost. This is
- only useful for dialup accounts with dynamic IP address (ie. your IP
- address will be different on next dialup).
-
- To compile it as a module, choose M here. If unsure, say N.
+ This is a backwards-compat option for the user's convenience
+ (e.g. when running oldconfig). It selects NETFILTER_XT_TARGET_MASQUERADE.
config IP6_NF_TARGET_NPT
tristate "NPT (Network Prefix translation) target support"
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 3853c648ebaa..731a74c60dca 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -27,7 +27,6 @@ obj-$(CONFIG_NF_REJECT_IPV6) += nf_reject_ipv6.o
obj-$(CONFIG_NF_DUP_IPV6) += nf_dup_ipv6.o
# nf_tables
-obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o
obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o
@@ -47,7 +46,6 @@ obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
obj-$(CONFIG_IP6_NF_MATCH_SRH) += ip6t_srh.o
# targets
-obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o
obj-$(CONFIG_IP6_NF_TARGET_NPT) += ip6t_NPT.o
obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o
obj-$(CONFIG_IP6_NF_TARGET_SYNPROXY) += ip6t_SYNPROXY.o
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
deleted file mode 100644
index 29c7f1915a96..000000000000
--- a/net/ipv6/netfilter/ip6t_MASQUERADE.c
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Based on Rusty Russell's IPv6 MASQUERADE target. Development of IPv6
- * NAT funded by Astaro.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/netdevice.h>
-#include <linux/ipv6.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv6.h>
-#include <linux/netfilter/x_tables.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/addrconf.h>
-#include <net/ipv6.h>
-#include <net/netfilter/ipv6/nf_nat_masquerade.h>
-
-static unsigned int
-masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par)
-{
- return nf_nat_masquerade_ipv6(skb, par->targinfo, xt_out(par));
-}
-
-static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
-{
- const struct nf_nat_range2 *range = par->targinfo;
-
- if (range->flags & NF_NAT_RANGE_MAP_IPS)
- return -EINVAL;
- return nf_ct_netns_get(par->net, par->family);
-}
-
-static void masquerade_tg6_destroy(const struct xt_tgdtor_param *par)
-{
- nf_ct_netns_put(par->net, par->family);
-}
-
-static struct xt_target masquerade_tg6_reg __read_mostly = {
- .name = "MASQUERADE",
- .family = NFPROTO_IPV6,
- .checkentry = masquerade_tg6_checkentry,
- .destroy = masquerade_tg6_destroy,
- .target = masquerade_tg6,
- .targetsize = sizeof(struct nf_nat_range),
- .table = "nat",
- .hooks = 1 << NF_INET_POST_ROUTING,
- .me = THIS_MODULE,
-};
-
-static int __init masquerade_tg6_init(void)
-{
- int err;
-
- err = xt_register_target(&masquerade_tg6_reg);
- if (err)
- return err;
-
- err = nf_nat_masquerade_ipv6_register_notifier();
- if (err)
- xt_unregister_target(&masquerade_tg6_reg);
-
- return err;
-}
-static void __exit masquerade_tg6_exit(void)
-{
- nf_nat_masquerade_ipv6_unregister_notifier();
- xt_unregister_target(&masquerade_tg6_reg);
-}
-
-module_init(masquerade_tg6_init);
-module_exit(masquerade_tg6_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("Xtables: automatic address SNAT");
diff --git a/net/ipv6/netfilter/ip6t_srh.c b/net/ipv6/netfilter/ip6t_srh.c
index 1059894a6f4c..4cb83fb69844 100644
--- a/net/ipv6/netfilter/ip6t_srh.c
+++ b/net/ipv6/netfilter/ip6t_srh.c
@@ -210,6 +210,8 @@ static bool srh1_mt6(const struct sk_buff *skb, struct xt_action_param *par)
psidoff = srhoff + sizeof(struct ipv6_sr_hdr) +
((srh->segments_left + 1) * sizeof(struct in6_addr));
psid = skb_header_pointer(skb, psidoff, sizeof(_psid), &_psid);
+ if (!psid)
+ return false;
if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_PSID,
ipv6_masked_addr_cmp(psid, &srhinfo->psid_msk,
&srhinfo->psid_addr)))
@@ -223,6 +225,8 @@ static bool srh1_mt6(const struct sk_buff *skb, struct xt_action_param *par)
nsidoff = srhoff + sizeof(struct ipv6_sr_hdr) +
((srh->segments_left - 1) * sizeof(struct in6_addr));
nsid = skb_header_pointer(skb, nsidoff, sizeof(_nsid), &_nsid);
+ if (!nsid)
+ return false;
if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_NSID,
ipv6_masked_addr_cmp(nsid, &srhinfo->nsid_msk,
&srhinfo->nsid_addr)))
@@ -233,6 +237,8 @@ static bool srh1_mt6(const struct sk_buff *skb, struct xt_action_param *par)
if (srhinfo->mt_flags & IP6T_SRH_LSID) {
lsidoff = srhoff + sizeof(struct ipv6_sr_hdr);
lsid = skb_header_pointer(skb, lsidoff, sizeof(_lsid), &_lsid);
+ if (!lsid)
+ return false;
if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LSID,
ipv6_masked_addr_cmp(lsid, &srhinfo->lsid_msk,
&srhinfo->lsid_addr)))
diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c
deleted file mode 100644
index da3f1f8cb325..000000000000
--- a/net/ipv6/netfilter/nft_chain_route_ipv6.c
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
- * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/skbuff.h>
-#include <linux/netlink.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv6.h>
-#include <linux/netfilter/nfnetlink.h>
-#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables_ipv6.h>
-#include <net/route.h>
-
-static unsigned int nf_route_table_hook(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- unsigned int ret;
- struct nft_pktinfo pkt;
- struct in6_addr saddr, daddr;
- u_int8_t hop_limit;
- u32 mark, flowlabel;
- int err;
-
- nft_set_pktinfo(&pkt, skb, state);
- nft_set_pktinfo_ipv6(&pkt, skb);
-
- /* save source/dest address, mark, hoplimit, flowlabel, priority */
- memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
- memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr));
- mark = skb->mark;
- hop_limit = ipv6_hdr(skb)->hop_limit;
-
- /* flowlabel and prio (includes version, which shouldn't change either */
- flowlabel = *((u32 *)ipv6_hdr(skb));
-
- ret = nft_do_chain(&pkt, priv);
- if (ret != NF_DROP && ret != NF_STOLEN &&
- (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) ||
- memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) ||
- skb->mark != mark ||
- ipv6_hdr(skb)->hop_limit != hop_limit ||
- flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) {
- err = ip6_route_me_harder(state->net, skb);
- if (err < 0)
- ret = NF_DROP_ERR(err);
- }
-
- return ret;
-}
-
-static const struct nft_chain_type nft_chain_route_ipv6 = {
- .name = "route",
- .type = NFT_CHAIN_T_ROUTE,
- .family = NFPROTO_IPV6,
- .owner = THIS_MODULE,
- .hook_mask = (1 << NF_INET_LOCAL_OUT),
- .hooks = {
- [NF_INET_LOCAL_OUT] = nf_route_table_hook,
- },
-};
-
-static int __init nft_chain_route_init(void)
-{
- nft_register_chain_type(&nft_chain_route_ipv6);
-
- return 0;
-}
-
-static void __exit nft_chain_route_exit(void)
-{
- nft_unregister_chain_type(&nft_chain_route_ipv6);
-}
-
-module_init(nft_chain_route_init);
-module_exit(nft_chain_route_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_CHAIN(AF_INET6, "route");
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 4fe7c90962dd..868ae23dbae1 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -10,15 +10,25 @@
#include <net/secure_seq.h>
#include <linux/netfilter.h>
-static u32 __ipv6_select_ident(struct net *net, u32 hashrnd,
+static u32 __ipv6_select_ident(struct net *net,
const struct in6_addr *dst,
const struct in6_addr *src)
{
+ const struct {
+ struct in6_addr dst;
+ struct in6_addr src;
+ } __aligned(SIPHASH_ALIGNMENT) combined = {
+ .dst = *dst,
+ .src = *src,
+ };
u32 hash, id;
- hash = __ipv6_addr_jhash(dst, hashrnd);
- hash = __ipv6_addr_jhash(src, hash);
- hash ^= net_hash_mix(net);
+ /* Note the following code is not safe, but this is okay. */
+ if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key)))
+ get_random_bytes(&net->ipv4.ip_id_key,
+ sizeof(net->ipv4.ip_id_key));
+
+ hash = siphash(&combined, sizeof(combined), &net->ipv4.ip_id_key);
/* Treat id of 0 as unset and if we get 0 back from ip_idents_reserve,
* set the hight order instead thus minimizing possible future
@@ -41,7 +51,6 @@ static u32 __ipv6_select_ident(struct net *net, u32 hashrnd,
*/
__be32 ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb)
{
- static u32 ip6_proxy_idents_hashrnd __read_mostly;
struct in6_addr buf[2];
struct in6_addr *addrs;
u32 id;
@@ -53,11 +62,7 @@ __be32 ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb)
if (!addrs)
return 0;
- net_get_random_once(&ip6_proxy_idents_hashrnd,
- sizeof(ip6_proxy_idents_hashrnd));
-
- id = __ipv6_select_ident(net, ip6_proxy_idents_hashrnd,
- &addrs[1], &addrs[0]);
+ id = __ipv6_select_ident(net, &addrs[1], &addrs[0]);
return htonl(id);
}
EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident);
@@ -66,12 +71,9 @@ __be32 ipv6_select_ident(struct net *net,
const struct in6_addr *daddr,
const struct in6_addr *saddr)
{
- static u32 ip6_idents_hashrnd __read_mostly;
u32 id;
- net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
-
- id = __ipv6_select_ident(net, ip6_idents_hashrnd, daddr, saddr);
+ id = __ipv6_select_ident(net, daddr, saddr);
return htonl(id);
}
EXPORT_SYMBOL(ipv6_select_ident);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 5a426226c762..84dbe21b71e5 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1356,6 +1356,7 @@ const struct proto_ops inet6_sockraw_ops = {
.getname = inet6_getname,
.poll = datagram_poll, /* ok */
.ioctl = inet6_ioctl, /* must change */
+ .gettstamp = sock_gettstamp,
.listen = sock_no_listen, /* ok */
.shutdown = inet_shutdown, /* ok */
.setsockopt = sock_common_setsockopt, /* ok */
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4ef4bbdb49d4..23a20d62daac 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -59,7 +59,7 @@
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/netlink.h>
-#include <net/nexthop.h>
+#include <net/rtnh.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
@@ -102,14 +102,15 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu);
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb);
-static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
+static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
+ int strict);
static size_t rt6_nlmsg_size(struct fib6_info *rt);
static int rt6_fill_node(struct net *net, struct sk_buff *skb,
struct fib6_info *rt, struct dst_entry *dst,
struct in6_addr *dest, struct in6_addr *src,
int iif, int type, u32 portid, u32 seq,
unsigned int flags);
-static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
+static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
struct in6_addr *daddr,
struct in6_addr *saddr);
@@ -295,7 +296,7 @@ static const struct fib6_info fib6_null_entry_template = {
.fib6_flags = (RTF_REJECT | RTF_NONEXTHOP),
.fib6_protocol = RTPROT_KERNEL,
.fib6_metric = ~(u32)0,
- .fib6_ref = ATOMIC_INIT(1),
+ .fib6_ref = REFCOUNT_INIT(1),
.fib6_type = RTN_UNREACHABLE,
.fib6_metrics = (struct dst_metrics *)&dst_default_metrics,
};
@@ -379,11 +380,8 @@ static void ip6_dst_destroy(struct dst_entry *dst)
in6_dev_put(idev);
}
- rcu_read_lock();
- from = rcu_dereference(rt->from);
- rcu_assign_pointer(rt->from, NULL);
+ from = xchg((__force struct fib6_info **)&rt->from, NULL);
fib6_info_release(from);
- rcu_read_unlock();
}
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -427,13 +425,15 @@ static bool rt6_check_expired(const struct rt6_info *rt)
return false;
}
-struct fib6_info *fib6_multipath_select(const struct net *net,
- struct fib6_info *match,
- struct flowi6 *fl6, int oif,
- const struct sk_buff *skb,
- int strict)
+void fib6_select_path(const struct net *net, struct fib6_result *res,
+ struct flowi6 *fl6, int oif, bool have_oif_match,
+ const struct sk_buff *skb, int strict)
{
struct fib6_info *sibling, *next_sibling;
+ struct fib6_info *match = res->f6i;
+
+ if (!match->fib6_nsiblings || have_oif_match)
+ goto out;
/* We might have already computed the hash for ICMPv6 errors. In such
* case it will always be non-zero. Otherwise now is the time to do it.
@@ -441,61 +441,89 @@ struct fib6_info *fib6_multipath_select(const struct net *net,
if (!fl6->mp_hash)
fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
- if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
- return match;
+ if (fl6->mp_hash <= atomic_read(&match->fib6_nh.fib_nh_upper_bound))
+ goto out;
list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
fib6_siblings) {
+ const struct fib6_nh *nh = &sibling->fib6_nh;
int nh_upper_bound;
- nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
+ nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
if (fl6->mp_hash > nh_upper_bound)
continue;
- if (rt6_score_route(sibling, oif, strict) < 0)
+ if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0)
break;
match = sibling;
break;
}
- return match;
+out:
+ res->f6i = match;
+ res->nh = &match->fib6_nh;
}
/*
* Route lookup. rcu_read_lock() should be held.
*/
-static inline struct fib6_info *rt6_device_match(struct net *net,
- struct fib6_info *rt,
- const struct in6_addr *saddr,
- int oif,
- int flags)
+static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh,
+ const struct in6_addr *saddr, int oif, int flags)
{
- struct fib6_info *sprt;
+ const struct net_device *dev;
- if (!oif && ipv6_addr_any(saddr) &&
- !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
- return rt;
+ if (nh->fib_nh_flags & RTNH_F_DEAD)
+ return false;
+
+ dev = nh->fib_nh_dev;
+ if (oif) {
+ if (dev->ifindex == oif)
+ return true;
+ } else {
+ if (ipv6_chk_addr(net, saddr, dev,
+ flags & RT6_LOOKUP_F_IFACE))
+ return true;
+ }
- for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
- const struct net_device *dev = sprt->fib6_nh.nh_dev;
+ return false;
+}
- if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
- continue;
+static void rt6_device_match(struct net *net, struct fib6_result *res,
+ const struct in6_addr *saddr, int oif, int flags)
+{
+ struct fib6_info *f6i = res->f6i;
+ struct fib6_info *spf6i;
+ struct fib6_nh *nh;
- if (oif) {
- if (dev->ifindex == oif)
- return sprt;
- } else {
- if (ipv6_chk_addr(net, saddr, dev,
- flags & RT6_LOOKUP_F_IFACE))
- return sprt;
+ if (!oif && ipv6_addr_any(saddr)) {
+ nh = &f6i->fib6_nh;
+ if (!(nh->fib_nh_flags & RTNH_F_DEAD))
+ goto out;
+ }
+
+ for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) {
+ nh = &spf6i->fib6_nh;
+ if (__rt6_device_match(net, nh, saddr, oif, flags)) {
+ res->f6i = spf6i;
+ goto out;
}
}
- if (oif && flags & RT6_LOOKUP_F_IFACE)
- return net->ipv6.fib6_null_entry;
+ if (oif && flags & RT6_LOOKUP_F_IFACE) {
+ res->f6i = net->ipv6.fib6_null_entry;
+ nh = &res->f6i->fib6_nh;
+ goto out;
+ }
- return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
+ nh = &f6i->fib6_nh;
+ if (nh->fib_nh_flags & RTNH_F_DEAD) {
+ res->f6i = net->ipv6.fib6_null_entry;
+ nh = &res->f6i->fib6_nh;
+ }
+out:
+ res->nh = nh;
+ res->fib6_type = res->f6i->fib6_type;
+ res->fib6_flags = res->f6i->fib6_flags;
}
#ifdef CONFIG_IPV6_ROUTER_PREF
@@ -517,7 +545,7 @@ static void rt6_probe_deferred(struct work_struct *w)
kfree(work);
}
-static void rt6_probe(struct fib6_info *rt)
+static void rt6_probe(struct fib6_nh *fib6_nh)
{
struct __rt6_probe_work *work = NULL;
const struct in6_addr *nh_gw;
@@ -533,11 +561,11 @@ static void rt6_probe(struct fib6_info *rt)
* Router Reachability Probe MUST be rate-limited
* to no more than one per minute.
*/
- if (!rt || !(rt->fib6_flags & RTF_GATEWAY))
+ if (fib6_nh->fib_nh_gw_family)
return;
- nh_gw = &rt->fib6_nh.nh_gw;
- dev = rt->fib6_nh.nh_dev;
+ nh_gw = &fib6_nh->fib_nh_gw6;
+ dev = fib6_nh->fib_nh_dev;
rcu_read_lock_bh();
idev = __in6_dev_get(dev);
neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
@@ -554,13 +582,13 @@ static void rt6_probe(struct fib6_info *rt)
__neigh_set_probe_once(neigh);
}
write_unlock(&neigh->lock);
- } else if (time_after(jiffies, rt->last_probe +
+ } else if (time_after(jiffies, fib6_nh->last_probe +
idev->cnf.rtr_probe_interval)) {
work = kmalloc(sizeof(*work), GFP_ATOMIC);
}
if (work) {
- rt->last_probe = jiffies;
+ fib6_nh->last_probe = jiffies;
INIT_WORK(&work->work, rt6_probe_deferred);
work->target = *nh_gw;
dev_hold(dev);
@@ -572,7 +600,7 @@ out:
rcu_read_unlock_bh();
}
#else
-static inline void rt6_probe(struct fib6_info *rt)
+static inline void rt6_probe(struct fib6_nh *fib6_nh)
{
}
#endif
@@ -580,27 +608,14 @@ static inline void rt6_probe(struct fib6_info *rt)
/*
* Default Router Selection (RFC 2461 6.3.6)
*/
-static inline int rt6_check_dev(struct fib6_info *rt, int oif)
-{
- const struct net_device *dev = rt->fib6_nh.nh_dev;
-
- if (!oif || dev->ifindex == oif)
- return 2;
- return 0;
-}
-
-static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
+static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh)
{
enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
struct neighbour *neigh;
- if (rt->fib6_flags & RTF_NONEXTHOP ||
- !(rt->fib6_flags & RTF_GATEWAY))
- return RT6_NUD_SUCCEED;
-
rcu_read_lock_bh();
- neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
- &rt->fib6_nh.nh_gw);
+ neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev,
+ &fib6_nh->fib_nh_gw6);
if (neigh) {
read_lock(&neigh->lock);
if (neigh->nud_state & NUD_VALID)
@@ -621,58 +636,44 @@ static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
return ret;
}
-static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
+static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
+ int strict)
{
- int m;
+ int m = 0;
+
+ if (!oif || nh->fib_nh_dev->ifindex == oif)
+ m = 2;
- m = rt6_check_dev(rt, oif);
if (!m && (strict & RT6_LOOKUP_F_IFACE))
return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
- m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
+ m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(fib6_flags)) << 2;
#endif
- if (strict & RT6_LOOKUP_F_REACHABLE) {
- int n = rt6_check_neigh(rt);
+ if ((strict & RT6_LOOKUP_F_REACHABLE) &&
+ !(fib6_flags & RTF_NONEXTHOP) && nh->fib_nh_gw_family) {
+ int n = rt6_check_neigh(nh);
if (n < 0)
return n;
}
return m;
}
-/* called with rc_read_lock held */
-static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i)
+static bool find_match(struct fib6_nh *nh, u32 fib6_flags,
+ int oif, int strict, int *mpri, bool *do_rr)
{
- const struct net_device *dev = fib6_info_nh_dev(f6i);
+ bool match_do_rr = false;
bool rc = false;
-
- if (dev) {
- const struct inet6_dev *idev = __in6_dev_get(dev);
-
- rc = !!idev->cnf.ignore_routes_with_linkdown;
- }
-
- return rc;
-}
-
-static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
- int *mpri, struct fib6_info *match,
- bool *do_rr)
-{
int m;
- bool match_do_rr = false;
- if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
+ if (nh->fib_nh_flags & RTNH_F_DEAD)
goto out;
- if (fib6_ignore_linkdown(rt) &&
- rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
+ if (ip6_ignore_linkdown(nh->fib_nh_dev) &&
+ nh->fib_nh_flags & RTNH_F_LINKDOWN &&
!(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
goto out;
- if (fib6_check_expired(rt))
- goto out;
-
- m = rt6_score_route(rt, oif, strict);
+ m = rt6_score_route(nh, fib6_flags, oif, strict);
if (m == RT6_NUD_FAIL_DO_RR) {
match_do_rr = true;
m = 0; /* lowest valid score */
@@ -681,67 +682,82 @@ static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
}
if (strict & RT6_LOOKUP_F_REACHABLE)
- rt6_probe(rt);
+ rt6_probe(nh);
/* note that m can be RT6_NUD_FAIL_PROBE at this point */
if (m > *mpri) {
*do_rr = match_do_rr;
*mpri = m;
- match = rt;
+ rc = true;
}
out:
- return match;
+ return rc;
}
-static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
- struct fib6_info *leaf,
- struct fib6_info *rr_head,
- u32 metric, int oif, int strict,
- bool *do_rr)
+static void __find_rr_leaf(struct fib6_info *f6i_start,
+ struct fib6_info *nomatch, u32 metric,
+ struct fib6_result *res, struct fib6_info **cont,
+ int oif, int strict, bool *do_rr, int *mpri)
{
- struct fib6_info *rt, *match, *cont;
- int mpri = -1;
+ struct fib6_info *f6i;
- match = NULL;
- cont = NULL;
- for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) {
- if (rt->fib6_metric != metric) {
- cont = rt;
- break;
+ for (f6i = f6i_start;
+ f6i && f6i != nomatch;
+ f6i = rcu_dereference(f6i->fib6_next)) {
+ struct fib6_nh *nh;
+
+ if (cont && f6i->fib6_metric != metric) {
+ *cont = f6i;
+ return;
}
- match = find_match(rt, oif, strict, &mpri, match, do_rr);
- }
+ if (fib6_check_expired(f6i))
+ continue;
- for (rt = leaf; rt && rt != rr_head;
- rt = rcu_dereference(rt->fib6_next)) {
- if (rt->fib6_metric != metric) {
- cont = rt;
- break;
+ nh = &f6i->fib6_nh;
+ if (find_match(nh, f6i->fib6_flags, oif, strict, mpri, do_rr)) {
+ res->f6i = f6i;
+ res->nh = nh;
+ res->fib6_flags = f6i->fib6_flags;
+ res->fib6_type = f6i->fib6_type;
}
-
- match = find_match(rt, oif, strict, &mpri, match, do_rr);
}
+}
+
+static void find_rr_leaf(struct fib6_node *fn, struct fib6_info *leaf,
+ struct fib6_info *rr_head, int oif, int strict,
+ bool *do_rr, struct fib6_result *res)
+{
+ u32 metric = rr_head->fib6_metric;
+ struct fib6_info *cont = NULL;
+ int mpri = -1;
- if (match || !cont)
- return match;
+ __find_rr_leaf(rr_head, NULL, metric, res, &cont,
+ oif, strict, do_rr, &mpri);
- for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next))
- match = find_match(rt, oif, strict, &mpri, match, do_rr);
+ __find_rr_leaf(leaf, rr_head, metric, res, &cont,
+ oif, strict, do_rr, &mpri);
- return match;
+ if (res->f6i || !cont)
+ return;
+
+ __find_rr_leaf(cont, NULL, metric, res, NULL,
+ oif, strict, do_rr, &mpri);
}
-static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
- int oif, int strict)
+static void rt6_select(struct net *net, struct fib6_node *fn, int oif,
+ struct fib6_result *res, int strict)
{
struct fib6_info *leaf = rcu_dereference(fn->leaf);
- struct fib6_info *match, *rt0;
+ struct fib6_info *rt0;
bool do_rr = false;
int key_plen;
+ /* make sure this function or its helpers sets f6i */
+ res->f6i = NULL;
+
if (!leaf || leaf == net->ipv6.fib6_null_entry)
- return net->ipv6.fib6_null_entry;
+ goto out;
rt0 = rcu_dereference(fn->rr_ptr);
if (!rt0)
@@ -758,11 +774,9 @@ static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
key_plen = rt0->fib6_src.plen;
#endif
if (fn->fn_bit != key_plen)
- return net->ipv6.fib6_null_entry;
-
- match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
- &do_rr);
+ goto out;
+ find_rr_leaf(fn, leaf, rt0, oif, strict, &do_rr, res);
if (do_rr) {
struct fib6_info *next = rcu_dereference(rt0->fib6_next);
@@ -779,12 +793,19 @@ static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
}
}
- return match ? match : net->ipv6.fib6_null_entry;
+out:
+ if (!res->f6i) {
+ res->f6i = net->ipv6.fib6_null_entry;
+ res->nh = &res->f6i->fib6_nh;
+ res->fib6_flags = res->f6i->fib6_flags;
+ res->fib6_type = res->f6i->fib6_type;
+ }
}
-static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
+static bool rt6_is_gw_or_nonexthop(const struct fib6_result *res)
{
- return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
+ return (res->f6i->fib6_flags & RTF_NONEXTHOP) ||
+ res->nh->fib_nh_gw_family;
}
#ifdef CONFIG_IPV6_ROUTE_INFO
@@ -868,17 +889,17 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
*/
/* called with rcu_lock held */
-static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
+static struct net_device *ip6_rt_get_dev_rcu(const struct fib6_result *res)
{
- struct net_device *dev = rt->fib6_nh.nh_dev;
+ struct net_device *dev = res->nh->fib_nh_dev;
- if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
+ if (res->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
/* for copies of local routes, dst->dev needs to be the
* device if it is a master device, the master device if
* device is enslaved, and the loopback as the default
*/
if (netif_is_l3_slave(dev) &&
- !rt6_need_strict(&rt->fib6_dst.addr))
+ !rt6_need_strict(&res->f6i->fib6_dst.addr))
dev = l3mdev_master_dev_rcu(dev);
else if (!netif_is_l3_master(dev))
dev = dev_net(dev)->loopback_dev;
@@ -924,11 +945,11 @@ static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
return flags;
}
-static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
+static void ip6_rt_init_dst_reject(struct rt6_info *rt, u8 fib6_type)
{
- rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
+ rt->dst.error = ip6_rt_type_to_error(fib6_type);
- switch (ort->fib6_type) {
+ switch (fib6_type) {
case RTN_BLACKHOLE:
rt->dst.output = dst_discard_out;
rt->dst.input = dst_discard;
@@ -946,26 +967,28 @@ static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
}
}
-static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
+static void ip6_rt_init_dst(struct rt6_info *rt, const struct fib6_result *res)
{
- if (ort->fib6_flags & RTF_REJECT) {
- ip6_rt_init_dst_reject(rt, ort);
+ struct fib6_info *f6i = res->f6i;
+
+ if (res->fib6_flags & RTF_REJECT) {
+ ip6_rt_init_dst_reject(rt, res->fib6_type);
return;
}
rt->dst.error = 0;
rt->dst.output = ip6_output;
- if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) {
+ if (res->fib6_type == RTN_LOCAL || res->fib6_type == RTN_ANYCAST) {
rt->dst.input = ip6_input;
- } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
+ } else if (ipv6_addr_type(&f6i->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
rt->dst.input = ip6_mc_input;
} else {
rt->dst.input = ip6_forward;
}
- if (ort->fib6_nh.nh_lwtstate) {
- rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
+ if (res->nh->fib_nh_lws) {
+ rt->dst.lwtstate = lwtstate_get(res->nh->fib_nh_lws);
lwtunnel_set_redirect(&rt->dst);
}
@@ -980,20 +1003,25 @@ static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
}
-/* Caller must already hold reference to @ort */
-static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
+/* Caller must already hold reference to f6i in result */
+static void ip6_rt_copy_init(struct rt6_info *rt, const struct fib6_result *res)
{
- struct net_device *dev = fib6_info_nh_dev(ort);
+ const struct fib6_nh *nh = res->nh;
+ const struct net_device *dev = nh->fib_nh_dev;
+ struct fib6_info *f6i = res->f6i;
- ip6_rt_init_dst(rt, ort);
+ ip6_rt_init_dst(rt, res);
- rt->rt6i_dst = ort->fib6_dst;
+ rt->rt6i_dst = f6i->fib6_dst;
rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
- rt->rt6i_gateway = ort->fib6_nh.nh_gw;
- rt->rt6i_flags = ort->fib6_flags;
- rt6_set_from(rt, ort);
+ rt->rt6i_flags = res->fib6_flags;
+ if (nh->fib_nh_gw_family) {
+ rt->rt6i_gateway = nh->fib_nh_gw6;
+ rt->rt6i_flags |= RTF_GATEWAY;
+ }
+ rt6_set_from(rt, f6i);
#ifdef CONFIG_IPV6_SUBTREES
- rt->rt6i_src = ort->fib6_src;
+ rt->rt6i_src = f6i->fib6_src;
#endif
}
@@ -1015,14 +1043,13 @@ static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
}
}
-static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
- bool null_fallback)
+static bool ip6_hold_safe(struct net *net, struct rt6_info **prt)
{
struct rt6_info *rt = *prt;
if (dst_hold_safe(&rt->dst))
return true;
- if (null_fallback) {
+ if (net) {
rt = net->ipv6.ip6_null_entry;
dst_hold(&rt->dst);
} else {
@@ -1033,21 +1060,29 @@ static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
}
/* called with rcu_lock held */
-static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
+static struct rt6_info *ip6_create_rt_rcu(const struct fib6_result *res)
{
- unsigned short flags = fib6_info_dst_flags(rt);
- struct net_device *dev = rt->fib6_nh.nh_dev;
+ struct net_device *dev = res->nh->fib_nh_dev;
+ struct fib6_info *f6i = res->f6i;
+ unsigned short flags;
struct rt6_info *nrt;
- if (!fib6_info_hold_safe(rt))
- return NULL;
+ if (!fib6_info_hold_safe(f6i))
+ goto fallback;
+ flags = fib6_info_dst_flags(f6i);
nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
- if (nrt)
- ip6_rt_copy_init(nrt, rt);
- else
- fib6_info_release(rt);
+ if (!nrt) {
+ fib6_info_release(f6i);
+ goto fallback;
+ }
+
+ ip6_rt_copy_init(nrt, res);
+ return nrt;
+fallback:
+ nrt = dev_net(dev)->ipv6.ip6_null_entry;
+ dst_hold(&nrt->dst);
return nrt;
}
@@ -1057,7 +1092,7 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
const struct sk_buff *skb,
int flags)
{
- struct fib6_info *f6i;
+ struct fib6_result res = {};
struct fib6_node *fn;
struct rt6_info *rt;
@@ -1067,41 +1102,38 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
rcu_read_lock();
fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
- f6i = rcu_dereference(fn->leaf);
- if (!f6i) {
- f6i = net->ipv6.fib6_null_entry;
- } else {
- f6i = rt6_device_match(net, f6i, &fl6->saddr,
- fl6->flowi6_oif, flags);
- if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
- f6i = fib6_multipath_select(net, f6i, fl6,
- fl6->flowi6_oif, skb,
- flags);
- }
- if (f6i == net->ipv6.fib6_null_entry) {
+ res.f6i = rcu_dereference(fn->leaf);
+ if (!res.f6i)
+ res.f6i = net->ipv6.fib6_null_entry;
+ else
+ rt6_device_match(net, &res, &fl6->saddr, fl6->flowi6_oif,
+ flags);
+
+ if (res.f6i == net->ipv6.fib6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
if (fn)
goto restart;
+
+ rt = net->ipv6.ip6_null_entry;
+ dst_hold(&rt->dst);
+ goto out;
}
- trace_fib6_table_lookup(net, f6i, table, fl6);
+ fib6_select_path(net, &res, fl6, fl6->flowi6_oif,
+ fl6->flowi6_oif != 0, skb, flags);
/* Search through exception table */
- rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
+ rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
if (rt) {
- if (ip6_hold_safe(net, &rt, true))
+ if (ip6_hold_safe(net, &rt))
dst_use_noref(&rt->dst, jiffies);
- } else if (f6i == net->ipv6.fib6_null_entry) {
- rt = net->ipv6.ip6_null_entry;
- dst_hold(&rt->dst);
} else {
- rt = ip6_create_rt_rcu(f6i);
- if (!rt) {
- rt = net->ipv6.ip6_null_entry;
- dst_hold(&rt->dst);
- }
+ rt = ip6_create_rt_rcu(&res);
}
+out:
+ trace_fib6_table_lookup(net, &res, table, fl6);
+
rcu_read_unlock();
return rt;
@@ -1167,10 +1199,11 @@ int ip6_ins_rt(struct net *net, struct fib6_info *rt)
return __ip6_ins_rt(rt, &info, NULL);
}
-static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
+static struct rt6_info *ip6_rt_cache_alloc(const struct fib6_result *res,
const struct in6_addr *daddr,
const struct in6_addr *saddr)
{
+ struct fib6_info *f6i = res->f6i;
struct net_device *dev;
struct rt6_info *rt;
@@ -1178,25 +1211,25 @@ static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
* Clone the route.
*/
- if (!fib6_info_hold_safe(ort))
+ if (!fib6_info_hold_safe(f6i))
return NULL;
- dev = ip6_rt_get_dev_rcu(ort);
+ dev = ip6_rt_get_dev_rcu(res);
rt = ip6_dst_alloc(dev_net(dev), dev, 0);
if (!rt) {
- fib6_info_release(ort);
+ fib6_info_release(f6i);
return NULL;
}
- ip6_rt_copy_init(rt, ort);
+ ip6_rt_copy_init(rt, res);
rt->rt6i_flags |= RTF_CACHE;
rt->dst.flags |= DST_HOST;
rt->rt6i_dst.addr = *daddr;
rt->rt6i_dst.plen = 128;
- if (!rt6_is_gw_or_nonexthop(ort)) {
- if (ort->fib6_dst.plen != 128 &&
- ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
+ if (!rt6_is_gw_or_nonexthop(res)) {
+ if (f6i->fib6_dst.plen != 128 &&
+ ipv6_addr_equal(&f6i->fib6_dst.addr, daddr))
rt->rt6i_flags |= RTF_ANYCAST;
#ifdef CONFIG_IPV6_SUBTREES
if (rt->rt6i_src.plen && saddr) {
@@ -1209,55 +1242,56 @@ static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
return rt;
}
-static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
+static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res)
{
- unsigned short flags = fib6_info_dst_flags(rt);
+ struct fib6_info *f6i = res->f6i;
+ unsigned short flags = fib6_info_dst_flags(f6i);
struct net_device *dev;
struct rt6_info *pcpu_rt;
- if (!fib6_info_hold_safe(rt))
+ if (!fib6_info_hold_safe(f6i))
return NULL;
rcu_read_lock();
- dev = ip6_rt_get_dev_rcu(rt);
+ dev = ip6_rt_get_dev_rcu(res);
pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
rcu_read_unlock();
if (!pcpu_rt) {
- fib6_info_release(rt);
+ fib6_info_release(f6i);
return NULL;
}
- ip6_rt_copy_init(pcpu_rt, rt);
+ ip6_rt_copy_init(pcpu_rt, res);
pcpu_rt->rt6i_flags |= RTF_PCPU;
return pcpu_rt;
}
/* It should be called with rcu_read_lock() acquired */
-static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
+static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
{
struct rt6_info *pcpu_rt, **p;
- p = this_cpu_ptr(rt->rt6i_pcpu);
+ p = this_cpu_ptr(res->f6i->rt6i_pcpu);
pcpu_rt = *p;
if (pcpu_rt)
- ip6_hold_safe(NULL, &pcpu_rt, false);
+ ip6_hold_safe(NULL, &pcpu_rt);
return pcpu_rt;
}
static struct rt6_info *rt6_make_pcpu_route(struct net *net,
- struct fib6_info *rt)
+ const struct fib6_result *res)
{
struct rt6_info *pcpu_rt, *prev, **p;
- pcpu_rt = ip6_rt_pcpu_alloc(rt);
+ pcpu_rt = ip6_rt_pcpu_alloc(res);
if (!pcpu_rt) {
dst_hold(&net->ipv6.ip6_null_entry->dst);
return net->ipv6.ip6_null_entry;
}
dst_hold(&pcpu_rt->dst);
- p = this_cpu_ptr(rt->rt6i_pcpu);
+ p = this_cpu_ptr(res->f6i->rt6i_pcpu);
prev = cmpxchg(p, NULL, pcpu_rt);
BUG_ON(prev);
@@ -1286,9 +1320,7 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
/* purge completely the exception to allow releasing the held resources:
* some [sk] cache may keep the dst around for unlimited time
*/
- from = rcu_dereference_protected(rt6_ex->rt6i->from,
- lockdep_is_held(&rt6_exception_lock));
- rcu_assign_pointer(rt6_ex->rt6i->from, NULL);
+ from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL);
fib6_info_release(from);
dst_dev_put(&rt6_ex->rt6i->dst);
@@ -1400,14 +1432,15 @@ __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
return NULL;
}
-static unsigned int fib6_mtu(const struct fib6_info *rt)
+static unsigned int fib6_mtu(const struct fib6_result *res)
{
+ const struct fib6_nh *nh = res->nh;
unsigned int mtu;
- if (rt->fib6_pmtu) {
- mtu = rt->fib6_pmtu;
+ if (res->f6i->fib6_pmtu) {
+ mtu = res->f6i->fib6_pmtu;
} else {
- struct net_device *dev = fib6_info_nh_dev(rt);
+ struct net_device *dev = nh->fib_nh_dev;
struct inet6_dev *idev;
rcu_read_lock();
@@ -1418,26 +1451,27 @@ static unsigned int fib6_mtu(const struct fib6_info *rt)
mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
- return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
+ return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
}
static int rt6_insert_exception(struct rt6_info *nrt,
- struct fib6_info *ort)
+ const struct fib6_result *res)
{
struct net *net = dev_net(nrt->dst.dev);
struct rt6_exception_bucket *bucket;
struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
+ struct fib6_info *f6i = res->f6i;
int err = 0;
spin_lock_bh(&rt6_exception_lock);
- if (ort->exception_bucket_flushed) {
+ if (f6i->exception_bucket_flushed) {
err = -EINVAL;
goto out;
}
- bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
+ bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket,
lockdep_is_held(&rt6_exception_lock));
if (!bucket) {
bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
@@ -1446,24 +1480,24 @@ static int rt6_insert_exception(struct rt6_info *nrt,
err = -ENOMEM;
goto out;
}
- rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
+ rcu_assign_pointer(f6i->rt6i_exception_bucket, bucket);
}
#ifdef CONFIG_IPV6_SUBTREES
- /* rt6i_src.plen != 0 indicates ort is in subtree
+ /* fib6_src.plen != 0 indicates f6i is in subtree
* and exception table is indexed by a hash of
- * both rt6i_dst and rt6i_src.
+ * both fib6_dst and fib6_src.
* Otherwise, the exception table is indexed by
- * a hash of only rt6i_dst.
+ * a hash of only fib6_dst.
*/
- if (ort->fib6_src.plen)
+ if (f6i->fib6_src.plen)
src_key = &nrt->rt6i_src.addr;
#endif
- /* rt6_mtu_change() might lower mtu on ort.
+ /* rt6_mtu_change() might lower mtu on f6i.
* Only insert this exception route if its mtu
- * is less than ort's mtu value.
+ * is less than f6i's mtu value.
*/
- if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
+ if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(res)) {
err = -EINVAL;
goto out;
}
@@ -1492,9 +1526,9 @@ out:
/* Update fn->fn_sernum to invalidate all cached dst */
if (!err) {
- spin_lock_bh(&ort->fib6_table->tb6_lock);
- fib6_update_sernum(net, ort);
- spin_unlock_bh(&ort->fib6_table->tb6_lock);
+ spin_lock_bh(&f6i->fib6_table->tb6_lock);
+ fib6_update_sernum(net, f6i);
+ spin_unlock_bh(&f6i->fib6_table->tb6_lock);
fib6_force_start_gc(net);
}
@@ -1531,33 +1565,33 @@ out:
/* Find cached rt in the hash table inside passed in rt
* Caller has to hold rcu_read_lock()
*/
-static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
+static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
struct in6_addr *daddr,
struct in6_addr *saddr)
{
struct rt6_exception_bucket *bucket;
struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
- struct rt6_info *res = NULL;
+ struct rt6_info *ret = NULL;
- bucket = rcu_dereference(rt->rt6i_exception_bucket);
+ bucket = rcu_dereference(res->f6i->rt6i_exception_bucket);
#ifdef CONFIG_IPV6_SUBTREES
- /* rt6i_src.plen != 0 indicates rt is in subtree
+ /* fib6i_src.plen != 0 indicates f6i is in subtree
* and exception table is indexed by a hash of
- * both rt6i_dst and rt6i_src.
+ * both fib6_dst and fib6_src.
* Otherwise, the exception table is indexed by
- * a hash of only rt6i_dst.
+ * a hash of only fib6_dst.
*/
- if (rt->fib6_src.plen)
+ if (res->f6i->fib6_src.plen)
src_key = saddr;
#endif
rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
- res = rt6_ex->rt6i;
+ ret = rt6_ex->rt6i;
- return res;
+ return ret;
}
/* Remove the passed in cached rt from the hash table that contains it */
@@ -1805,11 +1839,10 @@ void rt6_age_exceptions(struct fib6_info *rt,
}
/* must be called with rcu lock held */
-struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
- int oif, struct flowi6 *fl6, int strict)
+int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
+ struct flowi6 *fl6, struct fib6_result *res, int strict)
{
struct fib6_node *fn, *saved_fn;
- struct fib6_info *f6i;
fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
saved_fn = fn;
@@ -1818,8 +1851,8 @@ struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
oif = 0;
redo_rt6_select:
- f6i = rt6_select(net, fn, oif, strict);
- if (f6i == net->ipv6.fib6_null_entry) {
+ rt6_select(net, fn, oif, res, strict);
+ if (res->f6i == net->ipv6.fib6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
if (fn)
goto redo_rt6_select;
@@ -1831,16 +1864,16 @@ redo_rt6_select:
}
}
- trace_fib6_table_lookup(net, f6i, table, fl6);
+ trace_fib6_table_lookup(net, res, table, fl6);
- return f6i;
+ return 0;
}
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
int oif, struct flowi6 *fl6,
const struct sk_buff *skb, int flags)
{
- struct fib6_info *f6i;
+ struct fib6_result res = {};
struct rt6_info *rt;
int strict = 0;
@@ -1851,27 +1884,26 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
rcu_read_lock();
- f6i = fib6_table_lookup(net, table, oif, fl6, strict);
- if (f6i->fib6_nsiblings)
- f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
-
- if (f6i == net->ipv6.fib6_null_entry) {
+ fib6_table_lookup(net, table, oif, fl6, &res, strict);
+ if (res.f6i == net->ipv6.fib6_null_entry) {
rt = net->ipv6.ip6_null_entry;
rcu_read_unlock();
dst_hold(&rt->dst);
return rt;
}
+ fib6_select_path(net, &res, fl6, oif, false, skb, strict);
+
/*Search through exception table */
- rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
+ rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
if (rt) {
- if (ip6_hold_safe(net, &rt, true))
+ if (ip6_hold_safe(net, &rt))
dst_use_noref(&rt->dst, jiffies);
rcu_read_unlock();
return rt;
} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
- !(f6i->fib6_flags & RTF_GATEWAY))) {
+ !res.nh->fib_nh_gw_family)) {
/* Create a RTF_CACHE clone which will not be
* owned by the fib6 tree. It is for the special case where
* the daddr in the skb during the neighbor look-up is different
@@ -1879,7 +1911,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
*/
struct rt6_info *uncached_rt;
- uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
+ uncached_rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL);
rcu_read_unlock();
@@ -1901,10 +1933,10 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
struct rt6_info *pcpu_rt;
local_bh_disable();
- pcpu_rt = rt6_get_pcpu_route(f6i);
+ pcpu_rt = rt6_get_pcpu_route(&res);
if (!pcpu_rt)
- pcpu_rt = rt6_make_pcpu_route(net, f6i);
+ pcpu_rt = rt6_make_pcpu_route(net, &res);
local_bh_enable();
rcu_read_unlock();
@@ -2323,15 +2355,23 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
if (rt6->rt6i_flags & RTF_CACHE)
rt6_update_exception_stamp_rt(rt6);
} else if (daddr) {
- struct fib6_info *from;
+ struct fib6_result res = {};
struct rt6_info *nrt6;
rcu_read_lock();
- from = rcu_dereference(rt6->from);
- nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
+ res.f6i = rcu_dereference(rt6->from);
+ if (!res.f6i) {
+ rcu_read_unlock();
+ return;
+ }
+ res.nh = &res.f6i->fib6_nh;
+ res.fib6_flags = res.f6i->fib6_flags;
+ res.fib6_type = res.f6i->fib6_type;
+
+ nrt6 = ip6_rt_cache_alloc(&res, daddr, saddr);
if (nrt6) {
rt6_do_update_pmtu(nrt6, mtu);
- if (rt6_insert_exception(nrt6, from))
+ if (rt6_insert_exception(nrt6, &res))
dst_release_immediate(&nrt6->dst);
}
rcu_read_unlock();
@@ -2404,6 +2444,36 @@ void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
NULL);
}
+static bool ip6_redirect_nh_match(const struct fib6_result *res,
+ struct flowi6 *fl6,
+ const struct in6_addr *gw,
+ struct rt6_info **ret)
+{
+ const struct fib6_nh *nh = res->nh;
+
+ if (nh->fib_nh_flags & RTNH_F_DEAD || !nh->fib_nh_gw_family ||
+ fl6->flowi6_oif != nh->fib_nh_dev->ifindex)
+ return false;
+
+ /* rt_cache's gateway might be different from its 'parent'
+ * in the case of an ip redirect.
+ * So we keep searching in the exception table if the gateway
+ * is different.
+ */
+ if (!ipv6_addr_equal(gw, &nh->fib_nh_gw6)) {
+ struct rt6_info *rt_cache;
+
+ rt_cache = rt6_find_cached_rt(res, &fl6->daddr, &fl6->saddr);
+ if (rt_cache &&
+ ipv6_addr_equal(gw, &rt_cache->rt6i_gateway)) {
+ *ret = rt_cache;
+ return true;
+ }
+ return false;
+ }
+ return true;
+}
+
/* Handle redirects */
struct ip6rd_flowi {
struct flowi6 fl6;
@@ -2417,7 +2487,8 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
int flags)
{
struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
- struct rt6_info *ret = NULL, *rt_cache;
+ struct rt6_info *ret = NULL;
+ struct fib6_result res = {};
struct fib6_info *rt;
struct fib6_node *fn;
@@ -2435,34 +2506,15 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
for_each_fib6_node_rt_rcu(fn) {
- if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
- continue;
+ res.f6i = rt;
+ res.nh = &rt->fib6_nh;
+
if (fib6_check_expired(rt))
continue;
if (rt->fib6_flags & RTF_REJECT)
break;
- if (!(rt->fib6_flags & RTF_GATEWAY))
- continue;
- if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
- continue;
- /* rt_cache's gateway might be different from its 'parent'
- * in the case of an ip redirect.
- * So we keep searching in the exception table if the gateway
- * is different.
- */
- if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
- rt_cache = rt6_find_cached_rt(rt,
- &fl6->daddr,
- &fl6->saddr);
- if (rt_cache &&
- ipv6_addr_equal(&rdfl->gateway,
- &rt_cache->rt6i_gateway)) {
- ret = rt_cache;
- break;
- }
- continue;
- }
- break;
+ if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway, &ret))
+ goto out;
}
if (!rt)
@@ -2478,15 +2530,20 @@ restart:
goto restart;
}
+ res.f6i = rt;
+ res.nh = &rt->fib6_nh;
out:
- if (ret)
- ip6_hold_safe(net, &ret, true);
- else
- ret = ip6_create_rt_rcu(rt);
+ if (ret) {
+ ip6_hold_safe(net, &ret);
+ } else {
+ res.fib6_flags = res.f6i->fib6_flags;
+ res.fib6_type = res.f6i->fib6_type;
+ ret = ip6_create_rt_rcu(&res);
+ }
rcu_read_unlock();
- trace_fib6_table_lookup(net, rt, table, fl6);
+ trace_fib6_table_lookup(net, &res, table, fl6);
return ret;
};
@@ -2604,12 +2661,15 @@ out:
* based on ip6_dst_mtu_forward and exception logic of
* rt6_find_cached_rt; called with rcu_read_lock
*/
-u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
- struct in6_addr *saddr)
+u32 ip6_mtu_from_fib6(const struct fib6_result *res,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
{
struct rt6_exception_bucket *bucket;
+ const struct fib6_nh *nh = res->nh;
+ struct fib6_info *f6i = res->f6i;
+ const struct in6_addr *src_key;
struct rt6_exception *rt6_ex;
- struct in6_addr *src_key;
struct inet6_dev *idev;
u32 mtu = 0;
@@ -2631,7 +2691,7 @@ u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
if (likely(!mtu)) {
- struct net_device *dev = fib6_info_nh_dev(f6i);
+ struct net_device *dev = nh->fib_nh_dev;
mtu = IPV6_MIN_MTU;
idev = __in6_dev_get(dev);
@@ -2641,7 +2701,7 @@ u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
out:
- return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu);
+ return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
}
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
@@ -2897,17 +2957,143 @@ out:
return err;
}
+static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type)
+{
+ if ((flags & RTF_REJECT) ||
+ (dev && (dev->flags & IFF_LOOPBACK) &&
+ !(addr_type & IPV6_ADDR_LOOPBACK) &&
+ !(flags & RTF_LOCAL)))
+ return true;
+
+ return false;
+}
+
+int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
+ struct fib6_config *cfg, gfp_t gfp_flags,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *dev = NULL;
+ struct inet6_dev *idev = NULL;
+ int addr_type;
+ int err;
+
+ fib6_nh->fib_nh_family = AF_INET6;
+
+ err = -ENODEV;
+ if (cfg->fc_ifindex) {
+ dev = dev_get_by_index(net, cfg->fc_ifindex);
+ if (!dev)
+ goto out;
+ idev = in6_dev_get(dev);
+ if (!idev)
+ goto out;
+ }
+
+ if (cfg->fc_flags & RTNH_F_ONLINK) {
+ if (!dev) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop device required for onlink");
+ goto out;
+ }
+
+ if (!(dev->flags & IFF_UP)) {
+ NL_SET_ERR_MSG(extack, "Nexthop device is not up");
+ err = -ENETDOWN;
+ goto out;
+ }
+
+ fib6_nh->fib_nh_flags |= RTNH_F_ONLINK;
+ }
+
+ fib6_nh->fib_nh_weight = 1;
+
+ /* We cannot add true routes via loopback here,
+ * they would result in kernel looping; promote them to reject routes
+ */
+ addr_type = ipv6_addr_type(&cfg->fc_dst);
+ if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) {
+ /* hold loopback dev/idev if we haven't done so. */
+ if (dev != net->loopback_dev) {
+ if (dev) {
+ dev_put(dev);
+ in6_dev_put(idev);
+ }
+ dev = net->loopback_dev;
+ dev_hold(dev);
+ idev = in6_dev_get(dev);
+ if (!idev) {
+ err = -ENODEV;
+ goto out;
+ }
+ }
+ goto set_dev;
+ }
+
+ if (cfg->fc_flags & RTF_GATEWAY) {
+ err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
+ if (err)
+ goto out;
+
+ fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
+ fib6_nh->fib_nh_gw_family = AF_INET6;
+ }
+
+ err = -ENODEV;
+ if (!dev)
+ goto out;
+
+ if (idev->cnf.disable_ipv6) {
+ NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
+ err = -EACCES;
+ goto out;
+ }
+
+ if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) {
+ NL_SET_ERR_MSG(extack, "Nexthop device is not up");
+ err = -ENETDOWN;
+ goto out;
+ }
+
+ if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
+ !netif_carrier_ok(dev))
+ fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
+
+ err = fib_nh_common_init(&fib6_nh->nh_common, cfg->fc_encap,
+ cfg->fc_encap_type, cfg, gfp_flags, extack);
+ if (err)
+ goto out;
+set_dev:
+ fib6_nh->fib_nh_dev = dev;
+ fib6_nh->fib_nh_oif = dev->ifindex;
+ err = 0;
+out:
+ if (idev)
+ in6_dev_put(idev);
+
+ if (err) {
+ lwtstate_put(fib6_nh->fib_nh_lws);
+ fib6_nh->fib_nh_lws = NULL;
+ if (dev)
+ dev_put(dev);
+ }
+
+ return err;
+}
+
+void fib6_nh_release(struct fib6_nh *fib6_nh)
+{
+ fib_nh_common_release(&fib6_nh->nh_common);
+}
+
static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
gfp_t gfp_flags,
struct netlink_ext_ack *extack)
{
struct net *net = cfg->fc_nlinfo.nl_net;
struct fib6_info *rt = NULL;
- struct net_device *dev = NULL;
- struct inet6_dev *idev = NULL;
struct fib6_table *table;
- int addr_type;
int err = -EINVAL;
+ int addr_type;
/* RTF_PCPU is an internal flag; can not be set by userspace */
if (cfg->fc_flags & RTF_PCPU) {
@@ -2941,33 +3127,6 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
goto out;
}
#endif
- if (cfg->fc_ifindex) {
- err = -ENODEV;
- dev = dev_get_by_index(net, cfg->fc_ifindex);
- if (!dev)
- goto out;
- idev = in6_dev_get(dev);
- if (!idev)
- goto out;
- }
-
- if (cfg->fc_metric == 0)
- cfg->fc_metric = IP6_RT_PRIO_USER;
-
- if (cfg->fc_flags & RTNH_F_ONLINK) {
- if (!dev) {
- NL_SET_ERR_MSG(extack,
- "Nexthop device required for onlink");
- err = -ENODEV;
- goto out;
- }
-
- if (!(dev->flags & IFF_UP)) {
- NL_SET_ERR_MSG(extack, "Nexthop device is not up");
- err = -ENETDOWN;
- goto out;
- }
- }
err = -ENOBUFS;
if (cfg->fc_nlinfo.nlh &&
@@ -3011,18 +3170,10 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
cfg->fc_protocol = RTPROT_BOOT;
rt->fib6_protocol = cfg->fc_protocol;
- addr_type = ipv6_addr_type(&cfg->fc_dst);
-
- if (cfg->fc_encap) {
- struct lwtunnel_state *lwtstate;
-
- err = lwtunnel_build_state(cfg->fc_encap_type,
- cfg->fc_encap, AF_INET6, cfg,
- &lwtstate, extack);
- if (err)
- goto out;
- rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
- }
+ rt->fib6_table = table;
+ rt->fib6_metric = cfg->fc_metric;
+ rt->fib6_type = cfg->fc_type;
+ rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;
ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
rt->fib6_dst.plen = cfg->fc_dst_len;
@@ -3033,62 +3184,20 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
rt->fib6_src.plen = cfg->fc_src_len;
#endif
-
- rt->fib6_metric = cfg->fc_metric;
- rt->fib6_nh.nh_weight = 1;
-
- rt->fib6_type = cfg->fc_type;
+ err = fib6_nh_init(net, &rt->fib6_nh, cfg, gfp_flags, extack);
+ if (err)
+ goto out;
/* We cannot add true routes via loopback here,
- they would result in kernel looping; promote them to reject routes
+ * they would result in kernel looping; promote them to reject routes
*/
- if ((cfg->fc_flags & RTF_REJECT) ||
- (dev && (dev->flags & IFF_LOOPBACK) &&
- !(addr_type & IPV6_ADDR_LOOPBACK) &&
- !(cfg->fc_flags & RTF_LOCAL))) {
- /* hold loopback dev/idev if we haven't done so. */
- if (dev != net->loopback_dev) {
- if (dev) {
- dev_put(dev);
- in6_dev_put(idev);
- }
- dev = net->loopback_dev;
- dev_hold(dev);
- idev = in6_dev_get(dev);
- if (!idev) {
- err = -ENODEV;
- goto out;
- }
- }
- rt->fib6_flags = RTF_REJECT|RTF_NONEXTHOP;
- goto install_route;
- }
-
- if (cfg->fc_flags & RTF_GATEWAY) {
- err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
- if (err)
- goto out;
-
- rt->fib6_nh.nh_gw = cfg->fc_gateway;
- }
-
- err = -ENODEV;
- if (!dev)
- goto out;
-
- if (idev->cnf.disable_ipv6) {
- NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
- err = -EACCES;
- goto out;
- }
-
- if (!(dev->flags & IFF_UP)) {
- NL_SET_ERR_MSG(extack, "Nexthop device is not up");
- err = -ENETDOWN;
- goto out;
- }
+ addr_type = ipv6_addr_type(&cfg->fc_dst);
+ if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh.fib_nh_dev, addr_type))
+ rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
+ struct net_device *dev = fib6_info_nh_dev(rt);
+
if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
NL_SET_ERR_MSG(extack, "Invalid source address");
err = -EINVAL;
@@ -3099,26 +3208,8 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
} else
rt->fib6_prefsrc.plen = 0;
- rt->fib6_flags = cfg->fc_flags;
-
-install_route:
- if (!(rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
- !netif_carrier_ok(dev))
- rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
- rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
- rt->fib6_nh.nh_dev = dev;
- rt->fib6_table = table;
-
- if (idev)
- in6_dev_put(idev);
-
return rt;
out:
- if (dev)
- dev_put(dev);
- if (idev)
- in6_dev_put(idev);
-
fib6_info_release(rt);
return ERR_PTR(err);
}
@@ -3259,10 +3350,16 @@ static int ip6_route_del(struct fib6_config *cfg,
if (fn) {
for_each_fib6_node_rt_rcu(fn) {
+ struct fib6_nh *nh;
+
if (cfg->fc_flags & RTF_CACHE) {
+ struct fib6_result res = {
+ .f6i = rt,
+ };
int rc;
- rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
+ rt_cache = rt6_find_cached_rt(&res,
+ &cfg->fc_dst,
&cfg->fc_src);
if (rt_cache) {
rc = ip6_del_cached_rt(rt_cache, cfg);
@@ -3273,12 +3370,14 @@ static int ip6_route_del(struct fib6_config *cfg,
}
continue;
}
+
+ nh = &rt->fib6_nh;
if (cfg->fc_ifindex &&
- (!rt->fib6_nh.nh_dev ||
- rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
+ (!nh->fib_nh_dev ||
+ nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
continue;
if (cfg->fc_flags & RTF_GATEWAY &&
- !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
+ !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6))
continue;
if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
continue;
@@ -3304,10 +3403,10 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
{
struct netevent_redirect netevent;
struct rt6_info *rt, *nrt = NULL;
+ struct fib6_result res = {};
struct ndisc_options ndopts;
struct inet6_dev *in6_dev;
struct neighbour *neigh;
- struct fib6_info *from;
struct rd_msg *msg;
int optlen, on_link;
u8 *lladdr;
@@ -3390,14 +3489,14 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
NDISC_REDIRECT, &ndopts);
rcu_read_lock();
- from = rcu_dereference(rt->from);
- /* This fib6_info_hold() is safe here because we hold reference to rt
- * and rt already holds reference to fib6_info.
- */
- fib6_info_hold(from);
- rcu_read_unlock();
+ res.f6i = rcu_dereference(rt->from);
+ if (!res.f6i)
+ goto out;
- nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
+ res.nh = &res.f6i->fib6_nh;
+ res.fib6_flags = res.f6i->fib6_flags;
+ res.fib6_type = res.f6i->fib6_type;
+ nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL);
if (!nrt)
goto out;
@@ -3407,11 +3506,8 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
- /* No need to remove rt from the exception table if rt is
- * a cached route because rt6_insert_exception() will
- * takes care of it
- */
- if (rt6_insert_exception(nrt, from)) {
+ /* rt6_insert_exception() will take care of duplicated exceptions */
+ if (rt6_insert_exception(nrt, &res)) {
dst_release_immediate(&nrt->dst);
goto out;
}
@@ -3423,7 +3519,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
out:
- fib6_info_release(from);
+ rcu_read_unlock();
neigh_release(neigh);
}
@@ -3449,11 +3545,12 @@ static struct fib6_info *rt6_get_route_info(struct net *net,
goto out;
for_each_fib6_node_rt_rcu(fn) {
- if (rt->fib6_nh.nh_dev->ifindex != ifindex)
+ if (rt->fib6_nh.fib_nh_dev->ifindex != ifindex)
continue;
- if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
+ if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
+ !rt->fib6_nh.fib_nh_gw_family)
continue;
- if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
+ if (!ipv6_addr_equal(&rt->fib6_nh.fib_nh_gw6, gwaddr))
continue;
if (!fib6_info_hold_safe(rt))
continue;
@@ -3511,9 +3608,11 @@ struct fib6_info *rt6_get_dflt_router(struct net *net,
rcu_read_lock();
for_each_fib6_node_rt_rcu(&table->tb6_root) {
- if (dev == rt->fib6_nh.nh_dev &&
+ struct fib6_nh *nh = &rt->fib6_nh;
+
+ if (dev == nh->fib_nh_dev &&
((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
- ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
+ ipv6_addr_equal(&nh->fib_nh_gw6, addr))
break;
}
if (rt && !fib6_info_hold_safe(rt))
@@ -3604,7 +3703,7 @@ static void rtmsg_to_fib6_config(struct net *net,
.fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
: RT6_TABLE_MAIN,
.fc_ifindex = rtmsg->rtmsg_ifindex,
- .fc_metric = rtmsg->rtmsg_metric,
+ .fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER,
.fc_expires = rtmsg->rtmsg_info,
.fc_dst_len = rtmsg->rtmsg_dst_len,
.fc_src_len = rtmsg->rtmsg_src_len,
@@ -3662,23 +3761,34 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
- int type;
struct dst_entry *dst = skb_dst(skb);
+ struct net *net = dev_net(dst->dev);
+ struct inet6_dev *idev;
+ int type;
+
+ if (netif_is_l3_master(skb->dev) &&
+ dst->dev == net->loopback_dev)
+ idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
+ else
+ idev = ip6_dst_idev(dst);
+
switch (ipstats_mib_noroutes) {
case IPSTATS_MIB_INNOROUTES:
type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
if (type == IPV6_ADDR_ANY) {
- IP6_INC_STATS(dev_net(dst->dev),
- __in6_dev_get_safely(skb->dev),
- IPSTATS_MIB_INADDRERRORS);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
break;
}
/* FALLTHROUGH */
case IPSTATS_MIB_OUTNOROUTES:
- IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
- ipstats_mib_noroutes);
+ IP6_INC_STATS(net, idev, ipstats_mib_noroutes);
break;
}
+
+ /* Start over by dropping the dst for l3mdev case */
+ if (netif_is_l3_master(skb->dev))
+ skb_dst_drop(skb);
+
icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
kfree_skb(skb);
return 0;
@@ -3715,36 +3825,26 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net,
const struct in6_addr *addr,
bool anycast, gfp_t gfp_flags)
{
- u32 tb_id;
- struct net_device *dev = idev->dev;
- struct fib6_info *f6i;
-
- f6i = fib6_info_alloc(gfp_flags);
- if (!f6i)
- return ERR_PTR(-ENOMEM);
+ struct fib6_config cfg = {
+ .fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
+ .fc_ifindex = idev->dev->ifindex,
+ .fc_flags = RTF_UP | RTF_ADDRCONF | RTF_NONEXTHOP,
+ .fc_dst = *addr,
+ .fc_dst_len = 128,
+ .fc_protocol = RTPROT_KERNEL,
+ .fc_nlinfo.nl_net = net,
+ .fc_ignore_dev_down = true,
+ };
- f6i->fib6_metrics = ip_fib_metrics_init(net, NULL, 0, NULL);
- f6i->dst_nocount = true;
- f6i->dst_host = true;
- f6i->fib6_protocol = RTPROT_KERNEL;
- f6i->fib6_flags = RTF_UP | RTF_NONEXTHOP;
if (anycast) {
- f6i->fib6_type = RTN_ANYCAST;
- f6i->fib6_flags |= RTF_ANYCAST;
+ cfg.fc_type = RTN_ANYCAST;
+ cfg.fc_flags |= RTF_ANYCAST;
} else {
- f6i->fib6_type = RTN_LOCAL;
- f6i->fib6_flags |= RTF_LOCAL;
+ cfg.fc_type = RTN_LOCAL;
+ cfg.fc_flags |= RTF_LOCAL;
}
- f6i->fib6_nh.nh_gw = *addr;
- dev_hold(dev);
- f6i->fib6_nh.nh_dev = dev;
- f6i->fib6_dst.addr = *addr;
- f6i->fib6_dst.plen = 128;
- tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
- f6i->fib6_table = fib6_get_table(net, tb_id);
-
- return f6i;
+ return ip6_route_info_create(&cfg, gfp_flags, NULL);
}
/* remove deleted ip from prefsrc entries */
@@ -3760,7 +3860,7 @@ static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
struct net *net = ((struct arg_dev_net_ip *)arg)->net;
struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
- if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
+ if (((void *)rt->fib6_nh.fib_nh_dev == dev || !dev) &&
rt != net->ipv6.fib6_null_entry &&
ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
spin_lock_bh(&rt6_exception_lock);
@@ -3782,7 +3882,7 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
fib6_clean_all(net, fib6_remove_prefsrc, &adni);
}
-#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
+#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT)
/* Remove routers and update dst entries when gateway turn into host. */
static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
@@ -3790,7 +3890,8 @@ static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
struct in6_addr *gateway = (struct in6_addr *)arg;
if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
- ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
+ rt->fib6_nh.fib_nh_gw_family &&
+ ipv6_addr_equal(gateway, &rt->fib6_nh.fib_nh_gw6)) {
return -1;
}
@@ -3811,7 +3912,7 @@ void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
struct arg_netdev_event {
const struct net_device *dev;
union {
- unsigned int nh_flags;
+ unsigned char nh_flags;
unsigned long event;
};
};
@@ -3838,9 +3939,9 @@ static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
static bool rt6_is_dead(const struct fib6_info *rt)
{
- if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
- (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
- fib6_ignore_linkdown(rt)))
+ if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD ||
+ (rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN &&
+ ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev)))
return true;
return false;
@@ -3852,11 +3953,11 @@ static int rt6_multipath_total_weight(const struct fib6_info *rt)
int total = 0;
if (!rt6_is_dead(rt))
- total += rt->fib6_nh.nh_weight;
+ total += rt->fib6_nh.fib_nh_weight;
list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
if (!rt6_is_dead(iter))
- total += iter->fib6_nh.nh_weight;
+ total += iter->fib6_nh.fib_nh_weight;
}
return total;
@@ -3867,11 +3968,11 @@ static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
int upper_bound = -1;
if (!rt6_is_dead(rt)) {
- *weight += rt->fib6_nh.nh_weight;
+ *weight += rt->fib6_nh.fib_nh_weight;
upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
total) - 1;
}
- atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
+ atomic_set(&rt->fib6_nh.fib_nh_upper_bound, upper_bound);
}
static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
@@ -3914,8 +4015,9 @@ static int fib6_ifup(struct fib6_info *rt, void *p_arg)
const struct arg_netdev_event *arg = p_arg;
struct net *net = dev_net(arg->dev);
- if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
- rt->fib6_nh.nh_flags &= ~arg->nh_flags;
+ if (rt != net->ipv6.fib6_null_entry &&
+ rt->fib6_nh.fib_nh_dev == arg->dev) {
+ rt->fib6_nh.fib_nh_flags &= ~arg->nh_flags;
fib6_update_sernum_upto_root(net, rt);
rt6_multipath_rebalance(rt);
}
@@ -3923,7 +4025,7 @@ static int fib6_ifup(struct fib6_info *rt, void *p_arg)
return 0;
}
-void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
+void rt6_sync_up(struct net_device *dev, unsigned char nh_flags)
{
struct arg_netdev_event arg = {
.dev = dev,
@@ -3943,10 +4045,10 @@ static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
{
struct fib6_info *iter;
- if (rt->fib6_nh.nh_dev == dev)
+ if (rt->fib6_nh.fib_nh_dev == dev)
return true;
list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
- if (iter->fib6_nh.nh_dev == dev)
+ if (iter->fib6_nh.fib_nh_dev == dev)
return true;
return false;
@@ -3967,12 +4069,12 @@ static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
struct fib6_info *iter;
unsigned int dead = 0;
- if (rt->fib6_nh.nh_dev == down_dev ||
- rt->fib6_nh.nh_flags & RTNH_F_DEAD)
+ if (rt->fib6_nh.fib_nh_dev == down_dev ||
+ rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
dead++;
list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
- if (iter->fib6_nh.nh_dev == down_dev ||
- iter->fib6_nh.nh_flags & RTNH_F_DEAD)
+ if (iter->fib6_nh.fib_nh_dev == down_dev ||
+ iter->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
dead++;
return dead;
@@ -3980,15 +4082,15 @@ static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
const struct net_device *dev,
- unsigned int nh_flags)
+ unsigned char nh_flags)
{
struct fib6_info *iter;
- if (rt->fib6_nh.nh_dev == dev)
- rt->fib6_nh.nh_flags |= nh_flags;
+ if (rt->fib6_nh.fib_nh_dev == dev)
+ rt->fib6_nh.fib_nh_flags |= nh_flags;
list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
- if (iter->fib6_nh.nh_dev == dev)
- iter->fib6_nh.nh_flags |= nh_flags;
+ if (iter->fib6_nh.fib_nh_dev == dev)
+ iter->fib6_nh.fib_nh_flags |= nh_flags;
}
/* called with write lock held for table with rt */
@@ -4003,12 +4105,12 @@ static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
switch (arg->event) {
case NETDEV_UNREGISTER:
- return rt->fib6_nh.nh_dev == dev ? -1 : 0;
+ return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
case NETDEV_DOWN:
if (rt->should_flush)
return -1;
if (!rt->fib6_nsiblings)
- return rt->fib6_nh.nh_dev == dev ? -1 : 0;
+ return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
if (rt6_multipath_uses_dev(rt, dev)) {
unsigned int count;
@@ -4024,10 +4126,10 @@ static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
}
return -2;
case NETDEV_CHANGE:
- if (rt->fib6_nh.nh_dev != dev ||
+ if (rt->fib6_nh.fib_nh_dev != dev ||
rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
break;
- rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
+ rt->fib6_nh.fib_nh_flags |= RTNH_F_LINKDOWN;
rt6_multipath_rebalance(rt);
break;
}
@@ -4083,7 +4185,7 @@ static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
Since RFC 1981 doesn't include administrative MTU increase
update PMTU increase is a MUST. (i.e. jumbo frame)
*/
- if (rt->fib6_nh.nh_dev == arg->dev &&
+ if (rt->fib6_nh.fib_nh_dev == arg->dev &&
!fib6_metric_locked(rt, RTAX_MTU)) {
u32 mtu = rt->fib6_pmtu;
@@ -4137,8 +4239,8 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
unsigned int pref;
int err;
- err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
- extack);
+ err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
+ rtm_ipv6_policy, extack);
if (err < 0)
goto errout;
@@ -4374,7 +4476,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
goto cleanup;
}
- rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
+ rt->fib6_nh.fib_nh_weight = rtnh->rtnh_hops + 1;
err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
rt, &r_cfg);
@@ -4524,6 +4626,9 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
return err;
+ if (cfg.fc_metric == 0)
+ cfg.fc_metric = IP6_RT_PRIO_USER;
+
if (cfg.fc_mp)
return ip6_route_multipath_add(&cfg, extack);
else
@@ -4538,7 +4643,7 @@ static size_t rt6_nlmsg_size(struct fib6_info *rt)
nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
+ NLA_ALIGN(sizeof(struct rtnexthop))
+ nla_total_size(16) /* RTA_GATEWAY */
- + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
+ + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws);
nexthop_len *= rt->fib6_nsiblings;
}
@@ -4556,77 +4661,10 @@ static size_t rt6_nlmsg_size(struct fib6_info *rt)
+ nla_total_size(sizeof(struct rta_cacheinfo))
+ nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
+ nla_total_size(1) /* RTA_PREF */
- + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
+ + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws)
+ nexthop_len;
}
-static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt,
- unsigned int *flags, bool skip_oif)
-{
- if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
- *flags |= RTNH_F_DEAD;
-
- if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
- *flags |= RTNH_F_LINKDOWN;
-
- rcu_read_lock();
- if (fib6_ignore_linkdown(rt))
- *flags |= RTNH_F_DEAD;
- rcu_read_unlock();
- }
-
- if (rt->fib6_flags & RTF_GATEWAY) {
- if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
- goto nla_put_failure;
- }
-
- *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
- if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
- *flags |= RTNH_F_OFFLOAD;
-
- /* not needed for multipath encoding b/c it has a rtnexthop struct */
- if (!skip_oif && rt->fib6_nh.nh_dev &&
- nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
- goto nla_put_failure;
-
- if (rt->fib6_nh.nh_lwtstate &&
- lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
- goto nla_put_failure;
-
- return 0;
-
-nla_put_failure:
- return -EMSGSIZE;
-}
-
-/* add multipath next hop */
-static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt)
-{
- const struct net_device *dev = rt->fib6_nh.nh_dev;
- struct rtnexthop *rtnh;
- unsigned int flags = 0;
-
- rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
- if (!rtnh)
- goto nla_put_failure;
-
- rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
- rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
-
- if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
- goto nla_put_failure;
-
- rtnh->rtnh_flags = flags;
-
- /* length of rtnetlink header + attributes */
- rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
-
- return 0;
-
-nla_put_failure:
- return -EMSGSIZE;
-}
-
static int rt6_fill_node(struct net *net, struct sk_buff *skb,
struct fib6_info *rt, struct dst_entry *dst,
struct in6_addr *dest, struct in6_addr *src,
@@ -4739,23 +4777,30 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
struct fib6_info *sibling, *next_sibling;
struct nlattr *mp;
- mp = nla_nest_start(skb, RTA_MULTIPATH);
+ mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
if (!mp)
goto nla_put_failure;
- if (rt6_add_nexthop(skb, rt) < 0)
+ if (fib_add_nexthop(skb, &rt->fib6_nh.nh_common,
+ rt->fib6_nh.fib_nh_weight) < 0)
goto nla_put_failure;
list_for_each_entry_safe(sibling, next_sibling,
&rt->fib6_siblings, fib6_siblings) {
- if (rt6_add_nexthop(skb, sibling) < 0)
+ if (fib_add_nexthop(skb, &sibling->fib6_nh.nh_common,
+ sibling->fib6_nh.fib_nh_weight) < 0)
goto nla_put_failure;
}
nla_nest_end(skb, mp);
} else {
- if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
+ unsigned char nh_flags = 0;
+
+ if (fib_nexthop_info(skb, &rt->fib6_nh.nh_common,
+ &nh_flags, false) < 0)
goto nla_put_failure;
+
+ rtm->rtm_flags |= nh_flags;
}
if (rt6_flags & RTF_EXPIRES) {
@@ -4781,7 +4826,7 @@ nla_put_failure:
static bool fib6_info_uses_dev(const struct fib6_info *f6i,
const struct net_device *dev)
{
- if (f6i->fib6_nh.nh_dev == dev)
+ if (f6i->fib6_nh.fib_nh_dev == dev)
return true;
if (f6i->fib6_nsiblings) {
@@ -4789,7 +4834,7 @@ static bool fib6_info_uses_dev(const struct fib6_info *f6i,
list_for_each_entry_safe(sibling, next_sibling,
&f6i->fib6_siblings, fib6_siblings) {
- if (sibling->fib6_nh.nh_dev == dev)
+ if (sibling->fib6_nh.fib_nh_dev == dev)
return true;
}
}
@@ -4841,8 +4886,8 @@ static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
}
if (!netlink_strict_get_check(skb))
- return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX,
- rtm_ipv6_policy, extack);
+ return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
+ rtm_ipv6_policy, extack);
rtm = nlmsg_data(nlh);
if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
@@ -4858,8 +4903,8 @@ static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
return -EINVAL;
}
- err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
- rtm_ipv6_policy, extack);
+ err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
+ rtm_ipv6_policy, extack);
if (err)
return err;
@@ -5011,16 +5056,20 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
rcu_read_lock();
from = rcu_dereference(rt->from);
-
- if (fibmatch)
- err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
- RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
- nlh->nlmsg_seq, 0);
- else
- err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
- &fl6.saddr, iif, RTM_NEWROUTE,
- NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
- 0);
+ if (from) {
+ if (fibmatch)
+ err = rt6_fill_node(net, skb, from, NULL, NULL, NULL,
+ iif, RTM_NEWROUTE,
+ NETLINK_CB(in_skb).portid,
+ nlh->nlmsg_seq, 0);
+ else
+ err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
+ &fl6.saddr, iif, RTM_NEWROUTE,
+ NETLINK_CB(in_skb).portid,
+ nlh->nlmsg_seq, 0);
+ } else {
+ err = -ENETUNREACH;
+ }
rcu_read_unlock();
if (err < 0) {
@@ -5074,7 +5123,7 @@ static int ip6_route_dev_notify(struct notifier_block *this,
return NOTIFY_OK;
if (event == NETDEV_REGISTER) {
- net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
+ net->ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = dev;
net->ipv6.ip6_null_entry->dst.dev = dev;
net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
@@ -5409,7 +5458,7 @@ void __init ip6_route_init_special_entries(void)
/* Registering of the loopback is done before this portion of code,
* the loopback reference in rt6_info will not be taken, do it
* manually for init_net */
- init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
+ init_net.ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = init_net.loopback_dev;
init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 9b2f272ca164..0c5479ef9b38 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -398,28 +398,28 @@ static struct pernet_operations ip6_segments_ops = {
static const struct genl_ops seg6_genl_ops[] = {
{
.cmd = SEG6_CMD_SETHMAC,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = seg6_genl_sethmac,
- .policy = seg6_genl_policy,
.flags = GENL_ADMIN_PERM,
},
{
.cmd = SEG6_CMD_DUMPHMAC,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.start = seg6_genl_dumphmac_start,
.dumpit = seg6_genl_dumphmac,
.done = seg6_genl_dumphmac_done,
- .policy = seg6_genl_policy,
.flags = GENL_ADMIN_PERM,
},
{
.cmd = SEG6_CMD_SET_TUNSRC,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = seg6_genl_set_tunsrc,
- .policy = seg6_genl_policy,
.flags = GENL_ADMIN_PERM,
},
{
.cmd = SEG6_CMD_GET_TUNSRC,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = seg6_genl_get_tunsrc,
- .policy = seg6_genl_policy,
.flags = GENL_ADMIN_PERM,
},
};
@@ -429,6 +429,7 @@ static struct genl_family seg6_genl_family __ro_after_init = {
.name = SEG6_GENL_NAME,
.version = SEG6_GENL_VERSION,
.maxattr = SEG6_ATTR_MAX,
+ .policy = seg6_genl_policy,
.netnsok = true,
.parallel_ops = true,
.ops = seg6_genl_ops,
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index ee5403cbe655..7a525fda8978 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -396,8 +396,8 @@ static int seg6_build_state(struct nlattr *nla,
if (family != AF_INET && family != AF_INET6)
return -EINVAL;
- err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla,
- seg6_iptunnel_policy, extack);
+ err = nla_parse_nested_deprecated(tb, SEG6_IPTUNNEL_MAX, nla,
+ seg6_iptunnel_policy, extack);
if (err < 0)
return err;
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 60325dbfe88b..78155fdb8c36 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -823,8 +823,9 @@ static int parse_nla_bpf(struct nlattr **attrs, struct seg6_local_lwt *slwt)
int ret;
u32 fd;
- ret = nla_parse_nested(tb, SEG6_LOCAL_BPF_PROG_MAX,
- attrs[SEG6_LOCAL_BPF], bpf_prog_policy, NULL);
+ ret = nla_parse_nested_deprecated(tb, SEG6_LOCAL_BPF_PROG_MAX,
+ attrs[SEG6_LOCAL_BPF],
+ bpf_prog_policy, NULL);
if (ret < 0)
return ret;
@@ -853,7 +854,7 @@ static int put_nla_bpf(struct sk_buff *skb, struct seg6_local_lwt *slwt)
if (!slwt->bpf.prog)
return 0;
- nest = nla_nest_start(skb, SEG6_LOCAL_BPF);
+ nest = nla_nest_start_noflag(skb, SEG6_LOCAL_BPF);
if (!nest)
return -EMSGSIZE;
@@ -959,8 +960,8 @@ static int seg6_local_build_state(struct nlattr *nla, unsigned int family,
if (family != AF_INET6)
return -EINVAL;
- err = nla_parse_nested(tb, SEG6_LOCAL_MAX, nla, seg6_local_policy,
- extack);
+ err = nla_parse_nested_deprecated(tb, SEG6_LOCAL_MAX, nla,
+ seg6_local_policy, extack);
if (err < 0)
return err;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 07e21a82ce4c..971d60bf9640 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -669,6 +669,10 @@ static int ipip6_rcv(struct sk_buff *skb)
!net_eq(tunnel->net, dev_net(tunnel->dev))))
goto out;
+ /* skb can be uncloned in iptunnel_pull_header, so
+ * old iph is no longer valid
+ */
+ iph = (const struct iphdr *)skb_mac_header(skb);
err = IP_ECN_decapsulate(iph, skb);
if (unlikely(err)) {
if (log_ecn_error)
@@ -1080,7 +1084,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
if (!tdev && tunnel->parms.link)
tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
- if (tdev) {
+ if (tdev && !netif_is_l3_master(tdev)) {
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 57ef69a10889..beaf28456301 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -43,6 +43,7 @@
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
+#include <linux/indirect_call_wrapper.h>
#include <net/tcp.h>
#include <net/ndisc.h>
@@ -90,6 +91,18 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
}
#endif
+/* Helper returning the inet6 address from a given tcp socket.
+ * It can be used in TCP stack instead of inet6_sk(sk).
+ * This avoids a dereference and allow compiler optimizations.
+ * It is a specialized version of inet6_sk_generic().
+ */
+static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
+{
+ unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
+
+ return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
+}
+
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
@@ -99,7 +112,7 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
sk->sk_rx_dst = dst;
inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
- inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
+ tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
}
}
@@ -138,7 +151,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
struct inet_sock *inet = inet_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
- struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_pinfo *np = tcp_inet6_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct in6_addr *saddr = NULL, *final_p, final;
struct ipv6_txoptions *opt;
@@ -390,7 +403,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (sk->sk_state == TCP_CLOSE)
goto out;
- if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
+ if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
goto out;
}
@@ -405,7 +418,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
goto out;
}
- np = inet6_sk(sk);
+ np = tcp_inet6_sk(sk);
if (type == NDISC_REDIRECT) {
if (!sock_owned_by_user(sk)) {
@@ -478,7 +491,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
enum tcp_synack_type synack_type)
{
struct inet_request_sock *ireq = inet_rsk(req);
- struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_pinfo *np = tcp_inet6_sk(sk);
struct ipv6_txoptions *opt;
struct flowi6 *fl6 = &fl->u.ip6;
struct sk_buff *skb;
@@ -737,7 +750,7 @@ static void tcp_v6_init_req(struct request_sock *req,
{
bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
struct inet_request_sock *ireq = inet_rsk(req);
- const struct ipv6_pinfo *np = inet6_sk(sk_listener);
+ const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
@@ -1066,9 +1079,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
{
struct inet_request_sock *ireq;
struct ipv6_pinfo *newnp;
- const struct ipv6_pinfo *np = inet6_sk(sk);
+ const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
struct ipv6_txoptions *opt;
- struct tcp6_sock *newtcp6sk;
struct inet_sock *newinet;
struct tcp_sock *newtp;
struct sock *newsk;
@@ -1088,11 +1100,10 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
if (!newsk)
return NULL;
- newtcp6sk = (struct tcp6_sock *)newsk;
- inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
+ inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
newinet = inet_sk(newsk);
- newnp = inet6_sk(newsk);
+ newnp = tcp_inet6_sk(newsk);
newtp = tcp_sk(newsk);
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
@@ -1110,11 +1121,11 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
newnp->ipv6_fl_list = NULL;
newnp->pktoptions = NULL;
newnp->opt = NULL;
- newnp->mcast_oif = tcp_v6_iif(skb);
- newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
- newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
+ newnp->mcast_oif = inet_iif(skb);
+ newnp->mcast_hops = ip_hdr(skb)->ttl;
+ newnp->rcv_flowinfo = 0;
if (np->repflow)
- newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
+ newnp->flow_label = 0;
/*
* No need to charge this sock to the relevant IPv6 refcnt debug socks count
@@ -1156,12 +1167,11 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
ip6_dst_store(newsk, dst, NULL, NULL);
inet6_sk_rx_dst_set(newsk, skb);
- newtcp6sk = (struct tcp6_sock *)newsk;
- inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
+ inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
newtp = tcp_sk(newsk);
newinet = inet_sk(newsk);
- newnp = inet6_sk(newsk);
+ newnp = tcp_inet6_sk(newsk);
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
@@ -1276,9 +1286,9 @@ out:
*/
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct tcp_sock *tp;
+ struct ipv6_pinfo *np = tcp_inet6_sk(sk);
struct sk_buff *opt_skb = NULL;
+ struct tcp_sock *tp;
/* Imagine: socket is IPv6. IPv4 packet arrives,
goes to IPv4 receive handler and backlogged.
@@ -1426,8 +1436,9 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}
-static int tcp_v6_rcv(struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
+ struct sk_buff *skb_to_free;
int sdif = inet6_sdif(skb);
const struct tcphdr *th;
const struct ipv6hdr *hdr;
@@ -1524,7 +1535,7 @@ process:
return 0;
}
}
- if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
+ if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
goto discard_and_relse;
}
@@ -1554,12 +1565,17 @@ process:
tcp_segs_in(tcp_sk(sk), skb);
ret = 0;
if (!sock_owned_by_user(sk)) {
+ skb_to_free = sk->sk_rx_skb_cache;
+ sk->sk_rx_skb_cache = NULL;
ret = tcp_v6_do_rcv(sk, skb);
- } else if (tcp_add_backlog(sk, skb)) {
- goto discard_and_relse;
+ } else {
+ if (tcp_add_backlog(sk, skb))
+ goto discard_and_relse;
+ skb_to_free = NULL;
}
bh_unlock_sock(sk);
-
+ if (skb_to_free)
+ __kfree_skb(skb_to_free);
put_and_return:
if (refcounted)
sock_put(sk);
@@ -1639,7 +1655,7 @@ do_time_wait:
goto discard_it;
}
-static void tcp_v6_early_demux(struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
{
const struct ipv6hdr *hdr;
const struct tcphdr *th;
@@ -1669,7 +1685,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb)
struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
if (dst)
- dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
+ dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
if (dst &&
inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
skb_dst_set_noref(skb, dst);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index b444483cdb2b..07fa579dfb96 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -36,6 +36,7 @@
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
+#include <linux/indirect_call_wrapper.h>
#include <net/addrconf.h>
#include <net/ndisc.h>
@@ -285,8 +286,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
struct inet_sock *inet = inet_sk(sk);
struct sk_buff *skb;
unsigned int ulen, copied;
- int peeked, peeking, off;
- int err;
+ int off, err, peeking = flags & MSG_PEEK;
int is_udplite = IS_UDPLITE(sk);
struct udp_mib __percpu *mib;
bool checksum_valid = false;
@@ -299,9 +299,8 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
return ipv6_recv_rxpmtu(sk, msg, len, addr_len);
try_again:
- peeking = flags & MSG_PEEK;
off = sk_peek_offset(sk, flags);
- skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err);
+ skb = __skb_recv_udp(sk, flags, noblock, &off, &err);
if (!skb)
return err;
@@ -340,14 +339,14 @@ try_again:
goto csum_copy_err;
}
if (unlikely(err)) {
- if (!peeked) {
+ if (!peeking) {
atomic_inc(&sk->sk_drops);
SNMP_INC_STATS(mib, UDP_MIB_INERRORS);
}
kfree_skb(skb);
return err;
}
- if (!peeked)
+ if (!peeking)
SNMP_INC_STATS(mib, UDP_MIB_INDATAGRAMS);
sock_recv_ts_and_drops(msg, sk, skb);
@@ -982,7 +981,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
return NULL;
}
-static void udp_v6_early_demux(struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
const struct udphdr *uh;
@@ -1023,7 +1022,7 @@ static void udp_v6_early_demux(struct sk_buff *skb)
}
}
-static __inline__ int udpv6_rcv(struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE int udpv6_rcv(struct sk_buff *skb)
{
return __udp6_lib_rcv(skb, &udp_table, IPPROTO_UDP);
}
@@ -1047,6 +1046,8 @@ static void udp_v6_flush_pending_frames(struct sock *sk)
static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len)
{
+ if (addr_len < offsetofend(struct sockaddr, sa_family))
+ return -EINVAL;
/* The following checks are replicated from __ip6_datagram_connect()
* and intended to prevent BPF program called below from accessing
* bytes that are out of the bound specified by user in addr_len.
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
deleted file mode 100644
index 57fd314ec2b8..000000000000
--- a/net/ipv6/xfrm6_mode_beet.c
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * xfrm6_mode_beet.c - BEET mode encapsulation for IPv6.
- *
- * Copyright (c) 2006 Diego Beltrami <diego.beltrami@gmail.com>
- * Miika Komu <miika@iki.fi>
- * Herbert Xu <herbert@gondor.apana.org.au>
- * Abhinav Pathak <abhinav.pathak@hiit.fi>
- * Jeff Ahrenholz <ahrenholz@gmail.com>
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/stringify.h>
-#include <net/dsfield.h>
-#include <net/dst.h>
-#include <net/inet_ecn.h>
-#include <net/ipv6.h>
-#include <net/xfrm.h>
-
-static void xfrm6_beet_make_header(struct sk_buff *skb)
-{
- struct ipv6hdr *iph = ipv6_hdr(skb);
-
- iph->version = 6;
-
- memcpy(iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl,
- sizeof(iph->flow_lbl));
- iph->nexthdr = XFRM_MODE_SKB_CB(skb)->protocol;
-
- ipv6_change_dsfield(iph, 0, XFRM_MODE_SKB_CB(skb)->tos);
- iph->hop_limit = XFRM_MODE_SKB_CB(skb)->ttl;
-}
-
-/* Add encapsulation header.
- *
- * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
- */
-static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
-{
- struct ipv6hdr *top_iph;
- struct ip_beet_phdr *ph;
- int optlen, hdr_len;
-
- hdr_len = 0;
- optlen = XFRM_MODE_SKB_CB(skb)->optlen;
- if (unlikely(optlen))
- hdr_len += IPV4_BEET_PHMAXLEN - (optlen & 4);
-
- skb_set_network_header(skb, -x->props.header_len - hdr_len);
- if (x->sel.family != AF_INET6)
- skb->network_header += IPV4_BEET_PHMAXLEN;
- skb->mac_header = skb->network_header +
- offsetof(struct ipv6hdr, nexthdr);
- skb->transport_header = skb->network_header + sizeof(*top_iph);
- ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdr_len);
-
- xfrm6_beet_make_header(skb);
-
- top_iph = ipv6_hdr(skb);
- if (unlikely(optlen)) {
-
- BUG_ON(optlen < 0);
-
- ph->padlen = 4 - (optlen & 4);
- ph->hdrlen = optlen / 8;
- ph->nexthdr = top_iph->nexthdr;
- if (ph->padlen)
- memset(ph + 1, IPOPT_NOP, ph->padlen);
-
- top_iph->nexthdr = IPPROTO_BEETPH;
- }
-
- top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
- top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
- return 0;
-}
-
-static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
-{
- struct ipv6hdr *ip6h;
- int size = sizeof(struct ipv6hdr);
- int err;
-
- err = skb_cow_head(skb, size + skb->mac_len);
- if (err)
- goto out;
-
- __skb_push(skb, size);
- skb_reset_network_header(skb);
- skb_mac_header_rebuild(skb);
-
- xfrm6_beet_make_header(skb);
-
- ip6h = ipv6_hdr(skb);
- ip6h->payload_len = htons(skb->len - size);
- ip6h->daddr = x->sel.daddr.in6;
- ip6h->saddr = x->sel.saddr.in6;
- err = 0;
-out:
- return err;
-}
-
-static struct xfrm_mode xfrm6_beet_mode = {
- .input2 = xfrm6_beet_input,
- .input = xfrm_prepare_input,
- .output2 = xfrm6_beet_output,
- .output = xfrm6_prepare_output,
- .owner = THIS_MODULE,
- .encap = XFRM_MODE_BEET,
- .flags = XFRM_MODE_FLAG_TUNNEL,
-};
-
-static int __init xfrm6_beet_init(void)
-{
- return xfrm_register_mode(&xfrm6_beet_mode, AF_INET6);
-}
-
-static void __exit xfrm6_beet_exit(void)
-{
- int err;
-
- err = xfrm_unregister_mode(&xfrm6_beet_mode, AF_INET6);
- BUG_ON(err);
-}
-
-module_init(xfrm6_beet_init);
-module_exit(xfrm6_beet_exit);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_BEET);
diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
deleted file mode 100644
index da28e4407b8f..000000000000
--- a/net/ipv6/xfrm6_mode_ro.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * xfrm6_mode_ro.c - Route optimization mode for IPv6.
- *
- * Copyright (C)2003-2006 Helsinki University of Technology
- * Copyright (C)2003-2006 USAGI/WIDE Project
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-/*
- * Authors:
- * Noriaki TAKAMIYA @USAGI
- * Masahide NAKAMURA @USAGI
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/spinlock.h>
-#include <linux/stringify.h>
-#include <linux/time.h>
-#include <net/ipv6.h>
-#include <net/xfrm.h>
-
-/* Add route optimization header space.
- *
- * The IP header and mutable extension headers will be moved forward to make
- * space for the route optimization header.
- */
-static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
-{
- struct ipv6hdr *iph;
- u8 *prevhdr;
- int hdr_len;
-
- iph = ipv6_hdr(skb);
-
- hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
- if (hdr_len < 0)
- return hdr_len;
- skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data);
- skb_set_network_header(skb, -x->props.header_len);
- skb->transport_header = skb->network_header + hdr_len;
- __skb_pull(skb, hdr_len);
- memmove(ipv6_hdr(skb), iph, hdr_len);
-
- x->lastused = ktime_get_real_seconds();
-
- return 0;
-}
-
-static struct xfrm_mode xfrm6_ro_mode = {
- .output = xfrm6_ro_output,
- .owner = THIS_MODULE,
- .encap = XFRM_MODE_ROUTEOPTIMIZATION,
-};
-
-static int __init xfrm6_ro_init(void)
-{
- return xfrm_register_mode(&xfrm6_ro_mode, AF_INET6);
-}
-
-static void __exit xfrm6_ro_exit(void)
-{
- int err;
-
- err = xfrm_unregister_mode(&xfrm6_ro_mode, AF_INET6);
- BUG_ON(err);
-}
-
-module_init(xfrm6_ro_init);
-module_exit(xfrm6_ro_exit);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_ROUTEOPTIMIZATION);
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
deleted file mode 100644
index 3c29da5defe6..000000000000
--- a/net/ipv6/xfrm6_mode_transport.c
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * xfrm6_mode_transport.c - Transport mode encapsulation for IPv6.
- *
- * Copyright (C) 2002 USAGI/WIDE Project
- * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au>
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/stringify.h>
-#include <net/dst.h>
-#include <net/ipv6.h>
-#include <net/xfrm.h>
-#include <net/protocol.h>
-
-/* Add encapsulation header.
- *
- * The IP header and mutable extension headers will be moved forward to make
- * space for the encapsulation header.
- */
-static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
-{
- struct ipv6hdr *iph;
- u8 *prevhdr;
- int hdr_len;
-
- iph = ipv6_hdr(skb);
- skb_set_inner_transport_header(skb, skb_transport_offset(skb));
-
- hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
- if (hdr_len < 0)
- return hdr_len;
- skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data);
- skb_set_network_header(skb, -x->props.header_len);
- skb->transport_header = skb->network_header + hdr_len;
- __skb_pull(skb, hdr_len);
- memmove(ipv6_hdr(skb), iph, hdr_len);
- return 0;
-}
-
-/* Remove encapsulation header.
- *
- * The IP header will be moved over the top of the encapsulation header.
- *
- * On entry, skb->h shall point to where the IP header should be and skb->nh
- * shall be set to where the IP header currently is. skb->data shall point
- * to the start of the payload.
- */
-static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb)
-{
- int ihl = skb->data - skb_transport_header(skb);
-
- if (skb->transport_header != skb->network_header) {
- memmove(skb_transport_header(skb),
- skb_network_header(skb), ihl);
- skb->network_header = skb->transport_header;
- }
- ipv6_hdr(skb)->payload_len = htons(skb->len + ihl -
- sizeof(struct ipv6hdr));
- skb_reset_transport_header(skb);
- return 0;
-}
-
-static struct sk_buff *xfrm4_transport_gso_segment(struct xfrm_state *x,
- struct sk_buff *skb,
- netdev_features_t features)
-{
- const struct net_offload *ops;
- struct sk_buff *segs = ERR_PTR(-EINVAL);
- struct xfrm_offload *xo = xfrm_offload(skb);
-
- skb->transport_header += x->props.header_len;
- ops = rcu_dereference(inet6_offloads[xo->proto]);
- if (likely(ops && ops->callbacks.gso_segment))
- segs = ops->callbacks.gso_segment(skb, features);
-
- return segs;
-}
-
-static void xfrm6_transport_xmit(struct xfrm_state *x, struct sk_buff *skb)
-{
- struct xfrm_offload *xo = xfrm_offload(skb);
-
- skb_reset_mac_len(skb);
- pskb_pull(skb, skb->mac_len + sizeof(struct ipv6hdr) + x->props.header_len);
-
- if (xo->flags & XFRM_GSO_SEGMENT) {
- skb_reset_transport_header(skb);
- skb->transport_header -= x->props.header_len;
- }
-}
-
-
-static struct xfrm_mode xfrm6_transport_mode = {
- .input = xfrm6_transport_input,
- .output = xfrm6_transport_output,
- .gso_segment = xfrm4_transport_gso_segment,
- .xmit = xfrm6_transport_xmit,
- .owner = THIS_MODULE,
- .encap = XFRM_MODE_TRANSPORT,
-};
-
-static int __init xfrm6_transport_init(void)
-{
- return xfrm_register_mode(&xfrm6_transport_mode, AF_INET6);
-}
-
-static void __exit xfrm6_transport_exit(void)
-{
- int err;
-
- err = xfrm_unregister_mode(&xfrm6_transport_mode, AF_INET6);
- BUG_ON(err);
-}
-
-module_init(xfrm6_transport_init);
-module_exit(xfrm6_transport_exit);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_TRANSPORT);
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
deleted file mode 100644
index de1b0b8c53b0..000000000000
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * xfrm6_mode_tunnel.c - Tunnel mode encapsulation for IPv6.
- *
- * Copyright (C) 2002 USAGI/WIDE Project
- * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au>
- */
-
-#include <linux/gfp.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/stringify.h>
-#include <net/dsfield.h>
-#include <net/dst.h>
-#include <net/inet_ecn.h>
-#include <net/ip6_route.h>
-#include <net/ipv6.h>
-#include <net/xfrm.h>
-
-static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
-{
- struct ipv6hdr *inner_iph = ipipv6_hdr(skb);
-
- if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos))
- IP6_ECN_set_ce(skb, inner_iph);
-}
-
-/* Add encapsulation header.
- *
- * The top IP header will be constructed per RFC 2401.
- */
-static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
-{
- struct dst_entry *dst = skb_dst(skb);
- struct ipv6hdr *top_iph;
- int dsfield;
-
- skb_set_inner_network_header(skb, skb_network_offset(skb));
- skb_set_inner_transport_header(skb, skb_transport_offset(skb));
-
- skb_set_network_header(skb, -x->props.header_len);
- skb->mac_header = skb->network_header +
- offsetof(struct ipv6hdr, nexthdr);
- skb->transport_header = skb->network_header + sizeof(*top_iph);
- top_iph = ipv6_hdr(skb);
-
- top_iph->version = 6;
-
- memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl,
- sizeof(top_iph->flow_lbl));
- top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family);
-
- if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
- dsfield = 0;
- else
- dsfield = XFRM_MODE_SKB_CB(skb)->tos;
- dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos);
- if (x->props.flags & XFRM_STATE_NOECN)
- dsfield &= ~INET_ECN_MASK;
- ipv6_change_dsfield(top_iph, 0, dsfield);
- top_iph->hop_limit = ip6_dst_hoplimit(xfrm_dst_child(dst));
- top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
- top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
- return 0;
-}
-
-#define for_each_input_rcu(head, handler) \
- for (handler = rcu_dereference(head); \
- handler != NULL; \
- handler = rcu_dereference(handler->next))
-
-
-static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
-{
- int err = -EINVAL;
-
- if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6)
- goto out;
- if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
- goto out;
-
- err = skb_unclone(skb, GFP_ATOMIC);
- if (err)
- goto out;
-
- if (x->props.flags & XFRM_STATE_DECAP_DSCP)
- ipv6_copy_dscp(ipv6_get_dsfield(ipv6_hdr(skb)),
- ipipv6_hdr(skb));
- if (!(x->props.flags & XFRM_STATE_NOECN))
- ipip6_ecn_decapsulate(skb);
-
- skb_reset_network_header(skb);
- skb_mac_header_rebuild(skb);
- if (skb->mac_len)
- eth_hdr(skb)->h_proto = skb->protocol;
-
- err = 0;
-
-out:
- return err;
-}
-
-static struct sk_buff *xfrm6_mode_tunnel_gso_segment(struct xfrm_state *x,
- struct sk_buff *skb,
- netdev_features_t features)
-{
- __skb_push(skb, skb->mac_len);
- return skb_mac_gso_segment(skb, features);
-}
-
-static void xfrm6_mode_tunnel_xmit(struct xfrm_state *x, struct sk_buff *skb)
-{
- struct xfrm_offload *xo = xfrm_offload(skb);
-
- if (xo->flags & XFRM_GSO_SEGMENT)
- skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
-
- skb_reset_mac_len(skb);
- pskb_pull(skb, skb->mac_len + x->props.header_len);
-}
-
-static struct xfrm_mode xfrm6_tunnel_mode = {
- .input2 = xfrm6_mode_tunnel_input,
- .input = xfrm_prepare_input,
- .output2 = xfrm6_mode_tunnel_output,
- .output = xfrm6_prepare_output,
- .gso_segment = xfrm6_mode_tunnel_gso_segment,
- .xmit = xfrm6_mode_tunnel_xmit,
- .owner = THIS_MODULE,
- .encap = XFRM_MODE_TUNNEL,
- .flags = XFRM_MODE_FLAG_TUNNEL,
-};
-
-static int __init xfrm6_mode_tunnel_init(void)
-{
- return xfrm_register_mode(&xfrm6_tunnel_mode, AF_INET6);
-}
-
-static void __exit xfrm6_mode_tunnel_exit(void)
-{
- int err;
-
- err = xfrm_unregister_mode(&xfrm6_tunnel_mode, AF_INET6);
- BUG_ON(err);
-}
-
-module_init(xfrm6_mode_tunnel_init);
-module_exit(xfrm6_mode_tunnel_exit);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_TUNNEL);
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 6a74080005cf..8ad5e54eb8ca 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -111,21 +111,6 @@ int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb)
return xfrm6_extract_header(skb);
}
-int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
-{
- int err;
-
- err = xfrm_inner_extract_output(x, skb);
- if (err)
- return err;
-
- skb->ignore_df = 1;
- skb->protocol = htons(ETH_P_IPV6);
-
- return x->outer_mode->output2(x, skb);
-}
-EXPORT_SYMBOL(xfrm6_prepare_output);
-
int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb)
{
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
@@ -137,11 +122,28 @@ int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb)
return xfrm_output(sk, skb);
}
+static int __xfrm6_output_state_finish(struct xfrm_state *x, struct sock *sk,
+ struct sk_buff *skb)
+{
+ const struct xfrm_state_afinfo *afinfo;
+ int ret = -EAFNOSUPPORT;
+
+ rcu_read_lock();
+ afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family);
+ if (likely(afinfo))
+ ret = afinfo->output_finish(sk, skb);
+ else
+ kfree_skb(skb);
+ rcu_read_unlock();
+
+ return ret;
+}
+
static int __xfrm6_output_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct xfrm_state *x = skb_dst(skb)->xfrm;
- return x->outer_mode->afinfo->output_finish(sk, skb);
+ return __xfrm6_output_state_finish(x, sk, skb);
}
static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
@@ -183,7 +185,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
__xfrm6_output_finish);
skip_frag:
- return x->outer_mode->afinfo->output_finish(sk, skb);
+ return __xfrm6_output_state_finish(x, sk, skb);
}
int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 769f8f78d3b8..699e0730ce8e 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -22,9 +22,6 @@
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/l3mdev.h>
-#if IS_ENABLED(CONFIG_IPV6_MIP6)
-#include <net/mip6.h>
-#endif
static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
const xfrm_address_t *saddr,
@@ -71,24 +68,6 @@ static int xfrm6_get_saddr(struct net *net, int oif,
return 0;
}
-static int xfrm6_get_tos(const struct flowi *fl)
-{
- return 0;
-}
-
-static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
- int nfheader_len)
-{
- if (dst->ops->family == AF_INET6) {
- struct rt6_info *rt = (struct rt6_info *)dst;
- path->path_cookie = rt6_get_cookie(rt);
- }
-
- path->u.rt6.rt6i_nfheader_len = nfheader_len;
-
- return 0;
-}
-
static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
const struct flowi *fl)
{
@@ -118,108 +97,6 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
return 0;
}
-static inline void
-_decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
-{
- struct flowi6 *fl6 = &fl->u.ip6;
- int onlyproto = 0;
- const struct ipv6hdr *hdr = ipv6_hdr(skb);
- u32 offset = sizeof(*hdr);
- struct ipv6_opt_hdr *exthdr;
- const unsigned char *nh = skb_network_header(skb);
- u16 nhoff = IP6CB(skb)->nhoff;
- int oif = 0;
- u8 nexthdr;
-
- if (!nhoff)
- nhoff = offsetof(struct ipv6hdr, nexthdr);
-
- nexthdr = nh[nhoff];
-
- if (skb_dst(skb))
- oif = skb_dst(skb)->dev->ifindex;
-
- memset(fl6, 0, sizeof(struct flowi6));
- fl6->flowi6_mark = skb->mark;
- fl6->flowi6_oif = reverse ? skb->skb_iif : oif;
-
- fl6->daddr = reverse ? hdr->saddr : hdr->daddr;
- fl6->saddr = reverse ? hdr->daddr : hdr->saddr;
-
- while (nh + offset + sizeof(*exthdr) < skb->data ||
- pskb_may_pull(skb, nh + offset + sizeof(*exthdr) - skb->data)) {
- nh = skb_network_header(skb);
- exthdr = (struct ipv6_opt_hdr *)(nh + offset);
-
- switch (nexthdr) {
- case NEXTHDR_FRAGMENT:
- onlyproto = 1;
- /* fall through */
- case NEXTHDR_ROUTING:
- case NEXTHDR_HOP:
- case NEXTHDR_DEST:
- offset += ipv6_optlen(exthdr);
- nexthdr = exthdr->nexthdr;
- exthdr = (struct ipv6_opt_hdr *)(nh + offset);
- break;
-
- case IPPROTO_UDP:
- case IPPROTO_UDPLITE:
- case IPPROTO_TCP:
- case IPPROTO_SCTP:
- case IPPROTO_DCCP:
- if (!onlyproto && (nh + offset + 4 < skb->data ||
- pskb_may_pull(skb, nh + offset + 4 - skb->data))) {
- __be16 *ports;
-
- nh = skb_network_header(skb);
- ports = (__be16 *)(nh + offset);
- fl6->fl6_sport = ports[!!reverse];
- fl6->fl6_dport = ports[!reverse];
- }
- fl6->flowi6_proto = nexthdr;
- return;
-
- case IPPROTO_ICMPV6:
- if (!onlyproto && (nh + offset + 2 < skb->data ||
- pskb_may_pull(skb, nh + offset + 2 - skb->data))) {
- u8 *icmp;
-
- nh = skb_network_header(skb);
- icmp = (u8 *)(nh + offset);
- fl6->fl6_icmp_type = icmp[0];
- fl6->fl6_icmp_code = icmp[1];
- }
- fl6->flowi6_proto = nexthdr;
- return;
-
-#if IS_ENABLED(CONFIG_IPV6_MIP6)
- case IPPROTO_MH:
- offset += ipv6_optlen(exthdr);
- if (!onlyproto && (nh + offset + 3 < skb->data ||
- pskb_may_pull(skb, nh + offset + 3 - skb->data))) {
- struct ip6_mh *mh;
-
- nh = skb_network_header(skb);
- mh = (struct ip6_mh *)(nh + offset);
- fl6->fl6_mh_type = mh->ip6mh_type;
- }
- fl6->flowi6_proto = nexthdr;
- return;
-#endif
-
- /* XXX Why are there these headers? */
- case IPPROTO_AH:
- case IPPROTO_ESP:
- case IPPROTO_COMP:
- default:
- fl6->fl6_ipsec_spi = 0;
- fl6->flowi6_proto = nexthdr;
- return;
- }
- }
-}
-
static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu)
{
@@ -291,9 +168,6 @@ static const struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
.dst_ops = &xfrm6_dst_ops_template,
.dst_lookup = xfrm6_dst_lookup,
.get_saddr = xfrm6_get_saddr,
- .decode_session = _decode_session6,
- .get_tos = xfrm6_get_tos,
- .init_path = xfrm6_init_path,
.fill_dst = xfrm6_fill_dst,
.blackhole_route = ip6_blackhole_route,
};
diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c
index cc979b702c89..aaacac7fdbce 100644
--- a/net/ipv6/xfrm6_protocol.c
+++ b/net/ipv6/xfrm6_protocol.c
@@ -46,7 +46,7 @@ static inline struct xfrm6_protocol __rcu **proto_handlers(u8 protocol)
handler != NULL; \
handler = rcu_dereference(handler->next)) \
-int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err)
+static int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err)
{
int ret;
struct xfrm6_protocol *handler;
@@ -61,7 +61,6 @@ int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err)
return 0;
}
-EXPORT_SYMBOL(xfrm6_rcv_cb);
static int xfrm6_esp_rcv(struct sk_buff *skb)
{
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index bc65db782bfb..d9e5f6808811 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -345,7 +345,7 @@ static void __net_exit xfrm6_tunnel_net_exit(struct net *net)
unsigned int i;
xfrm_flush_gc();
- xfrm_state_flush(net, IPSEC_PROTO_ANY, false, true);
+ xfrm_state_flush(net, 0, false, true);
for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++)
WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i]));
@@ -402,6 +402,10 @@ static void __exit xfrm6_tunnel_fini(void)
xfrm6_tunnel_deregister(&xfrm6_tunnel_handler, AF_INET6);
xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6);
unregister_pernet_subsys(&xfrm6_tunnel_net_ops);
+ /* Someone maybe has gotten the xfrm6_tunnel_spi.
+ * So need to wait it.
+ */
+ rcu_barrier();
kmem_cache_destroy(xfrm6_tunnel_spi_kmem);
}