diff options
Diffstat (limited to 'net/ipv6')
30 files changed, 452 insertions, 279 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index e504204bca92..bf2e5e5fe142 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -332,10 +332,10 @@ config IPV6_IOAM6_LWTUNNEL bool "IPv6: IOAM Pre-allocated Trace insertion support" depends on IPV6 select LWTUNNEL + select DST_CACHE help - Support for the inline insertion of IOAM Pre-allocated - Trace Header (only on locally generated packets), using - the lightweight tunnels mechanism. + Support for the insertion of IOAM Pre-allocated Trace + Header using the lightweight tunnels mechanism. If unsure, say N. diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 1bc7e143217b..3036a45e8a1e 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -5,16 +5,14 @@ obj-$(CONFIG_IPV6) += ipv6.o -ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ +ipv6-y := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ addrlabel.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \ udp_offload.o seg6.o fib6_notifier.o rpl.o ioam6.o -ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o - -ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o +ipv6-$(CONFIG_SYSCTL) += sysctl_net_ipv6.o ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ @@ -29,8 +27,6 @@ ipv6-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o ipv6-$(CONFIG_IPV6_RPL_LWTUNNEL) += rpl_iptunnel.o ipv6-$(CONFIG_IPV6_IOAM6_LWTUNNEL) += ioam6_iptunnel.o -ipv6-objs += $(ipv6-y) - obj-$(CONFIG_INET6_AH) += ah6.o obj-$(CONFIG_INET6_ESP) += esp6.o obj-$(CONFIG_INET6_ESP_OFFLOAD) += esp6_offload.o @@ -48,7 +44,8 @@ obj-$(CONFIG_IPV6_GRE) += ip6_gre.o obj-$(CONFIG_IPV6_FOU) += fou6.o obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o -obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload) +obj-$(CONFIG_INET) += output_core.o protocol.o \ + ip6_offload.o tcpv6_offload.o exthdrs_offload.o obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c6a90b7bbb70..3445f8017430 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -241,6 +241,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .ioam6_enabled = 0, .ioam6_id = IOAM6_DEFAULT_IF_ID, .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE, + .ndisc_evict_nocarrier = 1, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -300,6 +301,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .ioam6_enabled = 0, .ioam6_id = IOAM6_DEFAULT_IF_ID, .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE, + .ndisc_evict_nocarrier = 1, }; /* Check if link is ready: is it up and is a valid qdisc available */ @@ -2237,12 +2239,12 @@ static int addrconf_ifid_6lowpan(u8 *eui, struct net_device *dev) static int addrconf_ifid_ieee1394(u8 *eui, struct net_device *dev) { - union fwnet_hwaddr *ha; + const union fwnet_hwaddr *ha; if (dev->addr_len != FWNET_ALEN) return -1; - ha = (union fwnet_hwaddr *)dev->dev_addr; + ha = (const union fwnet_hwaddr *)dev->dev_addr; memcpy(eui, &ha->uc.uniq_id, sizeof(ha->uc.uniq_id)); eui[0] ^= 2; @@ -3110,6 +3112,9 @@ static void add_v4_addrs(struct inet6_dev *idev) memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4); if (idev->dev->flags&IFF_POINTOPOINT) { + if (idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_NONE) + return; + addr.s6_addr32[0] = htonl(0xfe800000); scope = IFA_LINK; plen = 64; @@ -5542,6 +5547,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_IOAM6_ENABLED] = cnf->ioam6_enabled; array[DEVCONF_IOAM6_ID] = cnf->ioam6_id; array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide; + array[DEVCONF_NDISC_EVICT_NOCARRIER] = cnf->ndisc_evict_nocarrier; } static inline size_t inet6_ifla6_size(void) @@ -6984,6 +6990,15 @@ static const struct ctl_table addrconf_sysctl[] = { .proc_handler = proc_douintvec, }, { + .procname = "ndisc_evict_nocarrier", + .data = &ipv6_devconf.ndisc_evict_nocarrier, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = (void *)SYSCTL_ZERO, + .extra2 = (void *)SYSCTL_ONE, + }, + { /* sentinel */ } }; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index b5878bb8e419..0c4da163535a 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -521,31 +521,32 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; sin->sin6_scope_id = 0; + lock_sock(sk); if (peer) { - if (!inet->inet_dport) - return -ENOTCONN; - if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) && - peer == 1) + if (!inet->inet_dport || + (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) && + peer == 1)) { + release_sock(sk); return -ENOTCONN; + } sin->sin6_port = inet->inet_dport; sin->sin6_addr = sk->sk_v6_daddr; if (np->sndflow) sin->sin6_flowinfo = np->flow_label; - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin, - CGROUP_INET6_GETPEERNAME, - NULL); + BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, + CGROUP_INET6_GETPEERNAME); } else { if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) sin->sin6_addr = np->saddr; else sin->sin6_addr = sk->sk_v6_rcv_saddr; sin->sin6_port = inet->inet_sport; - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin, - CGROUP_INET6_GETSOCKNAME, - NULL); + BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, + CGROUP_INET6_GETSOCKNAME); } sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr, sk->sk_bound_dev_if); + release_sock(sk); return sizeof(*sin); } EXPORT_SYMBOL(inet6_getname); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 3a871a09f962..38ece3b7b839 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -979,7 +979,7 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) if (!skb_valid_dst(skb)) ip6_route_input(skb); - ioam6_fill_trace_data(skb, ns, trace); + ioam6_fill_trace_data(skb, ns, trace, true); break; default: break; diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index a1ac0e3d8c60..47447f0241df 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -610,7 +610,11 @@ int ila_xlat_init_net(struct net *net) if (err) return err; - rhashtable_init(&ilan->xlat.rhash_table, &rht_params); + err = rhashtable_init(&ilan->xlat.rhash_table, &rht_params); + if (err) { + free_bucket_spinlocks(ilan->xlat.locks); + return err; + } return 0; } diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 55c290d55605..67c9114835c8 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -106,7 +106,7 @@ static inline int compute_score(struct sock *sk, struct net *net, if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif)) return -1; - score = 1; + score = sk->sk_bound_dev_if ? 2 : 1; if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; } diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c index 5e8961004832..122a3d47424c 100644 --- a/net/ipv6/ioam6.c +++ b/net/ipv6/ioam6.c @@ -631,7 +631,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, struct ioam6_namespace *ns, struct ioam6_trace_hdr *trace, struct ioam6_schema *sc, - u8 sclen) + u8 sclen, bool is_input) { struct __kernel_sock_timeval ts; u64 raw64; @@ -645,7 +645,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, /* hop_lim and node_id */ if (trace->type.bit0) { byte = ipv6_hdr(skb)->hop_limit; - if (skb->dev) + if (is_input) byte--; raw32 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id; @@ -730,7 +730,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, /* hop_lim and node_id (wide) */ if (trace->type.bit8) { byte = ipv6_hdr(skb)->hop_limit; - if (skb->dev) + if (is_input) byte--; raw64 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id_wide; @@ -770,6 +770,66 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, data += sizeof(__be32); } + /* bit12 undefined: filled with empty value */ + if (trace->type.bit12) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* bit13 undefined: filled with empty value */ + if (trace->type.bit13) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* bit14 undefined: filled with empty value */ + if (trace->type.bit14) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* bit15 undefined: filled with empty value */ + if (trace->type.bit15) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* bit16 undefined: filled with empty value */ + if (trace->type.bit16) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* bit17 undefined: filled with empty value */ + if (trace->type.bit17) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* bit18 undefined: filled with empty value */ + if (trace->type.bit18) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* bit19 undefined: filled with empty value */ + if (trace->type.bit19) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* bit20 undefined: filled with empty value */ + if (trace->type.bit20) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* bit21 undefined: filled with empty value */ + if (trace->type.bit21) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + /* opaque state snapshot */ if (trace->type.bit22) { if (!sc) { @@ -786,21 +846,16 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, /* called with rcu_read_lock() */ void ioam6_fill_trace_data(struct sk_buff *skb, struct ioam6_namespace *ns, - struct ioam6_trace_hdr *trace) + struct ioam6_trace_hdr *trace, + bool is_input) { struct ioam6_schema *sc; u8 sclen = 0; - /* Skip if Overflow flag is set OR - * if an unknown type (bit 12-21) is set + /* Skip if Overflow flag is set */ - if (trace->overflow || - trace->type.bit12 | trace->type.bit13 | trace->type.bit14 | - trace->type.bit15 | trace->type.bit16 | trace->type.bit17 | - trace->type.bit18 | trace->type.bit19 | trace->type.bit20 | - trace->type.bit21) { + if (trace->overflow) return; - } /* NodeLen does not include Opaque State Snapshot length. We need to * take it into account if the corresponding bit is set (bit 22) and @@ -822,7 +877,7 @@ void ioam6_fill_trace_data(struct sk_buff *skb, return; } - __ioam6_fill_trace_data(skb, ns, trace, sc, sclen); + __ioam6_fill_trace_data(skb, ns, trace, sc, sclen, is_input); trace->remlen -= trace->nodelen + sclen; } diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c index f9ee04541c17..f90a87389fcc 100644 --- a/net/ipv6/ioam6_iptunnel.c +++ b/net/ipv6/ioam6_iptunnel.c @@ -9,7 +9,6 @@ #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/net.h> -#include <linux/netlink.h> #include <linux/in6.h> #include <linux/ioam6.h> #include <linux/ioam6_iptunnel.h> @@ -17,18 +16,26 @@ #include <net/sock.h> #include <net/lwtunnel.h> #include <net/ioam6.h> +#include <net/netlink.h> +#include <net/ipv6.h> +#include <net/dst_cache.h> +#include <net/ip6_route.h> +#include <net/addrconf.h> #define IOAM6_MASK_SHORT_FIELDS 0xff100000 #define IOAM6_MASK_WIDE_FIELDS 0xe00000 struct ioam6_lwt_encap { - struct ipv6_hopopt_hdr eh; - u8 pad[2]; /* 2-octet padding for 4n-alignment */ - struct ioam6_hdr ioamh; - struct ioam6_trace_hdr traceh; + struct ipv6_hopopt_hdr eh; + u8 pad[2]; /* 2-octet padding for 4n-alignment */ + struct ioam6_hdr ioamh; + struct ioam6_trace_hdr traceh; } __packed; struct ioam6_lwt { + struct dst_cache cache; + u8 mode; + struct in6_addr tundst; struct ioam6_lwt_encap tuninfo; }; @@ -42,40 +49,29 @@ static struct ioam6_lwt_encap *ioam6_lwt_info(struct lwtunnel_state *lwt) return &ioam6_lwt_state(lwt)->tuninfo; } -static struct ioam6_trace_hdr *ioam6_trace(struct lwtunnel_state *lwt) +static struct ioam6_trace_hdr *ioam6_lwt_trace(struct lwtunnel_state *lwt) { return &(ioam6_lwt_state(lwt)->tuninfo.traceh); } static const struct nla_policy ioam6_iptunnel_policy[IOAM6_IPTUNNEL_MAX + 1] = { + [IOAM6_IPTUNNEL_MODE] = NLA_POLICY_RANGE(NLA_U8, + IOAM6_IPTUNNEL_MODE_MIN, + IOAM6_IPTUNNEL_MODE_MAX), + [IOAM6_IPTUNNEL_DST] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), [IOAM6_IPTUNNEL_TRACE] = NLA_POLICY_EXACT_LEN(sizeof(struct ioam6_trace_hdr)), }; -static int nla_put_ioam6_trace(struct sk_buff *skb, int attrtype, - struct ioam6_trace_hdr *trace) -{ - struct ioam6_trace_hdr *data; - struct nlattr *nla; - int len; - - len = sizeof(*trace); - - nla = nla_reserve(skb, attrtype, len); - if (!nla) - return -EMSGSIZE; - - data = nla_data(nla); - memcpy(data, trace, len); - - return 0; -} - static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace) { u32 fields; if (!trace->type_be32 || !trace->remlen || - trace->remlen > IOAM6_TRACE_DATA_SIZE_MAX / 4) + trace->remlen > IOAM6_TRACE_DATA_SIZE_MAX / 4 || + trace->type.bit12 | trace->type.bit13 | trace->type.bit14 | + trace->type.bit15 | trace->type.bit16 | trace->type.bit17 | + trace->type.bit18 | trace->type.bit19 | trace->type.bit20 | + trace->type.bit21) return false; trace->nodelen = 0; @@ -97,9 +93,10 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla, struct nlattr *tb[IOAM6_IPTUNNEL_MAX + 1]; struct ioam6_lwt_encap *tuninfo; struct ioam6_trace_hdr *trace; - struct lwtunnel_state *s; - int len_aligned; - int len, err; + struct lwtunnel_state *lwt; + struct ioam6_lwt *ilwt; + int len_aligned, err; + u8 mode; if (family != AF_INET6) return -EINVAL; @@ -109,6 +106,16 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla, if (err < 0) return err; + if (!tb[IOAM6_IPTUNNEL_MODE]) + mode = IOAM6_IPTUNNEL_MODE_INLINE; + else + mode = nla_get_u8(tb[IOAM6_IPTUNNEL_MODE]); + + if (!tb[IOAM6_IPTUNNEL_DST] && mode != IOAM6_IPTUNNEL_MODE_INLINE) { + NL_SET_ERR_MSG(extack, "this mode needs a tunnel destination"); + return -EINVAL; + } + if (!tb[IOAM6_IPTUNNEL_TRACE]) { NL_SET_ERR_MSG(extack, "missing trace"); return -EINVAL; @@ -121,15 +128,24 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla, return -EINVAL; } - len = sizeof(*tuninfo) + trace->remlen * 4; - len_aligned = ALIGN(len, 8); - - s = lwtunnel_state_alloc(len_aligned); - if (!s) + len_aligned = ALIGN(trace->remlen * 4, 8); + lwt = lwtunnel_state_alloc(sizeof(*ilwt) + len_aligned); + if (!lwt) return -ENOMEM; - tuninfo = ioam6_lwt_info(s); - tuninfo->eh.hdrlen = (len_aligned >> 3) - 1; + ilwt = ioam6_lwt_state(lwt); + err = dst_cache_init(&ilwt->cache, GFP_ATOMIC); + if (err) { + kfree(lwt); + return err; + } + + ilwt->mode = mode; + if (tb[IOAM6_IPTUNNEL_DST]) + ilwt->tundst = nla_get_in6_addr(tb[IOAM6_IPTUNNEL_DST]); + + tuninfo = ioam6_lwt_info(lwt); + tuninfo->eh.hdrlen = ((sizeof(*tuninfo) + len_aligned) >> 3) - 1; tuninfo->pad[0] = IPV6_TLV_PADN; tuninfo->ioamh.type = IOAM6_TYPE_PREALLOC; tuninfo->ioamh.opt_type = IPV6_TLV_IOAM; @@ -138,27 +154,39 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla, memcpy(&tuninfo->traceh, trace, sizeof(*trace)); - len = len_aligned - len; - if (len == 1) { - tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PAD1; - } else if (len > 0) { + if (len_aligned - trace->remlen * 4) { tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PADN; - tuninfo->traceh.data[trace->remlen * 4 + 1] = len - 2; + tuninfo->traceh.data[trace->remlen * 4 + 1] = 2; } - s->type = LWTUNNEL_ENCAP_IOAM6; - s->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT; + lwt->type = LWTUNNEL_ENCAP_IOAM6; + lwt->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT; - *ts = s; + *ts = lwt; return 0; } -static int ioam6_do_inline(struct sk_buff *skb, struct ioam6_lwt_encap *tuninfo) +static int ioam6_do_fill(struct net *net, struct sk_buff *skb) { struct ioam6_trace_hdr *trace; - struct ipv6hdr *oldhdr, *hdr; struct ioam6_namespace *ns; + + trace = (struct ioam6_trace_hdr *)(skb_transport_header(skb) + + sizeof(struct ipv6_hopopt_hdr) + 2 + + sizeof(struct ioam6_hdr)); + + ns = ioam6_namespace(net, trace->namespace_id); + if (ns) + ioam6_fill_trace_data(skb, ns, trace, false); + + return 0; +} + +static int ioam6_do_inline(struct net *net, struct sk_buff *skb, + struct ioam6_lwt_encap *tuninfo) +{ + struct ipv6hdr *oldhdr, *hdr; int hdrlen, err; hdrlen = (tuninfo->eh.hdrlen + 1) << 3; @@ -187,80 +215,200 @@ static int ioam6_do_inline(struct sk_buff *skb, struct ioam6_lwt_encap *tuninfo) hdr->nexthdr = NEXTHDR_HOP; hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr)); - trace = (struct ioam6_trace_hdr *)(skb_transport_header(skb) - + sizeof(struct ipv6_hopopt_hdr) + 2 - + sizeof(struct ioam6_hdr)); + return ioam6_do_fill(net, skb); +} - ns = ioam6_namespace(dev_net(skb_dst(skb)->dev), trace->namespace_id); - if (ns) - ioam6_fill_trace_data(skb, ns, trace); +static int ioam6_do_encap(struct net *net, struct sk_buff *skb, + struct ioam6_lwt_encap *tuninfo, + struct in6_addr *tundst) +{ + struct dst_entry *dst = skb_dst(skb); + struct ipv6hdr *hdr, *inner_hdr; + int hdrlen, len, err; - return 0; + hdrlen = (tuninfo->eh.hdrlen + 1) << 3; + len = sizeof(*hdr) + hdrlen; + + err = skb_cow_head(skb, len + skb->mac_len); + if (unlikely(err)) + return err; + + inner_hdr = ipv6_hdr(skb); + + skb_push(skb, len); + skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + skb_set_transport_header(skb, sizeof(*hdr)); + + tuninfo->eh.nexthdr = NEXTHDR_IPV6; + memcpy(skb_transport_header(skb), (u8 *)tuninfo, hdrlen); + + hdr = ipv6_hdr(skb); + memcpy(hdr, inner_hdr, sizeof(*hdr)); + + hdr->nexthdr = NEXTHDR_HOP; + hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr)); + hdr->daddr = *tundst; + ipv6_dev_get_saddr(net, dst->dev, &hdr->daddr, + IPV6_PREFER_SRC_PUBLIC, &hdr->saddr); + + skb_postpush_rcsum(skb, hdr, len); + + return ioam6_do_fill(net, skb); } static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct lwtunnel_state *lwt = skb_dst(skb)->lwtstate; + struct dst_entry *dst = skb_dst(skb); + struct in6_addr orig_daddr; + struct ioam6_lwt *ilwt; int err = -EINVAL; if (skb->protocol != htons(ETH_P_IPV6)) goto drop; - /* Only for packets we send and - * that do not contain a Hop-by-Hop yet - */ - if (skb->dev || ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP) - goto out; - - err = ioam6_do_inline(skb, ioam6_lwt_info(lwt)); - if (unlikely(err)) + ilwt = ioam6_lwt_state(dst->lwtstate); + orig_daddr = ipv6_hdr(skb)->daddr; + + switch (ilwt->mode) { + case IOAM6_IPTUNNEL_MODE_INLINE: +do_inline: + /* Direct insertion - if there is no Hop-by-Hop yet */ + if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP) + goto out; + + err = ioam6_do_inline(net, skb, &ilwt->tuninfo); + if (unlikely(err)) + goto drop; + + break; + case IOAM6_IPTUNNEL_MODE_ENCAP: +do_encap: + /* Encapsulation (ip6ip6) */ + err = ioam6_do_encap(net, skb, &ilwt->tuninfo, &ilwt->tundst); + if (unlikely(err)) + goto drop; + + break; + case IOAM6_IPTUNNEL_MODE_AUTO: + /* Automatic (RFC8200 compliant): + * - local packets -> INLINE mode + * - in-transit packets -> ENCAP mode + */ + if (!skb->dev) + goto do_inline; + + goto do_encap; + default: goto drop; + } - err = skb_cow_head(skb, LL_RESERVED_SPACE(skb_dst(skb)->dev)); + err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); if (unlikely(err)) goto drop; + if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) { + preempt_disable(); + dst = dst_cache_get(&ilwt->cache); + preempt_enable(); + + if (unlikely(!dst)) { + struct ipv6hdr *hdr = ipv6_hdr(skb); + struct flowi6 fl6; + + memset(&fl6, 0, sizeof(fl6)); + fl6.daddr = hdr->daddr; + fl6.saddr = hdr->saddr; + fl6.flowlabel = ip6_flowinfo(hdr); + fl6.flowi6_mark = skb->mark; + fl6.flowi6_proto = hdr->nexthdr; + + dst = ip6_route_output(net, NULL, &fl6); + if (dst->error) { + err = dst->error; + dst_release(dst); + goto drop; + } + + preempt_disable(); + dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr); + preempt_enable(); + } + + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + return dst_output(net, sk, skb); + } out: - return lwt->orig_output(net, sk, skb); - + return dst->lwtstate->orig_output(net, sk, skb); drop: kfree_skb(skb); return err; } +static void ioam6_destroy_state(struct lwtunnel_state *lwt) +{ + dst_cache_destroy(&ioam6_lwt_state(lwt)->cache); +} + static int ioam6_fill_encap_info(struct sk_buff *skb, struct lwtunnel_state *lwtstate) { - struct ioam6_trace_hdr *trace = ioam6_trace(lwtstate); + struct ioam6_lwt *ilwt = ioam6_lwt_state(lwtstate); + int err; - if (nla_put_ioam6_trace(skb, IOAM6_IPTUNNEL_TRACE, trace)) - return -EMSGSIZE; + err = nla_put_u8(skb, IOAM6_IPTUNNEL_MODE, ilwt->mode); + if (err) + goto ret; - return 0; + if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE) { + err = nla_put_in6_addr(skb, IOAM6_IPTUNNEL_DST, &ilwt->tundst); + if (err) + goto ret; + } + + err = nla_put(skb, IOAM6_IPTUNNEL_TRACE, sizeof(ilwt->tuninfo.traceh), + &ilwt->tuninfo.traceh); +ret: + return err; } static int ioam6_encap_nlsize(struct lwtunnel_state *lwtstate) { - struct ioam6_trace_hdr *trace = ioam6_trace(lwtstate); + struct ioam6_lwt *ilwt = ioam6_lwt_state(lwtstate); + int nlsize; + + nlsize = nla_total_size(sizeof(ilwt->mode)) + + nla_total_size(sizeof(ilwt->tuninfo.traceh)); - return nla_total_size(sizeof(*trace)); + if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE) + nlsize += nla_total_size(sizeof(ilwt->tundst)); + + return nlsize; } static int ioam6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) { - struct ioam6_trace_hdr *a_hdr = ioam6_trace(a); - struct ioam6_trace_hdr *b_hdr = ioam6_trace(b); - - return (a_hdr->namespace_id != b_hdr->namespace_id); + struct ioam6_trace_hdr *trace_a = ioam6_lwt_trace(a); + struct ioam6_trace_hdr *trace_b = ioam6_lwt_trace(b); + struct ioam6_lwt *ilwt_a = ioam6_lwt_state(a); + struct ioam6_lwt *ilwt_b = ioam6_lwt_state(b); + + return (ilwt_a->mode != ilwt_b->mode || + (ilwt_a->mode != IOAM6_IPTUNNEL_MODE_INLINE && + !ipv6_addr_equal(&ilwt_a->tundst, &ilwt_b->tundst)) || + trace_a->namespace_id != trace_b->namespace_id); } static const struct lwtunnel_encap_ops ioam6_iptun_ops = { - .build_state = ioam6_build_state, + .build_state = ioam6_build_state, + .destroy_state = ioam6_destroy_state, .output = ioam6_output, - .fill_encap = ioam6_fill_encap_info, + .fill_encap = ioam6_fill_encap_info, .get_encap_size = ioam6_encap_nlsize, - .cmp_encap = ioam6_encap_cmp, - .owner = THIS_MODULE, + .cmp_encap = ioam6_encap_cmp, + .owner = THIS_MODULE, }; int __init ioam6_iptunnel_init(void) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 3ad201d372d8..d831d2439693 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1088,7 +1088,7 @@ static void ip6gre_tnl_link_config_common(struct ip6_tnl *t) struct flowi6 *fl6 = &t->fl.u.ip6; if (dev->type != ARPHRD_ETHER) { - memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); + __dev_addr_set(dev, &p->laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); } @@ -1521,7 +1521,7 @@ static int ip6gre_tunnel_init(struct net_device *dev) if (tunnel->parms.collect_md) return 0; - memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr)); + __dev_addr_set(dev, &tunnel->parms.laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr)); if (ipv6_addr_any(&tunnel->parms.raddr)) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 12f985f43bcc..2f044a49afa8 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -464,13 +464,14 @@ static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) int ip6_forward(struct sk_buff *skb) { - struct inet6_dev *idev = __in6_dev_get_safely(skb->dev); struct dst_entry *dst = skb_dst(skb); struct ipv6hdr *hdr = ipv6_hdr(skb); struct inet6_skb_parm *opt = IP6CB(skb); struct net *net = dev_net(dst->dev); + struct inet6_dev *idev; u32 mtu; + idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif)); if (net->ipv6.devconf_all->forwarding == 0) goto error; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 20a67efda47f..484aca492cc0 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1449,7 +1449,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) unsigned int mtu; int t_hlen; - memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); + __dev_addr_set(dev, &p->laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); /* Set up flowi template */ diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 1d8e3ffa225d..527e9ead7449 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -660,7 +660,7 @@ static void vti6_link_config(struct ip6_tnl *t, bool keep_mtu) struct net_device *tdev = NULL; int mtu; - memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); + __dev_addr_set(dev, &p->laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); p->flags &= ~(IP6_TNL_F_CAP_XMIT | IP6_TNL_F_CAP_RCV | diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index e4bdb09c5586..41efca817db4 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -55,6 +55,8 @@ struct ip6_ra_chain *ip6_ra_chain; DEFINE_RWLOCK(ip6_ra_lock); +DEFINE_STATIC_KEY_FALSE(ip6_min_hopcount); + int ip6_ra_control(struct sock *sk, int sel) { struct ip6_ra_chain *ra, *new_ra, **rap; @@ -950,7 +952,14 @@ done: goto e_inval; if (val < 0 || val > 255) goto e_inval; - np->min_hopcount = val; + + if (val) + static_branch_enable(&ip6_min_hopcount); + + /* tcp_v6_err() and tcp_v6_rcv() might read min_hopcount + * while we are changing it. + */ + WRITE_ONCE(np->min_hopcount, val); retv = 0; break; case IPV6_DONTFRAG: diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 4b098521a44c..f03b597e4121 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -142,7 +142,7 @@ struct neigh_table nd_tbl = { }; EXPORT_SYMBOL_GPL(nd_tbl); -void __ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data, +void __ndisc_fill_addr_option(struct sk_buff *skb, int type, const void *data, int data_len, int pad) { int space = __ndisc_opt_addr_space(data_len, pad); @@ -165,7 +165,7 @@ void __ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data, EXPORT_SYMBOL_GPL(__ndisc_fill_addr_option); static inline void ndisc_fill_addr_option(struct sk_buff *skb, int type, - void *data, u8 icmp6_type) + const void *data, u8 icmp6_type) { __ndisc_fill_addr_option(skb, type, data, skb->dev->addr_len, ndisc_addr_option_pad(skb->dev->type)); @@ -1794,6 +1794,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, struct netdev_notifier_change_info *change_info; struct net *net = dev_net(dev); struct inet6_dev *idev; + bool evict_nocarrier; switch (event) { case NETDEV_CHANGEADDR: @@ -1810,10 +1811,19 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, in6_dev_put(idev); break; case NETDEV_CHANGE: + idev = in6_dev_get(dev); + if (!idev) + evict_nocarrier = true; + else { + evict_nocarrier = idev->cnf.ndisc_evict_nocarrier && + net->ipv6.devconf_all->ndisc_evict_nocarrier; + in6_dev_put(idev); + } + change_info = ptr; if (change_info->flags_changed & IFF_NOARP) neigh_changeaddr(&nd_tbl, dev); - if (!netif_carrier_ok(dev)) + if (evict_nocarrier && !netif_carrier_ok(dev)) neigh_carrier_down(&nd_tbl, dev); break; case NETDEV_DOWN: diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index a579ea14a69b..2d816277f2c5 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -247,10 +247,10 @@ ip6t_next_entry(const struct ip6t_entry *entry) /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int -ip6t_do_table(struct sk_buff *skb, - const struct nf_hook_state *state, - struct xt_table *table) +ip6t_do_table(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) { + const struct xt_table *table = priv; unsigned int hook = state->hook; static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); /* Initializing verdict to NF_DROP keeps gcc happy. */ diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index 733c83d38b30..4ad8b2032f1f 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -25,12 +25,7 @@ MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); static inline bool segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert) { - bool r; - pr_debug("segsleft_match:%c 0x%x <= 0x%x <= 0x%x\n", - invert ? '!' : ' ', min, id, max); - r = (id >= min && id <= max) ^ invert; - pr_debug(" result %s\n", r ? "PASS" : "FAILED"); - return r; + return (id >= min && id <= max) ^ invert; } static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par) @@ -65,30 +60,6 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par) return false; } - pr_debug("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen); - pr_debug("TYPE %04X ", rh->type); - pr_debug("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left); - - pr_debug("IPv6 RT segsleft %02X ", - segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1], - rh->segments_left, - !!(rtinfo->invflags & IP6T_RT_INV_SGS))); - pr_debug("type %02X %02X %02X ", - rtinfo->rt_type, rh->type, - (!(rtinfo->flags & IP6T_RT_TYP) || - ((rtinfo->rt_type == rh->type) ^ - !!(rtinfo->invflags & IP6T_RT_INV_TYP)))); - pr_debug("len %02X %04X %02X ", - rtinfo->hdrlen, hdrlen, - !(rtinfo->flags & IP6T_RT_LEN) || - ((rtinfo->hdrlen == hdrlen) ^ - !!(rtinfo->invflags & IP6T_RT_INV_LEN))); - pr_debug("res %02X %02X %02X ", - rtinfo->flags & IP6T_RT_RES, - ((const struct rt0_hdr *)rh)->reserved, - !((rtinfo->flags & IP6T_RT_RES) && - (((const struct rt0_hdr *)rh)->reserved))); - ret = (segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1], rh->segments_left, !!(rtinfo->invflags & IP6T_RT_INV_SGS))) && @@ -107,22 +78,22 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par) reserved), sizeof(_reserved), &_reserved); + if (!rp) { + par->hotdrop = true; + return false; + } ret = (*rp == 0); } - pr_debug("#%d ", rtinfo->addrnr); if (!(rtinfo->flags & IP6T_RT_FST)) { return ret; } else if (rtinfo->flags & IP6T_RT_FST_NSTRICT) { - pr_debug("Not strict "); if (rtinfo->addrnr > (unsigned int)((hdrlen - 8) / 16)) { - pr_debug("There isn't enough space\n"); return false; } else { unsigned int i = 0; - pr_debug("#%d ", rtinfo->addrnr); for (temp = 0; temp < (unsigned int)((hdrlen - 8) / 16); temp++) { @@ -138,26 +109,20 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par) return false; } - if (ipv6_addr_equal(ap, &rtinfo->addrs[i])) { - pr_debug("i=%d temp=%d;\n", i, temp); + if (ipv6_addr_equal(ap, &rtinfo->addrs[i])) i++; - } if (i == rtinfo->addrnr) break; } - pr_debug("i=%d #%d\n", i, rtinfo->addrnr); if (i == rtinfo->addrnr) return ret; else return false; } } else { - pr_debug("Strict "); if (rtinfo->addrnr > (unsigned int)((hdrlen - 8) / 16)) { - pr_debug("There isn't enough space\n"); return false; } else { - pr_debug("#%d ", rtinfo->addrnr); for (temp = 0; temp < rtinfo->addrnr; temp++) { ap = skb_header_pointer(skb, ptr @@ -173,7 +138,6 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par) if (!ipv6_addr_equal(ap, &rtinfo->addrs[temp])) break; } - pr_debug("temp=%d #%d\n", temp, rtinfo->addrnr); if (temp == rtinfo->addrnr && temp == (unsigned int)((hdrlen - 8) / 16)) return ret; diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 727ee8097012..df785ebda0ca 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -27,14 +27,6 @@ static const struct xt_table packet_filter = { .priority = NF_IP6_PRI_FILTER, }; -/* The work comes in here from netfilter.c. */ -static unsigned int -ip6table_filter_hook(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip6t_do_table(skb, state, priv); -} - static struct nf_hook_ops *filter_ops __read_mostly; /* Default to forward because I got too much mail already. */ @@ -90,7 +82,7 @@ static int __init ip6table_filter_init(void) if (ret < 0) return ret; - filter_ops = xt_hook_ops_alloc(&packet_filter, ip6table_filter_hook); + filter_ops = xt_hook_ops_alloc(&packet_filter, ip6t_do_table); if (IS_ERR(filter_ops)) { xt_unregister_template(&packet_filter); return PTR_ERR(filter_ops); diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 9b518ce37d6a..a88b2ce4a3cb 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -29,7 +29,7 @@ static const struct xt_table packet_mangler = { }; static unsigned int -ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state, void *priv) +ip6t_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { unsigned int ret; struct in6_addr saddr, daddr; @@ -46,7 +46,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state, void *pr /* flowlabel and prio (includes version, which shouldn't change either */ flowlabel = *((u_int32_t *)ipv6_hdr(skb)); - ret = ip6t_do_table(skb, state, priv); + ret = ip6t_do_table(priv, skb, state); if (ret != NF_DROP && ret != NF_STOLEN && (!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) || @@ -68,8 +68,8 @@ ip6table_mangle_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { if (state->hook == NF_INET_LOCAL_OUT) - return ip6t_mangle_out(skb, state, priv); - return ip6t_do_table(skb, state, priv); + return ip6t_mangle_out(priv, skb, state); + return ip6t_do_table(priv, skb, state); } static struct nf_hook_ops *mangle_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 921c1723a01e..bf3cb3a13600 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -31,34 +31,27 @@ static const struct xt_table nf_nat_ipv6_table = { .af = NFPROTO_IPV6, }; -static unsigned int ip6table_nat_do_chain(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip6t_do_table(skb, state, priv); -} - static const struct nf_hook_ops nf_nat_ipv6_ops[] = { { - .hook = ip6table_nat_do_chain, + .hook = ip6t_do_table, .pf = NFPROTO_IPV6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_NAT_DST, }, { - .hook = ip6table_nat_do_chain, + .hook = ip6t_do_table, .pf = NFPROTO_IPV6, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP6_PRI_NAT_SRC, }, { - .hook = ip6table_nat_do_chain, + .hook = ip6t_do_table, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_NAT_DST, }, { - .hook = ip6table_nat_do_chain, + .hook = ip6t_do_table, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_NAT_SRC, diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 4f2a04af71d3..08861d5d1f4d 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -31,14 +31,6 @@ static const struct xt_table packet_raw_before_defrag = { .priority = NF_IP6_PRI_RAW_BEFORE_DEFRAG, }; -/* The work comes in here from netfilter.c. */ -static unsigned int -ip6table_raw_hook(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip6t_do_table(skb, state, priv); -} - static struct nf_hook_ops *rawtable_ops __read_mostly; static int ip6table_raw_table_init(struct net *net) @@ -88,7 +80,7 @@ static int __init ip6table_raw_init(void) return ret; /* Register hooks */ - rawtable_ops = xt_hook_ops_alloc(table, ip6table_raw_hook); + rawtable_ops = xt_hook_ops_alloc(table, ip6t_do_table); if (IS_ERR(rawtable_ops)) { xt_unregister_template(table); return PTR_ERR(rawtable_ops); diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 931674034d8b..4df14a9bae78 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -32,13 +32,6 @@ static const struct xt_table security_table = { .priority = NF_IP6_PRI_SECURITY, }; -static unsigned int -ip6table_security_hook(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip6t_do_table(skb, state, priv); -} - static struct nf_hook_ops *sectbl_ops __read_mostly; static int ip6table_security_table_init(struct net *net) @@ -77,7 +70,7 @@ static int __init ip6table_security_init(void) if (ret < 0) return ret; - sectbl_ops = xt_hook_ops_alloc(&security_table, ip6table_security_hook); + sectbl_ops = xt_hook_ops_alloc(&security_table, ip6t_do_table); if (IS_ERR(sectbl_ops)) { xt_unregister_template(&security_table); return PTR_ERR(sectbl_ops); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index a0108415275f..5c47be29b9ee 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -33,7 +33,7 @@ static const char nf_frags_cache_name[] = "nf-frags"; -unsigned int nf_frag_pernet_id __read_mostly; +static unsigned int nf_frag_pernet_id __read_mostly; static struct inet_frags nf_frags; static struct nft_ct_frag6_pernet *nf_frag_pernet(struct net *net) diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index e8a59d8bf2ad..cb4eb1d2c620 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -25,8 +25,6 @@ #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> -extern unsigned int nf_frag_pernet_id; - static DEFINE_MUTEX(defrag6_mutex); static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, @@ -91,12 +89,10 @@ static const struct nf_hook_ops ipv6_defrag_ops[] = { static void __net_exit defrag6_net_exit(struct net *net) { - struct nft_ct_frag6_pernet *nf_frag = net_generic(net, nf_frag_pernet_id); - - if (nf_frag->users) { + if (net->nf.defrag_ipv6_users) { nf_unregister_net_hooks(net, ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops)); - nf_frag->users = 0; + net->nf.defrag_ipv6_users = 0; } } @@ -134,24 +130,23 @@ static void __exit nf_defrag_fini(void) int nf_defrag_ipv6_enable(struct net *net) { - struct nft_ct_frag6_pernet *nf_frag = net_generic(net, nf_frag_pernet_id); int err = 0; mutex_lock(&defrag6_mutex); - if (nf_frag->users == UINT_MAX) { + if (net->nf.defrag_ipv6_users == UINT_MAX) { err = -EOVERFLOW; goto out_unlock; } - if (nf_frag->users) { - nf_frag->users++; + if (net->nf.defrag_ipv6_users) { + net->nf.defrag_ipv6_users++; goto out_unlock; } err = nf_register_net_hooks(net, ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops)); if (err == 0) - nf_frag->users = 1; + net->nf.defrag_ipv6_users = 1; out_unlock: mutex_unlock(&defrag6_mutex); @@ -161,12 +156,10 @@ EXPORT_SYMBOL_GPL(nf_defrag_ipv6_enable); void nf_defrag_ipv6_disable(struct net *net) { - struct nft_ct_frag6_pernet *nf_frag = net_generic(net, nf_frag_pernet_id); - mutex_lock(&defrag6_mutex); - if (nf_frag->users) { - nf_frag->users--; - if (nf_frag->users == 0) + if (net->nf.defrag_ipv6_users) { + net->nf.defrag_ipv6_users--; + if (net->nf.defrag_ipv6_users == 0) nf_unregister_net_hooks(net, ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops)); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9b9ef09382ab..3ae25b8ffbd6 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -6306,11 +6306,11 @@ static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write, static struct ctl_table ipv6_route_table_template[] = { { - .procname = "flush", - .data = &init_net.ipv6.sysctl.flush_delay, + .procname = "max_size", + .data = &init_net.ipv6.sysctl.ip6_rt_max_size, .maxlen = sizeof(int), - .mode = 0200, - .proc_handler = ipv6_sysctl_rtcache_flush + .mode = 0644, + .proc_handler = proc_dointvec, }, { .procname = "gc_thresh", @@ -6320,11 +6320,11 @@ static struct ctl_table ipv6_route_table_template[] = { .proc_handler = proc_dointvec, }, { - .procname = "max_size", - .data = &init_net.ipv6.sysctl.ip6_rt_max_size, + .procname = "flush", + .data = &init_net.ipv6.sysctl.flush_delay, .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .mode = 0200, + .proc_handler = ipv6_sysctl_rtcache_flush }, { .procname = "gc_min_interval", @@ -6396,10 +6396,10 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) GFP_KERNEL); if (table) { - table[0].data = &net->ipv6.sysctl.flush_delay; - table[0].extra1 = net; + table[0].data = &net->ipv6.sysctl.ip6_rt_max_size; table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh; - table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; + table[2].data = &net->ipv6.sysctl.flush_delay; + table[2].extra1 = net; table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout; table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval; @@ -6411,7 +6411,7 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) /* Don't export sysctls to unprivileged users */ if (net->user_ns != &init_user_ns) - table[0].procname = NULL; + table[1].procname = NULL; } return table; diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index e412817fba2f..5daa1c3ed83b 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -374,7 +374,11 @@ static int __net_init seg6_net_init(struct net *net) net->ipv6.seg6_data = sdata; #ifdef CONFIG_IPV6_SEG6_HMAC - seg6_hmac_net_init(net); + if (seg6_hmac_net_init(net)) { + kfree(rcu_dereference_raw(sdata->tun_src)); + kfree(sdata); + return -ENOMEM; + }; #endif return 0; @@ -388,7 +392,7 @@ static void __net_exit seg6_net_exit(struct net *net) seg6_hmac_net_exit(net); #endif - kfree(sdata->tun_src); + kfree(rcu_dereference_raw(sdata->tun_src)); kfree(sdata); } diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index 687d95dce085..29bc4e7c3046 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -405,9 +405,7 @@ int __net_init seg6_hmac_net_init(struct net *net) { struct seg6_pernet_data *sdata = seg6_pernet(net); - rhashtable_init(&sdata->hmac_infos, &rht_params); - - return 0; + return rhashtable_init(&sdata->hmac_infos, &rht_params); } EXPORT_SYMBOL(seg6_hmac_net_init); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index ef0c7a7c18e2..1b57ee36d668 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -204,7 +204,7 @@ static int ipip6_tunnel_create(struct net_device *dev) struct sit_net *sitn = net_generic(net, sit_net_id); int err; - memcpy(dev->dev_addr, &t->parms.iph.saddr, 4); + __dev_addr_set(dev, &t->parms.iph.saddr, 4); memcpy(dev->broadcast, &t->parms.iph.daddr, 4); if ((__force u16)t->parms.i_flags & SIT_ISATAP) @@ -1149,7 +1149,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p, synchronize_net(); t->parms.iph.saddr = p->iph.saddr; t->parms.iph.daddr = p->iph.daddr; - memcpy(t->dev->dev_addr, &p->iph.saddr, 4); + __dev_addr_set(t->dev, &p->iph.saddr, 4); memcpy(t->dev->broadcast, &p->iph.daddr, 4); ipip6_tunnel_link(sitn, t); t->parms.iph.ttl = p->iph.ttl; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0ce52d46e4f8..2cc9b0e53ad1 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -108,8 +108,8 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) const struct rt6_info *rt = (const struct rt6_info *)dst; sk->sk_rx_dst = dst; - inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; - tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt); + sk->sk_rx_dst_ifindex = skb->skb_iif; + sk->sk_rx_dst_cookie = rt6_get_cookie(rt); } } @@ -414,9 +414,12 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk->sk_state == TCP_CLOSE) goto out; - if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) { - __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); - goto out; + if (static_branch_unlikely(&ip6_min_hopcount)) { + /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */ + if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) { + __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); + goto out; + } } tp = tcp_sk(sk); @@ -569,7 +572,7 @@ done: static void tcp_v6_reqsk_destructor(struct request_sock *req) { kfree(inet_rsk(req)->ipv6_opt); - kfree_skb(inet_rsk(req)->pktopts); + consume_skb(inet_rsk(req)->pktopts); } #ifdef CONFIG_TCP_MD5SIG @@ -599,6 +602,7 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; int l3index = 0; u8 prefixlen; + u8 flags; if (optlen < sizeof(cmd)) return -EINVAL; @@ -609,6 +613,8 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, if (sin6->sin6_family != AF_INET6) return -EINVAL; + flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; + if (optname == TCP_MD5SIG_EXT && cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { prefixlen = cmd.tcpm_prefixlen; @@ -619,7 +625,7 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128; } - if (optname == TCP_MD5SIG_EXT && + if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex && cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) { struct net_device *dev; @@ -640,9 +646,9 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, if (ipv6_addr_v4mapped(&sin6->sin6_addr)) return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], AF_INET, prefixlen, - l3index); + l3index, flags); return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr, - AF_INET6, prefixlen, l3index); + AF_INET6, prefixlen, l3index, flags); } if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) @@ -650,12 +656,12 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, if (ipv6_addr_v4mapped(&sin6->sin6_addr)) return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], - AF_INET, prefixlen, l3index, + AF_INET, prefixlen, l3index, flags, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr, - AF_INET6, prefixlen, l3index, + AF_INET6, prefixlen, l3index, flags, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); } @@ -963,7 +969,6 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 fl6.flowlabel = label; buff->ip_summed = CHECKSUM_PARTIAL; - buff->csum = 0; __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); @@ -1404,7 +1409,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * * across. Shucks. */ tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr, - AF_INET6, 128, l3index, key->key, key->keylen, + AF_INET6, 128, l3index, key->flags, key->key, key->keylen, sk_gfp_mask(sk, GFP_ATOMIC)); } #endif @@ -1506,9 +1511,9 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) sock_rps_save_rxhash(sk, skb); sk_mark_napi_id(sk, skb); if (dst) { - if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || + if (sk->sk_rx_dst_ifindex != skb->skb_iif || INDIRECT_CALL_1(dst->ops->check, ip6_dst_check, - dst, np->rx_dst_cookie) == NULL) { + dst, sk->sk_rx_dst_cookie) == NULL) { dst_release(dst); sk->sk_rx_dst = NULL; } @@ -1588,7 +1593,7 @@ ipv6_pktoptions: } } - kfree_skb(opt_skb); + consume_skb(opt_skb); return 0; } @@ -1618,7 +1623,6 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) { - struct sk_buff *skb_to_free; int sdif = inet6_sdif(skb); int dif = inet6_iif(skb); const struct tcphdr *th; @@ -1724,9 +1728,13 @@ process: return 0; } } - if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) { - __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); - goto discard_and_relse; + + if (static_branch_unlikely(&ip6_min_hopcount)) { + /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */ + if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) { + __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); + goto discard_and_relse; + } } if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) @@ -1754,17 +1762,12 @@ process: tcp_segs_in(tcp_sk(sk), skb); ret = 0; if (!sock_owned_by_user(sk)) { - skb_to_free = sk->sk_rx_skb_cache; - sk->sk_rx_skb_cache = NULL; ret = tcp_v6_do_rcv(sk, skb); } else { if (tcp_add_backlog(sk, skb)) goto discard_and_relse; - skb_to_free = NULL; } bh_unlock_sock(sk); - if (skb_to_free) - __kfree_skb(skb_to_free); put_and_return: if (refcounted) sock_put(sk); @@ -1875,9 +1878,9 @@ INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb) struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst); if (dst) - dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie); + dst = dst_check(dst, sk->sk_rx_dst_cookie); if (dst && - inet_sk(sk)->rx_dst_ifindex == skb->skb_iif) + sk->sk_rx_dst_ifindex == skb->skb_iif) skb_dst_set_noref(skb, dst); } } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index e505bb007e9f..12c12619ee35 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -133,7 +133,8 @@ static int compute_score(struct sock *sk, struct net *net, dev_match = udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif); if (!dev_match) return -1; - score++; + if (sk->sk_bound_dev_if) + score++; if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; @@ -883,7 +884,7 @@ static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) if (udp_sk_rx_dst_set(sk, dst)) { const struct rt6_info *rt = (const struct rt6_info *)dst; - inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt); + sk->sk_rx_dst_cookie = rt6_get_cookie(rt); } } @@ -1072,7 +1073,7 @@ INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb) dst = READ_ONCE(sk->sk_rx_dst); if (dst) - dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie); + dst = dst_check(dst, sk->sk_rx_dst_cookie); if (dst) { /* set noref for now. * any place which wants to hold dst has to call @@ -1434,7 +1435,6 @@ do_udp_sendmsg: if (!fl6.flowi6_oif) fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; - fl6.flowi6_mark = ipc6.sockc.mark; fl6.flowi6_uid = sk->sk_uid; if (msg->msg_controllen) { @@ -1470,6 +1470,7 @@ do_udp_sendmsg: ipc6.opt = opt; fl6.flowi6_proto = sk->sk_protocol; + fl6.flowi6_mark = ipc6.sockc.mark; fl6.daddr = *daddr; if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr)) fl6.saddr = np->saddr; |