diff options
Diffstat (limited to 'net/ipv6')
35 files changed, 635 insertions, 346 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0169ccf5aa4f..f7c8bbeb27b7 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1170,6 +1170,9 @@ enum { IPV6_SADDR_RULE_PRIVACY, IPV6_SADDR_RULE_ORCHID, IPV6_SADDR_RULE_PREFIX, +#ifdef CONFIG_IPV6_OPTIMISTIC_DAD + IPV6_SADDR_RULE_NOT_OPTIMISTIC, +#endif IPV6_SADDR_RULE_MAX }; @@ -1197,6 +1200,15 @@ static inline int ipv6_saddr_preferred(int type) return 0; } +static inline bool ipv6_use_optimistic_addr(struct inet6_dev *idev) +{ +#ifdef CONFIG_IPV6_OPTIMISTIC_DAD + return idev && idev->cnf.optimistic_dad && idev->cnf.use_optimistic; +#else + return false; +#endif +} + static int ipv6_get_saddr_eval(struct net *net, struct ipv6_saddr_score *score, struct ipv6_saddr_dst *dst, @@ -1257,10 +1269,16 @@ static int ipv6_get_saddr_eval(struct net *net, score->scopedist = ret; break; case IPV6_SADDR_RULE_PREFERRED: + { /* Rule 3: Avoid deprecated and optimistic addresses */ + u8 avoid = IFA_F_DEPRECATED; + + if (!ipv6_use_optimistic_addr(score->ifa->idev)) + avoid |= IFA_F_OPTIMISTIC; ret = ipv6_saddr_preferred(score->addr_type) || - !(score->ifa->flags & (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)); + !(score->ifa->flags & avoid); break; + } #ifdef CONFIG_IPV6_MIP6 case IPV6_SADDR_RULE_HOA: { @@ -1306,6 +1324,14 @@ static int ipv6_get_saddr_eval(struct net *net, ret = score->ifa->prefix_len; score->matchlen = ret; break; +#ifdef CONFIG_IPV6_OPTIMISTIC_DAD + case IPV6_SADDR_RULE_NOT_OPTIMISTIC: + /* Optimistic addresses still have lower precedence than other + * preferred addresses. + */ + ret = !(score->ifa->flags & IFA_F_OPTIMISTIC); + break; +#endif default: ret = 0; } @@ -1385,10 +1411,8 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, if (unlikely(score->addr_type == IPV6_ADDR_ANY || score->addr_type & IPV6_ADDR_MULTICAST)) { - LIMIT_NETDEBUG(KERN_DEBUG - "ADDRCONF: unspecified / multicast address " - "assigned as unicast address on %s", - dev->name); + net_dbg_ratelimited("ADDRCONF: unspecified / multicast address assigned as unicast address on %s", + dev->name); continue; } @@ -2315,8 +2339,8 @@ ok: else stored_lft = 0; if (!update_lft && !create && stored_lft) { - const u32 minimum_lft = min( - stored_lft, (u32)MIN_VALID_LIFETIME); + const u32 minimum_lft = min_t(u32, + stored_lft, MIN_VALID_LIFETIME); valid_lft = max(valid_lft, minimum_lft); /* RFC4862 Section 5.5.3e: @@ -2519,7 +2543,8 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags, if (!dev) return -ENODEV; - if ((idev = __in6_dev_get(dev)) == NULL) + idev = __in6_dev_get(dev); + if (idev == NULL) return -ENXIO; read_lock_bh(&idev->lock); @@ -2666,7 +2691,8 @@ static void init_loopback(struct net_device *dev) ASSERT_RTNL(); - if ((idev = ipv6_find_idev(dev)) == NULL) { + idev = ipv6_find_idev(dev); + if (idev == NULL) { pr_debug("%s: add_dev failed\n", __func__); return; } @@ -2789,7 +2815,8 @@ static void addrconf_sit_config(struct net_device *dev) * our v4 addrs in the tunnel */ - if ((idev = ipv6_find_idev(dev)) == NULL) { + idev = ipv6_find_idev(dev); + if (idev == NULL) { pr_debug("%s: add_dev failed\n", __func__); return; } @@ -2813,7 +2840,8 @@ static void addrconf_gre_config(struct net_device *dev) ASSERT_RTNL(); - if ((idev = ipv6_find_idev(dev)) == NULL) { + idev = ipv6_find_idev(dev); + if (idev == NULL) { pr_debug("%s: add_dev failed\n", __func__); return; } @@ -3222,8 +3250,15 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) * Optimistic nodes can start receiving * Frames right away */ - if (ifp->flags & IFA_F_OPTIMISTIC) + if (ifp->flags & IFA_F_OPTIMISTIC) { ip6_ins_rt(ifp->rt); + if (ipv6_use_optimistic_addr(idev)) { + /* Because optimistic nodes can use this address, + * notify listeners. If DAD fails, RTM_DELADDR is sent. + */ + ipv6_ifa_notify(RTM_NEWADDR, ifp); + } + } addrconf_dad_kick(ifp); out: @@ -4330,6 +4365,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD array[DEVCONF_OPTIMISTIC_DAD] = cnf->optimistic_dad; + array[DEVCONF_USE_OPTIMISTIC] = cnf->use_optimistic; #endif #ifdef CONFIG_IPV6_MROUTE array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding; @@ -5156,6 +5192,14 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, + { + .procname = "use_optimistic", + .data = &ipv6_devconf.use_optimistic, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + + }, #endif #ifdef CONFIG_IPV6_MROUTE { @@ -5336,10 +5380,8 @@ static void __net_exit addrconf_exit_net(struct net *net) __addrconf_sysctl_unregister(net->ipv6.devconf_dflt); __addrconf_sysctl_unregister(net->ipv6.devconf_all); #endif - if (!net_eq(net, &init_net)) { - kfree(net->ipv6.devconf_dflt); - kfree(net->ipv6.devconf_all); - } + kfree(net->ipv6.devconf_dflt); + kfree(net->ipv6.devconf_all); } static struct pernet_operations addrconf_ops = { diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 6d16eb0e0c7f..a6727add2624 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -272,10 +272,9 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir) ipv6_rearrange_destopt(iph, exthdr.opth); case NEXTHDR_HOP: if (!zero_out_mutable_opts(exthdr.opth)) { - LIMIT_NETDEBUG( - KERN_WARNING "overrun %sopts\n", - nexthdr == NEXTHDR_HOP ? - "hop" : "dest"); + net_dbg_ratelimited("overrun %sopts\n", + nexthdr == NEXTHDR_HOP ? + "hop" : "dest"); return -EINVAL; } break; @@ -354,7 +353,8 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) ahp = x->data; ahash = ahp->ahash; - if ((err = skb_cow_data(skb, 0, &trailer)) < 0) + err = skb_cow_data(skb, 0, &trailer); + if (err < 0) goto out; nfrags = err; @@ -560,8 +560,8 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) if (!pskb_may_pull(skb, ah_hlen)) goto out; - - if ((err = skb_cow_data(skb, 0, &trailer)) < 0) + err = skb_cow_data(skb, 0, &trailer); + if (err < 0) goto out; nfrags = err; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 2cdc38338be3..100c589a2a6c 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -325,6 +325,16 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu) kfree_skb(skb); } +static void ip6_datagram_prepare_pktinfo_errqueue(struct sk_buff *skb) +{ + int ifindex = skb->dev ? skb->dev->ifindex : -1; + + if (skb->protocol == htons(ETH_P_IPV6)) + IP6CB(skb)->iif = ifindex; + else + PKTINFO_SKB_CB(skb)->ipi_ifindex = ifindex; +} + /* * Handle MSG_ERRQUEUE */ @@ -351,7 +361,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) msg->msg_flags |= MSG_TRUNC; copied = len; } - err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + err = skb_copy_datagram_msg(skb, 0, msg, copied); if (err) goto out_free_skb; @@ -388,8 +398,12 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; sin->sin6_port = 0; - if (np->rxopt.all) + if (np->rxopt.all) { + if (serr->ee.ee_origin != SO_EE_ORIGIN_ICMP && + serr->ee.ee_origin != SO_EE_ORIGIN_ICMP6) + ip6_datagram_prepare_pktinfo_errqueue(skb); ip6_datagram_recv_common_ctl(sk, msg, skb); + } if (skb->protocol == htons(ETH_P_IPV6)) { sin->sin6_addr = ipv6_hdr(skb)->saddr; if (np->rxopt.all) @@ -445,7 +459,7 @@ int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, msg->msg_flags |= MSG_TRUNC; copied = len; } - err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + err = skb_copy_datagram_msg(skb, 0, msg, copied); if (err) goto out_free_skb; @@ -491,7 +505,10 @@ void ip6_datagram_recv_common_ctl(struct sock *sk, struct msghdr *msg, ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr, &src_info.ipi6_addr); } - put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); + + if (src_info.ipi6_ifindex >= 0) + put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, + sizeof(src_info), &src_info); } } @@ -640,7 +657,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, int len; int err = 0; - for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { + for_each_cmsghdr(cmsg, msg) { int addr_type; if (!CMSG_OK(msg, cmsg)) { @@ -893,8 +910,8 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, break; } default: - LIMIT_NETDEBUG(KERN_DEBUG "invalid cmsg type: %d\n", - cmsg->cmsg_type); + net_dbg_ratelimited("invalid cmsg type: %d\n", + cmsg->cmsg_type); err = -EINVAL; goto exit_f; } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 83fc3a385a26..e48f2c7c5c59 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -286,8 +286,8 @@ static int esp_input_done2(struct sk_buff *skb, int err) err = -EINVAL; padlen = nexthdr[0]; if (padlen + 2 + alen >= elen) { - LIMIT_NETDEBUG(KERN_WARNING "ipsec esp packet is garbage " - "padlen=%d, elen=%d\n", padlen + 2, elen - alen); + net_dbg_ratelimited("ipsec esp packet is garbage padlen=%d, elen=%d\n", + padlen + 2, elen - alen); goto out; } @@ -345,7 +345,8 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) goto out; } - if ((nfrags = skb_cow_data(skb, 0, &trailer)) < 0) { + nfrags = skb_cow_data(skb, 0, &trailer); + if (nfrags < 0) { ret = -EINVAL; goto out; } @@ -544,12 +545,12 @@ static int esp_init_authenc(struct xfrm_state *x) BUG_ON(!aalg_desc); err = -EINVAL; - if (aalg_desc->uinfo.auth.icv_fullbits/8 != + if (aalg_desc->uinfo.auth.icv_fullbits / 8 != crypto_aead_authsize(aead)) { - NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n", - x->aalg->alg_name, - crypto_aead_authsize(aead), - aalg_desc->uinfo.auth.icv_fullbits/8); + pr_info("ESP: %s digestsize %u != %hu\n", + x->aalg->alg_name, + crypto_aead_authsize(aead), + aalg_desc->uinfo.auth.icv_fullbits / 8); goto free_key; } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index bfde361b6134..a7bbbe45570b 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -47,7 +47,7 @@ #include <net/xfrm.h> #endif -#include <asm/uaccess.h> +#include <linux/uaccess.h> /* * Parsing tlv encoded headers. @@ -184,7 +184,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) int ret; if (opt->dsthao) { - LIMIT_NETDEBUG(KERN_DEBUG "hao duplicated\n"); + net_dbg_ratelimited("hao duplicated\n"); goto discard; } opt->dsthao = opt->dst1; @@ -193,14 +193,14 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + optoff); if (hao->length != 16) { - LIMIT_NETDEBUG( - KERN_DEBUG "hao invalid option length = %d\n", hao->length); + net_dbg_ratelimited("hao invalid option length = %d\n", + hao->length); goto discard; } if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) { - LIMIT_NETDEBUG( - KERN_DEBUG "hao is not an unicast addr: %pI6\n", &hao->addr); + net_dbg_ratelimited("hao is not an unicast addr: %pI6\n", + &hao->addr); goto discard; } @@ -551,8 +551,8 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff) memcpy(&IP6CB(skb)->ra, nh + optoff + 2, sizeof(IP6CB(skb)->ra)); return true; } - LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", - nh[optoff + 1]); + net_dbg_ratelimited("ipv6_hop_ra: wrong RA length %d\n", + nh[optoff + 1]); kfree_skb(skb); return false; } @@ -566,8 +566,8 @@ static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff) u32 pkt_len; if (nh[optoff + 1] != 4 || (optoff & 3) != 2) { - LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", - nh[optoff+1]); + net_dbg_ratelimited("ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", + nh[optoff+1]); IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), IPSTATS_MIB_INHDRERRORS); goto drop; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 97ae70077a4f..d674152b6ede 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -243,7 +243,8 @@ int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *icmp6h; int err = 0; - if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) + skb = skb_peek(&sk->sk_write_queue); + if (skb == NULL) goto out; icmp6h = icmp6_hdr(skb); @@ -338,7 +339,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net, * anycast. */ if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) { - LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: acast source\n"); + net_dbg_ratelimited("icmp6_send: acast source\n"); dst_release(dst); return ERR_PTR(-EINVAL); } @@ -452,7 +453,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) * and anycast addresses will be checked later. */ if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) { - LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: addr_any/mcast source\n"); + net_dbg_ratelimited("icmp6_send: addr_any/mcast source\n"); return; } @@ -460,7 +461,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) * Never answer to a ICMP packet. */ if (is_ineligible(skb)) { - LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: no reply to icmp error\n"); + net_dbg_ratelimited("icmp6_send: no reply to icmp error\n"); return; } @@ -509,7 +510,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) len = skb->len - msg.offset; len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr)); if (len < 0) { - LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n"); + net_dbg_ratelimited("icmp: len problem\n"); goto out_dst_release; } @@ -679,6 +680,7 @@ static int icmpv6_rcv(struct sk_buff *skb) const struct in6_addr *saddr, *daddr; struct icmp6hdr *hdr; u8 type; + bool success = false; if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { struct sec_path *sp = skb_sec_path(skb); @@ -706,9 +708,8 @@ static int icmpv6_rcv(struct sk_buff *skb) daddr = &ipv6_hdr(skb)->daddr; if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) { - LIMIT_NETDEBUG(KERN_DEBUG - "ICMPv6 checksum failed [%pI6c > %pI6c]\n", - saddr, daddr); + net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n", + saddr, daddr); goto csum_error; } @@ -727,7 +728,7 @@ static int icmpv6_rcv(struct sk_buff *skb) break; case ICMPV6_ECHO_REPLY: - ping_rcv(skb); + success = ping_rcv(skb); break; case ICMPV6_PKT_TOOBIG: @@ -781,7 +782,7 @@ static int icmpv6_rcv(struct sk_buff *skb) if (type & ICMPV6_INFOMSG_MASK) break; - LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n"); + net_dbg_ratelimited("icmpv6: msg of unknown type\n"); /* * error of unknown type. @@ -791,7 +792,14 @@ static int icmpv6_rcv(struct sk_buff *skb) icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu); } - kfree_skb(skb); + /* until the v6 path can be better sorted assume failure and + * preserve the status quo behaviour for the rest of the paths to here + */ + if (success) + consume_skb(skb); + else + kfree_skb(skb); + return 0; csum_error: @@ -1009,4 +1017,3 @@ struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net) return table; } #endif - diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 3dd7d4ebd7cd..2f780cba6e12 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -654,7 +654,11 @@ release: goto done; err = -ENOMEM; - if (sfl1 == NULL || (err = mem_check(sk)) != 0) + if (sfl1 == NULL) + goto done; + + err = mem_check(sk); + if (err != 0) goto done; fl1 = fl_intern(net, fl, freq.flr_label); @@ -769,10 +773,9 @@ static void ip6fl_seq_stop(struct seq_file *seq, void *v) static int ip6fl_seq_show(struct seq_file *seq, void *v) { struct ip6fl_iter_state *state = ip6fl_seq_private(seq); - if (v == SEQ_START_TOKEN) - seq_printf(seq, "%-5s %-1s %-6s %-6s %-6s %-8s %-32s %s\n", - "Label", "S", "Owner", "Users", "Linger", "Expires", "Dst", "Opt"); - else { + if (v == SEQ_START_TOKEN) { + seq_puts(seq, "Label S Owner Users Linger Expires Dst Opt\n"); + } else { struct ip6_flowlabel *fl = v; seq_printf(seq, "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n", diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 0e32d2e1bdbf..13cda4c6313b 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -902,7 +902,7 @@ static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb, struct net_device_stats *stats = &t->dev->stats; int ret; - if (!ip6_tnl_xmit_ctl(t)) + if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr)) goto tx_err; switch (skb->protocol) { diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index a3084ab5df6c..aacdcb4dc762 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -220,7 +220,8 @@ resubmit: nexthdr = skb_network_header(skb)[nhoff]; raw = raw6_local_deliver(skb, nexthdr); - if ((ipprot = rcu_dereference(inet6_protos[nexthdr])) != NULL) { + ipprot = rcu_dereference(inet6_protos[nexthdr]); + if (ipprot != NULL) { int ret; if (ipprot->flags & INET6_PROTO_FINAL) { diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 01e12d0d8fcc..46d452a56d3e 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -79,7 +79,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_SIT | SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM | - SKB_GSO_MPLS | + SKB_GSO_TUNNEL_REMCSUM | SKB_GSO_TCPV6 | 0))) goto out; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 8e950c250ada..ce69a12ae48c 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -747,13 +747,11 @@ slow_path: if (len < left) { len &= ~7; } - /* - * Allocate buffer. - */ - if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) + - hroom + troom, GFP_ATOMIC)) == NULL) { - NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); + /* Allocate buffer */ + frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) + + hroom + troom, GFP_ATOMIC); + if (!frag) { IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); err = -ENOMEM; @@ -900,7 +898,8 @@ static int ip6_dst_lookup_tail(struct sock *sk, if (*dst == NULL) *dst = ip6_route_output(net, sk, fl6); - if ((err = (*dst)->error)) + err = (*dst)->error; + if (err) goto out_err_release; if (ipv6_addr_any(&fl6->saddr)) { @@ -948,7 +947,8 @@ static int ip6_dst_lookup_tail(struct sock *sk, memcpy(&fl_gw6, fl6, sizeof(struct flowi6)); memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr)); *dst = ip6_route_output(net, sk, &fl_gw6); - if ((err = (*dst)->error)) + err = (*dst)->error; + if (err) goto out_err_release; } } @@ -1056,7 +1056,8 @@ static inline int ip6_ufo_append_data(struct sock *sk, * device, so create one single skb packet containing complete * udp datagram */ - if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { + skb = skb_peek_tail(&sk->sk_write_queue); + if (skb == NULL) { skb = sock_alloc_send_skb(sk, hh_len + fragheaderlen + transhdrlen + 20, (flags & MSG_DONTWAIT), &err); @@ -1536,7 +1537,8 @@ int ip6_push_pending_frames(struct sock *sk) unsigned char proto = fl6->flowi6_proto; int err = 0; - if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) + skb = __skb_dequeue(&sk->sk_write_queue); + if (skb == NULL) goto out; tail_skb = &(skb_shinfo(skb)->frag_list); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 9cb94cfa0ae7..92b3da571980 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -183,6 +183,7 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_ unsigned int hash = HASH(remote, local); struct ip6_tnl *t; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + struct in6_addr any; for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (ipv6_addr_equal(local, &t->parms.laddr) && @@ -190,6 +191,22 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_ (t->dev->flags & IFF_UP)) return t; } + + memset(&any, 0, sizeof(any)); + hash = HASH(&any, local); + for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { + if (ipv6_addr_equal(local, &t->parms.laddr) && + (t->dev->flags & IFF_UP)) + return t; + } + + hash = HASH(remote, &any); + for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { + if (ipv6_addr_equal(remote, &t->parms.raddr) && + (t->dev->flags & IFF_UP)) + return t; + } + t = rcu_dereference(ip6n->tnls_wc[0]); if (t && (t->dev->flags & IFF_UP)) return t; @@ -474,6 +491,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, int rel_msg = 0; u8 rel_type = ICMPV6_DEST_UNREACH; u8 rel_code = ICMPV6_ADDR_UNREACH; + u8 tproto; __u32 rel_info = 0; __u16 len; int err = -ENOENT; @@ -483,11 +501,12 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, processing of the error. */ rcu_read_lock(); - if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr, - &ipv6h->saddr)) == NULL) + t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr, &ipv6h->saddr); + if (t == NULL) goto out; - if (t->parms.proto != ipproto && t->parms.proto != 0) + tproto = ACCESS_ONCE(t->parms.proto); + if (tproto != ipproto && tproto != 0) goto out; err = 0; @@ -531,7 +550,8 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, mtu = IPV6_MIN_MTU; t->dev->mtu = mtu; - if ((len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len)) > mtu) { + len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len); + if (len > mtu) { rel_type = ICMPV6_PKT_TOOBIG; rel_code = 0; rel_info = mtu; @@ -788,15 +808,16 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, { struct ip6_tnl *t; const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + u8 tproto; int err; rcu_read_lock(); - - if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, - &ipv6h->daddr)) != NULL) { + t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr); + if (t != NULL) { struct pcpu_sw_netstats *tstats; - if (t->parms.proto != ipproto && t->parms.proto != 0) { + tproto = ACCESS_ONCE(t->parms.proto); + if (tproto != ipproto && tproto != 0) { rcu_read_unlock(); goto discard; } @@ -902,24 +923,28 @@ ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr) return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); } -int ip6_tnl_xmit_ctl(struct ip6_tnl *t) +int ip6_tnl_xmit_ctl(struct ip6_tnl *t, + const struct in6_addr *laddr, + const struct in6_addr *raddr) { struct __ip6_tnl_parm *p = &t->parms; int ret = 0; struct net *net = t->net; - if (p->flags & IP6_TNL_F_CAP_XMIT) { + if ((p->flags & IP6_TNL_F_CAP_XMIT) || + ((p->flags & IP6_TNL_F_CAP_PER_PACKET) && + (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_XMIT))) { struct net_device *ldev = NULL; rcu_read_lock(); if (p->link) ldev = dev_get_by_index_rcu(net, p->link); - if (unlikely(!ipv6_chk_addr(net, &p->laddr, ldev, 0))) + if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0))) pr_warn("%s xmit: Local address not yet configured!\n", p->name); - else if (!ipv6_addr_is_multicast(&p->raddr) && - unlikely(ipv6_chk_addr(net, &p->raddr, NULL, 0))) + else if (!ipv6_addr_is_multicast(raddr) && + unlikely(ipv6_chk_addr(net, raddr, NULL, 0))) pr_warn("%s xmit: Routing loop! Remote address found on this node!\n", p->name); else @@ -968,8 +993,34 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, u8 proto; int err = -1; - if (!fl6->flowi6_mark) + /* NBMA tunnel */ + if (ipv6_addr_any(&t->parms.raddr)) { + struct in6_addr *addr6; + struct neighbour *neigh; + int addr_type; + + if (!skb_dst(skb)) + goto tx_err_link_failure; + + neigh = dst_neigh_lookup(skb_dst(skb), + &ipv6_hdr(skb)->daddr); + if (!neigh) + goto tx_err_link_failure; + + addr6 = (struct in6_addr *)&neigh->primary_key; + addr_type = ipv6_addr_type(addr6); + + if (addr_type == IPV6_ADDR_ANY) + addr6 = &ipv6_hdr(skb)->daddr; + + memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); + neigh_release(neigh); + } else if (!fl6->flowi6_mark) dst = ip6_tnl_dst_check(t); + + if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr)) + goto tx_err_link_failure; + if (!dst) { ndst = ip6_route_output(net, NULL, fl6); @@ -1018,7 +1069,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { struct sk_buff *new_skb; - if (!(new_skb = skb_realloc_headroom(skb, max_headroom))) + new_skb = skb_realloc_headroom(skb, max_headroom); + if (!new_skb) goto tx_err_dst_release; if (skb->sk) @@ -1075,10 +1127,11 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) struct flowi6 fl6; __u8 dsfield; __u32 mtu; + u8 tproto; int err; - if ((t->parms.proto != IPPROTO_IPIP && t->parms.proto != 0) || - !ip6_tnl_xmit_ctl(t)) + tproto = ACCESS_ONCE(t->parms.proto); + if (tproto != IPPROTO_IPIP && tproto != 0) return -1; if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) @@ -1117,10 +1170,12 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) struct flowi6 fl6; __u8 dsfield; __u32 mtu; + u8 tproto; int err; - if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) || - !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h)) + tproto = ACCESS_ONCE(t->parms.proto); + if ((tproto != IPPROTO_IPV6 && tproto != 0) || + ip6_tnl_addr_conflict(t, ipv6h)) return -1; offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); @@ -1282,6 +1337,14 @@ static int ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p) return err; } +static int ip6_tnl0_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p) +{ + /* for default tnl0 device allow to change only the proto */ + t->parms.proto = p->proto; + netdev_state_change(t->dev); + return 0; +} + static void ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u) { @@ -1381,7 +1444,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) break; ip6_tnl_parm_from_user(&p1, &p); t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL); - if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) { + if (cmd == SIOCCHGTUNNEL) { if (t != NULL) { if (t->dev != dev) { err = -EEXIST; @@ -1389,8 +1452,10 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } } else t = netdev_priv(dev); - - err = ip6_tnl_update(t, &p1); + if (dev == ip6n->fb_tnl_dev) + err = ip6_tnl0_update(t, &p1); + else + err = ip6_tnl_update(t, &p1); } if (t) { err = 0; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index bcda14de7f84..ace10d0b3aac 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -95,6 +95,7 @@ vti6_tnl_lookup(struct net *net, const struct in6_addr *remote, unsigned int hash = HASH(remote, local); struct ip6_tnl *t; struct vti6_net *ip6n = net_generic(net, vti6_net_id); + struct in6_addr any; for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (ipv6_addr_equal(local, &t->parms.laddr) && @@ -102,6 +103,22 @@ vti6_tnl_lookup(struct net *net, const struct in6_addr *remote, (t->dev->flags & IFF_UP)) return t; } + + memset(&any, 0, sizeof(any)); + hash = HASH(&any, local); + for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) { + if (ipv6_addr_equal(local, &t->parms.laddr) && + (t->dev->flags & IFF_UP)) + return t; + } + + hash = HASH(remote, &any); + for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) { + if (ipv6_addr_equal(remote, &t->parms.raddr) && + (t->dev->flags & IFF_UP)) + return t; + } + t = rcu_dereference(ip6n->tnls_wc[0]); if (t && (t->dev->flags & IFF_UP)) return t; @@ -287,8 +304,8 @@ static int vti6_rcv(struct sk_buff *skb) const struct ipv6hdr *ipv6h = ipv6_hdr(skb); rcu_read_lock(); - if ((t = vti6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, - &ipv6h->daddr)) != NULL) { + t = vti6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr); + if (t != NULL) { if (t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) { rcu_read_unlock(); goto discard; @@ -412,6 +429,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) struct net_device_stats *stats = &t->dev->stats; struct dst_entry *dst = skb_dst(skb); struct net_device *tdev; + struct xfrm_state *x; int err = -1; if (!dst) @@ -425,7 +443,12 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) goto tx_err_link_failure; } - if (!vti6_state_check(dst->xfrm, &t->parms.raddr, &t->parms.laddr)) + x = dst->xfrm; + if (!vti6_state_check(x, &t->parms.raddr, &t->parms.laddr)) + goto tx_err_link_failure; + + if (!ip6_tnl_xmit_ctl(t, (const struct in6_addr *)&x->props.saddr, + (const struct in6_addr *)&x->id.daddr)) goto tx_err_link_failure; tdev = dst->dev; @@ -480,7 +503,7 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) ipv6h = ipv6_hdr(skb); if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) || - !ip6_tnl_xmit_ctl(t) || vti6_addr_conflict(t, ipv6h)) + vti6_addr_conflict(t, ipv6h)) goto tx_err; xfrm_decode_session(skb, &fl, AF_INET6); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 1a01d79b8698..722669754bbf 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2094,7 +2094,7 @@ static void ip6_mr_forward(struct net *net, struct mr6_table *mrt, if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) { struct mfc6_cache *cache_proxy; - /* For an (*,G) entry, we only check that the incomming + /* For an (*,G) entry, we only check that the incoming * interface is part of the static tree. */ cache_proxy = ip6mr_cache_find_any_parent(mrt, vif); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index e1a9583bb419..66980d8d98d1 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -110,12 +110,8 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk, icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen; icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); } - opt = xchg(&inet6_sk(sk)->opt, opt); - } else { - spin_lock(&sk->sk_dst_lock); - opt = xchg(&inet6_sk(sk)->opt, opt); - spin_unlock(&sk->sk_dst_lock); } + opt = xchg(&inet6_sk(sk)->opt, opt); sk_dst_reset(sk); return opt; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index ed2c4e400b46..5ce107c8aab3 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2824,11 +2824,7 @@ static int igmp6_mcf_seq_show(struct seq_file *seq, void *v) struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq); if (v == SEQ_START_TOKEN) { - seq_printf(seq, - "%3s %6s " - "%32s %32s %6s %6s\n", "Idx", - "Device", "Multicast Address", - "Source Address", "INC", "EXC"); + seq_puts(seq, "Idx Device Multicast Address Source Address INC EXC\n"); } else { seq_printf(seq, "%3d %6.6s %pi6 %pi6 %6lu %6lu\n", diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index f61429d391d3..b9779d441b12 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -97,16 +97,17 @@ static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) return -1; if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) { - LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n", - mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type)); + net_dbg_ratelimited("mip6: MH message too short: %d vs >=%d\n", + mh->ip6mh_hdrlen, + mip6_mh_len(mh->ip6mh_type)); mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_hdrlen) + skb_network_header_len(skb)); return -1; } if (mh->ip6mh_proto != IPPROTO_NONE) { - LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n", - mh->ip6mh_proto); + net_dbg_ratelimited("mip6: MH invalid payload proto = %d\n", + mh->ip6mh_proto); mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_proto) + skb_network_header_len(skb)); return -1; @@ -288,7 +289,7 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb, * XXX: packet if HAO exists. */ if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) { - LIMIT_NETDEBUG(KERN_WARNING "mip6: hao exists already, override\n"); + net_dbg_ratelimited("mip6: hao exists already, override\n"); return offset; } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 4cb45c1079a2..682866777d53 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -162,7 +162,8 @@ static void ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data) memcpy(opt+2, data, data_len); data_len += 2; opt += data_len; - if ((space -= data_len) > 0) + space -= data_len; + if (space > 0) memset(opt, 0, space); } @@ -656,8 +657,8 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) if (skb && ipv6_chk_addr(dev_net(dev), &ipv6_hdr(skb)->saddr, dev, 1)) saddr = &ipv6_hdr(skb)->saddr; - - if ((probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES)) < 0) { + probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES); + if (probes < 0) { if (!(neigh->nud_state & NUD_VALID)) { ND_PRINTK(1, dbg, "%s: trying to ucast probe in NUD_INVALID: %pI6\n", @@ -1763,7 +1764,7 @@ int __init ndisc_init(void) /* * Initialize the neighbour table */ - neigh_table_init(&nd_tbl); + neigh_table_init(NEIGH_ND_TABLE, &nd_tbl); #ifdef CONFIG_SYSCTL err = neigh_sysctl_register(NULL, &nd_tbl.parms, @@ -1796,6 +1797,6 @@ void ndisc_cleanup(void) #ifdef CONFIG_SYSCTL neigh_sysctl_unregister(&nd_tbl.parms); #endif - neigh_table_clear(&nd_tbl); + neigh_table_clear(NEIGH_ND_TABLE, &nd_tbl); unregister_pernet_subsys(&ndisc_net_ops); } diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index d38e6a8d8b9f..398377a9d018 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -36,7 +36,7 @@ int ip6_route_me_harder(struct sk_buff *skb) err = dst->error; if (err) { IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); - LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); + net_dbg_ratelimited("ip6_route_me_harder: No more route\n"); dst_release(dst); return err; } diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 6af874fc187f..a069822936e6 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -91,6 +91,15 @@ config NFT_MASQ_IPV6 This is the expression that provides IPv4 masquerading support for nf_tables. +config NFT_REDIR_IPV6 + tristate "IPv6 redirect support for nf_tables" + depends on NF_TABLES_IPV6 + depends on NFT_REDIR + select NF_NAT_REDIRECT + help + This is the expression that provides IPv4 redirect support for + nf_tables. + endif # NF_NAT_IPV6 config IP6_NF_IPTABLES diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index fbb25f01143c..c36e0a5490de 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -36,6 +36,7 @@ obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o +obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index 7b17a0be93e7..ddf07e6f59d7 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -5,6 +5,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/spinlock.h> @@ -398,8 +399,17 @@ static int __init nf_log_ipv6_init(void) if (ret < 0) return ret; - nf_log_register(NFPROTO_IPV6, &nf_ip6_logger); + ret = nf_log_register(NFPROTO_IPV6, &nf_ip6_logger); + if (ret < 0) { + pr_err("failed to register logger\n"); + goto err1; + } + return 0; + +err1: + unregister_pernet_subsys(&nf_log_ipv6_net_ops); + return ret; } static void __exit nf_log_ipv6_exit(void) @@ -412,6 +422,6 @@ module_init(nf_log_ipv6_init); module_exit(nf_log_ipv6_exit); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); -MODULE_DESCRIPTION("Netfilter IPv4 packet logging"); +MODULE_DESCRIPTION("Netfilter IPv6 packet logging"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NF_LOGGER(AF_INET6, 0); diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 015eb8a80766..d05b36440e8b 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -11,6 +11,7 @@ #include <net/ip6_route.h> #include <net/ip6_fib.h> #include <net/ip6_checksum.h> +#include <net/netfilter/ipv6/nf_reject.h> #include <linux/netfilter_ipv6.h> #include <net/netfilter/ipv6/nf_reject.h> diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c new file mode 100644 index 000000000000..2433a6bfb191 --- /dev/null +++ b/net/ipv6/netfilter/nft_redir_ipv6.c @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nft_redir.h> +#include <net/netfilter/nf_nat_redirect.h> + +static void nft_redir_ipv6_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_redir *priv = nft_expr_priv(expr); + struct nf_nat_range range; + unsigned int verdict; + + memset(&range, 0, sizeof(range)); + if (priv->sreg_proto_min) { + range.min_proto.all = (__force __be16) + data[priv->sreg_proto_min].data[0]; + range.max_proto.all = (__force __be16) + data[priv->sreg_proto_max].data[0]; + range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + } + + range.flags |= priv->flags; + + verdict = nf_nat_redirect_ipv6(pkt->skb, &range, pkt->ops->hooknum); + data[NFT_REG_VERDICT].verdict = verdict; +} + +static struct nft_expr_type nft_redir_ipv6_type; +static const struct nft_expr_ops nft_redir_ipv6_ops = { + .type = &nft_redir_ipv6_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), + .eval = nft_redir_ipv6_eval, + .init = nft_redir_init, + .dump = nft_redir_dump, + .validate = nft_redir_validate, +}; + +static struct nft_expr_type nft_redir_ipv6_type __read_mostly = { + .family = NFPROTO_IPV6, + .name = "redir", + .ops = &nft_redir_ipv6_ops, + .policy = nft_redir_policy, + .maxattr = NFTA_REDIR_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_redir_ipv6_module_init(void) +{ + return nft_register_expr(&nft_redir_ipv6_type); +} + +static void __exit nft_redir_ipv6_module_exit(void) +{ + nft_unregister_expr(&nft_redir_ipv6_type); +} + +module_init(nft_redir_ipv6_module_init); +module_exit(nft_redir_ipv6_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>"); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "redir"); diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c index 0bc19fa87821..f73285924144 100644 --- a/net/ipv6/netfilter/nft_reject_ipv6.c +++ b/net/ipv6/netfilter/nft_reject_ipv6.c @@ -19,9 +19,9 @@ #include <net/netfilter/nft_reject.h> #include <net/netfilter/ipv6/nf_reject.h> -void nft_reject_ipv6_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) +static void nft_reject_ipv6_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); @@ -38,7 +38,6 @@ void nft_reject_ipv6_eval(const struct nft_expr *expr, data[NFT_REG_VERDICT].verdict = NF_DROP; } -EXPORT_SYMBOL_GPL(nft_reject_ipv6_eval); static struct nft_expr_type nft_reject_ipv6_type; static const struct nft_expr_ops nft_reject_ipv6_ops = { diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 5b7a1ed2aba9..2d3148378a1f 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -163,7 +163,8 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, pfh.icmph.checksum = 0; pfh.icmph.un.echo.id = inet->inet_sport; pfh.icmph.un.echo.sequence = user_icmph.icmp6_sequence; - pfh.iov = msg->msg_iov; + /* XXX: stripping const */ + pfh.iov = (struct iovec *)msg->msg_iter.iov; pfh.wcheck = 0; pfh.family = AF_INET6; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 1752cd0b4882..679253d0af84 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -136,6 +136,7 @@ static const struct snmp_mib snmp6_udp6_list[] = { SNMP_MIB_ITEM("Udp6RcvbufErrors", UDP_MIB_RCVBUFERRORS), SNMP_MIB_ITEM("Udp6SndbufErrors", UDP_MIB_SNDBUFERRORS), SNMP_MIB_ITEM("Udp6InCsumErrors", UDP_MIB_CSUMERRORS), + SNMP_MIB_ITEM("Udp6IgnoredMulti", UDP_MIB_IGNOREDMULTI), SNMP_MIB_SENTINEL }; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 896af8807979..ee25631f8c29 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -486,13 +486,13 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, } if (skb_csum_unnecessary(skb)) { - err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + err = skb_copy_datagram_msg(skb, 0, msg, copied); } else if (msg->msg_flags&MSG_TRUNC) { if (__skb_checksum_complete(skb)) goto csum_copy_err; - err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + err = skb_copy_datagram_msg(skb, 0, msg, copied); } else { - err = skb_copy_and_csum_datagram_iovec(skb, 0, msg->msg_iov); + err = skb_copy_and_csum_datagram_msg(skb, 0, msg); if (err == -EINVAL) goto csum_copy_err; } @@ -548,7 +548,8 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, if (!rp->checksum) goto send; - if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) + skb = skb_peek(&sk->sk_write_queue); + if (!skb) goto out; offset = rp->offset; @@ -671,65 +672,62 @@ error: return err; } -static int rawv6_probe_proto_opt(struct flowi6 *fl6, struct msghdr *msg) +struct raw6_frag_vec { + struct msghdr *msg; + int hlen; + char c[4]; +}; + +static int rawv6_probe_proto_opt(struct raw6_frag_vec *rfv, struct flowi6 *fl6) { - struct iovec *iov; - u8 __user *type = NULL; - u8 __user *code = NULL; - u8 len = 0; - int probed = 0; - int i; - - if (!msg->msg_iov) - return 0; + int err = 0; + switch (fl6->flowi6_proto) { + case IPPROTO_ICMPV6: + rfv->hlen = 2; + err = memcpy_from_msg(rfv->c, rfv->msg, rfv->hlen); + if (!err) { + fl6->fl6_icmp_type = rfv->c[0]; + fl6->fl6_icmp_code = rfv->c[1]; + } + break; + case IPPROTO_MH: + rfv->hlen = 4; + err = memcpy_from_msg(rfv->c, rfv->msg, rfv->hlen); + if (!err) + fl6->fl6_mh_type = rfv->c[2]; + } + return err; +} - for (i = 0; i < msg->msg_iovlen; i++) { - iov = &msg->msg_iov[i]; - if (!iov) - continue; +static int raw6_getfrag(void *from, char *to, int offset, int len, int odd, + struct sk_buff *skb) +{ + struct raw6_frag_vec *rfv = from; - switch (fl6->flowi6_proto) { - case IPPROTO_ICMPV6: - /* check if one-byte field is readable or not. */ - if (iov->iov_base && iov->iov_len < 1) - break; - - if (!type) { - type = iov->iov_base; - /* check if code field is readable or not. */ - if (iov->iov_len > 1) - code = type + 1; - } else if (!code) - code = iov->iov_base; - - if (type && code) { - if (get_user(fl6->fl6_icmp_type, type) || - get_user(fl6->fl6_icmp_code, code)) - return -EFAULT; - probed = 1; - } - break; - case IPPROTO_MH: - if (iov->iov_base && iov->iov_len < 1) - break; - /* check if type field is readable or not. */ - if (iov->iov_len > 2 - len) { - u8 __user *p = iov->iov_base; - if (get_user(fl6->fl6_mh_type, &p[2 - len])) - return -EFAULT; - probed = 1; - } else - len += iov->iov_len; + if (offset < rfv->hlen) { + int copy = min(rfv->hlen - offset, len); - break; - default: - probed = 1; - break; - } - if (probed) - break; + if (skb->ip_summed == CHECKSUM_PARTIAL) + memcpy(to, rfv->c + offset, copy); + else + skb->csum = csum_block_add( + skb->csum, + csum_partial_copy_nocheck(rfv->c + offset, + to, copy, 0), + odd); + + odd = 0; + offset += copy; + to += copy; + len -= copy; + + if (!len) + return 0; } - return 0; + + offset -= rfv->hlen; + + return ip_generic_getfrag(rfv->msg, to, offset, len, odd, skb); } static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, @@ -744,6 +742,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct ipv6_txoptions *opt = NULL; struct ip6_flowlabel *flowlabel = NULL; struct dst_entry *dst = NULL; + struct raw6_frag_vec rfv; struct flowi6 fl6; int addr_len = msg->msg_namelen; int hlimit = -1; @@ -847,7 +846,9 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, opt = ipv6_fixup_options(&opt_space, opt); fl6.flowi6_proto = proto; - err = rawv6_probe_proto_opt(&fl6, msg); + rfv.msg = msg; + rfv.hlen = 0; + err = rawv6_probe_proto_opt(&rfv, &fl6); if (err) goto out; @@ -885,10 +886,11 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, back_from_confirm: if (inet->hdrincl) - err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl6, &dst, msg->msg_flags); + /* XXX: stripping const */ + err = rawv6_send_hdrinc(sk, (struct iovec *)msg->msg_iter.iov, len, &fl6, &dst, msg->msg_flags); else { lock_sock(sk); - err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, + err = ip6_append_data(sk, raw6_getfrag, &rfv, len, 0, hlimit, tclass, opt, &fl6, (struct rt6_info *)dst, msg->msg_flags, dontfrag); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 1a157ca2ebc1..d7d70e69973b 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -69,7 +69,7 @@ struct ip6frag_skb_cb { #define FRAG6_CB(skb) ((struct ip6frag_skb_cb *)((skb)->cb)) -static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) +static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) { return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); } @@ -178,7 +178,7 @@ static void ip6_frag_expire(unsigned long data) ip6_expire_frag_queue(net, fq, &ip6_frags); } -static __inline__ struct frag_queue * +static struct frag_queue * fq_find(struct net *net, __be32 id, const struct in6_addr *src, const struct in6_addr *dst, u8 ecn) { @@ -429,7 +429,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, struct sk_buff *clone; int i, plen = 0; - if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL) + clone = alloc_skb(0, GFP_ATOMIC); + if (clone == NULL) goto out_oom; clone->next = head->next; head->next = clone; @@ -684,21 +685,21 @@ static void ip6_frags_sysctl_unregister(void) unregister_net_sysctl_table(ip6_ctl_header); } #else -static inline int ip6_frags_ns_sysctl_register(struct net *net) +static int ip6_frags_ns_sysctl_register(struct net *net) { return 0; } -static inline void ip6_frags_ns_sysctl_unregister(struct net *net) +static void ip6_frags_ns_sysctl_unregister(struct net *net) { } -static inline int ip6_frags_sysctl_register(void) +static int ip6_frags_sysctl_register(void) { return 0; } -static inline void ip6_frags_sysctl_unregister(void) +static void ip6_frags_sysctl_unregister(void) { } #endif diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a318dd89b6d9..c91083156edb 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -772,23 +772,22 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, } #endif -#define BACKTRACK(__net, saddr) \ -do { \ - if (rt == __net->ipv6.ip6_null_entry) { \ - struct fib6_node *pn; \ - while (1) { \ - if (fn->fn_flags & RTN_TL_ROOT) \ - goto out; \ - pn = fn->parent; \ - if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \ - fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \ - else \ - fn = pn; \ - if (fn->fn_flags & RTN_RTINFO) \ - goto restart; \ - } \ - } \ -} while (0) +static struct fib6_node* fib6_backtrack(struct fib6_node *fn, + struct in6_addr *saddr) +{ + struct fib6_node *pn; + while (1) { + if (fn->fn_flags & RTN_TL_ROOT) + return NULL; + pn = fn->parent; + if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) + fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); + else + fn = pn; + if (fn->fn_flags & RTN_RTINFO) + return fn; + } +} static struct rt6_info *ip6_pol_route_lookup(struct net *net, struct fib6_table *table, @@ -804,8 +803,11 @@ restart: rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags); if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags); - BACKTRACK(net, &fl6->saddr); -out: + if (rt == net->ipv6.ip6_null_entry) { + fn = fib6_backtrack(fn, &fl6->saddr); + if (fn) + goto restart; + } dst_use(&rt->dst, jiffies); read_unlock_bh(&table->tb6_lock); return rt; @@ -915,33 +917,48 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, struct flowi6 *fl6, int flags) { - struct fib6_node *fn; + struct fib6_node *fn, *saved_fn; struct rt6_info *rt, *nrt; int strict = 0; int attempts = 3; int err; - int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE; strict |= flags & RT6_LOOKUP_F_IFACE; + if (net->ipv6.devconf_all->forwarding == 0) + strict |= RT6_LOOKUP_F_REACHABLE; -relookup: +redo_fib6_lookup_lock: read_lock_bh(&table->tb6_lock); -restart_2: fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); + saved_fn = fn; -restart: - rt = rt6_select(fn, oif, strict | reachable); +redo_rt6_select: + rt = rt6_select(fn, oif, strict); if (rt->rt6i_nsiblings) - rt = rt6_multipath_select(rt, fl6, oif, strict | reachable); - BACKTRACK(net, &fl6->saddr); - if (rt == net->ipv6.ip6_null_entry || - rt->rt6i_flags & RTF_CACHE) - goto out; + rt = rt6_multipath_select(rt, fl6, oif, strict); + if (rt == net->ipv6.ip6_null_entry) { + fn = fib6_backtrack(fn, &fl6->saddr); + if (fn) + goto redo_rt6_select; + else if (strict & RT6_LOOKUP_F_REACHABLE) { + /* also consider unreachable route */ + strict &= ~RT6_LOOKUP_F_REACHABLE; + fn = saved_fn; + goto redo_rt6_select; + } else { + dst_hold(&rt->dst); + read_unlock_bh(&table->tb6_lock); + goto out2; + } + } dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); + if (rt->rt6i_flags & RTF_CACHE) + goto out2; + if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY))) nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); else if (!(rt->dst.flags & DST_HOST)) @@ -967,15 +984,8 @@ restart: * released someone could insert this route. Relookup. */ ip6_rt_put(rt); - goto relookup; + goto redo_fib6_lookup_lock; -out: - if (reachable) { - reachable = 0; - goto restart_2; - } - dst_hold(&rt->dst); - read_unlock_bh(&table->tb6_lock); out2: rt->dst.lastuse = jiffies; rt->dst.__use++; @@ -1235,10 +1245,12 @@ restart: rt = net->ipv6.ip6_null_entry; else if (rt->dst.error) { rt = net->ipv6.ip6_null_entry; - goto out; + } else if (rt == net->ipv6.ip6_null_entry) { + fn = fib6_backtrack(fn, &fl6->saddr); + if (fn) + goto restart; } - BACKTRACK(net, &fl6->saddr); -out: + dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index a24557a1c1d8..213546bd6d5d 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1241,7 +1241,8 @@ ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) goto done; err = -ENOENT; - if ((t = ipip6_tunnel_locate(net, &p, 0)) == NULL) + t = ipip6_tunnel_locate(net, &p, 0); + if (t == NULL) goto done; err = -EPERM; if (t == netdev_priv(sitn->fb_tunnel_dev)) @@ -1711,7 +1712,7 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u16(skb, IFLA_IPTUN_ENCAP_DPORT, tunnel->encap.dport) || nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, - tunnel->encap.dport)) + tunnel->encap.flags)) goto nla_put_failure; return 0; @@ -1836,8 +1837,8 @@ static int __net_init sit_init_net(struct net *net) goto err_dev_free; ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn); - - if ((err = register_netdev(sitn->fb_tunnel_dev))) + err = register_netdev(sitn->fb_tunnel_dev); + if (err) goto err_reg_dev; t = netdev_priv(sitn->fb_tunnel_dev); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 2f25cb6347ca..7337fc7947e2 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -166,13 +166,15 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) int mss; struct dst_entry *dst; __u8 rcv_wscale; - bool ecn_ok = false; if (!sysctl_tcp_syncookies || !th->ack || th->rst) goto out; - if (tcp_synq_no_recent_overflow(sk) || - (mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie)) == 0) { + if (tcp_synq_no_recent_overflow(sk)) + goto out; + + mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie); + if (mss == 0) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); goto out; } @@ -183,7 +185,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) memset(&tcp_opt, 0, sizeof(tcp_opt)); tcp_parse_options(skb, &tcp_opt, 0, NULL); - if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok)) + if (!cookie_timestamp_decode(&tcp_opt)) goto out; ret = NULL; @@ -220,7 +222,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) req->expires = 0UL; req->num_retrans = 0; - ireq->ecn_ok = ecn_ok; ireq->snd_wscale = tcp_opt.snd_wscale; ireq->sack_ok = tcp_opt.sack_ok; ireq->wscale_ok = tcp_opt.wscale_ok; @@ -261,6 +262,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) dst_metric(dst, RTAX_INITRWND)); ireq->rcv_wscale = rcv_wscale; + ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), dst); ret = get_cookie_sock(sk, skb, req, dst); out: @@ -269,4 +271,3 @@ out_free: reqsk_free(req); return NULL; } - diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index dc495ae2ead0..5ff87805258e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -787,16 +787,16 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .queue_hash_add = inet6_csk_reqsk_queue_hash_add, }; -static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, - u32 tsval, u32 tsecr, int oif, - struct tcp_md5sig_key *key, int rst, u8 tclass, - u32 label) +static void tcp_v6_send_response(struct sock *sk, struct sk_buff *skb, u32 seq, + u32 ack, u32 win, u32 tsval, u32 tsecr, + int oif, struct tcp_md5sig_key *key, int rst, + u8 tclass, u32 label) { const struct tcphdr *th = tcp_hdr(skb); struct tcphdr *t1; struct sk_buff *buff; struct flowi6 fl6; - struct net *net = dev_net(skb_dst(skb)->dev); + struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); struct sock *ctl_sk = net->ipv6.tcp_sk; unsigned int tot_len = sizeof(struct tcphdr); struct dst_entry *dst; @@ -946,7 +946,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) (th->doff << 2); oif = sk ? sk->sk_bound_dev_if : 0; - tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0); + tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0); #ifdef CONFIG_TCP_MD5SIG release_sk1: @@ -957,13 +957,13 @@ release_sk1: #endif } -static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, - u32 win, u32 tsval, u32 tsecr, int oif, +static void tcp_v6_send_ack(struct sock *sk, struct sk_buff *skb, u32 seq, + u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, u8 tclass, u32 label) { - tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, oif, key, 0, tclass, - label); + tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0, + tclass, label); } static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) @@ -971,7 +971,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, + tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_time_stamp + tcptw->tw_ts_offset, tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), @@ -986,10 +986,10 @@ static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV * sk->sk_state == TCP_SYN_RECV -> for Fast Open. */ - tcp_v6_send_ack(skb, (sk->sk_state == TCP_LISTEN) ? + tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, - tcp_rsk(req)->rcv_nxt, - req->rcv_wnd, tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if, + tcp_rsk(req)->rcv_nxt, req->rcv_wnd, + tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if, tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0, 0); } @@ -1296,6 +1296,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) struct dst_entry *dst = sk->sk_rx_dst; sock_rps_save_rxhash(sk, skb); + sk_mark_napi_id(sk, skb); if (dst) { if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || dst->ops->check(dst, np->rx_dst_cookie) == NULL) { @@ -1325,6 +1326,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) */ if (nsk != sk) { sock_rps_save_rxhash(nsk, skb); + sk_mark_napi_id(sk, skb); if (tcp_child_process(sk, nsk, skb)) goto reset; if (opt_skb) @@ -1457,7 +1459,7 @@ process: if (sk_filter(sk, skb)) goto discard_and_relse; - sk_mark_napi_id(sk, skb); + sk_incoming_cpu_update(sk); skb->dev = NULL; bh_lock_sock_nested(sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index f6ba535b6feb..189dc4ae3eca 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -148,72 +148,85 @@ static inline int compute_score(struct sock *sk, struct net *net, const struct in6_addr *daddr, __be16 dport, int dif) { - int score = -1; + int score; + struct inet_sock *inet; - if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && - sk->sk_family == PF_INET6) { - struct inet_sock *inet = inet_sk(sk); + if (!net_eq(sock_net(sk), net) || + udp_sk(sk)->udp_port_hash != hnum || + sk->sk_family != PF_INET6) + return -1; - score = 0; - if (inet->inet_dport) { - if (inet->inet_dport != sport) - return -1; - score++; - } - if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { - if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) - return -1; - score++; - } - if (!ipv6_addr_any(&sk->sk_v6_daddr)) { - if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) - return -1; - score++; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - return -1; - score++; - } + score = 0; + inet = inet_sk(sk); + + if (inet->inet_dport) { + if (inet->inet_dport != sport) + return -1; + score++; + } + + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) + return -1; + score++; + } + + if (!ipv6_addr_any(&sk->sk_v6_daddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) + return -1; + score++; } + + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score++; + } + return score; } #define SCORE2_MAX (1 + 1 + 1) static inline int compute_score2(struct sock *sk, struct net *net, - const struct in6_addr *saddr, __be16 sport, - const struct in6_addr *daddr, unsigned short hnum, - int dif) + const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, + unsigned short hnum, int dif) { - int score = -1; + int score; + struct inet_sock *inet; - if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && - sk->sk_family == PF_INET6) { - struct inet_sock *inet = inet_sk(sk); + if (!net_eq(sock_net(sk), net) || + udp_sk(sk)->udp_port_hash != hnum || + sk->sk_family != PF_INET6) + return -1; - if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) + return -1; + + score = 0; + inet = inet_sk(sk); + + if (inet->inet_dport) { + if (inet->inet_dport != sport) return -1; - score = 0; - if (inet->inet_dport) { - if (inet->inet_dport != sport) - return -1; - score++; - } - if (!ipv6_addr_any(&sk->sk_v6_daddr)) { - if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) - return -1; - score++; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - return -1; - score++; - } + score++; } + + if (!ipv6_addr_any(&sk->sk_v6_daddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) + return -1; + score++; + } + + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score++; + } + return score; } - /* called with read_rcu_lock() */ static struct sock *udp6_lib_lookup2(struct net *net, const struct in6_addr *saddr, __be16 sport, @@ -357,7 +370,8 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, struct sock *sk; const struct ipv6hdr *iph = ipv6_hdr(skb); - if (unlikely(sk = skb_steal_sock(skb))) + sk = skb_steal_sock(skb); + if (unlikely(sk)) return sk; return __udp6_lib_lookup(dev_net(skb_dst(skb)->dev), &iph->saddr, sport, &iph->daddr, dport, inet6_iif(skb), @@ -424,10 +438,10 @@ try_again: } if (skb_csum_unnecessary(skb)) - err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), - msg->msg_iov, copied); + err = skb_copy_datagram_msg(skb, sizeof(struct udphdr), + msg, copied); else { - err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); + err = skb_copy_and_csum_datagram_msg(skb, sizeof(struct udphdr), msg); if (err == -EINVAL) goto csum_copy_err; } @@ -577,6 +591,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (!ipv6_addr_any(&sk->sk_v6_daddr)) { sock_rps_save_rxhash(sk, skb); sk_mark_napi_id(sk, skb); + sk_incoming_cpu_update(sk); } rc = sock_queue_rcv_skb(sk, skb); @@ -659,15 +674,13 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { if (up->pcrlen == 0) { /* full coverage was set */ - LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: partial coverage" - " %d while full coverage %d requested\n", - UDP_SKB_CB(skb)->cscov, skb->len); + net_dbg_ratelimited("UDPLITE6: partial coverage %d while full coverage %d requested\n", + UDP_SKB_CB(skb)->cscov, skb->len); goto drop; } if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { - LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: coverage %d " - "too small, need min %d\n", - UDP_SKB_CB(skb)->cscov, up->pcrlen); + net_dbg_ratelimited("UDPLITE6: coverage %d too small, need min %d\n", + UDP_SKB_CB(skb)->cscov, up->pcrlen); goto drop; } } @@ -760,9 +773,9 @@ static void udp6_csum_zero_error(struct sk_buff *skb) /* RFC 2460 section 8.1 says that we SHOULD log * this error. Well, it is reasonable. */ - LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n", - &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source), - &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest)); + net_dbg_ratelimited("IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n", + &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source), + &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest)); } /* @@ -771,7 +784,7 @@ static void udp6_csum_zero_error(struct sk_buff *skb) */ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, const struct in6_addr *saddr, const struct in6_addr *daddr, - struct udp_table *udptable) + struct udp_table *udptable, int proto) { struct sock *sk, *stack[256 / sizeof(struct sock *)]; const struct udphdr *uh = udp_hdr(skb); @@ -781,6 +794,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, int dif = inet6_iif(skb); unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node); unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10); + bool inner_flushed = false; if (use_hash2) { hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) & @@ -803,6 +817,7 @@ start_lookup: (uh->check || udp_sk(sk)->no_check6_rx)) { if (unlikely(count == ARRAY_SIZE(stack))) { flush_stack(stack, count, skb, ~0); + inner_flushed = true; count = 0; } stack[count++] = sk; @@ -821,7 +836,10 @@ start_lookup: if (count) { flush_stack(stack, count, skb, count - 1); } else { - kfree_skb(skb); + if (!inner_flushed) + UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI, + proto == IPPROTO_UDPLITE); + consume_skb(skb); } return 0; } @@ -873,7 +891,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, */ if (ipv6_addr_is_multicast(daddr)) return __udp6_lib_mcast_deliver(net, skb, - saddr, daddr, udptable); + saddr, daddr, udptable, proto); /* Unicast */ @@ -925,14 +943,11 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, return 0; short_packet: - LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n", - proto == IPPROTO_UDPLITE ? "-Lite" : "", - saddr, - ntohs(uh->source), - ulen, - skb->len, - daddr, - ntohs(uh->dest)); + net_dbg_ratelimited("UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n", + proto == IPPROTO_UDPLITE ? "-Lite" : "", + saddr, ntohs(uh->source), + ulen, skb->len, + daddr, ntohs(uh->dest)); goto discard; csum_error: UDP6_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); @@ -1025,7 +1040,8 @@ static int udp_v6_push_pending_frames(struct sock *sk) fl6 = &inet->cork.fl.u.ip6; /* Grab the skbuff where UDP header space exists. */ - if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) + skb = skb_peek(&sk->sk_write_queue); + if (skb == NULL) goto out; /* @@ -1284,7 +1300,7 @@ back_from_confirm: /* ... which is an evident application bug. --ANK */ release_sock(sk); - LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n"); + net_dbg_ratelimited("udp cork app bug 2\n"); err = -EINVAL; goto out; } @@ -1296,7 +1312,7 @@ do_append_data: dontfrag = np->dontfrag; up->len += ulen; getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; - err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen, + err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr), hlimit, tclass, opt, &fl6, (struct rt6_info *)dst, corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 6b8f543f6ac6..b6aa8ed18257 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -42,11 +42,11 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM | + SKB_GSO_TUNNEL_REMCSUM | SKB_GSO_GRE | SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | - SKB_GSO_SIT | - SKB_GSO_MPLS) || + SKB_GSO_SIT) || !(type & (SKB_GSO_UDP)))) goto out; |