Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/inet_connection_sock.c | 12
-rw-r--r--  net/ipv4/inet_hashtables.c      | 10
-rw-r--r--  net/ipv4/inet_timewait_sock.c   | 58
-rw-r--r--  net/ipv4/route.c                | 23
-rw-r--r--  net/ipv4/tcp_bbr.c              |  2
-rw-r--r--  net/ipv4/tcp_cubic.c            |  4
-rw-r--r--  net/ipv4/tcp_input.c            | 10
-rw-r--r--  net/ipv4/tcp_ipv4.c             |  3
-rw-r--r--  net/ipv4/tcp_output.c           |  2
-rw-r--r--  net/ipv4/udp.c                  |  2

10 files changed, 95 insertions(+), 31 deletions(-)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 1e5b53c2bb26..53f5f956d948 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -155,10 +155,14 @@ static int inet_csk_bind_conflict(const struct sock *sk,
 	 */
 
 	sk_for_each_bound(sk2, &tb->owners) {
-		if (sk != sk2 &&
-		    (!sk->sk_bound_dev_if ||
-		     !sk2->sk_bound_dev_if ||
-		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
+		int bound_dev_if2;
+
+		if (sk == sk2)
+			continue;
+		bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if);
+		if ((!sk->sk_bound_dev_if ||
+		     !bound_dev_if2 ||
+		     sk->sk_bound_dev_if == bound_dev_if2)) {
 			if (reuse && sk2->sk_reuse &&
 			    sk2->sk_state != TCP_LISTEN) {
 				if ((!relax ||
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 5257a7575649..87354e20009a 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -373,10 +373,10 @@ begin:
 	sk_nulls_for_each_rcu(sk, node, &head->chain) {
 		if (sk->sk_hash != hash)
 			continue;
-		if (likely(INET_MATCH(net, sk, acookie, ports, dif, sdif))) {
+		if (likely(inet_match(net, sk, acookie, ports, dif, sdif))) {
 			if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
 				goto out;
-			if (unlikely(!INET_MATCH(net, sk, acookie,
+			if (unlikely(!inet_match(net, sk, acookie,
 						 ports, dif, sdif))) {
 				sock_gen_put(sk);
 				goto begin;
@@ -426,7 +426,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
 		if (sk2->sk_hash != hash)
 			continue;
 
-		if (likely(INET_MATCH(net, sk2, acookie, ports, dif, sdif))) {
+		if (likely(inet_match(net, sk2, acookie, ports, dif, sdif))) {
 			if (sk2->sk_state == TCP_TIME_WAIT) {
 				tw = inet_twsk(sk2);
 				if (twsk_unique(sk, sk2, twp))
@@ -492,14 +492,14 @@ static bool inet_ehash_lookup_by_sk(struct sock *sk,
 		if (esk->sk_hash != sk->sk_hash)
 			continue;
 		if (sk->sk_family == AF_INET) {
-			if (unlikely(INET_MATCH(net, esk, acookie,
+			if (unlikely(inet_match(net, esk, acookie,
 						ports, dif, sdif))) {
 				return true;
 			}
 		}
 #if IS_ENABLED(CONFIG_IPV6)
 		else if (sk->sk_family == AF_INET6) {
-			if (unlikely(INET6_MATCH(esk, net,
+			if (unlikely(inet6_match(net, esk,
 						 &sk->sk_v6_daddr,
 						 &sk->sk_v6_rcv_saddr,
 						 ports, dif, sdif))) {
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 9e0bbd026560..0ec501845cb3 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -52,7 +52,8 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
 	spin_unlock(lock);
 
 	/* Disassociate with bind bucket. */
-	bhead = &hashinfo->bhash[tw->tw_bslot];
+	bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
+					      hashinfo->bhash_size)];
 
 	spin_lock(&bhead->lock);
 	inet_twsk_bind_unhash(tw, hashinfo);
@@ -111,12 +112,8 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
 	   Note, that any socket with inet->num != 0 MUST be bound in
 	   binding cache, even if it is closed.
 	 */
-	/* Cache inet_bhashfn(), because 'struct net' might be no longer
-	 * available later in inet_twsk_kill().
-	 */
-	tw->tw_bslot = inet_bhashfn(twsk_net(tw), inet->inet_num,
-				    hashinfo->bhash_size);
-	bhead = &hashinfo->bhash[tw->tw_bslot];
+	bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
+					      hashinfo->bhash_size)];
 	spin_lock(&bhead->lock);
 	tw->tw_tb = icsk->icsk_bind_hash;
 	WARN_ON(!icsk->icsk_bind_hash);
@@ -257,3 +254,50 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm)
 	}
 }
 EXPORT_SYMBOL_GPL(__inet_twsk_schedule);
+
+void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family)
+{
+	struct inet_timewait_sock *tw;
+	struct sock *sk;
+	struct hlist_nulls_node *node;
+	unsigned int slot;
+
+	for (slot = 0; slot <= hashinfo->ehash_mask; slot++) {
+		struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
+restart_rcu:
+		cond_resched();
+		rcu_read_lock();
+restart:
+		sk_nulls_for_each_rcu(sk, node, &head->chain) {
+			if (sk->sk_state != TCP_TIME_WAIT)
+				continue;
+			tw = inet_twsk(sk);
+			if ((tw->tw_family != family) ||
+			    refcount_read(&twsk_net(tw)->ns.count))
+				continue;
+
+			if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt)))
+				continue;
+
+			if (unlikely((tw->tw_family != family) ||
+				     refcount_read(&twsk_net(tw)->ns.count))) {
+				inet_twsk_put(tw);
+				goto restart;
+			}
+
+			rcu_read_unlock();
+			local_bh_disable();
+			inet_twsk_deschedule_put(tw);
+			local_bh_enable();
+			goto restart_rcu;
+		}
+		/* If the nulls value we got at the end of this lookup is
+		 * not the expected one, we must restart lookup.
+		 * We probably met an item that was moved to another chain.
+		 */
+		if (get_nulls_value(node) != slot)
+			goto restart;
+		rcu_read_unlock();
+	}
+}
+EXPORT_SYMBOL_GPL(inet_twsk_purge);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 444d4a2a422d..356f535f3443 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1727,6 +1727,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	struct in_device *in_dev = __in_dev_get_rcu(dev);
 	unsigned int flags = RTCF_MULTICAST;
 	struct rtable *rth;
+	bool no_policy;
 	u32 itag = 0;
 	int err;
 
@@ -1737,8 +1738,12 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	if (our)
 		flags |= RTCF_LOCAL;
 
+	no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY);
+	if (no_policy)
+		IPCB(skb)->flags |= IPSKB_NOPOLICY;
+
 	rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
-			   IN_DEV_ORCONF(in_dev, NOPOLICY), false);
+			   no_policy, false);
 	if (!rth)
 		return -ENOBUFS;
 
@@ -1797,7 +1802,7 @@ static int __mkroute_input(struct sk_buff *skb,
 	struct rtable *rth;
 	int err;
 	struct in_device *out_dev;
-	bool do_cache;
+	bool do_cache, no_policy;
 	u32 itag = 0;
 
 	/* get a working reference to the output device */
@@ -1842,6 +1847,10 @@ static int __mkroute_input(struct sk_buff *skb,
 		}
 	}
 
+	no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY);
+	if (no_policy)
+		IPCB(skb)->flags |= IPSKB_NOPOLICY;
+
 	fnhe = find_exception(nhc, daddr);
 	if (do_cache) {
 		if (fnhe)
@@ -1854,8 +1863,7 @@ static int __mkroute_input(struct sk_buff *skb,
 		}
 	}
 
-	rth = rt_dst_alloc(out_dev->dev, 0, res->type,
-			   IN_DEV_ORCONF(in_dev, NOPOLICY),
+	rth = rt_dst_alloc(out_dev->dev, 0, res->type, no_policy,
 			   IN_DEV_ORCONF(out_dev, NOXFRM));
 	if (!rth) {
 		err = -ENOBUFS;
@@ -2230,6 +2238,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	struct rtable *rth;
 	struct flowi4 fl4;
 	bool do_cache = true;
+	bool no_policy;
 
 	/* IP on this device is disabled. */
 
@@ -2348,6 +2357,10 @@ brd_input:
 	RT_CACHE_STAT_INC(in_brd);
 
 local_input:
+	no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY);
+	if (no_policy)
+		IPCB(skb)->flags |= IPSKB_NOPOLICY;
+
 	do_cache &= res->fi && !itag;
 	if (do_cache) {
 		struct fib_nh_common *nhc = FIB_RES_NHC(*res);
@@ -2362,7 +2375,7 @@ local_input:
 
 	rth = rt_dst_alloc(ip_rt_get_dev(net, res),
 			   flags | RTCF_LOCAL, res->type,
-			   IN_DEV_ORCONF(in_dev, NOPOLICY), false);
+			   no_policy, false);
 	if (!rth)
 		goto e_nobufs;
 
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index c7d30a3bbd81..075e744bfb48 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -310,7 +310,7 @@ static u32 bbr_tso_segs_goal(struct sock *sk)
 	 */
 	bytes = min_t(unsigned long,
 		      sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift),
-		      GSO_MAX_SIZE - 1 - MAX_TCP_HEADER);
+		      GSO_LEGACY_MAX_SIZE - 1 - MAX_TCP_HEADER);
 	segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
 
 	return min(segs, 0x7FU);
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index b0918839bee7..68178e7280ce 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -372,7 +372,7 @@ static void cubictcp_state(struct sock *sk, u8 new_state)
  * We apply another 100% factor because @rate is doubled at this point.
  * We cap the cushion to 1ms.
  */
-static u32 hystart_ack_delay(struct sock *sk)
+static u32 hystart_ack_delay(const struct sock *sk)
 {
 	unsigned long rate;
 
@@ -380,7 +380,7 @@ static u32 hystart_ack_delay(struct sock *sk)
 	if (!rate)
 		return 0;
 	return min_t(u64, USEC_PER_MSEC,
-		     div64_ul((u64)GSO_MAX_SIZE * 4 * USEC_PER_SEC, rate));
+		     div64_ul((u64)sk->sk_gso_max_size * 4 * USEC_PER_SEC, rate));
 }
 
 static void hystart_update(struct sock *sk, u32 delay)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 97cfcd85f84e..3231af73e430 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2620,12 +2620,12 @@ void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost,
 		u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
 			       tp->prior_cwnd - 1;
 		sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
-	} else if (flag & FLAG_SND_UNA_ADVANCED && !newly_lost) {
-		sndcnt = min_t(int, delta,
-			       max_t(int, tp->prr_delivered - tp->prr_out,
-				     newly_acked_sacked) + 1);
 	} else {
-		sndcnt = min(delta, newly_acked_sacked);
+		sndcnt = max_t(int, tp->prr_delivered - tp->prr_out,
+			       newly_acked_sacked);
+		if (flag & FLAG_SND_UNA_ADVANCED && !newly_lost)
+			sndcnt++;
+		sndcnt = min(delta, sndcnt);
 	}
 	/* Force a fast retransmit upon entering fast recovery */
 	sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1));
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 218ad871c0e4..dac2650f3863 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2101,6 +2101,7 @@ bad_packet:
 	}
 
 discard_it:
+	SKB_DR_OR(drop_reason, NOT_SPECIFIED);
 	/* Discard frame. */
 	kfree_skb_reason(skb, drop_reason);
 	return 0;
@@ -3168,6 +3169,8 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
 {
 	struct net *net;
 
+	inet_twsk_purge(&tcp_hashinfo, AF_INET);
+
 	list_for_each_entry(net, net_exit_list, exit_list)
 		tcp_fastopen_ctx_destroy(net);
 }
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b092228e4342..b4b2284ed4a2 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1553,7 +1553,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
 	 * SO_SNDBUF values.
	 * Also allow first and last skb in retransmit queue to be split.
 	 */
-	limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_MAX_SIZE);
+	limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_LEGACY_MAX_SIZE);
 	if (unlikely((sk->sk_wmem_queued >> 1) > limit &&
 		     tcp_queue != TCP_FRAG_IN_WRITE_QUEUE &&
 		     skb != tcp_rtx_queue_head(sk) &&
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 53342ce17172..aa9f2ec3dc46 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2563,7 +2563,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
 	struct sock *sk;
 
 	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
-		if (INET_MATCH(net, sk, acookie, ports, dif, sdif))
+		if (inet_match(net, sk, acookie, ports, dif, sdif))
 			return sk;
 		/* Only check first socket in chain */
 		break;
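
Note: several hunks above follow the same lockless-lookup discipline. __inet_lookup_established() checks inet_match() once without holding a reference, takes a reference with refcount_inc_not_zero(), then re-checks the match and retries from "begin:" on failure; the new inet_twsk_purge() performs the same dance on tw_refcnt. The re-check is required because these sockets can be freed and reused in place under RCU, so a positive match is only trustworthy once a reference is held. Below is a minimal userspace sketch of that pattern, not kernel code: struct entry, lookup() and get_ref_not_zero() are invented stand-ins for the kernel's socket structures, lookup helpers and refcount_inc_not_zero().

/* Userspace illustration of "match, take a reference, re-validate".
 * Compile with: cc -std=c11 sketch.c
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct entry {
	atomic_int refcnt;	/* 0 means the entry is being freed */
	int key;		/* stands in for the (addrs, ports) tuple */
};

/* Take a reference only if the object is still live, analogous to
 * the kernel's refcount_inc_not_zero().
 */
static bool get_ref_not_zero(struct entry *e)
{
	int old = atomic_load(&e->refcnt);

	while (old != 0) {
		/* on failure, 'old' is reloaded with the current value */
		if (atomic_compare_exchange_weak(&e->refcnt, &old, old + 1))
			return true;
	}
	return false;
}

/* Lockless lookup: because the entry may be freed and reused in place,
 * the key must be checked again after the reference is taken; if it no
 * longer matches, drop the reference and report a miss (the kernel code
 * instead does "goto begin" to restart the chain walk).
 */
static struct entry *lookup(struct entry *e, int key)
{
	if (e->key != key)		/* first, unreferenced check */
		return NULL;
	if (!get_ref_not_zero(e))	/* entry concurrently freed */
		return NULL;
	if (e->key != key) {		/* re-validate under the reference */
		atomic_fetch_sub(&e->refcnt, 1);
		return NULL;
	}
	return e;
}

int main(void)
{
	struct entry e = { .refcnt = 1, .key = 42 };

	printf("hit:  %p\n", (void *)lookup(&e, 42));
	printf("miss: %p\n", (void *)lookup(&e, 7));
	return 0;
}

The same two-phase check appears a third time in inet_twsk_purge() above: tw_family and the netns refcount are tested once under RCU, again after tw_refcnt is taken, and the walk restarts if the re-check fails.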