summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/inet_connection_sock.c12
-rw-r--r--net/ipv4/inet_hashtables.c10
-rw-r--r--net/ipv4/inet_timewait_sock.c58
-rw-r--r--net/ipv4/route.c23
-rw-r--r--net/ipv4/tcp_bbr.c2
-rw-r--r--net/ipv4/tcp_cubic.c4
-rw-r--r--net/ipv4/tcp_input.c10
-rw-r--r--net/ipv4/tcp_ipv4.c3
-rw-r--r--net/ipv4/tcp_output.c2
-rw-r--r--net/ipv4/udp.c2
10 files changed, 95 insertions, 31 deletions
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 1e5b53c2bb26..53f5f956d948 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -155,10 +155,14 @@ static int inet_csk_bind_conflict(const struct sock *sk,
*/
sk_for_each_bound(sk2, &tb->owners) {
- if (sk != sk2 &&
- (!sk->sk_bound_dev_if ||
- !sk2->sk_bound_dev_if ||
- sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
+ int bound_dev_if2;
+
+ if (sk == sk2)
+ continue;
+ bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if);
+ if ((!sk->sk_bound_dev_if ||
+ !bound_dev_if2 ||
+ sk->sk_bound_dev_if == bound_dev_if2)) {
if (reuse && sk2->sk_reuse &&
sk2->sk_state != TCP_LISTEN) {
if ((!relax ||
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 5257a7575649..87354e20009a 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -373,10 +373,10 @@ begin:
sk_nulls_for_each_rcu(sk, node, &head->chain) {
if (sk->sk_hash != hash)
continue;
- if (likely(INET_MATCH(net, sk, acookie, ports, dif, sdif))) {
+ if (likely(inet_match(net, sk, acookie, ports, dif, sdif))) {
if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
goto out;
- if (unlikely(!INET_MATCH(net, sk, acookie,
+ if (unlikely(!inet_match(net, sk, acookie,
ports, dif, sdif))) {
sock_gen_put(sk);
goto begin;
@@ -426,7 +426,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
if (sk2->sk_hash != hash)
continue;
- if (likely(INET_MATCH(net, sk2, acookie, ports, dif, sdif))) {
+ if (likely(inet_match(net, sk2, acookie, ports, dif, sdif))) {
if (sk2->sk_state == TCP_TIME_WAIT) {
tw = inet_twsk(sk2);
if (twsk_unique(sk, sk2, twp))
@@ -492,14 +492,14 @@ static bool inet_ehash_lookup_by_sk(struct sock *sk,
if (esk->sk_hash != sk->sk_hash)
continue;
if (sk->sk_family == AF_INET) {
- if (unlikely(INET_MATCH(net, esk, acookie,
+ if (unlikely(inet_match(net, esk, acookie,
ports, dif, sdif))) {
return true;
}
}
#if IS_ENABLED(CONFIG_IPV6)
else if (sk->sk_family == AF_INET6) {
- if (unlikely(INET6_MATCH(esk, net,
+ if (unlikely(inet6_match(net, esk,
&sk->sk_v6_daddr,
&sk->sk_v6_rcv_saddr,
ports, dif, sdif))) {
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 9e0bbd026560..0ec501845cb3 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -52,7 +52,8 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
spin_unlock(lock);
/* Disassociate with bind bucket. */
- bhead = &hashinfo->bhash[tw->tw_bslot];
+ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
+ hashinfo->bhash_size)];
spin_lock(&bhead->lock);
inet_twsk_bind_unhash(tw, hashinfo);
@@ -111,12 +112,8 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
Note, that any socket with inet->num != 0 MUST be bound in
binding cache, even if it is closed.
*/
- /* Cache inet_bhashfn(), because 'struct net' might be no longer
- * available later in inet_twsk_kill().
- */
- tw->tw_bslot = inet_bhashfn(twsk_net(tw), inet->inet_num,
- hashinfo->bhash_size);
- bhead = &hashinfo->bhash[tw->tw_bslot];
+ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
+ hashinfo->bhash_size)];
spin_lock(&bhead->lock);
tw->tw_tb = icsk->icsk_bind_hash;
WARN_ON(!icsk->icsk_bind_hash);
@@ -257,3 +254,50 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm)
}
}
EXPORT_SYMBOL_GPL(__inet_twsk_schedule);
+
+void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family)
+{
+ struct inet_timewait_sock *tw;
+ struct sock *sk;
+ struct hlist_nulls_node *node;
+ unsigned int slot;
+
+ for (slot = 0; slot <= hashinfo->ehash_mask; slot++) {
+ struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
+restart_rcu:
+ cond_resched();
+ rcu_read_lock();
+restart:
+ sk_nulls_for_each_rcu(sk, node, &head->chain) {
+ if (sk->sk_state != TCP_TIME_WAIT)
+ continue;
+ tw = inet_twsk(sk);
+ if ((tw->tw_family != family) ||
+ refcount_read(&twsk_net(tw)->ns.count))
+ continue;
+
+ if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt)))
+ continue;
+
+ if (unlikely((tw->tw_family != family) ||
+ refcount_read(&twsk_net(tw)->ns.count))) {
+ inet_twsk_put(tw);
+ goto restart;
+ }
+
+ rcu_read_unlock();
+ local_bh_disable();
+ inet_twsk_deschedule_put(tw);
+ local_bh_enable();
+ goto restart_rcu;
+ }
+ /* If the nulls value we got at the end of this lookup is
+ * not the expected one, we must restart lookup.
+ * We probably met an item that was moved to another chain.
+ */
+ if (get_nulls_value(node) != slot)
+ goto restart;
+ rcu_read_unlock();
+ }
+}
+EXPORT_SYMBOL_GPL(inet_twsk_purge);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 444d4a2a422d..356f535f3443 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1727,6 +1727,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
struct in_device *in_dev = __in_dev_get_rcu(dev);
unsigned int flags = RTCF_MULTICAST;
struct rtable *rth;
+ bool no_policy;
u32 itag = 0;
int err;
@@ -1737,8 +1738,12 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (our)
flags |= RTCF_LOCAL;
+ no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY);
+ if (no_policy)
+ IPCB(skb)->flags |= IPSKB_NOPOLICY;
+
rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
- IN_DEV_ORCONF(in_dev, NOPOLICY), false);
+ no_policy, false);
if (!rth)
return -ENOBUFS;
@@ -1797,7 +1802,7 @@ static int __mkroute_input(struct sk_buff *skb,
struct rtable *rth;
int err;
struct in_device *out_dev;
- bool do_cache;
+ bool do_cache, no_policy;
u32 itag = 0;
/* get a working reference to the output device */
@@ -1842,6 +1847,10 @@ static int __mkroute_input(struct sk_buff *skb,
}
}
+ no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY);
+ if (no_policy)
+ IPCB(skb)->flags |= IPSKB_NOPOLICY;
+
fnhe = find_exception(nhc, daddr);
if (do_cache) {
if (fnhe)
@@ -1854,8 +1863,7 @@ static int __mkroute_input(struct sk_buff *skb,
}
}
- rth = rt_dst_alloc(out_dev->dev, 0, res->type,
- IN_DEV_ORCONF(in_dev, NOPOLICY),
+ rth = rt_dst_alloc(out_dev->dev, 0, res->type, no_policy,
IN_DEV_ORCONF(out_dev, NOXFRM));
if (!rth) {
err = -ENOBUFS;
@@ -2230,6 +2238,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
struct rtable *rth;
struct flowi4 fl4;
bool do_cache = true;
+ bool no_policy;
/* IP on this device is disabled. */
@@ -2348,6 +2357,10 @@ brd_input:
RT_CACHE_STAT_INC(in_brd);
local_input:
+ no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY);
+ if (no_policy)
+ IPCB(skb)->flags |= IPSKB_NOPOLICY;
+
do_cache &= res->fi && !itag;
if (do_cache) {
struct fib_nh_common *nhc = FIB_RES_NHC(*res);
@@ -2362,7 +2375,7 @@ local_input:
rth = rt_dst_alloc(ip_rt_get_dev(net, res),
flags | RTCF_LOCAL, res->type,
- IN_DEV_ORCONF(in_dev, NOPOLICY), false);
+ no_policy, false);
if (!rth)
goto e_nobufs;
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index c7d30a3bbd81..075e744bfb48 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -310,7 +310,7 @@ static u32 bbr_tso_segs_goal(struct sock *sk)
*/
bytes = min_t(unsigned long,
sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift),
- GSO_MAX_SIZE - 1 - MAX_TCP_HEADER);
+ GSO_LEGACY_MAX_SIZE - 1 - MAX_TCP_HEADER);
segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
return min(segs, 0x7FU);
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index b0918839bee7..68178e7280ce 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -372,7 +372,7 @@ static void cubictcp_state(struct sock *sk, u8 new_state)
* We apply another 100% factor because @rate is doubled at this point.
* We cap the cushion to 1ms.
*/
-static u32 hystart_ack_delay(struct sock *sk)
+static u32 hystart_ack_delay(const struct sock *sk)
{
unsigned long rate;
@@ -380,7 +380,7 @@ static u32 hystart_ack_delay(struct sock *sk)
if (!rate)
return 0;
return min_t(u64, USEC_PER_MSEC,
- div64_ul((u64)GSO_MAX_SIZE * 4 * USEC_PER_SEC, rate));
+ div64_ul((u64)sk->sk_gso_max_size * 4 * USEC_PER_SEC, rate));
}
static void hystart_update(struct sock *sk, u32 delay)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 97cfcd85f84e..3231af73e430 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2620,12 +2620,12 @@ void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost,
u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
tp->prior_cwnd - 1;
sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
- } else if (flag & FLAG_SND_UNA_ADVANCED && !newly_lost) {
- sndcnt = min_t(int, delta,
- max_t(int, tp->prr_delivered - tp->prr_out,
- newly_acked_sacked) + 1);
} else {
- sndcnt = min(delta, newly_acked_sacked);
+ sndcnt = max_t(int, tp->prr_delivered - tp->prr_out,
+ newly_acked_sacked);
+ if (flag & FLAG_SND_UNA_ADVANCED && !newly_lost)
+ sndcnt++;
+ sndcnt = min(delta, sndcnt);
}
/* Force a fast retransmit upon entering fast recovery */
sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1));
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 218ad871c0e4..dac2650f3863 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2101,6 +2101,7 @@ bad_packet:
}
discard_it:
+ SKB_DR_OR(drop_reason, NOT_SPECIFIED);
/* Discard frame. */
kfree_skb_reason(skb, drop_reason);
return 0;
@@ -3168,6 +3169,8 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
struct net *net;
+ inet_twsk_purge(&tcp_hashinfo, AF_INET);
+
list_for_each_entry(net, net_exit_list, exit_list)
tcp_fastopen_ctx_destroy(net);
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b092228e4342..b4b2284ed4a2 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1553,7 +1553,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
* SO_SNDBUF values.
* Also allow first and last skb in retransmit queue to be split.
*/
- limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_MAX_SIZE);
+ limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_LEGACY_MAX_SIZE);
if (unlikely((sk->sk_wmem_queued >> 1) > limit &&
tcp_queue != TCP_FRAG_IN_WRITE_QUEUE &&
skb != tcp_rtx_queue_head(sk) &&
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 53342ce17172..aa9f2ec3dc46 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2563,7 +2563,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
struct sock *sk;
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
- if (INET_MATCH(net, sk, acookie, ports, dif, sdif))
+ if (inet_match(net, sk, acookie, ports, dif, sdif))
return sk;
/* Only check first socket in chain */
break;