summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/fou_core.c29
-rw-r--r--net/ipv4/netfilter/iptable_nat.c18
-rw-r--r--net/ipv4/tcp.c18
-rw-r--r--net/ipv4/tcp_ao.c43
-rw-r--r--net/ipv4/tcp_bpf.c2
-rw-r--r--net/ipv4/tcp_input.c5
-rw-r--r--net/ipv4/tcp_ipv4.c14
-rw-r--r--net/ipv4/tcp_offload.c3
-rw-r--r--net/ipv4/udp_offload.c11
9 files changed, 109 insertions, 34 deletions
diff --git a/net/ipv4/fou_core.c b/net/ipv4/fou_core.c
index 0abbc413e0fe..78b869b31492 100644
--- a/net/ipv4/fou_core.c
+++ b/net/ipv4/fou_core.c
@@ -50,7 +50,7 @@ struct fou_net {
static inline struct fou *fou_from_sock(struct sock *sk)
{
- return sk->sk_user_data;
+ return rcu_dereference_sk_user_data(sk);
}
static int fou_recv_pull(struct sk_buff *skb, struct fou *fou, size_t len)
@@ -233,9 +233,15 @@ static struct sk_buff *fou_gro_receive(struct sock *sk,
struct sk_buff *skb)
{
const struct net_offload __rcu **offloads;
- u8 proto = fou_from_sock(sk)->protocol;
+ struct fou *fou = fou_from_sock(sk);
const struct net_offload *ops;
struct sk_buff *pp = NULL;
+ u8 proto;
+
+ if (!fou)
+ goto out;
+
+ proto = fou->protocol;
/* We can clear the encap_mark for FOU as we are essentially doing
* one of two possible things. We are either adding an L4 tunnel
@@ -263,14 +269,24 @@ static int fou_gro_complete(struct sock *sk, struct sk_buff *skb,
int nhoff)
{
const struct net_offload __rcu **offloads;
- u8 proto = fou_from_sock(sk)->protocol;
+ struct fou *fou = fou_from_sock(sk);
const struct net_offload *ops;
- int err = -ENOSYS;
+ u8 proto;
+ int err;
+
+ if (!fou) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ proto = fou->protocol;
offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
ops = rcu_dereference(offloads[proto]);
- if (WARN_ON(!ops || !ops->callbacks.gro_complete))
+ if (WARN_ON(!ops || !ops->callbacks.gro_complete)) {
+ err = -ENOSYS;
goto out;
+ }
err = ops->callbacks.gro_complete(skb, nhoff);
@@ -320,6 +336,9 @@ static struct sk_buff *gue_gro_receive(struct sock *sk,
struct gro_remcsum grc;
u8 proto;
+ if (!fou)
+ goto out;
+
skb_gro_remcsum_init(&grc);
off = skb_gro_offset(skb);
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 4d42d0756fd7..a5db7c67d61b 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -145,25 +145,27 @@ static struct pernet_operations iptable_nat_net_ops = {
static int __init iptable_nat_init(void)
{
- int ret = xt_register_template(&nf_nat_ipv4_table,
- iptable_nat_table_init);
+ int ret;
+ /* net->gen->ptr[iptable_nat_net_id] must be allocated
+ * before calling iptable_nat_table_init().
+ */
+ ret = register_pernet_subsys(&iptable_nat_net_ops);
if (ret < 0)
return ret;
- ret = register_pernet_subsys(&iptable_nat_net_ops);
- if (ret < 0) {
- xt_unregister_template(&nf_nat_ipv4_table);
- return ret;
- }
+ ret = xt_register_template(&nf_nat_ipv4_table,
+ iptable_nat_table_init);
+ if (ret < 0)
+ unregister_pernet_subsys(&iptable_nat_net_ops);
return ret;
}
static void __exit iptable_nat_exit(void)
{
- unregister_pernet_subsys(&iptable_nat_net_ops);
xt_unregister_template(&nf_nat_ipv4_table);
+ unregister_pernet_subsys(&iptable_nat_net_ops);
}
module_init(iptable_nat_init);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e03a342c9162..831a18dc7aa6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -4637,6 +4637,13 @@ int tcp_abort(struct sock *sk, int err)
/* Don't race with userspace socket closes such as tcp_close. */
lock_sock(sk);
+ /* Avoid closing the same socket twice. */
+ if (sk->sk_state == TCP_CLOSE) {
+ if (!has_current_bpf_ctx())
+ release_sock(sk);
+ return -ENOENT;
+ }
+
if (sk->sk_state == TCP_LISTEN) {
tcp_set_state(sk, TCP_CLOSE);
inet_csk_listen_stop(sk);
@@ -4646,16 +4653,13 @@ int tcp_abort(struct sock *sk, int err)
local_bh_disable();
bh_lock_sock(sk);
- if (!sock_flag(sk, SOCK_DEAD)) {
- if (tcp_need_reset(sk->sk_state))
- tcp_send_active_reset(sk, GFP_ATOMIC,
- SK_RST_REASON_NOT_SPECIFIED);
- tcp_done_with_error(sk, err);
- }
+ if (tcp_need_reset(sk->sk_state))
+ tcp_send_active_reset(sk, GFP_ATOMIC,
+ SK_RST_REASON_NOT_SPECIFIED);
+ tcp_done_with_error(sk, err);
bh_unlock_sock(sk);
local_bh_enable();
- tcp_write_queue_purge(sk);
if (!has_current_bpf_ctx())
release_sock(sk);
return 0;
diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c
index 85531437890c..db6516092daf 100644
--- a/net/ipv4/tcp_ao.c
+++ b/net/ipv4/tcp_ao.c
@@ -267,32 +267,49 @@ static void tcp_ao_key_free_rcu(struct rcu_head *head)
kfree_sensitive(key);
}
-void tcp_ao_destroy_sock(struct sock *sk, bool twsk)
+static void tcp_ao_info_free_rcu(struct rcu_head *head)
{
- struct tcp_ao_info *ao;
+ struct tcp_ao_info *ao = container_of(head, struct tcp_ao_info, rcu);
struct tcp_ao_key *key;
struct hlist_node *n;
+ hlist_for_each_entry_safe(key, n, &ao->head, node) {
+ hlist_del(&key->node);
+ tcp_sigpool_release(key->tcp_sigpool_id);
+ kfree_sensitive(key);
+ }
+ kfree(ao);
+ static_branch_slow_dec_deferred(&tcp_ao_needed);
+}
+
+static void tcp_ao_sk_omem_free(struct sock *sk, struct tcp_ao_info *ao)
+{
+ size_t total_ao_sk_mem = 0;
+ struct tcp_ao_key *key;
+
+ hlist_for_each_entry(key, &ao->head, node)
+ total_ao_sk_mem += tcp_ao_sizeof_key(key);
+ atomic_sub(total_ao_sk_mem, &sk->sk_omem_alloc);
+}
+
+void tcp_ao_destroy_sock(struct sock *sk, bool twsk)
+{
+ struct tcp_ao_info *ao;
+
if (twsk) {
ao = rcu_dereference_protected(tcp_twsk(sk)->ao_info, 1);
- tcp_twsk(sk)->ao_info = NULL;
+ rcu_assign_pointer(tcp_twsk(sk)->ao_info, NULL);
} else {
ao = rcu_dereference_protected(tcp_sk(sk)->ao_info, 1);
- tcp_sk(sk)->ao_info = NULL;
+ rcu_assign_pointer(tcp_sk(sk)->ao_info, NULL);
}
if (!ao || !refcount_dec_and_test(&ao->refcnt))
return;
- hlist_for_each_entry_safe(key, n, &ao->head, node) {
- hlist_del_rcu(&key->node);
- if (!twsk)
- atomic_sub(tcp_ao_sizeof_key(key), &sk->sk_omem_alloc);
- call_rcu(&key->rcu, tcp_ao_key_free_rcu);
- }
-
- kfree_rcu(ao, rcu);
- static_branch_slow_dec_deferred(&tcp_ao_needed);
+ if (!twsk)
+ tcp_ao_sk_omem_free(sk, ao);
+ call_rcu(&ao->rcu, tcp_ao_info_free_rcu);
}
void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, struct tcp_sock *tp)
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 53b0d62fd2c2..fe6178715ba0 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -577,7 +577,7 @@ out_err:
err = sk_stream_error(sk, msg->msg_flags, err);
release_sock(sk);
sk_psock_put(sk, psock);
- return copied ? copied : err;
+ return copied > 0 ? copied : err;
}
enum {
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 454362e359da..e37488d3453f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -238,9 +238,14 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
*/
if (unlikely(len != icsk->icsk_ack.rcv_mss)) {
u64 val = (u64)skb->len << TCP_RMEM_TO_WIN_SCALE;
+ u8 old_ratio = tcp_sk(sk)->scaling_ratio;
do_div(val, skb->truesize);
tcp_sk(sk)->scaling_ratio = val ? val : 1;
+
+ if (old_ratio != tcp_sk(sk)->scaling_ratio)
+ WRITE_ONCE(tcp_sk(sk)->window_clamp,
+ tcp_win_from_space(sk, sk->sk_rcvbuf));
}
icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
tcp_sk(sk)->advmss);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index fd17f25ff288..a4e510846905 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -97,6 +97,8 @@ static DEFINE_PER_CPU(struct sock_bh_locked, ipv4_tcp_sk) = {
.bh_lock = INIT_LOCAL_LOCK(bh_lock),
};
+static DEFINE_MUTEX(tcp_exit_batch_mutex);
+
static u32 tcp_v4_init_seq(const struct sk_buff *skb)
{
return secure_tcp_seq(ip_hdr(skb)->daddr,
@@ -3514,6 +3516,16 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
struct net *net;
+ /* make sure concurrent calls to tcp_sk_exit_batch from net_cleanup_work
+ * and failed setup_net error unwinding path are serialized.
+ *
+ * tcp_twsk_purge() handles twsk in any dead netns, not just those in
+ * net_exit_list, the thread that dismantles a particular twsk must
+ * do so without other thread progressing to refcount_dec_and_test() of
+ * tcp_death_row.tw_refcount.
+ */
+ mutex_lock(&tcp_exit_batch_mutex);
+
tcp_twsk_purge(net_exit_list);
list_for_each_entry(net, net_exit_list, exit_list) {
@@ -3521,6 +3533,8 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
WARN_ON_ONCE(!refcount_dec_and_test(&net->ipv4.tcp_death_row.tw_refcount));
tcp_fastopen_ctx_destroy(net);
}
+
+ mutex_unlock(&tcp_exit_batch_mutex);
}
static struct pernet_operations __net_initdata tcp_sk_ops = {
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 4b791e74529e..e4ad3311e148 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -140,6 +140,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
if (thlen < sizeof(*th))
goto out;
+ if (unlikely(skb_checksum_start(skb) != skb_transport_header(skb)))
+ goto out;
+
if (!pskb_may_pull(skb, thlen))
goto out;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index aa2e0a28ca61..d842303587af 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -278,6 +278,17 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
if (gso_skb->len <= sizeof(*uh) + mss)
return ERR_PTR(-EINVAL);
+ if (unlikely(skb_checksum_start(gso_skb) !=
+ skb_transport_header(gso_skb) &&
+ !(skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST)))
+ return ERR_PTR(-EINVAL);
+
+ /* We don't know if egress device can segment and checksum the packet
+ * when IPv6 extension headers are present. Fall back to software GSO.
+ */
+ if (gso_skb->ip_summed != CHECKSUM_PARTIAL)
+ features &= ~(NETIF_F_GSO_UDP_L4 | NETIF_F_CSUM_MASK);
+
if (skb_gso_ok(gso_skb, features | NETIF_F_GSO_ROBUST)) {
/* Packet is from an untrusted source, reset gso_segs. */
skb_shinfo(gso_skb)->gso_segs = DIV_ROUND_UP(gso_skb->len - sizeof(*uh),