summaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r--net/ipv4/tcp.c131
1 files changed, 116 insertions, 15 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8c5cd9efebbc..7f4056785acc 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -252,6 +252,7 @@
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/poll.h>
+#include <linux/inet_diag.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/skbuff.h>
@@ -401,6 +402,7 @@ void tcp_init_sock(struct sock *sk)
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
tp->snd_cwnd_clamp = ~0;
tp->mss_cache = TCP_MSS_DEFAULT;
+ u64_stats_init(&tp->syncp);
tp->reordering = sysctl_tcp_reordering;
tcp_enable_early_retrans(tp);
@@ -693,8 +695,9 @@ static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
struct tcp_splice_state *tss = rd_desc->arg.data;
int ret;
- ret = skb_splice_bits(skb, offset, tss->pipe, min(rd_desc->count, len),
- tss->flags);
+ ret = skb_splice_bits(skb, skb->sk, offset, tss->pipe,
+ min(rd_desc->count, len), tss->flags,
+ skb_socket_splice);
if (ret > 0)
rd_desc->count -= ret;
return ret;
@@ -807,16 +810,28 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
}
EXPORT_SYMBOL(tcp_splice_read);
-struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
+struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
+ bool force_schedule)
{
struct sk_buff *skb;
/* The TCP header must be at least 32-bit aligned. */
size = ALIGN(size, 4);
+ if (unlikely(tcp_under_memory_pressure(sk)))
+ sk_mem_reclaim_partial(sk);
+
skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
- if (skb) {
- if (sk_wmem_schedule(sk, skb->truesize)) {
+ if (likely(skb)) {
+ bool mem_scheduled;
+
+ if (force_schedule) {
+ mem_scheduled = true;
+ sk_forced_mem_schedule(sk, skb->truesize);
+ } else {
+ mem_scheduled = sk_wmem_schedule(sk, skb->truesize);
+ }
+ if (likely(mem_scheduled)) {
skb_reserve(skb, sk->sk_prot->max_header);
/*
* Make sure that we have exactly size bytes
@@ -906,7 +921,8 @@ new_segment:
if (!sk_stream_memory_free(sk))
goto wait_for_sndbuf;
- skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
+ skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
+ skb_queue_empty(&sk->sk_write_queue));
if (!skb)
goto wait_for_memory;
@@ -985,6 +1001,9 @@ do_error:
if (copied)
goto out;
out_err:
+ /* make sure we wake any epoll edge trigger waiter */
+ if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
+ sk->sk_write_space(sk);
return sk_stream_error(sk, flags, err);
}
@@ -1142,7 +1161,8 @@ new_segment:
skb = sk_stream_alloc_skb(sk,
select_size(sk, sg),
- sk->sk_allocation);
+ sk->sk_allocation,
+ skb_queue_empty(&sk->sk_write_queue));
if (!skb)
goto wait_for_memory;
@@ -1273,6 +1293,9 @@ do_error:
goto out;
out_err:
err = sk_stream_error(sk, flags, err);
+ /* make sure we wake any epoll edge trigger waiter */
+ if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
+ sk->sk_write_space(sk);
release_sock(sk);
return err;
}
@@ -2481,6 +2504,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
icsk->icsk_syn_retries = val;
break;
+ case TCP_SAVE_SYN:
+ if (val < 0 || val > 1)
+ err = -EINVAL;
+ else
+ tp->save_syn = val;
+ break;
+
case TCP_LINGER2:
if (val < 0)
tp->linger2 = -1;
@@ -2543,10 +2573,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_FASTOPEN:
if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
- TCPF_LISTEN)))
+ TCPF_LISTEN))) {
+ tcp_fastopen_init_key_once(true);
+
err = fastopen_init_queue(sk, val);
- else
+ } else {
err = -EINVAL;
+ }
break;
case TCP_TIMESTAMP:
if (!tp->repair)
@@ -2592,14 +2625,17 @@ EXPORT_SYMBOL(compat_tcp_setsockopt);
#endif
/* Return information about state of tcp endpoint in API format. */
-void tcp_get_info(const struct sock *sk, struct tcp_info *info)
+void tcp_get_info(struct sock *sk, struct tcp_info *info)
{
- const struct tcp_sock *tp = tcp_sk(sk);
+ const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
const struct inet_connection_sock *icsk = inet_csk(sk);
u32 now = tcp_time_stamp;
+ unsigned int start;
u32 rate;
memset(info, 0, sizeof(*info));
+ if (sk->sk_type != SOCK_STREAM)
+ return;
info->tcpi_state = sk->sk_state;
info->tcpi_ca_state = icsk->icsk_ca_state;
@@ -2663,6 +2699,14 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info)
rate = READ_ONCE(sk->sk_max_pacing_rate);
info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL;
+
+ do {
+ start = u64_stats_fetch_begin_irq(&tp->syncp);
+ info->tcpi_bytes_acked = tp->bytes_acked;
+ info->tcpi_bytes_received = tp->bytes_received;
+ } while (u64_stats_fetch_retry_irq(&tp->syncp, start));
+ info->tcpi_segs_out = tp->segs_out;
+ info->tcpi_segs_in = tp->segs_in;
}
EXPORT_SYMBOL_GPL(tcp_get_info);
@@ -2734,6 +2778,26 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
return -EFAULT;
return 0;
}
+ case TCP_CC_INFO: {
+ const struct tcp_congestion_ops *ca_ops;
+ union tcp_cc_info info;
+ size_t sz = 0;
+ int attr;
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ ca_ops = icsk->icsk_ca_ops;
+ if (ca_ops && ca_ops->get_info)
+ sz = ca_ops->get_info(sk, ~0U, &attr, &info);
+
+ len = min_t(unsigned int, len, sz);
+ if (put_user(len, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, &info, len))
+ return -EFAULT;
+ return 0;
+ }
case TCP_QUICKACK:
val = !icsk->icsk_ack.pingpong;
break;
@@ -2792,6 +2856,42 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
case TCP_NOTSENT_LOWAT:
val = tp->notsent_lowat;
break;
+ case TCP_SAVE_SYN:
+ val = tp->save_syn;
+ break;
+ case TCP_SAVED_SYN: {
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ lock_sock(sk);
+ if (tp->saved_syn) {
+ if (len < tp->saved_syn[0]) {
+ if (put_user(tp->saved_syn[0], optlen)) {
+ release_sock(sk);
+ return -EFAULT;
+ }
+ release_sock(sk);
+ return -EINVAL;
+ }
+ len = tp->saved_syn[0];
+ if (put_user(len, optlen)) {
+ release_sock(sk);
+ return -EFAULT;
+ }
+ if (copy_to_user(optval, tp->saved_syn + 1, len)) {
+ release_sock(sk);
+ return -EFAULT;
+ }
+ tcp_saved_syn_free(tp);
+ release_sock(sk);
+ } else {
+ release_sock(sk);
+ len = 0;
+ if (put_user(len, optlen))
+ return -EFAULT;
+ }
+ return 0;
+ }
default:
return -ENOPROTOOPT;
}
@@ -2996,11 +3096,12 @@ __setup("thash_entries=", set_thash_entries);
static void __init tcp_init_mem(void)
{
- unsigned long limit = nr_free_buffer_pages() / 8;
+ unsigned long limit = nr_free_buffer_pages() / 16;
+
limit = max(limit, 128UL);
- sysctl_tcp_mem[0] = limit / 4 * 3;
- sysctl_tcp_mem[1] = limit;
- sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2;
+ sysctl_tcp_mem[0] = limit / 4 * 3; /* 4.68 % */
+ sysctl_tcp_mem[1] = limit; /* 6.25 % */
+ sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2; /* 9.37 % */
}
void __init tcp_init(void)