summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2018-11-11 16:34:28 +0100
committerDavid S. Miller <davem@davemloft.net>2018-11-11 22:57:03 +0100
commitc73e5807e4f6fc6d373a5db55b45f639f8bb6328 (patch)
tree09c332f796e245e7ec15cc31905e61f793da451b
parentMerge branch 'tcp-tso-defer-improvements' (diff)
downloadlinux-c73e5807e4f6fc6d373a5db55b45f639f8bb6328.tar.xz
linux-c73e5807e4f6fc6d373a5db55b45f639f8bb6328.zip
tcp: tsq: no longer use limit_output_bytes for paced flows
FQ pacing guarantees that paced packets queued by one flow do not add head-of-line blocking for other flows. After TCP GSO conversion, increasing limit_output_bytes to 1 MB is safe, since this maps to 16 skbs at most in qdisc or device queues. (or slightly more if some drivers lower {gso_max_segs|size}) We still can queue at most 1 ms worth of traffic (this can be scaled by wifi drivers if they need to) Tested: # ethtool -c eth0 | egrep "tx-usecs:|tx-frames:" # 40 Gbit mlx4 NIC tx-usecs: 16 tx-frames: 16 # tc qdisc replace dev eth0 root fq # for f in {1..10};do netperf -P0 -H lpaa24,6 -o THROUGHPUT;done Before patch: 27711 26118 27107 27377 27712 27388 27340 27117 27278 27509 After patch: 37434 36949 36658 36998 37711 37291 37605 36659 36544 37349 Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--Documentation/networking/ip-sysctl.txt2
-rw-r--r--net/ipv4/tcp_ipv4.c4
-rw-r--r--net/ipv4/tcp_output.c5
3 files changed, 6 insertions, 5 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index aa9e6a331679..af2a69439b93 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -759,7 +759,7 @@ tcp_limit_output_bytes - INTEGER
flows, for typical pfifo_fast qdiscs. tcp_limit_output_bytes
limits the number of bytes on qdisc or device to reduce artificial
RTT/cwnd and reduce bufferbloat.
- Default: 262144
+ Default: 1048576 (16 * 65536)
tcp_challenge_ack_limit - INTEGER
Limits number of Challenge ACK sent per second, as recommended
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5424a4077c27..0952d4b772e7 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2574,8 +2574,8 @@ static int __net_init tcp_sk_init(struct net *net)
* which are too large can cause TCP streams to be bursty.
*/
net->ipv4.sysctl_tcp_tso_win_divisor = 3;
- /* Default TSQ limit of four TSO segments */
- net->ipv4.sysctl_tcp_limit_output_bytes = 262144;
+ /* Default TSQ limit of 16 TSO segments */
+ net->ipv4.sysctl_tcp_limit_output_bytes = 16 * 65536;
/* rfc5961 challenge ack rate limiting */
net->ipv4.sysctl_tcp_challenge_ack_limit = 1000;
net->ipv4.sysctl_tcp_min_tso_segs = 2;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 75dcf4daca72..d40d4cc53319 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2220,8 +2220,9 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
limit = max_t(unsigned long,
2 * skb->truesize,
sk->sk_pacing_rate >> sk->sk_pacing_shift);
- limit = min_t(unsigned long, limit,
- sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
+ if (sk->sk_pacing_status == SK_PACING_NONE)
+ limit = min_t(unsigned long, limit,
+ sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
limit <<= factor;
if (refcount_read(&sk->sk_wmem_alloc) > limit) {