tcp: avoid retransmits of TCP packets hanging in host queues

In commit 0e280af026a5 ("tcp: introduce TCPSpuriousRtxHostQueues SNMP counter") we added a logic to detect when a packet was retransmitted while the prior clone was still in a qdisc or driver queue. We are now confident we can do better, and catch the problem before we fragment a TSO packet before retransmit, or in TLP path. This patch fully exploits the logic by simply canceling the spurious retransmit. Original packet is in a queue and will eventually leave the host. This helps to avoid network collapses when some events make the RTO estimations very wrong, particularly when dealing with huge number of sockets with synchronized blast. Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: Yuchung Cheng <ycheng@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
author: Eric Dumazet <edumazet@google.com> 2014-04-21 02:58:17 +0200
committer: David S. Miller <davem@davemloft.net> 2014-04-23 03:27:57 +0200
commit: 1f3279ae0c13cd742731726b0ed195d5f09b14e4 (patch)
tree: 48cabf2c099db7586abdbf22ac92c1bcb2e1cb89 /net/ipv4/tcp_output.c
parent: ipv6: support IFA_F_MANAGETEMPADDR for address deletion too (diff)
download: linux-1f3279ae0c13cd742731726b0ed195d5f09b14e4.tar.xz
linux-1f3279ae0c13cd742731726b0ed195d5f09b14e4.zip
1 files changed, 26 insertions, 8 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 29dde97c3c41..20847de991ea 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -878,15 +878,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	BUG_ON(!skb || !tcp_skb_pcount(skb));
 
 	if (clone_it) {
-		const struct sk_buff *fclone = skb + 1;
-
 		skb_mstamp_get(&skb->skb_mstamp);
 
-		if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
-			     fclone->fclone == SKB_FCLONE_CLONE))
-			NET_INC_STATS(sock_net(sk),
-				      LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
-
 		if (unlikely(skb_cloned(skb)))
 			skb = pskb_copy(skb, gfp_mask);
 		else
@@ -2061,6 +2054,25 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 	return true;
 }
 
+/* Thanks to skb fast clones, we can detect if a prior transmit of
+ * a packet is still in a qdisc or driver queue.
+ * In this case, there is very little point doing a retransmit !
+ * Note: This is called from BH context only.
+ */
+static bool skb_still_in_host_queue(const struct sock *sk,
+				    const struct sk_buff *skb)
+{
+	const struct sk_buff *fclone = skb + 1;
+
+	if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
+		     fclone->fclone == SKB_FCLONE_CLONE)) {
+		NET_INC_STATS_BH(sock_net(sk),
+				 LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
+		return true;
+	}
+	return false;
+}
+
 /* When probe timeout (PTO) fires, send a new segment if one exists, else
  * retransmit the last segment.
  */
@@ -2086,6 +2098,9 @@ void tcp_send_loss_probe(struct sock *sk)
 	if (WARN_ON(!skb))
 		goto rearm_timer;
 
+	if (skb_still_in_host_queue(sk, skb))
+		goto rearm_timer;
+
 	pcount = tcp_skb_pcount(skb);
 	if (WARN_ON(!pcount))
 		goto rearm_timer;
@@ -2407,6 +2422,9 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	    min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
 		return -EAGAIN;
 
+	if (skb_still_in_host_queue(sk, skb))
+		return -EBUSY;
+
 	if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
 		if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
 			BUG();
@@ -2500,7 +2518,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 		 * see tcp_input.c tcp_sacktag_write_queue().
 		 */
 		TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
-	} else {
+	} else if (err != -EBUSY) {
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
 	}
 	return err;
author	Eric Dumazet <edumazet@google.com>	2014-04-21 02:58:17 +0200
committer	David S. Miller <davem@davemloft.net>	2014-04-23 03:27:57 +0200
commit	1f3279ae0c13cd742731726b0ed195d5f09b14e4 (patch)
tree	48cabf2c099db7586abdbf22ac92c1bcb2e1cb89 /net/ipv4/tcp_output.c
parent	ipv6: support IFA_F_MANAGETEMPADDR for address deletion too (diff)
download	linux-1f3279ae0c13cd742731726b0ed195d5f09b14e4.tar.xz linux-1f3279ae0c13cd742731726b0ed195d5f09b14e4.zip