summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2017-05-12 00:24:41 +0200
committerDavid S. Miller <davem@davemloft.net>2017-05-12 03:32:48 +0200
commitf6ba8d33cfbb46df569972e64dbb5bb7e929bfd9 (patch)
treea341e4360d881c74496da6f86d7127a9ffe65191
parentMerge branch 'generic-xdp-followups' (diff)
downloadlinux-f6ba8d33cfbb46df569972e64dbb5bb7e929bfd9.tar.xz
linux-f6ba8d33cfbb46df569972e64dbb5bb7e929bfd9.zip
netem: fix skb_orphan_partial()
I should have known that lowering skb->truesize was dangerous :/ In case packets are not leaving the host via a standard Ethernet device, but looped back to local sockets, bad things can happen, as reported by Michael Madsen ( https://bugzilla.kernel.org/show_bug.cgi?id=195713 ) So instead of tweaking skb->truesize, lets change skb->destructor and keep a reference on the owner socket via its sk_refcnt. Fixes: f2f872f9272a ("netem: Introduce skb_orphan_partial() helper") Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: Michael Madsen <mkm@nabto.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/core/sock.c20
1 files changed, 8 insertions, 12 deletions
diff --git a/net/core/sock.c b/net/core/sock.c
index 79c6aee6af9b..e43e71d7856b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1803,28 +1803,24 @@ EXPORT_SYMBOL(skb_set_owner_w);
* delay queue. We want to allow the owner socket to send more
* packets, as if they were already TX completed by a typical driver.
* But we also want to keep skb->sk set because some packet schedulers
- * rely on it (sch_fq for example). So we set skb->truesize to a small
- * amount (1) and decrease sk_wmem_alloc accordingly.
+ * rely on it (sch_fq for example).
*/
void skb_orphan_partial(struct sk_buff *skb)
{
- /* If this skb is a TCP pure ACK or already went here,
- * we have nothing to do. 2 is already a very small truesize.
- */
- if (skb->truesize <= 2)
+ if (skb_is_tcp_pure_ack(skb))
return;
- /* TCP stack sets skb->ooo_okay based on sk_wmem_alloc,
- * so we do not completely orphan skb, but transfert all
- * accounted bytes but one, to avoid unexpected reorders.
- */
if (skb->destructor == sock_wfree
#ifdef CONFIG_INET
|| skb->destructor == tcp_wfree
#endif
) {
- atomic_sub(skb->truesize - 1, &skb->sk->sk_wmem_alloc);
- skb->truesize = 1;
+ struct sock *sk = skb->sk;
+
+ if (atomic_inc_not_zero(&sk->sk_refcnt)) {
+ atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
+ skb->destructor = sock_efree;
+ }
} else {
skb_orphan(skb);
}