summaryrefslogtreecommitdiffstats
path: root/net/ipv6/tcp_ipv6.c
diff options
context:
space:
mode:
authorAlexander Duyck <alexanderduyck@fb.com>2020-11-21 04:47:44 +0100
committerJakub Kicinski <kuba@kernel.org>2020-11-24 23:12:55 +0100
commit407c85c7ddd6b84d3cbdd2275616f70c27c17913 (patch)
tree26327cd536af5df1e55f646cabb3a3f70f1173da /net/ipv6/tcp_ipv6.c
parentdevlink: Fix reload stats structure (diff)
downloadlinux-407c85c7ddd6b84d3cbdd2275616f70c27c17913.tar.xz
linux-407c85c7ddd6b84d3cbdd2275616f70c27c17913.zip
tcp: Set ECT0 bit in tos/tclass for synack when BPF needs ECN
When a BPF program is used to select between a type of TCP congestion control algorithm that uses either ECN or not there is a case where the synack for the frame was coming up without the ECT0 bit set. A bit of research found that this was due to the final socket being configured to dctcp while the listener socket was staying in cubic. To reproduce it all that is needed is to monitor TCP traffic while running the sample bpf program "samples/bpf/tcp_cong_kern.c". What is observed, assuming tcp_dctcp module is loaded or compiled in and the traffic matches the rules in the sample file, is that for all frames with the exception of the synack the ECT0 bit is set. To address that it is necessary to make one additional call to tcp_bpf_ca_needs_ecn using the request socket and then use the output of that to set the ECT0 bit for the tos/tclass of the packet. Fixes: 91b5b21c7c16 ("bpf: Add support for changing congestion control") Signed-off-by: Alexander Duyck <alexanderduyck@fb.com> Link: https://lore.kernel.org/r/160593039663.2604.1374502006916871573.stgit@localhost.localdomain Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/ipv6/tcp_ipv6.c')
-rw-r--r--net/ipv6/tcp_ipv6.c9
1 files changed, 7 insertions, 2 deletions
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d2502911b7fa..992cbf3eb9e3 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -527,11 +527,16 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
if (np->repflow && ireq->pktopts)
fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
- rcu_read_lock();
- opt = ireq->ipv6_opt;
tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
tcp_rsk(req)->syn_tos & ~INET_ECN_MASK :
np->tclass;
+
+ if (!INET_ECN_is_capable(tclass) &&
+ tcp_bpf_ca_needs_ecn((struct sock *)req))
+ tclass |= INET_ECN_ECT_0;
+
+ rcu_read_lock();
+ opt = ireq->ipv6_opt;
if (!opt)
opt = rcu_dereference(np->opt);
err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt,