summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorFlorian Westphal <fw@strlen.de>2020-04-24 12:31:50 +0200
committerDavid S. Miller <davem@davemloft.net>2020-04-26 05:37:52 +0200
commit071c8ed6e88d2ac0a5f26948fb9c288fd4dd6e40 (patch)
tree2e7cbec60d5725716cfa20405843dbf3984fae4a /net
parentdpaa2-eth: add channel stat to debugfs (diff)
downloadlinux-071c8ed6e88d2ac0a5f26948fb9c288fd4dd6e40.tar.xz
linux-071c8ed6e88d2ac0a5f26948fb9c288fd4dd6e40.zip
tcp: mptcp: use mptcp receive buffer space to select rcv window
In MPTCP, the receive window is shared across all subflows, because it refers to the mptcp-level sequence space. MPTCP receivers already place incoming packets on the mptcp socket receive queue and will charge it to the mptcp socket rcvbuf until userspace consumes the data. Update __tcp_select_window to use the occupancy of the parent/mptcp socket instead of the subflow socket in case the tcp socket is part of a logical mptcp connection. This commit doesn't change choice of initial window for passive or active connections. While it would be possible to change those as well, this adds complexity (especially when handling MP_JOIN requests). Furthermore, the MPTCP RFC specifically says that a MPTCP sender 'MUST NOT use the RCV.WND field of a TCP segment at the connection level if it does not also carry a DSS option with a Data ACK field.' SYN/SYNACK packets do not carry a DSS option with a Data ACK field. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/tcp_output.c8
-rw-r--r--net/mptcp/subflow.c18
2 files changed, 24 insertions, 2 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 2f45cde168c4..ba4482130f08 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2772,8 +2772,12 @@ u32 __tcp_select_window(struct sock *sk)
int mss = icsk->icsk_ack.rcv_mss;
int free_space = tcp_space(sk);
int allowed_space = tcp_full_space(sk);
- int full_space = min_t(int, tp->window_clamp, allowed_space);
- int window;
+ int full_space, window;
+
+ if (sk_is_mptcp(sk))
+ mptcp_space(sk, &free_space, &allowed_space);
+
+ full_space = min_t(int, tp->window_clamp, allowed_space);
if (unlikely(mss > full_space)) {
mss = full_space;
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index fabd06f2ff45..87c094702d63 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -821,6 +821,24 @@ bool mptcp_subflow_data_available(struct sock *sk)
return subflow->data_avail;
}
+/* If ssk has an mptcp parent socket, use the mptcp rcvbuf occupancy,
+ * not the ssk one.
+ *
+ * In mptcp, rwin is about the mptcp-level connection data.
+ *
+ * Data that is still on the ssk rx queue can thus be ignored,
+ * as far as mptcp peer is concerened that data is still inflight.
+ * DSS ACK is updated when skb is moved to the mptcp rx queue.
+ */
+void mptcp_space(const struct sock *ssk, int *space, int *full_space)
+{
+ const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ const struct sock *sk = subflow->conn;
+
+ *space = tcp_space(sk);
+ *full_space = tcp_full_space(sk);
+}
+
static void subflow_data_ready(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);