summaryrefslogtreecommitdiffstats
path: root/net/mptcp
diff options
context:
space:
mode:
authorFlorian Westphal <fw@strlen.de>2021-09-24 23:12:34 +0200
committerDavid S. Miller <davem@davemloft.net>2021-09-25 12:36:51 +0200
commit0d199e4363b482badcedba764e2aceab53a4a10a (patch)
tree3faad2f883c63092c4f59dc262e56acfb6c5bf3e /net/mptcp
parentMerge tag 'mlx5-updates-2021-09-24' of git://git.kernel.org/pub/scm/linux/ker... (diff)
downloadlinux-0d199e4363b482badcedba764e2aceab53a4a10a.tar.xz
linux-0d199e4363b482badcedba764e2aceab53a4a10a.zip
mptcp: do not shrink snd_nxt when recovering
When recovering after a link failure, snd_nxt should not be set to a lower value. Else, update of snd_nxt is broken because: msk->snd_nxt += ret; (where ret is number of bytes sent) assumes that snd_nxt always moves forward. After reduction, its possible that snd_nxt update gets out of sync: dfrag we just sent might have had a data sequence number even past recovery_snd_nxt. This change factors the common msk state update to a helper and updates snd_nxt based on the current dfrag data sequence number. The conditional is required for the recovery phase where we may re-transmit old dfrags that are before current snd_nxt. After this change, snd_nxt only moves forward and covers all in-sequence data that was transmitted. recovery_snd_nxt is retained to detect when recovery has completed. Fixes: 1e1d9d6f119c5 ("mptcp: handle pending data on closed subflow") Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/mptcp')
-rw-r--r--net/mptcp/options.c8
-rw-r--r--net/mptcp/protocol.c43
2 files changed, 34 insertions, 17 deletions
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index c41273cefc51..1ec6529c4326 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -1019,11 +1019,9 @@ static void ack_update_msk(struct mptcp_sock *msk,
old_snd_una = msk->snd_una;
new_snd_una = mptcp_expand_seq(old_snd_una, mp_opt->data_ack, mp_opt->ack64);
- /* ACK for data not even sent yet and even above recovery bound? Ignore.*/
- if (unlikely(after64(new_snd_una, snd_nxt))) {
- if (!msk->recovery || after64(new_snd_una, msk->recovery_snd_nxt))
- new_snd_una = old_snd_una;
- }
+ /* ACK for data not even sent yet? Ignore.*/
+ if (unlikely(after64(new_snd_una, snd_nxt)))
+ new_snd_una = old_snd_una;
new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 7e5f76092b64..3d1757b8ef69 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1525,6 +1525,32 @@ static void mptcp_push_release(struct sock *sk, struct sock *ssk,
release_sock(ssk);
}
+static void mptcp_update_post_push(struct mptcp_sock *msk,
+ struct mptcp_data_frag *dfrag,
+ u32 sent)
+{
+ u64 snd_nxt_new = dfrag->data_seq;
+
+ dfrag->already_sent += sent;
+
+ msk->snd_burst -= sent;
+ msk->tx_pending_data -= sent;
+
+ snd_nxt_new += dfrag->already_sent;
+
+ /* snd_nxt_new can be smaller than snd_nxt in case mptcp
+ * is recovering after a failover. In that event, this re-sends
+ * old segments.
+ *
+ * Thus compute snd_nxt_new candidate based on
+ * the dfrag->data_seq that was sent and the data
+ * that has been handed to the subflow for transmission
+ * and skip update in case it was old dfrag.
+ */
+ if (likely(after64(snd_nxt_new, msk->snd_nxt)))
+ msk->snd_nxt = snd_nxt_new;
+}
+
void __mptcp_push_pending(struct sock *sk, unsigned int flags)
{
struct sock *prev_ssk = NULL, *ssk = NULL;
@@ -1568,12 +1594,10 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
}
info.sent += ret;
- dfrag->already_sent += ret;
- msk->snd_nxt += ret;
- msk->snd_burst -= ret;
- msk->tx_pending_data -= ret;
copied += ret;
len -= ret;
+
+ mptcp_update_post_push(msk, dfrag, ret);
}
WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
}
@@ -1626,13 +1650,11 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
goto out;
info.sent += ret;
- dfrag->already_sent += ret;
- msk->snd_nxt += ret;
- msk->snd_burst -= ret;
- msk->tx_pending_data -= ret;
copied += ret;
len -= ret;
first = false;
+
+ mptcp_update_post_push(msk, dfrag, ret);
}
WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
}
@@ -2230,15 +2252,12 @@ bool __mptcp_retransmit_pending_data(struct sock *sk)
return false;
}
- /* will accept ack for reijected data before re-sending them */
- if (!msk->recovery || after64(msk->snd_nxt, msk->recovery_snd_nxt))
- msk->recovery_snd_nxt = msk->snd_nxt;
+ msk->recovery_snd_nxt = msk->snd_nxt;
msk->recovery = true;
mptcp_data_unlock(sk);
msk->first_pending = rtx_head;
msk->tx_pending_data += msk->snd_nxt - rtx_head->data_seq;
- msk->snd_nxt = rtx_head->data_seq;
msk->snd_burst = 0;
/* be sure to clear the "sent status" on all re-injected fragments */