summary refs log tree commit diff stats
path: root/net/mptcp/protocol.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/mptcp/protocol.c')
-rw-r--r-- net/mptcp/protocol.c 120
1 files changed, 81 insertions, 39 deletions
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 939a5045181a..34dd0e278a82 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -97,12 +97,7 @@ static struct socket *__mptcp_tcp_fallback(struct mptcp_sock *msk)
if (likely(!__mptcp_needs_tcp_fallback(msk)))
return NULL;
- if (msk->subflow) {
- release_sock((struct sock *)msk);
- return msk->subflow;
- }
-
- return NULL;
+ return msk->subflow;
}
static bool __mptcp_can_create_subflow(const struct mptcp_sock *msk)
@@ -734,9 +729,10 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
goto out;
}
+fallback:
ssock = __mptcp_tcp_fallback(msk);
if (unlikely(ssock)) {
-fallback:
+ release_sock(sk);
pr_debug("fallback passthrough");
ret = sock_sendmsg(ssock, msg);
return ret >= 0 ? ret + copied : (copied ? copied : ret);
@@ -769,8 +765,14 @@ fallback:
if (ret < 0)
break;
if (ret == 0 && unlikely(__mptcp_needs_tcp_fallback(msk))) {
+ /* Can happen for passive sockets:
+ * 3WHS negotiated MPTCP, but first packet after is
+ * plain TCP (e.g. due to middlebox filtering unknown
+ * options).
+ *
+ * Fall back to TCP.
+ */
release_sock(ssk);
- ssock = __mptcp_tcp_fallback(msk);
goto fallback;
}
@@ -883,6 +885,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
ssock = __mptcp_tcp_fallback(msk);
if (unlikely(ssock)) {
fallback:
+ release_sock(sk);
pr_debug("fallback-read subflow=%p",
mptcp_subflow_ctx(ssock->sk));
copied = sock_recvmsg(ssock, msg, flags);
@@ -951,7 +954,8 @@ fallback:
pr_debug("block timeout %ld", timeo);
mptcp_wait_data(sk, &timeo);
- if (unlikely(__mptcp_tcp_fallback(msk)))
+ ssock = __mptcp_tcp_fallback(msk);
+ if (unlikely(ssock))
goto fallback;
}
@@ -1259,11 +1263,14 @@ static void mptcp_close(struct sock *sk, long timeout)
lock_sock(sk);
- mptcp_token_destroy(msk->token);
inet_sk_state_store(sk, TCP_CLOSE);
- __mptcp_flush_join_list(msk);
-
+ /* be sure to always acquire the join list lock, to sync vs
+ * mptcp_finish_join().
+ */
+ spin_lock_bh(&msk->join_list_lock);
+ list_splice_tail_init(&msk->join_list, &msk->conn_list);
+ spin_unlock_bh(&msk->join_list_lock);
list_splice_init(&msk->conn_list, &conn_list);
data_fin_tx_seq = msk->write_seq;
@@ -1313,11 +1320,12 @@ static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk)
static int mptcp_disconnect(struct sock *sk, int flags)
{
- lock_sock(sk);
- __mptcp_clear_xmit(sk);
- release_sock(sk);
- mptcp_cancel_work(sk);
- return tcp_disconnect(sk, flags);
+ /* Should never be called: inet_stream_connect() does call
+ * ->disconnect, but on the subflow socket, not on the mptcp one.
+ * Warn once and return 0 if this is reached anyway.
+ */
+ WARN_ON_ONCE(1);
+ return 0;
}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
@@ -1329,7 +1337,9 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
}
#endif
-struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req)
+struct sock *mptcp_sk_clone(const struct sock *sk,
+ const struct mptcp_options_received *mp_opt,
+ struct request_sock *req)
{
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
@@ -1352,26 +1362,30 @@ struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req)
msk->subflow = NULL;
if (unlikely(mptcp_token_new_accept(subflow_req->token, nsk))) {
+ nsk->sk_state = TCP_CLOSE;
bh_unlock_sock(nsk);
/* we can't call into mptcp_close() here - possible BH context
- * free the sock directly
+ * free the sock directly.
+ * sk_clone_lock() sets nsk refcnt to two, hence call sk_free()
+ * too.
*/
- nsk->sk_prot->destroy(nsk);
+ sk_common_release(nsk);
sk_free(nsk);
return NULL;
}
msk->write_seq = subflow_req->idsn + 1;
atomic64_set(&msk->snd_una, msk->write_seq);
- if (subflow_req->remote_key_valid) {
+ if (mp_opt->mp_capable) {
msk->can_ack = true;
- msk->remote_key = subflow_req->remote_key;
+ msk->remote_key = mp_opt->sndr_key;
mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
ack_seq++;
msk->ack_seq = ack_seq;
}
+ sock_reset_flag(nsk, SOCK_RCU_FREE);
/* will be fully established after successful MPC subflow creation */
inet_sk_state_store(nsk, TCP_SYN_RECV);
bh_unlock_sock(nsk);
@@ -1428,6 +1442,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
newsk = new_mptcp_sock;
mptcp_copy_inaddrs(newsk, ssk);
list_add(&subflow->node, &msk->conn_list);
+ inet_sk_state_store(newsk, TCP_ESTABLISHED);
bh_unlock_sock(new_mptcp_sock);
@@ -1445,6 +1460,7 @@ static void mptcp_destroy(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
+ mptcp_token_destroy(msk->token);
if (msk->cached_ext)
__skb_ext_put(msk->cached_ext);
@@ -1467,12 +1483,11 @@ static int mptcp_setsockopt(struct sock *sk, int level, int optname,
*/
lock_sock(sk);
ssock = __mptcp_tcp_fallback(msk);
+ release_sock(sk);
if (ssock)
return tcp_setsockopt(ssock->sk, level, optname, optval,
optlen);
- release_sock(sk);
-
return -EOPNOTSUPP;
}
@@ -1492,12 +1507,11 @@ static int mptcp_getsockopt(struct sock *sk, int level, int optname,
*/
lock_sock(sk);
ssock = __mptcp_tcp_fallback(msk);
+ release_sock(sk);
if (ssock)
return tcp_getsockopt(ssock->sk, level, optname, optval,
option);
- release_sock(sk);
-
return -EOPNOTSUPP;
}
@@ -1613,20 +1627,30 @@ bool mptcp_finish_join(struct sock *sk)
if (!msk->pm.server_side)
return true;
- /* passive connection, attach to msk socket */
+ if (!mptcp_pm_allow_new_subflow(msk))
+ return false;
+
+ /* active connections are already on conn_list, and we can't acquire
+ * msk lock here.
+ * use the join list lock as synchronization point and double-check
+ * msk status to avoid racing with mptcp_close()
+ */
+ spin_lock_bh(&msk->join_list_lock);
+ ret = inet_sk_state_load(parent) == TCP_ESTABLISHED;
+ if (ret && !WARN_ON_ONCE(!list_empty(&subflow->node)))
+ list_add_tail(&subflow->node, &msk->join_list);
+ spin_unlock_bh(&msk->join_list_lock);
+ if (!ret)
+ return false;
+
+ /* attach to msk socket only after we are sure he will deal with us
+ * at close time
+ */
parent_sock = READ_ONCE(parent->sk_socket);
if (parent_sock && !sk->sk_socket)
mptcp_sock_graft(sk, parent_sock);
-
- ret = mptcp_pm_allow_new_subflow(msk);
- if (ret) {
- /* active connections are already on conn_list */
- spin_lock_bh(&msk->join_list_lock);
- if (!WARN_ON_ONCE(!list_empty(&subflow->node)))
- list_add_tail(&subflow->node, &msk->join_list);
- spin_unlock_bh(&msk->join_list_lock);
- }
- return ret;
+ subflow->map_seq = msk->ack_seq;
+ return true;
}
bool mptcp_sk_is_subflow(const struct sock *sk)
@@ -1700,6 +1724,14 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
int err;
lock_sock(sock->sk);
+ if (sock->state != SS_UNCONNECTED && msk->subflow) {
+ /* pending connection or invalid state, let existing subflow
+ * cope with that
+ */
+ ssock = msk->subflow;
+ goto do_connect;
+ }
+
ssock = __mptcp_socket_create(msk, TCP_SYN_SENT);
if (IS_ERR(ssock)) {
err = PTR_ERR(ssock);
@@ -1714,9 +1746,17 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
mptcp_subflow_ctx(ssock->sk)->request_mptcp = 0;
#endif
+do_connect:
err = ssock->ops->connect(ssock, uaddr, addr_len, flags);
- inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk));
- mptcp_copy_inaddrs(sock->sk, ssock->sk);
+ sock->state = ssock->state;
+
+ /* ->connect() returns a negative errno, hence -EINPROGRESS. On success
+ * the msk state is moved to established by subflow_finish_connect()
+ */
+ if (!err || err == -EINPROGRESS)
+ mptcp_copy_inaddrs(sock->sk, ssock->sk);
+ else
+ inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk));
unlock:
release_sock(sock->sk);
@@ -1774,6 +1814,8 @@ static int mptcp_listen(struct socket *sock, int backlog)
goto unlock;
}
+ sock_set_flag(sock->sk, SOCK_RCU_FREE);
+
err = ssock->ops->listen(ssock, backlog);
inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk));
if (!err)