diff options
Diffstat (limited to 'net/mptcp/subflow.c')
-rw-r--r-- | net/mptcp/subflow.c | 181 |
1 files changed, 129 insertions, 52 deletions
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 50a8bea987c6..4931a29a6f08 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -124,16 +124,14 @@ static void subflow_init_req(struct request_sock *req, { struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener); struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); - struct tcp_options_received rx_opt; + struct mptcp_options_received mp_opt; pr_debug("subflow_req=%p, listener=%p", subflow_req, listener); - memset(&rx_opt.mptcp, 0, sizeof(rx_opt.mptcp)); - mptcp_get_options(skb, &rx_opt); + mptcp_get_options(skb, &mp_opt); subflow_req->mp_capable = 0; subflow_req->mp_join = 0; - subflow_req->remote_key_valid = 0; #ifdef CONFIG_TCP_MD5SIG /* no MPTCP if MD5SIG is enabled on this socket or we may run out of @@ -143,16 +141,16 @@ static void subflow_init_req(struct request_sock *req, return; #endif - if (rx_opt.mptcp.mp_capable) { + if (mp_opt.mp_capable) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE); - if (rx_opt.mptcp.mp_join) + if (mp_opt.mp_join) return; - } else if (rx_opt.mptcp.mp_join) { + } else if (mp_opt.mp_join) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX); } - if (rx_opt.mptcp.mp_capable && listener->request_mptcp) { + if (mp_opt.mp_capable && listener->request_mptcp) { int err; err = mptcp_token_new_request(req); @@ -160,13 +158,13 @@ static void subflow_init_req(struct request_sock *req, subflow_req->mp_capable = 1; subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq; - } else if (rx_opt.mptcp.mp_join && listener->request_mptcp) { + } else if (mp_opt.mp_join && listener->request_mptcp) { subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq; subflow_req->mp_join = 1; - subflow_req->backup = rx_opt.mptcp.backup; - subflow_req->remote_id = rx_opt.mptcp.join_id; - subflow_req->token = rx_opt.mptcp.token; - subflow_req->remote_nonce = rx_opt.mptcp.nonce; + subflow_req->backup = mp_opt.backup; + subflow_req->remote_id = mp_opt.join_id; + 
subflow_req->token = mp_opt.token; + subflow_req->remote_nonce = mp_opt.nonce; pr_debug("token=%u, remote_nonce=%u", subflow_req->token, subflow_req->remote_nonce); if (!subflow_token_join_request(req, skb)) { @@ -222,23 +220,47 @@ static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow) static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct mptcp_options_received mp_opt; struct sock *parent = subflow->conn; + struct tcp_sock *tp = tcp_sk(sk); subflow->icsk_af_ops->sk_rx_dst_set(sk, skb); - if (inet_sk_state_load(parent) != TCP_ESTABLISHED) { + if (inet_sk_state_load(parent) == TCP_SYN_SENT) { inet_sk_state_store(parent, TCP_ESTABLISHED); parent->sk_state_change(parent); } - if (subflow->conn_finished || !tcp_sk(sk)->is_mptcp) + /* be sure no special action on any packet other than syn-ack */ + if (subflow->conn_finished) + return; + + subflow->conn_finished = 1; + + mptcp_get_options(skb, &mp_opt); + if (subflow->request_mptcp && mp_opt.mp_capable) { + subflow->mp_capable = 1; + subflow->can_ack = 1; + subflow->remote_key = mp_opt.sndr_key; + pr_debug("subflow=%p, remote_key=%llu", subflow, + subflow->remote_key); + } else if (subflow->request_join && mp_opt.mp_join) { + subflow->mp_join = 1; + subflow->thmac = mp_opt.thmac; + subflow->remote_nonce = mp_opt.nonce; + pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow, + subflow->thmac, subflow->remote_nonce); + } else if (subflow->request_mptcp) { + tp->is_mptcp = 0; + } + + if (!tp->is_mptcp) return; if (subflow->mp_capable) { pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk), subflow->remote_key); mptcp_finish_connect(sk); - subflow->conn_finished = 1; if (skb) { pr_debug("synack seq=%u", TCP_SKB_CB(skb)->seq); @@ -265,7 +287,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) if (!mptcp_finish_join(sk)) goto do_reset; - subflow->conn_finished = 1; 
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX); } else { do_reset: @@ -323,7 +344,7 @@ drop: /* validate hmac received in third ACK */ static bool subflow_hmac_valid(const struct request_sock *req, - const struct tcp_options_received *rx_opt) + const struct mptcp_options_received *mp_opt) { const struct mptcp_subflow_request_sock *subflow_req; u8 hmac[MPTCPOPT_HMAC_LEN]; @@ -340,13 +361,53 @@ static bool subflow_hmac_valid(const struct request_sock *req, subflow_req->local_nonce, hmac); ret = true; - if (crypto_memneq(hmac, rx_opt->mptcp.hmac, sizeof(hmac))) + if (crypto_memneq(hmac, mp_opt->hmac, sizeof(hmac))) ret = false; sock_put((struct sock *)msk); return ret; } +static void mptcp_sock_destruct(struct sock *sk) +{ + /* if new mptcp socket isn't accepted, it is free'd + * from the tcp listener sockets request queue, linked + * from req->sk. The tcp socket is released. + * This calls the ULP release function which will + * also remove the mptcp socket, via + * sock_put(ctx->conn). + * + * Problem is that the mptcp socket will not be in + * SYN_RECV state and doesn't have SOCK_DEAD flag. + * Both result in warnings from inet_sock_destruct. 
+ */ + + if (sk->sk_state == TCP_SYN_RECV) { + sk->sk_state = TCP_CLOSE; + WARN_ON_ONCE(sk->sk_socket); + sock_orphan(sk); + } + + inet_sock_destruct(sk); +} + +static void mptcp_force_close(struct sock *sk) +{ + inet_sk_state_store(sk, TCP_CLOSE); + sk_common_release(sk); +} + +static void subflow_ulp_fallback(struct sock *sk, + struct mptcp_subflow_context *old_ctx) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + mptcp_subflow_tcp_fallback(sk, old_ctx); + icsk->icsk_ulp_ops = NULL; + rcu_assign_pointer(icsk->icsk_ulp_data, NULL); + tcp_sk(sk)->is_mptcp = 0; +} + static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, @@ -356,13 +417,18 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, { struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk); struct mptcp_subflow_request_sock *subflow_req; - struct tcp_options_received opt_rx; + struct mptcp_options_received mp_opt; bool fallback_is_fatal = false; struct sock *new_msk = NULL; + bool fallback = false; struct sock *child; pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn); + /* we need later a valid 'mp_capable' value even when options are not + * parsed + */ + mp_opt.mp_capable = 0; if (tcp_rsk(req)->is_mptcp == 0) goto create_child; @@ -377,26 +443,21 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, goto create_msk; } - opt_rx.mptcp.mp_capable = 0; - mptcp_get_options(skb, &opt_rx); - if (opt_rx.mptcp.mp_capable) { - subflow_req->remote_key = opt_rx.mptcp.sndr_key; - subflow_req->remote_key_valid = 1; - } else { - subflow_req->mp_capable = 0; + mptcp_get_options(skb, &mp_opt); + if (!mp_opt.mp_capable) { + fallback = true; goto create_child; } create_msk: - new_msk = mptcp_sk_clone(listener->conn, req); + new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req); if (!new_msk) - subflow_req->mp_capable = 0; + fallback = true; } else if (subflow_req->mp_join) { fallback_is_fatal = true; 
- opt_rx.mptcp.mp_join = 0; - mptcp_get_options(skb, &opt_rx); - if (!opt_rx.mptcp.mp_join || - !subflow_hmac_valid(req, &opt_rx)) { + mptcp_get_options(skb, &mp_opt); + if (!mp_opt.mp_join || + !subflow_hmac_valid(req, &mp_opt)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); return NULL; } @@ -409,12 +470,18 @@ create_child: if (child && *own_req) { struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child); - /* we have null ctx on TCP fallback, which is fatal on - * MPJ handshake + /* we need to fallback on ctx allocation failure and on pre-reqs + * checking above. In the latter scenario we additionally need + * to reset the context to non MPTCP status. */ - if (!ctx) { + if (!ctx || fallback) { if (fallback_is_fatal) goto close_child; + + if (ctx) { + subflow_ulp_fallback(child, ctx); + kfree_rcu(ctx, rcu); + } goto out; } @@ -422,10 +489,17 @@ create_child: /* new mpc subflow takes ownership of the newly * created mptcp socket */ - inet_sk_state_store(new_msk, TCP_ESTABLISHED); + new_msk->sk_destruct = mptcp_sock_destruct; mptcp_pm_new_connection(mptcp_sk(new_msk), 1); ctx->conn = new_msk; new_msk = NULL; + + /* with OoO packets we can reach here without ingress + * mpc option + */ + ctx->remote_key = mp_opt.sndr_key; + ctx->fully_established = mp_opt.mp_capable; + ctx->can_ack = mp_opt.mp_capable; } else if (ctx->mp_join) { struct mptcp_sock *owner; @@ -444,7 +518,14 @@ create_child: out: /* dispose of the left over mptcp master, if any */ if (unlikely(new_msk)) - sock_put(new_msk); + mptcp_force_close(new_msk); + + /* check for expected invariant - should never trigger, just help + * catching earlier subtle bugs + */ + WARN_ON_ONCE(child && *own_req && tcp_sk(child)->is_mptcp && + (!mptcp_subflow_ctx(child) || + !mptcp_subflow_ctx(child)->conn)); return child; close_child: @@ -931,6 +1012,16 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock) if (err) return err; + /* the newly created socket really belongs to the owning 
MPTCP master + * socket, even if for additional subflows the allocation is performed + * by a kernel workqueue. Adjust inode references, so that the + * procfs/diag interfaces really show this one belonging to the correct + * user. + */ + SOCK_INODE(sf)->i_ino = SOCK_INODE(sk->sk_socket)->i_ino; + SOCK_INODE(sf)->i_uid = SOCK_INODE(sk->sk_socket)->i_uid; + SOCK_INODE(sf)->i_gid = SOCK_INODE(sk->sk_socket)->i_gid; + subflow = mptcp_subflow_ctx(sf->sk); pr_debug("subflow=%p", subflow); @@ -1047,17 +1138,6 @@ static void subflow_ulp_release(struct sock *sk) kfree_rcu(ctx, rcu); } -static void subflow_ulp_fallback(struct sock *sk, - struct mptcp_subflow_context *old_ctx) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - - mptcp_subflow_tcp_fallback(sk, old_ctx); - icsk->icsk_ulp_ops = NULL; - rcu_assign_pointer(icsk->icsk_ulp_data, NULL); - tcp_sk(sk)->is_mptcp = 0; -} - static void subflow_ulp_clone(const struct request_sock *req, struct sock *newsk, const gfp_t priority) @@ -1091,9 +1171,6 @@ static void subflow_ulp_clone(const struct request_sock *req, * is fully established only after we receive the remote key */ new_ctx->mp_capable = 1; - new_ctx->fully_established = subflow_req->remote_key_valid; - new_ctx->can_ack = subflow_req->remote_key_valid; - new_ctx->remote_key = subflow_req->remote_key; new_ctx->local_key = subflow_req->local_key; new_ctx->token = subflow_req->token; new_ctx->ssn_offset = subflow_req->ssn_offset; |