diff options
Diffstat (limited to 'net/mptcp')
-rw-r--r-- | net/mptcp/crypto.c | 28 | ||||
-rw-r--r-- | net/mptcp/options.c | 105 | ||||
-rw-r--r-- | net/mptcp/pm_netlink.c | 12 | ||||
-rw-r--r-- | net/mptcp/protocol.c | 120 | ||||
-rw-r--r-- | net/mptcp/protocol.h | 50 | ||||
-rw-r--r-- | net/mptcp/subflow.c | 194 |
6 files changed, 327 insertions, 182 deletions
diff --git a/net/mptcp/crypto.c b/net/mptcp/crypto.c index c151628bd416..3d980713a9e2 100644 --- a/net/mptcp/crypto.c +++ b/net/mptcp/crypto.c @@ -47,8 +47,6 @@ void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn) void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac) { u8 input[SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE]; - __be32 mptcp_hashed_key[SHA256_DIGEST_WORDS]; - __be32 *hash_out = (__force __be32 *)hmac; struct sha256_state state; u8 key1be[8]; u8 key2be[8]; @@ -61,7 +59,7 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac) put_unaligned_be64(key2, key2be); /* Generate key xored with ipad */ - memset(input, 0x36, SHA_MESSAGE_BYTES); + memset(input, 0x36, SHA256_BLOCK_SIZE); for (i = 0; i < 8; i++) input[i] ^= key1be[i]; for (i = 0; i < 8; i++) @@ -78,7 +76,7 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac) sha256_final(&state, &input[SHA256_BLOCK_SIZE]); /* Prepare second part of hmac */ - memset(input, 0x5C, SHA_MESSAGE_BYTES); + memset(input, 0x5C, SHA256_BLOCK_SIZE); for (i = 0; i < 8; i++) input[i] ^= key1be[i]; for (i = 0; i < 8; i++) @@ -86,11 +84,7 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac) sha256_init(&state); sha256_update(&state, input, SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE); - sha256_final(&state, (u8 *)mptcp_hashed_key); - - /* takes only first 160 bits */ - for (i = 0; i < 5; i++) - hash_out[i] = mptcp_hashed_key[i]; + sha256_final(&state, (u8 *)hmac); } #ifdef CONFIG_MPTCP_HMAC_TEST @@ -101,29 +95,29 @@ struct test_cast { }; /* we can't reuse RFC 4231 test vectors, as we have constraint on the - * input and key size, and we truncate the output. + * input and key size. */ static struct test_cast tests[] = { { .key = "0b0b0b0b0b0b0b0b", .msg = "48692054", - .result = "8385e24fb4235ac37556b6b886db106284a1da67", + .result = "8385e24fb4235ac37556b6b886db106284a1da671699f46db1f235ec622dcafa", }, { .key = "aaaaaaaaaaaaaaaa", .msg = "dddddddd", - .result = "2c5e219164ff1dca1c4a92318d847bb6b9d44492", + .result = "2c5e219164ff1dca1c4a92318d847bb6b9d44492984e1eb71aff9022f71046e9", }, { .key = "0102030405060708", .msg = "cdcdcdcd", - .result = "e73b9ba9969969cefb04aa0d6df18ec2fcc075b6", + .result = "e73b9ba9969969cefb04aa0d6df18ec2fcc075b6f23b4d8c4da736a5dbbc6e7d", }, }; static int __init test_mptcp_crypto(void) { - char hmac[20], hmac_hex[41]; + char hmac[32], hmac_hex[65]; u32 nonce1, nonce2; u64 key1, key2; u8 msg[8]; @@ -140,11 +134,11 @@ static int __init test_mptcp_crypto(void) put_unaligned_be32(nonce2, &msg[4]); mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac); - for (j = 0; j < 20; ++j) + for (j = 0; j < 32; ++j) sprintf(&hmac_hex[j << 1], "%02x", hmac[j] & 0xff); - hmac_hex[40] = 0; + hmac_hex[64] = 0; - if (memcmp(hmac_hex, tests[i].result, 40)) + if (memcmp(hmac_hex, tests[i].result, 64)) pr_err("test %d failed, got %s expected %s", i, hmac_hex, tests[i].result); else diff --git a/net/mptcp/options.c b/net/mptcp/options.c index faf57585b892..7793b6011fa7 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -7,6 +7,7 @@ #define pr_fmt(fmt) "MPTCP: " fmt #include <linux/kernel.h> +#include <crypto/sha.h> #include <net/tcp.h> #include <net/mptcp.h> #include "protocol.h" @@ -16,10 +17,10 @@ static bool mptcp_cap_flag_sha256(u8 flags) return (flags & MPTCP_CAP_FLAG_MASK) == MPTCP_CAP_HMAC_SHA256; } -void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr, - int opsize, struct tcp_options_received *opt_rx) +static void mptcp_parse_option(const struct sk_buff *skb, + const unsigned char *ptr, int opsize, + struct mptcp_options_received *mp_opt) { - struct mptcp_options_received *mp_opt = &opt_rx->mptcp; u8 subtype = *ptr >> 4; int expected_opsize; u8 version; @@ -283,12 +284,20 @@ void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr, } void mptcp_get_options(const struct sk_buff *skb, - struct tcp_options_received *opt_rx) + struct mptcp_options_received *mp_opt) { - const unsigned char *ptr; const struct tcphdr *th = tcp_hdr(skb); - int length = (th->doff * 4) - sizeof(struct tcphdr); + const unsigned char *ptr; + int length; + + /* initialize option status */ + mp_opt->mp_capable = 0; + mp_opt->mp_join = 0; + mp_opt->add_addr = 0; + mp_opt->rm_addr = 0; + mp_opt->dss = 0; + length = (th->doff * 4) - sizeof(struct tcphdr); ptr = (const unsigned char *)(th + 1); while (length > 0) { @@ -308,7 +317,7 @@ void mptcp_get_options(const struct sk_buff *skb, if (opsize > length) return; /* don't parse partial options */ if (opcode == TCPOPT_MPTCP) - mptcp_parse_option(skb, ptr, opsize, opt_rx); + mptcp_parse_option(skb, ptr, opsize, mp_opt); ptr += opsize - 2; length -= opsize; } @@ -344,28 +353,6 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb, return false; } -void mptcp_rcv_synsent(struct sock *sk) -{ - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - struct tcp_sock *tp = tcp_sk(sk); - - if (subflow->request_mptcp && tp->rx_opt.mptcp.mp_capable) { - subflow->mp_capable = 1; - subflow->can_ack = 1; - subflow->remote_key = tp->rx_opt.mptcp.sndr_key; - pr_debug("subflow=%p, remote_key=%llu", subflow, - subflow->remote_key); - } else if (subflow->request_join && tp->rx_opt.mptcp.mp_join) { - subflow->mp_join = 1; - subflow->thmac = tp->rx_opt.mptcp.thmac; - subflow->remote_nonce = tp->rx_opt.mptcp.nonce; - pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow, - subflow->thmac, subflow->remote_nonce); - } else if (subflow->request_mptcp) { - tcp_sk(sk)->is_mptcp = 0; - } -} - /* MP_JOIN client subflow must wait for 4th ack before sending any data: * TCP can't schedule delack timer before the subflow is fully established. * MPTCP uses the delack timer to do 3rd ack retransmissions @@ -549,7 +536,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, static u64 add_addr_generate_hmac(u64 key1, u64 key2, u8 addr_id, struct in_addr *addr) { - u8 hmac[MPTCP_ADDR_HMAC_LEN]; + u8 hmac[SHA256_DIGEST_SIZE]; u8 msg[7]; msg[0] = addr_id; @@ -559,14 +546,14 @@ static u64 add_addr_generate_hmac(u64 key1, u64 key2, u8 addr_id, mptcp_crypto_hmac_sha(key1, key2, msg, 7, hmac); - return get_unaligned_be64(hmac); + return get_unaligned_be64(&hmac[SHA256_DIGEST_SIZE - sizeof(u64)]); } #if IS_ENABLED(CONFIG_MPTCP_IPV6) static u64 add_addr6_generate_hmac(u64 key1, u64 key2, u8 addr_id, struct in6_addr *addr) { - u8 hmac[MPTCP_ADDR_HMAC_LEN]; + u8 hmac[SHA256_DIGEST_SIZE]; u8 msg[19]; msg[0] = addr_id; @@ -576,7 +563,7 @@ static u64 add_addr6_generate_hmac(u64 key1, u64 key2, u8 addr_id, mptcp_crypto_hmac_sha(key1, key2, msg, 19, hmac); - return get_unaligned_be64(hmac); + return get_unaligned_be64(&hmac[SHA256_DIGEST_SIZE - sizeof(u64)]); } #endif @@ -709,7 +696,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk, if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1) return subflow->mp_capable; - if (mp_opt->use_ack) { + if (mp_opt->dss && mp_opt->use_ack) { /* subflows are fully established as soon as we get any * additional ack. */ @@ -717,8 +704,6 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk, goto fully_established; } - WARN_ON_ONCE(subflow->can_ack); - /* If the first established packet does not contain MP_CAPABLE + data * then fallback to TCP */ @@ -728,6 +713,8 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk, return false; } + if (unlikely(!READ_ONCE(msk->pm.server_side))) + pr_warn_once("bogus mpc option on established client sk"); subflow->fully_established = 1; subflow->remote_key = mp_opt->sndr_key; subflow->can_ack = 1; @@ -819,41 +806,41 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb, { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); - struct mptcp_options_received *mp_opt; + struct mptcp_options_received mp_opt; struct mptcp_ext *mpext; - mp_opt = &opt_rx->mptcp; - if (!check_fully_established(msk, sk, subflow, skb, mp_opt)) + mptcp_get_options(skb, &mp_opt); + if (!check_fully_established(msk, sk, subflow, skb, &mp_opt)) return; - if (mp_opt->add_addr && add_addr_hmac_valid(msk, mp_opt)) { + if (mp_opt.add_addr && add_addr_hmac_valid(msk, &mp_opt)) { struct mptcp_addr_info addr; - addr.port = htons(mp_opt->port); - addr.id = mp_opt->addr_id; - if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) { + addr.port = htons(mp_opt.port); + addr.id = mp_opt.addr_id; + if (mp_opt.family == MPTCP_ADDR_IPVERSION_4) { addr.family = AF_INET; - addr.addr = mp_opt->addr; + addr.addr = mp_opt.addr; } #if IS_ENABLED(CONFIG_MPTCP_IPV6) - else if (mp_opt->family == MPTCP_ADDR_IPVERSION_6) { + else if (mp_opt.family == MPTCP_ADDR_IPVERSION_6) { addr.family = AF_INET6; - addr.addr6 = mp_opt->addr6; + addr.addr6 = mp_opt.addr6; } #endif - if (!mp_opt->echo) + if (!mp_opt.echo) mptcp_pm_add_addr_received(msk, &addr); - mp_opt->add_addr = 0; + mp_opt.add_addr = 0; } - if (!mp_opt->dss) + if (!mp_opt.dss) return; /* we can't wait for recvmsg() to update the ack_seq, otherwise * monodirectional flows will stuck */ - if (mp_opt->use_ack) - update_una(msk, mp_opt); + if (mp_opt.use_ack) + update_una(msk, &mp_opt); mpext = skb_ext_add(skb, SKB_EXT_MPTCP); if (!mpext) @@ -861,8 +848,8 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb, memset(mpext, 0, sizeof(*mpext)); - if (mp_opt->use_map) { - if (mp_opt->mpc_map) { + if (mp_opt.use_map) { + if (mp_opt.mpc_map) { /* this is an MP_CAPABLE carrying MPTCP data * we know this map the first chunk of data */ @@ -872,16 +859,16 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb, mpext->subflow_seq = 1; mpext->dsn64 = 1; mpext->mpc_map = 1; + mpext->data_fin = 0; } else { - mpext->data_seq = mp_opt->data_seq; - mpext->subflow_seq = mp_opt->subflow_seq; - mpext->dsn64 = mp_opt->dsn64; + mpext->data_seq = mp_opt.data_seq; + mpext->subflow_seq = mp_opt.subflow_seq; + mpext->dsn64 = mp_opt.dsn64; + mpext->data_fin = mp_opt.data_fin; } - mpext->data_len = mp_opt->data_len; + mpext->data_len = mp_opt.data_len; mpext->use_map = 1; } - - mpext->data_fin = mp_opt->data_fin; } void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 86d61ab34c7c..b78edf237ba0 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -599,12 +599,14 @@ static int mptcp_nl_fill_addr(struct sk_buff *skb, nla_put_s32(skb, MPTCP_PM_ADDR_ATTR_IF_IDX, entry->ifindex)) goto nla_put_failure; - if (addr->family == AF_INET) - nla_put_in_addr(skb, MPTCP_PM_ADDR_ATTR_ADDR4, - addr->addr.s_addr); + if (addr->family == AF_INET && + nla_put_in_addr(skb, MPTCP_PM_ADDR_ATTR_ADDR4, + addr->addr.s_addr)) + goto nla_put_failure; #if IS_ENABLED(CONFIG_MPTCP_IPV6) - else if (addr->family == AF_INET6) - nla_put_in6_addr(skb, MPTCP_PM_ADDR_ATTR_ADDR6, &addr->addr6); + else if (addr->family == AF_INET6 && + nla_put_in6_addr(skb, MPTCP_PM_ADDR_ATTR_ADDR6, &addr->addr6)) + goto nla_put_failure; #endif nla_nest_end(skb, attr); return 0; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 939a5045181a..34dd0e278a82 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -97,12 +97,7 @@ static struct socket *__mptcp_tcp_fallback(struct mptcp_sock *msk) if (likely(!__mptcp_needs_tcp_fallback(msk))) return NULL; - if (msk->subflow) { - release_sock((struct sock *)msk); - return msk->subflow; - } - - return NULL; + return msk->subflow; } static bool __mptcp_can_create_subflow(const struct mptcp_sock *msk) @@ -734,9 +729,10 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) goto out; } +fallback: ssock = __mptcp_tcp_fallback(msk); if (unlikely(ssock)) { -fallback: + release_sock(sk); pr_debug("fallback passthrough"); ret = sock_sendmsg(ssock, msg); return ret >= 0 ? ret + copied : (copied ? copied : ret); @@ -769,8 +765,14 @@ fallback: if (ret < 0) break; if (ret == 0 && unlikely(__mptcp_needs_tcp_fallback(msk))) { + /* Can happen for passive sockets: + * 3WHS negotiated MPTCP, but first packet after is + * plain TCP (e.g. due to middlebox filtering unknown + * options). + * + * Fall back to TCP. + */ release_sock(ssk); - ssock = __mptcp_tcp_fallback(msk); goto fallback; } @@ -883,6 +885,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, ssock = __mptcp_tcp_fallback(msk); if (unlikely(ssock)) { fallback: + release_sock(sk); pr_debug("fallback-read subflow=%p", mptcp_subflow_ctx(ssock->sk)); copied = sock_recvmsg(ssock, msg, flags); @@ -951,7 +954,8 @@ fallback: pr_debug("block timeout %ld", timeo); mptcp_wait_data(sk, &timeo); - if (unlikely(__mptcp_tcp_fallback(msk))) + ssock = __mptcp_tcp_fallback(msk); + if (unlikely(ssock)) goto fallback; } @@ -1259,11 +1263,14 @@ static void mptcp_close(struct sock *sk, long timeout) lock_sock(sk); - mptcp_token_destroy(msk->token); inet_sk_state_store(sk, TCP_CLOSE); - __mptcp_flush_join_list(msk); - + /* be sure to always acquire the join list lock, to sync vs + * mptcp_finish_join(). + */ + spin_lock_bh(&msk->join_list_lock); + list_splice_tail_init(&msk->join_list, &msk->conn_list); + spin_unlock_bh(&msk->join_list_lock); list_splice_init(&msk->conn_list, &conn_list); data_fin_tx_seq = msk->write_seq; @@ -1313,11 +1320,12 @@ static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk) static int mptcp_disconnect(struct sock *sk, int flags) { - lock_sock(sk); - __mptcp_clear_xmit(sk); - release_sock(sk); - mptcp_cancel_work(sk); - return tcp_disconnect(sk, flags); + /* Should never be called. + * inet_stream_connect() calls ->disconnect, but that + * refers to the subflow socket, not the mptcp one. + */ + WARN_ON_ONCE(1); + return 0; } #if IS_ENABLED(CONFIG_MPTCP_IPV6) @@ -1329,7 +1337,9 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk) } #endif -struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req) +struct sock *mptcp_sk_clone(const struct sock *sk, + const struct mptcp_options_received *mp_opt, + struct request_sock *req) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC); @@ -1352,26 +1362,30 @@ struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req) msk->subflow = NULL; if (unlikely(mptcp_token_new_accept(subflow_req->token, nsk))) { + nsk->sk_state = TCP_CLOSE; bh_unlock_sock(nsk); /* we can't call into mptcp_close() here - possible BH context - * free the sock directly + * free the sock directly. + * sk_clone_lock() sets nsk refcnt to two, hence call sk_free() + * too. */ - nsk->sk_prot->destroy(nsk); + sk_common_release(nsk); sk_free(nsk); return NULL; } msk->write_seq = subflow_req->idsn + 1; atomic64_set(&msk->snd_una, msk->write_seq); - if (subflow_req->remote_key_valid) { + if (mp_opt->mp_capable) { msk->can_ack = true; - msk->remote_key = subflow_req->remote_key; + msk->remote_key = mp_opt->sndr_key; mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq); ack_seq++; msk->ack_seq = ack_seq; } + sock_reset_flag(nsk, SOCK_RCU_FREE); /* will be fully established after successful MPC subflow creation */ inet_sk_state_store(nsk, TCP_SYN_RECV); bh_unlock_sock(nsk); @@ -1428,6 +1442,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, newsk = new_mptcp_sock; mptcp_copy_inaddrs(newsk, ssk); list_add(&subflow->node, &msk->conn_list); + inet_sk_state_store(newsk, TCP_ESTABLISHED); bh_unlock_sock(new_mptcp_sock); @@ -1445,6 +1460,7 @@ static void mptcp_destroy(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); + mptcp_token_destroy(msk->token); if (msk->cached_ext) __skb_ext_put(msk->cached_ext); @@ -1467,12 +1483,11 @@ static int mptcp_setsockopt(struct sock *sk, int level, int optname, */ lock_sock(sk); ssock = __mptcp_tcp_fallback(msk); + release_sock(sk); if (ssock) return tcp_setsockopt(ssock->sk, level, optname, optval, optlen); - release_sock(sk); - return -EOPNOTSUPP; } @@ -1492,12 +1507,11 @@ static int mptcp_getsockopt(struct sock *sk, int level, int optname, */ lock_sock(sk); ssock = __mptcp_tcp_fallback(msk); + release_sock(sk); if (ssock) return tcp_getsockopt(ssock->sk, level, optname, optval, option); - release_sock(sk); - return -EOPNOTSUPP; } @@ -1613,20 +1627,30 @@ bool mptcp_finish_join(struct sock *sk) if (!msk->pm.server_side) return true; - /* passive connection, attach to msk socket */ + if (!mptcp_pm_allow_new_subflow(msk)) + return false; + + /* active connections are already on conn_list, and we can't acquire + * msk lock here. + * use the join list lock as synchronization point and double-check + * msk status to avoid racing with mptcp_close() + */ + spin_lock_bh(&msk->join_list_lock); + ret = inet_sk_state_load(parent) == TCP_ESTABLISHED; + if (ret && !WARN_ON_ONCE(!list_empty(&subflow->node))) + list_add_tail(&subflow->node, &msk->join_list); + spin_unlock_bh(&msk->join_list_lock); + if (!ret) + return false; + + /* attach to msk socket only after we are sure he will deal with us + * at close time + */ parent_sock = READ_ONCE(parent->sk_socket); if (parent_sock && !sk->sk_socket) mptcp_sock_graft(sk, parent_sock); - - ret = mptcp_pm_allow_new_subflow(msk); - if (ret) { - /* active connections are already on conn_list */ - spin_lock_bh(&msk->join_list_lock); - if (!WARN_ON_ONCE(!list_empty(&subflow->node))) - list_add_tail(&subflow->node, &msk->join_list); - spin_unlock_bh(&msk->join_list_lock); - } - return ret; + subflow->map_seq = msk->ack_seq; + return true; } bool mptcp_sk_is_subflow(const struct sock *sk) @@ -1700,6 +1724,14 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, int err; lock_sock(sock->sk); + if (sock->state != SS_UNCONNECTED && msk->subflow) { + /* pending connection or invalid state, let existing subflow + * cope with that + */ + ssock = msk->subflow; + goto do_connect; + } + ssock = __mptcp_socket_create(msk, TCP_SYN_SENT); if (IS_ERR(ssock)) { err = PTR_ERR(ssock); @@ -1714,9 +1746,17 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, mptcp_subflow_ctx(ssock->sk)->request_mptcp = 0; #endif +do_connect: err = ssock->ops->connect(ssock, uaddr, addr_len, flags); - inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk)); - mptcp_copy_inaddrs(sock->sk, ssock->sk); + sock->state = ssock->state; + + /* on successful connect, the msk state will be moved to established by + * subflow_finish_connect() + */ + if (!err || err == EINPROGRESS) + mptcp_copy_inaddrs(sock->sk, ssock->sk); + else + inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk)); unlock: release_sock(sock->sk); @@ -1774,6 +1814,8 @@ static int mptcp_listen(struct socket *sock, int backlog) goto unlock; } + sock_set_flag(sock->sk, SOCK_RCU_FREE); + err = ssock->ops->listen(ssock, backlog); inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk)); if (!err) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 67448002a2d7..d0803dfb8108 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -81,7 +81,6 @@ /* MPTCP ADD_ADDR flags */ #define MPTCP_ADDR_ECHO BIT(0) -#define MPTCP_ADDR_HMAC_LEN 20 #define MPTCP_ADDR_IPVERSION_4 4 #define MPTCP_ADDR_IPVERSION_6 6 @@ -91,6 +90,45 @@ #define MPTCP_WORK_RTX 2 #define MPTCP_WORK_EOF 3 +struct mptcp_options_received { + u64 sndr_key; + u64 rcvr_key; + u64 data_ack; + u64 data_seq; + u32 subflow_seq; + u16 data_len; + u16 mp_capable : 1, + mp_join : 1, + dss : 1, + add_addr : 1, + rm_addr : 1, + family : 4, + echo : 1, + backup : 1; + u32 token; + u32 nonce; + u64 thmac; + u8 hmac[20]; + u8 join_id; + u8 use_map:1, + dsn64:1, + data_fin:1, + use_ack:1, + ack64:1, + mpc_map:1, + __unused:2; + u8 addr_id; + u8 rm_id; + union { + struct in_addr addr; +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + struct in6_addr addr6; +#endif + }; + u64 ahmac; + u16 port; +}; + static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field) { return htonl((TCPOPT_MPTCP << 24) | (len << 16) | (subopt << 12) | @@ -206,12 +244,10 @@ struct mptcp_subflow_request_sock { struct tcp_request_sock sk; u16 mp_capable : 1, mp_join : 1, - backup : 1, - remote_key_valid : 1; + backup : 1; u8 local_id; u8 remote_id; u64 local_key; - u64 remote_key; u64 idsn; u32 token; u32 ssn_offset; @@ -332,9 +368,11 @@ void mptcp_proto_init(void); int mptcp_proto_v6_init(void); #endif -struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req); +struct sock *mptcp_sk_clone(const struct sock *sk, + const struct mptcp_options_received *mp_opt, + struct request_sock *req); void mptcp_get_options(const struct sk_buff *skb, - struct tcp_options_received *opt_rx); + struct mptcp_options_received *mp_opt); void mptcp_finish_connect(struct sock *sk); void mptcp_data_ready(struct sock *sk, struct sock *ssk); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 50a8bea987c6..8968b2c065e7 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -10,6 +10,7 @@ #include <linux/module.h> #include <linux/netdevice.h> #include <crypto/algapi.h> +#include <crypto/sha.h> #include <net/sock.h> #include <net/inet_common.h> #include <net/inet_hashtables.h> @@ -89,7 +90,7 @@ static bool subflow_token_join_request(struct request_sock *req, const struct sk_buff *skb) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); - u8 hmac[MPTCPOPT_HMAC_LEN]; + u8 hmac[SHA256_DIGEST_SIZE]; struct mptcp_sock *msk; int local_id; @@ -124,16 +125,14 @@ static void subflow_init_req(struct request_sock *req, { struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener); struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); - struct tcp_options_received rx_opt; + struct mptcp_options_received mp_opt; pr_debug("subflow_req=%p, listener=%p", subflow_req, listener); - memset(&rx_opt.mptcp, 0, sizeof(rx_opt.mptcp)); - mptcp_get_options(skb, &rx_opt); + mptcp_get_options(skb, &mp_opt); subflow_req->mp_capable = 0; subflow_req->mp_join = 0; - subflow_req->remote_key_valid = 0; #ifdef CONFIG_TCP_MD5SIG /* no MPTCP if MD5SIG is enabled on this socket or we may run out of @@ -143,16 +142,16 @@ static void subflow_init_req(struct request_sock *req, return; #endif - if (rx_opt.mptcp.mp_capable) { + if (mp_opt.mp_capable) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE); - if (rx_opt.mptcp.mp_join) + if (mp_opt.mp_join) return; - } else if (rx_opt.mptcp.mp_join) { + } else if (mp_opt.mp_join) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX); } - if (rx_opt.mptcp.mp_capable && listener->request_mptcp) { + if (mp_opt.mp_capable && listener->request_mptcp) { int err; err = mptcp_token_new_request(req); @@ -160,13 +159,13 @@ static void subflow_init_req(struct request_sock *req, subflow_req->mp_capable = 1; subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq; - } else if (rx_opt.mptcp.mp_join && listener->request_mptcp) { + } else if (mp_opt.mp_join && listener->request_mptcp) { subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq; subflow_req->mp_join = 1; - subflow_req->backup = rx_opt.mptcp.backup; - subflow_req->remote_id = rx_opt.mptcp.join_id; - subflow_req->token = rx_opt.mptcp.token; - subflow_req->remote_nonce = rx_opt.mptcp.nonce; + subflow_req->backup = mp_opt.backup; + subflow_req->remote_id = mp_opt.join_id; + subflow_req->token = mp_opt.token; + subflow_req->remote_nonce = mp_opt.nonce; pr_debug("token=%u, remote_nonce=%u", subflow_req->token, subflow_req->remote_nonce); if (!subflow_token_join_request(req, skb)) { @@ -203,7 +202,7 @@ static void subflow_v6_init_req(struct request_sock *req, /* validate received truncated hmac and create hmac for third ACK */ static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow) { - u8 hmac[MPTCPOPT_HMAC_LEN]; + u8 hmac[SHA256_DIGEST_SIZE]; u64 thmac; subflow_generate_hmac(subflow->remote_key, subflow->local_key, @@ -222,29 +221,55 @@ static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow) static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct mptcp_options_received mp_opt; struct sock *parent = subflow->conn; + struct tcp_sock *tp = tcp_sk(sk); subflow->icsk_af_ops->sk_rx_dst_set(sk, skb); - if (inet_sk_state_load(parent) != TCP_ESTABLISHED) { + if (inet_sk_state_load(parent) == TCP_SYN_SENT) { inet_sk_state_store(parent, TCP_ESTABLISHED); parent->sk_state_change(parent); } - if (subflow->conn_finished || !tcp_sk(sk)->is_mptcp) + /* be sure no special action on any packet other than syn-ack */ + if (subflow->conn_finished) + return; + + subflow->conn_finished = 1; + + mptcp_get_options(skb, &mp_opt); + if (subflow->request_mptcp && mp_opt.mp_capable) { + subflow->mp_capable = 1; + subflow->can_ack = 1; + subflow->remote_key = mp_opt.sndr_key; + pr_debug("subflow=%p, remote_key=%llu", subflow, + subflow->remote_key); + } else if (subflow->request_join && mp_opt.mp_join) { + subflow->mp_join = 1; + subflow->thmac = mp_opt.thmac; + subflow->remote_nonce = mp_opt.nonce; + pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow, + subflow->thmac, subflow->remote_nonce); + } else if (subflow->request_mptcp) { + tp->is_mptcp = 0; + } + + if (!tp->is_mptcp) return; if (subflow->mp_capable) { pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk), subflow->remote_key); mptcp_finish_connect(sk); - subflow->conn_finished = 1; if (skb) { pr_debug("synack seq=%u", TCP_SKB_CB(skb)->seq); subflow->ssn_offset = TCP_SKB_CB(skb)->seq; } } else if (subflow->mp_join) { + u8 hmac[SHA256_DIGEST_SIZE]; + pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow, subflow->thmac, subflow->remote_nonce); @@ -257,7 +282,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow_generate_hmac(subflow->local_key, subflow->remote_key, subflow->local_nonce, subflow->remote_nonce, - subflow->hmac); + hmac); + + memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN); if (skb) subflow->ssn_offset = TCP_SKB_CB(skb)->seq; @@ -265,7 +292,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) if (!mptcp_finish_join(sk)) goto do_reset; - subflow->conn_finished = 1; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX); } else { do_reset: @@ -323,10 +349,10 @@ drop: /* validate hmac received in third ACK */ static bool subflow_hmac_valid(const struct request_sock *req, - const struct tcp_options_received *rx_opt) + const struct mptcp_options_received *mp_opt) { const struct mptcp_subflow_request_sock *subflow_req; - u8 hmac[MPTCPOPT_HMAC_LEN]; + u8 hmac[SHA256_DIGEST_SIZE]; struct mptcp_sock *msk; bool ret; @@ -340,13 +366,53 @@ static bool subflow_hmac_valid(const struct request_sock *req, subflow_req->local_nonce, hmac); ret = true; - if (crypto_memneq(hmac, rx_opt->mptcp.hmac, sizeof(hmac))) + if (crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN)) ret = false; sock_put((struct sock *)msk); return ret; } +static void mptcp_sock_destruct(struct sock *sk) +{ + /* if new mptcp socket isn't accepted, it is free'd + * from the tcp listener sockets request queue, linked + * from req->sk. The tcp socket is released. + * This calls the ULP release function which will + * also remove the mptcp socket, via + * sock_put(ctx->conn). + * + * Problem is that the mptcp socket will not be in + * SYN_RECV state and doesn't have SOCK_DEAD flag. + * Both result in warnings from inet_sock_destruct. + */ + + if (sk->sk_state == TCP_SYN_RECV) { + sk->sk_state = TCP_CLOSE; + WARN_ON_ONCE(sk->sk_socket); + sock_orphan(sk); + } + + inet_sock_destruct(sk); +} + +static void mptcp_force_close(struct sock *sk) +{ + inet_sk_state_store(sk, TCP_CLOSE); + sk_common_release(sk); +} + +static void subflow_ulp_fallback(struct sock *sk, + struct mptcp_subflow_context *old_ctx) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + mptcp_subflow_tcp_fallback(sk, old_ctx); + icsk->icsk_ulp_ops = NULL; + rcu_assign_pointer(icsk->icsk_ulp_data, NULL); + tcp_sk(sk)->is_mptcp = 0; +} + static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, @@ -356,13 +422,18 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, { struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk); struct mptcp_subflow_request_sock *subflow_req; - struct tcp_options_received opt_rx; + struct mptcp_options_received mp_opt; bool fallback_is_fatal = false; struct sock *new_msk = NULL; + bool fallback = false; struct sock *child; pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn); + /* we need later a valid 'mp_capable' value even when options are not + * parsed + */ + mp_opt.mp_capable = 0; if (tcp_rsk(req)->is_mptcp == 0) goto create_child; @@ -377,26 +448,21 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, goto create_msk; } - opt_rx.mptcp.mp_capable = 0; - mptcp_get_options(skb, &opt_rx); - if (opt_rx.mptcp.mp_capable) { - subflow_req->remote_key = opt_rx.mptcp.sndr_key; - subflow_req->remote_key_valid = 1; - } else { - subflow_req->mp_capable = 0; + mptcp_get_options(skb, &mp_opt); + if (!mp_opt.mp_capable) { + fallback = true; goto create_child; } create_msk: - new_msk = mptcp_sk_clone(listener->conn, req); + new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req); if (!new_msk) - subflow_req->mp_capable = 0; + fallback = true; } else if (subflow_req->mp_join) { fallback_is_fatal = true; - opt_rx.mptcp.mp_join = 0; - mptcp_get_options(skb, &opt_rx); - if (!opt_rx.mptcp.mp_join || - !subflow_hmac_valid(req, &opt_rx)) { + mptcp_get_options(skb, &mp_opt); + if (!mp_opt.mp_join || + !subflow_hmac_valid(req, &mp_opt)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); return NULL; } @@ -409,12 +475,18 @@ create_child: if (child && *own_req) { struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child); - /* we have null ctx on TCP fallback, which is fatal on - * MPJ handshake + /* we need to fallback on ctx allocation failure and on pre-reqs + * checking above. In the latter scenario we additionally need + * to reset the context to non MPTCP status. */ - if (!ctx) { + if (!ctx || fallback) { if (fallback_is_fatal) goto close_child; + + if (ctx) { + subflow_ulp_fallback(child, ctx); + kfree_rcu(ctx, rcu); + } goto out; } @@ -422,10 +494,17 @@ create_child: /* new mpc subflow takes ownership of the newly * created mptcp socket */ - inet_sk_state_store(new_msk, TCP_ESTABLISHED); + new_msk->sk_destruct = mptcp_sock_destruct; mptcp_pm_new_connection(mptcp_sk(new_msk), 1); ctx->conn = new_msk; new_msk = NULL; + + /* with OoO packets we can reach here without ingress + * mpc option + */ + ctx->remote_key = mp_opt.sndr_key; + ctx->fully_established = mp_opt.mp_capable; + ctx->can_ack = mp_opt.mp_capable; } else if (ctx->mp_join) { struct mptcp_sock *owner; @@ -444,7 +523,14 @@ create_child: out: /* dispose of the left over mptcp master, if any */ if (unlikely(new_msk)) - sock_put(new_msk); + mptcp_force_close(new_msk); + + /* check for expected invariant - should never trigger, just help + * catching eariler subtle bugs + */ + WARN_ON_ONCE(child && *own_req && tcp_sk(child)->is_mptcp && + (!mptcp_subflow_ctx(child) || + !mptcp_subflow_ctx(child)->conn)); return child; close_child: @@ -931,6 +1017,16 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock) if (err) return err; + /* the newly created socket really belongs to the owning MPTCP master + * socket, even if for additional subflows the allocation is performed + * by a kernel workqueue. Adjust inode references, so that the + * procfs/diag interaces really show this one belonging to the correct + * user. + */ + SOCK_INODE(sf)->i_ino = SOCK_INODE(sk->sk_socket)->i_ino; + SOCK_INODE(sf)->i_uid = SOCK_INODE(sk->sk_socket)->i_uid; + SOCK_INODE(sf)->i_gid = SOCK_INODE(sk->sk_socket)->i_gid; + subflow = mptcp_subflow_ctx(sf->sk); pr_debug("subflow=%p", subflow); @@ -1047,17 +1143,6 @@ static void subflow_ulp_release(struct sock *sk) kfree_rcu(ctx, rcu); } -static void subflow_ulp_fallback(struct sock *sk, - struct mptcp_subflow_context *old_ctx) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - - mptcp_subflow_tcp_fallback(sk, old_ctx); - icsk->icsk_ulp_ops = NULL; - rcu_assign_pointer(icsk->icsk_ulp_data, NULL); - tcp_sk(sk)->is_mptcp = 0; -} - static void subflow_ulp_clone(const struct request_sock *req, struct sock *newsk, const gfp_t priority) @@ -1091,9 +1176,6 @@ static void subflow_ulp_clone(const struct request_sock *req, * is fully established only after we receive the remote key */ new_ctx->mp_capable = 1; - new_ctx->fully_established = subflow_req->remote_key_valid; - new_ctx->can_ack = subflow_req->remote_key_valid; - new_ctx->remote_key = subflow_req->remote_key; new_ctx->local_key = subflow_req->local_key; new_ctx->token = subflow_req->token; new_ctx->ssn_offset = subflow_req->ssn_offset; |