diff options
Diffstat (limited to 'net/mptcp')
-rw-r--r-- | net/mptcp/mib.c | 3 | ||||
-rw-r--r-- | net/mptcp/mib.h | 3 | ||||
-rw-r--r-- | net/mptcp/options.c | 292 | ||||
-rw-r--r-- | net/mptcp/pm.c | 64 | ||||
-rw-r--r-- | net/mptcp/pm_netlink.c | 328 | ||||
-rw-r--r-- | net/mptcp/protocol.c | 62 | ||||
-rw-r--r-- | net/mptcp/protocol.h | 89 | ||||
-rw-r--r-- | net/mptcp/subflow.c | 43 |
8 files changed, 540 insertions, 344 deletions
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index 3780c29c321d..eb2dc6dbe212 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -10,9 +10,12 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("MPCapableSYNRX", MPTCP_MIB_MPCAPABLEPASSIVE), + SNMP_MIB_ITEM("MPCapableSYNTX", MPTCP_MIB_MPCAPABLEACTIVE), + SNMP_MIB_ITEM("MPCapableSYNACKRX", MPTCP_MIB_MPCAPABLEACTIVEACK), SNMP_MIB_ITEM("MPCapableACKRX", MPTCP_MIB_MPCAPABLEPASSIVEACK), SNMP_MIB_ITEM("MPCapableFallbackACK", MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK), SNMP_MIB_ITEM("MPCapableFallbackSYNACK", MPTCP_MIB_MPCAPABLEACTIVEFALLBACK), + SNMP_MIB_ITEM("MPFallbackTokenInit", MPTCP_MIB_TOKENFALLBACKINIT), SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS), SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN), SNMP_MIB_ITEM("MPJoinSynRx", MPTCP_MIB_JOINSYNRX), diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index 72afbc135f8e..f0da4f060fe1 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -3,9 +3,12 @@ enum linux_mptcp_mib_field { MPTCP_MIB_NUM = 0, MPTCP_MIB_MPCAPABLEPASSIVE, /* Received SYN with MP_CAPABLE */ + MPTCP_MIB_MPCAPABLEACTIVE, /* Sent SYN with MP_CAPABLE */ + MPTCP_MIB_MPCAPABLEACTIVEACK, /* Received SYN/ACK with MP_CAPABLE */ MPTCP_MIB_MPCAPABLEPASSIVEACK, /* Received third ACK with MP_CAPABLE */ MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK,/* Server-side fallback during 3-way handshake */ MPTCP_MIB_MPCAPABLEACTIVEFALLBACK, /* Client-side fallback during 3-way handshake */ + MPTCP_MIB_TOKENFALLBACKINIT, /* Could not init/allocate token */ MPTCP_MIB_RETRANSSEGS, /* Segments retransmitted at the MPTCP-level */ MPTCP_MIB_JOINNOTOKEN, /* Received MP_JOIN but the token was not found */ MPTCP_MIB_JOINSYNRX, /* Received a SYN + MP_JOIN */ diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 89a4225ed321..d51c3ad54d9a 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -26,6 +26,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, int expected_opsize; u8 version; u8 flags; + u8 i; switch (subtype) { case MPTCPOPT_MP_CAPABLE: @@ -219,45 +220,45 @@ static void mptcp_parse_option(const struct sk_buff *skb, if (!mp_opt->echo) { if (opsize == TCPOLEN_MPTCP_ADD_ADDR || opsize == TCPOLEN_MPTCP_ADD_ADDR_PORT) - mp_opt->family = MPTCP_ADDR_IPVERSION_4; + mp_opt->addr.family = AF_INET; #if IS_ENABLED(CONFIG_MPTCP_IPV6) else if (opsize == TCPOLEN_MPTCP_ADD_ADDR6 || opsize == TCPOLEN_MPTCP_ADD_ADDR6_PORT) - mp_opt->family = MPTCP_ADDR_IPVERSION_6; + mp_opt->addr.family = AF_INET6; #endif else break; } else { if (opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE || opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT) - mp_opt->family = MPTCP_ADDR_IPVERSION_4; + mp_opt->addr.family = AF_INET; #if IS_ENABLED(CONFIG_MPTCP_IPV6) else if (opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE || opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT) - mp_opt->family = MPTCP_ADDR_IPVERSION_6; + mp_opt->addr.family = AF_INET6; #endif else break; } mp_opt->add_addr = 1; - mp_opt->addr_id = *ptr++; - if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) { - memcpy((u8 *)&mp_opt->addr.s_addr, (u8 *)ptr, 4); + mp_opt->addr.id = *ptr++; + if (mp_opt->addr.family == AF_INET) { + memcpy((u8 *)&mp_opt->addr.addr.s_addr, (u8 *)ptr, 4); ptr += 4; if (opsize == TCPOLEN_MPTCP_ADD_ADDR_PORT || opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT) { - mp_opt->port = get_unaligned_be16(ptr); + mp_opt->addr.port = htons(get_unaligned_be16(ptr)); ptr += 2; } } #if IS_ENABLED(CONFIG_MPTCP_IPV6) else { - memcpy(mp_opt->addr6.s6_addr, (u8 *)ptr, 16); + memcpy(mp_opt->addr.addr6.s6_addr, (u8 *)ptr, 16); ptr += 16; if (opsize == TCPOLEN_MPTCP_ADD_ADDR6_PORT || opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT) { - mp_opt->port = get_unaligned_be16(ptr); + mp_opt->addr.port = htons(get_unaligned_be16(ptr)); ptr += 2; } } @@ -267,19 +268,22 @@ static void mptcp_parse_option(const struct sk_buff *skb, ptr += 8; } pr_debug("ADD_ADDR%s: id=%d, ahmac=%llu, echo=%d, port=%d", - (mp_opt->family == MPTCP_ADDR_IPVERSION_6) ? "6" : "", - mp_opt->addr_id, mp_opt->ahmac, mp_opt->echo, mp_opt->port); + (mp_opt->addr.family == AF_INET6) ? "6" : "", + mp_opt->addr.id, mp_opt->ahmac, mp_opt->echo, ntohs(mp_opt->addr.port)); break; case MPTCPOPT_RM_ADDR: - if (opsize != TCPOLEN_MPTCP_RM_ADDR_BASE) + if (opsize < TCPOLEN_MPTCP_RM_ADDR_BASE + 1 || + opsize > TCPOLEN_MPTCP_RM_ADDR_BASE + MPTCP_RM_IDS_MAX) break; ptr++; mp_opt->rm_addr = 1; - mp_opt->rm_id = *ptr++; - pr_debug("RM_ADDR: id=%d", mp_opt->rm_id); + mp_opt->rm_list.nr = opsize - TCPOLEN_MPTCP_RM_ADDR_BASE; + for (i = 0; i < mp_opt->rm_list.nr; i++) + mp_opt->rm_list.ids[i] = *ptr++; + pr_debug("RM_ADDR: rm_list_nr=%d", mp_opt->rm_list.nr); break; case MPTCPOPT_MP_PRIO: @@ -301,6 +305,18 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->fastclose = 1; break; + case MPTCPOPT_RST: + if (opsize != TCPOLEN_MPTCP_RST) + break; + + if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) + break; + mp_opt->reset = 1; + flags = *ptr++; + mp_opt->reset_transient = flags & MPTCP_RST_TRANSIENT; + mp_opt->reset_reason = *ptr; + break; + default: break; } @@ -319,10 +335,11 @@ void mptcp_get_options(const struct sk_buff *skb, mp_opt->add_addr = 0; mp_opt->ahmac = 0; mp_opt->fastclose = 0; - mp_opt->port = 0; + mp_opt->addr.port = 0; mp_opt->rm_addr = 0; mp_opt->dss = 0; mp_opt->mp_prio = 0; + mp_opt->reset = 0; length = (th->doff * 4) - sizeof(struct tcphdr); ptr = (const unsigned char *)(th + 1); @@ -566,39 +583,32 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, return true; } -static u64 add_addr_generate_hmac(u64 key1, u64 key2, u8 addr_id, - struct in_addr *addr, u16 port) -{ - u8 hmac[SHA256_DIGEST_SIZE]; - u8 msg[7]; - - msg[0] = addr_id; - memcpy(&msg[1], &addr->s_addr, 4); - msg[5] = port >> 8; - msg[6] = port & 0xFF; - - mptcp_crypto_hmac_sha(key1, key2, msg, 7, hmac); - - return get_unaligned_be64(&hmac[SHA256_DIGEST_SIZE - sizeof(u64)]); -} - -#if IS_ENABLED(CONFIG_MPTCP_IPV6) -static u64 add_addr6_generate_hmac(u64 key1, u64 key2, u8 addr_id, - struct in6_addr *addr, u16 port) +static u64 add_addr_generate_hmac(u64 key1, u64 key2, + struct mptcp_addr_info *addr) { + u16 port = ntohs(addr->port); u8 hmac[SHA256_DIGEST_SIZE]; u8 msg[19]; + int i = 0; - msg[0] = addr_id; - memcpy(&msg[1], &addr->s6_addr, 16); - msg[17] = port >> 8; - msg[18] = port & 0xFF; + msg[i++] = addr->id; + if (addr->family == AF_INET) { + memcpy(&msg[i], &addr->addr.s_addr, 4); + i += 4; + } +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + else if (addr->family == AF_INET6) { + memcpy(&msg[i], &addr->addr6.s6_addr, 16); + i += 16; + } +#endif + msg[i++] = port >> 8; + msg[i++] = port & 0xFF; - mptcp_crypto_hmac_sha(key1, key2, msg, 19, hmac); + mptcp_crypto_hmac_sha(key1, key2, msg, i, hmac); return get_unaligned_be64(&hmac[SHA256_DIGEST_SIZE - sizeof(u64)]); } -#endif static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *skb, unsigned int *size, @@ -609,13 +619,13 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * struct mptcp_sock *msk = mptcp_sk(subflow->conn); bool drop_other_suboptions = false; unsigned int opt_size = *size; - struct mptcp_addr_info saddr; bool echo; bool port; int len; if ((mptcp_pm_should_add_signal_ipv6(msk) || - mptcp_pm_should_add_signal_port(msk)) && + mptcp_pm_should_add_signal_port(msk) || + mptcp_pm_should_add_signal_echo(msk)) && skb && skb_is_tcp_pure_ack(skb)) { pr_debug("drop other suboptions"); opts->suboptions = 0; @@ -626,45 +636,24 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * } if (!mptcp_pm_should_add_signal(msk) || - !(mptcp_pm_add_addr_signal(msk, remaining, &saddr, &echo, &port))) + !(mptcp_pm_add_addr_signal(msk, remaining, &opts->addr, &echo, &port))) return false; - len = mptcp_add_addr_len(saddr.family, echo, port); + len = mptcp_add_addr_len(opts->addr.family, echo, port); if (remaining < len) return false; *size = len; if (drop_other_suboptions) *size -= opt_size; - opts->addr_id = saddr.id; - if (port) - opts->port = ntohs(saddr.port); - if (saddr.family == AF_INET) { - opts->suboptions |= OPTION_MPTCP_ADD_ADDR; - opts->addr = saddr.addr; - if (!echo) { - opts->ahmac = add_addr_generate_hmac(msk->local_key, - msk->remote_key, - opts->addr_id, - &opts->addr, - opts->port); - } - } -#if IS_ENABLED(CONFIG_MPTCP_IPV6) - else if (saddr.family == AF_INET6) { - opts->suboptions |= OPTION_MPTCP_ADD_ADDR6; - opts->addr6 = saddr.addr6; - if (!echo) { - opts->ahmac = add_addr6_generate_hmac(msk->local_key, - msk->remote_key, - opts->addr_id, - &opts->addr6, - opts->port); - } + opts->suboptions |= OPTION_MPTCP_ADD_ADDR; + if (!echo) { + opts->ahmac = add_addr_generate_hmac(msk->local_key, + msk->remote_key, + &opts->addr); } -#endif pr_debug("addr_id=%d, ahmac=%llu, echo=%d, port=%d", - opts->addr_id, opts->ahmac, echo, opts->port); + opts->addr.id, opts->ahmac, echo, ntohs(opts->addr.port)); return true; } @@ -676,20 +665,25 @@ static bool mptcp_established_options_rm_addr(struct sock *sk, { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); - u8 rm_id; + struct mptcp_rm_list rm_list; + int i, len; if (!mptcp_pm_should_rm_signal(msk) || - !(mptcp_pm_rm_addr_signal(msk, remaining, &rm_id))) + !(mptcp_pm_rm_addr_signal(msk, remaining, &rm_list))) return false; - if (remaining < TCPOLEN_MPTCP_RM_ADDR_BASE) + len = mptcp_rm_addr_len(&rm_list); + if (len < 0) + return false; + if (remaining < len) return false; - *size = TCPOLEN_MPTCP_RM_ADDR_BASE; + *size = len; opts->suboptions |= OPTION_MPTCP_RM_ADDR; - opts->rm_id = rm_id; + opts->rm_list = rm_list; - pr_debug("rm_id=%d", opts->rm_id); + for (i = 0; i < opts->rm_list.nr; i++) + pr_debug("rm_list_ids[%d]=%d", i, opts->rm_list.ids[i]); return true; } @@ -717,6 +711,22 @@ static bool mptcp_established_options_mp_prio(struct sock *sk, return true; } +static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_buff *skb, + unsigned int *size, + unsigned int remaining, + struct mptcp_out_options *opts) +{ + const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + + if (remaining < TCPOLEN_MPTCP_RST) + return; + + *size = TCPOLEN_MPTCP_RST; + opts->suboptions |= OPTION_MPTCP_RST; + opts->reset_transient = subflow->reset_transient; + opts->reset_reason = subflow->reset_reason; +} + bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts) @@ -732,11 +742,10 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, if (unlikely(__mptcp_check_fallback(msk))) return false; - /* prevent adding of any MPTCP related options on reset packet - * until we support MP_TCPRST/MP_FASTCLOSE - */ - if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) - return false; + if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) { + mptcp_established_options_rst(sk, skb, size, remaining, opts); + return true; + } snd_data_fin = mptcp_data_fin_enabled(msk); if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts)) @@ -873,7 +882,7 @@ fully_established: subflow->pm_notified = 1; if (subflow->mp_join) { clear_3rdack_retransmission(ssk); - mptcp_pm_subflow_established(msk, subflow); + mptcp_pm_subflow_established(msk); } else { mptcp_pm_fully_established(msk, ssk, GFP_ATOMIC); } @@ -943,7 +952,7 @@ bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool us * should match. If they mismatch, the peer is misbehaving and * we will prefer the most recent information. */ - if (READ_ONCE(msk->rcv_data_fin) || !READ_ONCE(msk->first)) + if (READ_ONCE(msk->rcv_data_fin)) return false; WRITE_ONCE(msk->rcv_data_fin_seq, @@ -961,18 +970,9 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk, if (mp_opt->echo) return true; - if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) - hmac = add_addr_generate_hmac(msk->remote_key, - msk->local_key, - mp_opt->addr_id, &mp_opt->addr, - mp_opt->port); -#if IS_ENABLED(CONFIG_MPTCP_IPV6) - else - hmac = add_addr6_generate_hmac(msk->remote_key, - msk->local_key, - mp_opt->addr_id, &mp_opt->addr6, - mp_opt->port); -#endif + hmac = add_addr_generate_hmac(msk->remote_key, + msk->local_key, + &mp_opt->addr); pr_debug("msk=%p, ahmac=%llu, mp_opt->ahmac=%llu\n", msk, (unsigned long long)hmac, @@ -1013,36 +1013,23 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) } if (mp_opt.add_addr && add_addr_hmac_valid(msk, &mp_opt)) { - struct mptcp_addr_info addr; - - addr.port = htons(mp_opt.port); - addr.id = mp_opt.addr_id; - if (mp_opt.family == MPTCP_ADDR_IPVERSION_4) { - addr.family = AF_INET; - addr.addr = mp_opt.addr; - } -#if IS_ENABLED(CONFIG_MPTCP_IPV6) - else if (mp_opt.family == MPTCP_ADDR_IPVERSION_6) { - addr.family = AF_INET6; - addr.addr6 = mp_opt.addr6; - } -#endif if (!mp_opt.echo) { - mptcp_pm_add_addr_received(msk, &addr); + mptcp_pm_add_addr_received(msk, &mp_opt.addr); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR); } else { - mptcp_pm_del_add_timer(msk, &addr); + mptcp_pm_add_addr_echoed(msk, &mp_opt.addr); + mptcp_pm_del_add_timer(msk, &mp_opt.addr); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD); } - if (mp_opt.port) + if (mp_opt.addr.port) MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_PORTADD); mp_opt.add_addr = 0; } if (mp_opt.rm_addr) { - mptcp_pm_rm_addr_received(msk, mp_opt.rm_id); + mptcp_pm_rm_addr_received(msk, &mp_opt.rm_list); mp_opt.rm_addr = 0; } @@ -1052,6 +1039,12 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) mp_opt.mp_prio = 0; } + if (mp_opt.reset) { + subflow->reset_seen = 1; + subflow->reset_reason = mp_opt.reset_reason; + subflow->reset_transient = mp_opt.reset_transient; + } + if (!mp_opt.dss) return; @@ -1160,20 +1153,16 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, } mp_capable_done: - if ((OPTION_MPTCP_ADD_ADDR -#if IS_ENABLED(CONFIG_MPTCP_IPV6) - | OPTION_MPTCP_ADD_ADDR6 -#endif - ) & opts->suboptions) { + if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) { u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE; u8 echo = MPTCP_ADDR_ECHO; #if IS_ENABLED(CONFIG_MPTCP_IPV6) - if (OPTION_MPTCP_ADD_ADDR6 & opts->suboptions) + if (opts->addr.family == AF_INET6) len = TCPOLEN_MPTCP_ADD_ADDR6_BASE; #endif - if (opts->port) + if (opts->addr.port) len += TCPOLEN_MPTCP_PORT_LEN; if (opts->ahmac) { @@ -1182,28 +1171,30 @@ mp_capable_done: } *ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR, - len, echo, opts->addr_id); - if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) { - memcpy((u8 *)ptr, (u8 *)&opts->addr.s_addr, 4); + len, echo, opts->addr.id); + if (opts->addr.family == AF_INET) { + memcpy((u8 *)ptr, (u8 *)&opts->addr.addr.s_addr, 4); ptr += 1; } #if IS_ENABLED(CONFIG_MPTCP_IPV6) - else if (OPTION_MPTCP_ADD_ADDR6 & opts->suboptions) { - memcpy((u8 *)ptr, opts->addr6.s6_addr, 16); + else if (opts->addr.family == AF_INET6) { + memcpy((u8 *)ptr, opts->addr.addr6.s6_addr, 16); ptr += 4; } #endif - if (!opts->port) { + if (!opts->addr.port) { if (opts->ahmac) { put_unaligned_be64(opts->ahmac, ptr); ptr += 2; } } else { + u16 port = ntohs(opts->addr.port); + if (opts->ahmac) { u8 *bptr = (u8 *)ptr; - put_unaligned_be16(opts->port, bptr); + put_unaligned_be16(port, bptr); bptr += 2; put_unaligned_be64(opts->ahmac, bptr); bptr += 8; @@ -1212,7 +1203,7 @@ mp_capable_done: ptr += 3; } else { - put_unaligned_be32(opts->port << 16 | + put_unaligned_be32(port << 16 | TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); ptr += 1; @@ -1221,9 +1212,23 @@ mp_capable_done: } if (OPTION_MPTCP_RM_ADDR & opts->suboptions) { + u8 i = 1; + *ptr++ = mptcp_option(MPTCPOPT_RM_ADDR, - TCPOLEN_MPTCP_RM_ADDR_BASE, - 0, opts->rm_id); + TCPOLEN_MPTCP_RM_ADDR_BASE + opts->rm_list.nr, + 0, opts->rm_list.ids[0]); + + while (i < opts->rm_list.nr) { + u8 id1, id2, id3, id4; + + id1 = opts->rm_list.ids[i]; + id2 = i + 1 < opts->rm_list.nr ? opts->rm_list.ids[i + 1] : TCPOPT_NOP; + id3 = i + 2 < opts->rm_list.nr ? opts->rm_list.ids[i + 2] : TCPOPT_NOP; + id4 = i + 3 < opts->rm_list.nr ? opts->rm_list.ids[i + 3] : TCPOPT_NOP; + put_unaligned_be32(id1 << 24 | id2 << 16 | id3 << 8 | id4, ptr); + ptr += 1; + i += 4; + } } if (OPTION_MPTCP_PRIO & opts->suboptions) { @@ -1265,6 +1270,12 @@ mp_capable_done: ptr += 5; } + if (OPTION_MPTCP_RST & opts->suboptions) + *ptr++ = mptcp_option(MPTCPOPT_RST, + TCPOLEN_MPTCP_RST, + opts->reset_transient, + opts->reset_reason); + if (opts->ext_copy.use_ack || opts->ext_copy.use_map) { struct mptcp_ext *mpext = &opts->ext_copy; u8 len = TCPOLEN_MPTCP_DSS_BASE; @@ -1316,3 +1327,20 @@ mp_capable_done: if (tp) mptcp_set_rwin(tp); } + +__be32 mptcp_get_reset_option(const struct sk_buff *skb) +{ + const struct mptcp_ext *ext = mptcp_get_ext(skb); + u8 flags, reason; + + if (ext) { + flags = ext->reset_transient; + reason = ext->reset_reason; + + return mptcp_option(MPTCPOPT_RST, TCPOLEN_MPTCP_RST, + flags, reason); + } + + return htonl(0u); +} +EXPORT_SYMBOL_GPL(mptcp_get_reset_option); diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 6fd4b2c1b076..9d00fa6d22e9 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -14,7 +14,7 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, - bool echo, bool port) + bool echo) { u8 add_addr = READ_ONCE(msk->pm.addr_signal); @@ -33,35 +33,36 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk, add_addr |= BIT(MPTCP_ADD_ADDR_ECHO); if (addr->family == AF_INET6) add_addr |= BIT(MPTCP_ADD_ADDR_IPV6); - if (port) + if (addr->port) add_addr |= BIT(MPTCP_ADD_ADDR_PORT); WRITE_ONCE(msk->pm.addr_signal, add_addr); return 0; } -int mptcp_pm_remove_addr(struct mptcp_sock *msk, u8 local_id) +int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list) { u8 rm_addr = READ_ONCE(msk->pm.addr_signal); - pr_debug("msk=%p, local_id=%d", msk, local_id); + pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr); if (rm_addr) { pr_warn("addr_signal error, rm_addr=%d", rm_addr); return -EINVAL; } - msk->pm.rm_id = local_id; + msk->pm.rm_list_tx = *rm_list; rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL); WRITE_ONCE(msk->pm.addr_signal, rm_addr); + mptcp_pm_nl_addr_send_ack(msk); return 0; } -int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 local_id) +int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list) { - pr_debug("msk=%p, local_id=%d", msk, local_id); + pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr); spin_lock_bh(&msk->pm.lock); - mptcp_pm_nl_rm_subflow_received(msk, local_id); + mptcp_pm_nl_rm_subflow_received(msk, rm_list); spin_unlock_bh(&msk->pm.lock); return 0; } @@ -152,8 +153,7 @@ void mptcp_pm_connection_closed(struct mptcp_sock *msk) pr_debug("msk=%p", msk); } -void mptcp_pm_subflow_established(struct mptcp_sock *msk, - struct mptcp_subflow_context *subflow) +void mptcp_pm_subflow_established(struct mptcp_sock *msk) { struct mptcp_pm_data *pm = &msk->pm; @@ -188,7 +188,7 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk, spin_lock_bh(&pm->lock); if (!READ_ONCE(pm->accept_addr)) { - mptcp_pm_announce_addr(msk, addr, true, addr->port); + mptcp_pm_announce_addr(msk, addr, true); mptcp_pm_add_addr_send_ack(msk); } else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) { pm->remote = *addr; @@ -197,6 +197,21 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk, spin_unlock_bh(&pm->lock); } +void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk, + struct mptcp_addr_info *addr) +{ + struct mptcp_pm_data *pm = &msk->pm; + + pr_debug("msk=%p", msk); + + spin_lock_bh(&pm->lock); + + if (mptcp_lookup_anno_list_by_saddr(msk, addr) && READ_ONCE(pm->work_pending)) + mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED); + + spin_unlock_bh(&pm->lock); +} + void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk) { if (!mptcp_pm_should_add_signal(msk)) @@ -205,17 +220,20 @@ void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk) mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_SEND_ACK); } -void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, u8 rm_id) +void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, + const struct mptcp_rm_list *rm_list) { struct mptcp_pm_data *pm = &msk->pm; + u8 i; - pr_debug("msk=%p remote_id=%d", msk, rm_id); + pr_debug("msk=%p remote_ids_nr=%d", msk, rm_list->nr); - mptcp_event_addr_removed(msk, rm_id); + for (i = 0; i < rm_list->nr; i++) + mptcp_event_addr_removed(msk, rm_list->ids[i]); spin_lock_bh(&pm->lock); mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED); - pm->rm_id = rm_id; + pm->rm_list_rx = *rm_list; spin_unlock_bh(&pm->lock); } @@ -258,9 +276,9 @@ out_unlock: } bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, - u8 *rm_id) + struct mptcp_rm_list *rm_list) { - int ret = false; + int ret = false, len; spin_lock_bh(&msk->pm.lock); @@ -268,10 +286,15 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, if (!mptcp_pm_should_rm_signal(msk)) goto out_unlock; - if (remaining < TCPOLEN_MPTCP_RM_ADDR_BASE) + len = mptcp_rm_addr_len(&msk->pm.rm_list_tx); + if (len < 0) { + WRITE_ONCE(msk->pm.addr_signal, 0); + goto out_unlock; + } + if (remaining < len) goto out_unlock; - *rm_id = msk->pm.rm_id; + *rm_list = msk->pm.rm_list_tx; WRITE_ONCE(msk->pm.addr_signal, 0); ret = true; @@ -291,7 +314,8 @@ void mptcp_pm_data_init(struct mptcp_sock *msk) msk->pm.add_addr_accepted = 0; msk->pm.local_addr_used = 0; msk->pm.subflows = 0; - msk->pm.rm_id = 0; + msk->pm.rm_list_tx.nr = 0; + msk->pm.rm_list_rx.nr = 0; WRITE_ONCE(msk->pm.work_pending, false); WRITE_ONCE(msk->pm.addr_signal, 0); WRITE_ONCE(msk->pm.accept_addr, false); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 8e8e35fa4002..6ba040897738 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -25,6 +25,8 @@ static int pm_nl_pernet_id; struct mptcp_pm_addr_entry { struct list_head list; struct mptcp_addr_info addr; + u8 flags; + int ifindex; struct rcu_head rcu; struct socket *lsk; }; @@ -56,8 +58,6 @@ struct pm_nl_pernet { #define MPTCP_PM_ADDR_MAX 8 #define ADD_ADDR_RETRANS_MAX 3 -static void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk); - static bool addresses_equal(const struct mptcp_addr_info *a, struct mptcp_addr_info *b, bool use_port) { @@ -140,6 +140,24 @@ static bool lookup_subflow_by_saddr(const struct list_head *list, return false; } +static bool lookup_subflow_by_daddr(const struct list_head *list, + struct mptcp_addr_info *daddr) +{ + struct mptcp_subflow_context *subflow; + struct mptcp_addr_info cur; + struct sock_common *skc; + + list_for_each_entry(subflow, list, node) { + skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow); + + remote_address(skc, &cur); + if (addresses_equal(&cur, daddr, daddr->port)) + return true; + } + + return false; +} + static struct mptcp_pm_addr_entry * select_local_address(const struct pm_nl_pernet *pernet, struct mptcp_sock *msk) @@ -152,7 +170,7 @@ select_local_address(const struct pm_nl_pernet *pernet, rcu_read_lock(); __mptcp_flush_join_list(msk); list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { - if (!(entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)) + if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)) continue; if (entry->addr.family != sk->sk_family) { @@ -190,7 +208,7 @@ select_signal_address(struct pm_nl_pernet *pernet, unsigned int pos) * can lead to additional addresses not being announced. */ list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { - if (!(entry->addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) + if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) continue; if (i++ == pos) { ret = entry; @@ -245,9 +263,9 @@ static void check_work_pending(struct mptcp_sock *msk) WRITE_ONCE(msk->pm.work_pending, false); } -static struct mptcp_pm_add_entry * -lookup_anno_list_by_saddr(struct mptcp_sock *msk, - struct mptcp_addr_info *addr) +struct mptcp_pm_add_entry * +mptcp_lookup_anno_list_by_saddr(struct mptcp_sock *msk, + struct mptcp_addr_info *addr) { struct mptcp_pm_add_entry *entry; @@ -308,7 +326,7 @@ static void mptcp_pm_add_timer(struct timer_list *timer) if (!mptcp_pm_should_add_signal(msk)) { pr_debug("retransmit ADD_ADDR id=%d", entry->addr.id); - mptcp_pm_announce_addr(msk, &entry->addr, false, entry->addr.port); + mptcp_pm_announce_addr(msk, &entry->addr, false); mptcp_pm_add_addr_send_ack(msk); entry->retrans_times++; } @@ -319,6 +337,9 @@ static void mptcp_pm_add_timer(struct timer_list *timer) spin_unlock_bh(&msk->pm.lock); + if (entry->retrans_times == ADD_ADDR_RETRANS_MAX) + mptcp_pm_subflow_established(msk); + out: __sock_put(sk); } @@ -331,7 +352,7 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk, struct sock *sk = (struct sock *)msk; spin_lock_bh(&msk->pm.lock); - entry = lookup_anno_list_by_saddr(msk, addr); + entry = mptcp_lookup_anno_list_by_saddr(msk, addr); if (entry) entry->retrans_times = ADD_ADDR_RETRANS_MAX; spin_unlock_bh(&msk->pm.lock); @@ -351,7 +372,7 @@ static bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, lockdep_assert_held(&msk->pm.lock); - if (lookup_anno_list_by_saddr(msk, &entry->addr)) + if (mptcp_lookup_anno_list_by_saddr(msk, &entry->addr)) return false; add_entry = kmalloc(sizeof(*add_entry), GFP_ATOMIC); @@ -417,8 +438,8 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) if (local) { if (mptcp_pm_alloc_anno_list(msk, local)) { msk->pm.add_addr_signaled++; - mptcp_pm_announce_addr(msk, &local->addr, false, local->addr.port); - mptcp_pm_nl_add_addr_send_ack(msk); + mptcp_pm_announce_addr(msk, &local->addr, false); + mptcp_pm_nl_addr_send_ack(msk); } } else { /* pick failed, avoid fourther attempts later */ @@ -440,7 +461,8 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) check_work_pending(msk); remote_address((struct sock_common *)sk, &remote); spin_unlock_bh(&msk->pm.lock); - __mptcp_subflow_connect(sk, &local->addr, &remote); + __mptcp_subflow_connect(sk, &local->addr, &remote, + local->flags, local->ifindex); spin_lock_bh(&msk->pm.lock); return; } @@ -468,7 +490,6 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) struct mptcp_addr_info remote; struct mptcp_addr_info local; unsigned int subflows_max; - bool use_port = false; add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk); subflows_max = mptcp_pm_get_subflows_max(msk); @@ -476,6 +497,10 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) pr_debug("accepted %d:%d remote family %d", msk->pm.add_addr_accepted, add_addr_accept_max, msk->pm.remote.family); + + if (lookup_subflow_by_daddr(&msk->conn_list, &msk->pm.remote)) + goto add_addr_echo; + msk->pm.add_addr_accepted++; msk->pm.subflows++; if (msk->pm.add_addr_accepted >= add_addr_accept_max || @@ -488,37 +513,37 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) remote = msk->pm.remote; if (!remote.port) remote.port = sk->sk_dport; - else - use_port = true; memset(&local, 0, sizeof(local)); local.family = remote.family; spin_unlock_bh(&msk->pm.lock); - __mptcp_subflow_connect(sk, &local, &remote); + __mptcp_subflow_connect(sk, &local, &remote, 0, 0); spin_lock_bh(&msk->pm.lock); - mptcp_pm_announce_addr(msk, &remote, true, use_port); - mptcp_pm_nl_add_addr_send_ack(msk); +add_addr_echo: + mptcp_pm_announce_addr(msk, &msk->pm.remote, true); + mptcp_pm_nl_addr_send_ack(msk); } -static void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk) +void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; msk_owned_by_me(msk); lockdep_assert_held(&msk->pm.lock); - if (!mptcp_pm_should_add_signal(msk)) + if (!mptcp_pm_should_add_signal(msk) && + !mptcp_pm_should_rm_signal(msk)) return; __mptcp_flush_join_list(msk); subflow = list_first_entry_or_null(&msk->conn_list, typeof(*subflow), node); if (subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - u8 add_addr; spin_unlock_bh(&msk->pm.lock); - pr_debug("send ack for add_addr%s%s", + pr_debug("send ack for %s%s%s", + mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr", mptcp_pm_should_add_signal_ipv6(msk) ? " [ipv6]" : "", mptcp_pm_should_add_signal_port(msk) ? " [port]" : ""); @@ -526,13 +551,6 @@ static void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk) tcp_send_ack(ssk); release_sock(ssk); spin_lock_bh(&msk->pm.lock); - - add_addr = READ_ONCE(msk->pm.addr_signal); - if (mptcp_pm_should_add_signal_ipv6(msk)) - add_addr &= ~BIT(MPTCP_ADD_ADDR_IPV6); - if (mptcp_pm_should_add_signal_port(msk)) - add_addr &= ~BIT(MPTCP_ADD_ADDR_PORT); - WRITE_ONCE(msk->pm.addr_signal, add_addr); } } @@ -571,43 +589,68 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, return -EINVAL; } -static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk) +static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, + const struct mptcp_rm_list *rm_list, + enum linux_mptcp_mib_field rm_type) { struct mptcp_subflow_context *subflow, *tmp; struct sock *sk = (struct sock *)msk; + u8 i; - pr_debug("address rm_id %d", msk->pm.rm_id); + pr_debug("%s rm_list_nr %d", + rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", rm_list->nr); msk_owned_by_me(msk); - if (!msk->pm.rm_id) + if (!rm_list->nr) return; if (list_empty(&msk->conn_list)) return; - list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - int how = RCV_SHUTDOWN | SEND_SHUTDOWN; + for (i = 0; i < rm_list->nr; i++) { + list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + int how = RCV_SHUTDOWN | SEND_SHUTDOWN; + u8 id = subflow->local_id; - if (msk->pm.rm_id != subflow->remote_id) - continue; + if (rm_type == MPTCP_MIB_RMADDR) + id = subflow->remote_id; - spin_unlock_bh(&msk->pm.lock); - mptcp_subflow_shutdown(sk, ssk, how); - mptcp_close_ssk(sk, ssk, subflow); - spin_lock_bh(&msk->pm.lock); - - msk->pm.add_addr_accepted--; - msk->pm.subflows--; - WRITE_ONCE(msk->pm.accept_addr, true); + if (rm_list->ids[i] != id) + continue; - __MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMADDR); + pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u", + rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", + i, rm_list->ids[i], subflow->local_id, subflow->remote_id); + spin_unlock_bh(&msk->pm.lock); + mptcp_subflow_shutdown(sk, ssk, how); + mptcp_close_ssk(sk, ssk, subflow); + spin_lock_bh(&msk->pm.lock); - break; + if (rm_type == MPTCP_MIB_RMADDR) { + msk->pm.add_addr_accepted--; + WRITE_ONCE(msk->pm.accept_addr, true); + } else if (rm_type == MPTCP_MIB_RMSUBFLOW) { + msk->pm.local_addr_used--; + } + msk->pm.subflows--; + __MPTCP_INC_STATS(sock_net(sk), rm_type); + } } } +static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk) +{ + mptcp_pm_nl_rm_addr_or_subflow(msk, &msk->pm.rm_list_rx, MPTCP_MIB_RMADDR); +} + +void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, + const struct mptcp_rm_list *rm_list) +{ + mptcp_pm_nl_rm_addr_or_subflow(msk, rm_list, MPTCP_MIB_RMSUBFLOW); +} + void mptcp_pm_nl_work(struct mptcp_sock *msk) { struct mptcp_pm_data *pm = &msk->pm; @@ -623,7 +666,7 @@ void mptcp_pm_nl_work(struct mptcp_sock *msk) } if (pm->status & BIT(MPTCP_PM_ADD_ADDR_SEND_ACK)) { pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_SEND_ACK); - mptcp_pm_nl_add_addr_send_ack(msk); + mptcp_pm_nl_addr_send_ack(msk); } if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) { pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED); @@ -641,45 +684,9 @@ void mptcp_pm_nl_work(struct mptcp_sock *msk) spin_unlock_bh(&msk->pm.lock); } -void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id) -{ - struct mptcp_subflow_context *subflow, *tmp; - struct sock *sk = (struct sock *)msk; - - pr_debug("subflow rm_id %d", rm_id); - - msk_owned_by_me(msk); - - if (!rm_id) - return; - - if (list_empty(&msk->conn_list)) - return; - - list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - int how = RCV_SHUTDOWN | SEND_SHUTDOWN; - - if (rm_id != subflow->local_id) - continue; - - spin_unlock_bh(&msk->pm.lock); - mptcp_subflow_shutdown(sk, ssk, how); - mptcp_close_ssk(sk, ssk, subflow); - spin_lock_bh(&msk->pm.lock); - - msk->pm.local_addr_used--; - msk->pm.subflows--; - - __MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMSUBFLOW); - - break; - } -} - static bool address_use_port(struct mptcp_pm_addr_entry *entry) { - return (entry->addr.flags & + return (entry->flags & (MPTCP_PM_ADDR_FLAG_SIGNAL | MPTCP_PM_ADDR_FLAG_SUBFLOW)) == MPTCP_PM_ADDR_FLAG_SIGNAL; } @@ -731,11 +738,11 @@ find_next: if (entry->addr.id > pernet->next_id) pernet->next_id = entry->addr.id; - if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { + if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { addr_max = pernet->add_addr_signal_max; WRITE_ONCE(pernet->add_addr_signal_max, addr_max + 1); } - if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { + if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { addr_max = pernet->local_addr_max; WRITE_ONCE(pernet->local_addr_max, addr_max + 1); } @@ -837,10 +844,10 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) return -ENOMEM; entry->addr = skc_local; - entry->addr.ifindex = 0; - entry->addr.flags = 0; entry->addr.id = 0; entry->addr.port = 0; + entry->ifindex = 0; + entry->flags = 0; entry->lsk = NULL; ret = mptcp_pm_nl_append_new_local_addr(pernet, entry); if (ret < 0) @@ -955,14 +962,14 @@ skip_family: if (tb[MPTCP_PM_ADDR_ATTR_IF_IDX]) { u32 val = nla_get_s32(tb[MPTCP_PM_ADDR_ATTR_IF_IDX]); - entry->addr.ifindex = val; + entry->ifindex = val; } if (tb[MPTCP_PM_ADDR_ATTR_ID]) entry->addr.id = nla_get_u8(tb[MPTCP_PM_ADDR_ATTR_ID]); if (tb[MPTCP_PM_ADDR_ATTR_FLAGS]) - entry->addr.flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]); + entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]); if (tb[MPTCP_PM_ADDR_ATTR_PORT]) entry->addr.port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT])); @@ -1071,12 +1078,15 @@ static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk, struct mptcp_addr_info *addr, bool force) { + struct mptcp_rm_list list = { .nr = 0 }; bool ret; + list.ids[list.nr++] = addr->id; + ret = remove_anno_list_by_saddr(msk, addr); if (ret || force) { spin_lock_bh(&msk->pm.lock); - mptcp_pm_remove_addr(msk, addr->id); + mptcp_pm_remove_addr(msk, &list); spin_unlock_bh(&msk->pm.lock); } return ret; @@ -1087,9 +1097,12 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, { struct mptcp_sock *msk; long s_slot = 0, s_num = 0; + struct mptcp_rm_list list = { .nr = 0 }; pr_debug("remove_id=%d", addr->id); + list.ids[list.nr++] = addr->id; + while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { struct sock *sk = (struct sock *)msk; bool remove_subflow; @@ -1103,7 +1116,7 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, remove_subflow = lookup_subflow_by_saddr(&msk->conn_list, addr); mptcp_pm_remove_anno_addr(msk, addr, remove_subflow); if (remove_subflow) - mptcp_pm_remove_subflow(msk, addr->id); + mptcp_pm_remove_subflow(msk, &list); release_sock(sk); next: @@ -1146,6 +1159,41 @@ static void mptcp_pm_free_addr_entry(struct mptcp_pm_addr_entry *entry) } } +static int mptcp_nl_remove_id_zero_address(struct net *net, + struct mptcp_addr_info *addr) +{ + struct mptcp_rm_list list = { .nr = 0 }; + long s_slot = 0, s_num = 0; + struct mptcp_sock *msk; + + list.ids[list.nr++] = 0; + + while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { + struct sock *sk = (struct sock *)msk; + struct mptcp_addr_info msk_local; + + if (list_empty(&msk->conn_list)) + goto next; + + local_address((struct sock_common *)msk, &msk_local); + if (!addresses_equal(&msk_local, addr, addr->port)) + goto next; + + lock_sock(sk); + spin_lock_bh(&msk->pm.lock); + mptcp_pm_remove_addr(msk, &list); + mptcp_pm_nl_rm_subflow_received(msk, &list); + spin_unlock_bh(&msk->pm.lock); + release_sock(sk); + +next: + sock_put(sk); + cond_resched(); + } + + return 0; +} + static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) { struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; @@ -1158,6 +1206,14 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) if (ret < 0) return ret; + /* the zero id address is special: the first address used by the msk + * always gets such an id, so different subflows can have different zero + * id addresses. Additionally zero id is not accounted for in id_bitmap. + * Let's use an 'mptcp_rm_list' instead of the common remove code. + */ + if (addr.addr.id == 0) + return mptcp_nl_remove_id_zero_address(sock_net(skb->sk), &addr.addr); + spin_lock_bh(&pernet->lock); entry = __lookup_addr_by_id(pernet, addr.addr.id); if (!entry) { @@ -1165,11 +1221,11 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) spin_unlock_bh(&pernet->lock); return -EINVAL; } - if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { + if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { addr_max = pernet->add_addr_signal_max; WRITE_ONCE(pernet->add_addr_signal_max, addr_max - 1); } - if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { + if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { addr_max = pernet->local_addr_max; WRITE_ONCE(pernet->local_addr_max, addr_max - 1); } @@ -1185,14 +1241,61 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) return ret; } -static void __flush_addrs(struct net *net, struct list_head *list) +static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, + struct list_head *rm_list) +{ + struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 }; + struct mptcp_pm_addr_entry *entry; + + list_for_each_entry(entry, rm_list, list) { + if (lookup_subflow_by_saddr(&msk->conn_list, &entry->addr) && + alist.nr < MPTCP_RM_IDS_MAX && + slist.nr < MPTCP_RM_IDS_MAX) { + alist.ids[alist.nr++] = entry->addr.id; + slist.ids[slist.nr++] = entry->addr.id; + } else if (remove_anno_list_by_saddr(msk, &entry->addr) && + alist.nr < MPTCP_RM_IDS_MAX) { + alist.ids[alist.nr++] = entry->addr.id; + } + } + + if (alist.nr) { + spin_lock_bh(&msk->pm.lock); + mptcp_pm_remove_addr(msk, &alist); + spin_unlock_bh(&msk->pm.lock); + } + if (slist.nr) + mptcp_pm_remove_subflow(msk, &slist); +} + +static void mptcp_nl_remove_addrs_list(struct net *net, + struct list_head *rm_list) +{ + long s_slot = 0, s_num = 0; + struct mptcp_sock *msk; + + if (list_empty(rm_list)) + return; + + while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { + struct sock *sk = (struct sock *)msk; + + lock_sock(sk); + mptcp_pm_remove_addrs_and_subflows(msk, rm_list); + release_sock(sk); + + sock_put(sk); + cond_resched(); + } +} + +static void __flush_addrs(struct list_head *list) { while (!list_empty(list)) { struct mptcp_pm_addr_entry *cur; cur = list_entry(list->next, struct mptcp_pm_addr_entry, list); - mptcp_nl_remove_subflow_and_signal_addr(net, &cur->addr); list_del_rcu(&cur->list); mptcp_pm_free_addr_entry(cur); } @@ -1217,7 +1320,8 @@ static int mptcp_nl_cmd_flush_addrs(struct sk_buff *skb, struct genl_info *info) pernet->next_id = 1; bitmap_zero(pernet->id_bitmap, MAX_ADDR_ID + 1); spin_unlock_bh(&pernet->lock); - __flush_addrs(sock_net(skb->sk), &free_list); + mptcp_nl_remove_addrs_list(sock_net(skb->sk), &free_list); + __flush_addrs(&free_list); return 0; } @@ -1237,10 +1341,10 @@ static int mptcp_nl_fill_addr(struct sk_buff *skb, goto nla_put_failure; if (nla_put_u8(skb, MPTCP_PM_ADDR_ATTR_ID, addr->id)) goto nla_put_failure; - if (nla_put_u32(skb, MPTCP_PM_ADDR_ATTR_FLAGS, entry->addr.flags)) + if (nla_put_u32(skb, MPTCP_PM_ADDR_ATTR_FLAGS, entry->flags)) goto nla_put_failure; - if (entry->addr.ifindex && - nla_put_s32(skb, MPTCP_PM_ADDR_ATTR_IF_IDX, entry->addr.ifindex)) + if (entry->ifindex && + nla_put_s32(skb, MPTCP_PM_ADDR_ATTR_IF_IDX, entry->ifindex)) goto nla_put_failure; if (addr->family == AF_INET && @@ -1468,7 +1572,7 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) if (ret < 0) return ret; - if (addr.addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP) + if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP) bkup = 1; list_for_each_entry(entry, &pernet->local_addr_list, list) { @@ -1478,9 +1582,9 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) return ret; if (bkup) - entry->addr.flags |= MPTCP_PM_ADDR_FLAG_BACKUP; + entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP; else - entry->addr.flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP; + entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP; } } @@ -1586,9 +1690,21 @@ static int mptcp_event_sub_closed(struct sk_buff *skb, const struct mptcp_sock *msk, const struct sock *ssk) { + const struct mptcp_subflow_context *sf; + if (mptcp_event_put_token_and_ssk(skb, msk, ssk)) return -EMSGSIZE; + sf = mptcp_subflow_ctx(ssk); + if (!sf->reset_seen) + return 0; + + if (nla_put_u32(skb, MPTCP_ATTR_RESET_REASON, sf->reset_reason)) + return -EMSGSIZE; + + if (nla_put_u32(skb, MPTCP_ATTR_RESET_FLAGS, sf->reset_transient)) + return -EMSGSIZE; + return 0; } @@ -1814,7 +1930,7 @@ static void __net_exit pm_nl_exit_net(struct list_head *net_list) /* net is removed from namespace list, can't race with * other modifiers */ - __flush_addrs(net, &pernet->local_addr_list); + __flush_addrs(&pernet->local_addr_list); } } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 4bde960e19dc..8009b3f8e4c1 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -491,7 +491,7 @@ static bool mptcp_check_data_fin(struct sock *sk) u64 rcv_data_fin_seq; bool ret = false; - if (__mptcp_check_fallback(msk) || !msk->first) + if (__mptcp_check_fallback(msk)) return ret; /* Need to ack a DATA_FIN received from a peer while this side @@ -2045,28 +2045,21 @@ out_err: return copied; } -static void mptcp_retransmit_handler(struct sock *sk) -{ - struct mptcp_sock *msk = mptcp_sk(sk); - - set_bit(MPTCP_WORK_RTX, &msk->flags); - mptcp_schedule_work(sk); -} - static void mptcp_retransmit_timer(struct timer_list *t) { struct inet_connection_sock *icsk = from_timer(icsk, t, icsk_retransmit_timer); struct sock *sk = &icsk->icsk_inet.sk; + struct mptcp_sock *msk = mptcp_sk(sk); bh_lock_sock(sk); if (!sock_owned_by_user(sk)) { - mptcp_retransmit_handler(sk); + /* we need a process context to retransmit */ + if (!test_and_set_bit(MPTCP_WORK_RTX, &msk->flags)) + mptcp_schedule_work(sk); } else { /* delegate our work to tcp_release_cb() */ - if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, - &sk->sk_tsq_flags)) - sock_hold(sk); + set_bit(MPTCP_RETRANSMIT, &msk->flags); } bh_unlock_sock(sk); sock_put(sk); @@ -3001,17 +2994,16 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk) } } -#define MPTCP_DEFERRED_ALL (TCPF_WRITE_TIMER_DEFERRED) - /* processes deferred events and flush wmem */ static void mptcp_release_cb(struct sock *sk) { - unsigned long flags, nflags; - for (;;) { - flags = 0; + unsigned long flags = 0; + if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags)) flags |= BIT(MPTCP_PUSH_PENDING); + if (test_and_clear_bit(MPTCP_RETRANSMIT, &mptcp_sk(sk)->flags)) + flags |= BIT(MPTCP_RETRANSMIT); if (!flags) break; @@ -3026,6 +3018,8 @@ static void mptcp_release_cb(struct sock *sk) spin_unlock_bh(&sk->sk_lock.slock); if (flags & BIT(MPTCP_PUSH_PENDING)) __mptcp_push_pending(sk, 0); + if (flags & BIT(MPTCP_RETRANSMIT)) + __mptcp_retrans(sk); cond_resched(); spin_lock_bh(&sk->sk_lock.slock); @@ -3041,20 +3035,6 @@ static void mptcp_release_cb(struct sock *sk) */ __mptcp_update_wmem(sk); __mptcp_update_rmem(sk); - - do { - flags = sk->sk_tsq_flags; - if (!(flags & MPTCP_DEFERRED_ALL)) - return; - nflags = flags & ~MPTCP_DEFERRED_ALL; - } while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags); - - sock_release_ownership(sk); - - if (flags & TCPF_WRITE_TIMER_DEFERRED) { - mptcp_retransmit_handler(sk); - __sock_put(sk); - } } void mptcp_subflow_process_delegated(struct sock *ssk) @@ -3153,14 +3133,18 @@ bool mptcp_finish_join(struct sock *ssk) pr_debug("msk=%p, subflow=%p", msk, subflow); /* mptcp socket already closing? */ - if (!mptcp_is_fully_established(parent)) + if (!mptcp_is_fully_established(parent)) { + subflow->reset_reason = MPTCP_RST_EMPTCP; return false; + } if (!msk->pm.server_side) goto out; - if (!mptcp_pm_allow_new_subflow(msk)) + if (!mptcp_pm_allow_new_subflow(msk)) { + subflow->reset_reason = MPTCP_RST_EPROHIBIT; return false; + } /* active connections are already on conn_list, and we can't acquire * msk lock here. @@ -3174,8 +3158,10 @@ bool mptcp_finish_join(struct sock *ssk) sock_hold(ssk); } spin_unlock_bh(&msk->join_list_lock); - if (!ret) + if (!ret) { + subflow->reset_reason = MPTCP_RST_EPROHIBIT; return false; + } /* attach to msk socket only after we are sure he will deal with us * at close time @@ -3287,8 +3273,12 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info)) mptcp_subflow_early_fallback(msk, subflow); #endif - if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) + if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) { + MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT); mptcp_subflow_early_fallback(msk, subflow); + } + if (likely(!__mptcp_check_fallback(msk))) + MPTCP_INC_STATS(sock_net(sock->sk), MPTCP_MIB_MPCAPABLEACTIVE); do_connect: err = ssock->ops->connect(ssock, uaddr, addr_len, flags); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index e21a5bc36cf0..d8de1e961ab0 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -22,10 +22,10 @@ #define OPTION_MPTCP_MPJ_SYNACK BIT(4) #define OPTION_MPTCP_MPJ_ACK BIT(5) #define OPTION_MPTCP_ADD_ADDR BIT(6) -#define OPTION_MPTCP_ADD_ADDR6 BIT(7) -#define OPTION_MPTCP_RM_ADDR BIT(8) -#define OPTION_MPTCP_FASTCLOSE BIT(9) -#define OPTION_MPTCP_PRIO BIT(10) +#define OPTION_MPTCP_RM_ADDR BIT(7) +#define OPTION_MPTCP_FASTCLOSE BIT(8) +#define OPTION_MPTCP_PRIO BIT(9) +#define OPTION_MPTCP_RST BIT(10) /* MPTCP option subtypes */ #define MPTCPOPT_MP_CAPABLE 0 @@ -36,6 +36,7 @@ #define MPTCPOPT_MP_PRIO 5 #define MPTCPOPT_MP_FAIL 6 #define MPTCPOPT_MP_FASTCLOSE 7 +#define MPTCPOPT_RST 8 /* MPTCP suboption lengths */ #define TCPOLEN_MPTCP_MPC_SYN 4 @@ -61,10 +62,11 @@ #define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT 22 #define TCPOLEN_MPTCP_PORT_LEN 2 #define TCPOLEN_MPTCP_PORT_ALIGN 2 -#define TCPOLEN_MPTCP_RM_ADDR_BASE 4 +#define TCPOLEN_MPTCP_RM_ADDR_BASE 3 #define TCPOLEN_MPTCP_PRIO 3 #define TCPOLEN_MPTCP_PRIO_ALIGN 4 #define TCPOLEN_MPTCP_FASTCLOSE 12 +#define TCPOLEN_MPTCP_RST 4 /* MPTCP MP_JOIN flags */ #define MPTCPOPT_BACKUP BIT(0) @@ -88,12 +90,13 @@ /* MPTCP ADD_ADDR flags */ #define MPTCP_ADDR_ECHO BIT(0) -#define MPTCP_ADDR_IPVERSION_4 4 -#define MPTCP_ADDR_IPVERSION_6 6 /* MPTCP MP_PRIO flags */ #define MPTCP_PRIO_BKUP BIT(0) +/* MPTCP TCPRST flags */ +#define MPTCP_RST_TRANSIENT BIT(0) + /* MPTCP socket flags */ #define MPTCP_DATA_READY 0 #define MPTCP_NOSPACE 1 @@ -104,6 +107,7 @@ #define MPTCP_PUSH_PENDING 6 #define MPTCP_CLEAN_UNA 7 #define MPTCP_ERROR_REPORT 8 +#define MPTCP_RETRANSMIT 9 static inline bool before64(__u64 seq1, __u64 seq2) { @@ -122,11 +126,11 @@ struct mptcp_options_received { u16 mp_capable : 1, mp_join : 1, fastclose : 1, + reset : 1, dss : 1, add_addr : 1, rm_addr : 1, mp_prio : 1, - family : 4, echo : 1, backup : 1; u32 token; @@ -141,16 +145,11 @@ struct mptcp_options_received { ack64:1, mpc_map:1, __unused:2; - u8 addr_id; - u8 rm_id; - union { - struct in_addr addr; -#if IS_ENABLED(CONFIG_MPTCP_IPV6) - struct in6_addr addr6; -#endif - }; + struct mptcp_addr_info addr; + struct mptcp_rm_list rm_list; u64 ahmac; - u16 port; + u8 reset_reason:4; + u8 reset_transient:1; }; static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field) @@ -159,20 +158,6 @@ static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field) ((nib & 0xF) << 8) | field); } -struct mptcp_addr_info { - sa_family_t family; - __be16 port; - u8 id; - u8 flags; - int ifindex; - union { - struct in_addr addr; -#if IS_ENABLED(CONFIG_MPTCP_IPV6) - struct in6_addr addr6; -#endif - }; -}; - enum mptcp_pm_status { MPTCP_PM_ADD_ADDR_RECEIVED, MPTCP_PM_ADD_ADDR_SEND_ACK, @@ -207,7 +192,8 @@ struct mptcp_pm_data { u8 local_addr_used; u8 subflows; u8 status; - u8 rm_id; + struct mptcp_rm_list rm_list_tx; + struct mptcp_rm_list rm_list_rx; }; struct mptcp_data_frag { @@ -420,6 +406,9 @@ struct mptcp_subflow_context { u8 hmac[MPTCPOPT_HMAC_LEN]; u8 local_id; u8 remote_id; + u8 reset_seen:1; + u8 reset_transient:1; + u8 reset_reason:4; long delegated_status; struct list_head delegated_node; /* link into delegated_action, protected by local BH */ @@ -543,7 +532,8 @@ struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); /* called with sk socket lock held */ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, - const struct mptcp_addr_info *remote); + const struct mptcp_addr_info *remote, + u8 flags, int ifindex); int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock); void mptcp_info2sockaddr(const struct mptcp_addr_info *info, struct sockaddr_storage *addr, @@ -641,13 +631,16 @@ void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp); bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk); void mptcp_pm_connection_closed(struct mptcp_sock *msk); -void mptcp_pm_subflow_established(struct mptcp_sock *msk, - struct mptcp_subflow_context *subflow); +void mptcp_pm_subflow_established(struct mptcp_sock *msk); void mptcp_pm_subflow_closed(struct mptcp_sock *msk, u8 id); void mptcp_pm_add_addr_received(struct mptcp_sock *msk, const struct mptcp_addr_info *addr); +void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk, + struct mptcp_addr_info *addr); void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk); -void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, u8 rm_id); +void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk); +void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, + const struct mptcp_rm_list *rm_list); void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup); int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, struct mptcp_addr_info *addr, @@ -657,12 +650,15 @@ bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk); struct mptcp_pm_add_entry * mptcp_pm_del_add_timer(struct mptcp_sock *msk, struct mptcp_addr_info *addr); +struct mptcp_pm_add_entry * +mptcp_lookup_anno_list_by_saddr(struct mptcp_sock *msk, + struct mptcp_addr_info *addr); int mptcp_pm_announce_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, - bool echo, bool port); -int mptcp_pm_remove_addr(struct mptcp_sock *msk, u8 local_id); -int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 local_id); + bool echo); +int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); +int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp); @@ -709,23 +705,32 @@ static inline unsigned int mptcp_add_addr_len(int family, bool echo, bool port) return len; } +static inline int mptcp_rm_addr_len(const struct mptcp_rm_list *rm_list) +{ + if (rm_list->nr == 0 || rm_list->nr > MPTCP_RM_IDS_MAX) + return -EINVAL; + + return TCPOLEN_MPTCP_RM_ADDR_BASE + roundup(rm_list->nr - 1, 4) + 1; +} + bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining, struct mptcp_addr_info *saddr, bool *echo, bool *port); bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, - u8 *rm_id); + struct mptcp_rm_list *rm_list); int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); void __init mptcp_pm_nl_init(void); void mptcp_pm_nl_data_init(struct mptcp_sock *msk); void mptcp_pm_nl_work(struct mptcp_sock *msk); -void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id); +void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, + const struct mptcp_rm_list *rm_list); int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); unsigned int mptcp_pm_get_add_addr_signal_max(struct mptcp_sock *msk); unsigned int mptcp_pm_get_add_addr_accept_max(struct mptcp_sock *msk); unsigned int mptcp_pm_get_subflows_max(struct mptcp_sock *msk); unsigned int mptcp_pm_get_local_addr_max(struct mptcp_sock *msk); -static inline struct mptcp_ext *mptcp_get_ext(struct sk_buff *skb) +static inline struct mptcp_ext *mptcp_get_ext(const struct sk_buff *skb) { return (struct mptcp_ext *)skb_ext_find(skb, SKB_EXT_MPTCP); } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index d17d39ccdf34..3c19a5265a0f 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -115,6 +115,16 @@ static bool subflow_use_different_sport(struct mptcp_sock *msk, const struct soc return inet_sk(sk)->inet_sport != inet_sk((struct sock *)msk)->inet_sport; } +static void subflow_add_reset_reason(struct sk_buff *skb, u8 reason) +{ + struct mptcp_ext *mpext = skb_ext_add(skb, SKB_EXT_MPTCP); + + if (mpext) { + memset(mpext, 0, sizeof(*mpext)); + mpext->reset_reason = reason; + } +} + /* Init mptcp request socket. * * Returns an error code if a JOIN has failed and a TCP reset @@ -165,6 +175,7 @@ again: if (mptcp_token_exists(subflow_req->token)) { if (retries-- > 0) goto again; + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_TOKENFALLBACKINIT); } else { subflow_req->mp_capable = 1; } @@ -176,6 +187,8 @@ again: subflow_req->mp_capable = 1; else if (retries-- > 0) goto again; + else + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_TOKENFALLBACKINIT); } else if (mp_opt.mp_join && listener->request_mptcp) { subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq; @@ -187,8 +200,10 @@ again: subflow_req->msk = subflow_token_join_request(req); /* Can't fall back to TCP in this case. */ - if (!subflow_req->msk) + if (!subflow_req->msk) { + subflow_add_reset_reason(skb, MPTCP_RST_EMPTCP); return -EPERM; + } if (subflow_use_different_sport(subflow_req->msk, sk_listener)) { pr_debug("syn inet_sport=%d %d", @@ -392,12 +407,15 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->remote_key = mp_opt.sndr_key; pr_debug("subflow=%p, remote_key=%llu", subflow, subflow->remote_key); + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK); mptcp_finish_connect(sk); } else if (subflow->request_join) { u8 hmac[SHA256_DIGEST_SIZE]; - if (!mp_opt.mp_join) + if (!mp_opt.mp_join) { + subflow->reset_reason = MPTCP_RST_EMPTCP; goto do_reset; + } subflow->thmac = mp_opt.thmac; subflow->remote_nonce = mp_opt.nonce; @@ -406,6 +424,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) if (!subflow_thmac_valid(subflow)) { MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC); + subflow->reset_reason = MPTCP_RST_EMPTCP; goto do_reset; } @@ -434,6 +453,7 @@ fallback: return; do_reset: + subflow->reset_transient = 0; mptcp_subflow_reset(sk); } @@ -650,8 +670,10 @@ create_child: * to reset the context to non MPTCP status. */ if (!ctx || fallback) { - if (fallback_is_fatal) + if (fallback_is_fatal) { + subflow_add_reset_reason(skb, MPTCP_RST_EMPTCP); goto dispose_child; + } subflow_drop_ctx(child); goto out; @@ -686,8 +708,10 @@ create_child: struct mptcp_sock *owner; owner = subflow_req->msk; - if (!owner) + if (!owner) { + subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); goto dispose_child; + } /* move the msk reference ownership to the subflow */ subflow_req->msk = NULL; @@ -1052,6 +1076,8 @@ fatal: smp_wmb(); ssk->sk_error_report(ssk); tcp_set_state(ssk, TCP_CLOSE); + subflow->reset_transient = 0; + subflow->reset_reason = MPTCP_RST_EMPTCP; tcp_send_active_reset(ssk, GFP_ATOMIC); subflow->data_avail = 0; return false; @@ -1081,7 +1107,7 @@ bool mptcp_subflow_data_available(struct sock *sk) * In mptcp, rwin is about the mptcp-level connection data. * * Data that is still on the ssk rx queue can thus be ignored, - * as far as mptcp peer is concerened that data is still inflight. + * as far as mptcp peer is concerned that data is still inflight. * DSS ACK is updated when skb is moved to the mptcp rx queue. */ void mptcp_space(const struct sock *ssk, int *space, int *full_space) @@ -1230,7 +1256,8 @@ void mptcp_info2sockaddr(const struct mptcp_addr_info *info, } int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, - const struct mptcp_addr_info *remote) + const struct mptcp_addr_info *remote, + u8 flags, int ifindex) { struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_subflow_context *subflow; @@ -1274,7 +1301,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, if (addr.ss_family == AF_INET6) addrlen = sizeof(struct sockaddr_in6); #endif - ssk->sk_bound_dev_if = loc->ifindex; + ssk->sk_bound_dev_if = ifindex; err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen); if (err) goto failed; @@ -1286,7 +1313,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, subflow->local_id = local_id; subflow->remote_id = remote_id; subflow->request_join = 1; - subflow->request_bkup = !!(loc->flags & MPTCP_PM_ADDR_FLAG_BACKUP); + subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP); mptcp_info2sockaddr(remote, &addr, ssk->sk_family); mptcp_add_pending_subflow(msk, subflow); |