diff options
Diffstat (limited to 'net/mptcp')
-rw-r--r-- | net/mptcp/options.c | 2 | ||||
-rw-r--r-- | net/mptcp/pm.c | 2 | ||||
-rw-r--r-- | net/mptcp/pm_netlink.c | 2 | ||||
-rw-r--r-- | net/mptcp/protocol.c | 134 | ||||
-rw-r--r-- | net/mptcp/protocol.h | 2 | ||||
-rw-r--r-- | net/mptcp/subflow.c | 3 | ||||
-rw-r--r-- | net/mptcp/token.c | 9 |
7 files changed, 132 insertions, 22 deletions
diff --git a/net/mptcp/options.c b/net/mptcp/options.c index bd220ee4aac9..faf57585b892 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -4,6 +4,8 @@ * Copyright (c) 2017 - 2019, Intel Corporation. */ +#define pr_fmt(fmt) "MPTCP: " fmt + #include <linux/kernel.h> #include <net/tcp.h> #include <net/mptcp.h> diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 064639f72487..977d9c8b1453 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -3,6 +3,8 @@ * * Copyright (c) 2019, Intel Corporation. */ +#define pr_fmt(fmt) "MPTCP: " fmt + #include <linux/kernel.h> #include <net/tcp.h> #include <net/mptcp.h> diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index a0ce7f324499..86d61ab34c7c 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -4,6 +4,8 @@ * Copyright (c) 2020, Red Hat, Inc. */ +#define pr_fmt(fmt) "MPTCP: " fmt + #include <linux/inet.h> #include <linux/kernel.h> #include <net/tcp.h> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 1833bc1f4a43..9936e33ac351 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -57,21 +57,49 @@ static bool __mptcp_needs_tcp_fallback(const struct mptcp_sock *msk) return msk->first && !sk_is_mptcp(msk->first); } -static struct socket *__mptcp_tcp_fallback(struct mptcp_sock *msk) +static struct socket *mptcp_is_tcpsk(struct sock *sk) { - sock_owned_by_me((const struct sock *)msk); + struct socket *sock = sk->sk_socket; - if (likely(!__mptcp_needs_tcp_fallback(msk))) + if (sock->sk != sk) return NULL; - if (msk->subflow) { - release_sock((struct sock *)msk); - return msk->subflow; + if (unlikely(sk->sk_prot == &tcp_prot)) { + /* we are being invoked after mptcp_accept() has + * accepted a non-mp-capable flow: sk is a tcp_sk, + * not an mptcp one. + * + * Hand the socket over to tcp so all further socket ops + * bypass mptcp. 
+ */ + sock->ops = &inet_stream_ops; + return sock; +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + } else if (unlikely(sk->sk_prot == &tcpv6_prot)) { + sock->ops = &inet6_stream_ops; + return sock; +#endif } return NULL; } +static struct socket *__mptcp_tcp_fallback(struct mptcp_sock *msk) +{ + struct socket *sock; + + sock_owned_by_me((const struct sock *)msk); + + sock = mptcp_is_tcpsk((struct sock *)msk); + if (unlikely(sock)) + return sock; + + if (likely(!__mptcp_needs_tcp_fallback(msk))) + return NULL; + + return msk->subflow; +} + static bool __mptcp_can_create_subflow(const struct mptcp_sock *msk) { return !msk->first; @@ -84,6 +112,10 @@ static struct socket *__mptcp_socket_create(struct mptcp_sock *msk, int state) struct socket *ssock; int err; + ssock = __mptcp_tcp_fallback(msk); + if (unlikely(ssock)) + return ssock; + ssock = __mptcp_nmpc_socket(msk); if (ssock) goto set_state; @@ -121,6 +153,27 @@ static void __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, MPTCP_SKB_CB(skb)->offset = offset; } +/* both sockets must be locked */ +static bool mptcp_subflow_dsn_valid(const struct mptcp_sock *msk, + struct sock *ssk) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + u64 dsn = mptcp_subflow_get_mapped_dsn(subflow); + + /* revalidate data sequence number. + * + * mptcp_subflow_data_available() is usually called + * without msk lock. It's unlikely (but possible) + * that msk->ack_seq has been advanced since the last + * call found in-sequence data. 
+ */ + if (likely(dsn == msk->ack_seq)) + return true; + + subflow->data_avail = 0; + return mptcp_subflow_data_available(ssk); +} + static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, struct sock *ssk, unsigned int *bytes) @@ -132,6 +185,11 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, struct tcp_sock *tp; bool done = false; + if (!mptcp_subflow_dsn_valid(msk, ssk)) { + *bytes = 0; + return false; + } + if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { int rcvbuf = max(ssk->sk_rcvbuf, sk->sk_rcvbuf); @@ -290,6 +348,15 @@ void mptcp_data_acked(struct sock *sk) sock_hold(sk); } +void mptcp_subflow_eof(struct sock *sk) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + + if (!test_and_set_bit(MPTCP_WORK_EOF, &msk->flags) && + schedule_work(&msk->work)) + sock_hold(sk); +} + static void mptcp_stop_timer(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -662,9 +729,10 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) goto out; } +fallback: ssock = __mptcp_tcp_fallback(msk); if (unlikely(ssock)) { -fallback: + release_sock(sk); pr_debug("fallback passthrough"); ret = sock_sendmsg(ssock, msg); return ret >= 0 ? ret + copied : (copied ? copied : ret); @@ -697,8 +765,14 @@ fallback: if (ret < 0) break; if (ret == 0 && unlikely(__mptcp_needs_tcp_fallback(msk))) { + /* Can happen for passive sockets: + * 3WHS negotiated MPTCP, but first packet after is + * plain TCP (e.g. due to middlebox filtering unknown + * options). + * + * Fall back to TCP. 
+ */ release_sock(ssk); - ssock = __mptcp_tcp_fallback(msk); goto fallback; } @@ -811,6 +885,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, ssock = __mptcp_tcp_fallback(msk); if (unlikely(ssock)) { fallback: + release_sock(sk); pr_debug("fallback-read subflow=%p", mptcp_subflow_ctx(ssock->sk)); copied = sock_recvmsg(ssock, msg, flags); @@ -994,6 +1069,27 @@ static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu) return 0; } +static void mptcp_check_for_eof(struct mptcp_sock *msk) +{ + struct mptcp_subflow_context *subflow; + struct sock *sk = (struct sock *)msk; + int receivers = 0; + + mptcp_for_each_subflow(msk, subflow) + receivers += !subflow->rx_eof; + + if (!receivers && !(sk->sk_shutdown & RCV_SHUTDOWN)) { + /* hopefully temporary hack: propagate shutdown status + * to msk, when all subflows agree on it + */ + sk->sk_shutdown |= RCV_SHUTDOWN; + + smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ + set_bit(MPTCP_DATA_READY, &msk->flags); + sk->sk_data_ready(sk); + } +} + static void mptcp_worker(struct work_struct *work) { struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work); @@ -1010,6 +1106,9 @@ static void mptcp_worker(struct work_struct *work) __mptcp_flush_join_list(msk); __mptcp_move_skbs(msk); + if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags)) + mptcp_check_for_eof(msk); + if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) goto unlock; @@ -1371,12 +1470,11 @@ static int mptcp_setsockopt(struct sock *sk, int level, int optname, */ lock_sock(sk); ssock = __mptcp_tcp_fallback(msk); + release_sock(sk); if (ssock) return tcp_setsockopt(ssock->sk, level, optname, optval, optlen); - release_sock(sk); - return -EOPNOTSUPP; } @@ -1396,12 +1494,11 @@ static int mptcp_getsockopt(struct sock *sk, int level, int optname, */ lock_sock(sk); ssock = __mptcp_tcp_fallback(msk); + release_sock(sk); if (ssock) return tcp_getsockopt(ssock->sk, level, optname, optval, option); - release_sock(sk); - 
return -EOPNOTSUPP; } @@ -1752,7 +1849,9 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, msk = mptcp_sk(sk); lock_sock(sk); - ssock = __mptcp_nmpc_socket(msk); + ssock = __mptcp_tcp_fallback(msk); + if (!ssock) + ssock = __mptcp_nmpc_socket(msk); if (ssock) { mask = ssock->ops->poll(file, ssock, wait); release_sock(sk); @@ -1762,9 +1861,6 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, release_sock(sk); sock_poll_wait(file, sock, wait); lock_sock(sk); - ssock = __mptcp_tcp_fallback(msk); - if (unlikely(ssock)) - return ssock->ops->poll(file, ssock, NULL); if (test_bit(MPTCP_DATA_READY, &msk->flags)) mask = EPOLLIN | EPOLLRDNORM; @@ -1783,11 +1879,17 @@ static int mptcp_shutdown(struct socket *sock, int how) { struct mptcp_sock *msk = mptcp_sk(sock->sk); struct mptcp_subflow_context *subflow; + struct socket *ssock; int ret = 0; pr_debug("sk=%p, how=%d", msk, how); lock_sock(sock->sk); + ssock = __mptcp_tcp_fallback(msk); + if (ssock) { + release_sock(sock->sk); + return inet_shutdown(ssock, how); + } if (how == SHUT_WR || how == SHUT_RDWR) inet_sk_state_store(sock->sk, TCP_FIN_WAIT1); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index f733c5425552..67448002a2d7 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -89,6 +89,7 @@ #define MPTCP_DATA_READY 0 #define MPTCP_SEND_SPACE 1 #define MPTCP_WORK_RTX 2 +#define MPTCP_WORK_EOF 3 static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field) { @@ -339,6 +340,7 @@ void mptcp_finish_connect(struct sock *sk); void mptcp_data_ready(struct sock *sk, struct sock *ssk); bool mptcp_finish_join(struct sock *sk); void mptcp_data_acked(struct sock *sk); +void mptcp_subflow_eof(struct sock *sk); int mptcp_token_new_request(struct request_sock *req); void mptcp_token_destroy_request(u32 token); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index b5180c81588e..50a8bea987c6 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -994,8 
+994,7 @@ static void subflow_state_change(struct sock *sk) if (!(parent->sk_shutdown & RCV_SHUTDOWN) && !subflow->rx_eof && subflow_is_done(sk)) { subflow->rx_eof = 1; - parent->sk_shutdown |= RCV_SHUTDOWN; - __subflow_state_change(parent); + mptcp_subflow_eof(parent); } } diff --git a/net/mptcp/token.c b/net/mptcp/token.c index 129a5ad1bc35..33352dd99d4d 100644 --- a/net/mptcp/token.c +++ b/net/mptcp/token.c @@ -40,7 +40,7 @@ static int token_used __read_mostly; /** * mptcp_token_new_request - create new key/idsn/token for subflow_request - * @req - the request socket + * @req: the request socket * * This function is called when a new mptcp connection is coming in. * @@ -80,7 +80,7 @@ int mptcp_token_new_request(struct request_sock *req) /** * mptcp_token_new_connect - create new key/idsn/token for subflow - * @sk - the socket that will initiate a connection + * @sk: the socket that will initiate a connection * * This function is called when a new outgoing mptcp connection is * initiated. @@ -125,6 +125,7 @@ int mptcp_token_new_connect(struct sock *sk) /** * mptcp_token_new_accept - insert token for later processing * @token: the token to insert to the tree + * @conn: the just cloned socket linked to the new connection * * Called when a SYN packet creates a new logical connection, i.e. * is not a join request. @@ -169,7 +170,7 @@ struct mptcp_sock *mptcp_token_get_sock(u32 token) /** * mptcp_token_destroy_request - remove mptcp connection/token - * @token - token of mptcp connection to remove + * @token: token of mptcp connection to remove * * Remove not-yet-fully-established incoming connection identified * by @token. @@ -183,7 +184,7 @@ void mptcp_token_destroy_request(u32 token) /** * mptcp_token_destroy - remove mptcp connection/token - * @token - token of mptcp connection to remove + * @token: token of mptcp connection to remove * * Remove the connection identified by @token. */ |