summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/net/inet_hashtables.h9
-rw-r--r--net/ipv4/inet_connection_sock.c106
2 files changed, 95 insertions, 20 deletions
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 3fc0366743da..1178931288cb 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -74,12 +74,21 @@ struct inet_ehash_bucket {
* users logged onto your box, isn't it nice to know that new data
* ports are created in O(1) time? I thought so. ;-) -DaveM
*/
+#define FASTREUSEPORT_ANY 1
+#define FASTREUSEPORT_STRICT 2
+
struct inet_bind_bucket {
possible_net_t ib_net;
unsigned short port;
signed char fastreuse;
signed char fastreuseport;
kuid_t fastuid;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct in6_addr fast_v6_rcv_saddr;
+#endif
+ __be32 fast_rcv_saddr;
+ unsigned short fast_sk_family;
+ bool fast_ipv6_only;
struct hlist_node node;
struct hlist_head owners;
};
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index bbe28920e2d8..096a085611ab 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -38,20 +38,21 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg);
* IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
* and 0.0.0.0 equals to 0.0.0.0 only
*/
-static int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
+static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
+ const struct in6_addr *sk2_rcv_saddr6,
+ __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
+ bool sk1_ipv6only, bool sk2_ipv6only,
bool match_wildcard)
{
- const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
- int sk2_ipv6only = inet_v6_ipv6only(sk2);
- int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
+ int addr_type = ipv6_addr_type(sk1_rcv_saddr6);
int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
/* if both are mapped, treat as IPv4 */
if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
if (!sk2_ipv6only) {
- if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
+ if (sk1_rcv_saddr == sk2_rcv_saddr)
return 1;
- if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
+ if (!sk1_rcv_saddr || !sk2_rcv_saddr)
return match_wildcard;
}
return 0;
@@ -65,11 +66,11 @@ static int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
return 1;
if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
- !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
+ !(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
return 1;
if (sk2_rcv_saddr6 &&
- ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
+ ipv6_addr_equal(sk1_rcv_saddr6, sk2_rcv_saddr6))
return 1;
return 0;
@@ -80,13 +81,13 @@ static int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
* match_wildcard == false: addresses must be exactly the same, i.e.
* 0.0.0.0 only equals to 0.0.0.0
*/
-static int ipv4_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
- bool match_wildcard)
+static int ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
+ bool sk2_ipv6only, bool match_wildcard)
{
- if (!ipv6_only_sock(sk2)) {
- if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
+ if (!sk2_ipv6only) {
+ if (sk1_rcv_saddr == sk2_rcv_saddr)
return 1;
- if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
+ if (!sk1_rcv_saddr || !sk2_rcv_saddr)
return match_wildcard;
}
return 0;
@@ -97,9 +98,16 @@ int inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
{
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == AF_INET6)
- return ipv6_rcv_saddr_equal(sk, sk2, match_wildcard);
+ return ipv6_rcv_saddr_equal(&sk->sk_v6_rcv_saddr,
+ &sk2->sk_v6_rcv_saddr,
+ sk->sk_rcv_saddr,
+ sk2->sk_rcv_saddr,
+ ipv6_only_sock(sk),
+ ipv6_only_sock(sk2),
+ match_wildcard);
#endif
- return ipv4_rcv_saddr_equal(sk, sk2, match_wildcard);
+ return ipv4_rcv_saddr_equal(sk->sk_rcv_saddr, sk2->sk_rcv_saddr,
+ ipv6_only_sock(sk2), match_wildcard);
}
EXPORT_SYMBOL(inet_rcv_saddr_equal);
@@ -234,6 +242,39 @@ success:
return head;
}
+static inline int sk_reuseport_match(struct inet_bind_bucket *tb,
+ struct sock *sk)
+{
+ kuid_t uid = sock_i_uid(sk);
+
+ if (tb->fastreuseport <= 0)
+ return 0;
+ if (!sk->sk_reuseport)
+ return 0;
+ if (rcu_access_pointer(sk->sk_reuseport_cb))
+ return 0;
+ if (!uid_eq(tb->fastuid, uid))
+ return 0;
+ /* We only need to check the rcv_saddr if this tb was once marked
+ * without fastreuseport and then was reset, as we can only know that
+ * the fast_*rcv_saddr doesn't have any conflicts with the socks on the
+ * owners list.
+ */
+ if (tb->fastreuseport == FASTREUSEPORT_ANY)
+ return 1;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (tb->fast_sk_family == AF_INET6)
+ return ipv6_rcv_saddr_equal(&tb->fast_v6_rcv_saddr,
+ &sk->sk_v6_rcv_saddr,
+ tb->fast_rcv_saddr,
+ sk->sk_rcv_saddr,
+ tb->fast_ipv6_only,
+ ipv6_only_sock(sk), true);
+#endif
+ return ipv4_rcv_saddr_equal(tb->fast_rcv_saddr, sk->sk_rcv_saddr,
+ ipv6_only_sock(sk), true);
+}
+
/* Obtain a reference to a local port for the given sock,
* if snum is zero it means select any available local port.
* We try to allocate an odd port (and leave even ports for connect())
@@ -273,9 +314,7 @@ tb_found:
goto success;
if ((tb->fastreuse > 0 && reuse) ||
- (tb->fastreuseport > 0 &&
- !rcu_access_pointer(sk->sk_reuseport_cb) &&
- sk->sk_reuseport && uid_eq(tb->fastuid, uid)))
+ sk_reuseport_match(tb, sk))
goto success;
if (inet_csk_bind_conflict(sk, tb, true, true))
goto fail_unlock;
@@ -284,16 +323,43 @@ success:
if (!hlist_empty(&tb->owners)) {
tb->fastreuse = reuse;
if (sk->sk_reuseport) {
- tb->fastreuseport = 1;
+ tb->fastreuseport = FASTREUSEPORT_ANY;
tb->fastuid = uid;
+ tb->fast_rcv_saddr = sk->sk_rcv_saddr;
+ tb->fast_ipv6_only = ipv6_only_sock(sk);
+#if IS_ENABLED(CONFIG_IPV6)
+ tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
+#endif
} else {
tb->fastreuseport = 0;
}
} else {
if (!reuse)
tb->fastreuse = 0;
- if (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid))
+ if (sk->sk_reuseport) {
+ /* We didn't match or we don't have fastreuseport set on
+ * the tb, but we have sk_reuseport set on this socket
+ * and we know that there are no bind conflicts with
+ * this socket in this tb, so reset our tb's reuseport
+ * settings so that any subsequent sockets that match
+ * our current socket will be put on the fast path.
+ *
+ * If we reset we need to set FASTREUSEPORT_STRICT so we
+ * do extra checking for all subsequent sk_reuseport
+ * socks.
+ */
+ if (!sk_reuseport_match(tb, sk)) {
+ tb->fastreuseport = FASTREUSEPORT_STRICT;
+ tb->fastuid = uid;
+ tb->fast_rcv_saddr = sk->sk_rcv_saddr;
+ tb->fast_ipv6_only = ipv6_only_sock(sk);
+#if IS_ENABLED(CONFIG_IPV6)
+ tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
+#endif
+ }
+ } else {
tb->fastreuseport = 0;
+ }
}
if (!inet_csk(sk)->icsk_bind_hash)
inet_bind_hash(sk, tb, port);