summaryrefslogtreecommitdiffstats
path: root/net/ipv4/inet_connection_sock.c
diff options
context:
space:
mode:
authorCraig Gallek <kraig@google.com>2016-02-10 17:50:40 +0100
committerDavid S. Miller <davem@davemloft.net>2016-02-11 09:54:15 +0100
commitc125e80b88687b25b321795457309eaaee4bf270 (patch)
treed61eb6c67a62046691c8f354c0fad948ef298f99 /net/ipv4/inet_connection_sock.c
parentsoreuseport: Prep for fast reuseport TCP socket selection (diff)
downloadlinux-c125e80b88687b25b321795457309eaaee4bf270.tar.xz
linux-c125e80b88687b25b321795457309eaaee4bf270.zip
soreuseport: fast reuseport TCP socket selection
This change extends the fast SO_REUSEPORT socket lookup implemented for UDP to TCP. Listener sockets with SO_REUSEPORT and the same receive address are additionally added to an array for faster random access. This means that only a single socket from the group must be found in the listener list before any socket in the group can be used to receive a packet. Previously, every socket in the group needed to be considered before handing off the incoming packet. This feature also exposes the ability to use a BPF program when selecting a socket from a reuseport group. Signed-off-by: Craig Gallek <kraig@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/inet_connection_sock.c')
-rw-r--r--net/ipv4/inet_connection_sock.c14
1 files changed, 10 insertions, 4 deletions
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 12c8d389dc18..c16a2e6273d9 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -24,6 +24,7 @@
#include <net/tcp_states.h>
#include <net/xfrm.h>
#include <net/tcp.h>
+#include <net/sock_reuseport.h>
#ifdef INET_CSK_DEBUG
const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
@@ -67,7 +68,8 @@ int inet_csk_bind_conflict(const struct sock *sk,
if ((!reuse || !sk2->sk_reuse ||
sk2->sk_state == TCP_LISTEN) &&
(!reuseport || !sk2->sk_reuseport ||
- (sk2->sk_state != TCP_TIME_WAIT &&
+ rcu_access_pointer(sk->sk_reuseport_cb) ||
+ (sk2->sk_state != TCP_TIME_WAIT &&
!uid_eq(uid, sock_i_uid(sk2))))) {
if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
@@ -132,6 +134,7 @@ again:
sk->sk_state != TCP_LISTEN) ||
(tb->fastreuseport > 0 &&
sk->sk_reuseport &&
+ !rcu_access_pointer(sk->sk_reuseport_cb) &&
uid_eq(tb->fastuid, uid))) &&
(tb->num_owners < smallest_size || smallest_size == -1)) {
smallest_size = tb->num_owners;
@@ -193,15 +196,18 @@ tb_found:
if (((tb->fastreuse > 0 &&
sk->sk_reuse && sk->sk_state != TCP_LISTEN) ||
(tb->fastreuseport > 0 &&
- sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
- smallest_size == -1) {
+ sk->sk_reuseport &&
+ !rcu_access_pointer(sk->sk_reuseport_cb) &&
+ uid_eq(tb->fastuid, uid))) && smallest_size == -1) {
goto success;
} else {
ret = 1;
if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) {
if (((sk->sk_reuse && sk->sk_state != TCP_LISTEN) ||
(tb->fastreuseport > 0 &&
- sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
+ sk->sk_reuseport &&
+ !rcu_access_pointer(sk->sk_reuseport_cb) &&
+ uid_eq(tb->fastuid, uid))) &&
smallest_size != -1 && --attempts >= 0) {
spin_unlock(&head->lock);
goto again;