summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorSowmini Varadhan <sowmini.varadhan@oracle.com>2016-06-04 23:00:00 +0200
committerDavid S. Miller <davem@davemloft.net>2016-06-08 00:10:15 +0200
commit9c79440e2c5e2518879f1599270f64c3ddda3baf (patch)
tree69cbc888d7d91fe1e3c197443f608e35d54a4c30 /net
parentRDS: TCP: Retransmit half-sent datagrams when switching sockets in rds_tcp_re... (diff)
downloadlinux-9c79440e2c5e2518879f1599270f64c3ddda3baf.tar.xz
linux-9c79440e2c5e2518879f1599270f64c3ddda3baf.zip
RDS: TCP: fix race windows in send-path quiescence by rds_tcp_accept_one()
The send path needs to be quiesced before resetting callbacks from rds_tcp_accept_one(), and commit eb192840266f ("RDS:TCP: Synchronize rds_tcp_accept_one with rds_send_xmit when resetting t_sock") achieves this using the c_state and RDS_IN_XMIT bit following the pattern used by rds_conn_shutdown(). However this leaves the possibility of a race window as shown in the sequence below take t_conn_lock in rds_tcp_conn_connect send outgoing syn to peer drop t_conn_lock in rds_tcp_conn_connect incoming from peer triggers rds_tcp_accept_one, conn is marked CONNECTING wait for RDS_IN_XMIT to quiesce any rds_send_xmit threads call rds_tcp_reset_callbacks [.. race-window where incoming syn-ack can cause the conn to be marked UP from rds_tcp_state_change ..] lock_sock called from rds_tcp_reset_callbacks, and we set t_sock to null As soon as the conn is marked UP in the race-window above, rds_send_xmit() threads will proceed to rds_tcp_xmit and may encounter a null-pointer deref on the t_sock. Given that rds_tcp_state_change() is invoked in softirq context, whereas rds_tcp_reset_callbacks() is in workq context, and testing for RDS_IN_XMIT after lock_sock could result in a deadlock with tcp_sendmsg, this commit fixes the race by using a new c_state, RDS_TCP_RESETTING, which will prevent a transition to RDS_CONN_UP from rds_tcp_state_change(). Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com> Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/rds/rds.h2
-rw-r--r--net/rds/tcp.c14
-rw-r--r--net/rds/tcp_connect.c2
-rw-r--r--net/rds/tcp_listen.c7
-rw-r--r--net/rds/threads.c10
5 files changed, 27 insertions, 8 deletions
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 80256b08eac0..387df5f32e49 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -74,6 +74,7 @@ enum {
RDS_CONN_CONNECTING,
RDS_CONN_DISCONNECTING,
RDS_CONN_UP,
+ RDS_CONN_RESETTING,
RDS_CONN_ERROR,
};
@@ -813,6 +814,7 @@ void rds_connect_worker(struct work_struct *);
void rds_shutdown_worker(struct work_struct *);
void rds_send_worker(struct work_struct *);
void rds_recv_worker(struct work_struct *);
+void rds_connect_path_complete(struct rds_connection *conn, int curr);
void rds_connect_complete(struct rds_connection *conn);
/* transport.c */
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 7ab1b41ffc88..74ee126a6fe6 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -148,11 +148,23 @@ void rds_tcp_reset_callbacks(struct socket *sock,
* potentially have transitioned to the RDS_CONN_UP state,
* so we must quiesce any send threads before resetting
* c_transport_data. We quiesce these threads by setting
- * cp_state to something other than RDS_CONN_UP, and then
+ * c_state to something other than RDS_CONN_UP, and then
* waiting for any existing threads in rds_send_xmit to
* complete release_in_xmit(). (Subsequent threads entering
* rds_send_xmit() will bail on !rds_conn_up().
+ *
+ * However an incoming syn-ack at this point would end up
+ * marking the conn as RDS_CONN_UP, and would again permit
+ * rds_send_xmi() threads through, so ideally we would
+ * synchronize on RDS_CONN_UP after lock_sock(), but cannot
+ * do that: waiting on !RDS_IN_XMIT after lock_sock() may
+ * end up deadlocking with tcp_sendmsg(), and the RDS_IN_XMIT
+ * would not get set. As a result, we set c_state to
+ * RDS_CONN_RESETTTING, to ensure that rds_tcp_state_change
+ * cannot mark rds_conn_path_up() in the window before lock_sock()
*/
+ atomic_set(&conn->c_state, RDS_CONN_RESETTING);
+ wait_event(conn->c_waitq, !test_bit(RDS_IN_XMIT, &conn->c_flags));
lock_sock(osock->sk);
/* reset receive side state for rds_tcp_data_recv() for osock */
if (tc->t_tinc) {
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index fb82e0a0bf89..fba13d0305fb 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -60,7 +60,7 @@ void rds_tcp_state_change(struct sock *sk)
case TCP_SYN_RECV:
break;
case TCP_ESTABLISHED:
- rds_connect_complete(conn);
+ rds_connect_path_complete(conn, RDS_CONN_CONNECTING);
break;
case TCP_CLOSE_WAIT:
case TCP_CLOSE:
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index d9fe53675d95..686b1d03a558 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -135,16 +135,15 @@ int rds_tcp_accept_one(struct socket *sock)
!conn->c_outgoing) {
goto rst_nsk;
} else {
- atomic_set(&conn->c_state, RDS_CONN_CONNECTING);
- wait_event(conn->c_waitq,
- !test_bit(RDS_IN_XMIT, &conn->c_flags));
rds_tcp_reset_callbacks(new_sock, conn);
conn->c_outgoing = 0;
+ /* rds_connect_path_complete() marks RDS_CONN_UP */
+ rds_connect_path_complete(conn, RDS_CONN_DISCONNECTING);
}
} else {
rds_tcp_set_callbacks(new_sock, conn);
+ rds_connect_path_complete(conn, RDS_CONN_CONNECTING);
}
- rds_connect_complete(conn); /* marks RDS_CONN_UP */
new_sock = NULL;
ret = 0;
goto out;
diff --git a/net/rds/threads.c b/net/rds/threads.c
index 454aa6d23327..4a323045719b 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -71,9 +71,9 @@
struct workqueue_struct *rds_wq;
EXPORT_SYMBOL_GPL(rds_wq);
-void rds_connect_complete(struct rds_connection *conn)
+void rds_connect_path_complete(struct rds_connection *conn, int curr)
{
- if (!rds_conn_transition(conn, RDS_CONN_CONNECTING, RDS_CONN_UP)) {
+ if (!rds_conn_transition(conn, curr, RDS_CONN_UP)) {
printk(KERN_WARNING "%s: Cannot transition to state UP, "
"current state is %d\n",
__func__,
@@ -90,6 +90,12 @@ void rds_connect_complete(struct rds_connection *conn)
queue_delayed_work(rds_wq, &conn->c_send_w, 0);
queue_delayed_work(rds_wq, &conn->c_recv_w, 0);
}
+EXPORT_SYMBOL_GPL(rds_connect_path_complete);
+
+void rds_connect_complete(struct rds_connection *conn)
+{
+ rds_connect_path_complete(conn, RDS_CONN_CONNECTING);
+}
EXPORT_SYMBOL_GPL(rds_connect_complete);
/*