diff options
Diffstat (limited to 'net/rds')
-rw-r--r-- | net/rds/af_rds.c | 4 | ||||
-rw-r--r-- | net/rds/connection.c | 18 | ||||
-rw-r--r-- | net/rds/message.c | 1 | ||||
-rw-r--r-- | net/rds/rds.h | 12 | ||||
-rw-r--r-- | net/rds/recv.c | 36 | ||||
-rw-r--r-- | net/rds/send.c | 9 | ||||
-rw-r--r-- | net/rds/tcp.c | 24 | ||||
-rw-r--r-- | net/rds/tcp_connect.c | 14 | ||||
-rw-r--r-- | net/rds/tcp_listen.c | 31 | ||||
-rw-r--r-- | net/rds/tcp_send.c | 3 | ||||
-rw-r--r-- | net/rds/threads.c | 3 |
11 files changed, 104 insertions, 51 deletions
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 6beaeb1138f3..2ac1e6194be3 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -605,10 +605,14 @@ static void rds_exit(void) } module_exit(rds_exit); +u32 rds_gen_num; + static int rds_init(void) { int ret; + net_get_random_once(&rds_gen_num, sizeof(rds_gen_num)); + ret = rds_bind_lock_init(); if (ret) goto out; diff --git a/net/rds/connection.c b/net/rds/connection.c index f5058559bb08..fe9d31c0b22d 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -269,6 +269,8 @@ static struct rds_connection *__rds_conn_create(struct net *net, kmem_cache_free(rds_conn_slab, conn); conn = found; } else { + conn->c_my_gen_num = rds_gen_num; + conn->c_peer_gen_num = 0; hlist_add_head_rcu(&conn->c_hash_node, head); rds_cong_add_conn(conn); rds_conn_count++; @@ -681,6 +683,7 @@ void rds_conn_path_connect_if_down(struct rds_conn_path *cp) !test_and_set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags)) queue_delayed_work(rds_wq, &cp->cp_conn_w, 0); } +EXPORT_SYMBOL_GPL(rds_conn_path_connect_if_down); void rds_conn_connect_if_down(struct rds_connection *conn) { @@ -689,21 +692,6 @@ void rds_conn_connect_if_down(struct rds_connection *conn) } EXPORT_SYMBOL_GPL(rds_conn_connect_if_down); -/* - * An error occurred on the connection - */ -void -__rds_conn_error(struct rds_connection *conn, const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - vprintk(fmt, ap); - va_end(ap); - - rds_conn_drop(conn); -} - void __rds_conn_path_error(struct rds_conn_path *cp, const char *fmt, ...) { diff --git a/net/rds/message.c b/net/rds/message.c index 6cb91061556a..49bfb512d808 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -42,6 +42,7 @@ static unsigned int rds_exthdr_size[__RDS_EXTHDR_MAX] = { [RDS_EXTHDR_RDMA] = sizeof(struct rds_ext_header_rdma), [RDS_EXTHDR_RDMA_DEST] = sizeof(struct rds_ext_header_rdma_dest), [RDS_EXTHDR_NPATHS] = sizeof(u16), +[RDS_EXTHDR_GEN_NUM] = sizeof(u32), }; diff --git a/net/rds/rds.h b/net/rds/rds.h index 67ba67c058b1..ebbf909b87ec 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -151,6 +151,9 @@ struct rds_connection { struct rds_conn_path c_path[RDS_MPATH_WORKERS]; wait_queue_head_t c_hs_waitq; /* handshake waitq */ + + u32 c_my_gen_num; + u32 c_peer_gen_num; }; static inline @@ -243,7 +246,8 @@ struct rds_ext_header_rdma_dest { /* Extension header announcing number of paths. * Implicit length = 2 bytes. */ -#define RDS_EXTHDR_NPATHS 4 +#define RDS_EXTHDR_NPATHS 5 +#define RDS_EXTHDR_GEN_NUM 6 #define __RDS_EXTHDR_MAX 16 /* for now */ @@ -338,6 +342,7 @@ static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie) #define RDS_MSG_RETRANSMITTED 5 #define RDS_MSG_MAPPED 6 #define RDS_MSG_PAGEVEC 7 +#define RDS_MSG_FLUSH 8 struct rds_message { atomic_t m_refcount; @@ -664,6 +669,7 @@ void rds_cong_exit(void); struct rds_message *rds_cong_update_alloc(struct rds_connection *conn); /* conn.c */ +extern u32 rds_gen_num; int rds_conn_init(void); void rds_conn_exit(void); struct rds_connection *rds_conn_create(struct net *net, @@ -683,10 +689,6 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len, struct rds_info_lengths *lens, int (*visitor)(struct rds_connection *, void *), size_t item_len); -__printf(2, 3) -void __rds_conn_error(struct rds_connection *conn, const char *, ...); -#define rds_conn_error(conn, fmt...) \ - __rds_conn_error(conn, KERN_WARNING "RDS: " fmt) __printf(2, 3) void __rds_conn_path_error(struct rds_conn_path *cp, const char *, ...); diff --git a/net/rds/recv.c b/net/rds/recv.c index cbfabdf3ff48..9d0666e5fe35 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -120,6 +120,36 @@ static void rds_recv_rcvbuf_delta(struct rds_sock *rs, struct sock *sk, /* do nothing if no change in cong state */ } +static void rds_conn_peer_gen_update(struct rds_connection *conn, + u32 peer_gen_num) +{ + int i; + struct rds_message *rm, *tmp; + unsigned long flags; + + WARN_ON(conn->c_trans->t_type != RDS_TRANS_TCP); + if (peer_gen_num != 0) { + if (conn->c_peer_gen_num != 0 && + peer_gen_num != conn->c_peer_gen_num) { + for (i = 0; i < RDS_MPATH_WORKERS; i++) { + struct rds_conn_path *cp; + + cp = &conn->c_path[i]; + spin_lock_irqsave(&cp->cp_lock, flags); + cp->cp_next_tx_seq = 1; + cp->cp_next_rx_seq = 0; + list_for_each_entry_safe(rm, tmp, + &cp->cp_retrans, + m_conn_item) { + set_bit(RDS_MSG_FLUSH, &rm->m_flags); + } + spin_unlock_irqrestore(&cp->cp_lock, flags); + } + } + conn->c_peer_gen_num = peer_gen_num; + } +} + /* * Process all extension headers that come with this message. */ @@ -163,7 +193,9 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr, union { struct rds_ext_header_version version; u16 rds_npaths; + u32 rds_gen_num; } buffer; + u32 new_peer_gen_num = 0; while (1) { len = sizeof(buffer); @@ -176,6 +208,9 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr, conn->c_npaths = min_t(int, RDS_MPATH_WORKERS, buffer.rds_npaths); break; + case RDS_EXTHDR_GEN_NUM: + new_peer_gen_num = buffer.rds_gen_num; + break; default: pr_warn_ratelimited("ignoring unknown exthdr type " "0x%x\n", type); @@ -183,6 +218,7 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr, } /* if RDS_EXTHDR_NPATHS was not found, default to a single-path */ conn->c_npaths = max_t(int, conn->c_npaths, 1); + rds_conn_peer_gen_update(conn, new_peer_gen_num); } /* rds_start_mprds() will synchronously start multiple paths when appropriate. diff --git a/net/rds/send.c b/net/rds/send.c index 896626b9a0ef..77c8c6e613ad 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -259,8 +259,9 @@ restart: * connection. * Therefore, we never retransmit messages with RDMA ops. */ - if (rm->rdma.op_active && - test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) { + if (test_bit(RDS_MSG_FLUSH, &rm->m_flags) || + (rm->rdma.op_active && + test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags))) { spin_lock_irqsave(&cp->cp_lock, flags); if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) list_move(&rm->m_conn_item, &to_be_dropped); @@ -1209,6 +1210,10 @@ rds_send_probe(struct rds_conn_path *cp, __be16 sport, rds_message_add_extension(&rm->m_inc.i_hdr, RDS_EXTHDR_NPATHS, &npaths, sizeof(npaths)); + rds_message_add_extension(&rm->m_inc.i_hdr, + RDS_EXTHDR_GEN_NUM, + &cp->cp_conn->c_my_gen_num, + sizeof(u32)); } spin_unlock_irqrestore(&cp->cp_lock, flags); diff --git a/net/rds/tcp.c b/net/rds/tcp.c index fcddacc92e01..57bb52361e0f 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -220,7 +220,7 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp) write_unlock_bh(&sock->sk->sk_callback_lock); } -static void rds_tcp_tc_info(struct socket *sock, unsigned int len, +static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens) { @@ -229,6 +229,7 @@ static void rds_tcp_tc_info(struct socket *sock, unsigned int len, unsigned long flags; struct sockaddr_in sin; int sinlen; + struct socket *sock; spin_lock_irqsave(&rds_tcp_tc_list_lock, flags); @@ -237,12 +238,17 @@ static void rds_tcp_tc_info(struct socket *sock, unsigned int len, list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) { - sock->ops->getname(sock, (struct sockaddr *)&sin, &sinlen, 0); - tsinfo.local_addr = sin.sin_addr.s_addr; - tsinfo.local_port = sin.sin_port; - sock->ops->getname(sock, (struct sockaddr *)&sin, &sinlen, 1); - tsinfo.peer_addr = sin.sin_addr.s_addr; - tsinfo.peer_port = sin.sin_port; + sock = tc->t_sock; + if (sock) { + sock->ops->getname(sock, (struct sockaddr *)&sin, + &sinlen, 0); + tsinfo.local_addr = sin.sin_addr.s_addr; + tsinfo.local_port = sin.sin_port; + sock->ops->getname(sock, (struct sockaddr *)&sin, + &sinlen, 1); + tsinfo.peer_addr = sin.sin_addr.s_addr; + tsinfo.peer_port = sin.sin_port; + } tsinfo.hdr_rem = tc->t_tinc_hdr_rem; tsinfo.data_rem = tc->t_tinc_data_rem; @@ -360,7 +366,7 @@ struct rds_transport rds_tcp_transport = { .t_mp_capable = 1, }; -static int rds_tcp_netid; +static unsigned int rds_tcp_netid; /* per-network namespace private data for this module */ struct rds_tcp_net { @@ -659,6 +665,8 @@ out_recv: out_pernet: unregister_pernet_subsys(&rds_tcp_net_ops); out_slab: + if (unregister_netdevice_notifier(&rds_tcp_dev_notifier)) + pr_warn("could not unregister rds_tcp_dev_notifier\n"); kmem_cache_destroy(rds_tcp_conn_slab); out: return ret; diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index 05f61c533ed3..d6839d96d539 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -60,7 +60,19 @@ void rds_tcp_state_change(struct sock *sk) case TCP_SYN_RECV: break; case TCP_ESTABLISHED: - rds_connect_path_complete(cp, RDS_CONN_CONNECTING); + /* Force the peer to reconnect so that we have the + * TCP ports going from <smaller-ip>.<transient> to + * <larger-ip>.<RDS_TCP_PORT>. We avoid marking the + * RDS connection as RDS_CONN_UP until the reconnect, + * to avoid RDS datagram loss. + */ + if (cp->cp_conn->c_laddr > cp->cp_conn->c_faddr && + rds_conn_path_transition(cp, RDS_CONN_CONNECTING, + RDS_CONN_ERROR)) { + rds_conn_path_drop(cp); + } else { + rds_connect_path_complete(cp, RDS_CONN_CONNECTING); + } break; case TCP_CLOSE_WAIT: case TCP_CLOSE: diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index e0b23fb5b8d5..f74bab3ecdca 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -83,27 +83,22 @@ struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn) { int i; bool peer_is_smaller = (conn->c_faddr < conn->c_laddr); - int npaths = conn->c_npaths; - - if (npaths <= 1) { - struct rds_conn_path *cp = &conn->c_path[0]; - int ret; - - ret = rds_conn_path_transition(cp, RDS_CONN_DOWN, - RDS_CONN_CONNECTING); - if (!ret) - rds_conn_path_transition(cp, RDS_CONN_ERROR, - RDS_CONN_CONNECTING); - return cp->cp_transport_data; - } + int npaths = max_t(int, 1, conn->c_npaths); - /* for mprds, paths with cp_index > 0 MUST be initiated by the peer + /* for mprds, all paths MUST be initiated by the peer * with the smaller address. */ - if (!peer_is_smaller) + if (!peer_is_smaller) { + /* Make sure we initiate at least one path if this + * has not already been done; rds_start_mprds() will + * take care of additional paths, if necessary. + */ + if (npaths == 1) + rds_conn_path_connect_if_down(&conn->c_path[0]); return NULL; + } - for (i = 1; i < npaths; i++) { + for (i = 0; i < npaths; i++) { struct rds_conn_path *cp = &conn->c_path[i]; if (rds_conn_path_transition(cp, RDS_CONN_DOWN, @@ -171,8 +166,8 @@ int rds_tcp_accept_one(struct socket *sock) mutex_lock(&rs_tcp->t_conn_path_lock); cp = rs_tcp->t_cpath; conn_state = rds_conn_path_state(cp); - if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_UP && - conn_state != RDS_CONN_ERROR) + WARN_ON(conn_state == RDS_CONN_UP); + if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_ERROR) goto rst_nsk; if (rs_tcp->t_sock) { /* Need to resolve a duelling SYN between peers. diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 89d09b481f47..dcf4742083ea 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -100,6 +100,9 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, set_bit(RDS_MSG_HAS_ACK_SEQ, &rm->m_flags); tc->t_last_expected_una = rm->m_ack_seq + 1; + if (test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) + rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED; + rdsdebug("rm %p tcp nxt %u ack_seq %llu\n", rm, rds_tcp_snd_nxt(tc), (unsigned long long)rm->m_ack_seq); diff --git a/net/rds/threads.c b/net/rds/threads.c index e42df11bf30a..e36e333a0aa0 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -171,8 +171,7 @@ void rds_connect_worker(struct work_struct *work) RDS_CONN_DOWN)) rds_queue_reconnect(cp); else - rds_conn_path_error(cp, - "RDS: connect failed\n"); + rds_conn_path_error(cp, "connect failed\n"); } } } |