summaryrefslogtreecommitdiffstats
path: root/net/smc
diff options
context:
space:
mode:
Diffstat (limited to 'net/smc')
-rw-r--r--net/smc/af_smc.c192
-rw-r--r--net/smc/smc.h7
-rw-r--r--net/smc/smc_cdc.c88
-rw-r--r--net/smc/smc_cdc.h21
-rw-r--r--net/smc/smc_clc.c12
-rw-r--r--net/smc/smc_clc.h2
-rw-r--r--net/smc/smc_core.c1175
-rw-r--r--net/smc/smc_core.h156
-rw-r--r--net/smc/smc_ib.c77
-rw-r--r--net/smc/smc_ib.h10
-rw-r--r--net/smc/smc_ism.c9
-rw-r--r--net/smc/smc_llc.c1645
-rw-r--r--net/smc/smc_llc.h69
-rw-r--r--net/smc/smc_pnet.c593
-rw-r--r--net/smc/smc_pnet.h7
-rw-r--r--net/smc/smc_tx.c25
-rw-r--r--net/smc/smc_wr.c62
-rw-r--r--net/smc/smc_wr.h3
18 files changed, 3183 insertions, 970 deletions
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 6fd44bdb0fc3..903321543838 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -337,50 +337,61 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}
-/* register a new rmb, send confirm_rkey msg to register with peer */
-static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc,
- bool conf_rkey)
-{
- if (!rmb_desc->wr_reg) {
- /* register memory region for new rmb */
- if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) {
- rmb_desc->regerr = 1;
- return -EFAULT;
- }
- rmb_desc->wr_reg = 1;
+/* register the new rmb on all links */
+static int smcr_lgr_reg_rmbs(struct smc_link *link,
+ struct smc_buf_desc *rmb_desc)
+{
+ struct smc_link_group *lgr = link->lgr;
+ int i, rc = 0;
+
+ rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
+ if (rc)
+ return rc;
+ /* protect against parallel smc_llc_cli_rkey_exchange() and
+ * parallel smcr_link_reg_rmb()
+ */
+ mutex_lock(&lgr->llc_conf_mutex);
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (lgr->lnk[i].state != SMC_LNK_ACTIVE)
+ continue;
+ rc = smcr_link_reg_rmb(&lgr->lnk[i], rmb_desc);
+ if (rc)
+ goto out;
}
- if (!conf_rkey)
- return 0;
+
/* exchange confirm_rkey msg with peer */
- if (smc_llc_do_confirm_rkey(link, rmb_desc)) {
- rmb_desc->regerr = 1;
- return -EFAULT;
+ rc = smc_llc_do_confirm_rkey(link, rmb_desc);
+ if (rc) {
+ rc = -EFAULT;
+ goto out;
}
- return 0;
+ rmb_desc->is_conf_rkey = true;
+out:
+ mutex_unlock(&lgr->llc_conf_mutex);
+ smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
+ return rc;
}
-static int smc_clnt_conf_first_link(struct smc_sock *smc)
+static int smcr_clnt_conf_first_link(struct smc_sock *smc)
{
- struct net *net = sock_net(smc->clcsock->sk);
- struct smc_link_group *lgr = smc->conn.lgr;
- struct smc_link *link;
- int rest;
+ struct smc_link *link = smc->conn.lnk;
+ struct smc_llc_qentry *qentry;
int rc;
- link = &lgr->lnk[SMC_SINGLE_LINK];
/* receive CONFIRM LINK request from server over RoCE fabric */
- rest = wait_for_completion_interruptible_timeout(
- &link->llc_confirm,
- SMC_LLC_WAIT_FIRST_TIME);
- if (rest <= 0) {
+ qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
+ SMC_LLC_CONFIRM_LINK);
+ if (!qentry) {
struct smc_clc_msg_decline dclc;
rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
}
-
- if (link->llc_confirm_rc)
+ smc_llc_save_peer_uid(qentry);
+ rc = smc_llc_eval_conf_link(qentry, SMC_LLC_REQ);
+ smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
+ if (rc)
return SMC_CLC_DECL_RMBE_EC;
rc = smc_ib_modify_qp_rts(link);
@@ -389,34 +400,34 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
smc_wr_remember_qp_attr(link);
- if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
+ if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
return SMC_CLC_DECL_ERR_REGRMB;
+ /* confirm_rkey is implicit on 1st contact */
+ smc->conn.rmb_desc->is_conf_rkey = true;
+
/* send CONFIRM LINK response over RoCE fabric */
rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
if (rc < 0)
return SMC_CLC_DECL_TIMEOUT_CL;
- /* receive ADD LINK request from server over RoCE fabric */
- rest = wait_for_completion_interruptible_timeout(&link->llc_add,
- SMC_LLC_WAIT_TIME);
- if (rest <= 0) {
+ smc_llc_link_active(link);
+ smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);
+
+ /* optional 2nd link, receive ADD LINK request from server */
+ qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
+ SMC_LLC_ADD_LINK);
+ if (!qentry) {
struct smc_clc_msg_decline dclc;
rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
- return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
+ if (rc == -EAGAIN)
+ rc = 0; /* no DECLINE received, go with one link */
+ return rc;
}
-
- /* send add link reject message, only one link supported for now */
- rc = smc_llc_send_add_link(link,
- link->smcibdev->mac[link->ibport - 1],
- link->gid, SMC_LLC_RESP);
- if (rc < 0)
- return SMC_CLC_DECL_TIMEOUT_AL;
-
- smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
-
+ smc_llc_flow_qentry_clr(&link->lgr->llc_flow_lcl);
+ smc_llc_cli_add_link(link, qentry);
return 0;
}
@@ -596,8 +607,8 @@ static int smc_connect_rdma(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *aclc,
struct smc_init_info *ini)
{
+ int i, reason_code = 0;
struct smc_link *link;
- int reason_code = 0;
ini->is_smcd = false;
ini->ib_lcl = &aclc->lcl;
@@ -610,10 +621,28 @@ static int smc_connect_rdma(struct smc_sock *smc,
mutex_unlock(&smc_client_lgr_pending);
return reason_code;
}
- link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK];
smc_conn_save_peer_info(smc, aclc);
+ if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
+ link = smc->conn.lnk;
+ } else {
+ /* set link that was assigned by server */
+ link = NULL;
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ struct smc_link *l = &smc->conn.lgr->lnk[i];
+
+ if (l->peer_qpn == ntoh24(aclc->qpn)) {
+ link = l;
+ break;
+ }
+ }
+ if (!link)
+ return smc_connect_abort(smc, SMC_CLC_DECL_NOSRVLINK,
+ ini->cln_first_contact);
+ smc->conn.lnk = link;
+ }
+
/* create send buffer and rmb */
if (smc_buf_create(smc, false))
return smc_connect_abort(smc, SMC_CLC_DECL_MEM,
@@ -622,7 +651,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
if (ini->cln_first_contact == SMC_FIRST_CONTACT)
smc_link_save_peer_info(link, aclc);
- if (smc_rmb_rtoken_handling(&smc->conn, aclc))
+ if (smc_rmb_rtoken_handling(&smc->conn, link, aclc))
return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK,
ini->cln_first_contact);
@@ -634,7 +663,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
ini->cln_first_contact);
} else {
- if (smc_reg_rmb(link, smc->conn.rmb_desc, true))
+ if (smcr_lgr_reg_rmbs(link, smc->conn.rmb_desc))
return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
ini->cln_first_contact);
}
@@ -649,7 +678,9 @@ static int smc_connect_rdma(struct smc_sock *smc,
if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
/* QP confirmation over RoCE fabric */
- reason_code = smc_clnt_conf_first_link(smc);
+ smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
+ reason_code = smcr_clnt_conf_first_link(smc);
+ smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
if (reason_code)
return smc_connect_abort(smc, reason_code,
ini->cln_first_contact);
@@ -999,17 +1030,13 @@ void smc_close_non_accepted(struct sock *sk)
sock_put(sk); /* final sock_put */
}
-static int smc_serv_conf_first_link(struct smc_sock *smc)
+static int smcr_serv_conf_first_link(struct smc_sock *smc)
{
- struct net *net = sock_net(smc->clcsock->sk);
- struct smc_link_group *lgr = smc->conn.lgr;
- struct smc_link *link;
- int rest;
+ struct smc_link *link = smc->conn.lnk;
+ struct smc_llc_qentry *qentry;
int rc;
- link = &lgr->lnk[SMC_SINGLE_LINK];
-
- if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
+ if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
return SMC_CLC_DECL_ERR_REGRMB;
/* send CONFIRM LINK request to client over the RoCE fabric */
@@ -1018,40 +1045,29 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
return SMC_CLC_DECL_TIMEOUT_CL;
/* receive CONFIRM LINK response from client over the RoCE fabric */
- rest = wait_for_completion_interruptible_timeout(
- &link->llc_confirm_resp,
- SMC_LLC_WAIT_FIRST_TIME);
- if (rest <= 0) {
+ qentry = smc_llc_wait(link->lgr, link, SMC_LLC_WAIT_TIME,
+ SMC_LLC_CONFIRM_LINK);
+ if (!qentry) {
struct smc_clc_msg_decline dclc;
rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
}
-
- if (link->llc_confirm_resp_rc)
+ smc_llc_save_peer_uid(qentry);
+ rc = smc_llc_eval_conf_link(qentry, SMC_LLC_RESP);
+ smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
+ if (rc)
return SMC_CLC_DECL_RMBE_EC;
- /* send ADD LINK request to client over the RoCE fabric */
- rc = smc_llc_send_add_link(link,
- link->smcibdev->mac[link->ibport - 1],
- link->gid, SMC_LLC_REQ);
- if (rc < 0)
- return SMC_CLC_DECL_TIMEOUT_AL;
-
- /* receive ADD LINK response from client over the RoCE fabric */
- rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
- SMC_LLC_WAIT_TIME);
- if (rest <= 0) {
- struct smc_clc_msg_decline dclc;
-
- rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
- SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
- return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
- }
+ /* confirm_rkey is implicit on 1st contact */
+ smc->conn.rmb_desc->is_conf_rkey = true;
- smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
+ smc_llc_link_active(link);
+ smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);
+ /* initial contact - try to establish second link */
+ smc_llc_srv_add_link(link);
return 0;
}
@@ -1194,10 +1210,10 @@ static int smc_listen_ism_init(struct smc_sock *new_smc,
/* listen worker: register buffers */
static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
{
- struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
+ struct smc_connection *conn = &new_smc->conn;
if (local_contact != SMC_FIRST_CONTACT) {
- if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true))
+ if (smcr_lgr_reg_rmbs(conn->lnk, conn->rmb_desc))
return SMC_CLC_DECL_ERR_REGRMB;
}
smc_rmb_sync_sg_for_device(&new_smc->conn);
@@ -1210,13 +1226,13 @@ static int smc_listen_rdma_finish(struct smc_sock *new_smc,
struct smc_clc_msg_accept_confirm *cclc,
int local_contact)
{
- struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
+ struct smc_link *link = new_smc->conn.lnk;
int reason_code = 0;
if (local_contact == SMC_FIRST_CONTACT)
smc_link_save_peer_info(link, cclc);
- if (smc_rmb_rtoken_handling(&new_smc->conn, cclc)) {
+ if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc)) {
reason_code = SMC_CLC_DECL_ERR_RTOK;
goto decline;
}
@@ -1227,7 +1243,9 @@ static int smc_listen_rdma_finish(struct smc_sock *new_smc,
goto decline;
}
/* QP confirmation over RoCE fabric */
- reason_code = smc_serv_conf_first_link(new_smc);
+ smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
+ reason_code = smcr_serv_conf_first_link(new_smc);
+ smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
if (reason_code)
goto decline;
}
diff --git a/net/smc/smc.h b/net/smc/smc.h
index be11ba41190f..6f1c42da7a4c 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -121,6 +121,7 @@ enum smc_urg_state {
struct smc_connection {
struct rb_node alert_node;
struct smc_link_group *lgr; /* link group of connection */
+ struct smc_link *lnk; /* assigned SMC-R link */
u32 alert_token_local; /* unique conn. id */
u8 peer_rmbe_idx; /* from tcp handshake */
int peer_rmbe_size; /* size of peer rx buffer */
@@ -142,6 +143,9 @@ struct smc_connection {
* .prod cf. TCP snd_nxt
* .cons cf. TCP sends ack
*/
+ union smc_host_cursor local_tx_ctrl_fin;
+ /* prod crsr - confirmed by peer
+ */
union smc_host_cursor tx_curs_prep; /* tx - prepared data
* snd_max..wmem_alloc
*/
@@ -153,6 +157,7 @@ struct smc_connection {
*/
atomic_t sndbuf_space; /* remaining space in sndbuf */
u16 tx_cdc_seq; /* sequence # for CDC send */
+ u16 tx_cdc_seq_fin; /* sequence # - tx completed */
spinlock_t send_lock; /* protect wr_sends */
struct delayed_work tx_work; /* retry of smc_cdc_msg_send */
u32 tx_off; /* base offset in peer rmb */
@@ -183,12 +188,14 @@ struct smc_connection {
spinlock_t acurs_lock; /* protect cursors */
#endif
struct work_struct close_work; /* peer sent some closing */
+ struct work_struct abort_work; /* abort the connection */
struct tasklet_struct rx_tsklet; /* Receiver tasklet for SMC-D */
u8 rx_off; /* receive offset:
* 0 for SMC-R, 32 for SMC-D
*/
u64 peer_token; /* SMC-D token of peer */
u8 killed : 1; /* abnormal termination */
+ u8 out_of_sync : 1; /* out of sync with peer */
};
struct smc_sock { /* smc sock container */
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 164f1584861b..a47e8855e045 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -47,17 +47,20 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
/* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */
smp_mb__after_atomic();
smc_curs_copy(&conn->tx_curs_fin, &cdcpend->cursor, conn);
+ smc_curs_copy(&conn->local_tx_ctrl_fin, &cdcpend->p_cursor,
+ conn);
+ conn->tx_cdc_seq_fin = cdcpend->ctrl_seq;
}
smc_tx_sndbuf_nonfull(smc);
bh_unlock_sock(&smc->sk);
}
int smc_cdc_get_free_slot(struct smc_connection *conn,
+ struct smc_link *link,
struct smc_wr_buf **wr_buf,
struct smc_rdma_wr **wr_rdma_buf,
struct smc_cdc_tx_pend **pend)
{
- struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
int rc;
rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
@@ -91,12 +94,10 @@ int smc_cdc_msg_send(struct smc_connection *conn,
struct smc_wr_buf *wr_buf,
struct smc_cdc_tx_pend *pend)
{
+ struct smc_link *link = conn->lnk;
union smc_host_cursor cfed;
- struct smc_link *link;
int rc;
- link = &conn->lgr->lnk[SMC_SINGLE_LINK];
-
smc_cdc_add_pending_send(conn, pend);
conn->tx_cdc_seq++;
@@ -106,22 +107,60 @@ int smc_cdc_msg_send(struct smc_connection *conn,
if (!rc) {
smc_curs_copy(&conn->rx_curs_confirmed, &cfed, conn);
conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0;
+ } else {
+ conn->tx_cdc_seq--;
+ conn->local_tx_ctrl.seqno = conn->tx_cdc_seq;
}
return rc;
}
+/* send a validation msg indicating the move of a conn to an other QP link */
+int smcr_cdc_msg_send_validation(struct smc_connection *conn,
+ struct smc_cdc_tx_pend *pend,
+ struct smc_wr_buf *wr_buf)
+{
+ struct smc_host_cdc_msg *local = &conn->local_tx_ctrl;
+ struct smc_link *link = conn->lnk;
+ struct smc_cdc_msg *peer;
+ int rc;
+
+ peer = (struct smc_cdc_msg *)wr_buf;
+ peer->common.type = local->common.type;
+ peer->len = local->len;
+ peer->seqno = htons(conn->tx_cdc_seq_fin); /* seqno last compl. tx */
+ peer->token = htonl(local->token);
+ peer->prod_flags.failover_validation = 1;
+
+ rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend);
+ return rc;
+}
+
static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn)
{
struct smc_cdc_tx_pend *pend;
struct smc_wr_buf *wr_buf;
+ struct smc_link *link;
+ bool again = false;
int rc;
- rc = smc_cdc_get_free_slot(conn, &wr_buf, NULL, &pend);
+again:
+ link = conn->lnk;
+ rc = smc_cdc_get_free_slot(conn, link, &wr_buf, NULL, &pend);
if (rc)
return rc;
spin_lock_bh(&conn->send_lock);
+ if (link != conn->lnk) {
+ /* link of connection changed, try again one time*/
+ spin_unlock_bh(&conn->send_lock);
+ smc_wr_tx_put_slot(link,
+ (struct smc_wr_tx_pend_priv *)pend);
+ if (again)
+ return -ENOLINK;
+ again = true;
+ goto again;
+ }
rc = smc_cdc_msg_send(conn, wr_buf, pend);
spin_unlock_bh(&conn->send_lock);
return rc;
@@ -165,7 +204,7 @@ static void smc_cdc_tx_dismisser(struct smc_wr_tx_pend_priv *tx_pend)
void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)
{
- struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
+ struct smc_link *link = conn->lnk;
smc_wr_tx_dismiss_slots(link, SMC_CDC_MSG_TYPE,
smc_cdc_tx_filter, smc_cdc_tx_dismisser,
@@ -239,6 +278,28 @@ static void smc_cdc_handle_urg_data_arrival(struct smc_sock *smc,
sk_send_sigurg(&smc->sk);
}
+static void smc_cdc_msg_validate(struct smc_sock *smc, struct smc_cdc_msg *cdc,
+ struct smc_link *link)
+{
+ struct smc_connection *conn = &smc->conn;
+ u16 recv_seq = ntohs(cdc->seqno);
+ s16 diff;
+
+ /* check that seqnum was seen before */
+ diff = conn->local_rx_ctrl.seqno - recv_seq;
+ if (diff < 0) { /* diff larger than 0x7fff */
+ /* drop connection */
+ conn->out_of_sync = 1; /* prevent any further receives */
+ spin_lock_bh(&conn->send_lock);
+ conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
+ conn->lnk = link;
+ spin_unlock_bh(&conn->send_lock);
+ sock_hold(&smc->sk); /* sock_put in abort_work */
+ if (!schedule_work(&conn->abort_work))
+ sock_put(&smc->sk);
+ }
+}
+
static void smc_cdc_msg_recv_action(struct smc_sock *smc,
struct smc_cdc_msg *cdc)
{
@@ -369,16 +430,19 @@ static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf)
read_lock_bh(&lgr->conns_lock);
conn = smc_lgr_find_conn(ntohl(cdc->token), lgr);
read_unlock_bh(&lgr->conns_lock);
- if (!conn)
+ if (!conn || conn->out_of_sync)
return;
smc = container_of(conn, struct smc_sock, conn);
- if (!cdc->prod_flags.failover_validation) {
- if (smc_cdc_before(ntohs(cdc->seqno),
- conn->local_rx_ctrl.seqno))
- /* received seqno is old */
- return;
+ if (cdc->prod_flags.failover_validation) {
+ smc_cdc_msg_validate(smc, cdc, link);
+ return;
}
+ if (smc_cdc_before(ntohs(cdc->seqno),
+ conn->local_rx_ctrl.seqno))
+ /* received seqno is old */
+ return;
+
smc_cdc_msg_recv(smc, cdc);
}
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index 861dc24c588c..0a0a89abd38b 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -97,23 +97,6 @@ static inline void smc_curs_add(int size, union smc_host_cursor *curs,
}
}
-/* SMC cursors are 8 bytes long and require atomic reading and writing */
-static inline u64 smc_curs_read(union smc_host_cursor *curs,
- struct smc_connection *conn)
-{
-#ifndef KERNEL_HAS_ATOMIC64
- unsigned long flags;
- u64 ret;
-
- spin_lock_irqsave(&conn->acurs_lock, flags);
- ret = curs->acurs;
- spin_unlock_irqrestore(&conn->acurs_lock, flags);
- return ret;
-#else
- return atomic64_read(&curs->acurs);
-#endif
-}
-
/* Copy cursor src into tgt */
static inline void smc_curs_copy(union smc_host_cursor *tgt,
union smc_host_cursor *src,
@@ -304,6 +287,7 @@ struct smc_cdc_tx_pend {
};
int smc_cdc_get_free_slot(struct smc_connection *conn,
+ struct smc_link *link,
struct smc_wr_buf **wr_buf,
struct smc_rdma_wr **wr_rdma_buf,
struct smc_cdc_tx_pend **pend);
@@ -312,6 +296,9 @@ int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
struct smc_cdc_tx_pend *pend);
int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn);
int smcd_cdc_msg_send(struct smc_connection *conn);
+int smcr_cdc_msg_send_validation(struct smc_connection *conn,
+ struct smc_cdc_tx_pend *pend,
+ struct smc_wr_buf *wr_buf);
int smc_cdc_init(void) __init;
void smcd_cdc_rx_init(struct smc_connection *conn);
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index ea0068f0173c..d5627df24215 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -496,7 +496,7 @@ int smc_clc_send_confirm(struct smc_sock *smc)
sizeof(SMCD_EYECATCHER));
} else {
/* SMC-R specific settings */
- link = &conn->lgr->lnk[SMC_SINGLE_LINK];
+ link = conn->lnk;
memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
cclc.hdr.path = SMC_TYPE_R;
@@ -508,13 +508,13 @@ int smc_clc_send_confirm(struct smc_sock *smc)
ETH_ALEN);
hton24(cclc.qpn, link->roce_qp->qp_num);
cclc.rmb_rkey =
- htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
+ htonl(conn->rmb_desc->mr_rx[link->link_idx]->rkey);
cclc.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */
cclc.rmbe_alert_token = htonl(conn->alert_token_local);
cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
cclc.rmbe_size = conn->rmbe_size_short;
cclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
- (conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
+ (conn->rmb_desc->sgt[link->link_idx].sgl));
hton24(cclc.psn, link->psn_initial);
memcpy(cclc.smcr_trl.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
@@ -572,7 +572,7 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
aclc.hdr.path = SMC_TYPE_R;
- link = &conn->lgr->lnk[SMC_SINGLE_LINK];
+ link = conn->lnk;
memcpy(aclc.lcl.id_for_peer, local_systemid,
sizeof(local_systemid));
memcpy(&aclc.lcl.gid, link->gid, SMC_GID_SIZE);
@@ -580,13 +580,13 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
ETH_ALEN);
hton24(aclc.qpn, link->roce_qp->qp_num);
aclc.rmb_rkey =
- htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
+ htonl(conn->rmb_desc->mr_rx[link->link_idx]->rkey);
aclc.rmbe_idx = 1; /* as long as 1 RMB = 1 RMBE */
aclc.rmbe_alert_token = htonl(conn->alert_token_local);
aclc.qp_mtu = link->path_mtu;
aclc.rmbe_size = conn->rmbe_size_short,
aclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
- (conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
+ (conn->rmb_desc->sgt[link->link_idx].sgl));
hton24(aclc.psn, link->psn_initial);
memcpy(aclc.smcr_trl.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index ca209272e5fa..465876701b75 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -44,6 +44,8 @@
#define SMC_CLC_DECL_DIFFPREFIX 0x03070000 /* IP prefix / subnet mismatch */
#define SMC_CLC_DECL_GETVLANERR 0x03080000 /* err to get vlan id of ip device*/
#define SMC_CLC_DECL_ISMVLANERR 0x03090000 /* err to reg vlan id on ism dev */
+#define SMC_CLC_DECL_NOACTLINK 0x030a0000 /* no active smc-r link in lgr */
+#define SMC_CLC_DECL_NOSRVLINK 0x030b0000 /* SMC-R link from srv not found */
#define SMC_CLC_DECL_SYNCERR 0x04000000 /* synchronization error */
#define SMC_CLC_DECL_PEERDECL 0x05000000 /* peer declined during handshake */
#define SMC_CLC_DECL_INTERR 0x09990000 /* internal error */
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 824c5211b027..7964a21e5e6f 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -44,10 +44,20 @@ static struct smc_lgr_list smc_lgr_list = { /* established link groups */
static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);
+struct smc_ib_up_work {
+ struct work_struct work;
+ struct smc_link_group *lgr;
+ struct smc_ib_device *smcibdev;
+ u8 ibport;
+};
+
static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
struct smc_buf_desc *buf_desc);
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);
+static void smc_link_up_work(struct work_struct *work);
+static void smc_link_down_work(struct work_struct *work);
+
/* return head of link group list and its lock for a given link group */
static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
spinlock_t **lgr_lock)
@@ -111,16 +121,60 @@ static void smc_lgr_add_alert_token(struct smc_connection *conn)
rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
}
+/* assign an SMC-R link to the connection */
+static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
+{
+ enum smc_link_state expected = first ? SMC_LNK_ACTIVATING :
+ SMC_LNK_ACTIVE;
+ int i, j;
+
+ /* do link balancing */
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ struct smc_link *lnk = &conn->lgr->lnk[i];
+
+ if (lnk->state != expected || lnk->link_is_asym)
+ continue;
+ if (conn->lgr->role == SMC_CLNT) {
+ conn->lnk = lnk; /* temporary, SMC server assigns link*/
+ break;
+ }
+ if (conn->lgr->conns_num % 2) {
+ for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) {
+ struct smc_link *lnk2;
+
+ lnk2 = &conn->lgr->lnk[j];
+ if (lnk2->state == expected &&
+ !lnk2->link_is_asym) {
+ conn->lnk = lnk2;
+ break;
+ }
+ }
+ }
+ if (!conn->lnk)
+ conn->lnk = lnk;
+ break;
+ }
+ if (!conn->lnk)
+ return SMC_CLC_DECL_NOACTLINK;
+ return 0;
+}
+
/* Register connection in link group by assigning an alert token
* registered in a search tree.
* Requires @conns_lock
* Note that '0' is a reserved value and not assigned.
*/
-static void smc_lgr_register_conn(struct smc_connection *conn)
+static int smc_lgr_register_conn(struct smc_connection *conn, bool first)
{
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
static atomic_t nexttoken = ATOMIC_INIT(0);
+ int rc;
+ if (!conn->lgr->is_smcd) {
+ rc = smcr_lgr_conn_assign_link(conn, first);
+ if (rc)
+ return rc;
+ }
/* find a new alert_token_local value not yet used by some connection
* in this link group
*/
@@ -132,6 +186,7 @@ static void smc_lgr_register_conn(struct smc_connection *conn)
}
smc_lgr_add_alert_token(conn);
conn->lgr->conns_num++;
+ return 0;
}
/* Unregister connection and reset the alert token of the given connection<
@@ -166,27 +221,33 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
void smc_lgr_cleanup_early(struct smc_connection *conn)
{
struct smc_link_group *lgr = conn->lgr;
+ struct list_head *lgr_list;
+ spinlock_t *lgr_lock;
if (!lgr)
return;
smc_conn_free(conn);
- smc_lgr_forget(lgr);
+ lgr_list = smc_lgr_list_head(lgr, &lgr_lock);
+ spin_lock_bh(lgr_lock);
+ /* do not use this link group for new connections */
+ if (!list_empty(lgr_list))
+ list_del_init(lgr_list);
+ spin_unlock_bh(lgr_lock);
smc_lgr_schedule_free_work_fast(lgr);
}
-/* Send delete link, either as client to request the initiation
- * of the DELETE LINK sequence from server; or as server to
- * initiate the delete processing. See smc_llc_rx_delete_link().
- */
-static int smc_link_send_delete(struct smc_link *lnk, bool orderly)
+static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
{
- if (lnk->state == SMC_LNK_ACTIVE &&
- !smc_llc_send_delete_link(lnk, SMC_LLC_REQ, orderly)) {
- smc_llc_link_deleting(lnk);
- return 0;
+ int i;
+
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ struct smc_link *lnk = &lgr->lnk[i];
+
+ if (smc_link_usable(lnk))
+ lnk->state = SMC_LNK_INACTIVE;
}
- return -ENOTCONN;
+ wake_up_interruptible_all(&lgr->llc_waiter);
}
static void smc_lgr_free(struct smc_link_group *lgr);
@@ -197,7 +258,6 @@ static void smc_lgr_free_work(struct work_struct *work)
struct smc_link_group,
free_work);
spinlock_t *lgr_lock;
- struct smc_link *lnk;
bool conns;
smc_lgr_list_head(lgr, &lgr_lock);
@@ -214,26 +274,17 @@ static void smc_lgr_free_work(struct work_struct *work)
return;
}
list_del_init(&lgr->list); /* remove from smc_lgr_list */
-
- lnk = &lgr->lnk[SMC_SINGLE_LINK];
- if (!lgr->is_smcd && !lgr->terminating) {
- /* try to send del link msg, on error free lgr immediately */
- if (lnk->state == SMC_LNK_ACTIVE &&
- !smc_link_send_delete(lnk, true)) {
- /* reschedule in case we never receive a response */
- smc_lgr_schedule_free_work(lgr);
- spin_unlock_bh(lgr_lock);
- return;
- }
- }
lgr->freeing = 1; /* this instance does the freeing, no new schedule */
spin_unlock_bh(lgr_lock);
cancel_delayed_work(&lgr->free_work);
- if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE)
- smc_llc_link_inactive(lnk);
+ if (!lgr->is_smcd && !lgr->terminating)
+ smc_llc_send_link_delete_all(lgr, true,
+ SMC_LLC_DEL_PROG_INIT_TERM);
if (lgr->is_smcd && !lgr->terminating)
smc_ism_signal_shutdown(lgr);
+ if (!lgr->is_smcd)
+ smcr_lgr_link_deactivate_all(lgr);
smc_lgr_free(lgr);
}
@@ -245,6 +296,89 @@ static void smc_lgr_terminate_work(struct work_struct *work)
__smc_lgr_terminate(lgr, true);
}
+/* return next unique link id for the lgr */
+static u8 smcr_next_link_id(struct smc_link_group *lgr)
+{
+ u8 link_id;
+ int i;
+
+ while (1) {
+ link_id = ++lgr->next_link_id;
+ if (!link_id) /* skip zero as link_id */
+ link_id = ++lgr->next_link_id;
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (smc_link_usable(&lgr->lnk[i]) &&
+ lgr->lnk[i].link_id == link_id)
+ continue;
+ }
+ break;
+ }
+ return link_id;
+}
+
+int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
+ u8 link_idx, struct smc_init_info *ini)
+{
+ u8 rndvec[3];
+ int rc;
+
+ get_device(&ini->ib_dev->ibdev->dev);
+ atomic_inc(&ini->ib_dev->lnk_cnt);
+ lnk->state = SMC_LNK_ACTIVATING;
+ lnk->link_id = smcr_next_link_id(lgr);
+ lnk->lgr = lgr;
+ lnk->link_idx = link_idx;
+ lnk->smcibdev = ini->ib_dev;
+ lnk->ibport = ini->ib_port;
+ lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
+ smc_llc_link_set_uid(lnk);
+ INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
+ if (!ini->ib_dev->initialized) {
+ rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev);
+ if (rc)
+ goto out;
+ }
+ get_random_bytes(rndvec, sizeof(rndvec));
+ lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
+ (rndvec[2] << 16);
+ rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
+ ini->vlan_id, lnk->gid, &lnk->sgid_index);
+ if (rc)
+ goto out;
+ rc = smc_llc_link_init(lnk);
+ if (rc)
+ goto out;
+ rc = smc_wr_alloc_link_mem(lnk);
+ if (rc)
+ goto clear_llc_lnk;
+ rc = smc_ib_create_protection_domain(lnk);
+ if (rc)
+ goto free_link_mem;
+ rc = smc_ib_create_queue_pair(lnk);
+ if (rc)
+ goto dealloc_pd;
+ rc = smc_wr_create_link(lnk);
+ if (rc)
+ goto destroy_qp;
+ return 0;
+
+destroy_qp:
+ smc_ib_destroy_queue_pair(lnk);
+dealloc_pd:
+ smc_ib_dealloc_protection_domain(lnk);
+free_link_mem:
+ smc_wr_free_link_mem(lnk);
+clear_llc_lnk:
+ smc_llc_link_clear(lnk, false);
+out:
+ put_device(&ini->ib_dev->ibdev->dev);
+ memset(lnk, 0, sizeof(struct smc_link));
+ lnk->state = SMC_LNK_UNUSED;
+ if (!atomic_dec_return(&ini->ib_dev->lnk_cnt))
+ wake_up(&ini->ib_dev->lnks_deleted);
+ return rc;
+}
+
/* create a new SMC link group */
static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
{
@@ -252,7 +386,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
struct list_head *lgr_list;
struct smc_link *lnk;
spinlock_t *lgr_lock;
- u8 rndvec[3];
+ u8 link_idx;
int rc = 0;
int i;
@@ -274,13 +408,14 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
lgr->freefast = 0;
lgr->freeing = 0;
lgr->vlan_id = ini->vlan_id;
- rwlock_init(&lgr->sndbufs_lock);
- rwlock_init(&lgr->rmbs_lock);
+ mutex_init(&lgr->sndbufs_lock);
+ mutex_init(&lgr->rmbs_lock);
rwlock_init(&lgr->conns_lock);
for (i = 0; i < SMC_RMBE_SIZES; i++) {
INIT_LIST_HEAD(&lgr->sndbufs[i]);
INIT_LIST_HEAD(&lgr->rmbs[i]);
}
+ lgr->next_link_id = 0;
smc_lgr_list.num += SMC_LGR_NUM_INCR;
memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
@@ -297,48 +432,21 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
atomic_inc(&ini->ism_dev->lgr_cnt);
} else {
/* SMC-R specific settings */
- get_device(&ini->ib_dev->ibdev->dev);
lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
SMC_SYSTEMID_LEN);
+ memcpy(lgr->pnet_id, ini->ib_dev->pnetid[ini->ib_port - 1],
+ SMC_MAX_PNETID_LEN);
+ smc_llc_lgr_init(lgr, smc);
- lnk = &lgr->lnk[SMC_SINGLE_LINK];
- /* initialize link */
- lnk->state = SMC_LNK_ACTIVATING;
- lnk->link_id = SMC_SINGLE_LINK;
- lnk->smcibdev = ini->ib_dev;
- lnk->ibport = ini->ib_port;
- lgr_list = &smc_lgr_list.list;
- lgr_lock = &smc_lgr_list.lock;
- lnk->path_mtu =
- ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
- if (!ini->ib_dev->initialized)
- smc_ib_setup_per_ibdev(ini->ib_dev);
- get_random_bytes(rndvec, sizeof(rndvec));
- lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
- (rndvec[2] << 16);
- rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
- ini->vlan_id, lnk->gid,
- &lnk->sgid_index);
+ link_idx = SMC_SINGLE_LINK;
+ lnk = &lgr->lnk[link_idx];
+ rc = smcr_link_init(lgr, lnk, link_idx, ini);
if (rc)
goto free_lgr;
- rc = smc_llc_link_init(lnk);
- if (rc)
- goto free_lgr;
- rc = smc_wr_alloc_link_mem(lnk);
- if (rc)
- goto clear_llc_lnk;
- rc = smc_ib_create_protection_domain(lnk);
- if (rc)
- goto free_link_mem;
- rc = smc_ib_create_queue_pair(lnk);
- if (rc)
- goto dealloc_pd;
- rc = smc_wr_create_link(lnk);
- if (rc)
- goto destroy_qp;
+ lgr_list = &smc_lgr_list.list;
+ lgr_lock = &smc_lgr_list.lock;
atomic_inc(&lgr_cnt);
- atomic_inc(&ini->ib_dev->lnk_cnt);
}
smc->conn.lgr = lgr;
spin_lock_bh(lgr_lock);
@@ -346,14 +454,6 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
spin_unlock_bh(lgr_lock);
return 0;
-destroy_qp:
- smc_ib_destroy_queue_pair(lnk);
-dealloc_pd:
- smc_ib_dealloc_protection_domain(lnk);
-free_link_mem:
- smc_wr_free_link_mem(lnk);
-clear_llc_lnk:
- smc_llc_link_clear(lnk);
free_lgr:
kfree(lgr);
ism_put_vlan:
@@ -369,29 +469,186 @@ out:
return rc;
}
+static int smc_write_space(struct smc_connection *conn)
+{
+ int buffer_len = conn->peer_rmbe_size;
+ union smc_host_cursor prod;
+ union smc_host_cursor cons;
+ int space;
+
+ smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);
+ smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
+ /* determine rx_buf space */
+ space = buffer_len - smc_curs_diff(buffer_len, &cons, &prod);
+ return space;
+}
+
+static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend,
+ struct smc_wr_buf *wr_buf)
+{
+ struct smc_connection *conn = &smc->conn;
+ union smc_host_cursor cons, fin;
+ int rc = 0;
+ int diff;
+
+ smc_curs_copy(&conn->tx_curs_sent, &conn->tx_curs_fin, conn);
+ smc_curs_copy(&fin, &conn->local_tx_ctrl_fin, conn);
+ /* set prod cursor to old state, enforce tx_rdma_writes() */
+ smc_curs_copy(&conn->local_tx_ctrl.prod, &fin, conn);
+ smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
+
+ if (smc_curs_comp(conn->peer_rmbe_size, &cons, &fin) < 0) {
+ /* cons cursor advanced more than fin, and prod was set
+ * fin above, so now prod is smaller than cons. Fix that.
+ */
+ diff = smc_curs_diff(conn->peer_rmbe_size, &fin, &cons);
+ smc_curs_add(conn->sndbuf_desc->len,
+ &conn->tx_curs_sent, diff);
+ smc_curs_add(conn->sndbuf_desc->len,
+ &conn->tx_curs_fin, diff);
+
+ smp_mb__before_atomic();
+ atomic_add(diff, &conn->sndbuf_space);
+ smp_mb__after_atomic();
+
+ smc_curs_add(conn->peer_rmbe_size,
+ &conn->local_tx_ctrl.prod, diff);
+ smc_curs_add(conn->peer_rmbe_size,
+ &conn->local_tx_ctrl_fin, diff);
+ }
+ /* recalculate, value is used by tx_rdma_writes() */
+ atomic_set(&smc->conn.peer_rmbe_space, smc_write_space(conn));
+
+ if (smc->sk.sk_state != SMC_INIT &&
+ smc->sk.sk_state != SMC_CLOSED) {
+ rc = smcr_cdc_msg_send_validation(conn, pend, wr_buf);
+ if (!rc) {
+ schedule_delayed_work(&conn->tx_work, 0);
+ smc->sk.sk_data_ready(&smc->sk);
+ }
+ } else {
+ smc_wr_tx_put_slot(conn->lnk,
+ (struct smc_wr_tx_pend_priv *)pend);
+ }
+ return rc;
+}
+
+struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
+ struct smc_link *from_lnk, bool is_dev_err)
+{
+ struct smc_link *to_lnk = NULL;
+ struct smc_cdc_tx_pend *pend;
+ struct smc_connection *conn;
+ struct smc_wr_buf *wr_buf;
+ struct smc_sock *smc;
+ struct rb_node *node;
+ int i, rc = 0;
+
+ /* link is inactive, wake up tx waiters */
+ smc_wr_wakeup_tx_wait(from_lnk);
+
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (lgr->lnk[i].state != SMC_LNK_ACTIVE ||
+ i == from_lnk->link_idx)
+ continue;
+ if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev &&
+ from_lnk->ibport == lgr->lnk[i].ibport) {
+ continue;
+ }
+ to_lnk = &lgr->lnk[i];
+ break;
+ }
+ if (!to_lnk) {
+ smc_lgr_terminate_sched(lgr);
+ return NULL;
+ }
+again:
+ read_lock_bh(&lgr->conns_lock);
+ for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) {
+ conn = rb_entry(node, struct smc_connection, alert_node);
+ if (conn->lnk != from_lnk)
+ continue;
+ smc = container_of(conn, struct smc_sock, conn);
+ /* conn->lnk not yet set in SMC_INIT state */
+ if (smc->sk.sk_state == SMC_INIT)
+ continue;
+ if (smc->sk.sk_state == SMC_CLOSED ||
+ smc->sk.sk_state == SMC_PEERCLOSEWAIT1 ||
+ smc->sk.sk_state == SMC_PEERCLOSEWAIT2 ||
+ smc->sk.sk_state == SMC_APPFINCLOSEWAIT ||
+ smc->sk.sk_state == SMC_APPCLOSEWAIT1 ||
+ smc->sk.sk_state == SMC_APPCLOSEWAIT2 ||
+ smc->sk.sk_state == SMC_PEERFINCLOSEWAIT ||
+ smc->sk.sk_state == SMC_PEERABORTWAIT ||
+ smc->sk.sk_state == SMC_PROCESSABORT) {
+ spin_lock_bh(&conn->send_lock);
+ conn->lnk = to_lnk;
+ spin_unlock_bh(&conn->send_lock);
+ continue;
+ }
+ sock_hold(&smc->sk);
+ read_unlock_bh(&lgr->conns_lock);
+ /* pre-fetch buffer outside of send_lock, might sleep */
+ rc = smc_cdc_get_free_slot(conn, to_lnk, &wr_buf, NULL, &pend);
+ if (rc) {
+ smcr_link_down_cond_sched(to_lnk);
+ return NULL;
+ }
+ /* avoid race with smcr_tx_sndbuf_nonempty() */
+ spin_lock_bh(&conn->send_lock);
+ conn->lnk = to_lnk;
+ rc = smc_switch_cursor(smc, pend, wr_buf);
+ spin_unlock_bh(&conn->send_lock);
+ sock_put(&smc->sk);
+ if (rc) {
+ smcr_link_down_cond_sched(to_lnk);
+ return NULL;
+ }
+ goto again;
+ }
+ read_unlock_bh(&lgr->conns_lock);
+ return to_lnk;
+}
+
+static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
+ struct smc_link_group *lgr)
+{
+ int rc;
+
+ if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) {
+ /* unregister rmb with peer */
+ rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
+ if (!rc) {
+ /* protect against smc_llc_cli_rkey_exchange() */
+ mutex_lock(&lgr->llc_conf_mutex);
+ smc_llc_do_delete_rkey(lgr, rmb_desc);
+ rmb_desc->is_conf_rkey = false;
+ mutex_unlock(&lgr->llc_conf_mutex);
+ smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
+ }
+ }
+
+ if (rmb_desc->is_reg_err) {
+ /* buf registration failed, reuse not possible */
+ mutex_lock(&lgr->rmbs_lock);
+ list_del(&rmb_desc->list);
+ mutex_unlock(&lgr->rmbs_lock);
+
+ smc_buf_free(lgr, true, rmb_desc);
+ } else {
+ rmb_desc->used = 0;
+ }
+}
+
static void smc_buf_unuse(struct smc_connection *conn,
struct smc_link_group *lgr)
{
if (conn->sndbuf_desc)
conn->sndbuf_desc->used = 0;
- if (conn->rmb_desc) {
- if (!conn->rmb_desc->regerr) {
- if (!lgr->is_smcd && !list_empty(&lgr->list)) {
- /* unregister rmb with peer */
- smc_llc_do_delete_rkey(
- &lgr->lnk[SMC_SINGLE_LINK],
- conn->rmb_desc);
- }
- conn->rmb_desc->used = 0;
- } else {
- /* buf registration failed, reuse not possible */
- write_lock_bh(&lgr->rmbs_lock);
- list_del(&conn->rmb_desc->list);
- write_unlock_bh(&lgr->rmbs_lock);
-
- smc_buf_free(lgr, true, conn->rmb_desc);
- }
- }
+ if (conn->rmb_desc && lgr->is_smcd)
+ conn->rmb_desc->used = 0;
+ else if (conn->rmb_desc)
+ smcr_buf_unuse(conn->rmb_desc, lgr);
}
/* remove a finished connection from its link group */
@@ -407,6 +664,8 @@ void smc_conn_free(struct smc_connection *conn)
tasklet_kill(&conn->rx_tsklet);
} else {
smc_cdc_tx_dismiss_slots(conn);
+ if (current_work() != &conn->abort_work)
+ cancel_work_sync(&conn->abort_work);
}
if (!list_empty(&lgr->list)) {
smc_lgr_unregister_conn(conn);
@@ -417,35 +676,91 @@ void smc_conn_free(struct smc_connection *conn)
smc_lgr_schedule_free_work(lgr);
}
-static void smc_link_clear(struct smc_link *lnk)
+/* unregister a link from a buf_desc */
+static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb,
+ struct smc_link *lnk)
+{
+ if (is_rmb)
+ buf_desc->is_reg_mr[lnk->link_idx] = false;
+ if (!buf_desc->is_map_ib[lnk->link_idx])
+ return;
+ if (is_rmb) {
+ if (buf_desc->mr_rx[lnk->link_idx]) {
+ smc_ib_put_memory_region(
+ buf_desc->mr_rx[lnk->link_idx]);
+ buf_desc->mr_rx[lnk->link_idx] = NULL;
+ }
+ smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
+ } else {
+ smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
+ }
+ sg_free_table(&buf_desc->sgt[lnk->link_idx]);
+ buf_desc->is_map_ib[lnk->link_idx] = false;
+}
+
+/* unmap all buffers of lgr for a deleted link */
+static void smcr_buf_unmap_lgr(struct smc_link *lnk)
+{
+ struct smc_link_group *lgr = lnk->lgr;
+ struct smc_buf_desc *buf_desc, *bf;
+ int i;
+
+ for (i = 0; i < SMC_RMBE_SIZES; i++) {
+ mutex_lock(&lgr->rmbs_lock);
+ list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
+ smcr_buf_unmap_link(buf_desc, true, lnk);
+ mutex_unlock(&lgr->rmbs_lock);
+ mutex_lock(&lgr->sndbufs_lock);
+ list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
+ list)
+ smcr_buf_unmap_link(buf_desc, false, lnk);
+ mutex_unlock(&lgr->sndbufs_lock);
+ }
+}
+
+static void smcr_rtoken_clear_link(struct smc_link *lnk)
+{
+ struct smc_link_group *lgr = lnk->lgr;
+ int i;
+
+ for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
+ lgr->rtokens[i][lnk->link_idx].rkey = 0;
+ lgr->rtokens[i][lnk->link_idx].dma_addr = 0;
+ }
+}
+
+/* must be called under lgr->llc_conf_mutex lock */
+void smcr_link_clear(struct smc_link *lnk, bool log)
{
+ struct smc_ib_device *smcibdev;
+
+ if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED)
+ return;
lnk->peer_qpn = 0;
- smc_llc_link_clear(lnk);
+ smc_llc_link_clear(lnk, log);
+ smcr_buf_unmap_lgr(lnk);
+ smcr_rtoken_clear_link(lnk);
smc_ib_modify_qp_reset(lnk);
smc_wr_free_link(lnk);
smc_ib_destroy_queue_pair(lnk);
smc_ib_dealloc_protection_domain(lnk);
smc_wr_free_link_mem(lnk);
- if (!atomic_dec_return(&lnk->smcibdev->lnk_cnt))
- wake_up(&lnk->smcibdev->lnks_deleted);
+ put_device(&lnk->smcibdev->ibdev->dev);
+ smcibdev = lnk->smcibdev;
+ memset(lnk, 0, sizeof(struct smc_link));
+ lnk->state = SMC_LNK_UNUSED;
+ if (!atomic_dec_return(&smcibdev->lnk_cnt))
+ wake_up(&smcibdev->lnks_deleted);
}
static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
struct smc_buf_desc *buf_desc)
{
- struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
+ int i;
+
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
+ smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]);
- if (is_rmb) {
- if (buf_desc->mr_rx[SMC_SINGLE_LINK])
- smc_ib_put_memory_region(
- buf_desc->mr_rx[SMC_SINGLE_LINK]);
- smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
- DMA_FROM_DEVICE);
- } else {
- smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
- DMA_TO_DEVICE);
- }
- sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]);
if (buf_desc->pages)
__free_pages(buf_desc->pages, buf_desc->order);
kfree(buf_desc);
@@ -503,6 +818,18 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr)
/* remove a link group */
static void smc_lgr_free(struct smc_link_group *lgr)
{
+ int i;
+
+ if (!lgr->is_smcd) {
+ mutex_lock(&lgr->llc_conf_mutex);
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (lgr->lnk[i].state != SMC_LNK_UNUSED)
+ smcr_link_clear(&lgr->lnk[i], false);
+ }
+ mutex_unlock(&lgr->llc_conf_mutex);
+ smc_llc_lgr_clear(lgr);
+ }
+
smc_lgr_free_bufs(lgr);
if (lgr->is_smcd) {
if (!lgr->terminating) {
@@ -512,27 +839,12 @@ static void smc_lgr_free(struct smc_link_group *lgr)
if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
wake_up(&lgr->smcd->lgrs_deleted);
} else {
- smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
- put_device(&lgr->lnk[SMC_SINGLE_LINK].smcibdev->ibdev->dev);
if (!atomic_dec_return(&lgr_cnt))
wake_up(&lgrs_deleted);
}
kfree(lgr);
}
-void smc_lgr_forget(struct smc_link_group *lgr)
-{
- struct list_head *lgr_list;
- spinlock_t *lgr_lock;
-
- lgr_list = smc_lgr_list_head(lgr, &lgr_lock);
- spin_lock_bh(lgr_lock);
- /* do not use this link group for new connections */
- if (!list_empty(lgr_list))
- list_del_init(lgr_list);
- spin_unlock_bh(lgr_lock);
-}
-
static void smcd_unregister_all_dmbs(struct smc_link_group *lgr)
{
int i;
@@ -587,10 +899,12 @@ static void smc_lgr_cleanup(struct smc_link_group *lgr)
smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
put_device(&lgr->smcd->dev);
} else {
- struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
+ u32 rsn = lgr->llc_termination_rsn;
- if (lnk->state != SMC_LNK_INACTIVE)
- smc_llc_link_inactive(lnk);
+ if (!rsn)
+ rsn = SMC_LLC_DEL_PROG_INIT_TERM;
+ smc_llc_send_link_delete_all(lgr, false, rsn);
+ smcr_lgr_link_deactivate_all(lgr);
}
}
@@ -606,11 +920,9 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
if (lgr->terminating)
return; /* lgr already terminating */
- if (!soft)
- cancel_delayed_work_sync(&lgr->free_work);
+ /* cancel free_work sync, will terminate when lgr->freeing is set */
+ cancel_delayed_work_sync(&lgr->free_work);
lgr->terminating = 1;
- if (!lgr->is_smcd)
- smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
/* kill remaining link group connections */
read_lock_bh(&lgr->conns_lock);
@@ -629,10 +941,7 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
}
read_unlock_bh(&lgr->conns_lock);
smc_lgr_cleanup(lgr);
- if (soft)
- smc_lgr_schedule_free_work_fast(lgr);
- else
- smc_lgr_free(lgr);
+ smc_lgr_free(lgr);
}
/* unlink link group and schedule termination */
@@ -647,33 +956,11 @@ void smc_lgr_terminate_sched(struct smc_link_group *lgr)
return; /* lgr already terminating */
}
list_del_init(&lgr->list);
+ lgr->freeing = 1;
spin_unlock_bh(lgr_lock);
schedule_work(&lgr->terminate_work);
}
-/* Called when IB port is terminated */
-void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
-{
- struct smc_link_group *lgr, *l;
- LIST_HEAD(lgr_free_list);
-
- spin_lock_bh(&smc_lgr_list.lock);
- list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
- if (!lgr->is_smcd &&
- lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
- lgr->lnk[SMC_SINGLE_LINK].ibport == ibport) {
- list_move(&lgr->list, &lgr_free_list);
- lgr->freeing = 1;
- }
- }
- spin_unlock_bh(&smc_lgr_list.lock);
-
- list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
- list_del_init(&lgr->list);
- __smc_lgr_terminate(lgr, false);
- }
-}
-
/* Called when peer lgr shutdown (regularly or abnormally) is received */
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
{
@@ -688,6 +975,7 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
if (peer_gid) /* peer triggered termination */
lgr->peer_shutdown = 1;
list_move(&lgr->list, &lgr_free_list);
+ lgr->freeing = 1;
}
}
spin_unlock_bh(&dev->lgr_lock);
@@ -728,6 +1016,7 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
{
struct smc_link_group *lgr, *lg;
LIST_HEAD(lgr_free_list);
+ int i;
spin_lock_bh(&smc_lgr_list.lock);
if (!smcibdev) {
@@ -736,9 +1025,9 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
lgr->freeing = 1;
} else {
list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
- if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev) {
- list_move(&lgr->list, &lgr_free_list);
- lgr->freeing = 1;
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (lgr->lnk[i].smcibdev == smcibdev)
+ smcr_link_down_cond_sched(&lgr->lnk[i]);
}
}
}
@@ -746,6 +1035,7 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
list_del_init(&lgr->list);
+ smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_OP_INIT_TERM);
__smc_lgr_terminate(lgr, false);
}
@@ -759,6 +1049,225 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
}
}
+/* set new lgr type and clear all asymmetric link tagging */
+void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type)
+{
+ char *lgr_type = "";
+ int i;
+
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
+ if (smc_link_usable(&lgr->lnk[i]))
+ lgr->lnk[i].link_is_asym = false;
+ if (lgr->type == new_type)
+ return;
+ lgr->type = new_type;
+
+ switch (lgr->type) {
+ case SMC_LGR_NONE:
+ lgr_type = "NONE";
+ break;
+ case SMC_LGR_SINGLE:
+ lgr_type = "SINGLE";
+ break;
+ case SMC_LGR_SYMMETRIC:
+ lgr_type = "SYMMETRIC";
+ break;
+ case SMC_LGR_ASYMMETRIC_PEER:
+ lgr_type = "ASYMMETRIC_PEER";
+ break;
+ case SMC_LGR_ASYMMETRIC_LOCAL:
+ lgr_type = "ASYMMETRIC_LOCAL";
+ break;
+ }
+ pr_warn_ratelimited("smc: SMC-R lg %*phN state changed: "
+ "%s, pnetid %.16s\n", SMC_LGR_ID_SIZE, &lgr->id,
+ lgr_type, lgr->pnet_id);
+}
+
+/* set new lgr type and tag a link as asymmetric */
+void smcr_lgr_set_type_asym(struct smc_link_group *lgr,
+ enum smc_lgr_type new_type, int asym_lnk_idx)
+{
+ smcr_lgr_set_type(lgr, new_type);
+ lgr->lnk[asym_lnk_idx].link_is_asym = true;
+}
+
+/* abort connection, abort_work scheduled from tasklet context */
+static void smc_conn_abort_work(struct work_struct *work)
+{
+ struct smc_connection *conn = container_of(work,
+ struct smc_connection,
+ abort_work);
+ struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+
+ smc_conn_kill(conn, true);
+ sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */
+}
+
+/* link is up - establish alternate link if applicable */
+static void smcr_link_up(struct smc_link_group *lgr,
+ struct smc_ib_device *smcibdev, u8 ibport)
+{
+ struct smc_link *link = NULL;
+
+ if (list_empty(&lgr->list) ||
+ lgr->type == SMC_LGR_SYMMETRIC ||
+ lgr->type == SMC_LGR_ASYMMETRIC_PEER)
+ return;
+
+ if (lgr->role == SMC_SERV) {
+ /* trigger local add link processing */
+ link = smc_llc_usable_link(lgr);
+ if (!link)
+ return;
+ smc_llc_srv_add_link_local(link);
+ } else {
+ /* invite server to start add link processing */
+ u8 gid[SMC_GID_SIZE];
+
+ if (smc_ib_determine_gid(smcibdev, ibport, lgr->vlan_id, gid,
+ NULL))
+ return;
+ if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
+ /* some other llc task is ongoing */
+ wait_event_interruptible_timeout(lgr->llc_waiter,
+ (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
+ SMC_LLC_WAIT_TIME);
+ }
+ if (list_empty(&lgr->list) ||
+ !smc_ib_port_active(smcibdev, ibport))
+ return; /* lgr or device no longer active */
+ link = smc_llc_usable_link(lgr);
+ if (!link)
+ return;
+ smc_llc_send_add_link(link, smcibdev->mac[ibport - 1], gid,
+ NULL, SMC_LLC_REQ);
+ }
+}
+
+void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
+{
+ struct smc_ib_up_work *ib_work;
+ struct smc_link_group *lgr, *n;
+
+ list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
+ if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
+ SMC_MAX_PNETID_LEN) ||
+ lgr->type == SMC_LGR_SYMMETRIC ||
+ lgr->type == SMC_LGR_ASYMMETRIC_PEER)
+ continue;
+ ib_work = kmalloc(sizeof(*ib_work), GFP_KERNEL);
+ if (!ib_work)
+ continue;
+ INIT_WORK(&ib_work->work, smc_link_up_work);
+ ib_work->lgr = lgr;
+ ib_work->smcibdev = smcibdev;
+ ib_work->ibport = ibport;
+ schedule_work(&ib_work->work);
+ }
+}
+
+/* link is down - switch connections to alternate link,
+ * must be called under lgr->llc_conf_mutex lock
+ */
+static void smcr_link_down(struct smc_link *lnk)
+{
+ struct smc_link_group *lgr = lnk->lgr;
+ struct smc_link *to_lnk;
+ int del_link_id;
+
+ if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
+ return;
+
+ smc_ib_modify_qp_reset(lnk);
+ to_lnk = smc_switch_conns(lgr, lnk, true);
+ if (!to_lnk) { /* no backup link available */
+ smcr_link_clear(lnk, true);
+ return;
+ }
+ smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
+ del_link_id = lnk->link_id;
+
+ if (lgr->role == SMC_SERV) {
+ /* trigger local delete link processing */
+ smc_llc_srv_delete_link_local(to_lnk, del_link_id);
+ } else {
+ if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
+ /* another llc task is ongoing */
+ mutex_unlock(&lgr->llc_conf_mutex);
+ wait_event_interruptible_timeout(lgr->llc_waiter,
+ (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
+ SMC_LLC_WAIT_TIME);
+ mutex_lock(&lgr->llc_conf_mutex);
+ }
+ smc_llc_send_delete_link(to_lnk, del_link_id, SMC_LLC_REQ, true,
+ SMC_LLC_DEL_LOST_PATH);
+ }
+}
+
+/* must be called under lgr->llc_conf_mutex lock */
+void smcr_link_down_cond(struct smc_link *lnk)
+{
+ if (smc_link_downing(&lnk->state))
+ smcr_link_down(lnk);
+}
+
+/* will get the lgr->llc_conf_mutex lock */
+void smcr_link_down_cond_sched(struct smc_link *lnk)
+{
+ if (smc_link_downing(&lnk->state))
+ schedule_work(&lnk->link_down_wrk);
+}
+
+void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
+{
+ struct smc_link_group *lgr, *n;
+ int i;
+
+ list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
+ if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
+ SMC_MAX_PNETID_LEN))
+ continue; /* lgr is not affected */
+ if (list_empty(&lgr->list))
+ continue;
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ struct smc_link *lnk = &lgr->lnk[i];
+
+ if (smc_link_usable(lnk) &&
+ lnk->smcibdev == smcibdev && lnk->ibport == ibport)
+ smcr_link_down_cond_sched(lnk);
+ }
+ }
+}
+
+static void smc_link_up_work(struct work_struct *work)
+{
+ struct smc_ib_up_work *ib_work = container_of(work,
+ struct smc_ib_up_work,
+ work);
+ struct smc_link_group *lgr = ib_work->lgr;
+
+ if (list_empty(&lgr->list))
+ goto out;
+ smcr_link_up(lgr, ib_work->smcibdev, ib_work->ibport);
+out:
+ kfree(ib_work);
+}
+
+static void smc_link_down_work(struct work_struct *work)
+{
+ struct smc_link *link = container_of(work, struct smc_link,
+ link_down_wrk);
+ struct smc_link_group *lgr = link->lgr;
+
+ if (list_empty(&lgr->list))
+ return;
+ wake_up_interruptible_all(&lgr->llc_waiter);
+ mutex_lock(&lgr->llc_conf_mutex);
+ smcr_link_down(link);
+ mutex_unlock(&lgr->llc_conf_mutex);
+}
+
/* Determine vlan of internal TCP socket.
* @vlan_id: address to store the determined vlan id into
*/
@@ -810,15 +1319,21 @@ static bool smcr_lgr_match(struct smc_link_group *lgr,
struct smc_clc_msg_local *lcl,
enum smc_lgr_role role, u32 clcqpn)
{
- return !memcmp(lgr->peer_systemid, lcl->id_for_peer,
- SMC_SYSTEMID_LEN) &&
- !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid,
- SMC_GID_SIZE) &&
- !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
- sizeof(lcl->mac)) &&
- lgr->role == role &&
- (lgr->role == SMC_SERV ||
- lgr->lnk[SMC_SINGLE_LINK].peer_qpn == clcqpn);
+ int i;
+
+ if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) ||
+ lgr->role != role)
+ return false;
+
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (lgr->lnk[i].state != SMC_LNK_ACTIVE)
+ continue;
+ if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) &&
+ !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) &&
+ !memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac)))
+ return true;
+ }
+ return false;
}
static bool smcd_lgr_match(struct smc_link_group *lgr,
@@ -859,15 +1374,17 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
/* link group found */
ini->cln_first_contact = SMC_REUSE_CONTACT;
conn->lgr = lgr;
- smc_lgr_register_conn(conn); /* add smc conn to lgr */
- if (delayed_work_pending(&lgr->free_work))
- cancel_delayed_work(&lgr->free_work);
+ rc = smc_lgr_register_conn(conn, false);
write_unlock_bh(&lgr->conns_lock);
+ if (!rc && delayed_work_pending(&lgr->free_work))
+ cancel_delayed_work(&lgr->free_work);
break;
}
write_unlock_bh(&lgr->conns_lock);
}
spin_unlock_bh(lgr_lock);
+ if (rc)
+ return rc;
if (role == SMC_CLNT && !ini->srv_first_contact &&
ini->cln_first_contact == SMC_FIRST_CONTACT) {
@@ -885,12 +1402,15 @@ create:
goto out;
lgr = conn->lgr;
write_lock_bh(&lgr->conns_lock);
- smc_lgr_register_conn(conn); /* add smc conn to lgr */
+ rc = smc_lgr_register_conn(conn, true);
write_unlock_bh(&lgr->conns_lock);
+ if (rc)
+ goto out;
}
conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
conn->urg_state = SMC_URG_READ;
+ INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work);
if (ini->is_smcd) {
conn->rx_off = sizeof(struct smcd_cdc_msg);
smcd_cdc_rx_init(conn); /* init tasklet for this conn */
@@ -934,19 +1454,19 @@ int smc_uncompress_bufsize(u8 compressed)
* buffer size; if not available, return NULL
*/
static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
- rwlock_t *lock,
+ struct mutex *lock,
struct list_head *buf_list)
{
struct smc_buf_desc *buf_slot;
- read_lock_bh(lock);
+ mutex_lock(lock);
list_for_each_entry(buf_slot, buf_list, list) {
if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
- read_unlock_bh(lock);
+ mutex_unlock(lock);
return buf_slot;
}
}
- read_unlock_bh(lock);
+ mutex_unlock(lock);
return NULL;
}
@@ -959,12 +1479,135 @@ static inline int smc_rmb_wnd_update_limit(int rmbe_size)
return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}
+/* map an rmb buf to a link */
+static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
+ struct smc_link *lnk)
+{
+ int rc;
+
+ if (buf_desc->is_map_ib[lnk->link_idx])
+ return 0;
+
+ rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL);
+ if (rc)
+ return rc;
+ sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
+ buf_desc->cpu_addr, buf_desc->len);
+
+ /* map sg table to DMA address */
+ rc = smc_ib_buf_map_sg(lnk, buf_desc,
+ is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
+ /* SMC protocol depends on mapping to one DMA address only */
+ if (rc != 1) {
+ rc = -EAGAIN;
+ goto free_table;
+ }
+
+ /* create a new memory region for the RMB */
+ if (is_rmb) {
+ rc = smc_ib_get_memory_region(lnk->roce_pd,
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_LOCAL_WRITE,
+ buf_desc, lnk->link_idx);
+ if (rc)
+ goto buf_unmap;
+ smc_ib_sync_sg_for_device(lnk, buf_desc, DMA_FROM_DEVICE);
+ }
+ buf_desc->is_map_ib[lnk->link_idx] = true;
+ return 0;
+
+buf_unmap:
+ smc_ib_buf_unmap_sg(lnk, buf_desc,
+ is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
+free_table:
+ sg_free_table(&buf_desc->sgt[lnk->link_idx]);
+ return rc;
+}
+
+/* register a new rmb on IB device,
+ * must be called under lgr->llc_conf_mutex lock
+ */
+int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc)
+{
+ if (list_empty(&link->lgr->list))
+ return -ENOLINK;
+ if (!rmb_desc->is_reg_mr[link->link_idx]) {
+ /* register memory region for new rmb */
+ if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) {
+ rmb_desc->is_reg_err = true;
+ return -EFAULT;
+ }
+ rmb_desc->is_reg_mr[link->link_idx] = true;
+ }
+ return 0;
+}
+
+static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
+ struct list_head *lst, bool is_rmb)
+{
+ struct smc_buf_desc *buf_desc, *bf;
+ int rc = 0;
+
+ mutex_lock(lock);
+ list_for_each_entry_safe(buf_desc, bf, lst, list) {
+ if (!buf_desc->used)
+ continue;
+ rc = smcr_buf_map_link(buf_desc, is_rmb, lnk);
+ if (rc)
+ goto out;
+ }
+out:
+ mutex_unlock(lock);
+ return rc;
+}
+
+/* map all used buffers of lgr for a new link */
+int smcr_buf_map_lgr(struct smc_link *lnk)
+{
+ struct smc_link_group *lgr = lnk->lgr;
+ int i, rc = 0;
+
+ for (i = 0; i < SMC_RMBE_SIZES; i++) {
+ rc = _smcr_buf_map_lgr(lnk, &lgr->rmbs_lock,
+ &lgr->rmbs[i], true);
+ if (rc)
+ return rc;
+ rc = _smcr_buf_map_lgr(lnk, &lgr->sndbufs_lock,
+ &lgr->sndbufs[i], false);
+ if (rc)
+ return rc;
+ }
+ return 0;
+}
+
+/* register all used buffers of lgr for a new link,
+ * must be called under lgr->llc_conf_mutex lock
+ */
+int smcr_buf_reg_lgr(struct smc_link *lnk)
+{
+ struct smc_link_group *lgr = lnk->lgr;
+ struct smc_buf_desc *buf_desc, *bf;
+ int i, rc = 0;
+
+ mutex_lock(&lgr->rmbs_lock);
+ for (i = 0; i < SMC_RMBE_SIZES; i++) {
+ list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
+ if (!buf_desc->used)
+ continue;
+ rc = smcr_link_reg_rmb(lnk, buf_desc);
+ if (rc)
+ goto out;
+ }
+ }
+out:
+ mutex_unlock(&lgr->rmbs_lock);
+ return rc;
+}
+
static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
bool is_rmb, int bufsize)
{
struct smc_buf_desc *buf_desc;
- struct smc_link *lnk;
- int rc;
/* try to alloc a new buffer */
buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
@@ -981,41 +1624,33 @@ static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
return ERR_PTR(-EAGAIN);
}
buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);
+ buf_desc->len = bufsize;
+ return buf_desc;
+}
- /* build the sg table from the pages */
- lnk = &lgr->lnk[SMC_SINGLE_LINK];
- rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1,
- GFP_KERNEL);
- if (rc) {
- smc_buf_free(lgr, is_rmb, buf_desc);
- return ERR_PTR(rc);
- }
- sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl,
- buf_desc->cpu_addr, bufsize);
+/* map buf_desc on all usable links,
+ * unused buffers stay mapped as long as the link is up
+ */
+static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
+ struct smc_buf_desc *buf_desc, bool is_rmb)
+{
+ int i, rc = 0;
- /* map sg table to DMA address */
- rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc,
- is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
- /* SMC protocol depends on mapping to one DMA address only */
- if (rc != 1) {
- smc_buf_free(lgr, is_rmb, buf_desc);
- return ERR_PTR(-EAGAIN);
- }
+ /* protect against parallel link reconfiguration */
+ mutex_lock(&lgr->llc_conf_mutex);
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ struct smc_link *lnk = &lgr->lnk[i];
- /* create a new memory region for the RMB */
- if (is_rmb) {
- rc = smc_ib_get_memory_region(lnk->roce_pd,
- IB_ACCESS_REMOTE_WRITE |
- IB_ACCESS_LOCAL_WRITE,
- buf_desc);
- if (rc) {
- smc_buf_free(lgr, is_rmb, buf_desc);
- return ERR_PTR(rc);
+ if (!smc_link_usable(lnk))
+ continue;
+ if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
+ rc = -ENOMEM;
+ goto out;
}
}
-
- buf_desc->len = bufsize;
- return buf_desc;
+out:
+ mutex_unlock(&lgr->llc_conf_mutex);
+ return rc;
}
#define SMCD_DMBE_SIZES 7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
@@ -1062,8 +1697,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
struct smc_link_group *lgr = conn->lgr;
struct list_head *buf_list;
int bufsize, bufsize_short;
+ struct mutex *lock; /* lock buffer list */
int sk_buf_size;
- rwlock_t *lock;
if (is_rmb)
/* use socket recv buffer size (w/o overhead) as start value */
@@ -1104,15 +1739,22 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
continue;
buf_desc->used = 1;
- write_lock_bh(lock);
+ mutex_lock(lock);
list_add(&buf_desc->list, buf_list);
- write_unlock_bh(lock);
+ mutex_unlock(lock);
break; /* found */
}
if (IS_ERR(buf_desc))
return -ENOMEM;
+ if (!is_smcd) {
+ if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
+ smcr_buf_unuse(buf_desc, lgr);
+ return -ENOMEM;
+ }
+ }
+
if (is_rmb) {
conn->rmb_desc = buf_desc;
conn->rmbe_size_short = bufsize_short;
@@ -1132,42 +1774,44 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
{
- struct smc_link_group *lgr = conn->lgr;
-
- if (!conn->lgr || conn->lgr->is_smcd)
+ if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
return;
- smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
- conn->sndbuf_desc, DMA_TO_DEVICE);
+ smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}
void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
- struct smc_link_group *lgr = conn->lgr;
-
- if (!conn->lgr || conn->lgr->is_smcd)
+ if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
return;
- smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
- conn->sndbuf_desc, DMA_TO_DEVICE);
+ smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}
void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
{
- struct smc_link_group *lgr = conn->lgr;
+ int i;
if (!conn->lgr || conn->lgr->is_smcd)
return;
- smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
- conn->rmb_desc, DMA_FROM_DEVICE);
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (!smc_link_usable(&conn->lgr->lnk[i]))
+ continue;
+ smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
+ DMA_FROM_DEVICE);
+ }
}
void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
{
- struct smc_link_group *lgr = conn->lgr;
+ int i;
if (!conn->lgr || conn->lgr->is_smcd)
return;
- smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
- conn->rmb_desc, DMA_FROM_DEVICE);
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (!smc_link_usable(&conn->lgr->lnk[i]))
+ continue;
+ smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc,
+ DMA_FROM_DEVICE);
+ }
}
/* create the send and receive buffer for an SMC socket;
@@ -1202,16 +1846,64 @@ static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
return -ENOSPC;
}
+static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx,
+ u32 rkey)
+{
+ int i;
+
+ for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
+ if (test_bit(i, lgr->rtokens_used_mask) &&
+ lgr->rtokens[i][lnk_idx].rkey == rkey)
+ return i;
+ }
+ return -ENOENT;
+}
+
+/* set rtoken for a new link to an existing rmb */
+void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
+ __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey)
+{
+ int rtok_idx;
+
+ rtok_idx = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known));
+ if (rtok_idx == -ENOENT)
+ return;
+ lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey);
+ lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr);
+}
+
+/* set rtoken for a new link whose link_id is given */
+void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
+ __be64 nw_vaddr, __be32 nw_rkey)
+{
+ u64 dma_addr = be64_to_cpu(nw_vaddr);
+ u32 rkey = ntohl(nw_rkey);
+ bool found = false;
+ int link_idx;
+
+ for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) {
+ if (lgr->lnk[link_idx].link_id == link_id) {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ return;
+ lgr->rtokens[rtok_idx][link_idx].rkey = rkey;
+ lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr;
+}
+
/* add a new rtoken from peer */
-int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
+int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
{
+ struct smc_link_group *lgr = smc_get_lgr(lnk);
u64 dma_addr = be64_to_cpu(nw_vaddr);
u32 rkey = ntohl(nw_rkey);
int i;
for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
- if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
- (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
+ if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
+ lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
test_bit(i, lgr->rtokens_used_mask)) {
/* already in list */
return i;
@@ -1220,23 +1912,25 @@ int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
i = smc_rmb_reserve_rtoken_idx(lgr);
if (i < 0)
return i;
- lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey;
- lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr;
+ lgr->rtokens[i][lnk->link_idx].rkey = rkey;
+ lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
return i;
}
-/* delete an rtoken */
-int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
+/* delete an rtoken from all links */
+int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
{
+ struct smc_link_group *lgr = smc_get_lgr(lnk);
u32 rkey = ntohl(nw_rkey);
- int i;
+ int i, j;
for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
- if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey &&
+ if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
test_bit(i, lgr->rtokens_used_mask)) {
- lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0;
- lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0;
-
+ for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) {
+ lgr->rtokens[i][j].rkey = 0;
+ lgr->rtokens[i][j].dma_addr = 0;
+ }
clear_bit(i, lgr->rtokens_used_mask);
return 0;
}
@@ -1246,9 +1940,10 @@ int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
/* save rkey and dma_addr received from peer during clc handshake */
int smc_rmb_rtoken_handling(struct smc_connection *conn,
+ struct smc_link *lnk,
struct smc_clc_msg_accept_confirm *clc)
{
- conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr,
+ conn->rtoken_idx = smc_rtoken_add(lnk, clc->rmb_dma_addr,
clc->rmb_rkey);
if (conn->rtoken_idx < 0)
return conn->rtoken_idx;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 8041db20c753..86d160f0d187 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -32,10 +32,10 @@ enum smc_lgr_role { /* possible roles of a link group */
};
enum smc_link_state { /* possible states of a link */
+ SMC_LNK_UNUSED, /* link is unused */
SMC_LNK_INACTIVE, /* link is inactive */
SMC_LNK_ACTIVATING, /* link is being activated */
SMC_LNK_ACTIVE, /* link is active */
- SMC_LNK_DELETING, /* link is being deleted */
};
#define SMC_WR_BUF_SIZE 48 /* size of work request buffer */
@@ -70,6 +70,8 @@ struct smc_rdma_wr { /* work requests per message
struct ib_rdma_wr wr_tx_rdma[SMC_MAX_RDMA_WRITES];
};
+#define SMC_LGR_ID_SIZE 4
+
struct smc_link {
struct smc_ib_device *smcibdev; /* ib-device */
u8 ibport; /* port - values 1 | 2 */
@@ -85,6 +87,7 @@ struct smc_link {
struct smc_rdma_sges *wr_tx_rdma_sges;/*RDMA WRITE gather meta data*/
struct smc_rdma_wr *wr_tx_rdmas; /* WR RDMA WRITE */
struct smc_wr_tx_pend *wr_tx_pends; /* WR send waiting for CQE */
+ struct completion *wr_tx_compl; /* WR send CQE completion */
/* above four vectors have wr_tx_cnt elements and use the same index */
dma_addr_t wr_tx_dma_addr; /* DMA address of wr_tx_bufs */
atomic_long_t wr_tx_id; /* seq # of last sent WR */
@@ -115,29 +118,23 @@ struct smc_link {
u8 peer_mac[ETH_ALEN]; /* = gid[8:10||13:15] */
u8 peer_gid[SMC_GID_SIZE]; /* gid of peer*/
u8 link_id; /* unique # within link group */
+ u8 link_uid[SMC_LGR_ID_SIZE]; /* unique lnk id */
+ u8 peer_link_uid[SMC_LGR_ID_SIZE]; /* peer uid */
+ u8 link_idx; /* index in lgr link array */
+ u8 link_is_asym; /* is link asymmetric? */
+ struct smc_link_group *lgr; /* parent link group */
+ struct work_struct link_down_wrk; /* wrk to bring link down */
enum smc_link_state state; /* state of link */
- struct workqueue_struct *llc_wq; /* single thread work queue */
- struct completion llc_confirm; /* wait for rx of conf link */
- struct completion llc_confirm_resp; /* wait 4 rx of cnf lnk rsp */
- int llc_confirm_rc; /* rc from confirm link msg */
- int llc_confirm_resp_rc; /* rc from conf_resp msg */
- struct completion llc_add; /* wait for rx of add link */
- struct completion llc_add_resp; /* wait for rx of add link rsp*/
struct delayed_work llc_testlink_wrk; /* testlink worker */
struct completion llc_testlink_resp; /* wait for rx of testlink */
int llc_testlink_time; /* testlink interval */
- struct completion llc_confirm_rkey; /* wait 4 rx of cnf rkey */
- int llc_confirm_rkey_rc; /* rc from cnf rkey msg */
- struct completion llc_delete_rkey; /* wait 4 rx of del rkey */
- int llc_delete_rkey_rc; /* rc from del rkey msg */
- struct mutex llc_delete_rkey_mutex; /* serialize usage */
};
/* For now we just allow one parallel link per link group. The SMC protocol
* allows more (up to 8).
*/
-#define SMC_LINKS_PER_LGR_MAX 1
+#define SMC_LINKS_PER_LGR_MAX 3
#define SMC_SINGLE_LINK 0
#define SMC_FIRST_CONTACT 1 /* first contact to a peer */
@@ -150,25 +147,32 @@ struct smc_buf_desc {
struct page *pages;
int len; /* length of buffer */
u32 used; /* currently used / unused */
- u8 wr_reg : 1; /* mem region registered */
- u8 regerr : 1; /* err during registration */
union {
struct { /* SMC-R */
- struct sg_table sgt[SMC_LINKS_PER_LGR_MAX];
- /* virtual buffer */
- struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX];
- /* for rmb only: memory region
- * incl. rkey provided to peer
- */
- u32 order; /* allocation order */
+ struct sg_table sgt[SMC_LINKS_PER_LGR_MAX];
+ /* virtual buffer */
+ struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX];
+ /* for rmb only: memory region
+ * incl. rkey provided to peer
+ */
+ u32 order; /* allocation order */
+
+ u8 is_conf_rkey;
+ /* confirm_rkey done */
+ u8 is_reg_mr[SMC_LINKS_PER_LGR_MAX];
+ /* mem region registered */
+ u8 is_map_ib[SMC_LINKS_PER_LGR_MAX];
+ /* mem region mapped to lnk */
+ u8 is_reg_err;
+ /* buffer registration err */
};
struct { /* SMC-D */
- unsigned short sba_idx;
- /* SBA index number */
- u64 token;
- /* DMB token number */
- dma_addr_t dma_addr;
- /* DMA address */
+ unsigned short sba_idx;
+ /* SBA index number */
+ u64 token;
+ /* DMB token number */
+ dma_addr_t dma_addr;
+ /* DMA address */
};
};
};
@@ -178,7 +182,6 @@ struct smc_rtoken { /* address/key of remote RMB */
u32 rkey;
};
-#define SMC_LGR_ID_SIZE 4
#define SMC_BUF_MIN_SIZE 16384 /* minimum size of an RMB */
#define SMC_RMBE_SIZES 16 /* number of distinct RMBE sizes */
/* theoretically, the RFC states that largest size would be 512K,
@@ -188,6 +191,28 @@ struct smc_rtoken { /* address/key of remote RMB */
struct smcd_dev;
+enum smc_lgr_type { /* redundancy state of lgr */
+ SMC_LGR_NONE, /* no active links, lgr to be deleted */
+ SMC_LGR_SINGLE, /* 1 active RNIC on each peer */
+ SMC_LGR_SYMMETRIC, /* 2 active RNICs on each peer */
+ SMC_LGR_ASYMMETRIC_PEER, /* local has 2, peer 1 active RNICs */
+ SMC_LGR_ASYMMETRIC_LOCAL, /* local has 1, peer 2 active RNICs */
+};
+
+enum smc_llc_flowtype {
+ SMC_LLC_FLOW_NONE = 0,
+ SMC_LLC_FLOW_ADD_LINK = 2,
+ SMC_LLC_FLOW_DEL_LINK = 4,
+ SMC_LLC_FLOW_RKEY = 6,
+};
+
+struct smc_llc_qentry;
+
+struct smc_llc_flow {
+ enum smc_llc_flowtype type;
+ struct smc_llc_qentry *qentry;
+};
+
struct smc_link_group {
struct list_head list;
struct rb_root conns_all; /* connection tree */
@@ -196,9 +221,9 @@ struct smc_link_group {
unsigned short vlan_id; /* vlan id of link group */
struct list_head sndbufs[SMC_RMBE_SIZES];/* tx buffers */
- rwlock_t sndbufs_lock; /* protects tx buffers */
+ struct mutex sndbufs_lock; /* protects tx buffers */
struct list_head rmbs[SMC_RMBE_SIZES]; /* rx buffers */
- rwlock_t rmbs_lock; /* protects rx buffers */
+ struct mutex rmbs_lock; /* protects rx buffers */
u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */
struct delayed_work free_work; /* delayed freeing of an lgr */
@@ -222,6 +247,35 @@ struct smc_link_group {
/* remote addr/key pairs */
DECLARE_BITMAP(rtokens_used_mask, SMC_RMBS_PER_LGR_MAX);
/* used rtoken elements */
+ u8 next_link_id;
+ enum smc_lgr_type type;
+ /* redundancy state */
+ u8 pnet_id[SMC_MAX_PNETID_LEN + 1];
+ /* pnet id of this lgr */
+ struct list_head llc_event_q;
+ /* queue for llc events */
+ spinlock_t llc_event_q_lock;
+ /* protects llc_event_q */
+ struct mutex llc_conf_mutex;
+ /* protects lgr reconfig. */
+ struct work_struct llc_add_link_work;
+ struct work_struct llc_del_link_work;
+ struct work_struct llc_event_work;
+ /* llc event worker */
+ wait_queue_head_t llc_waiter;
+ /* w4 next llc event */
+ struct smc_llc_flow llc_flow_lcl;
+ /* llc local control field */
+ struct smc_llc_flow llc_flow_rmt;
+ /* llc remote control field */
+ struct smc_llc_qentry *delayed_event;
+ /* arrived when flow active */
+ spinlock_t llc_flow_lock;
+ /* protects llc flow */
+ int llc_testlink_time;
+ /* link keep alive time */
+ u32 llc_termination_rsn;
+ /* rsn code for termination */
};
struct { /* SMC-D */
u64 peer_gid;
@@ -285,24 +339,36 @@ static inline struct smc_connection *smc_lgr_find_conn(
return res;
}
+/* returns true if the specified link is usable */
+static inline bool smc_link_usable(struct smc_link *lnk)
+{
+ if (lnk->state == SMC_LNK_UNUSED || lnk->state == SMC_LNK_INACTIVE)
+ return false;
+ return true;
+}
+
struct smc_sock;
struct smc_clc_msg_accept_confirm;
struct smc_clc_msg_local;
-void smc_lgr_forget(struct smc_link_group *lgr);
void smc_lgr_cleanup_early(struct smc_connection *conn);
void smc_lgr_terminate_sched(struct smc_link_group *lgr);
-void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport);
+void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport);
+void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport);
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid,
unsigned short vlan);
void smc_smcd_terminate_all(struct smcd_dev *dev);
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev);
int smc_buf_create(struct smc_sock *smc, bool is_smcd);
int smc_uncompress_bufsize(u8 compressed);
-int smc_rmb_rtoken_handling(struct smc_connection *conn,
+int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_link *link,
struct smc_clc_msg_accept_confirm *clc);
-int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey);
-int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey);
+int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey);
+int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey);
+void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
+ __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey);
+void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
+ __be64 nw_vaddr, __be32 nw_rkey);
void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
@@ -315,8 +381,22 @@ void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr);
int smc_core_init(void);
void smc_core_exit(void);
+int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
+ u8 link_idx, struct smc_init_info *ini);
+void smcr_link_clear(struct smc_link *lnk, bool log);
+int smcr_buf_map_lgr(struct smc_link *lnk);
+int smcr_buf_reg_lgr(struct smc_link *lnk);
+void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type);
+void smcr_lgr_set_type_asym(struct smc_link_group *lgr,
+ enum smc_lgr_type new_type, int asym_lnk_idx);
+int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc);
+struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
+ struct smc_link *from_lnk, bool is_dev_err);
+void smcr_link_down_cond(struct smc_link *lnk);
+void smcr_link_down_cond_sched(struct smc_link *lnk);
+
static inline struct smc_link_group *smc_get_lgr(struct smc_link *link)
{
- return container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+ return link->lgr;
}
#endif
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 04b6fefb8bce..f0a5064bf9bd 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -249,9 +249,10 @@ static void smc_ib_port_event_work(struct work_struct *work)
clear_bit(port_idx, &smcibdev->port_event_mask);
if (!smc_ib_port_active(smcibdev, port_idx + 1)) {
set_bit(port_idx, smcibdev->ports_going_away);
- smc_port_terminate(smcibdev, port_idx + 1);
+ smcr_port_err(smcibdev, port_idx + 1);
} else {
clear_bit(port_idx, smcibdev->ports_going_away);
+ smcr_port_add(smcibdev, port_idx + 1);
}
}
}
@@ -389,15 +390,15 @@ void smc_ib_put_memory_region(struct ib_mr *mr)
ib_dereg_mr(mr);
}
-static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot)
+static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot, u8 link_idx)
{
unsigned int offset = 0;
int sg_num;
/* map the largest prefix of a dma mapped SG list */
- sg_num = ib_map_mr_sg(buf_slot->mr_rx[SMC_SINGLE_LINK],
- buf_slot->sgt[SMC_SINGLE_LINK].sgl,
- buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
+ sg_num = ib_map_mr_sg(buf_slot->mr_rx[link_idx],
+ buf_slot->sgt[link_idx].sgl,
+ buf_slot->sgt[link_idx].orig_nents,
&offset, PAGE_SIZE);
return sg_num;
@@ -405,29 +406,29 @@ static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot)
/* Allocate a memory region and map the dma mapped SG list of buf_slot */
int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
- struct smc_buf_desc *buf_slot)
+ struct smc_buf_desc *buf_slot, u8 link_idx)
{
- if (buf_slot->mr_rx[SMC_SINGLE_LINK])
+ if (buf_slot->mr_rx[link_idx])
return 0; /* already done */
- buf_slot->mr_rx[SMC_SINGLE_LINK] =
+ buf_slot->mr_rx[link_idx] =
ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 1 << buf_slot->order);
- if (IS_ERR(buf_slot->mr_rx[SMC_SINGLE_LINK])) {
+ if (IS_ERR(buf_slot->mr_rx[link_idx])) {
int rc;
- rc = PTR_ERR(buf_slot->mr_rx[SMC_SINGLE_LINK]);
- buf_slot->mr_rx[SMC_SINGLE_LINK] = NULL;
+ rc = PTR_ERR(buf_slot->mr_rx[link_idx]);
+ buf_slot->mr_rx[link_idx] = NULL;
return rc;
}
- if (smc_ib_map_mr_sg(buf_slot) != 1)
+ if (smc_ib_map_mr_sg(buf_slot, link_idx) != 1)
return -EINVAL;
return 0;
}
/* synchronize buffer usage for cpu access */
-void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
+void smc_ib_sync_sg_for_cpu(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction)
{
@@ -435,11 +436,11 @@ void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
unsigned int i;
/* for now there is just one DMA address */
- for_each_sg(buf_slot->sgt[SMC_SINGLE_LINK].sgl, sg,
- buf_slot->sgt[SMC_SINGLE_LINK].nents, i) {
+ for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg,
+ buf_slot->sgt[lnk->link_idx].nents, i) {
if (!sg_dma_len(sg))
break;
- ib_dma_sync_single_for_cpu(smcibdev->ibdev,
+ ib_dma_sync_single_for_cpu(lnk->smcibdev->ibdev,
sg_dma_address(sg),
sg_dma_len(sg),
data_direction);
@@ -447,7 +448,7 @@ void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
}
/* synchronize buffer usage for device access */
-void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
+void smc_ib_sync_sg_for_device(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction)
{
@@ -455,11 +456,11 @@ void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
unsigned int i;
/* for now there is just one DMA address */
- for_each_sg(buf_slot->sgt[SMC_SINGLE_LINK].sgl, sg,
- buf_slot->sgt[SMC_SINGLE_LINK].nents, i) {
+ for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg,
+ buf_slot->sgt[lnk->link_idx].nents, i) {
if (!sg_dma_len(sg))
break;
- ib_dma_sync_single_for_device(smcibdev->ibdev,
+ ib_dma_sync_single_for_device(lnk->smcibdev->ibdev,
sg_dma_address(sg),
sg_dma_len(sg),
data_direction);
@@ -467,15 +468,15 @@ void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
}
/* Map a new TX or RX buffer SG-table to DMA */
-int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev,
+int smc_ib_buf_map_sg(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction)
{
int mapped_nents;
- mapped_nents = ib_dma_map_sg(smcibdev->ibdev,
- buf_slot->sgt[SMC_SINGLE_LINK].sgl,
- buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
+ mapped_nents = ib_dma_map_sg(lnk->smcibdev->ibdev,
+ buf_slot->sgt[lnk->link_idx].sgl,
+ buf_slot->sgt[lnk->link_idx].orig_nents,
data_direction);
if (!mapped_nents)
return -ENOMEM;
@@ -483,18 +484,18 @@ int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev,
return mapped_nents;
}
-void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,
+void smc_ib_buf_unmap_sg(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction)
{
- if (!buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address)
+ if (!buf_slot->sgt[lnk->link_idx].sgl->dma_address)
return; /* already unmapped */
- ib_dma_unmap_sg(smcibdev->ibdev,
- buf_slot->sgt[SMC_SINGLE_LINK].sgl,
- buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
+ ib_dma_unmap_sg(lnk->smcibdev->ibdev,
+ buf_slot->sgt[lnk->link_idx].sgl,
+ buf_slot->sgt[lnk->link_idx].orig_nents,
data_direction);
- buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address = 0;
+ buf_slot->sgt[lnk->link_idx].sgl->dma_address = 0;
}
long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev)
@@ -574,13 +575,23 @@ static void smc_ib_add_dev(struct ib_device *ibdev)
/* trigger reading of the port attributes */
port_cnt = smcibdev->ibdev->phys_port_cnt;
+ pr_warn_ratelimited("smc: adding ib device %s with port count %d\n",
+ smcibdev->ibdev->name, port_cnt);
for (i = 0;
i < min_t(size_t, port_cnt, SMC_MAX_PORTS);
i++) {
set_bit(i, &smcibdev->port_event_mask);
/* determine pnetids of the port */
- smc_pnetid_by_dev_port(ibdev->dev.parent, i,
- smcibdev->pnetid[i]);
+ if (smc_pnetid_by_dev_port(ibdev->dev.parent, i,
+ smcibdev->pnetid[i]))
+ smc_pnetid_by_table_ib(smcibdev, i + 1);
+ pr_warn_ratelimited("smc: ib device %s port %d has pnetid "
+ "%.16s%s\n",
+ smcibdev->ibdev->name, i + 1,
+ smcibdev->pnetid[i],
+ smcibdev->pnetid_by_user[i] ?
+ " (user defined)" :
+ "");
}
schedule_work(&smcibdev->port_event_work);
}
@@ -597,6 +608,8 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data)
spin_lock(&smc_ib_devices.lock);
list_del_init(&smcibdev->list); /* remove from smc_ib_devices */
spin_unlock(&smc_ib_devices.lock);
+ pr_warn_ratelimited("smc: removing ib device %s\n",
+ smcibdev->ibdev->name);
smc_smcr_terminate_all(smcibdev);
smc_ib_cleanup_per_ibdev(smcibdev);
ib_unregister_event_handler(&smcibdev->event_handler);
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index 5c2b115d36da..e6a696ae15f3 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -59,10 +59,10 @@ struct smc_link;
int smc_ib_register_client(void) __init;
void smc_ib_unregister_client(void);
bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport);
-int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev,
+int smc_ib_buf_map_sg(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction);
-void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,
+void smc_ib_buf_unmap_sg(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction);
void smc_ib_dealloc_protection_domain(struct smc_link *lnk);
@@ -74,12 +74,12 @@ int smc_ib_modify_qp_rts(struct smc_link *lnk);
int smc_ib_modify_qp_reset(struct smc_link *lnk);
long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev);
int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
- struct smc_buf_desc *buf_slot);
+ struct smc_buf_desc *buf_slot, u8 link_idx);
void smc_ib_put_memory_region(struct ib_mr *mr);
-void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
+void smc_ib_sync_sg_for_cpu(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction);
-void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
+void smc_ib_sync_sg_for_device(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction);
int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index 5c4727d5066e..91f85fc09fb8 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -296,7 +296,8 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
device_initialize(&smcd->dev);
dev_set_name(&smcd->dev, name);
smcd->ops = ops;
- smc_pnetid_by_dev_port(parent, 0, smcd->pnetid);
+ if (smc_pnetid_by_dev_port(parent, 0, smcd->pnetid))
+ smc_pnetid_by_table_smcd(smcd);
spin_lock_init(&smcd->lock);
spin_lock_init(&smcd->lgr_lock);
@@ -320,12 +321,18 @@ int smcd_register_dev(struct smcd_dev *smcd)
list_add_tail(&smcd->list, &smcd_dev_list.list);
spin_unlock(&smcd_dev_list.lock);
+ pr_warn_ratelimited("smc: adding smcd device %s with pnetid %.16s%s\n",
+ dev_name(&smcd->dev), smcd->pnetid,
+ smcd->pnetid_by_user ? " (user defined)" : "");
+
return device_add(&smcd->dev);
}
EXPORT_SYMBOL_GPL(smcd_register_dev);
void smcd_unregister_dev(struct smcd_dev *smcd)
{
+ pr_warn_ratelimited("smc: removing smcd device %s\n",
+ dev_name(&smcd->dev));
spin_lock(&smcd_dev_list.lock);
list_del_init(&smcd->list);
spin_unlock(&smcd_dev_list.lock);
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 0e52aab53d97..391237b601fe 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -17,6 +17,7 @@
#include "smc_core.h"
#include "smc_clc.h"
#include "smc_llc.h"
+#include "smc_pnet.h"
#define SMC_LLC_DATA_LEN 40
@@ -58,11 +59,34 @@ struct smc_llc_msg_add_link { /* type 0x02 */
u8 sender_gid[SMC_GID_SIZE];
u8 sender_qp_num[3];
u8 link_num;
- u8 flags2; /* QP mtu */
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u8 reserved3 : 4,
+ qp_mtu : 4;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u8 qp_mtu : 4,
+ reserved3 : 4;
+#endif
u8 initial_psn[3];
u8 reserved[8];
};
+struct smc_llc_msg_add_link_cont_rt {
+ __be32 rmb_key;
+ __be32 rmb_key_new;
+ __be64 rmb_vaddr_new;
+};
+
+#define SMC_LLC_RKEYS_PER_CONT_MSG 2
+
+struct smc_llc_msg_add_link_cont { /* type 0x03 */
+ struct smc_llc_hdr hd;
+ u8 link_num;
+ u8 num_rkeys;
+ u8 reserved2[2];
+ struct smc_llc_msg_add_link_cont_rt rt[SMC_LLC_RKEYS_PER_CONT_MSG];
+ u8 reserved[4];
+} __packed; /* format defined in RFC7609 */
+
#define SMC_LLC_FLAG_DEL_LINK_ALL 0x40
#define SMC_LLC_FLAG_DEL_LINK_ORDERLY 0x20
@@ -98,13 +122,8 @@ struct smc_llc_msg_confirm_rkey { /* type 0x06 */
u8 reserved;
};
-struct smc_llc_msg_confirm_rkey_cont { /* type 0x08 */
- struct smc_llc_hdr hd;
- u8 num_rkeys;
- struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG];
-};
-
#define SMC_LLC_DEL_RKEY_MAX 8
+#define SMC_LLC_FLAG_RKEY_RETRY 0x10
#define SMC_LLC_FLAG_RKEY_NEG 0x20
struct smc_llc_msg_delete_rkey { /* type 0x09 */
@@ -119,10 +138,10 @@ struct smc_llc_msg_delete_rkey { /* type 0x09 */
union smc_llc_msg {
struct smc_llc_msg_confirm_link confirm_link;
struct smc_llc_msg_add_link add_link;
+ struct smc_llc_msg_add_link_cont add_link_cont;
struct smc_llc_msg_del_link delete_link;
struct smc_llc_msg_confirm_rkey confirm_rkey;
- struct smc_llc_msg_confirm_rkey_cont confirm_rkey_cont;
struct smc_llc_msg_delete_rkey delete_rkey;
struct smc_llc_msg_test_link test_link;
@@ -134,6 +153,162 @@ union smc_llc_msg {
#define SMC_LLC_FLAG_RESP 0x80
+struct smc_llc_qentry {
+ struct list_head list;
+ struct smc_link *link;
+ union smc_llc_msg msg;
+};
+
+static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc);
+
+struct smc_llc_qentry *smc_llc_flow_qentry_clr(struct smc_llc_flow *flow)
+{
+ struct smc_llc_qentry *qentry = flow->qentry;
+
+ flow->qentry = NULL;
+ return qentry;
+}
+
+void smc_llc_flow_qentry_del(struct smc_llc_flow *flow)
+{
+ struct smc_llc_qentry *qentry;
+
+ if (flow->qentry) {
+ qentry = flow->qentry;
+ flow->qentry = NULL;
+ kfree(qentry);
+ }
+}
+
+static inline void smc_llc_flow_qentry_set(struct smc_llc_flow *flow,
+ struct smc_llc_qentry *qentry)
+{
+ flow->qentry = qentry;
+}
+
+/* try to start a new llc flow, initiated by an incoming llc msg */
+static bool smc_llc_flow_start(struct smc_llc_flow *flow,
+ struct smc_llc_qentry *qentry)
+{
+ struct smc_link_group *lgr = qentry->link->lgr;
+
+ spin_lock_bh(&lgr->llc_flow_lock);
+ if (flow->type) {
+ /* a flow is already active */
+ if ((qentry->msg.raw.hdr.common.type == SMC_LLC_ADD_LINK ||
+ qentry->msg.raw.hdr.common.type == SMC_LLC_DELETE_LINK) &&
+ !lgr->delayed_event) {
+ lgr->delayed_event = qentry;
+ } else {
+ /* forget this llc request */
+ kfree(qentry);
+ }
+ spin_unlock_bh(&lgr->llc_flow_lock);
+ return false;
+ }
+ switch (qentry->msg.raw.hdr.common.type) {
+ case SMC_LLC_ADD_LINK:
+ flow->type = SMC_LLC_FLOW_ADD_LINK;
+ break;
+ case SMC_LLC_DELETE_LINK:
+ flow->type = SMC_LLC_FLOW_DEL_LINK;
+ break;
+ case SMC_LLC_CONFIRM_RKEY:
+ case SMC_LLC_DELETE_RKEY:
+ flow->type = SMC_LLC_FLOW_RKEY;
+ break;
+ default:
+ flow->type = SMC_LLC_FLOW_NONE;
+ }
+ if (qentry == lgr->delayed_event)
+ lgr->delayed_event = NULL;
+ spin_unlock_bh(&lgr->llc_flow_lock);
+ smc_llc_flow_qentry_set(flow, qentry);
+ return true;
+}
+
+/* start a new local llc flow, wait till current flow finished */
+int smc_llc_flow_initiate(struct smc_link_group *lgr,
+ enum smc_llc_flowtype type)
+{
+ enum smc_llc_flowtype allowed_remote = SMC_LLC_FLOW_NONE;
+ int rc;
+
+ /* all flows except confirm_rkey and delete_rkey are exclusive,
+ * confirm/delete rkey flows can run concurrently (local and remote)
+ */
+ if (type == SMC_LLC_FLOW_RKEY)
+ allowed_remote = SMC_LLC_FLOW_RKEY;
+again:
+ if (list_empty(&lgr->list))
+ return -ENODEV;
+ spin_lock_bh(&lgr->llc_flow_lock);
+ if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE &&
+ (lgr->llc_flow_rmt.type == SMC_LLC_FLOW_NONE ||
+ lgr->llc_flow_rmt.type == allowed_remote)) {
+ lgr->llc_flow_lcl.type = type;
+ spin_unlock_bh(&lgr->llc_flow_lock);
+ return 0;
+ }
+ spin_unlock_bh(&lgr->llc_flow_lock);
+ rc = wait_event_interruptible_timeout(lgr->llc_waiter,
+ (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE &&
+ (lgr->llc_flow_rmt.type == SMC_LLC_FLOW_NONE ||
+ lgr->llc_flow_rmt.type == allowed_remote)),
+ SMC_LLC_WAIT_TIME);
+ if (!rc)
+ return -ETIMEDOUT;
+ goto again;
+}
+
+/* finish the current llc flow */
+void smc_llc_flow_stop(struct smc_link_group *lgr, struct smc_llc_flow *flow)
+{
+ spin_lock_bh(&lgr->llc_flow_lock);
+ memset(flow, 0, sizeof(*flow));
+ flow->type = SMC_LLC_FLOW_NONE;
+ spin_unlock_bh(&lgr->llc_flow_lock);
+ if (!list_empty(&lgr->list) && lgr->delayed_event &&
+ flow == &lgr->llc_flow_lcl)
+ schedule_work(&lgr->llc_event_work);
+ else
+ wake_up_interruptible(&lgr->llc_waiter);
+}
+
+/* lnk is optional and used for early wakeup when link goes down, useful in
+ * cases where we wait for a response on the link after we sent a request
+ */
+struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr,
+ struct smc_link *lnk,
+ int time_out, u8 exp_msg)
+{
+ struct smc_llc_flow *flow = &lgr->llc_flow_lcl;
+
+ wait_event_interruptible_timeout(lgr->llc_waiter,
+ (flow->qentry ||
+ (lnk && !smc_link_usable(lnk)) ||
+ list_empty(&lgr->list)),
+ time_out);
+ if (!flow->qentry ||
+ (lnk && !smc_link_usable(lnk)) || list_empty(&lgr->list)) {
+ smc_llc_flow_qentry_del(flow);
+ goto out;
+ }
+ if (exp_msg && flow->qentry->msg.raw.hdr.common.type != exp_msg) {
+ if (exp_msg == SMC_LLC_ADD_LINK &&
+ flow->qentry->msg.raw.hdr.common.type ==
+ SMC_LLC_DELETE_LINK) {
+ /* flow_start will delay the unexpected msg */
+ smc_llc_flow_start(&lgr->llc_flow_lcl,
+ smc_llc_flow_qentry_clr(flow));
+ return NULL;
+ }
+ smc_llc_flow_qentry_del(flow);
+ }
+out:
+ return flow->qentry;
+}
+
/********************************** send *************************************/
struct smc_llc_tx_pend {
@@ -186,7 +361,6 @@ static int smc_llc_add_pending_send(struct smc_link *link,
int smc_llc_send_confirm_link(struct smc_link *link,
enum smc_llc_reqresp reqresp)
{
- struct smc_link_group *lgr = smc_get_lgr(link);
struct smc_llc_msg_confirm_link *confllc;
struct smc_wr_tx_pend_priv *pend;
struct smc_wr_buf *wr_buf;
@@ -207,35 +381,52 @@ int smc_llc_send_confirm_link(struct smc_link *link,
memcpy(confllc->sender_gid, link->gid, SMC_GID_SIZE);
hton24(confllc->sender_qp_num, link->roce_qp->qp_num);
confllc->link_num = link->link_id;
- memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE);
- confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS; /* enforce peer resp. */
+ memcpy(confllc->link_uid, link->link_uid, SMC_LGR_ID_SIZE);
+ confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS;
/* send llc message */
rc = smc_wr_tx_send(link, pend);
return rc;
}
/* send LLC confirm rkey request */
-static int smc_llc_send_confirm_rkey(struct smc_link *link,
+static int smc_llc_send_confirm_rkey(struct smc_link *send_link,
struct smc_buf_desc *rmb_desc)
{
struct smc_llc_msg_confirm_rkey *rkeyllc;
struct smc_wr_tx_pend_priv *pend;
struct smc_wr_buf *wr_buf;
- int rc;
+ struct smc_link *link;
+ int i, rc, rtok_ix;
- rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ rc = smc_llc_add_pending_send(send_link, &wr_buf, &pend);
if (rc)
return rc;
rkeyllc = (struct smc_llc_msg_confirm_rkey *)wr_buf;
memset(rkeyllc, 0, sizeof(*rkeyllc));
rkeyllc->hd.common.type = SMC_LLC_CONFIRM_RKEY;
rkeyllc->hd.length = sizeof(struct smc_llc_msg_confirm_rkey);
+
+ rtok_ix = 1;
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ link = &send_link->lgr->lnk[i];
+ if (link->state == SMC_LNK_ACTIVE && link != send_link) {
+ rkeyllc->rtoken[rtok_ix].link_id = link->link_id;
+ rkeyllc->rtoken[rtok_ix].rmb_key =
+ htonl(rmb_desc->mr_rx[link->link_idx]->rkey);
+ rkeyllc->rtoken[rtok_ix].rmb_vaddr = cpu_to_be64(
+ (u64)sg_dma_address(
+ rmb_desc->sgt[link->link_idx].sgl));
+ rtok_ix++;
+ }
+ }
+ /* rkey of send_link is in rtoken[0] */
+ rkeyllc->rtoken[0].num_rkeys = rtok_ix - 1;
rkeyllc->rtoken[0].rmb_key =
- htonl(rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
+ htonl(rmb_desc->mr_rx[send_link->link_idx]->rkey);
rkeyllc->rtoken[0].rmb_vaddr = cpu_to_be64(
- (u64)sg_dma_address(rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
+ (u64)sg_dma_address(rmb_desc->sgt[send_link->link_idx].sgl));
/* send llc message */
- rc = smc_wr_tx_send(link, pend);
+ rc = smc_wr_tx_send(send_link, pend);
return rc;
}
@@ -256,32 +447,15 @@ static int smc_llc_send_delete_rkey(struct smc_link *link,
rkeyllc->hd.common.type = SMC_LLC_DELETE_RKEY;
rkeyllc->hd.length = sizeof(struct smc_llc_msg_delete_rkey);
rkeyllc->num_rkeys = 1;
- rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
+ rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[link->link_idx]->rkey);
/* send llc message */
rc = smc_wr_tx_send(link, pend);
return rc;
}
-/* prepare an add link message */
-static void smc_llc_prep_add_link(struct smc_llc_msg_add_link *addllc,
- struct smc_link *link, u8 mac[], u8 gid[],
- enum smc_llc_reqresp reqresp)
-{
- memset(addllc, 0, sizeof(*addllc));
- addllc->hd.common.type = SMC_LLC_ADD_LINK;
- addllc->hd.length = sizeof(struct smc_llc_msg_add_link);
- if (reqresp == SMC_LLC_RESP) {
- addllc->hd.flags |= SMC_LLC_FLAG_RESP;
- /* always reject more links for now */
- addllc->hd.flags |= SMC_LLC_FLAG_ADD_LNK_REJ;
- addllc->hd.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH;
- }
- memcpy(addllc->sender_mac, mac, ETH_ALEN);
- memcpy(addllc->sender_gid, gid, SMC_GID_SIZE);
-}
-
/* send ADD LINK request or response */
int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
+ struct smc_link *link_new,
enum smc_llc_reqresp reqresp)
{
struct smc_llc_msg_add_link *addllc;
@@ -293,32 +467,33 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
if (rc)
return rc;
addllc = (struct smc_llc_msg_add_link *)wr_buf;
- smc_llc_prep_add_link(addllc, link, mac, gid, reqresp);
+
+ memset(addllc, 0, sizeof(*addllc));
+ addllc->hd.common.type = SMC_LLC_ADD_LINK;
+ addllc->hd.length = sizeof(struct smc_llc_msg_add_link);
+ if (reqresp == SMC_LLC_RESP)
+ addllc->hd.flags |= SMC_LLC_FLAG_RESP;
+ memcpy(addllc->sender_mac, mac, ETH_ALEN);
+ memcpy(addllc->sender_gid, gid, SMC_GID_SIZE);
+ if (link_new) {
+ addllc->link_num = link_new->link_id;
+ hton24(addllc->sender_qp_num, link_new->roce_qp->qp_num);
+ hton24(addllc->initial_psn, link_new->psn_initial);
+ if (reqresp == SMC_LLC_REQ)
+ addllc->qp_mtu = link_new->path_mtu;
+ else
+ addllc->qp_mtu = min(link_new->path_mtu,
+ link_new->peer_mtu);
+ }
/* send llc message */
rc = smc_wr_tx_send(link, pend);
return rc;
}
-/* prepare a delete link message */
-static void smc_llc_prep_delete_link(struct smc_llc_msg_del_link *delllc,
- struct smc_link *link,
- enum smc_llc_reqresp reqresp, bool orderly)
-{
- memset(delllc, 0, sizeof(*delllc));
- delllc->hd.common.type = SMC_LLC_DELETE_LINK;
- delllc->hd.length = sizeof(struct smc_llc_msg_add_link);
- if (reqresp == SMC_LLC_RESP)
- delllc->hd.flags |= SMC_LLC_FLAG_RESP;
- /* DEL_LINK_ALL because only 1 link supported */
- delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
- if (orderly)
- delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
- delllc->link_num = link->link_id;
-}
-
/* send DELETE LINK request or response */
-int smc_llc_send_delete_link(struct smc_link *link,
- enum smc_llc_reqresp reqresp, bool orderly)
+int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id,
+ enum smc_llc_reqresp reqresp, bool orderly,
+ u32 reason)
{
struct smc_llc_msg_del_link *delllc;
struct smc_wr_tx_pend_priv *pend;
@@ -329,7 +504,19 @@ int smc_llc_send_delete_link(struct smc_link *link,
if (rc)
return rc;
delllc = (struct smc_llc_msg_del_link *)wr_buf;
- smc_llc_prep_delete_link(delllc, link, reqresp, orderly);
+
+ memset(delllc, 0, sizeof(*delllc));
+ delllc->hd.common.type = SMC_LLC_DELETE_LINK;
+ delllc->hd.length = sizeof(struct smc_llc_msg_del_link);
+ if (reqresp == SMC_LLC_RESP)
+ delllc->hd.flags |= SMC_LLC_FLAG_RESP;
+ if (orderly)
+ delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
+ if (link_del_id)
+ delllc->link_num = link_del_id;
+ else
+ delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
+ delllc->reason = htonl(reason);
/* send llc message */
rc = smc_wr_tx_send(link, pend);
return rc;
@@ -356,238 +543,1094 @@ static int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16])
return rc;
}
-struct smc_llc_send_work {
- struct work_struct work;
- struct smc_link *link;
- int llclen;
- union smc_llc_msg llcbuf;
-};
-
-/* worker that sends a prepared message */
-static void smc_llc_send_message_work(struct work_struct *work)
+/* schedule an llc send on link, may wait for buffers */
+static int smc_llc_send_message(struct smc_link *link, void *llcbuf)
{
- struct smc_llc_send_work *llcwrk = container_of(work,
- struct smc_llc_send_work, work);
struct smc_wr_tx_pend_priv *pend;
struct smc_wr_buf *wr_buf;
int rc;
- if (llcwrk->link->state == SMC_LNK_INACTIVE)
- goto out;
- rc = smc_llc_add_pending_send(llcwrk->link, &wr_buf, &pend);
+ if (!smc_link_usable(link))
+ return -ENOLINK;
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- goto out;
- memcpy(wr_buf, &llcwrk->llcbuf, llcwrk->llclen);
- smc_wr_tx_send(llcwrk->link, pend);
-out:
- kfree(llcwrk);
+ return rc;
+ memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg));
+ return smc_wr_tx_send(link, pend);
}
-/* copy llcbuf and schedule an llc send on link */
-static int smc_llc_send_message(struct smc_link *link, void *llcbuf, int llclen)
+/* schedule an llc send on link, may wait for buffers,
+ * and wait for send completion notification.
+ * @return 0 on success
+ */
+static int smc_llc_send_message_wait(struct smc_link *link, void *llcbuf)
{
- struct smc_llc_send_work *wrk = kmalloc(sizeof(*wrk), GFP_ATOMIC);
+ struct smc_wr_tx_pend_priv *pend;
+ struct smc_wr_buf *wr_buf;
+ int rc;
- if (!wrk)
- return -ENOMEM;
- INIT_WORK(&wrk->work, smc_llc_send_message_work);
- wrk->link = link;
- wrk->llclen = llclen;
- memcpy(&wrk->llcbuf, llcbuf, llclen);
- queue_work(link->llc_wq, &wrk->work);
- return 0;
+ if (!smc_link_usable(link))
+ return -ENOLINK;
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ if (rc)
+ return rc;
+ memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg));
+ return smc_wr_tx_send_wait(link, pend, SMC_LLC_WAIT_TIME);
}
/********************************* receive ***********************************/
-static void smc_llc_rx_confirm_link(struct smc_link *link,
- struct smc_llc_msg_confirm_link *llc)
+static int smc_llc_alloc_alt_link(struct smc_link_group *lgr,
+ enum smc_lgr_type lgr_new_t)
{
- struct smc_link_group *lgr = smc_get_lgr(link);
- int conf_rc;
+ int i;
+
+ if (lgr->type == SMC_LGR_SYMMETRIC ||
+ (lgr->type != SMC_LGR_SINGLE &&
+ (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
+ lgr_new_t == SMC_LGR_ASYMMETRIC_PEER)))
+ return -EMLINK;
+
+ if (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
+ lgr_new_t == SMC_LGR_ASYMMETRIC_PEER) {
+ for (i = SMC_LINKS_PER_LGR_MAX - 1; i >= 0; i--)
+ if (lgr->lnk[i].state == SMC_LNK_UNUSED)
+ return i;
+ } else {
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
+ if (lgr->lnk[i].state == SMC_LNK_UNUSED)
+ return i;
+ }
+ return -EMLINK;
+}
- /* RMBE eyecatchers are not supported */
- if (llc->hd.flags & SMC_LLC_FLAG_NO_RMBE_EYEC)
- conf_rc = 0;
- else
- conf_rc = ENOTSUPP;
+/* return first buffer from any of the next buf lists */
+static struct smc_buf_desc *_smc_llc_get_next_rmb(struct smc_link_group *lgr,
+ int *buf_lst)
+{
+ struct smc_buf_desc *buf_pos;
+
+ while (*buf_lst < SMC_RMBE_SIZES) {
+ buf_pos = list_first_entry_or_null(&lgr->rmbs[*buf_lst],
+ struct smc_buf_desc, list);
+ if (buf_pos)
+ return buf_pos;
+ (*buf_lst)++;
+ }
+ return NULL;
+}
+
+/* return next rmb from buffer lists */
+static struct smc_buf_desc *smc_llc_get_next_rmb(struct smc_link_group *lgr,
+ int *buf_lst,
+ struct smc_buf_desc *buf_pos)
+{
+ struct smc_buf_desc *buf_next;
+
+ if (!buf_pos || list_is_last(&buf_pos->list, &lgr->rmbs[*buf_lst])) {
+ (*buf_lst)++;
+ return _smc_llc_get_next_rmb(lgr, buf_lst);
+ }
+ buf_next = list_next_entry(buf_pos, list);
+ return buf_next;
+}
+
+static struct smc_buf_desc *smc_llc_get_first_rmb(struct smc_link_group *lgr,
+ int *buf_lst)
+{
+ *buf_lst = 0;
+ return smc_llc_get_next_rmb(lgr, buf_lst, NULL);
+}
+
+/* send one add_link_continue msg */
+static int smc_llc_add_link_cont(struct smc_link *link,
+ struct smc_link *link_new, u8 *num_rkeys_todo,
+ int *buf_lst, struct smc_buf_desc **buf_pos)
+{
+ struct smc_llc_msg_add_link_cont *addc_llc;
+ struct smc_link_group *lgr = link->lgr;
+ int prim_lnk_idx, lnk_idx, i, rc;
+ struct smc_wr_tx_pend_priv *pend;
+ struct smc_wr_buf *wr_buf;
+ struct smc_buf_desc *rmb;
+ u8 n;
- if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
- if (lgr->role == SMC_SERV &&
- link->state == SMC_LNK_ACTIVATING) {
- link->llc_confirm_resp_rc = conf_rc;
- complete(&link->llc_confirm_resp);
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ if (rc)
+ return rc;
+ addc_llc = (struct smc_llc_msg_add_link_cont *)wr_buf;
+ memset(addc_llc, 0, sizeof(*addc_llc));
+
+ prim_lnk_idx = link->link_idx;
+ lnk_idx = link_new->link_idx;
+ addc_llc->link_num = link_new->link_id;
+ addc_llc->num_rkeys = *num_rkeys_todo;
+ n = *num_rkeys_todo;
+ for (i = 0; i < min_t(u8, n, SMC_LLC_RKEYS_PER_CONT_MSG); i++) {
+ if (!*buf_pos) {
+ addc_llc->num_rkeys = addc_llc->num_rkeys -
+ *num_rkeys_todo;
+ *num_rkeys_todo = 0;
+ break;
}
- } else {
- if (lgr->role == SMC_CLNT &&
- link->state == SMC_LNK_ACTIVATING) {
- link->llc_confirm_rc = conf_rc;
- link->link_id = llc->link_num;
- complete(&link->llc_confirm);
+ rmb = *buf_pos;
+
+ addc_llc->rt[i].rmb_key = htonl(rmb->mr_rx[prim_lnk_idx]->rkey);
+ addc_llc->rt[i].rmb_key_new = htonl(rmb->mr_rx[lnk_idx]->rkey);
+ addc_llc->rt[i].rmb_vaddr_new =
+ cpu_to_be64((u64)sg_dma_address(rmb->sgt[lnk_idx].sgl));
+
+ (*num_rkeys_todo)--;
+ *buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos);
+ while (*buf_pos && !(*buf_pos)->used)
+ *buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos);
+ }
+ addc_llc->hd.common.type = SMC_LLC_ADD_LINK_CONT;
+ addc_llc->hd.length = sizeof(struct smc_llc_msg_add_link_cont);
+ if (lgr->role == SMC_CLNT)
+ addc_llc->hd.flags |= SMC_LLC_FLAG_RESP;
+ return smc_wr_tx_send(link, pend);
+}
+
+static int smc_llc_cli_rkey_exchange(struct smc_link *link,
+ struct smc_link *link_new)
+{
+ struct smc_llc_msg_add_link_cont *addc_llc;
+ struct smc_link_group *lgr = link->lgr;
+ u8 max, num_rkeys_send, num_rkeys_recv;
+ struct smc_llc_qentry *qentry;
+ struct smc_buf_desc *buf_pos;
+ int buf_lst;
+ int rc = 0;
+ int i;
+
+ mutex_lock(&lgr->rmbs_lock);
+ num_rkeys_send = lgr->conns_num;
+ buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst);
+ do {
+ qentry = smc_llc_wait(lgr, NULL, SMC_LLC_WAIT_TIME,
+ SMC_LLC_ADD_LINK_CONT);
+ if (!qentry) {
+ rc = -ETIMEDOUT;
+ break;
+ }
+ addc_llc = &qentry->msg.add_link_cont;
+ num_rkeys_recv = addc_llc->num_rkeys;
+ max = min_t(u8, num_rkeys_recv, SMC_LLC_RKEYS_PER_CONT_MSG);
+ for (i = 0; i < max; i++) {
+ smc_rtoken_set(lgr, link->link_idx, link_new->link_idx,
+ addc_llc->rt[i].rmb_key,
+ addc_llc->rt[i].rmb_vaddr_new,
+ addc_llc->rt[i].rmb_key_new);
+ num_rkeys_recv--;
}
+ smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
+ rc = smc_llc_add_link_cont(link, link_new, &num_rkeys_send,
+ &buf_lst, &buf_pos);
+ if (rc)
+ break;
+ } while (num_rkeys_send || num_rkeys_recv);
+
+ mutex_unlock(&lgr->rmbs_lock);
+ return rc;
+}
+
+/* prepare and send an add link reject response */
+static int smc_llc_cli_add_link_reject(struct smc_llc_qentry *qentry)
+{
+ qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP;
+ qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_ADD_LNK_REJ;
+ qentry->msg.raw.hdr.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH;
+ return smc_llc_send_message(qentry->link, &qentry->msg);
+}
+
+static int smc_llc_cli_conf_link(struct smc_link *link,
+ struct smc_init_info *ini,
+ struct smc_link *link_new,
+ enum smc_lgr_type lgr_new_t)
+{
+ struct smc_link_group *lgr = link->lgr;
+ struct smc_llc_qentry *qentry = NULL;
+ int rc = 0;
+
+ /* receive CONFIRM LINK request over RoCE fabric */
+ qentry = smc_llc_wait(lgr, NULL, SMC_LLC_WAIT_FIRST_TIME, 0);
+ if (!qentry) {
+ rc = smc_llc_send_delete_link(link, link_new->link_id,
+ SMC_LLC_REQ, false,
+ SMC_LLC_DEL_LOST_PATH);
+ return -ENOLINK;
+ }
+ if (qentry->msg.raw.hdr.common.type != SMC_LLC_CONFIRM_LINK) {
+ /* received DELETE_LINK instead */
+ qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP;
+ smc_llc_send_message(link, &qentry->msg);
+ smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
+ return -ENOLINK;
+ }
+ smc_llc_save_peer_uid(qentry);
+ smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
+
+ rc = smc_ib_modify_qp_rts(link_new);
+ if (rc) {
+ smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ,
+ false, SMC_LLC_DEL_LOST_PATH);
+ return -ENOLINK;
}
+ smc_wr_remember_qp_attr(link_new);
+
+ rc = smcr_buf_reg_lgr(link_new);
+ if (rc) {
+ smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ,
+ false, SMC_LLC_DEL_LOST_PATH);
+ return -ENOLINK;
+ }
+
+ /* send CONFIRM LINK response over RoCE fabric */
+ rc = smc_llc_send_confirm_link(link_new, SMC_LLC_RESP);
+ if (rc) {
+ smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ,
+ false, SMC_LLC_DEL_LOST_PATH);
+ return -ENOLINK;
+ }
+ smc_llc_link_active(link_new);
+ if (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
+ lgr_new_t == SMC_LGR_ASYMMETRIC_PEER)
+ smcr_lgr_set_type_asym(lgr, lgr_new_t, link_new->link_idx);
+ else
+ smcr_lgr_set_type(lgr, lgr_new_t);
+ return 0;
}
-static void smc_llc_rx_add_link(struct smc_link *link,
- struct smc_llc_msg_add_link *llc)
+static void smc_llc_save_add_link_info(struct smc_link *link,
+ struct smc_llc_msg_add_link *add_llc)
{
+ link->peer_qpn = ntoh24(add_llc->sender_qp_num);
+ memcpy(link->peer_gid, add_llc->sender_gid, SMC_GID_SIZE);
+ memcpy(link->peer_mac, add_llc->sender_mac, ETH_ALEN);
+ link->peer_psn = ntoh24(add_llc->initial_psn);
+ link->peer_mtu = add_llc->qp_mtu;
+}
+
+/* as an SMC client, process an add link request */
+int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
+{
+ struct smc_llc_msg_add_link *llc = &qentry->msg.add_link;
+ enum smc_lgr_type lgr_new_t = SMC_LGR_SYMMETRIC;
struct smc_link_group *lgr = smc_get_lgr(link);
+ struct smc_link *lnk_new = NULL;
+ struct smc_init_info ini;
+ int lnk_idx, rc = 0;
+
+ ini.vlan_id = lgr->vlan_id;
+ smc_pnet_find_alt_roce(lgr, &ini, link->smcibdev);
+ if (!memcmp(llc->sender_gid, link->peer_gid, SMC_GID_SIZE) &&
+ !memcmp(llc->sender_mac, link->peer_mac, ETH_ALEN)) {
+ if (!ini.ib_dev)
+ goto out_reject;
+ lgr_new_t = SMC_LGR_ASYMMETRIC_PEER;
+ }
+ if (!ini.ib_dev) {
+ lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL;
+ ini.ib_dev = link->smcibdev;
+ ini.ib_port = link->ibport;
+ }
+ lnk_idx = smc_llc_alloc_alt_link(lgr, lgr_new_t);
+ if (lnk_idx < 0)
+ goto out_reject;
+ lnk_new = &lgr->lnk[lnk_idx];
+ rc = smcr_link_init(lgr, lnk_new, lnk_idx, &ini);
+ if (rc)
+ goto out_reject;
+ smc_llc_save_add_link_info(lnk_new, llc);
+ lnk_new->link_id = llc->link_num; /* SMC server assigns link id */
+ smc_llc_link_set_uid(lnk_new);
- if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
- if (link->state == SMC_LNK_ACTIVATING)
- complete(&link->llc_add_resp);
- } else {
- if (link->state == SMC_LNK_ACTIVATING) {
- complete(&link->llc_add);
- return;
- }
+ rc = smc_ib_ready_link(lnk_new);
+ if (rc)
+ goto out_clear_lnk;
- if (lgr->role == SMC_SERV) {
- smc_llc_prep_add_link(llc, link,
- link->smcibdev->mac[link->ibport - 1],
- link->gid, SMC_LLC_REQ);
+ rc = smcr_buf_map_lgr(lnk_new);
+ if (rc)
+ goto out_clear_lnk;
- } else {
- smc_llc_prep_add_link(llc, link,
- link->smcibdev->mac[link->ibport - 1],
- link->gid, SMC_LLC_RESP);
+ rc = smc_llc_send_add_link(link,
+ lnk_new->smcibdev->mac[ini.ib_port - 1],
+ lnk_new->gid, lnk_new, SMC_LLC_RESP);
+ if (rc)
+ goto out_clear_lnk;
+ rc = smc_llc_cli_rkey_exchange(link, lnk_new);
+ if (rc) {
+ rc = 0;
+ goto out_clear_lnk;
+ }
+ rc = smc_llc_cli_conf_link(link, &ini, lnk_new, lgr_new_t);
+ if (!rc)
+ goto out;
+out_clear_lnk:
+ smcr_link_clear(lnk_new, false);
+out_reject:
+ smc_llc_cli_add_link_reject(qentry);
+out:
+ kfree(qentry);
+ return rc;
+}
+
+static void smc_llc_process_cli_add_link(struct smc_link_group *lgr)
+{
+ struct smc_llc_qentry *qentry;
+
+ qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
+
+ mutex_lock(&lgr->llc_conf_mutex);
+ smc_llc_cli_add_link(qentry->link, qentry);
+ mutex_unlock(&lgr->llc_conf_mutex);
+}
+
+static int smc_llc_active_link_count(struct smc_link_group *lgr)
+{
+ int i, link_count = 0;
+
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (!smc_link_usable(&lgr->lnk[i]))
+ continue;
+ link_count++;
+ }
+ return link_count;
+}
+
+/* find the asymmetric link when 3 links are established */
+static struct smc_link *smc_llc_find_asym_link(struct smc_link_group *lgr)
+{
+ int asym_idx = -ENOENT;
+ int i, j, k;
+ bool found;
+
+ /* determine asymmetric link */
+ found = false;
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) {
+ if (!smc_link_usable(&lgr->lnk[i]) ||
+ !smc_link_usable(&lgr->lnk[j]))
+ continue;
+ if (!memcmp(lgr->lnk[i].gid, lgr->lnk[j].gid,
+ SMC_GID_SIZE)) {
+ found = true; /* asym_lnk is i or j */
+ break;
+ }
}
- smc_llc_send_message(link, llc, sizeof(*llc));
+ if (found)
+ break;
}
+ if (!found)
+ goto out; /* no asymmetric link */
+ for (k = 0; k < SMC_LINKS_PER_LGR_MAX; k++) {
+ if (!smc_link_usable(&lgr->lnk[k]))
+ continue;
+ if (k != i &&
+ !memcmp(lgr->lnk[i].peer_gid, lgr->lnk[k].peer_gid,
+ SMC_GID_SIZE)) {
+ asym_idx = i;
+ break;
+ }
+ if (k != j &&
+ !memcmp(lgr->lnk[j].peer_gid, lgr->lnk[k].peer_gid,
+ SMC_GID_SIZE)) {
+ asym_idx = j;
+ break;
+ }
+ }
+out:
+ return (asym_idx < 0) ? NULL : &lgr->lnk[asym_idx];
}
-static void smc_llc_rx_delete_link(struct smc_link *link,
- struct smc_llc_msg_del_link *llc)
+static void smc_llc_delete_asym_link(struct smc_link_group *lgr)
{
- struct smc_link_group *lgr = smc_get_lgr(link);
+ struct smc_link *lnk_new = NULL, *lnk_asym;
+ struct smc_llc_qentry *qentry;
+ int rc;
- if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
- if (lgr->role == SMC_SERV)
- smc_lgr_schedule_free_work_fast(lgr);
- } else {
- smc_lgr_forget(lgr);
- smc_llc_link_deleting(link);
- if (lgr->role == SMC_SERV) {
- /* client asks to delete this link, send request */
- smc_llc_prep_delete_link(llc, link, SMC_LLC_REQ, true);
- } else {
- /* server requests to delete this link, send response */
- smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP, true);
+ lnk_asym = smc_llc_find_asym_link(lgr);
+ if (!lnk_asym)
+ return; /* no asymmetric link */
+ if (!smc_link_downing(&lnk_asym->state))
+ return;
+ lnk_new = smc_switch_conns(lgr, lnk_asym, false);
+ smc_wr_tx_wait_no_pending_sends(lnk_asym);
+ if (!lnk_new)
+ goto out_free;
+ /* change flow type from ADD_LINK into DEL_LINK */
+ lgr->llc_flow_lcl.type = SMC_LLC_FLOW_DEL_LINK;
+ rc = smc_llc_send_delete_link(lnk_new, lnk_asym->link_id, SMC_LLC_REQ,
+ true, SMC_LLC_DEL_NO_ASYM_NEEDED);
+ if (rc) {
+ smcr_link_down_cond(lnk_new);
+ goto out_free;
+ }
+ qentry = smc_llc_wait(lgr, lnk_new, SMC_LLC_WAIT_TIME,
+ SMC_LLC_DELETE_LINK);
+ if (!qentry) {
+ smcr_link_down_cond(lnk_new);
+ goto out_free;
+ }
+ smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
+out_free:
+ smcr_link_clear(lnk_asym, true);
+}
+
+static int smc_llc_srv_rkey_exchange(struct smc_link *link,
+ struct smc_link *link_new)
+{
+ struct smc_llc_msg_add_link_cont *addc_llc;
+ struct smc_link_group *lgr = link->lgr;
+ u8 max, num_rkeys_send, num_rkeys_recv;
+ struct smc_llc_qentry *qentry = NULL;
+ struct smc_buf_desc *buf_pos;
+ int buf_lst;
+ int rc = 0;
+ int i;
+
+ mutex_lock(&lgr->rmbs_lock);
+ num_rkeys_send = lgr->conns_num;
+ buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst);
+ do {
+ smc_llc_add_link_cont(link, link_new, &num_rkeys_send,
+ &buf_lst, &buf_pos);
+ qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_TIME,
+ SMC_LLC_ADD_LINK_CONT);
+ if (!qentry) {
+ rc = -ETIMEDOUT;
+ goto out;
}
- smc_llc_send_message(link, llc, sizeof(*llc));
- smc_lgr_terminate_sched(lgr);
+ addc_llc = &qentry->msg.add_link_cont;
+ num_rkeys_recv = addc_llc->num_rkeys;
+ max = min_t(u8, num_rkeys_recv, SMC_LLC_RKEYS_PER_CONT_MSG);
+ for (i = 0; i < max; i++) {
+ smc_rtoken_set(lgr, link->link_idx, link_new->link_idx,
+ addc_llc->rt[i].rmb_key,
+ addc_llc->rt[i].rmb_vaddr_new,
+ addc_llc->rt[i].rmb_key_new);
+ num_rkeys_recv--;
+ }
+ smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
+ } while (num_rkeys_send || num_rkeys_recv);
+out:
+ mutex_unlock(&lgr->rmbs_lock);
+ return rc;
+}
+
+static int smc_llc_srv_conf_link(struct smc_link *link,
+ struct smc_link *link_new,
+ enum smc_lgr_type lgr_new_t)
+{
+ struct smc_link_group *lgr = link->lgr;
+ struct smc_llc_qentry *qentry = NULL;
+ int rc;
+
+ /* send CONFIRM LINK request over the RoCE fabric */
+ rc = smc_llc_send_confirm_link(link_new, SMC_LLC_REQ);
+ if (rc)
+ return -ENOLINK;
+ /* receive CONFIRM LINK response over the RoCE fabric */
+ qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_FIRST_TIME,
+ SMC_LLC_CONFIRM_LINK);
+ if (!qentry) {
+ /* send DELETE LINK */
+ smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ,
+ false, SMC_LLC_DEL_LOST_PATH);
+ return -ENOLINK;
}
+ smc_llc_save_peer_uid(qentry);
+ smc_llc_link_active(link_new);
+ if (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
+ lgr_new_t == SMC_LGR_ASYMMETRIC_PEER)
+ smcr_lgr_set_type_asym(lgr, lgr_new_t, link_new->link_idx);
+ else
+ smcr_lgr_set_type(lgr, lgr_new_t);
+ smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
+ return 0;
}
-static void smc_llc_rx_test_link(struct smc_link *link,
- struct smc_llc_msg_test_link *llc)
+int smc_llc_srv_add_link(struct smc_link *link)
{
- if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
- if (link->state == SMC_LNK_ACTIVE)
- complete(&link->llc_testlink_resp);
- } else {
- llc->hd.flags |= SMC_LLC_FLAG_RESP;
- smc_llc_send_message(link, llc, sizeof(*llc));
+ enum smc_lgr_type lgr_new_t = SMC_LGR_SYMMETRIC;
+ struct smc_link_group *lgr = link->lgr;
+ struct smc_llc_msg_add_link *add_llc;
+ struct smc_llc_qentry *qentry = NULL;
+ struct smc_link *link_new;
+ struct smc_init_info ini;
+ int lnk_idx, rc = 0;
+
+ /* ignore client add link recommendation, start new flow */
+ ini.vlan_id = lgr->vlan_id;
+ smc_pnet_find_alt_roce(lgr, &ini, link->smcibdev);
+ if (!ini.ib_dev) {
+ lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL;
+ ini.ib_dev = link->smcibdev;
+ ini.ib_port = link->ibport;
+ }
+ lnk_idx = smc_llc_alloc_alt_link(lgr, lgr_new_t);
+ if (lnk_idx < 0)
+ return 0;
+
+ rc = smcr_link_init(lgr, &lgr->lnk[lnk_idx], lnk_idx, &ini);
+ if (rc)
+ return rc;
+ link_new = &lgr->lnk[lnk_idx];
+ rc = smc_llc_send_add_link(link,
+ link_new->smcibdev->mac[ini.ib_port - 1],
+ link_new->gid, link_new, SMC_LLC_REQ);
+ if (rc)
+ goto out_err;
+ /* receive ADD LINK response over the RoCE fabric */
+ qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_TIME, SMC_LLC_ADD_LINK);
+ if (!qentry) {
+ rc = -ETIMEDOUT;
+ goto out_err;
}
+ add_llc = &qentry->msg.add_link;
+ if (add_llc->hd.flags & SMC_LLC_FLAG_ADD_LNK_REJ) {
+ smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
+ rc = -ENOLINK;
+ goto out_err;
+ }
+ if (lgr->type == SMC_LGR_SINGLE &&
+ (!memcmp(add_llc->sender_gid, link->peer_gid, SMC_GID_SIZE) &&
+ !memcmp(add_llc->sender_mac, link->peer_mac, ETH_ALEN))) {
+ lgr_new_t = SMC_LGR_ASYMMETRIC_PEER;
+ }
+ smc_llc_save_add_link_info(link_new, add_llc);
+ smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
+
+ rc = smc_ib_ready_link(link_new);
+ if (rc)
+ goto out_err;
+ rc = smcr_buf_map_lgr(link_new);
+ if (rc)
+ goto out_err;
+ rc = smcr_buf_reg_lgr(link_new);
+ if (rc)
+ goto out_err;
+ rc = smc_llc_srv_rkey_exchange(link, link_new);
+ if (rc)
+ goto out_err;
+ rc = smc_llc_srv_conf_link(link, link_new, lgr_new_t);
+ if (rc)
+ goto out_err;
+ return 0;
+out_err:
+ smcr_link_clear(link_new, false);
+ return rc;
}
-static void smc_llc_rx_confirm_rkey(struct smc_link *link,
- struct smc_llc_msg_confirm_rkey *llc)
+static void smc_llc_process_srv_add_link(struct smc_link_group *lgr)
{
+ struct smc_link *link = lgr->llc_flow_lcl.qentry->link;
int rc;
- if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
- link->llc_confirm_rkey_rc = llc->hd.flags &
- SMC_LLC_FLAG_RKEY_NEG;
- complete(&link->llc_confirm_rkey);
- } else {
- rc = smc_rtoken_add(smc_get_lgr(link),
- llc->rtoken[0].rmb_vaddr,
- llc->rtoken[0].rmb_key);
+ smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
- /* ignore rtokens for other links, we have only one link */
+ mutex_lock(&lgr->llc_conf_mutex);
+ rc = smc_llc_srv_add_link(link);
+ if (!rc && lgr->type == SMC_LGR_SYMMETRIC) {
+ /* delete any asymmetric link */
+ smc_llc_delete_asym_link(lgr);
+ }
+ mutex_unlock(&lgr->llc_conf_mutex);
+}
+
+/* enqueue a local add_link req to trigger a new add_link flow, only as SERV */
+void smc_llc_srv_add_link_local(struct smc_link *link)
+{
+ struct smc_llc_msg_add_link add_llc = {0};
+
+ add_llc.hd.length = sizeof(add_llc);
+ add_llc.hd.common.type = SMC_LLC_ADD_LINK;
+ /* no dev and port needed, we as server ignore client data anyway */
+ smc_llc_enqueue(link, (union smc_llc_msg *)&add_llc);
+}
+
+/* worker to process an add link message */
+static void smc_llc_add_link_work(struct work_struct *work)
+{
+ struct smc_link_group *lgr = container_of(work, struct smc_link_group,
+ llc_add_link_work);
- llc->hd.flags |= SMC_LLC_FLAG_RESP;
- if (rc < 0)
- llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
- smc_llc_send_message(link, llc, sizeof(*llc));
+ if (list_empty(&lgr->list)) {
+ /* link group is terminating */
+ smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
+ goto out;
}
+
+ if (lgr->role == SMC_CLNT)
+ smc_llc_process_cli_add_link(lgr);
+ else
+ smc_llc_process_srv_add_link(lgr);
+out:
+ smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
}
-static void smc_llc_rx_confirm_rkey_cont(struct smc_link *link,
- struct smc_llc_msg_confirm_rkey_cont *llc)
+/* enqueue a local del_link msg to trigger a new del_link flow,
+ * called only for role SMC_SERV
+ */
+void smc_llc_srv_delete_link_local(struct smc_link *link, u8 del_link_id)
{
- if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
- /* unused as long as we don't send this type of msg */
- } else {
- /* ignore rtokens for other links, we have only one link */
- llc->hd.flags |= SMC_LLC_FLAG_RESP;
- smc_llc_send_message(link, llc, sizeof(*llc));
+ struct smc_llc_msg_del_link del_llc = {0};
+
+ del_llc.hd.length = sizeof(del_llc);
+ del_llc.hd.common.type = SMC_LLC_DELETE_LINK;
+ del_llc.link_num = del_link_id;
+ del_llc.reason = htonl(SMC_LLC_DEL_LOST_PATH);
+ del_llc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
+ smc_llc_enqueue(link, (union smc_llc_msg *)&del_llc);
+}
+
+static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr)
+{
+ struct smc_link *lnk_del = NULL, *lnk_asym, *lnk;
+ struct smc_llc_msg_del_link *del_llc;
+ struct smc_llc_qentry *qentry;
+ int active_links;
+ int lnk_idx;
+
+ qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
+ lnk = qentry->link;
+ del_llc = &qentry->msg.delete_link;
+
+ if (del_llc->hd.flags & SMC_LLC_FLAG_DEL_LINK_ALL) {
+ smc_lgr_terminate_sched(lgr);
+ goto out;
+ }
+ mutex_lock(&lgr->llc_conf_mutex);
+ /* delete single link */
+ for (lnk_idx = 0; lnk_idx < SMC_LINKS_PER_LGR_MAX; lnk_idx++) {
+ if (lgr->lnk[lnk_idx].link_id != del_llc->link_num)
+ continue;
+ lnk_del = &lgr->lnk[lnk_idx];
+ break;
+ }
+ del_llc->hd.flags |= SMC_LLC_FLAG_RESP;
+ if (!lnk_del) {
+ /* link was not found */
+ del_llc->reason = htonl(SMC_LLC_DEL_NOLNK);
+ smc_llc_send_message(lnk, &qentry->msg);
+ goto out_unlock;
+ }
+ lnk_asym = smc_llc_find_asym_link(lgr);
+
+ del_llc->reason = 0;
+ smc_llc_send_message(lnk, &qentry->msg); /* response */
+
+ if (smc_link_downing(&lnk_del->state)) {
+ smc_switch_conns(lgr, lnk_del, false);
+ smc_wr_tx_wait_no_pending_sends(lnk_del);
+ }
+ smcr_link_clear(lnk_del, true);
+
+ active_links = smc_llc_active_link_count(lgr);
+ if (lnk_del == lnk_asym) {
+ /* expected deletion of asym link, don't change lgr state */
+ } else if (active_links == 1) {
+ smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
+ } else if (!active_links) {
+ smcr_lgr_set_type(lgr, SMC_LGR_NONE);
+ smc_lgr_terminate_sched(lgr);
}
+out_unlock:
+ mutex_unlock(&lgr->llc_conf_mutex);
+out:
+ kfree(qentry);
}
-static void smc_llc_rx_delete_rkey(struct smc_link *link,
- struct smc_llc_msg_delete_rkey *llc)
+/* try to send a DELETE LINK ALL request on any active link,
+ * waiting for send completion
+ */
+void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn)
{
- u8 err_mask = 0;
- int i, max;
+ struct smc_llc_msg_del_link delllc = {0};
+ int i;
+
+ delllc.hd.common.type = SMC_LLC_DELETE_LINK;
+ delllc.hd.length = sizeof(delllc);
+ if (ord)
+ delllc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
+ delllc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
+ delllc.reason = htonl(rsn);
+
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (!smc_link_usable(&lgr->lnk[i]))
+ continue;
+ if (!smc_llc_send_message_wait(&lgr->lnk[i], &delllc))
+ break;
+ }
+}
- if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
- link->llc_delete_rkey_rc = llc->hd.flags &
- SMC_LLC_FLAG_RKEY_NEG;
- complete(&link->llc_delete_rkey);
- } else {
- max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
- for (i = 0; i < max; i++) {
- if (smc_rtoken_delete(smc_get_lgr(link), llc->rkey[i]))
- err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i);
+static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr)
+{
+ struct smc_llc_msg_del_link *del_llc;
+ struct smc_link *lnk, *lnk_del;
+ struct smc_llc_qentry *qentry;
+ int active_links;
+ int i;
+
+ mutex_lock(&lgr->llc_conf_mutex);
+ qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
+ lnk = qentry->link;
+ del_llc = &qentry->msg.delete_link;
+
+ if (qentry->msg.delete_link.hd.flags & SMC_LLC_FLAG_DEL_LINK_ALL) {
+ /* delete entire lgr */
+ smc_llc_send_link_delete_all(lgr, true, ntohl(
+ qentry->msg.delete_link.reason));
+ smc_lgr_terminate_sched(lgr);
+ goto out;
+ }
+ /* delete single link */
+ lnk_del = NULL;
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (lgr->lnk[i].link_id == del_llc->link_num) {
+ lnk_del = &lgr->lnk[i];
+ break;
}
+ }
+ if (!lnk_del)
+ goto out; /* asymmetric link already deleted */
- if (err_mask) {
- llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
- llc->err_mask = err_mask;
+ if (smc_link_downing(&lnk_del->state)) {
+ smc_switch_conns(lgr, lnk_del, false);
+ smc_wr_tx_wait_no_pending_sends(lnk_del);
+ }
+ if (!list_empty(&lgr->list)) {
+ /* qentry is either a request from peer (send it back to
+ * initiate the DELETE_LINK processing), or a locally
+ * enqueued DELETE_LINK request (forward it)
+ */
+ if (!smc_llc_send_message(lnk, &qentry->msg)) {
+ struct smc_llc_qentry *qentry2;
+
+ qentry2 = smc_llc_wait(lgr, lnk, SMC_LLC_WAIT_TIME,
+ SMC_LLC_DELETE_LINK);
+ if (qentry2)
+ smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
}
+ }
+ smcr_link_clear(lnk_del, true);
- llc->hd.flags |= SMC_LLC_FLAG_RESP;
- smc_llc_send_message(link, llc, sizeof(*llc));
+ active_links = smc_llc_active_link_count(lgr);
+ if (active_links == 1) {
+ smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
+ } else if (!active_links) {
+ smcr_lgr_set_type(lgr, SMC_LGR_NONE);
+ smc_lgr_terminate_sched(lgr);
}
+
+ if (lgr->type == SMC_LGR_SINGLE && !list_empty(&lgr->list)) {
+ /* trigger setup of asymm alt link */
+ smc_llc_srv_add_link_local(lnk);
+ }
+out:
+ mutex_unlock(&lgr->llc_conf_mutex);
+ kfree(qentry);
}
-static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
+static void smc_llc_delete_link_work(struct work_struct *work)
{
- struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
- union smc_llc_msg *llc = buf;
+ struct smc_link_group *lgr = container_of(work, struct smc_link_group,
+ llc_del_link_work);
- if (wc->byte_len < sizeof(*llc))
- return; /* short message */
- if (llc->raw.hdr.length != sizeof(*llc))
- return; /* invalid message */
- if (link->state == SMC_LNK_INACTIVE)
- return; /* link not active, drop msg */
+ if (list_empty(&lgr->list)) {
+ /* link group is terminating */
+ smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
+ goto out;
+ }
+
+ if (lgr->role == SMC_CLNT)
+ smc_llc_process_cli_delete_link(lgr);
+ else
+ smc_llc_process_srv_delete_link(lgr);
+out:
+ smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
+}
+
+/* process a confirm_rkey request from peer, remote flow */
+static void smc_llc_rmt_conf_rkey(struct smc_link_group *lgr)
+{
+ struct smc_llc_msg_confirm_rkey *llc;
+ struct smc_llc_qentry *qentry;
+ struct smc_link *link;
+ int num_entries;
+ int rk_idx;
+ int i;
+
+ qentry = lgr->llc_flow_rmt.qentry;
+ llc = &qentry->msg.confirm_rkey;
+ link = qentry->link;
+
+ num_entries = llc->rtoken[0].num_rkeys;
+ /* first rkey entry is for receiving link */
+ rk_idx = smc_rtoken_add(link,
+ llc->rtoken[0].rmb_vaddr,
+ llc->rtoken[0].rmb_key);
+ if (rk_idx < 0)
+ goto out_err;
+
+ for (i = 1; i <= min_t(u8, num_entries, SMC_LLC_RKEYS_PER_MSG - 1); i++)
+ smc_rtoken_set2(lgr, rk_idx, llc->rtoken[i].link_id,
+ llc->rtoken[i].rmb_vaddr,
+ llc->rtoken[i].rmb_key);
+ /* max links is 3 so there is no need to support conf_rkey_cont msgs */
+ goto out;
+out_err:
+ llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
+ llc->hd.flags |= SMC_LLC_FLAG_RKEY_RETRY;
+out:
+ llc->hd.flags |= SMC_LLC_FLAG_RESP;
+ smc_llc_send_message(link, &qentry->msg);
+ smc_llc_flow_qentry_del(&lgr->llc_flow_rmt);
+}
+
+/* process a delete_rkey request from peer, remote flow */
+static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr)
+{
+ struct smc_llc_msg_delete_rkey *llc;
+ struct smc_llc_qentry *qentry;
+ struct smc_link *link;
+ u8 err_mask = 0;
+ int i, max;
+
+ qentry = lgr->llc_flow_rmt.qentry;
+ llc = &qentry->msg.delete_rkey;
+ link = qentry->link;
+
+ max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
+ for (i = 0; i < max; i++) {
+ if (smc_rtoken_delete(link, llc->rkey[i]))
+ err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i);
+ }
+ if (err_mask) {
+ llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
+ llc->err_mask = err_mask;
+ }
+ llc->hd.flags |= SMC_LLC_FLAG_RESP;
+ smc_llc_send_message(link, &qentry->msg);
+ smc_llc_flow_qentry_del(&lgr->llc_flow_rmt);
+}
+
+static void smc_llc_protocol_violation(struct smc_link_group *lgr, u8 type)
+{
+ pr_warn_ratelimited("smc: SMC-R lg %*phN LLC protocol violation: "
+ "llc_type %d\n", SMC_LGR_ID_SIZE, &lgr->id, type);
+ smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_PROT_VIOL);
+ smc_lgr_terminate_sched(lgr);
+}
+
+/* flush the llc event queue */
+static void smc_llc_event_flush(struct smc_link_group *lgr)
+{
+ struct smc_llc_qentry *qentry, *q;
+
+ spin_lock_bh(&lgr->llc_event_q_lock);
+ list_for_each_entry_safe(qentry, q, &lgr->llc_event_q, list) {
+ list_del_init(&qentry->list);
+ kfree(qentry);
+ }
+ spin_unlock_bh(&lgr->llc_event_q_lock);
+}
+
+static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
+{
+ union smc_llc_msg *llc = &qentry->msg;
+ struct smc_link *link = qentry->link;
+ struct smc_link_group *lgr = link->lgr;
+
+ if (!smc_link_usable(link))
+ goto out;
switch (llc->raw.hdr.common.type) {
case SMC_LLC_TEST_LINK:
- smc_llc_rx_test_link(link, &llc->test_link);
- break;
- case SMC_LLC_CONFIRM_LINK:
- smc_llc_rx_confirm_link(link, &llc->confirm_link);
+ llc->test_link.hd.flags |= SMC_LLC_FLAG_RESP;
+ smc_llc_send_message(link, llc);
break;
case SMC_LLC_ADD_LINK:
- smc_llc_rx_add_link(link, &llc->add_link);
+ if (list_empty(&lgr->list))
+ goto out; /* lgr is terminating */
+ if (lgr->role == SMC_CLNT) {
+ if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_ADD_LINK) {
+ /* a flow is waiting for this message */
+ smc_llc_flow_qentry_set(&lgr->llc_flow_lcl,
+ qentry);
+ wake_up_interruptible(&lgr->llc_waiter);
+ } else if (smc_llc_flow_start(&lgr->llc_flow_lcl,
+ qentry)) {
+ schedule_work(&lgr->llc_add_link_work);
+ }
+ } else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) {
+ /* as smc server, handle client suggestion */
+ schedule_work(&lgr->llc_add_link_work);
+ }
+ return;
+ case SMC_LLC_CONFIRM_LINK:
+ case SMC_LLC_ADD_LINK_CONT:
+ if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
+ /* a flow is waiting for this message */
+ smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry);
+ wake_up_interruptible(&lgr->llc_waiter);
+ return;
+ }
break;
case SMC_LLC_DELETE_LINK:
- smc_llc_rx_delete_link(link, &llc->delete_link);
- break;
+ if (lgr->role == SMC_CLNT) {
+ /* server requests to delete this link, send response */
+ if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
+ /* DEL LINK REQ during ADD LINK SEQ */
+ smc_llc_flow_qentry_set(&lgr->llc_flow_lcl,
+ qentry);
+ wake_up_interruptible(&lgr->llc_waiter);
+ } else if (smc_llc_flow_start(&lgr->llc_flow_lcl,
+ qentry)) {
+ schedule_work(&lgr->llc_del_link_work);
+ }
+ } else {
+ if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_ADD_LINK &&
+ !lgr->llc_flow_lcl.qentry) {
+ /* DEL LINK REQ during ADD LINK SEQ */
+ smc_llc_flow_qentry_set(&lgr->llc_flow_lcl,
+ qentry);
+ wake_up_interruptible(&lgr->llc_waiter);
+ } else if (smc_llc_flow_start(&lgr->llc_flow_lcl,
+ qentry)) {
+ schedule_work(&lgr->llc_del_link_work);
+ }
+ }
+ return;
case SMC_LLC_CONFIRM_RKEY:
- smc_llc_rx_confirm_rkey(link, &llc->confirm_rkey);
- break;
+ /* new request from remote, assign to remote flow */
+ if (smc_llc_flow_start(&lgr->llc_flow_rmt, qentry)) {
+ /* process here, does not wait for more llc msgs */
+ smc_llc_rmt_conf_rkey(lgr);
+ smc_llc_flow_stop(lgr, &lgr->llc_flow_rmt);
+ }
+ return;
case SMC_LLC_CONFIRM_RKEY_CONT:
- smc_llc_rx_confirm_rkey_cont(link, &llc->confirm_rkey_cont);
+ /* not used because max links is 3, and 3 rkeys fit into
+ * one CONFIRM_RKEY message
+ */
break;
case SMC_LLC_DELETE_RKEY:
- smc_llc_rx_delete_rkey(link, &llc->delete_rkey);
+ /* new request from remote, assign to remote flow */
+ if (smc_llc_flow_start(&lgr->llc_flow_rmt, qentry)) {
+ /* process here, does not wait for more llc msgs */
+ smc_llc_rmt_delete_rkey(lgr);
+ smc_llc_flow_stop(lgr, &lgr->llc_flow_rmt);
+ }
+ return;
+ default:
+ smc_llc_protocol_violation(lgr, llc->raw.hdr.common.type);
break;
}
+out:
+ kfree(qentry);
+}
+
+/* worker to process llc messages on the event queue */
+static void smc_llc_event_work(struct work_struct *work)
+{
+ struct smc_link_group *lgr = container_of(work, struct smc_link_group,
+ llc_event_work);
+ struct smc_llc_qentry *qentry;
+
+ if (!lgr->llc_flow_lcl.type && lgr->delayed_event) {
+ if (smc_link_usable(lgr->delayed_event->link)) {
+ smc_llc_event_handler(lgr->delayed_event);
+ } else {
+ qentry = lgr->delayed_event;
+ lgr->delayed_event = NULL;
+ kfree(qentry);
+ }
+ }
+
+again:
+ spin_lock_bh(&lgr->llc_event_q_lock);
+ if (!list_empty(&lgr->llc_event_q)) {
+ qentry = list_first_entry(&lgr->llc_event_q,
+ struct smc_llc_qentry, list);
+ list_del_init(&qentry->list);
+ spin_unlock_bh(&lgr->llc_event_q_lock);
+ smc_llc_event_handler(qentry);
+ goto again;
+ }
+ spin_unlock_bh(&lgr->llc_event_q_lock);
+}
+
+/* process llc responses in tasklet context */
+static void smc_llc_rx_response(struct smc_link *link,
+ struct smc_llc_qentry *qentry)
+{
+ u8 llc_type = qentry->msg.raw.hdr.common.type;
+
+ switch (llc_type) {
+ case SMC_LLC_TEST_LINK:
+ if (link->state == SMC_LNK_ACTIVE)
+ complete(&link->llc_testlink_resp);
+ break;
+ case SMC_LLC_ADD_LINK:
+ case SMC_LLC_DELETE_LINK:
+ case SMC_LLC_CONFIRM_LINK:
+ case SMC_LLC_ADD_LINK_CONT:
+ case SMC_LLC_CONFIRM_RKEY:
+ case SMC_LLC_DELETE_RKEY:
+ /* assign responses to the local flow, we requested them */
+ smc_llc_flow_qentry_set(&link->lgr->llc_flow_lcl, qentry);
+ wake_up_interruptible(&link->lgr->llc_waiter);
+ return;
+ case SMC_LLC_CONFIRM_RKEY_CONT:
+ /* not used because max links is 3 */
+ break;
+ default:
+ smc_llc_protocol_violation(link->lgr, llc_type);
+ break;
+ }
+ kfree(qentry);
+}
+
+static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc)
+{
+ struct smc_link_group *lgr = link->lgr;
+ struct smc_llc_qentry *qentry;
+ unsigned long flags;
+
+ qentry = kmalloc(sizeof(*qentry), GFP_ATOMIC);
+ if (!qentry)
+ return;
+ qentry->link = link;
+ INIT_LIST_HEAD(&qentry->list);
+ memcpy(&qentry->msg, llc, sizeof(union smc_llc_msg));
+
+ /* process responses immediately */
+ if (llc->raw.hdr.flags & SMC_LLC_FLAG_RESP) {
+ smc_llc_rx_response(link, qentry);
+ return;
+ }
+
+ /* add requests to event queue */
+ spin_lock_irqsave(&lgr->llc_event_q_lock, flags);
+ list_add_tail(&qentry->list, &lgr->llc_event_q);
+ spin_unlock_irqrestore(&lgr->llc_event_q_lock, flags);
+ schedule_work(&link->lgr->llc_event_work);
+}
+
+/* copy received msg and add it to the event queue */
+static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
+{
+ struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
+ union smc_llc_msg *llc = buf;
+
+ if (wc->byte_len < sizeof(*llc))
+ return; /* short message */
+ if (llc->raw.hdr.length != sizeof(*llc))
+ return; /* invalid message */
+
+ smc_llc_enqueue(link, llc);
}
/***************************** worker, utils *********************************/
@@ -613,112 +1656,162 @@ static void smc_llc_testlink_work(struct work_struct *work)
/* receive TEST LINK response over RoCE fabric */
rc = wait_for_completion_interruptible_timeout(&link->llc_testlink_resp,
SMC_LLC_WAIT_TIME);
+ if (link->state != SMC_LNK_ACTIVE)
+ return; /* link state changed */
if (rc <= 0) {
- smc_lgr_terminate_sched(smc_get_lgr(link));
+ smcr_link_down_cond_sched(link);
return;
}
next_interval = link->llc_testlink_time;
out:
- queue_delayed_work(link->llc_wq, &link->llc_testlink_wrk,
- next_interval);
+ schedule_delayed_work(&link->llc_testlink_wrk, next_interval);
}
-int smc_llc_link_init(struct smc_link *link)
+void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
{
- struct smc_link_group *lgr = smc_get_lgr(link);
- link->llc_wq = alloc_ordered_workqueue("llc_wq-%x:%x)", WQ_MEM_RECLAIM,
- *((u32 *)lgr->id),
- link->link_id);
- if (!link->llc_wq)
- return -ENOMEM;
- init_completion(&link->llc_confirm);
- init_completion(&link->llc_confirm_resp);
- init_completion(&link->llc_add);
- init_completion(&link->llc_add_resp);
- init_completion(&link->llc_confirm_rkey);
- init_completion(&link->llc_delete_rkey);
- mutex_init(&link->llc_delete_rkey_mutex);
- init_completion(&link->llc_testlink_resp);
- INIT_DELAYED_WORK(&link->llc_testlink_wrk, smc_llc_testlink_work);
- return 0;
+ struct net *net = sock_net(smc->clcsock->sk);
+
+ INIT_WORK(&lgr->llc_event_work, smc_llc_event_work);
+ INIT_WORK(&lgr->llc_add_link_work, smc_llc_add_link_work);
+ INIT_WORK(&lgr->llc_del_link_work, smc_llc_delete_link_work);
+ INIT_LIST_HEAD(&lgr->llc_event_q);
+ spin_lock_init(&lgr->llc_event_q_lock);
+ spin_lock_init(&lgr->llc_flow_lock);
+ init_waitqueue_head(&lgr->llc_waiter);
+ mutex_init(&lgr->llc_conf_mutex);
+ lgr->llc_testlink_time = net->ipv4.sysctl_tcp_keepalive_time;
}
-void smc_llc_link_active(struct smc_link *link, int testlink_time)
+/* called after lgr was removed from lgr_list */
+void smc_llc_lgr_clear(struct smc_link_group *lgr)
{
- link->state = SMC_LNK_ACTIVE;
- if (testlink_time) {
- link->llc_testlink_time = testlink_time * HZ;
- queue_delayed_work(link->llc_wq, &link->llc_testlink_wrk,
- link->llc_testlink_time);
+ smc_llc_event_flush(lgr);
+ wake_up_interruptible_all(&lgr->llc_waiter);
+ cancel_work_sync(&lgr->llc_event_work);
+ cancel_work_sync(&lgr->llc_add_link_work);
+ cancel_work_sync(&lgr->llc_del_link_work);
+ if (lgr->delayed_event) {
+ kfree(lgr->delayed_event);
+ lgr->delayed_event = NULL;
}
}
-void smc_llc_link_deleting(struct smc_link *link)
+int smc_llc_link_init(struct smc_link *link)
{
- link->state = SMC_LNK_DELETING;
- smc_wr_wakeup_tx_wait(link);
+ init_completion(&link->llc_testlink_resp);
+ INIT_DELAYED_WORK(&link->llc_testlink_wrk, smc_llc_testlink_work);
+ return 0;
}
-/* called in tasklet context */
-void smc_llc_link_inactive(struct smc_link *link)
+void smc_llc_link_active(struct smc_link *link)
{
- link->state = SMC_LNK_INACTIVE;
- cancel_delayed_work(&link->llc_testlink_wrk);
- smc_wr_wakeup_reg_wait(link);
- smc_wr_wakeup_tx_wait(link);
+ pr_warn_ratelimited("smc: SMC-R lg %*phN link added: id %*phN, "
+ "peerid %*phN, ibdev %s, ibport %d\n",
+ SMC_LGR_ID_SIZE, &link->lgr->id,
+ SMC_LGR_ID_SIZE, &link->link_uid,
+ SMC_LGR_ID_SIZE, &link->peer_link_uid,
+ link->smcibdev->ibdev->name, link->ibport);
+ link->state = SMC_LNK_ACTIVE;
+ if (link->lgr->llc_testlink_time) {
+ link->llc_testlink_time = link->lgr->llc_testlink_time * HZ;
+ schedule_delayed_work(&link->llc_testlink_wrk,
+ link->llc_testlink_time);
+ }
}
/* called in worker context */
-void smc_llc_link_clear(struct smc_link *link)
+void smc_llc_link_clear(struct smc_link *link, bool log)
{
- flush_workqueue(link->llc_wq);
- destroy_workqueue(link->llc_wq);
+ if (log)
+ pr_warn_ratelimited("smc: SMC-R lg %*phN link removed: id %*phN"
+ ", peerid %*phN, ibdev %s, ibport %d\n",
+ SMC_LGR_ID_SIZE, &link->lgr->id,
+ SMC_LGR_ID_SIZE, &link->link_uid,
+ SMC_LGR_ID_SIZE, &link->peer_link_uid,
+ link->smcibdev->ibdev->name, link->ibport);
+ complete(&link->llc_testlink_resp);
+ cancel_delayed_work_sync(&link->llc_testlink_wrk);
+ smc_wr_wakeup_reg_wait(link);
+ smc_wr_wakeup_tx_wait(link);
}
-/* register a new rtoken at the remote peer */
-int smc_llc_do_confirm_rkey(struct smc_link *link,
+/* register a new rtoken at the remote peer (for all links) */
+int smc_llc_do_confirm_rkey(struct smc_link *send_link,
struct smc_buf_desc *rmb_desc)
{
- int rc;
+ struct smc_link_group *lgr = send_link->lgr;
+ struct smc_llc_qentry *qentry = NULL;
+ int rc = 0;
- /* protected by mutex smc_create_lgr_pending */
- reinit_completion(&link->llc_confirm_rkey);
- rc = smc_llc_send_confirm_rkey(link, rmb_desc);
+ rc = smc_llc_send_confirm_rkey(send_link, rmb_desc);
if (rc)
- return rc;
+ goto out;
/* receive CONFIRM RKEY response from server over RoCE fabric */
- rc = wait_for_completion_interruptible_timeout(&link->llc_confirm_rkey,
- SMC_LLC_WAIT_TIME);
- if (rc <= 0 || link->llc_confirm_rkey_rc)
- return -EFAULT;
- return 0;
+ qentry = smc_llc_wait(lgr, send_link, SMC_LLC_WAIT_TIME,
+ SMC_LLC_CONFIRM_RKEY);
+ if (!qentry || (qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_RKEY_NEG))
+ rc = -EFAULT;
+out:
+ if (qentry)
+ smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
+ return rc;
}
/* unregister an rtoken at the remote peer */
-int smc_llc_do_delete_rkey(struct smc_link *link,
+int smc_llc_do_delete_rkey(struct smc_link_group *lgr,
struct smc_buf_desc *rmb_desc)
{
+ struct smc_llc_qentry *qentry = NULL;
+ struct smc_link *send_link;
int rc = 0;
- mutex_lock(&link->llc_delete_rkey_mutex);
- if (link->state != SMC_LNK_ACTIVE)
- goto out;
- reinit_completion(&link->llc_delete_rkey);
- rc = smc_llc_send_delete_rkey(link, rmb_desc);
+ send_link = smc_llc_usable_link(lgr);
+ if (!send_link)
+ return -ENOLINK;
+
+ /* protected by llc_flow control */
+ rc = smc_llc_send_delete_rkey(send_link, rmb_desc);
if (rc)
goto out;
/* receive DELETE RKEY response from server over RoCE fabric */
- rc = wait_for_completion_interruptible_timeout(&link->llc_delete_rkey,
- SMC_LLC_WAIT_TIME);
- if (rc <= 0 || link->llc_delete_rkey_rc)
+ qentry = smc_llc_wait(lgr, send_link, SMC_LLC_WAIT_TIME,
+ SMC_LLC_DELETE_RKEY);
+ if (!qentry || (qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_RKEY_NEG))
rc = -EFAULT;
- else
- rc = 0;
out:
- mutex_unlock(&link->llc_delete_rkey_mutex);
+ if (qentry)
+ smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
return rc;
}
+void smc_llc_link_set_uid(struct smc_link *link)
+{
+ __be32 link_uid;
+
+ link_uid = htonl(*((u32 *)link->lgr->id) + link->link_id);
+ memcpy(link->link_uid, &link_uid, SMC_LGR_ID_SIZE);
+}
+
+/* save peers link user id, used for debug purposes */
+void smc_llc_save_peer_uid(struct smc_llc_qentry *qentry)
+{
+ memcpy(qentry->link->peer_link_uid, qentry->msg.confirm_link.link_uid,
+ SMC_LGR_ID_SIZE);
+}
+
+/* evaluate confirm link request or response */
+int smc_llc_eval_conf_link(struct smc_llc_qentry *qentry,
+ enum smc_llc_reqresp type)
+{
+ if (type == SMC_LLC_REQ) { /* SMC server assigns link_id */
+ qentry->link->link_id = qentry->msg.confirm_link.link_num;
+ smc_llc_link_set_uid(qentry->link);
+ }
+ if (!(qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_NO_RMBE_EYEC))
+ return -ENOTSUPP;
+ return 0;
+}
+
/***************************** init, exit, misc ******************************/
static struct smc_wr_rx_handler smc_llc_rx_handlers[] = {
@@ -736,6 +1829,10 @@ static struct smc_wr_rx_handler smc_llc_rx_handlers[] = {
},
{
.handler = smc_llc_rx_handler,
+ .type = SMC_LLC_ADD_LINK_CONT
+ },
+ {
+ .handler = smc_llc_rx_handler,
.type = SMC_LLC_DELETE_LINK
},
{
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index 461c0c3ef76e..a5d2fe3eea61 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -28,6 +28,7 @@ enum smc_llc_reqresp {
enum smc_llc_msg_type {
SMC_LLC_CONFIRM_LINK = 0x01,
SMC_LLC_ADD_LINK = 0x02,
+ SMC_LLC_ADD_LINK_CONT = 0x03,
SMC_LLC_DELETE_LINK = 0x04,
SMC_LLC_CONFIRM_RKEY = 0x06,
SMC_LLC_TEST_LINK = 0x07,
@@ -35,22 +36,74 @@ enum smc_llc_msg_type {
SMC_LLC_DELETE_RKEY = 0x09,
};
+#define smc_link_downing(state) \
+ (cmpxchg(state, SMC_LNK_ACTIVE, SMC_LNK_INACTIVE) == SMC_LNK_ACTIVE)
+
+/* LLC DELETE LINK Request Reason Codes */
+#define SMC_LLC_DEL_LOST_PATH 0x00010000
+#define SMC_LLC_DEL_OP_INIT_TERM 0x00020000
+#define SMC_LLC_DEL_PROG_INIT_TERM 0x00030000
+#define SMC_LLC_DEL_PROT_VIOL 0x00040000
+#define SMC_LLC_DEL_NO_ASYM_NEEDED 0x00050000
+/* LLC DELETE LINK Response Reason Codes */
+#define SMC_LLC_DEL_NOLNK 0x00100000 /* Unknown Link ID (no link) */
+#define SMC_LLC_DEL_NOLGR 0x00200000 /* Unknown Link Group */
+
+/* returns a usable link of the link group, or NULL */
+static inline struct smc_link *smc_llc_usable_link(struct smc_link_group *lgr)
+{
+ int i;
+
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
+ if (smc_link_usable(&lgr->lnk[i]))
+ return &lgr->lnk[i];
+ return NULL;
+}
+
+/* set the termination reason code for the link group */
+static inline void smc_llc_set_termination_rsn(struct smc_link_group *lgr,
+ u32 rsn)
+{
+ if (!lgr->llc_termination_rsn)
+ lgr->llc_termination_rsn = rsn;
+}
+
/* transmit */
int smc_llc_send_confirm_link(struct smc_link *lnk,
enum smc_llc_reqresp reqresp);
int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
+ struct smc_link *link_new,
enum smc_llc_reqresp reqresp);
-int smc_llc_send_delete_link(struct smc_link *link,
- enum smc_llc_reqresp reqresp, bool orderly);
+int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id,
+ enum smc_llc_reqresp reqresp, bool orderly,
+ u32 reason);
+void smc_llc_srv_delete_link_local(struct smc_link *link, u8 del_link_id);
+void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc);
+void smc_llc_lgr_clear(struct smc_link_group *lgr);
int smc_llc_link_init(struct smc_link *link);
-void smc_llc_link_active(struct smc_link *link, int testlink_time);
-void smc_llc_link_deleting(struct smc_link *link);
-void smc_llc_link_inactive(struct smc_link *link);
-void smc_llc_link_clear(struct smc_link *link);
-int smc_llc_do_confirm_rkey(struct smc_link *link,
+void smc_llc_link_active(struct smc_link *link);
+void smc_llc_link_clear(struct smc_link *link, bool log);
+int smc_llc_do_confirm_rkey(struct smc_link *send_link,
struct smc_buf_desc *rmb_desc);
-int smc_llc_do_delete_rkey(struct smc_link *link,
+int smc_llc_do_delete_rkey(struct smc_link_group *lgr,
struct smc_buf_desc *rmb_desc);
+int smc_llc_flow_initiate(struct smc_link_group *lgr,
+ enum smc_llc_flowtype type);
+void smc_llc_flow_stop(struct smc_link_group *lgr, struct smc_llc_flow *flow);
+int smc_llc_eval_conf_link(struct smc_llc_qentry *qentry,
+ enum smc_llc_reqresp type);
+void smc_llc_link_set_uid(struct smc_link *link);
+void smc_llc_save_peer_uid(struct smc_llc_qentry *qentry);
+struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr,
+ struct smc_link *lnk,
+ int time_out, u8 exp_msg);
+struct smc_llc_qentry *smc_llc_flow_qentry_clr(struct smc_llc_flow *flow);
+void smc_llc_flow_qentry_del(struct smc_llc_flow *flow);
+void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord,
+ u32 rsn);
+int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry);
+int smc_llc_srv_add_link(struct smc_link *link);
+void smc_llc_srv_add_link_local(struct smc_link *link);
int smc_llc_init(void) __init;
#endif /* SMC_LLC_H */
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 2a5ed47c3e08..014d91b9778e 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -32,7 +32,7 @@
static struct net_device *pnet_find_base_ndev(struct net_device *ndev);
-static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
+static const struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
[SMC_PNETID_NAME] = {
.type = NLA_NUL_STRING,
.len = SMC_MAX_PNETID_LEN
@@ -50,29 +50,26 @@ static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
static struct genl_family smc_pnet_nl_family;
-/**
- * struct smc_user_pnetentry - pnet identifier name entry for/from user
- * @list: List node.
- * @pnet_name: Pnet identifier name
- * @ndev: pointer to network device.
- * @smcibdev: Pointer to IB device.
- * @ib_port: Port of IB device.
- * @smcd_dev: Pointer to smcd device.
- */
-struct smc_user_pnetentry {
- struct list_head list;
- char pnet_name[SMC_MAX_PNETID_LEN + 1];
- struct net_device *ndev;
- struct smc_ib_device *smcibdev;
- u8 ib_port;
- struct smcd_dev *smcd_dev;
+enum smc_pnet_nametype {
+ SMC_PNET_ETH = 1,
+ SMC_PNET_IB = 2,
};
/* pnet entry stored in pnet table */
struct smc_pnetentry {
struct list_head list;
char pnet_name[SMC_MAX_PNETID_LEN + 1];
- struct net_device *ndev;
+ enum smc_pnet_nametype type;
+ union {
+ struct {
+ char eth_name[IFNAMSIZ + 1];
+ struct net_device *ndev;
+ };
+ struct {
+ char ib_name[IB_DEVICE_NAME_MAX + 1];
+ u8 ib_port;
+ };
+ };
};
/* Check if two given pnetids match */
@@ -106,14 +103,21 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
sn = net_generic(net, smc_net_id);
pnettable = &sn->pnettable;
- /* remove netdevices */
+ /* remove table entry */
write_lock(&pnettable->lock);
list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist,
list) {
if (!pnet_name ||
smc_pnet_match(pnetelem->pnet_name, pnet_name)) {
list_del(&pnetelem->list);
- dev_put(pnetelem->ndev);
+ if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev) {
+ dev_put(pnetelem->ndev);
+ pr_warn_ratelimited("smc: net device %s "
+ "erased user defined "
+ "pnetid %.16s\n",
+ pnetelem->eth_name,
+ pnetelem->pnet_name);
+ }
kfree(pnetelem);
rc = 0;
}
@@ -132,6 +136,12 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
(!pnet_name ||
smc_pnet_match(pnet_name,
ibdev->pnetid[ibport]))) {
+ pr_warn_ratelimited("smc: ib device %s ibport "
+ "%d erased user defined "
+ "pnetid %.16s\n",
+ ibdev->ibdev->name,
+ ibport + 1,
+ ibdev->pnetid[ibport]);
memset(ibdev->pnetid[ibport], 0,
SMC_MAX_PNETID_LEN);
ibdev->pnetid_by_user[ibport] = false;
@@ -146,6 +156,10 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
if (smcd_dev->pnetid_by_user &&
(!pnet_name ||
smc_pnet_match(pnet_name, smcd_dev->pnetid))) {
+ pr_warn_ratelimited("smc: smcd device %s "
+ "erased user defined pnetid "
+ "%.16s\n", dev_name(&smcd_dev->dev),
+ smcd_dev->pnetid);
memset(smcd_dev->pnetid, 0, SMC_MAX_PNETID_LEN);
smcd_dev->pnetid_by_user = false;
rc = 0;
@@ -155,9 +169,9 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
return rc;
}
-/* Remove a pnet entry mentioning a given network device from the pnet table.
+/* Add the reference to a given network device to the pnet table.
*/
-static int smc_pnet_remove_by_ndev(struct net_device *ndev)
+static int smc_pnet_add_by_ndev(struct net_device *ndev)
{
struct smc_pnetentry *pnetelem, *tmp_pe;
struct smc_pnettable *pnettable;
@@ -171,11 +185,15 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev)
write_lock(&pnettable->lock);
list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) {
- if (pnetelem->ndev == ndev) {
- list_del(&pnetelem->list);
- dev_put(pnetelem->ndev);
- kfree(pnetelem);
+ if (pnetelem->type == SMC_PNET_ETH && !pnetelem->ndev &&
+ !strncmp(pnetelem->eth_name, ndev->name, IFNAMSIZ)) {
+ dev_hold(ndev);
+ pnetelem->ndev = ndev;
rc = 0;
+ pr_warn_ratelimited("smc: adding net device %s with "
+ "user defined pnetid %.16s\n",
+ pnetelem->eth_name,
+ pnetelem->pnet_name);
break;
}
}
@@ -183,80 +201,71 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev)
return rc;
}
-/* Append a pnetid to the end of the pnet table if not already on this list.
+/* Remove the reference to a given network device from the pnet table.
*/
-static int smc_pnet_enter(struct smc_pnettable *pnettable,
- struct smc_user_pnetentry *new_pnetelem)
+static int smc_pnet_remove_by_ndev(struct net_device *ndev)
{
- u8 pnet_null[SMC_MAX_PNETID_LEN] = {0};
- u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
- struct smc_pnetentry *tmp_pnetelem;
- struct smc_pnetentry *pnetelem;
- bool new_smcddev = false;
- struct net_device *ndev;
- bool new_netdev = true;
- bool new_ibdev = false;
-
- if (new_pnetelem->smcibdev) {
- struct smc_ib_device *ib_dev = new_pnetelem->smcibdev;
- int ib_port = new_pnetelem->ib_port;
+ struct smc_pnetentry *pnetelem, *tmp_pe;
+ struct smc_pnettable *pnettable;
+ struct net *net = dev_net(ndev);
+ struct smc_net *sn;
+ int rc = -ENOENT;
- spin_lock(&smc_ib_devices.lock);
- if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) {
- memcpy(ib_dev->pnetid[ib_port - 1],
- new_pnetelem->pnet_name, SMC_MAX_PNETID_LEN);
- ib_dev->pnetid_by_user[ib_port - 1] = true;
- new_ibdev = true;
- }
- spin_unlock(&smc_ib_devices.lock);
- }
- if (new_pnetelem->smcd_dev) {
- struct smcd_dev *smcd_dev = new_pnetelem->smcd_dev;
+ /* get pnettable for namespace */
+ sn = net_generic(net, smc_net_id);
+ pnettable = &sn->pnettable;
- spin_lock(&smcd_dev_list.lock);
- if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) {
- memcpy(smcd_dev->pnetid, new_pnetelem->pnet_name,
- SMC_MAX_PNETID_LEN);
- smcd_dev->pnetid_by_user = true;
- new_smcddev = true;
+ write_lock(&pnettable->lock);
+ list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) {
+ if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev == ndev) {
+ dev_put(pnetelem->ndev);
+ pnetelem->ndev = NULL;
+ rc = 0;
+ pr_warn_ratelimited("smc: removing net device %s with "
+ "user defined pnetid %.16s\n",
+ pnetelem->eth_name,
+ pnetelem->pnet_name);
+ break;
}
- spin_unlock(&smcd_dev_list.lock);
}
+ write_unlock(&pnettable->lock);
+ return rc;
+}
- if (!new_pnetelem->ndev)
- return (new_ibdev || new_smcddev) ? 0 : -EEXIST;
+/* Apply pnetid to ib device when no pnetid is set.
+ */
+static bool smc_pnet_apply_ib(struct smc_ib_device *ib_dev, u8 ib_port,
+ char *pnet_name)
+{
+ u8 pnet_null[SMC_MAX_PNETID_LEN] = {0};
+ bool applied = false;
- /* check if (base) netdev already has a pnetid. If there is one, we do
- * not want to add a pnet table entry
- */
- ndev = pnet_find_base_ndev(new_pnetelem->ndev);
- if (!smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
- ndev_pnetid))
- return (new_ibdev || new_smcddev) ? 0 : -EEXIST;
+ spin_lock(&smc_ib_devices.lock);
+ if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) {
+ memcpy(ib_dev->pnetid[ib_port - 1], pnet_name,
+ SMC_MAX_PNETID_LEN);
+ ib_dev->pnetid_by_user[ib_port - 1] = true;
+ applied = true;
+ }
+ spin_unlock(&smc_ib_devices.lock);
+ return applied;
+}
- /* add a new netdev entry to the pnet table if there isn't one */
- tmp_pnetelem = kzalloc(sizeof(*pnetelem), GFP_KERNEL);
- if (!tmp_pnetelem)
- return -ENOMEM;
- memcpy(tmp_pnetelem->pnet_name, new_pnetelem->pnet_name,
- SMC_MAX_PNETID_LEN);
- tmp_pnetelem->ndev = new_pnetelem->ndev;
+/* Apply pnetid to smcd device when no pnetid is set.
+ */
+static bool smc_pnet_apply_smcd(struct smcd_dev *smcd_dev, char *pnet_name)
+{
+ u8 pnet_null[SMC_MAX_PNETID_LEN] = {0};
+ bool applied = false;
- write_lock(&pnettable->lock);
- list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
- if (pnetelem->ndev == new_pnetelem->ndev)
- new_netdev = false;
- }
- if (new_netdev) {
- dev_hold(tmp_pnetelem->ndev);
- list_add_tail(&tmp_pnetelem->list, &pnettable->pnetlist);
- write_unlock(&pnettable->lock);
- } else {
- write_unlock(&pnettable->lock);
- kfree(tmp_pnetelem);
+ spin_lock(&smcd_dev_list.lock);
+ if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) {
+ memcpy(smcd_dev->pnetid, pnet_name, SMC_MAX_PNETID_LEN);
+ smcd_dev->pnetid_by_user = true;
+ applied = true;
}
-
- return (new_netdev || new_ibdev || new_smcddev) ? 0 : -EEXIST;
+ spin_unlock(&smcd_dev_list.lock);
+ return applied;
}
/* The limit for pnetid is 16 characters.
@@ -323,57 +332,184 @@ out:
return smcd_dev;
}
-/* Parse the supplied netlink attributes and fill a pnetentry structure.
- * For ethernet and infiniband device names verify that the devices exist.
+static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
+ char *eth_name, char *pnet_name)
+{
+ struct smc_pnetentry *tmp_pe, *new_pe;
+ struct net_device *ndev, *base_ndev;
+ u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
+ bool new_netdev;
+ int rc;
+
+ /* check if (base) netdev already has a pnetid. If there is one, we do
+ * not want to add a pnet table entry
+ */
+ rc = -EEXIST;
+ ndev = dev_get_by_name(net, eth_name); /* dev_hold() */
+ if (ndev) {
+ base_ndev = pnet_find_base_ndev(ndev);
+ if (!smc_pnetid_by_dev_port(base_ndev->dev.parent,
+ base_ndev->dev_port, ndev_pnetid))
+ goto out_put;
+ }
+
+ /* add a new netdev entry to the pnet table if there isn't one */
+ rc = -ENOMEM;
+ new_pe = kzalloc(sizeof(*new_pe), GFP_KERNEL);
+ if (!new_pe)
+ goto out_put;
+ new_pe->type = SMC_PNET_ETH;
+ memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN);
+ strncpy(new_pe->eth_name, eth_name, IFNAMSIZ);
+ new_pe->ndev = ndev;
+
+ rc = -EEXIST;
+ new_netdev = true;
+ write_lock(&pnettable->lock);
+ list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
+ if (tmp_pe->type == SMC_PNET_ETH &&
+ !strncmp(tmp_pe->eth_name, eth_name, IFNAMSIZ)) {
+ new_netdev = false;
+ break;
+ }
+ }
+ if (new_netdev) {
+ list_add_tail(&new_pe->list, &pnettable->pnetlist);
+ write_unlock(&pnettable->lock);
+ } else {
+ write_unlock(&pnettable->lock);
+ kfree(new_pe);
+ goto out_put;
+ }
+ if (ndev)
+ pr_warn_ratelimited("smc: net device %s "
+ "applied user defined pnetid %.16s\n",
+ new_pe->eth_name, new_pe->pnet_name);
+ return 0;
+
+out_put:
+ if (ndev)
+ dev_put(ndev);
+ return rc;
+}
+
+static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name,
+ u8 ib_port, char *pnet_name)
+{
+ struct smc_pnetentry *tmp_pe, *new_pe;
+ struct smc_ib_device *ib_dev;
+ bool smcddev_applied = true;
+ bool ibdev_applied = true;
+ struct smcd_dev *smcd_dev;
+ bool new_ibdev;
+
+ /* try to apply the pnetid to active devices */
+ ib_dev = smc_pnet_find_ib(ib_name);
+ if (ib_dev) {
+ ibdev_applied = smc_pnet_apply_ib(ib_dev, ib_port, pnet_name);
+ if (ibdev_applied)
+ pr_warn_ratelimited("smc: ib device %s ibport %d "
+ "applied user defined pnetid "
+ "%.16s\n", ib_dev->ibdev->name,
+ ib_port,
+ ib_dev->pnetid[ib_port - 1]);
+ }
+ smcd_dev = smc_pnet_find_smcd(ib_name);
+ if (smcd_dev) {
+ smcddev_applied = smc_pnet_apply_smcd(smcd_dev, pnet_name);
+ if (smcddev_applied)
+ pr_warn_ratelimited("smc: smcd device %s "
+ "applied user defined pnetid "
+ "%.16s\n", dev_name(&smcd_dev->dev),
+ smcd_dev->pnetid);
+ }
+ /* Apply fails when a device has a hardware-defined pnetid set, do not
+ * add a pnet table entry in that case.
+ */
+ if (!ibdev_applied || !smcddev_applied)
+ return -EEXIST;
+
+ /* add a new ib entry to the pnet table if there isn't one */
+ new_pe = kzalloc(sizeof(*new_pe), GFP_KERNEL);
+ if (!new_pe)
+ return -ENOMEM;
+ new_pe->type = SMC_PNET_IB;
+ memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN);
+ strncpy(new_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX);
+ new_pe->ib_port = ib_port;
+
+ new_ibdev = true;
+ write_lock(&pnettable->lock);
+ list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
+ if (tmp_pe->type == SMC_PNET_IB &&
+ !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) {
+ new_ibdev = false;
+ break;
+ }
+ }
+ if (new_ibdev) {
+ list_add_tail(&new_pe->list, &pnettable->pnetlist);
+ write_unlock(&pnettable->lock);
+ } else {
+ write_unlock(&pnettable->lock);
+ kfree(new_pe);
+ }
+ return (new_ibdev) ? 0 : -EEXIST;
+}
+
+/* Append a pnetid to the end of the pnet table if not already on this list.
*/
-static int smc_pnet_fill_entry(struct net *net,
- struct smc_user_pnetentry *pnetelem,
- struct nlattr *tb[])
+static int smc_pnet_enter(struct net *net, struct nlattr *tb[])
{
- char *string, *ibname;
+ char pnet_name[SMC_MAX_PNETID_LEN + 1];
+ struct smc_pnettable *pnettable;
+ bool new_netdev = false;
+ bool new_ibdev = false;
+ struct smc_net *sn;
+ u8 ibport = 1;
+ char *string;
int rc;
- memset(pnetelem, 0, sizeof(*pnetelem));
- INIT_LIST_HEAD(&pnetelem->list);
+ /* get pnettable for namespace */
+ sn = net_generic(net, smc_net_id);
+ pnettable = &sn->pnettable;
rc = -EINVAL;
if (!tb[SMC_PNETID_NAME])
goto error;
string = (char *)nla_data(tb[SMC_PNETID_NAME]);
- if (!smc_pnetid_valid(string, pnetelem->pnet_name))
+ if (!smc_pnetid_valid(string, pnet_name))
goto error;
- rc = -EINVAL;
if (tb[SMC_PNETID_ETHNAME]) {
string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
- pnetelem->ndev = dev_get_by_name(net, string);
- if (!pnetelem->ndev)
+ rc = smc_pnet_add_eth(pnettable, net, string, pnet_name);
+ if (!rc)
+ new_netdev = true;
+ else if (rc != -EEXIST)
goto error;
}
/* if this is not the initial namespace, stop here */
if (net != &init_net)
- return 0;
+ return new_netdev ? 0 : -EEXIST;
rc = -EINVAL;
if (tb[SMC_PNETID_IBNAME]) {
- ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
- ibname = strim(ibname);
- pnetelem->smcibdev = smc_pnet_find_ib(ibname);
- pnetelem->smcd_dev = smc_pnet_find_smcd(ibname);
- if (!pnetelem->smcibdev && !pnetelem->smcd_dev)
- goto error;
- if (pnetelem->smcibdev) {
- if (!tb[SMC_PNETID_IBPORT])
- goto error;
- pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]);
- if (pnetelem->ib_port < 1 ||
- pnetelem->ib_port > SMC_MAX_PORTS)
+ string = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
+ string = strim(string);
+ if (tb[SMC_PNETID_IBPORT]) {
+ ibport = nla_get_u8(tb[SMC_PNETID_IBPORT]);
+ if (ibport < 1 || ibport > SMC_MAX_PORTS)
goto error;
}
+ rc = smc_pnet_add_ib(pnettable, string, ibport, pnet_name);
+ if (!rc)
+ new_ibdev = true;
+ else if (rc != -EEXIST)
+ goto error;
}
-
- return 0;
+ return (new_netdev || new_ibdev) ? 0 : -EEXIST;
error:
return rc;
@@ -381,28 +517,22 @@ error:
/* Convert an smc_pnetentry to a netlink attribute sequence */
static int smc_pnet_set_nla(struct sk_buff *msg,
- struct smc_user_pnetentry *pnetelem)
+ struct smc_pnetentry *pnetelem)
{
if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name))
return -1;
- if (pnetelem->ndev) {
+ if (pnetelem->type == SMC_PNET_ETH) {
if (nla_put_string(msg, SMC_PNETID_ETHNAME,
- pnetelem->ndev->name))
+ pnetelem->eth_name))
return -1;
} else {
if (nla_put_string(msg, SMC_PNETID_ETHNAME, "n/a"))
return -1;
}
- if (pnetelem->smcibdev) {
- if (nla_put_string(msg, SMC_PNETID_IBNAME,
- dev_name(pnetelem->smcibdev->ibdev->dev.parent)) ||
+ if (pnetelem->type == SMC_PNET_IB) {
+ if (nla_put_string(msg, SMC_PNETID_IBNAME, pnetelem->ib_name) ||
nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port))
return -1;
- } else if (pnetelem->smcd_dev) {
- if (nla_put_string(msg, SMC_PNETID_IBNAME,
- dev_name(&pnetelem->smcd_dev->dev)) ||
- nla_put_u8(msg, SMC_PNETID_IBPORT, 1))
- return -1;
} else {
if (nla_put_string(msg, SMC_PNETID_IBNAME, "n/a") ||
nla_put_u8(msg, SMC_PNETID_IBPORT, 0xff))
@@ -415,21 +545,8 @@ static int smc_pnet_set_nla(struct sk_buff *msg,
static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = genl_info_net(info);
- struct smc_user_pnetentry pnetelem;
- struct smc_pnettable *pnettable;
- struct smc_net *sn;
- int rc;
-
- /* get pnettable for namespace */
- sn = net_generic(net, smc_net_id);
- pnettable = &sn->pnettable;
- rc = smc_pnet_fill_entry(net, &pnetelem, info->attrs);
- if (!rc)
- rc = smc_pnet_enter(pnettable, &pnetelem);
- if (pnetelem.ndev)
- dev_put(pnetelem.ndev);
- return rc;
+ return smc_pnet_enter(net, info->attrs);
}
static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info)
@@ -450,7 +567,7 @@ static int smc_pnet_dump_start(struct netlink_callback *cb)
static int smc_pnet_dumpinfo(struct sk_buff *skb,
u32 portid, u32 seq, u32 flags,
- struct smc_user_pnetentry *pnetelem)
+ struct smc_pnetentry *pnetelem)
{
void *hdr;
@@ -469,91 +586,32 @@ static int smc_pnet_dumpinfo(struct sk_buff *skb,
static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid,
u32 seq, u8 *pnetid, int start_idx)
{
- struct smc_user_pnetentry tmp_entry;
struct smc_pnettable *pnettable;
struct smc_pnetentry *pnetelem;
- struct smc_ib_device *ibdev;
- struct smcd_dev *smcd_dev;
struct smc_net *sn;
int idx = 0;
- int ibport;
/* get pnettable for namespace */
sn = net_generic(net, smc_net_id);
pnettable = &sn->pnettable;
- /* dump netdevices */
+ /* dump pnettable entries */
read_lock(&pnettable->lock);
list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid))
continue;
if (idx++ < start_idx)
continue;
- memset(&tmp_entry, 0, sizeof(tmp_entry));
- memcpy(&tmp_entry.pnet_name, pnetelem->pnet_name,
- SMC_MAX_PNETID_LEN);
- tmp_entry.ndev = pnetelem->ndev;
+ /* if this is not the initial namespace, dump only netdev */
+ if (net != &init_net && pnetelem->type != SMC_PNET_ETH)
+ continue;
if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI,
- &tmp_entry)) {
+ pnetelem)) {
--idx;
break;
}
}
read_unlock(&pnettable->lock);
-
- /* if this is not the initial namespace, stop here */
- if (net != &init_net)
- return idx;
-
- /* dump ib devices */
- spin_lock(&smc_ib_devices.lock);
- list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
- for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) {
- if (ibdev->pnetid_by_user[ibport]) {
- if (pnetid &&
- !smc_pnet_match(ibdev->pnetid[ibport],
- pnetid))
- continue;
- if (idx++ < start_idx)
- continue;
- memset(&tmp_entry, 0, sizeof(tmp_entry));
- memcpy(&tmp_entry.pnet_name,
- ibdev->pnetid[ibport],
- SMC_MAX_PNETID_LEN);
- tmp_entry.smcibdev = ibdev;
- tmp_entry.ib_port = ibport + 1;
- if (smc_pnet_dumpinfo(skb, portid, seq,
- NLM_F_MULTI,
- &tmp_entry)) {
- --idx;
- break;
- }
- }
- }
- }
- spin_unlock(&smc_ib_devices.lock);
-
- /* dump smcd devices */
- spin_lock(&smcd_dev_list.lock);
- list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
- if (smcd_dev->pnetid_by_user) {
- if (pnetid && !smc_pnet_match(smcd_dev->pnetid, pnetid))
- continue;
- if (idx++ < start_idx)
- continue;
- memset(&tmp_entry, 0, sizeof(tmp_entry));
- memcpy(&tmp_entry.pnet_name, smcd_dev->pnetid,
- SMC_MAX_PNETID_LEN);
- tmp_entry.smcd_dev = smcd_dev;
- if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI,
- &tmp_entry)) {
- --idx;
- break;
- }
- }
- }
- spin_unlock(&smcd_dev_list.lock);
-
return idx;
}
@@ -659,6 +717,9 @@ static int smc_pnet_netdev_event(struct notifier_block *this,
case NETDEV_UNREGISTER:
smc_pnet_remove_by_ndev(event_dev);
return NOTIFY_OK;
+ case NETDEV_REGISTER:
+ smc_pnet_add_by_ndev(event_dev);
+ return NOTIFY_OK;
default:
return NOTIFY_DONE;
}
@@ -744,7 +805,7 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
read_lock(&pnettable->lock);
list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
- if (ndev == pnetelem->ndev) {
+ if (pnetelem->type == SMC_PNET_ETH && ndev == pnetelem->ndev) {
/* get pnetid of netdev device */
memcpy(pnetid, pnetelem->pnet_name, SMC_MAX_PNETID_LEN);
rc = 0;
@@ -755,6 +816,45 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
return rc;
}
+/* find a roce device for the given pnetid */
+static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id,
+ struct smc_init_info *ini,
+ struct smc_ib_device *known_dev)
+{
+ struct smc_ib_device *ibdev;
+ int i;
+
+ ini->ib_dev = NULL;
+ spin_lock(&smc_ib_devices.lock);
+ list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
+ if (ibdev == known_dev)
+ continue;
+ for (i = 1; i <= SMC_MAX_PORTS; i++) {
+ if (!rdma_is_port_valid(ibdev->ibdev, i))
+ continue;
+ if (smc_pnet_match(ibdev->pnetid[i - 1], pnet_id) &&
+ smc_ib_port_active(ibdev, i) &&
+ !test_bit(i - 1, ibdev->ports_going_away) &&
+ !smc_ib_determine_gid(ibdev, i, ini->vlan_id,
+ ini->ib_gid, NULL)) {
+ ini->ib_dev = ibdev;
+ ini->ib_port = i;
+ goto out;
+ }
+ }
+ }
+out:
+ spin_unlock(&smc_ib_devices.lock);
+}
+
+/* find alternate roce device with same pnet_id and vlan_id */
+void smc_pnet_find_alt_roce(struct smc_link_group *lgr,
+ struct smc_init_info *ini,
+ struct smc_ib_device *known_dev)
+{
+ _smc_pnet_find_roce_by_pnetid(lgr->pnet_id, ini, known_dev);
+}
+
/* if handshake network device belongs to a roce device, return its
* IB device and port
*/
@@ -801,8 +901,6 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
struct smc_init_info *ini)
{
u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
- struct smc_ib_device *ibdev;
- int i;
ndev = pnet_find_base_ndev(ndev);
if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
@@ -811,25 +909,7 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
smc_pnet_find_rdma_dev(ndev, ini);
return; /* pnetid could not be determined */
}
-
- spin_lock(&smc_ib_devices.lock);
- list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
- for (i = 1; i <= SMC_MAX_PORTS; i++) {
- if (!rdma_is_port_valid(ibdev->ibdev, i))
- continue;
- if (smc_pnet_match(ibdev->pnetid[i - 1], ndev_pnetid) &&
- smc_ib_port_active(ibdev, i) &&
- !test_bit(i - 1, ibdev->ports_going_away) &&
- !smc_ib_determine_gid(ibdev, i, ini->vlan_id,
- ini->ib_gid, NULL)) {
- ini->ib_dev = ibdev;
- ini->ib_port = i;
- goto out;
- }
- }
- }
-out:
- spin_unlock(&smc_ib_devices.lock);
+ _smc_pnet_find_roce_by_pnetid(ndev_pnetid, ini, NULL);
}
static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
@@ -895,3 +975,60 @@ out_rel:
out:
return;
}
+
+/* Lookup and apply a pnet table entry to the given ib device.
+ */
+int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port)
+{
+ char *ib_name = smcibdev->ibdev->name;
+ struct smc_pnettable *pnettable;
+ struct smc_pnetentry *tmp_pe;
+ struct smc_net *sn;
+ int rc = -ENOENT;
+
+ /* get pnettable for init namespace */
+ sn = net_generic(&init_net, smc_net_id);
+ pnettable = &sn->pnettable;
+
+ read_lock(&pnettable->lock);
+ list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
+ if (tmp_pe->type == SMC_PNET_IB &&
+ !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX) &&
+ tmp_pe->ib_port == ib_port) {
+ smc_pnet_apply_ib(smcibdev, ib_port, tmp_pe->pnet_name);
+ rc = 0;
+ break;
+ }
+ }
+ read_unlock(&pnettable->lock);
+
+ return rc;
+}
+
+/* Lookup and apply a pnet table entry to the given smcd device.
+ */
+int smc_pnetid_by_table_smcd(struct smcd_dev *smcddev)
+{
+ const char *ib_name = dev_name(&smcddev->dev);
+ struct smc_pnettable *pnettable;
+ struct smc_pnetentry *tmp_pe;
+ struct smc_net *sn;
+ int rc = -ENOENT;
+
+ /* get pnettable for init namespace */
+ sn = net_generic(&init_net, smc_net_id);
+ pnettable = &sn->pnettable;
+
+ read_lock(&pnettable->lock);
+ list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
+ if (tmp_pe->type == SMC_PNET_IB &&
+ !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) {
+ smc_pnet_apply_smcd(smcddev, tmp_pe->pnet_name);
+ rc = 0;
+ break;
+ }
+ }
+ read_unlock(&pnettable->lock);
+
+ return rc;
+}
diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h
index 4564e4d69c2e..811a65986691 100644
--- a/net/smc/smc_pnet.h
+++ b/net/smc/smc_pnet.h
@@ -19,6 +19,7 @@
struct smc_ib_device;
struct smcd_dev;
struct smc_init_info;
+struct smc_link_group;
/**
* struct smc_pnettable - SMC PNET table anchor
@@ -46,5 +47,9 @@ void smc_pnet_exit(void);
void smc_pnet_net_exit(struct net *net);
void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini);
void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini);
-
+int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port);
+int smc_pnetid_by_table_smcd(struct smcd_dev *smcd);
+void smc_pnet_find_alt_roce(struct smc_link_group *lgr,
+ struct smc_init_info *ini,
+ struct smc_ib_device *known_dev);
#endif
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 9f1ade86d70e..54ba0443847e 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -269,22 +269,21 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
int num_sges, struct ib_rdma_wr *rdma_wr)
{
struct smc_link_group *lgr = conn->lgr;
- struct smc_link *link;
+ struct smc_link *link = conn->lnk;
int rc;
- link = &lgr->lnk[SMC_SINGLE_LINK];
rdma_wr->wr.wr_id = smc_wr_tx_get_next_wr_id(link);
rdma_wr->wr.num_sge = num_sges;
rdma_wr->remote_addr =
- lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr +
+ lgr->rtokens[conn->rtoken_idx][link->link_idx].dma_addr +
/* RMBE within RMB */
conn->tx_off +
/* offset within RMBE */
peer_rmbe_offset;
- rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
+ rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][link->link_idx].rkey;
rc = ib_post_send(link->roce_qp, &rdma_wr->wr, NULL);
if (rc)
- smc_lgr_terminate_sched(lgr);
+ smcr_link_down_cond_sched(link);
return rc;
}
@@ -310,8 +309,10 @@ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
size_t dst_off, size_t dst_len,
struct smc_rdma_wr *wr_rdma_buf)
{
+ struct smc_link *link = conn->lnk;
+
dma_addr_t dma_addr =
- sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl);
+ sg_dma_address(conn->sndbuf_desc->sgt[link->link_idx].sgl);
int src_len_sum = src_len, dst_len_sum = dst_len;
int sent_count = src_off;
int srcchunk, dstchunk;
@@ -481,12 +482,13 @@ static int smc_tx_rdma_writes(struct smc_connection *conn,
static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
{
struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags;
+ struct smc_link *link = conn->lnk;
struct smc_rdma_wr *wr_rdma_buf;
struct smc_cdc_tx_pend *pend;
struct smc_wr_buf *wr_buf;
int rc;
- rc = smc_cdc_get_free_slot(conn, &wr_buf, &wr_rdma_buf, &pend);
+ rc = smc_cdc_get_free_slot(conn, link, &wr_buf, &wr_rdma_buf, &pend);
if (rc < 0) {
if (rc == -EBUSY) {
struct smc_sock *smc =
@@ -504,10 +506,17 @@ static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
}
spin_lock_bh(&conn->send_lock);
+ if (link != conn->lnk) {
+ /* link of connection changed, tx_work will restart */
+ smc_wr_tx_put_slot(link,
+ (struct smc_wr_tx_pend_priv *)pend);
+ rc = -ENOLINK;
+ goto out_unlock;
+ }
if (!pflags->urg_data_present) {
rc = smc_tx_rdma_writes(conn, wr_rdma_buf);
if (rc) {
- smc_wr_tx_put_slot(&conn->lgr->lnk[SMC_SINGLE_LINK],
+ smc_wr_tx_put_slot(link,
(struct smc_wr_tx_pend_priv *)pend);
goto out_unlock;
}
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 337ee52ad3d3..7239ba9b99dc 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -44,6 +44,7 @@ struct smc_wr_tx_pend { /* control data for a pending send request */
struct smc_link *link;
u32 idx;
struct smc_wr_tx_pend_priv priv;
+ u8 compl_requested;
};
/******************************** send queue *********************************/
@@ -61,7 +62,7 @@ static inline bool smc_wr_is_tx_pend(struct smc_link *link)
}
/* wait till all pending tx work requests on the given link are completed */
-static inline int smc_wr_tx_wait_no_pending_sends(struct smc_link *link)
+int smc_wr_tx_wait_no_pending_sends(struct smc_link *link)
{
if (wait_event_timeout(link->wr_tx_wait, !smc_wr_is_tx_pend(link),
SMC_WR_TX_WAIT_PENDING_TIME))
@@ -103,6 +104,8 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
if (pnd_snd_idx == link->wr_tx_cnt)
return;
link->wr_tx_pends[pnd_snd_idx].wc_status = wc->status;
+ if (link->wr_tx_pends[pnd_snd_idx].compl_requested)
+ complete(&link->wr_tx_compl[pnd_snd_idx]);
memcpy(&pnd_snd, &link->wr_tx_pends[pnd_snd_idx], sizeof(pnd_snd));
/* clear the full struct smc_wr_tx_pend including .priv */
memset(&link->wr_tx_pends[pnd_snd_idx], 0,
@@ -120,8 +123,8 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
sizeof(link->wr_tx_bufs[i]));
clear_bit(i, link->wr_tx_mask);
}
- /* terminate connections of this link group abnormally */
- smc_lgr_terminate_sched(smc_get_lgr(link));
+ /* terminate link */
+ smcr_link_down_cond_sched(link);
}
if (pnd_snd.handler)
pnd_snd.handler(&pnd_snd.priv, link, wc->status);
@@ -207,13 +210,13 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
} else {
rc = wait_event_interruptible_timeout(
link->wr_tx_wait,
- link->state == SMC_LNK_INACTIVE ||
+ !smc_link_usable(link) ||
lgr->terminating ||
(smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
SMC_WR_TX_WAIT_FREE_SLOT_TIME);
if (!rc) {
- /* timeout - terminate connections */
- smc_lgr_terminate_sched(lgr);
+ /* timeout - terminate link */
+ smcr_link_down_cond_sched(link);
return -EPIPE;
}
if (idx == link->wr_tx_cnt)
@@ -270,11 +273,38 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], NULL);
if (rc) {
smc_wr_tx_put_slot(link, priv);
- smc_lgr_terminate_sched(smc_get_lgr(link));
+ smcr_link_down_cond_sched(link);
}
return rc;
}
+/* Send prepared WR slot via ib_post_send and wait for send completion
+ * notification.
+ * @priv: pointer to smc_wr_tx_pend_priv identifying prepared message buffer
+ */
+int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv,
+ unsigned long timeout)
+{
+ struct smc_wr_tx_pend *pend;
+ int rc;
+
+ pend = container_of(priv, struct smc_wr_tx_pend, priv);
+ pend->compl_requested = 1;
+ init_completion(&link->wr_tx_compl[pend->idx]);
+
+ rc = smc_wr_tx_send(link, priv);
+ if (rc)
+ return rc;
+ /* wait for completion by smc_wr_tx_process_cqe() */
+ rc = wait_for_completion_interruptible_timeout(
+ &link->wr_tx_compl[pend->idx], timeout);
+ if (rc <= 0)
+ rc = -ENODATA;
+ if (rc > 0)
+ rc = 0;
+ return rc;
+}
+
/* Register a memory region and wait for result. */
int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
{
@@ -294,8 +324,8 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
(link->wr_reg_state != POSTED),
SMC_WR_REG_MR_WAIT_TIME);
if (!rc) {
- /* timeout - terminate connections */
- smc_lgr_terminate_sched(smc_get_lgr(link));
+ /* timeout - terminate link */
+ smcr_link_down_cond_sched(link);
return -EPIPE;
}
if (rc == -ERESTARTSYS)
@@ -393,10 +423,7 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num)
case IB_WC_RETRY_EXC_ERR:
case IB_WC_RNR_RETRY_EXC_ERR:
case IB_WC_WR_FLUSH_ERR:
- /* terminate connections of this link group
- * abnormally
- */
- smc_lgr_terminate_sched(smc_get_lgr(link));
+ smcr_link_down_cond_sched(link);
break;
default:
smc_wr_rx_post(link); /* refill WR RX */
@@ -558,6 +585,8 @@ void smc_wr_free_link(struct smc_link *lnk)
void smc_wr_free_link_mem(struct smc_link *lnk)
{
+ kfree(lnk->wr_tx_compl);
+ lnk->wr_tx_compl = NULL;
kfree(lnk->wr_tx_pends);
lnk->wr_tx_pends = NULL;
kfree(lnk->wr_tx_mask);
@@ -628,8 +657,15 @@ int smc_wr_alloc_link_mem(struct smc_link *link)
GFP_KERNEL);
if (!link->wr_tx_pends)
goto no_mem_wr_tx_mask;
+ link->wr_tx_compl = kcalloc(SMC_WR_BUF_CNT,
+ sizeof(link->wr_tx_compl[0]),
+ GFP_KERNEL);
+ if (!link->wr_tx_compl)
+ goto no_mem_wr_tx_pends;
return 0;
+no_mem_wr_tx_pends:
+ kfree(link->wr_tx_pends);
no_mem_wr_tx_mask:
kfree(link->wr_tx_mask);
no_mem_wr_rx_sges:
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index 3ac99c898418..423b8709f1c9 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -101,11 +101,14 @@ int smc_wr_tx_put_slot(struct smc_link *link,
struct smc_wr_tx_pend_priv *wr_pend_priv);
int smc_wr_tx_send(struct smc_link *link,
struct smc_wr_tx_pend_priv *wr_pend_priv);
+int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv,
+ unsigned long timeout);
void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context);
void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type,
smc_wr_tx_filter filter,
smc_wr_tx_dismisser dismisser,
unsigned long data);
+int smc_wr_tx_wait_no_pending_sends(struct smc_link *link);
int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler);
int smc_wr_rx_post_init(struct smc_link *link);