diff options
Diffstat (limited to 'net/smc')
-rw-r--r-- | net/smc/af_smc.c | 881 | ||||
-rw-r--r-- | net/smc/smc.h | 19 | ||||
-rw-r--r-- | net/smc/smc_cdc.c | 4 | ||||
-rw-r--r-- | net/smc/smc_clc.c | 500 | ||||
-rw-r--r-- | net/smc/smc_clc.h | 250 | ||||
-rw-r--r-- | net/smc/smc_close.c | 4 | ||||
-rw-r--r-- | net/smc/smc_core.c | 82 | ||||
-rw-r--r-- | net/smc/smc_core.h | 24 | ||||
-rw-r--r-- | net/smc/smc_diag.c | 30 | ||||
-rw-r--r-- | net/smc/smc_ism.c | 32 | ||||
-rw-r--r-- | net/smc/smc_ism.h | 8 | ||||
-rw-r--r-- | net/smc/smc_llc.c | 21 | ||||
-rw-r--r-- | net/smc/smc_netns.h | 1 | ||||
-rw-r--r-- | net/smc/smc_pnet.c | 174 | ||||
-rw-r--r-- | net/smc/smc_pnet.h | 15 | ||||
-rw-r--r-- | net/smc/smc_tx.c | 10 |
16 files changed, 1494 insertions, 561 deletions
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index e7649bbc2b87..82be0bd0f6e8 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -26,6 +26,7 @@ #include <linux/sched/signal.h> #include <linux/if_vlan.h> #include <linux/rcupdate_wait.h> +#include <linux/ctype.h> #include <net/sock.h> #include <net/tcp.h> @@ -55,6 +56,9 @@ static DEFINE_MUTEX(smc_client_lgr_pending); /* serialize link group * creation on client */ +struct workqueue_struct *smc_hs_wq; /* wq for handshake work */ +struct workqueue_struct *smc_close_wq; /* wq for close work */ + static void smc_tcp_listen_work(struct work_struct *); static void smc_connect_work(struct work_struct *); @@ -436,26 +440,52 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc) static void smcr_conn_save_peer_info(struct smc_sock *smc, struct smc_clc_msg_accept_confirm *clc) { - int bufsize = smc_uncompress_bufsize(clc->rmbe_size); + int bufsize = smc_uncompress_bufsize(clc->r0.rmbe_size); - smc->conn.peer_rmbe_idx = clc->rmbe_idx; - smc->conn.local_tx_ctrl.token = ntohl(clc->rmbe_alert_token); + smc->conn.peer_rmbe_idx = clc->r0.rmbe_idx; + smc->conn.local_tx_ctrl.token = ntohl(clc->r0.rmbe_alert_token); smc->conn.peer_rmbe_size = bufsize; atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size); smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1); } +static bool smc_isascii(char *hostname) +{ + int i; + + for (i = 0; i < SMC_MAX_HOSTNAME_LEN; i++) + if (!isascii(hostname[i])) + return false; + return true; +} + static void smcd_conn_save_peer_info(struct smc_sock *smc, struct smc_clc_msg_accept_confirm *clc) { - int bufsize = smc_uncompress_bufsize(clc->dmbe_size); + int bufsize = smc_uncompress_bufsize(clc->d0.dmbe_size); - smc->conn.peer_rmbe_idx = clc->dmbe_idx; - smc->conn.peer_token = clc->token; + smc->conn.peer_rmbe_idx = clc->d0.dmbe_idx; + smc->conn.peer_token = clc->d0.token; /* msg header takes up space in the buffer */ smc->conn.peer_rmbe_size = bufsize - sizeof(struct 
smcd_cdc_msg); atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size); smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx; + if (clc->hdr.version > SMC_V1 && + (clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK)) { + struct smc_clc_msg_accept_confirm_v2 *clc_v2 = + (struct smc_clc_msg_accept_confirm_v2 *)clc; + struct smc_clc_first_contact_ext *fce = + (struct smc_clc_first_contact_ext *) + (((u8 *)clc_v2) + sizeof(*clc_v2)); + + memcpy(smc->conn.lgr->negotiated_eid, clc_v2->eid, + SMC_MAX_EID_LEN); + smc->conn.lgr->peer_os = fce->os_type; + smc->conn.lgr->peer_smc_release = fce->release; + if (smc_isascii(fce->hostname)) + memcpy(smc->conn.lgr->peer_hostname, fce->hostname, + SMC_MAX_HOSTNAME_LEN); + } } static void smc_conn_save_peer_info(struct smc_sock *smc, @@ -470,11 +500,11 @@ static void smc_conn_save_peer_info(struct smc_sock *smc, static void smc_link_save_peer_info(struct smc_link *link, struct smc_clc_msg_accept_confirm *clc) { - link->peer_qpn = ntoh24(clc->qpn); - memcpy(link->peer_gid, clc->lcl.gid, SMC_GID_SIZE); - memcpy(link->peer_mac, clc->lcl.mac, sizeof(link->peer_mac)); - link->peer_psn = ntoh24(clc->psn); - link->peer_mtu = clc->qp_mtu; + link->peer_qpn = ntoh24(clc->r0.qpn); + memcpy(link->peer_gid, clc->r0.lcl.gid, SMC_GID_SIZE); + memcpy(link->peer_mac, clc->r0.lcl.mac, sizeof(link->peer_mac)); + link->peer_psn = ntoh24(clc->r0.psn); + link->peer_mtu = clc->r0.qp_mtu; } static void smc_switch_to_fallback(struct smc_sock *smc) @@ -501,7 +531,8 @@ static int smc_connect_fallback(struct smc_sock *smc, int reason_code) } /* decline and fall back during connect */ -static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code) +static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code, + u8 version) { int rc; @@ -511,7 +542,7 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code) return reason_code; } if (reason_code != SMC_CLC_DECL_PEERDECL) { - rc = smc_clc_send_decline(smc, 
reason_code); + rc = smc_clc_send_decline(smc, reason_code, version); if (rc < 0) { if (smc->sk.sk_state == SMC_INIT) sock_put(&smc->sk); /* passive closing */ @@ -522,23 +553,12 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code) } /* abort connecting */ -static int smc_connect_abort(struct smc_sock *smc, int reason_code, - int local_contact) +static void smc_connect_abort(struct smc_sock *smc, int local_first) { - bool is_smcd = smc->conn.lgr->is_smcd; - - if (local_contact == SMC_FIRST_CONTACT) + if (local_first) smc_lgr_cleanup_early(&smc->conn); else smc_conn_free(&smc->conn); - if (is_smcd) - /* there is only one lgr role for SMC-D; use server lock */ - mutex_unlock(&smc_server_lgr_pending); - else - mutex_unlock(&smc_client_lgr_pending); - - smc->connect_nonblock = 0; - return reason_code; } /* check if there is a rdma device available for this connection. */ @@ -561,47 +581,137 @@ static int smc_find_ism_device(struct smc_sock *smc, struct smc_init_info *ini) { /* Find ISM device with same PNETID as connecting interface */ smc_pnet_find_ism_resource(smc->clcsock->sk, ini); - if (!ini->ism_dev) + if (!ini->ism_dev[0]) return SMC_CLC_DECL_NOSMCDDEV; + else + ini->ism_chid[0] = smc_ism_get_chid(ini->ism_dev[0]); return 0; } +/* is chid unique for the ism devices that are already determined? */ +static bool smc_find_ism_v2_is_unique_chid(u16 chid, struct smc_init_info *ini, + int cnt) +{ + int i = (!ini->ism_dev[0]) ? 
1 : 0; + + for (; i < cnt; i++) + if (ini->ism_chid[i] == chid) + return false; + return true; +} + +/* determine possible V2 ISM devices (either without PNETID or with PNETID plus + * PNETID matching net_device) + */ +static int smc_find_ism_v2_device_clnt(struct smc_sock *smc, + struct smc_init_info *ini) +{ + int rc = SMC_CLC_DECL_NOSMCDDEV; + struct smcd_dev *smcd; + int i = 1; + u16 chid; + + if (smcd_indicated(ini->smc_type_v1)) + rc = 0; /* already initialized for V1 */ + mutex_lock(&smcd_dev_list.mutex); + list_for_each_entry(smcd, &smcd_dev_list.list, list) { + if (smcd->going_away || smcd == ini->ism_dev[0]) + continue; + chid = smc_ism_get_chid(smcd); + if (!smc_find_ism_v2_is_unique_chid(chid, ini, i)) + continue; + if (!smc_pnet_is_pnetid_set(smcd->pnetid) || + smc_pnet_is_ndev_pnetid(sock_net(&smc->sk), smcd->pnetid)) { + ini->ism_dev[i] = smcd; + ini->ism_chid[i] = chid; + ini->is_smcd = true; + rc = 0; + i++; + if (i > SMC_MAX_ISM_DEVS) + break; + } + } + mutex_unlock(&smcd_dev_list.mutex); + ini->ism_offered_cnt = i - 1; + if (!ini->ism_dev[0] && !ini->ism_dev[1]) + ini->smcd_version = 0; + + return rc; +} + /* Check for VLAN ID and register it on ISM device just for CLC handshake */ static int smc_connect_ism_vlan_setup(struct smc_sock *smc, struct smc_init_info *ini) { - if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) + if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev[0], ini->vlan_id)) return SMC_CLC_DECL_ISMVLANERR; return 0; } +static int smc_find_proposal_devices(struct smc_sock *smc, + struct smc_init_info *ini) +{ + int rc = 0; + + /* check if there is an ism device available */ + if (ini->smcd_version & SMC_V1) { + if (smc_find_ism_device(smc, ini) || + smc_connect_ism_vlan_setup(smc, ini)) { + if (ini->smc_type_v1 == SMC_TYPE_B) + ini->smc_type_v1 = SMC_TYPE_R; + else + ini->smc_type_v1 = SMC_TYPE_N; + } /* else ISM V1 is supported for this connection */ + if (smc_find_rdma_device(smc, ini)) { + if (ini->smc_type_v1 == 
SMC_TYPE_B) + ini->smc_type_v1 = SMC_TYPE_D; + else + ini->smc_type_v1 = SMC_TYPE_N; + } /* else RDMA is supported for this connection */ + } + if (smc_ism_v2_capable && smc_find_ism_v2_device_clnt(smc, ini)) + ini->smc_type_v2 = SMC_TYPE_N; + + /* if neither ISM nor RDMA are supported, fallback */ + if (!smcr_indicated(ini->smc_type_v1) && + ini->smc_type_v1 == SMC_TYPE_N && ini->smc_type_v2 == SMC_TYPE_N) + rc = SMC_CLC_DECL_NOSMCDEV; + + return rc; +} + /* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is * used, the VLAN ID will be registered again during the connection setup. */ -static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd, +static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, struct smc_init_info *ini) { - if (!is_smcd) + if (!smcd_indicated(ini->smc_type_v1)) return 0; - if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev, ini->vlan_id)) + if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev[0], ini->vlan_id)) return SMC_CLC_DECL_CNFERR; return 0; } +#define SMC_CLC_MAX_ACCEPT_LEN \ + (sizeof(struct smc_clc_msg_accept_confirm_v2) + \ + sizeof(struct smc_clc_first_contact_ext) + \ + sizeof(struct smc_clc_msg_trail)) + /* CLC handshake during connect */ -static int smc_connect_clc(struct smc_sock *smc, int smc_type, - struct smc_clc_msg_accept_confirm *aclc, +static int smc_connect_clc(struct smc_sock *smc, + struct smc_clc_msg_accept_confirm_v2 *aclc2, struct smc_init_info *ini) { int rc = 0; /* do inband token exchange */ - rc = smc_clc_send_proposal(smc, smc_type, ini); + rc = smc_clc_send_proposal(smc, ini); if (rc) return rc; /* receive SMC Accept CLC message */ - return smc_clc_wait_msg(smc, aclc, sizeof(*aclc), SMC_CLC_ACCEPT, - CLC_WAIT_TIME); + return smc_clc_wait_msg(smc, aclc2, SMC_CLC_MAX_ACCEPT_LEN, + SMC_CLC_ACCEPT, CLC_WAIT_TIME); } /* setup for RDMA connection of client */ @@ -613,9 +723,9 @@ static int smc_connect_rdma(struct smc_sock *smc, struct smc_link *link; ini->is_smcd = false; - 
ini->ib_lcl = &aclc->lcl; - ini->ib_clcqpn = ntoh24(aclc->qpn); - ini->srv_first_contact = aclc->hdr.flag; + ini->ib_lcl = &aclc->r0.lcl; + ini->ib_clcqpn = ntoh24(aclc->r0.qpn); + ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK; mutex_lock(&smc_client_lgr_pending); reason_code = smc_conn_create(smc, ini); @@ -626,7 +736,7 @@ static int smc_connect_rdma(struct smc_sock *smc, smc_conn_save_peer_info(smc, aclc); - if (ini->cln_first_contact == SMC_FIRST_CONTACT) { + if (ini->first_contact_local) { link = smc->conn.lnk; } else { /* set link that was assigned by server */ @@ -634,60 +744,66 @@ static int smc_connect_rdma(struct smc_sock *smc, for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { struct smc_link *l = &smc->conn.lgr->lnk[i]; - if (l->peer_qpn == ntoh24(aclc->qpn) && - !memcmp(l->peer_gid, &aclc->lcl.gid, SMC_GID_SIZE) && - !memcmp(l->peer_mac, &aclc->lcl.mac, sizeof(l->peer_mac))) { + if (l->peer_qpn == ntoh24(aclc->r0.qpn) && + !memcmp(l->peer_gid, &aclc->r0.lcl.gid, + SMC_GID_SIZE) && + !memcmp(l->peer_mac, &aclc->r0.lcl.mac, + sizeof(l->peer_mac))) { link = l; break; } } - if (!link) - return smc_connect_abort(smc, SMC_CLC_DECL_NOSRVLINK, - ini->cln_first_contact); + if (!link) { + reason_code = SMC_CLC_DECL_NOSRVLINK; + goto connect_abort; + } smc->conn.lnk = link; } /* create send buffer and rmb */ - if (smc_buf_create(smc, false)) - return smc_connect_abort(smc, SMC_CLC_DECL_MEM, - ini->cln_first_contact); + if (smc_buf_create(smc, false)) { + reason_code = SMC_CLC_DECL_MEM; + goto connect_abort; + } - if (ini->cln_first_contact == SMC_FIRST_CONTACT) + if (ini->first_contact_local) smc_link_save_peer_info(link, aclc); - if (smc_rmb_rtoken_handling(&smc->conn, link, aclc)) - return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK, - ini->cln_first_contact); + if (smc_rmb_rtoken_handling(&smc->conn, link, aclc)) { + reason_code = SMC_CLC_DECL_ERR_RTOK; + goto connect_abort; + } smc_close_init(smc); smc_rx_init(smc); - if 
(ini->cln_first_contact == SMC_FIRST_CONTACT) { - if (smc_ib_ready_link(link)) - return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK, - ini->cln_first_contact); + if (ini->first_contact_local) { + if (smc_ib_ready_link(link)) { + reason_code = SMC_CLC_DECL_ERR_RDYLNK; + goto connect_abort; + } } else { - if (smcr_lgr_reg_rmbs(link, smc->conn.rmb_desc)) - return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB, - ini->cln_first_contact); + if (smcr_lgr_reg_rmbs(link, smc->conn.rmb_desc)) { + reason_code = SMC_CLC_DECL_ERR_REGRMB; + goto connect_abort; + } } smc_rmb_sync_sg_for_device(&smc->conn); - reason_code = smc_clc_send_confirm(smc); + reason_code = smc_clc_send_confirm(smc, ini->first_contact_local, + SMC_V1); if (reason_code) - return smc_connect_abort(smc, reason_code, - ini->cln_first_contact); + goto connect_abort; smc_tx_init(smc); - if (ini->cln_first_contact == SMC_FIRST_CONTACT) { + if (ini->first_contact_local) { /* QP confirmation over RoCE fabric */ smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK); reason_code = smcr_clnt_conf_first_link(smc); smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl); if (reason_code) - return smc_connect_abort(smc, reason_code, - ini->cln_first_contact); + goto connect_abort; } mutex_unlock(&smc_client_lgr_pending); @@ -697,6 +813,31 @@ static int smc_connect_rdma(struct smc_sock *smc, smc->sk.sk_state = SMC_ACTIVE; return 0; +connect_abort: + smc_connect_abort(smc, ini->first_contact_local); + mutex_unlock(&smc_client_lgr_pending); + smc->connect_nonblock = 0; + + return reason_code; +} + +/* The server has chosen one of the proposed ISM devices for the communication. + * Determine from the CHID of the received CLC ACCEPT the ISM device chosen. 
+ */ +static int +smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm_v2 *aclc, + struct smc_init_info *ini) +{ + int i; + + for (i = 0; i < ini->ism_offered_cnt + 1; i++) { + if (ini->ism_chid[i] == ntohs(aclc->chid)) { + ini->ism_selected = i; + return 0; + } + } + + return -EPROTO; } /* setup for ISM connection of client */ @@ -707,8 +848,17 @@ static int smc_connect_ism(struct smc_sock *smc, int rc = 0; ini->is_smcd = true; - ini->ism_gid = aclc->gid; - ini->srv_first_contact = aclc->hdr.flag; + ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK; + + if (aclc->hdr.version == SMC_V2) { + struct smc_clc_msg_accept_confirm_v2 *aclc_v2 = + (struct smc_clc_msg_accept_confirm_v2 *)aclc; + + rc = smc_v2_determine_accepted_chid(aclc_v2, ini); + if (rc) + return rc; + } + ini->ism_peer_gid[ini->ism_selected] = aclc->d0.gid; /* there is only one lgr role for SMC-D; use server lock */ mutex_lock(&smc_server_lgr_pending); @@ -720,20 +870,20 @@ static int smc_connect_ism(struct smc_sock *smc, /* Create send and receive buffers */ rc = smc_buf_create(smc, true); - if (rc) - return smc_connect_abort(smc, (rc == -ENOSPC) ? - SMC_CLC_DECL_MAX_DMB : - SMC_CLC_DECL_MEM, - ini->cln_first_contact); + if (rc) { + rc = (rc == -ENOSPC) ? 
SMC_CLC_DECL_MAX_DMB : SMC_CLC_DECL_MEM; + goto connect_abort; + } smc_conn_save_peer_info(smc, aclc); smc_close_init(smc); smc_rx_init(smc); smc_tx_init(smc); - rc = smc_clc_send_confirm(smc); + rc = smc_clc_send_confirm(smc, ini->first_contact_local, + aclc->hdr.version); if (rc) - return smc_connect_abort(smc, rc, ini->cln_first_contact); + goto connect_abort; mutex_unlock(&smc_server_lgr_pending); smc_copy_sock_settings_to_clc(smc); @@ -742,15 +892,40 @@ static int smc_connect_ism(struct smc_sock *smc, smc->sk.sk_state = SMC_ACTIVE; return 0; +connect_abort: + smc_connect_abort(smc, ini->first_contact_local); + mutex_unlock(&smc_server_lgr_pending); + smc->connect_nonblock = 0; + + return rc; +} + +/* check if received accept type and version matches a proposed one */ +static int smc_connect_check_aclc(struct smc_init_info *ini, + struct smc_clc_msg_accept_confirm *aclc) +{ + if ((aclc->hdr.typev1 == SMC_TYPE_R && + !smcr_indicated(ini->smc_type_v1)) || + (aclc->hdr.typev1 == SMC_TYPE_D && + ((!smcd_indicated(ini->smc_type_v1) && + !smcd_indicated(ini->smc_type_v2)) || + (aclc->hdr.version == SMC_V1 && + !smcd_indicated(ini->smc_type_v1)) || + (aclc->hdr.version == SMC_V2 && + !smcd_indicated(ini->smc_type_v2))))) + return SMC_CLC_DECL_MODEUNSUPP; + + return 0; } /* perform steps before actually connecting */ static int __smc_connect(struct smc_sock *smc) { - bool ism_supported = false, rdma_supported = false; - struct smc_clc_msg_accept_confirm aclc; - struct smc_init_info ini = {0}; - int smc_type; + u8 version = smc_ism_v2_capable ? 
SMC_V2 : SMC_V1; + struct smc_clc_msg_accept_confirm_v2 *aclc2; + struct smc_clc_msg_accept_confirm *aclc; + struct smc_init_info *ini = NULL; + u8 *buf = NULL; int rc = 0; if (smc->use_fallback) @@ -760,58 +935,73 @@ static int __smc_connect(struct smc_sock *smc) if (!tcp_sk(smc->clcsock->sk)->syn_smc) return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC); - /* IPSec connections opt out of SMC-R optimizations */ + /* IPSec connections opt out of SMC optimizations */ if (using_ipsec(smc)) - return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC); + return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC, + version); - /* get vlan id from IP device */ - if (smc_vlan_by_tcpsk(smc->clcsock, &ini)) - return smc_connect_decline_fallback(smc, - SMC_CLC_DECL_GETVLANERR); + ini = kzalloc(sizeof(*ini), GFP_KERNEL); + if (!ini) + return smc_connect_decline_fallback(smc, SMC_CLC_DECL_MEM, + version); - /* check if there is an ism device available */ - if (!smc_find_ism_device(smc, &ini) && - !smc_connect_ism_vlan_setup(smc, &ini)) { - /* ISM is supported for this connection */ - ism_supported = true; - smc_type = SMC_TYPE_D; - } - - /* check if there is a rdma device available */ - if (!smc_find_rdma_device(smc, &ini)) { - /* RDMA is supported for this connection */ - rdma_supported = true; - if (ism_supported) - smc_type = SMC_TYPE_B; /* both */ - else - smc_type = SMC_TYPE_R; /* only RDMA */ + ini->smcd_version = SMC_V1; + ini->smcd_version |= smc_ism_v2_capable ? SMC_V2 : 0; + ini->smc_type_v1 = SMC_TYPE_B; + ini->smc_type_v2 = smc_ism_v2_capable ? 
SMC_TYPE_D : SMC_TYPE_N; + + /* get vlan id from IP device */ + if (smc_vlan_by_tcpsk(smc->clcsock, ini)) { + ini->smcd_version &= ~SMC_V1; + ini->smc_type_v1 = SMC_TYPE_N; + if (!ini->smcd_version) { + rc = SMC_CLC_DECL_GETVLANERR; + goto fallback; + } } - /* if neither ISM nor RDMA are supported, fallback */ - if (!rdma_supported && !ism_supported) - return smc_connect_decline_fallback(smc, SMC_CLC_DECL_NOSMCDEV); + rc = smc_find_proposal_devices(smc, ini); + if (rc) + goto fallback; - /* perform CLC handshake */ - rc = smc_connect_clc(smc, smc_type, &aclc, &ini); - if (rc) { - smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini); - return smc_connect_decline_fallback(smc, rc); + buf = kzalloc(SMC_CLC_MAX_ACCEPT_LEN, GFP_KERNEL); + if (!buf) { + rc = SMC_CLC_DECL_MEM; + goto fallback; } + aclc2 = (struct smc_clc_msg_accept_confirm_v2 *)buf; + aclc = (struct smc_clc_msg_accept_confirm *)aclc2; + + /* perform CLC handshake */ + rc = smc_connect_clc(smc, aclc2, ini); + if (rc) + goto vlan_cleanup; + + /* check if smc modes and versions of CLC proposal and accept match */ + rc = smc_connect_check_aclc(ini, aclc); + version = aclc->hdr.version == SMC_V1 ? 
SMC_V1 : version; + if (rc) + goto vlan_cleanup; /* depending on previous steps, connect using rdma or ism */ - if (rdma_supported && aclc.hdr.path == SMC_TYPE_R) - rc = smc_connect_rdma(smc, &aclc, &ini); - else if (ism_supported && aclc.hdr.path == SMC_TYPE_D) - rc = smc_connect_ism(smc, &aclc, &ini); - else - rc = SMC_CLC_DECL_MODEUNSUPP; - if (rc) { - smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini); - return smc_connect_decline_fallback(smc, rc); - } + if (aclc->hdr.typev1 == SMC_TYPE_R) + rc = smc_connect_rdma(smc, aclc, ini); + else if (aclc->hdr.typev1 == SMC_TYPE_D) + rc = smc_connect_ism(smc, aclc, ini); + if (rc) + goto vlan_cleanup; - smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini); + smc_connect_ism_vlan_cleanup(smc, ini); + kfree(buf); + kfree(ini); return 0; + +vlan_cleanup: + smc_connect_ism_vlan_cleanup(smc, ini); + kfree(buf); +fallback: + kfree(ini); + return smc_connect_decline_fallback(smc, rc, version); } static void smc_connect_work(struct work_struct *work) @@ -903,7 +1093,7 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr, if (smc->use_fallback) goto out; if (flags & O_NONBLOCK) { - if (schedule_work(&smc->connect_work)) + if (queue_work(smc_hs_wq, &smc->connect_work)) smc->connect_nonblock = 1; rc = -EINPROGRESS; } else { @@ -940,10 +1130,10 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) mutex_lock(&lsmc->clcsock_release_lock); if (lsmc->clcsock) - rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0); + rc = kernel_accept(lsmc->clcsock, &new_clcsock, SOCK_NONBLOCK); mutex_unlock(&lsmc->clcsock_release_lock); lock_sock(lsk); - if (rc < 0) + if (rc < 0 && rc != -EAGAIN) lsk->sk_err = -rc; if (rc < 0 || lsk->sk_state == SMC_CLOSED) { new_sk->sk_prot->unhash(new_sk); @@ -956,6 +1146,10 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) goto out; } + /* new clcsock has inherited the smc listen-specific sk_data_ready + * function; switch it back to 
the original sk_data_ready function + */ + new_clcsock->sk->sk_data_ready = lsmc->clcsk_data_ready; (*new_smc)->clcsock = new_clcsock; out: return rc; @@ -1123,10 +1317,10 @@ static void smc_listen_out_err(struct smc_sock *new_smc) /* listen worker: decline and fall back if possible */ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code, - int local_contact) + struct smc_init_info *ini, u8 version) { /* RDMA setup failed, switch back to TCP */ - if (local_contact == SMC_FIRST_CONTACT) + if (ini->first_contact_local) smc_lgr_cleanup_early(&new_smc->conn); else smc_conn_free(&new_smc->conn); @@ -1137,7 +1331,7 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code, smc_switch_to_fallback(new_smc); new_smc->fallback_rsn = reason_code; if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) { - if (smc_clc_send_decline(new_smc, reason_code) < 0) { + if (smc_clc_send_decline(new_smc, reason_code, version) < 0) { smc_listen_out_err(new_smc); return; } @@ -1145,6 +1339,47 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code, smc_listen_out_connected(new_smc); } +/* listen worker: version checking */ +static int smc_listen_v2_check(struct smc_sock *new_smc, + struct smc_clc_msg_proposal *pclc, + struct smc_init_info *ini) +{ + struct smc_clc_smcd_v2_extension *pclc_smcd_v2_ext; + struct smc_clc_v2_extension *pclc_v2_ext; + + ini->smc_type_v1 = pclc->hdr.typev1; + ini->smc_type_v2 = pclc->hdr.typev2; + ini->smcd_version = ini->smc_type_v1 != SMC_TYPE_N ? SMC_V1 : 0; + if (pclc->hdr.version > SMC_V1) + ini->smcd_version |= + ini->smc_type_v2 != SMC_TYPE_N ? 
SMC_V2 : 0; + if (!smc_ism_v2_capable) { + ini->smcd_version &= ~SMC_V2; + goto out; + } + pclc_v2_ext = smc_get_clc_v2_ext(pclc); + if (!pclc_v2_ext) { + ini->smcd_version &= ~SMC_V2; + goto out; + } + pclc_smcd_v2_ext = smc_get_clc_smcd_v2_ext(pclc_v2_ext); + if (!pclc_smcd_v2_ext) + ini->smcd_version &= ~SMC_V2; + +out: + if (!ini->smcd_version) { + if (pclc->hdr.typev1 == SMC_TYPE_B || + pclc->hdr.typev2 == SMC_TYPE_B) + return SMC_CLC_DECL_NOSMCDEV; + if (pclc->hdr.typev1 == SMC_TYPE_D || + pclc->hdr.typev2 == SMC_TYPE_D) + return SMC_CLC_DECL_NOSMCDDEV; + return SMC_CLC_DECL_NOSMCRDEV; + } + + return 0; +} + /* listen worker: check prefixes */ static int smc_listen_prfx_check(struct smc_sock *new_smc, struct smc_clc_msg_proposal *pclc) @@ -1152,6 +1387,8 @@ static int smc_listen_prfx_check(struct smc_sock *new_smc, struct smc_clc_msg_proposal_prefix *pclc_prfx; struct socket *newclcsock = new_smc->clcsock; + if (pclc->hdr.typev1 == SMC_TYPE_N) + return 0; pclc_prfx = smc_clc_proposal_get_prefix(pclc); if (smc_clc_prfx_match(newclcsock, pclc_prfx)) return SMC_CLC_DECL_DIFFPREFIX; @@ -1179,33 +1416,18 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc, /* listen worker: initialize connection and buffers for SMC-D */ static int smc_listen_ism_init(struct smc_sock *new_smc, - struct smc_clc_msg_proposal *pclc, struct smc_init_info *ini) { - struct smc_clc_msg_smcd *pclc_smcd; int rc; - pclc_smcd = smc_get_clc_msg_smcd(pclc); - ini->ism_gid = pclc_smcd->gid; rc = smc_conn_create(new_smc, ini); if (rc) return rc; - /* Check if peer can be reached via ISM device */ - if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid, - new_smc->conn.lgr->vlan_id, - new_smc->conn.lgr->smcd)) { - if (ini->cln_first_contact == SMC_FIRST_CONTACT) - smc_lgr_cleanup_early(&new_smc->conn); - else - smc_conn_free(&new_smc->conn); - return SMC_CLC_DECL_SMCDNOTALK; - } - /* Create send and receive buffers */ rc = smc_buf_create(new_smc, true); if (rc) { - if (ini->cln_first_contact == 
SMC_FIRST_CONTACT) + if (ini->first_contact_local) smc_lgr_cleanup_early(&new_smc->conn); else smc_conn_free(&new_smc->conn); @@ -1216,12 +1438,135 @@ static int smc_listen_ism_init(struct smc_sock *new_smc, return 0; } +static bool smc_is_already_selected(struct smcd_dev *smcd, + struct smc_init_info *ini, + int matches) +{ + int i; + + for (i = 0; i < matches; i++) + if (smcd == ini->ism_dev[i]) + return true; + + return false; +} + +/* check for ISM devices matching proposed ISM devices */ +static void smc_check_ism_v2_match(struct smc_init_info *ini, + u16 proposed_chid, u64 proposed_gid, + unsigned int *matches) +{ + struct smcd_dev *smcd; + + list_for_each_entry(smcd, &smcd_dev_list.list, list) { + if (smcd->going_away) + continue; + if (smc_is_already_selected(smcd, ini, *matches)) + continue; + if (smc_ism_get_chid(smcd) == proposed_chid && + !smc_ism_cantalk(proposed_gid, ISM_RESERVED_VLANID, smcd)) { + ini->ism_peer_gid[*matches] = proposed_gid; + ini->ism_dev[*matches] = smcd; + (*matches)++; + break; + } + } +} + +static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc, + struct smc_clc_msg_proposal *pclc, + struct smc_init_info *ini) +{ + struct smc_clc_smcd_v2_extension *smcd_v2_ext; + struct smc_clc_v2_extension *smc_v2_ext; + struct smc_clc_msg_smcd *pclc_smcd; + unsigned int matches = 0; + u8 smcd_version; + u8 *eid = NULL; + int i; + + if (!(ini->smcd_version & SMC_V2) || !smcd_indicated(ini->smc_type_v2)) + goto not_found; + + pclc_smcd = smc_get_clc_msg_smcd(pclc); + smc_v2_ext = smc_get_clc_v2_ext(pclc); + smcd_v2_ext = smc_get_clc_smcd_v2_ext(smc_v2_ext); + if (!smcd_v2_ext || + !smc_v2_ext->hdr.flag.seid) /* no system EID support for SMCD */ + goto not_found; + + mutex_lock(&smcd_dev_list.mutex); + if (pclc_smcd->ism.chid) + /* check for ISM device matching proposed native ISM device */ + smc_check_ism_v2_match(ini, ntohs(pclc_smcd->ism.chid), + ntohll(pclc_smcd->ism.gid), &matches); + for (i = 1; i <= smc_v2_ext->hdr.ism_gid_cnt; 
i++) { + /* check for ISM devices matching proposed non-native ISM + * devices + */ + smc_check_ism_v2_match(ini, + ntohs(smcd_v2_ext->gidchid[i - 1].chid), + ntohll(smcd_v2_ext->gidchid[i - 1].gid), + &matches); + } + mutex_unlock(&smcd_dev_list.mutex); + + if (ini->ism_dev[0]) { + smc_ism_get_system_eid(ini->ism_dev[0], &eid); + if (memcmp(eid, smcd_v2_ext->system_eid, SMC_MAX_EID_LEN)) + goto not_found; + } else { + goto not_found; + } + + /* separate - outside the smcd_dev_list.lock */ + smcd_version = ini->smcd_version; + for (i = 0; i < matches; i++) { + ini->smcd_version = SMC_V2; + ini->is_smcd = true; + ini->ism_selected = i; + if (smc_listen_ism_init(new_smc, ini)) + /* try next active ISM device */ + continue; + return; /* matching and usable V2 ISM device found */ + } + /* no V2 ISM device could be initialized */ + ini->smcd_version = smcd_version; /* restore original value */ + +not_found: + ini->smcd_version &= ~SMC_V2; + ini->ism_dev[0] = NULL; + ini->is_smcd = false; +} + +static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc, + struct smc_clc_msg_proposal *pclc, + struct smc_init_info *ini) +{ + struct smc_clc_msg_smcd *pclc_smcd = smc_get_clc_msg_smcd(pclc); + + /* check if ISM V1 is available */ + if (!(ini->smcd_version & SMC_V1) || !smcd_indicated(ini->smc_type_v1)) + goto not_found; + ini->is_smcd = true; /* prepare ISM check */ + ini->ism_peer_gid[0] = ntohll(pclc_smcd->ism.gid); + if (smc_find_ism_device(new_smc, ini)) + goto not_found; + ini->ism_selected = 0; + if (!smc_listen_ism_init(new_smc, ini)) + return; /* V1 ISM device found */ + +not_found: + ini->ism_dev[0] = NULL; + ini->is_smcd = false; +} + /* listen worker: register buffers */ -static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact) +static int smc_listen_rdma_reg(struct smc_sock *new_smc, bool local_first) { struct smc_connection *conn = &new_smc->conn; - if (local_contact != SMC_FIRST_CONTACT) { + if (!local_first) { if 
(smcr_lgr_reg_rmbs(conn->lnk, conn->rmb_desc)) return SMC_CLC_DECL_ERR_REGRMB; } @@ -1230,52 +1575,103 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact) return 0; } +static int smc_find_rdma_v1_device_serv(struct smc_sock *new_smc, + struct smc_clc_msg_proposal *pclc, + struct smc_init_info *ini) +{ + int rc; + + if (!smcr_indicated(ini->smc_type_v1)) + return SMC_CLC_DECL_NOSMCDEV; + + /* prepare RDMA check */ + ini->ib_lcl = &pclc->lcl; + rc = smc_find_rdma_device(new_smc, ini); + if (rc) { + /* no RDMA device found */ + if (ini->smc_type_v1 == SMC_TYPE_B) + /* neither ISM nor RDMA device found */ + rc = SMC_CLC_DECL_NOSMCDEV; + return rc; + } + rc = smc_listen_rdma_init(new_smc, ini); + if (rc) + return rc; + return smc_listen_rdma_reg(new_smc, ini->first_contact_local); +} + +/* determine the local device matching to proposal */ +static int smc_listen_find_device(struct smc_sock *new_smc, + struct smc_clc_msg_proposal *pclc, + struct smc_init_info *ini) +{ + int rc; + + /* check for ISM device matching V2 proposed device */ + smc_find_ism_v2_device_serv(new_smc, pclc, ini); + if (ini->ism_dev[0]) + return 0; + + if (!(ini->smcd_version & SMC_V1)) + return SMC_CLC_DECL_NOSMCDEV; + + /* check for matching IP prefix and subnet length */ + rc = smc_listen_prfx_check(new_smc, pclc); + if (rc) + return rc; + + /* get vlan id from IP device */ + if (smc_vlan_by_tcpsk(new_smc->clcsock, ini)) + return SMC_CLC_DECL_GETVLANERR; + + /* check for ISM device matching V1 proposed device */ + smc_find_ism_v1_device_serv(new_smc, pclc, ini); + if (ini->ism_dev[0]) + return 0; + + if (pclc->hdr.typev1 == SMC_TYPE_D) + return SMC_CLC_DECL_NOSMCDDEV; /* skip RDMA and decline */ + + /* check if RDMA is available */ + return smc_find_rdma_v1_device_serv(new_smc, pclc, ini); +} + /* listen worker: finish RDMA setup */ static int smc_listen_rdma_finish(struct smc_sock *new_smc, struct smc_clc_msg_accept_confirm *cclc, - int local_contact) + bool 
local_first) { struct smc_link *link = new_smc->conn.lnk; int reason_code = 0; - if (local_contact == SMC_FIRST_CONTACT) + if (local_first) smc_link_save_peer_info(link, cclc); - if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc)) { - reason_code = SMC_CLC_DECL_ERR_RTOK; - goto decline; - } + if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc)) + return SMC_CLC_DECL_ERR_RTOK; - if (local_contact == SMC_FIRST_CONTACT) { - if (smc_ib_ready_link(link)) { - reason_code = SMC_CLC_DECL_ERR_RDYLNK; - goto decline; - } + if (local_first) { + if (smc_ib_ready_link(link)) + return SMC_CLC_DECL_ERR_RDYLNK; /* QP confirmation over RoCE fabric */ smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK); reason_code = smcr_serv_conf_first_link(new_smc); smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl); - if (reason_code) - goto decline; } - return 0; - -decline: - smc_listen_decline(new_smc, reason_code, local_contact); return reason_code; } -/* setup for RDMA connection of server */ +/* setup for connection of server */ static void smc_listen_work(struct work_struct *work) { struct smc_sock *new_smc = container_of(work, struct smc_sock, smc_listen_work); + u8 version = smc_ism_v2_capable ? 
SMC_V2 : SMC_V1; struct socket *newclcsock = new_smc->clcsock; - struct smc_clc_msg_accept_confirm cclc; + struct smc_clc_msg_accept_confirm *cclc; + struct smc_clc_msg_proposal_area *buf; struct smc_clc_msg_proposal *pclc; - struct smc_init_info ini = {0}; - bool ism_supported = false; - u8 buf[SMC_CLC_MAX_LEN]; + struct smc_init_info *ini = NULL; int rc = 0; if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN) @@ -1297,102 +1693,85 @@ static void smc_listen_work(struct work_struct *work) /* do inband token exchange - * wait for and receive SMC Proposal CLC message */ - pclc = (struct smc_clc_msg_proposal *)&buf; - rc = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN, + buf = kzalloc(sizeof(*buf), GFP_KERNEL); + if (!buf) { + rc = SMC_CLC_DECL_MEM; + goto out_decl; + } + pclc = (struct smc_clc_msg_proposal *)buf; + rc = smc_clc_wait_msg(new_smc, pclc, sizeof(*buf), SMC_CLC_PROPOSAL, CLC_WAIT_TIME); if (rc) goto out_decl; + version = pclc->hdr.version == SMC_V1 ? SMC_V1 : version; - /* IPSec connections opt out of SMC-R optimizations */ + /* IPSec connections opt out of SMC optimizations */ if (using_ipsec(new_smc)) { rc = SMC_CLC_DECL_IPSEC; goto out_decl; } - /* check for matching IP prefix and subnet length */ - rc = smc_listen_prfx_check(new_smc, pclc); - if (rc) + ini = kzalloc(sizeof(*ini), GFP_KERNEL); + if (!ini) { + rc = SMC_CLC_DECL_MEM; goto out_decl; + } - /* get vlan id from IP device */ - if (smc_vlan_by_tcpsk(new_smc->clcsock, &ini)) { - rc = SMC_CLC_DECL_GETVLANERR; + /* initial version checking */ + rc = smc_listen_v2_check(new_smc, pclc, ini); + if (rc) goto out_decl; - } mutex_lock(&smc_server_lgr_pending); smc_close_init(new_smc); smc_rx_init(new_smc); smc_tx_init(new_smc); - /* check if ISM is available */ - if (pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) { - ini.is_smcd = true; /* prepare ISM check */ - rc = smc_find_ism_device(new_smc, &ini); - if (!rc) - rc = smc_listen_ism_init(new_smc, pclc, &ini); - if (!rc) - 
ism_supported = true; - else if (pclc->hdr.path == SMC_TYPE_D) - goto out_unlock; /* skip RDMA and decline */ - } - - /* check if RDMA is available */ - if (!ism_supported) { /* SMC_TYPE_R or SMC_TYPE_B */ - /* prepare RDMA check */ - ini.is_smcd = false; - ini.ism_dev = NULL; - ini.ib_lcl = &pclc->lcl; - rc = smc_find_rdma_device(new_smc, &ini); - if (rc) { - /* no RDMA device found */ - if (pclc->hdr.path == SMC_TYPE_B) - /* neither ISM nor RDMA device found */ - rc = SMC_CLC_DECL_NOSMCDEV; - goto out_unlock; - } - rc = smc_listen_rdma_init(new_smc, &ini); - if (rc) - goto out_unlock; - rc = smc_listen_rdma_reg(new_smc, ini.cln_first_contact); - if (rc) - goto out_unlock; - } + /* determine ISM or RoCE device used for connection */ + rc = smc_listen_find_device(new_smc, pclc, ini); + if (rc) + goto out_unlock; /* send SMC Accept CLC message */ - rc = smc_clc_send_accept(new_smc, ini.cln_first_contact); + rc = smc_clc_send_accept(new_smc, ini->first_contact_local, + ini->smcd_version == SMC_V2 ? 
SMC_V2 : SMC_V1); if (rc) goto out_unlock; /* SMC-D does not need this lock any more */ - if (ism_supported) + if (ini->is_smcd) mutex_unlock(&smc_server_lgr_pending); /* receive SMC Confirm CLC message */ - rc = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc), + memset(buf, 0, sizeof(*buf)); + cclc = (struct smc_clc_msg_accept_confirm *)buf; + rc = smc_clc_wait_msg(new_smc, cclc, sizeof(*buf), SMC_CLC_CONFIRM, CLC_WAIT_TIME); if (rc) { - if (!ism_supported) + if (!ini->is_smcd) goto out_unlock; goto out_decl; } /* finish worker */ - if (!ism_supported) { - rc = smc_listen_rdma_finish(new_smc, &cclc, - ini.cln_first_contact); - mutex_unlock(&smc_server_lgr_pending); + if (!ini->is_smcd) { + rc = smc_listen_rdma_finish(new_smc, cclc, + ini->first_contact_local); if (rc) - return; + goto out_unlock; + mutex_unlock(&smc_server_lgr_pending); } - smc_conn_save_peer_info(new_smc, &cclc); + smc_conn_save_peer_info(new_smc, cclc); smc_listen_out_connected(new_smc); - return; + goto out_free; out_unlock: mutex_unlock(&smc_server_lgr_pending); out_decl: - smc_listen_decline(new_smc, rc, ini.cln_first_contact); + smc_listen_decline(new_smc, rc, ini, version); +out_free: + kfree(ini); + kfree(buf); } static void smc_tcp_listen_work(struct work_struct *work) @@ -1406,7 +1785,7 @@ static void smc_tcp_listen_work(struct work_struct *work) lock_sock(lsk); while (lsk->sk_state == SMC_LISTEN) { rc = smc_clcsock_accept(lsmc, &new_smc); - if (rc) + if (rc) /* clcsock accept queue empty or error */ goto out; if (!new_smc) continue; @@ -1420,13 +1799,29 @@ static void smc_tcp_listen_work(struct work_struct *work) new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf; new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf; sock_hold(&new_smc->sk); /* sock_put in passive closing */ - if (!schedule_work(&new_smc->smc_listen_work)) + if (!queue_work(smc_hs_wq, &new_smc->smc_listen_work)) sock_put(&new_smc->sk); } out: release_sock(lsk); - sock_put(&lsmc->sk); /* sock_hold in smc_listen */ + sock_put(&lsmc->sk); /* 
sock_hold in smc_clcsock_data_ready() */ +} + +static void smc_clcsock_data_ready(struct sock *listen_clcsock) +{ + struct smc_sock *lsmc; + + lsmc = (struct smc_sock *) + ((uintptr_t)listen_clcsock->sk_user_data & ~SK_USER_DATA_NOCOPY); + if (!lsmc) + return; + lsmc->clcsk_data_ready(listen_clcsock); + if (lsmc->sk.sk_state == SMC_LISTEN) { + sock_hold(&lsmc->sk); /* sock_put in smc_tcp_listen_work() */ + if (!queue_work(smc_hs_wq, &lsmc->tcp_listen_work)) + sock_put(&lsmc->sk); + } } static int smc_listen(struct socket *sock, int backlog) @@ -1455,15 +1850,19 @@ static int smc_listen(struct socket *sock, int backlog) if (!smc->use_fallback) tcp_sk(smc->clcsock->sk)->syn_smc = 1; + /* save original sk_data_ready function and establish + * smc-specific sk_data_ready function + */ + smc->clcsk_data_ready = smc->clcsock->sk->sk_data_ready; + smc->clcsock->sk->sk_data_ready = smc_clcsock_data_ready; + smc->clcsock->sk->sk_user_data = + (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY); rc = kernel_listen(smc->clcsock, backlog); if (rc) goto out; sk->sk_max_ack_backlog = backlog; sk->sk_ack_backlog = 0; sk->sk_state = SMC_LISTEN; - sock_hold(sk); /* sock_hold in tcp_listen_worker */ - if (!schedule_work(&smc->tcp_listen_work)) - sock_put(sk); out: release_sock(sk); @@ -1788,8 +2187,8 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_CLOSED) { if (val) - mod_delayed_work(system_wq, &smc->conn.tx_work, - 0); + mod_delayed_work(smc->conn.lgr->tx_wq, + &smc->conn.tx_work, 0); } break; case TCP_CORK: @@ -1797,8 +2196,8 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_CLOSED) { if (!val) - mod_delayed_work(system_wq, &smc->conn.tx_work, - 0); + mod_delayed_work(smc->conn.lgr->tx_wq, + &smc->conn.tx_work, 0); } break; case TCP_DEFER_ACCEPT: @@ -2077,14 +2476,26 @@ static int __init smc_init(void) if (rc) return rc; + smc_ism_init(); 
+ smc_clc_init(); + rc = smc_pnet_init(); if (rc) goto out_pernet_subsys; + rc = -ENOMEM; + smc_hs_wq = alloc_workqueue("smc_hs_wq", 0, 0); + if (!smc_hs_wq) + goto out_pnet; + + smc_close_wq = alloc_workqueue("smc_close_wq", 0, 0); + if (!smc_close_wq) + goto out_alloc_hs_wq; + rc = smc_core_init(); if (rc) { pr_err("%s: smc_core_init fails with %d\n", __func__, rc); - goto out_pnet; + goto out_alloc_wqs; } rc = smc_llc_init(); @@ -2136,6 +2547,10 @@ out_proto: proto_unregister(&smc_proto); out_core: smc_core_exit(); +out_alloc_wqs: + destroy_workqueue(smc_close_wq); +out_alloc_hs_wq: + destroy_workqueue(smc_hs_wq); out_pnet: smc_pnet_exit(); out_pernet_subsys: @@ -2150,6 +2565,8 @@ static void __exit smc_exit(void) sock_unregister(PF_SMC); smc_core_exit(); smc_ib_unregister_client(); + destroy_workqueue(smc_close_wq); + destroy_workqueue(smc_hs_wq); proto_unregister(&smc_proto6); proto_unregister(&smc_proto); smc_pnet_exit(); diff --git a/net/smc/smc.h b/net/smc/smc.h index 6f1c42da7a4c..d65e15f0c944 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -18,9 +18,20 @@ #include "smc_ib.h" +#define SMC_V1 1 /* SMC version V1 */ +#define SMC_V2 2 /* SMC version V2 */ +#define SMC_RELEASE 0 + #define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */ #define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */ +#define SMC_MAX_ISM_DEVS 8 /* max # of proposed non-native ISM + * devices + */ + +#define SMC_MAX_HOSTNAME_LEN 32 +#define SMC_MAX_EID_LEN 32 + extern struct proto smc_proto; extern struct proto smc_proto6; @@ -201,6 +212,8 @@ struct smc_connection { struct smc_sock { /* smc sock container */ struct sock sk; struct socket *clcsock; /* internal tcp socket */ + void (*clcsk_data_ready)(struct sock *sk); + /* original data_ready fct. 
**/ struct smc_connection conn; /* smc connection */ struct smc_sock *listen_smc; /* listen parent */ struct work_struct connect_work; /* handle non-blocking connect*/ @@ -235,10 +248,16 @@ static inline struct smc_sock *smc_sk(const struct sock *sk) return (struct smc_sock *)sk; } +extern struct workqueue_struct *smc_hs_wq; /* wq for handshake work */ +extern struct workqueue_struct *smc_close_wq; /* wq for close work */ + #define SMC_SYSTEMID_LEN 8 extern u8 local_systemid[SMC_SYSTEMID_LEN]; /* unique system identifier */ +#define ntohll(x) be64_to_cpu(x) +#define htonll(x) cpu_to_be64(x) + /* convert an u32 value into network byte order, store it into a 3 byte field */ static inline void hton24(u8 *net, u32 host) { diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index ce468ff62a19..b1ce6ccbfaec 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -299,7 +299,7 @@ static void smc_cdc_msg_validate(struct smc_sock *smc, struct smc_cdc_msg *cdc, conn->lnk = link; spin_unlock_bh(&conn->send_lock); sock_hold(&smc->sk); /* sock_put in abort_work */ - if (!schedule_work(&conn->abort_work)) + if (!queue_work(smc_close_wq, &conn->abort_work)) sock_put(&smc->sk); } } @@ -368,7 +368,7 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN; sock_set_flag(&smc->sk, SOCK_DONE); sock_hold(&smc->sk); /* sock_put in close_work */ - if (!schedule_work(&conn->close_work)) + if (!queue_work(smc_close_wq, &conn->close_work)) sock_put(&smc->sk); } } diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 779f4142a11d..696d89c2dce4 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -14,6 +14,8 @@ #include <linux/inetdevice.h> #include <linux/if_ether.h> #include <linux/sched/signal.h> +#include <linux/utsname.h> +#include <linux/ctype.h> #include <net/addrconf.h> #include <net/sock.h> @@ -27,6 +29,7 @@ #define SMCR_CLC_ACCEPT_CONFIRM_LEN 68 #define SMCD_CLC_ACCEPT_CONFIRM_LEN 48 +#define SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 
78 #define SMC_CLC_RECV_BUF_LEN 100 /* eye catcher "SMCR" EBCDIC for CLC messages */ @@ -34,13 +37,88 @@ static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'}; /* eye catcher "SMCD" EBCDIC for CLC messages */ static const char SMCD_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xc4'}; +static u8 smc_hostname[SMC_MAX_HOSTNAME_LEN]; + +/* check arriving CLC proposal */ +static bool smc_clc_msg_prop_valid(struct smc_clc_msg_proposal *pclc) +{ + struct smc_clc_msg_proposal_prefix *pclc_prfx; + struct smc_clc_smcd_v2_extension *smcd_v2_ext; + struct smc_clc_msg_hdr *hdr = &pclc->hdr; + struct smc_clc_v2_extension *v2_ext; + + v2_ext = smc_get_clc_v2_ext(pclc); + pclc_prfx = smc_clc_proposal_get_prefix(pclc); + if (hdr->version == SMC_V1) { + if (hdr->typev1 == SMC_TYPE_N) + return false; + if (ntohs(hdr->length) != + sizeof(*pclc) + ntohs(pclc->iparea_offset) + + sizeof(*pclc_prfx) + + pclc_prfx->ipv6_prefixes_cnt * + sizeof(struct smc_clc_ipv6_prefix) + + sizeof(struct smc_clc_msg_trail)) + return false; + } else { + if (ntohs(hdr->length) != + sizeof(*pclc) + + sizeof(struct smc_clc_msg_smcd) + + (hdr->typev1 != SMC_TYPE_N ? + sizeof(*pclc_prfx) + + pclc_prfx->ipv6_prefixes_cnt * + sizeof(struct smc_clc_ipv6_prefix) : 0) + + (hdr->typev2 != SMC_TYPE_N ? + sizeof(*v2_ext) + + v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN : 0) + + (smcd_indicated(hdr->typev2) ? 
+ sizeof(*smcd_v2_ext) + v2_ext->hdr.ism_gid_cnt * + sizeof(struct smc_clc_smcd_gid_chid) : + 0) + + sizeof(struct smc_clc_msg_trail)) + return false; + } + return true; +} + +/* check arriving CLC accept or confirm */ +static bool +smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm_v2 *clc_v2) +{ + struct smc_clc_msg_hdr *hdr = &clc_v2->hdr; + + if (hdr->typev1 != SMC_TYPE_R && hdr->typev1 != SMC_TYPE_D) + return false; + if (hdr->version == SMC_V1) { + if ((hdr->typev1 == SMC_TYPE_R && + ntohs(hdr->length) != SMCR_CLC_ACCEPT_CONFIRM_LEN) || + (hdr->typev1 == SMC_TYPE_D && + ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN)) + return false; + } else { + if (hdr->typev1 == SMC_TYPE_D && + ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 && + (ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 + + sizeof(struct smc_clc_first_contact_ext))) + return false; + } + return true; +} + +static void smc_clc_fill_fce(struct smc_clc_first_contact_ext *fce, int *len) +{ + memset(fce, 0, sizeof(*fce)); + fce->os_type = SMC_CLC_OS_LINUX; + fce->release = SMC_RELEASE; + memcpy(fce->hostname, smc_hostname, sizeof(smc_hostname)); + (*len) += sizeof(*fce); +} + /* check if received message has a correct header length and contains valid * heading and trailing eyecatchers */ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl) { - struct smc_clc_msg_proposal_prefix *pclc_prfx; - struct smc_clc_msg_accept_confirm *clc; + struct smc_clc_msg_accept_confirm_v2 *clc_v2; struct smc_clc_msg_proposal *pclc; struct smc_clc_msg_decline *dclc; struct smc_clc_msg_trail *trl; @@ -51,29 +129,19 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl) switch (clcm->type) { case SMC_CLC_PROPOSAL: pclc = (struct smc_clc_msg_proposal *)clcm; - pclc_prfx = smc_clc_proposal_get_prefix(pclc); - if (ntohs(pclc->hdr.length) < - sizeof(*pclc) + ntohs(pclc->iparea_offset) + - sizeof(*pclc_prfx) + - pclc_prfx->ipv6_prefixes_cnt * - 
sizeof(struct smc_clc_ipv6_prefix) + - sizeof(*trl)) + if (!smc_clc_msg_prop_valid(pclc)) return false; trl = (struct smc_clc_msg_trail *) ((u8 *)pclc + ntohs(pclc->hdr.length) - sizeof(*trl)); break; case SMC_CLC_ACCEPT: case SMC_CLC_CONFIRM: - if (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D) - return false; - clc = (struct smc_clc_msg_accept_confirm *)clcm; - if ((clcm->path == SMC_TYPE_R && - ntohs(clc->hdr.length) != SMCR_CLC_ACCEPT_CONFIRM_LEN) || - (clcm->path == SMC_TYPE_D && - ntohs(clc->hdr.length) != SMCD_CLC_ACCEPT_CONFIRM_LEN)) + clc_v2 = (struct smc_clc_msg_accept_confirm_v2 *)clcm; + if (!smc_clc_msg_acc_conf_valid(clc_v2)) return false; trl = (struct smc_clc_msg_trail *) - ((u8 *)clc + ntohs(clc->hdr.length) - sizeof(*trl)); + ((u8 *)clc_v2 + ntohs(clc_v2->hdr.length) - + sizeof(*trl)); break; case SMC_CLC_DECLINE: dclc = (struct smc_clc_msg_decline *)clcm; @@ -153,7 +221,6 @@ static int smc_clc_prfx_set(struct socket *clcsock, struct sockaddr_in *addr; int rc = -ENOENT; - memset(prop, 0, sizeof(*prop)); if (!dst) { rc = -ENOTCONN; goto out; @@ -320,7 +387,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, } datlen = ntohs(clcm->length); if ((len < sizeof(struct smc_clc_msg_hdr)) || - (clcm->version < SMC_CLC_V1) || + (clcm->version < SMC_V1) || ((clcm->type != SMC_CLC_DECLINE) && (clcm->type != expected_type))) { smc->sk.sk_err = EPROTO; @@ -328,9 +395,6 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, goto out; } - if (clcm->type == SMC_CLC_PROPOSAL && clcm->path == SMC_TYPE_N) - reason_code = SMC_CLC_DECL_VERSMISMAT; /* just V2 offered */ - /* receive the complete CLC message */ memset(&msg, 0, sizeof(struct msghdr)); if (datlen > buflen) { @@ -366,7 +430,8 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, dclc = (struct smc_clc_msg_decline *)clcm; reason_code = SMC_CLC_DECL_PEERDECL; smc->peer_diagnosis = ntohl(dclc->peer_diagnosis); - if (((struct smc_clc_msg_decline 
*)buf)->hdr.flag) { + if (((struct smc_clc_msg_decline *)buf)->hdr.typev2 & + SMC_FIRST_CONTACT_MASK) { smc->conn.lgr->sync_err = 1; smc_lgr_terminate_sched(smc->conn.lgr); } @@ -378,7 +443,7 @@ out: } /* send CLC DECLINE message across internal TCP socket */ -int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info) +int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version) { struct smc_clc_msg_decline dclc; struct msghdr msg; @@ -389,8 +454,10 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info) memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); dclc.hdr.type = SMC_CLC_DECLINE; dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline)); - dclc.hdr.version = SMC_CLC_V1; - dclc.hdr.flag = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 1 : 0; + dclc.hdr.version = version; + dclc.os_type = version == SMC_V1 ? 0 : SMC_CLC_OS_LINUX; + dclc.hdr.typev2 = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? + SMC_FIRST_CONTACT_MASK : 0; if ((!smc->conn.lgr || !smc->conn.lgr->is_smcd) && smc_ib_is_valid_local_systemid()) memcpy(dclc.id_for_peer, local_systemid, @@ -409,142 +476,274 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info) } /* send CLC PROPOSAL message across internal TCP socket */ -int smc_clc_send_proposal(struct smc_sock *smc, int smc_type, - struct smc_init_info *ini) +int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) { - struct smc_clc_ipv6_prefix ipv6_prfx[SMC_CLC_MAX_V6_PREFIX]; - struct smc_clc_msg_proposal_prefix pclc_prfx; - struct smc_clc_msg_smcd pclc_smcd; - struct smc_clc_msg_proposal pclc; - struct smc_clc_msg_trail trl; + struct smc_clc_smcd_v2_extension *smcd_v2_ext; + struct smc_clc_msg_proposal_prefix *pclc_prfx; + struct smc_clc_msg_proposal *pclc_base; + struct smc_clc_smcd_gid_chid *gidchids; + struct smc_clc_msg_proposal_area *pclc; + struct smc_clc_ipv6_prefix *ipv6_prfx; + struct smc_clc_v2_extension *v2_ext; + struct smc_clc_msg_smcd 
*pclc_smcd; + struct smc_clc_msg_trail *trl; int len, i, plen, rc; int reason_code = 0; - struct kvec vec[5]; + struct kvec vec[8]; struct msghdr msg; + pclc = kzalloc(sizeof(*pclc), GFP_KERNEL); + if (!pclc) + return -ENOMEM; + + pclc_base = &pclc->pclc_base; + pclc_smcd = &pclc->pclc_smcd; + pclc_prfx = &pclc->pclc_prfx; + ipv6_prfx = pclc->pclc_prfx_ipv6; + v2_ext = &pclc->pclc_v2_ext; + smcd_v2_ext = &pclc->pclc_smcd_v2_ext; + gidchids = pclc->pclc_gidchids; + trl = &pclc->pclc_trl; + + pclc_base->hdr.version = SMC_V2; + pclc_base->hdr.typev1 = ini->smc_type_v1; + pclc_base->hdr.typev2 = ini->smc_type_v2; + plen = sizeof(*pclc_base) + sizeof(*pclc_smcd) + sizeof(*trl); + /* retrieve ip prefixes for CLC proposal msg */ - rc = smc_clc_prfx_set(smc->clcsock, &pclc_prfx, ipv6_prfx); - if (rc) - return SMC_CLC_DECL_CNFERR; /* configuration error */ + if (ini->smc_type_v1 != SMC_TYPE_N) { + rc = smc_clc_prfx_set(smc->clcsock, pclc_prfx, ipv6_prfx); + if (rc) { + if (ini->smc_type_v2 == SMC_TYPE_N) { + kfree(pclc); + return SMC_CLC_DECL_CNFERR; + } + pclc_base->hdr.typev1 = SMC_TYPE_N; + } else { + pclc_base->iparea_offset = htons(sizeof(*pclc_smcd)); + plen += sizeof(*pclc_prfx) + + pclc_prfx->ipv6_prefixes_cnt * + sizeof(ipv6_prfx[0]); + } + } - /* send SMC Proposal CLC message */ - plen = sizeof(pclc) + sizeof(pclc_prfx) + - (pclc_prfx.ipv6_prefixes_cnt * sizeof(ipv6_prfx[0])) + - sizeof(trl); - memset(&pclc, 0, sizeof(pclc)); - memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); - pclc.hdr.type = SMC_CLC_PROPOSAL; - pclc.hdr.version = SMC_CLC_V1; /* SMC version */ - pclc.hdr.path = smc_type; - if (smc_type == SMC_TYPE_R || smc_type == SMC_TYPE_B) { + /* build SMC Proposal CLC message */ + memcpy(pclc_base->hdr.eyecatcher, SMC_EYECATCHER, + sizeof(SMC_EYECATCHER)); + pclc_base->hdr.type = SMC_CLC_PROPOSAL; + if (smcr_indicated(ini->smc_type_v1)) { /* add SMC-R specifics */ - memcpy(pclc.lcl.id_for_peer, local_systemid, + 
memcpy(pclc_base->lcl.id_for_peer, local_systemid, sizeof(local_systemid)); - memcpy(&pclc.lcl.gid, ini->ib_gid, SMC_GID_SIZE); - memcpy(&pclc.lcl.mac, &ini->ib_dev->mac[ini->ib_port - 1], + memcpy(pclc_base->lcl.gid, ini->ib_gid, SMC_GID_SIZE); + memcpy(pclc_base->lcl.mac, &ini->ib_dev->mac[ini->ib_port - 1], ETH_ALEN); - pclc.iparea_offset = htons(0); } - if (smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B) { + if (smcd_indicated(ini->smc_type_v1)) { /* add SMC-D specifics */ - memset(&pclc_smcd, 0, sizeof(pclc_smcd)); - plen += sizeof(pclc_smcd); - pclc.iparea_offset = htons(SMC_CLC_PROPOSAL_MAX_OFFSET); - pclc_smcd.gid = ini->ism_dev->local_gid; + if (ini->ism_dev[0]) { + pclc_smcd->ism.gid = htonll(ini->ism_dev[0]->local_gid); + pclc_smcd->ism.chid = + htons(smc_ism_get_chid(ini->ism_dev[0])); + } + } + if (ini->smc_type_v2 == SMC_TYPE_N) { + pclc_smcd->v2_ext_offset = 0; + } else { + u16 v2_ext_offset; + u8 *eid = NULL; + + v2_ext_offset = sizeof(*pclc_smcd) - + offsetofend(struct smc_clc_msg_smcd, v2_ext_offset); + if (ini->smc_type_v1 != SMC_TYPE_N) + v2_ext_offset += sizeof(*pclc_prfx) + + pclc_prfx->ipv6_prefixes_cnt * + sizeof(ipv6_prfx[0]); + pclc_smcd->v2_ext_offset = htons(v2_ext_offset); + v2_ext->hdr.eid_cnt = 0; + v2_ext->hdr.ism_gid_cnt = ini->ism_offered_cnt; + v2_ext->hdr.flag.release = SMC_RELEASE; + v2_ext->hdr.flag.seid = 1; + v2_ext->hdr.smcd_v2_ext_offset = htons(sizeof(*v2_ext) - + offsetofend(struct smc_clnt_opts_area_hdr, + smcd_v2_ext_offset) + + v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN); + if (ini->ism_dev[0]) + smc_ism_get_system_eid(ini->ism_dev[0], &eid); + else + smc_ism_get_system_eid(ini->ism_dev[1], &eid); + if (eid) + memcpy(smcd_v2_ext->system_eid, eid, SMC_MAX_EID_LEN); + plen += sizeof(*v2_ext) + sizeof(*smcd_v2_ext); + if (ini->ism_offered_cnt) { + for (i = 1; i <= ini->ism_offered_cnt; i++) { + gidchids[i - 1].gid = + htonll(ini->ism_dev[i]->local_gid); + gidchids[i - 1].chid = + htons(smc_ism_get_chid(ini->ism_dev[i])); + } 
+ plen += ini->ism_offered_cnt * + sizeof(struct smc_clc_smcd_gid_chid); + } } - pclc.hdr.length = htons(plen); + pclc_base->hdr.length = htons(plen); + memcpy(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); - memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); + /* send SMC Proposal CLC message */ memset(&msg, 0, sizeof(msg)); i = 0; - vec[i].iov_base = &pclc; - vec[i++].iov_len = sizeof(pclc); - if (smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B) { - vec[i].iov_base = &pclc_smcd; - vec[i++].iov_len = sizeof(pclc_smcd); + vec[i].iov_base = pclc_base; + vec[i++].iov_len = sizeof(*pclc_base); + vec[i].iov_base = pclc_smcd; + vec[i++].iov_len = sizeof(*pclc_smcd); + if (ini->smc_type_v1 != SMC_TYPE_N) { + vec[i].iov_base = pclc_prfx; + vec[i++].iov_len = sizeof(*pclc_prfx); + if (pclc_prfx->ipv6_prefixes_cnt > 0) { + vec[i].iov_base = ipv6_prfx; + vec[i++].iov_len = pclc_prfx->ipv6_prefixes_cnt * + sizeof(ipv6_prfx[0]); + } } - vec[i].iov_base = &pclc_prfx; - vec[i++].iov_len = sizeof(pclc_prfx); - if (pclc_prfx.ipv6_prefixes_cnt > 0) { - vec[i].iov_base = &ipv6_prfx[0]; - vec[i++].iov_len = pclc_prfx.ipv6_prefixes_cnt * - sizeof(ipv6_prfx[0]); + if (ini->smc_type_v2 != SMC_TYPE_N) { + vec[i].iov_base = v2_ext; + vec[i++].iov_len = sizeof(*v2_ext); + vec[i].iov_base = smcd_v2_ext; + vec[i++].iov_len = sizeof(*smcd_v2_ext); + if (ini->ism_offered_cnt) { + vec[i].iov_base = gidchids; + vec[i++].iov_len = ini->ism_offered_cnt * + sizeof(struct smc_clc_smcd_gid_chid); + } } - vec[i].iov_base = &trl; - vec[i++].iov_len = sizeof(trl); + vec[i].iov_base = trl; + vec[i++].iov_len = sizeof(*trl); /* due to the few bytes needed for clc-handshake this cannot block */ len = kernel_sendmsg(smc->clcsock, &msg, vec, i, plen); if (len < 0) { smc->sk.sk_err = smc->clcsock->sk->sk_err; reason_code = -smc->sk.sk_err; - } else if (len < (int)sizeof(pclc)) { + } else if (len < ntohs(pclc_base->hdr.length)) { reason_code = -ENETUNREACH; smc->sk.sk_err = 
-reason_code; } + kfree(pclc); return reason_code; } -/* send CLC CONFIRM message across internal TCP socket */ -int smc_clc_send_confirm(struct smc_sock *smc) +/* build and send CLC CONFIRM / ACCEPT message */ +static int smc_clc_send_confirm_accept(struct smc_sock *smc, + struct smc_clc_msg_accept_confirm_v2 *clc_v2, + int first_contact, u8 version) { struct smc_connection *conn = &smc->conn; - struct smc_clc_msg_accept_confirm cclc; - struct smc_link *link; - int reason_code = 0; + struct smc_clc_msg_accept_confirm *clc; + struct smc_clc_first_contact_ext fce; + struct smc_clc_msg_trail trl; + struct kvec vec[3]; struct msghdr msg; - struct kvec vec; - int len; + int i, len; /* send SMC Confirm CLC msg */ - memset(&cclc, 0, sizeof(cclc)); - cclc.hdr.type = SMC_CLC_CONFIRM; - cclc.hdr.version = SMC_CLC_V1; /* SMC version */ - if (smc->conn.lgr->is_smcd) { + clc = (struct smc_clc_msg_accept_confirm *)clc_v2; + clc->hdr.version = version; /* SMC version */ + if (first_contact) + clc->hdr.typev2 |= SMC_FIRST_CONTACT_MASK; + if (conn->lgr->is_smcd) { /* SMC-D specific settings */ - memcpy(cclc.hdr.eyecatcher, SMCD_EYECATCHER, + memcpy(clc->hdr.eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER)); - cclc.hdr.path = SMC_TYPE_D; - cclc.hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN); - cclc.gid = conn->lgr->smcd->local_gid; - cclc.token = conn->rmb_desc->token; - cclc.dmbe_size = conn->rmbe_size_short; - cclc.dmbe_idx = 0; - memcpy(&cclc.linkid, conn->lgr->id, SMC_LGR_ID_SIZE); - memcpy(cclc.smcd_trl.eyecatcher, SMCD_EYECATCHER, + clc->hdr.typev1 = SMC_TYPE_D; + clc->d0.gid = conn->lgr->smcd->local_gid; + clc->d0.token = conn->rmb_desc->token; + clc->d0.dmbe_size = conn->rmbe_size_short; + clc->d0.dmbe_idx = 0; + memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE); + if (version == SMC_V1) { + clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN); + } else { + u8 *eid = NULL; + + clc_v2->chid = htons(smc_ism_get_chid(conn->lgr->smcd)); + 
smc_ism_get_system_eid(conn->lgr->smcd, &eid); + if (eid) + memcpy(clc_v2->eid, eid, SMC_MAX_EID_LEN); + len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2; + if (first_contact) + smc_clc_fill_fce(&fce, &len); + clc_v2->hdr.length = htons(len); + } + memcpy(trl.eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER)); } else { + struct smc_link *link = conn->lnk; + /* SMC-R specific settings */ link = conn->lnk; - memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER, + memcpy(clc->hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); - cclc.hdr.path = SMC_TYPE_R; - cclc.hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN); - memcpy(cclc.lcl.id_for_peer, local_systemid, + clc->hdr.typev1 = SMC_TYPE_R; + clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN); + memcpy(clc->r0.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); - memcpy(&cclc.lcl.gid, link->gid, SMC_GID_SIZE); - memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], + memcpy(&clc->r0.lcl.gid, link->gid, SMC_GID_SIZE); + memcpy(&clc->r0.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN); - hton24(cclc.qpn, link->roce_qp->qp_num); - cclc.rmb_rkey = + hton24(clc->r0.qpn, link->roce_qp->qp_num); + clc->r0.rmb_rkey = htonl(conn->rmb_desc->mr_rx[link->link_idx]->rkey); - cclc.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */ - cclc.rmbe_alert_token = htonl(conn->alert_token_local); - cclc.qp_mtu = min(link->path_mtu, link->peer_mtu); - cclc.rmbe_size = conn->rmbe_size_short; - cclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address + clc->r0.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */ + clc->r0.rmbe_alert_token = htonl(conn->alert_token_local); + switch (clc->hdr.type) { + case SMC_CLC_ACCEPT: + clc->r0.qp_mtu = link->path_mtu; + break; + case SMC_CLC_CONFIRM: + clc->r0.qp_mtu = min(link->path_mtu, link->peer_mtu); + break; + } + clc->r0.rmbe_size = conn->rmbe_size_short; + clc->r0.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address (conn->rmb_desc->sgt[link->link_idx].sgl)); - hton24(cclc.psn, link->psn_initial); - 
memcpy(cclc.smcr_trl.eyecatcher, SMC_EYECATCHER, - sizeof(SMC_EYECATCHER)); + hton24(clc->r0.psn, link->psn_initial); + memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); } memset(&msg, 0, sizeof(msg)); - vec.iov_base = &cclc; - vec.iov_len = ntohs(cclc.hdr.length); - len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, - ntohs(cclc.hdr.length)); - if (len < ntohs(cclc.hdr.length)) { + i = 0; + vec[i].iov_base = clc_v2; + if (version > SMC_V1) + vec[i++].iov_len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 - sizeof(trl); + else + vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ? + SMCD_CLC_ACCEPT_CONFIRM_LEN : + SMCR_CLC_ACCEPT_CONFIRM_LEN) - + sizeof(trl); + if (version > SMC_V1 && first_contact) { + vec[i].iov_base = &fce; + vec[i++].iov_len = sizeof(fce); + } + vec[i].iov_base = &trl; + vec[i++].iov_len = sizeof(trl); + return kernel_sendmsg(smc->clcsock, &msg, vec, 1, + ntohs(clc->hdr.length)); +} + +/* send CLC CONFIRM message across internal TCP socket */ +int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, + u8 version) +{ + struct smc_clc_msg_accept_confirm_v2 cclc_v2; + int reason_code = 0; + int len; + + /* send SMC Confirm CLC msg */ + memset(&cclc_v2, 0, sizeof(cclc_v2)); + cclc_v2.hdr.type = SMC_CLC_CONFIRM; + len = smc_clc_send_confirm_accept(smc, &cclc_v2, clnt_first_contact, + version); + if (len < ntohs(cclc_v2.hdr.length)) { if (len >= 0) { reason_code = -ENETUNREACH; smc->sk.sk_err = -reason_code; @@ -557,67 +756,28 @@ int smc_clc_send_confirm(struct smc_sock *smc) } /* send CLC ACCEPT message across internal TCP socket */ -int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact) +int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact, + u8 version) { - struct smc_connection *conn = &new_smc->conn; - struct smc_clc_msg_accept_confirm aclc; - struct smc_link *link; - struct msghdr msg; - struct kvec vec; + struct smc_clc_msg_accept_confirm_v2 aclc_v2; int len; - memset(&aclc, 0, sizeof(aclc)); 
- aclc.hdr.type = SMC_CLC_ACCEPT; - aclc.hdr.version = SMC_CLC_V1; /* SMC version */ - if (srv_first_contact) - aclc.hdr.flag = 1; - - if (new_smc->conn.lgr->is_smcd) { - /* SMC-D specific settings */ - aclc.hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN); - memcpy(aclc.hdr.eyecatcher, SMCD_EYECATCHER, - sizeof(SMCD_EYECATCHER)); - aclc.hdr.path = SMC_TYPE_D; - aclc.gid = conn->lgr->smcd->local_gid; - aclc.token = conn->rmb_desc->token; - aclc.dmbe_size = conn->rmbe_size_short; - aclc.dmbe_idx = 0; - memcpy(&aclc.linkid, conn->lgr->id, SMC_LGR_ID_SIZE); - memcpy(aclc.smcd_trl.eyecatcher, SMCD_EYECATCHER, - sizeof(SMCD_EYECATCHER)); - } else { - /* SMC-R specific settings */ - aclc.hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN); - memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER, - sizeof(SMC_EYECATCHER)); - aclc.hdr.path = SMC_TYPE_R; - link = conn->lnk; - memcpy(aclc.lcl.id_for_peer, local_systemid, - sizeof(local_systemid)); - memcpy(&aclc.lcl.gid, link->gid, SMC_GID_SIZE); - memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], - ETH_ALEN); - hton24(aclc.qpn, link->roce_qp->qp_num); - aclc.rmb_rkey = - htonl(conn->rmb_desc->mr_rx[link->link_idx]->rkey); - aclc.rmbe_idx = 1; /* as long as 1 RMB = 1 RMBE */ - aclc.rmbe_alert_token = htonl(conn->alert_token_local); - aclc.qp_mtu = link->path_mtu; - aclc.rmbe_size = conn->rmbe_size_short, - aclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address - (conn->rmb_desc->sgt[link->link_idx].sgl)); - hton24(aclc.psn, link->psn_initial); - memcpy(aclc.smcr_trl.eyecatcher, SMC_EYECATCHER, - sizeof(SMC_EYECATCHER)); - } - - memset(&msg, 0, sizeof(msg)); - vec.iov_base = &aclc; - vec.iov_len = ntohs(aclc.hdr.length); - len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1, - ntohs(aclc.hdr.length)); - if (len < ntohs(aclc.hdr.length)) + memset(&aclc_v2, 0, sizeof(aclc_v2)); + aclc_v2.hdr.type = SMC_CLC_ACCEPT; + len = smc_clc_send_confirm_accept(new_smc, &aclc_v2, srv_first_contact, + version); + if (len < 
ntohs(aclc_v2.hdr.length)) len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err; return len > 0 ? 0 : len; } + +void __init smc_clc_init(void) +{ + struct new_utsname *u; + + memset(smc_hostname, _S, sizeof(smc_hostname)); /* ASCII blanks */ + u = utsname(); + memcpy(smc_hostname, u->nodename, + min_t(size_t, strlen(u->nodename), sizeof(smc_hostname))); +} diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index cf7b45306f4e..b3f46ab79e47 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -22,7 +22,6 @@ #define SMC_CLC_CONFIRM 0x03 #define SMC_CLC_DECLINE 0x04 -#define SMC_CLC_V1 0x1 /* SMC version */ #define SMC_TYPE_R 0 /* SMC-R only */ #define SMC_TYPE_D 1 /* SMC-D only */ #define SMC_TYPE_N 2 /* neither SMC-R nor SMC-D */ @@ -38,7 +37,6 @@ #define SMC_CLC_DECL_NOSMCDEV 0x03030000 /* no SMC device found (R or D) */ #define SMC_CLC_DECL_NOSMCDDEV 0x03030001 /* no SMC-D device found */ #define SMC_CLC_DECL_NOSMCRDEV 0x03030002 /* no SMC-R device found */ -#define SMC_CLC_DECL_SMCDNOTALK 0x03030003 /* SMC-D dev can't talk to peer */ #define SMC_CLC_DECL_MODEUNSUPP 0x03040000 /* smc modes do not match (R or D)*/ #define SMC_CLC_DECL_RMBE_EC 0x03050000 /* peer has eyecatcher in RMBE */ #define SMC_CLC_DECL_OPTUNSUPP 0x03060000 /* fastopen sockopt not supported */ @@ -56,19 +54,19 @@ #define SMC_CLC_DECL_ERR_RDYLNK 0x09990002 /* ib ready link failed */ #define SMC_CLC_DECL_ERR_REGRMB 0x09990003 /* reg rmb failed */ +#define SMC_FIRST_CONTACT_MASK 0b10 /* first contact bit within typev2 */ + struct smc_clc_msg_hdr { /* header1 of clc messages */ u8 eyecatcher[4]; /* eye catcher */ u8 type; /* proposal / accept / confirm / decline */ __be16 length; #if defined(__BIG_ENDIAN_BITFIELD) u8 version : 4, - flag : 1, - rsvd : 1, - path : 2; + typev2 : 2, + typev1 : 2; #elif defined(__LITTLE_ENDIAN_BITFIELD) - u8 path : 2, - rsvd : 1, - flag : 1, + u8 typev1 : 2, + typev2 : 2, version : 4; #endif } __packed; /* format defined in RFC7609 */ @@ -83,8 +81,6 @@ struct 
smc_clc_msg_local { /* header2 of clc messages */ u8 mac[6]; /* mac of ib_device port */ }; -#define SMC_CLC_MAX_V6_PREFIX 8 - /* Struct would be 4 byte aligned, but it is used in an array that is sent * to peers and must conform to RFC7609, hence we need to use packed here. */ @@ -93,6 +89,44 @@ struct smc_clc_ipv6_prefix { u8 prefix_len; } __packed; /* format defined in RFC7609 */ +#if defined(__BIG_ENDIAN_BITFIELD) +struct smc_clc_v2_flag { + u8 release : 4, + rsvd : 3, + seid : 1; +}; +#elif defined(__LITTLE_ENDIAN_BITFIELD) +struct smc_clc_v2_flag { + u8 seid : 1, + rsvd : 3, + release : 4; +}; +#endif + +struct smc_clnt_opts_area_hdr { + u8 eid_cnt; /* number of user defined EIDs */ + u8 ism_gid_cnt; /* number of ISMv2 GIDs */ + u8 reserved1; + struct smc_clc_v2_flag flag; + u8 reserved2[2]; + __be16 smcd_v2_ext_offset; /* SMC-Dv2 Extension Offset */ +}; + +struct smc_clc_smcd_gid_chid { + __be64 gid; /* ISM GID */ + __be16 chid; /* ISMv2 CHID */ +} __packed; /* format defined in + * IBM Shared Memory Communications Version 2 + * (https://www.ibm.com/support/pages/node/6326337) + */ + +struct smc_clc_v2_extension { + struct smc_clnt_opts_area_hdr hdr; + u8 roce[16]; /* RoCEv2 GID */ + u8 reserved[16]; + u8 user_eids[0][SMC_MAX_EID_LEN]; +}; + struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/ __be32 outgoing_subnet; /* subnet mask */ u8 prefix_len; /* number of significant bits in mask */ @@ -101,8 +135,15 @@ struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/ } __aligned(4); struct smc_clc_msg_smcd { /* SMC-D GID information */ - u64 gid; /* ISM GID of requestor */ - u8 res[32]; + struct smc_clc_smcd_gid_chid ism; /* ISM native GID+CHID of requestor */ + __be16 v2_ext_offset; /* SMC Version 2 Extension Offset */ + u8 reserved[28]; +}; + +struct smc_clc_smcd_v2_extension { + u8 system_eid[SMC_MAX_EID_LEN]; + u8 reserved[16]; + struct smc_clc_smcd_gid_chid gidchid[0]; }; struct smc_clc_msg_proposal { /* clc 
proposal message sent by Linux */ @@ -111,64 +152,107 @@ struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */ __be16 iparea_offset; /* offset to IP address information area */ } __aligned(4); -#define SMC_CLC_PROPOSAL_MAX_OFFSET 0x28 -#define SMC_CLC_PROPOSAL_MAX_PREFIX (SMC_CLC_MAX_V6_PREFIX * \ - sizeof(struct smc_clc_ipv6_prefix)) -#define SMC_CLC_MAX_LEN (sizeof(struct smc_clc_msg_proposal) + \ - SMC_CLC_PROPOSAL_MAX_OFFSET + \ - sizeof(struct smc_clc_msg_proposal_prefix) + \ - SMC_CLC_PROPOSAL_MAX_PREFIX + \ - sizeof(struct smc_clc_msg_trail)) +#define SMC_CLC_MAX_V6_PREFIX 8 -struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */ - struct smc_clc_msg_hdr hdr; - union { - struct { /* SMC-R */ - struct smc_clc_msg_local lcl; - u8 qpn[3]; /* QP number */ - __be32 rmb_rkey; /* RMB rkey */ - u8 rmbe_idx; /* Index of RMBE in RMB */ - __be32 rmbe_alert_token;/* unique connection id */ +struct smc_clc_msg_proposal_area { + struct smc_clc_msg_proposal pclc_base; + struct smc_clc_msg_smcd pclc_smcd; + struct smc_clc_msg_proposal_prefix pclc_prfx; + struct smc_clc_ipv6_prefix pclc_prfx_ipv6[SMC_CLC_MAX_V6_PREFIX]; + struct smc_clc_v2_extension pclc_v2_ext; + struct smc_clc_smcd_v2_extension pclc_smcd_v2_ext; + struct smc_clc_smcd_gid_chid pclc_gidchids[SMC_MAX_ISM_DEVS]; + struct smc_clc_msg_trail pclc_trl; +}; + +struct smcr_clc_msg_accept_confirm { /* SMCR accept/confirm */ + struct smc_clc_msg_local lcl; + u8 qpn[3]; /* QP number */ + __be32 rmb_rkey; /* RMB rkey */ + u8 rmbe_idx; /* Index of RMBE in RMB */ + __be32 rmbe_alert_token; /* unique connection id */ + #if defined(__BIG_ENDIAN_BITFIELD) + u8 rmbe_size : 4, /* buf size (compressed) */ + qp_mtu : 4; /* QP mtu */ +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u8 qp_mtu : 4, + rmbe_size : 4; +#endif + u8 reserved; + __be64 rmb_dma_addr; /* RMB virtual address */ + u8 reserved2; + u8 psn[3]; /* packet sequence number */ +} __packed; + +struct smcd_clc_msg_accept_confirm_common { /* 
SMCD accept/confirm */ + u64 gid; /* Sender GID */ + u64 token; /* DMB token */ + u8 dmbe_idx; /* DMBE index */ #if defined(__BIG_ENDIAN_BITFIELD) - u8 rmbe_size : 4, /* buf size (compressed) */ - qp_mtu : 4; /* QP mtu */ + u8 dmbe_size : 4, /* buf size (compressed) */ + reserved3 : 4; #elif defined(__LITTLE_ENDIAN_BITFIELD) - u8 qp_mtu : 4, - rmbe_size : 4; + u8 reserved3 : 4, + dmbe_size : 4; #endif - u8 reserved; - __be64 rmb_dma_addr; /* RMB virtual address */ - u8 reserved2; - u8 psn[3]; /* packet sequence number */ - struct smc_clc_msg_trail smcr_trl; - /* eye catcher "SMCR" EBCDIC */ - } __packed; - struct { /* SMC-D */ - u64 gid; /* Sender GID */ - u64 token; /* DMB token */ - u8 dmbe_idx; /* DMBE index */ + u16 reserved4; + __be32 linkid; /* Link identifier */ +} __packed; + +#define SMC_CLC_OS_ZOS 1 +#define SMC_CLC_OS_LINUX 2 +#define SMC_CLC_OS_AIX 3 + +struct smc_clc_first_contact_ext { + u8 reserved1; #if defined(__BIG_ENDIAN_BITFIELD) - u8 dmbe_size : 4, /* buf size (compressed) */ - reserved3 : 4; + u8 os_type : 4, + release : 4; #elif defined(__LITTLE_ENDIAN_BITFIELD) - u8 reserved3 : 4, - dmbe_size : 4; + u8 release : 4, + os_type : 4; #endif - u16 reserved4; - u32 linkid; /* Link identifier */ + u8 reserved2[2]; + u8 hostname[SMC_MAX_HOSTNAME_LEN]; +}; + +struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */ + struct smc_clc_msg_hdr hdr; + union { + struct smcr_clc_msg_accept_confirm r0; /* SMC-R */ + struct { /* SMC-D */ + struct smcd_clc_msg_accept_confirm_common d0; u32 reserved5[3]; - struct smc_clc_msg_trail smcd_trl; - /* eye catcher "SMCD" EBCDIC */ - } __packed; + }; }; } __packed; /* format defined in RFC7609 */ +struct smc_clc_msg_accept_confirm_v2 { /* clc accept / confirm message */ + struct smc_clc_msg_hdr hdr; + union { + struct smcr_clc_msg_accept_confirm r0; /* SMC-R */ + struct { /* SMC-D */ + struct smcd_clc_msg_accept_confirm_common d0; + __be16 chid; + u8 eid[SMC_MAX_EID_LEN]; + u8 reserved5[8]; + }; + }; +}; 
+ struct smc_clc_msg_decline { /* clc decline message */ struct smc_clc_msg_hdr hdr; u8 id_for_peer[SMC_SYSTEMID_LEN]; /* sender peer_id */ __be32 peer_diagnosis; /* diagnosis information */ - u8 reserved2[4]; - struct smc_clc_msg_trail trl; /* eye catcher "SMCR" EBCDIC */ +#if defined(__BIG_ENDIAN_BITFIELD) + u8 os_type : 4, + reserved : 4; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u8 reserved : 4, + os_type : 4; +#endif + u8 reserved2[3]; + struct smc_clc_msg_trail trl; /* eye catcher "SMCD" or "SMCR" EBCDIC */ } __aligned(4); /* determine start of the prefix area within the proposal message */ @@ -179,16 +263,58 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc) ((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset)); } +static inline bool smcr_indicated(int smc_type) +{ + return smc_type == SMC_TYPE_R || smc_type == SMC_TYPE_B; +} + +static inline bool smcd_indicated(int smc_type) +{ + return smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B; +} + /* get SMC-D info from proposal message */ static inline struct smc_clc_msg_smcd * smc_get_clc_msg_smcd(struct smc_clc_msg_proposal *prop) { - if (ntohs(prop->iparea_offset) != sizeof(struct smc_clc_msg_smcd)) + if (smcd_indicated(prop->hdr.typev1) && + ntohs(prop->iparea_offset) != sizeof(struct smc_clc_msg_smcd)) return NULL; return (struct smc_clc_msg_smcd *)(prop + 1); } +static inline struct smc_clc_v2_extension * +smc_get_clc_v2_ext(struct smc_clc_msg_proposal *prop) +{ + struct smc_clc_msg_smcd *prop_smcd = smc_get_clc_msg_smcd(prop); + + if (!prop_smcd || !ntohs(prop_smcd->v2_ext_offset)) + return NULL; + + return (struct smc_clc_v2_extension *) + ((u8 *)prop_smcd + + offsetof(struct smc_clc_msg_smcd, v2_ext_offset) + + sizeof(prop_smcd->v2_ext_offset) + + ntohs(prop_smcd->v2_ext_offset)); +} + +static inline struct smc_clc_smcd_v2_extension * +smc_get_clc_smcd_v2_ext(struct smc_clc_v2_extension *prop_v2ext) +{ + if (!prop_v2ext) + return NULL; + if (!ntohs(prop_v2ext->hdr.smcd_v2_ext_offset)) 
+ return NULL; + + return (struct smc_clc_smcd_v2_extension *) + ((u8 *)prop_v2ext + + offsetof(struct smc_clc_v2_extension, hdr) + + offsetof(struct smc_clnt_opts_area_hdr, smcd_v2_ext_offset) + + sizeof(prop_v2ext->hdr.smcd_v2_ext_offset) + + ntohs(prop_v2ext->hdr.smcd_v2_ext_offset)); +} + struct smcd_dev; struct smc_init_info; @@ -196,10 +322,12 @@ int smc_clc_prfx_match(struct socket *clcsock, struct smc_clc_msg_proposal_prefix *prop); int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, u8 expected_type, unsigned long timeout); -int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info); -int smc_clc_send_proposal(struct smc_sock *smc, int smc_type, - struct smc_init_info *ini); -int smc_clc_send_confirm(struct smc_sock *smc); -int smc_clc_send_accept(struct smc_sock *smc, int srv_first_contact); +int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version); +int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini); +int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, + u8 version); +int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact, + u8 version); +void smc_clc_init(void) __init; #endif diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 0e7409e469c0..0f9ffba07d26 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -210,9 +210,9 @@ again: sk->sk_state = SMC_CLOSED; sk->sk_state_change(sk); /* wake up accept */ if (smc->clcsock && smc->clcsock->sk) { + smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready; + smc->clcsock->sk->sk_user_data = NULL; rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); - /* wake up kernel_accept of smc_tcp_listen_worker */ - smc->clcsock->sk->sk_data_ready(smc->clcsock->sk); } smc_close_cleanup_listen(sk); release_sock(sk); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index a406627b1d55..d790c43c473f 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -34,7 +34,6 @@ #define SMC_LGR_NUM_INCR 256 
#define SMC_LGR_FREE_DELAY_SERV (600 * HZ) #define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10 * HZ) -#define SMC_LGR_FREE_DELAY_FAST (8 * HZ) static struct smc_lgr_list smc_lgr_list = { /* established link groups */ .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock), @@ -70,7 +69,7 @@ static void smc_lgr_schedule_free_work(struct smc_link_group *lgr) * creation. For client use a somewhat higher removal delay time, * otherwise there is a risk of out-of-sync link groups. */ - if (!lgr->freeing && !lgr->freefast) { + if (!lgr->freeing) { mod_delayed_work(system_wq, &lgr->free_work, (!lgr->is_smcd && lgr->role == SMC_CLNT) ? SMC_LGR_FREE_DELAY_CLNT : @@ -78,15 +77,6 @@ static void smc_lgr_schedule_free_work(struct smc_link_group *lgr) } } -void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr) -{ - if (!lgr->freeing && !lgr->freefast) { - lgr->freefast = 1; - mod_delayed_work(system_wq, &lgr->free_work, - SMC_LGR_FREE_DELAY_FAST); - } -} - /* Register connection's alert token in our lookup structure. * To use rbtrees we have to implement our own insert core. 
* Requires @conns_lock @@ -227,7 +217,7 @@ void smc_lgr_cleanup_early(struct smc_connection *conn) if (!list_empty(lgr_list)) list_del_init(lgr_list); spin_unlock_bh(lgr_lock); - smc_lgr_schedule_free_work_fast(lgr); + __smc_lgr_terminate(lgr, true); } static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr) @@ -385,7 +375,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) int i; if (ini->is_smcd && ini->vlan_id) { - if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) { + if (smc_ism_get_vlan(ini->ism_dev[ini->ism_selected], + ini->vlan_id)) { rc = SMC_CLC_DECL_ISMVLANERR; goto out; } @@ -396,10 +387,15 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) rc = SMC_CLC_DECL_MEM; goto ism_put_vlan; } + lgr->tx_wq = alloc_workqueue("smc_tx_wq-%*phN", 0, 0, + SMC_LGR_ID_SIZE, &lgr->id); + if (!lgr->tx_wq) { + rc = -ENOMEM; + goto free_lgr; + } lgr->is_smcd = ini->is_smcd; lgr->sync_err = 0; lgr->terminating = 0; - lgr->freefast = 0; lgr->freeing = 0; lgr->vlan_id = ini->vlan_id; mutex_init(&lgr->sndbufs_lock); @@ -417,13 +413,14 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) lgr->conns_all = RB_ROOT; if (ini->is_smcd) { /* SMC-D specific settings */ - get_device(&ini->ism_dev->dev); - lgr->peer_gid = ini->ism_gid; - lgr->smcd = ini->ism_dev; - lgr_list = &ini->ism_dev->lgr_list; + get_device(&ini->ism_dev[ini->ism_selected]->dev); + lgr->peer_gid = ini->ism_peer_gid[ini->ism_selected]; + lgr->smcd = ini->ism_dev[ini->ism_selected]; + lgr_list = &ini->ism_dev[ini->ism_selected]->lgr_list; lgr_lock = &lgr->smcd->lgr_lock; + lgr->smc_version = ini->smcd_version; lgr->peer_shutdown = 0; - atomic_inc(&ini->ism_dev->lgr_cnt); + atomic_inc(&ini->ism_dev[ini->ism_selected]->lgr_cnt); } else { /* SMC-R specific settings */ lgr->role = smc->listen_smc ? 
SMC_SERV : SMC_CLNT; @@ -437,7 +434,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) lnk = &lgr->lnk[link_idx]; rc = smcr_link_init(lgr, lnk, link_idx, ini); if (rc) - goto free_lgr; + goto free_wq; lgr_list = &smc_lgr_list.list; lgr_lock = &smc_lgr_list.lock; atomic_inc(&lgr_cnt); @@ -448,11 +445,13 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) spin_unlock_bh(lgr_lock); return 0; +free_wq: + destroy_workqueue(lgr->tx_wq); free_lgr: kfree(lgr); ism_put_vlan: if (ini->is_smcd && ini->vlan_id) - smc_ism_put_vlan(ini->ism_dev, ini->vlan_id); + smc_ism_put_vlan(ini->ism_dev[ini->ism_selected], ini->vlan_id); out: if (rc < 0) { if (rc == -ENOMEM) @@ -517,7 +516,7 @@ static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend, smc->sk.sk_state != SMC_CLOSED) { rc = smcr_cdc_msg_send_validation(conn, pend, wr_buf); if (!rc) { - schedule_delayed_work(&conn->tx_work, 0); + queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work, 0); smc->sk.sk_data_ready(&smc->sk); } } else { @@ -824,11 +823,10 @@ static void smc_lgr_free(struct smc_link_group *lgr) } smc_lgr_free_bufs(lgr); + destroy_workqueue(lgr->tx_wq); if (lgr->is_smcd) { - if (!lgr->terminating) { - smc_ism_put_vlan(lgr->smcd, lgr->vlan_id); - put_device(&lgr->smcd->dev); - } + smc_ism_put_vlan(lgr->smcd, lgr->vlan_id); + put_device(&lgr->smcd->dev); if (!atomic_dec_return(&lgr->smcd->lgr_cnt)) wake_up(&lgr->smcd->lgrs_deleted); } else { @@ -889,8 +887,6 @@ static void smc_lgr_cleanup(struct smc_link_group *lgr) if (lgr->is_smcd) { smc_ism_signal_shutdown(lgr); smcd_unregister_all_dmbs(lgr); - smc_ism_put_vlan(lgr->smcd, lgr->vlan_id); - put_device(&lgr->smcd->dev); } else { u32 rsn = lgr->llc_termination_rsn; @@ -1294,11 +1290,13 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) spinlock_t *lgr_lock; int rc = 0; - lgr_list = ini->is_smcd ? &ini->ism_dev->lgr_list : &smc_lgr_list.list; - lgr_lock = ini->is_smcd ? 
&ini->ism_dev->lgr_lock : &smc_lgr_list.lock; - ini->cln_first_contact = SMC_FIRST_CONTACT; + lgr_list = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_list : + &smc_lgr_list.list; + lgr_lock = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_lock : + &smc_lgr_list.lock; + ini->first_contact_local = 1; role = smc->listen_smc ? SMC_SERV : SMC_CLNT; - if (role == SMC_CLNT && ini->srv_first_contact) + if (role == SMC_CLNT && ini->first_contact_peer) /* create new link group as well */ goto create; @@ -1307,14 +1305,15 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) list_for_each_entry(lgr, lgr_list, list) { write_lock_bh(&lgr->conns_lock); if ((ini->is_smcd ? - smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) : + smcd_lgr_match(lgr, ini->ism_dev[ini->ism_selected], + ini->ism_peer_gid[ini->ism_selected]) : smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) && !lgr->sync_err && lgr->vlan_id == ini->vlan_id && (role == SMC_CLNT || ini->is_smcd || lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) { /* link group found */ - ini->cln_first_contact = SMC_REUSE_CONTACT; + ini->first_contact_local = 0; conn->lgr = lgr; rc = smc_lgr_register_conn(conn, false); write_unlock_bh(&lgr->conns_lock); @@ -1328,8 +1327,8 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) if (rc) return rc; - if (role == SMC_CLNT && !ini->srv_first_contact && - ini->cln_first_contact == SMC_FIRST_CONTACT) { + if (role == SMC_CLNT && !ini->first_contact_peer && + ini->first_contact_local) { /* Server reuses a link group, but Client wants to start * a new one * send out_of_sync decline, reason synchr. error @@ -1338,7 +1337,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) } create: - if (ini->cln_first_contact == SMC_FIRST_CONTACT) { + if (ini->first_contact_local) { rc = smc_lgr_create(smc, ini); if (rc) goto out; @@ -1597,7 +1596,7 @@ out: return rc; } -#define SMCD_DMBE_SIZES 7 /* 0 -> 16KB, 1 -> 32KB, .. 
6 -> 1MB */ +#define SMCD_DMBE_SIZES 6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */ static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr, bool is_dmb, int bufsize) @@ -1616,7 +1615,8 @@ static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr, rc = smc_ism_register_dmb(lgr, bufsize, buf_desc); if (rc) { kfree(buf_desc); - return (rc == -ENOMEM) ? ERR_PTR(-EAGAIN) : ERR_PTR(rc); + return (rc == -ENOMEM) ? ERR_PTR(-EAGAIN) : + ERR_PTR(-EIO); } buf_desc->pages = virt_to_page(buf_desc->cpu_addr); /* CDC header stored in buf. So, pretend it was smaller */ @@ -1892,8 +1892,8 @@ int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_link *lnk, struct smc_clc_msg_accept_confirm *clc) { - conn->rtoken_idx = smc_rtoken_add(lnk, clc->rmb_dma_addr, - clc->rmb_rkey); + conn->rtoken_idx = smc_rtoken_add(lnk, clc->r0.rmb_dma_addr, + clc->r0.rmb_rkey); if (conn->rtoken_idx < 0) return conn->rtoken_idx; return 0; diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 1c4d5439d0ff..f1e867ce2e63 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -137,9 +137,6 @@ struct smc_link { #define SMC_LINKS_PER_LGR_MAX 3 #define SMC_SINGLE_LINK 0 -#define SMC_FIRST_CONTACT 1 /* first contact to a peer */ -#define SMC_REUSE_CONTACT 0 /* follow-on contact to a peer*/ - /* tx/rx buffer list element for sndbufs list and rmbs list of a lgr */ struct smc_buf_desc { struct list_head list; @@ -228,12 +225,17 @@ struct smc_link_group { u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */ struct delayed_work free_work; /* delayed freeing of an lgr */ struct work_struct terminate_work; /* abnormal lgr termination */ + struct workqueue_struct *tx_wq; /* wq for conn. 
tx workers */ u8 sync_err : 1; /* lgr no longer fits to peer */ u8 terminating : 1;/* lgr is terminating */ - u8 freefast : 1; /* free worker scheduled fast */ u8 freeing : 1; /* lgr is being freed */ bool is_smcd; /* SMC-R or SMC-D */ + u8 smc_version; + u8 negotiated_eid[SMC_MAX_EID_LEN]; + u8 peer_os; /* peer operating system */ + u8 peer_smc_release; + u8 peer_hostname[SMC_MAX_HOSTNAME_LEN]; union { struct { /* SMC-R */ enum smc_lgr_role role; @@ -294,9 +296,11 @@ struct smc_clc_msg_local; struct smc_init_info { u8 is_smcd; + u8 smc_type_v1; + u8 smc_type_v2; + u8 first_contact_peer; + u8 first_contact_local; unsigned short vlan_id; - int srv_first_contact; - int cln_first_contact; /* SMC-R */ struct smc_clc_msg_local *ib_lcl; struct smc_ib_device *ib_dev; @@ -304,8 +308,12 @@ struct smc_init_info { u8 ib_port; u32 ib_clcqpn; /* SMC-D */ - u64 ism_gid; - struct smcd_dev *ism_dev; + u64 ism_peer_gid[SMC_MAX_ISM_DEVS + 1]; + struct smcd_dev *ism_dev[SMC_MAX_ISM_DEVS + 1]; + u16 ism_chid[SMC_MAX_ISM_DEVS + 1]; + u8 ism_offered_cnt; /* # of ISM devices offered */ + u8 ism_selected; /* index of selected ISM dev*/ + u8 smcd_version; }; /* Find the connection associated with the given alert token in the link group. 
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index da9ba6d1679b..f15fca59b4b2 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -22,6 +22,15 @@ #include "smc.h" #include "smc_core.h" +struct smc_diag_dump_ctx { + int pos[2]; +}; + +static struct smc_diag_dump_ctx *smc_dump_context(struct netlink_callback *cb) +{ + return (struct smc_diag_dump_ctx *)cb->ctx; +} + static void smc_gid_be16_convert(__u8 *buf, u8 *gid_raw) { sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x", @@ -193,13 +202,15 @@ errout: } static int smc_diag_dump_proto(struct proto *prot, struct sk_buff *skb, - struct netlink_callback *cb) + struct netlink_callback *cb, int p_type) { + struct smc_diag_dump_ctx *cb_ctx = smc_dump_context(cb); struct net *net = sock_net(skb->sk); + int snum = cb_ctx->pos[p_type]; struct nlattr *bc = NULL; struct hlist_head *head; + int rc = 0, num = 0; struct sock *sk; - int rc = 0; read_lock(&prot->h.smc_hash->lock); head = &prot->h.smc_hash->ht; @@ -209,13 +220,18 @@ static int smc_diag_dump_proto(struct proto *prot, struct sk_buff *skb, sk_for_each(sk, head) { if (!net_eq(sock_net(sk), net)) continue; + if (num < snum) + goto next; rc = __smc_diag_dump(sk, skb, cb, nlmsg_data(cb->nlh), bc); - if (rc) - break; + if (rc < 0) + goto out; +next: + num++; } out: read_unlock(&prot->h.smc_hash->lock); + cb_ctx->pos[p_type] = num; return rc; } @@ -223,10 +239,10 @@ static int smc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) { int rc = 0; - rc = smc_diag_dump_proto(&smc_proto, skb, cb); + rc = smc_diag_dump_proto(&smc_proto, skb, cb, SMCPROTO_SMC); if (!rc) - rc = smc_diag_dump_proto(&smc_proto6, skb, cb); - return rc; + smc_diag_dump_proto(&smc_proto6, skb, cb, SMCPROTO_SMC6); + return skb->len; } static int smc_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index 998c525de785..6abbdd09a580 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -21,7 +21,9 @@ struct 
smcd_dev_list smcd_dev_list = { .mutex = __MUTEX_INITIALIZER(smcd_dev_list.mutex) }; -/* Test if an ISM communication is possible. */ +bool smc_ism_v2_capable; + +/* Test if an ISM communication is possible - same CPC */ int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *smcd) { return smcd->ops->query_remote_gid(smcd, peer_gid, vlan_id ? 1 : 0, @@ -39,6 +41,16 @@ int smc_ism_write(struct smcd_dev *smcd, const struct smc_ism_position *pos, return rc < 0 ? rc : 0; } +void smc_ism_get_system_eid(struct smcd_dev *smcd, u8 **eid) +{ + smcd->ops->get_system_eid(smcd, eid); +} + +u16 smc_ism_get_chid(struct smcd_dev *smcd) +{ + return smcd->ops->get_chid(smcd); +} + /* Set a connection using this DMBE. */ void smc_ism_set_conn(struct smc_connection *conn) { @@ -319,7 +331,18 @@ EXPORT_SYMBOL_GPL(smcd_alloc_dev); int smcd_register_dev(struct smcd_dev *smcd) { mutex_lock(&smcd_dev_list.mutex); - list_add_tail(&smcd->list, &smcd_dev_list.list); + if (list_empty(&smcd_dev_list.list)) { + u8 *system_eid = NULL; + + smc_ism_get_system_eid(smcd, &system_eid); + if (system_eid[24] != '0' || system_eid[28] != '0') + smc_ism_v2_capable = true; + } + /* sort list: devices without pnetid before devices with pnetid */ + if (smcd->pnetid[0]) + list_add_tail(&smcd->list, &smcd_dev_list.list); + else + list_add(&smcd->list, &smcd_dev_list.list); mutex_unlock(&smcd_dev_list.mutex); pr_warn_ratelimited("smc: adding smcd device %s with pnetid %.16s%s\n", @@ -399,3 +422,8 @@ void smcd_handle_irq(struct smcd_dev *smcd, unsigned int dmbno) spin_unlock_irqrestore(&smcd->lock, flags); } EXPORT_SYMBOL_GPL(smcd_handle_irq); + +void __init smc_ism_init(void) +{ + smc_ism_v2_capable = false; +} diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h index 81cc4537efd3..8048e09ddcf8 100644 --- a/net/smc/smc_ism.h +++ b/net/smc/smc_ism.h @@ -19,7 +19,10 @@ struct smcd_dev_list { /* List of SMCD devices */ struct mutex mutex; /* Protects list of devices */ }; -extern struct 
smcd_dev_list smcd_dev_list; /* list of smcd devices */ +extern struct smcd_dev_list smcd_dev_list; /* list of smcd devices */ +extern bool smc_ism_v2_capable; /* HW supports ISM V2 and thus + * System EID is defined + */ struct smc_ism_vlanid { /* VLAN id set on ISM device */ struct list_head list; @@ -47,4 +50,7 @@ int smc_ism_unregister_dmb(struct smcd_dev *dev, struct smc_buf_desc *dmb_desc); int smc_ism_write(struct smcd_dev *dev, const struct smc_ism_position *pos, void *data, size_t len); int smc_ism_signal_shutdown(struct smc_link_group *lgr); +void smc_ism_get_system_eid(struct smcd_dev *dev, u8 **eid); +u16 smc_ism_get_chid(struct smcd_dev *dev); +void smc_ism_init(void); #endif diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 3ea33466ebe9..273eaf1bfe49 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -233,8 +233,6 @@ static bool smc_llc_flow_start(struct smc_llc_flow *flow, default: flow->type = SMC_LLC_FLOW_NONE; } - if (qentry == lgr->delayed_event) - lgr->delayed_event = NULL; smc_llc_flow_qentry_set(flow, qentry); spin_unlock_bh(&lgr->llc_flow_lock); return true; @@ -1209,7 +1207,7 @@ static void smc_llc_process_srv_add_link(struct smc_link_group *lgr) /* enqueue a local add_link req to trigger a new add_link flow */ void smc_llc_add_link_local(struct smc_link *link) { - struct smc_llc_msg_add_link add_llc = {0}; + struct smc_llc_msg_add_link add_llc = {}; add_llc.hd.length = sizeof(add_llc); add_llc.hd.common.type = SMC_LLC_ADD_LINK; @@ -1242,7 +1240,7 @@ out: */ void smc_llc_srv_delete_link_local(struct smc_link *link, u8 del_link_id) { - struct smc_llc_msg_del_link del_llc = {0}; + struct smc_llc_msg_del_link del_llc = {}; del_llc.hd.length = sizeof(del_llc); del_llc.hd.common.type = SMC_LLC_DELETE_LINK; @@ -1314,7 +1312,7 @@ out: */ void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn) { - struct smc_llc_msg_del_link delllc = {0}; + struct smc_llc_msg_del_link delllc = {}; int i; delllc.hd.common.type = 
SMC_LLC_DELETE_LINK; @@ -1603,13 +1601,12 @@ static void smc_llc_event_work(struct work_struct *work) struct smc_llc_qentry *qentry; if (!lgr->llc_flow_lcl.type && lgr->delayed_event) { - if (smc_link_usable(lgr->delayed_event->link)) { - smc_llc_event_handler(lgr->delayed_event); - } else { - qentry = lgr->delayed_event; - lgr->delayed_event = NULL; + qentry = lgr->delayed_event; + lgr->delayed_event = NULL; + if (smc_link_usable(qentry->link)) + smc_llc_event_handler(qentry); + else kfree(qentry); - } } again: @@ -1691,7 +1688,7 @@ static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc) spin_lock_irqsave(&lgr->llc_event_q_lock, flags); list_add_tail(&qentry->list, &lgr->llc_event_q); spin_unlock_irqrestore(&lgr->llc_event_q_lock, flags); - schedule_work(&lgr->llc_event_work); + queue_work(system_highpri_wq, &lgr->llc_event_work); } /* copy received msg and add it to the event queue */ diff --git a/net/smc/smc_netns.h b/net/smc/smc_netns.h index e7a8fc4ae02f..0f4f35aa43ad 100644 --- a/net/smc/smc_netns.h +++ b/net/smc/smc_netns.h @@ -16,5 +16,6 @@ extern unsigned int smc_net_id; /* per-network namespace private data */ struct smc_net { struct smc_pnettable pnettable; + struct smc_pnetids_ndev pnetids_ndev; }; #endif diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 30e5fac7034e..f3c18b991d35 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -29,8 +29,7 @@ #include "smc_ism.h" #include "smc_core.h" -#define SMC_ASCII_BLANK 32 - +static struct net_device *__pnet_find_base_ndev(struct net_device *ndev); static struct net_device *pnet_find_base_ndev(struct net_device *ndev); static const struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { @@ -73,14 +72,22 @@ struct smc_pnetentry { }; }; +/* Check if the pnetid is set */ +bool smc_pnet_is_pnetid_set(u8 *pnetid) +{ + if (pnetid[0] == 0 || pnetid[0] == _S) + return false; + return true; +} + /* Check if two given pnetids match */ static bool smc_pnet_match(u8 *pnetid1, u8 
*pnetid2) { int i; for (i = 0; i < SMC_MAX_PNETID_LEN; i++) { - if ((pnetid1[i] == 0 || pnetid1[i] == SMC_ASCII_BLANK) && - (pnetid2[i] == 0 || pnetid2[i] == SMC_ASCII_BLANK)) + if ((pnetid1[i] == 0 || pnetid1[i] == _S) && + (pnetid2[i] == 0 || pnetid2[i] == _S)) break; if (pnetid1[i] != pnetid2[i]) return false; @@ -238,11 +245,10 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev) static bool smc_pnet_apply_ib(struct smc_ib_device *ib_dev, u8 ib_port, char *pnet_name) { - u8 pnet_null[SMC_MAX_PNETID_LEN] = {0}; bool applied = false; mutex_lock(&smc_ib_devices.mutex); - if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) { + if (!smc_pnet_is_pnetid_set(ib_dev->pnetid[ib_port - 1])) { memcpy(ib_dev->pnetid[ib_port - 1], pnet_name, SMC_MAX_PNETID_LEN); ib_dev->pnetid_by_user[ib_port - 1] = true; @@ -256,11 +262,10 @@ static bool smc_pnet_apply_ib(struct smc_ib_device *ib_dev, u8 ib_port, */ static bool smc_pnet_apply_smcd(struct smcd_dev *smcd_dev, char *pnet_name) { - u8 pnet_null[SMC_MAX_PNETID_LEN] = {0}; bool applied = false; mutex_lock(&smcd_dev_list.mutex); - if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) { + if (!smc_pnet_is_pnetid_set(smcd_dev->pnetid)) { memcpy(smcd_dev->pnetid, pnet_name, SMC_MAX_PNETID_LEN); smcd_dev->pnetid_by_user = true; applied = true; @@ -708,10 +713,115 @@ static struct genl_family smc_pnet_nl_family __ro_after_init = { .n_ops = ARRAY_SIZE(smc_pnet_ops) }; +bool smc_pnet_is_ndev_pnetid(struct net *net, u8 *pnetid) +{ + struct smc_net *sn = net_generic(net, smc_net_id); + struct smc_pnetids_ndev_entry *pe; + bool rc = false; + + read_lock(&sn->pnetids_ndev.lock); + list_for_each_entry(pe, &sn->pnetids_ndev.list, list) { + if (smc_pnet_match(pnetid, pe->pnetid)) { + rc = true; + goto unlock; + } + } + +unlock: + read_unlock(&sn->pnetids_ndev.lock); + return rc; +} + +static int smc_pnet_add_pnetid(struct net *net, u8 *pnetid) +{ + struct smc_net *sn = net_generic(net, smc_net_id); + struct smc_pnetids_ndev_entry 
*pe, *pi; + + pe = kzalloc(sizeof(*pe), GFP_KERNEL); + if (!pe) + return -ENOMEM; + + write_lock(&sn->pnetids_ndev.lock); + list_for_each_entry(pi, &sn->pnetids_ndev.list, list) { + if (smc_pnet_match(pnetid, pe->pnetid)) { + refcount_inc(&pi->refcnt); + kfree(pe); + goto unlock; + } + } + refcount_set(&pe->refcnt, 1); + memcpy(pe->pnetid, pnetid, SMC_MAX_PNETID_LEN); + list_add_tail(&pe->list, &sn->pnetids_ndev.list); + +unlock: + write_unlock(&sn->pnetids_ndev.lock); + return 0; +} + +static void smc_pnet_remove_pnetid(struct net *net, u8 *pnetid) +{ + struct smc_net *sn = net_generic(net, smc_net_id); + struct smc_pnetids_ndev_entry *pe, *pe2; + + write_lock(&sn->pnetids_ndev.lock); + list_for_each_entry_safe(pe, pe2, &sn->pnetids_ndev.list, list) { + if (smc_pnet_match(pnetid, pe->pnetid)) { + if (refcount_dec_and_test(&pe->refcnt)) { + list_del(&pe->list); + kfree(pe); + } + break; + } + } + write_unlock(&sn->pnetids_ndev.lock); +} + +static void smc_pnet_add_base_pnetid(struct net *net, struct net_device *dev, + u8 *ndev_pnetid) +{ + struct net_device *base_dev; + + base_dev = __pnet_find_base_ndev(dev); + if (base_dev->flags & IFF_UP && + !smc_pnetid_by_dev_port(base_dev->dev.parent, base_dev->dev_port, + ndev_pnetid)) { + /* add to PNETIDs list */ + smc_pnet_add_pnetid(net, ndev_pnetid); + } +} + +/* create initial list of netdevice pnetids */ +static void smc_pnet_create_pnetids_list(struct net *net) +{ + u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; + struct net_device *dev; + + rtnl_lock(); + for_each_netdev(net, dev) + smc_pnet_add_base_pnetid(net, dev, ndev_pnetid); + rtnl_unlock(); +} + +/* clean up list of netdevice pnetids */ +static void smc_pnet_destroy_pnetids_list(struct net *net) +{ + struct smc_net *sn = net_generic(net, smc_net_id); + struct smc_pnetids_ndev_entry *pe, *temp_pe; + + write_lock(&sn->pnetids_ndev.lock); + list_for_each_entry_safe(pe, temp_pe, &sn->pnetids_ndev.list, list) { + list_del(&pe->list); + kfree(pe); + } + 
write_unlock(&sn->pnetids_ndev.lock); +} + static int smc_pnet_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); + struct net *net = dev_net(event_dev); + u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; switch (event) { case NETDEV_REBOOT: @@ -721,6 +831,17 @@ static int smc_pnet_netdev_event(struct notifier_block *this, case NETDEV_REGISTER: smc_pnet_add_by_ndev(event_dev); return NOTIFY_OK; + case NETDEV_UP: + smc_pnet_add_base_pnetid(net, event_dev, ndev_pnetid); + return NOTIFY_OK; + case NETDEV_DOWN: + event_dev = __pnet_find_base_ndev(event_dev); + if (!smc_pnetid_by_dev_port(event_dev->dev.parent, + event_dev->dev_port, ndev_pnetid)) { + /* remove from PNETIDs list */ + smc_pnet_remove_pnetid(net, ndev_pnetid); + } + return NOTIFY_OK; default: return NOTIFY_DONE; } @@ -735,9 +856,14 @@ int smc_pnet_net_init(struct net *net) { struct smc_net *sn = net_generic(net, smc_net_id); struct smc_pnettable *pnettable = &sn->pnettable; + struct smc_pnetids_ndev *pnetids_ndev = &sn->pnetids_ndev; INIT_LIST_HEAD(&pnettable->pnetlist); rwlock_init(&pnettable->lock); + INIT_LIST_HEAD(&pnetids_ndev->list); + rwlock_init(&pnetids_ndev->lock); + + smc_pnet_create_pnetids_list(net); return 0; } @@ -752,6 +878,7 @@ int __init smc_pnet_init(void) rc = register_netdevice_notifier(&smc_netdev_notifier); if (rc) genl_unregister_family(&smc_pnet_nl_family); + return rc; } @@ -760,6 +887,7 @@ void smc_pnet_net_exit(struct net *net) { /* flush pnet table */ smc_pnet_remove_by_pnetid(net, NULL); + smc_pnet_destroy_pnetids_list(net); } void smc_pnet_exit(void) @@ -768,16 +896,11 @@ void smc_pnet_exit(void) genl_unregister_family(&smc_pnet_nl_family); } -/* Determine one base device for stacked net devices. - * If the lower device level contains more than one devices - * (for instance with bonding slaves), just the first device - * is used to reach a base device. 
- */ -static struct net_device *pnet_find_base_ndev(struct net_device *ndev) +/* Lock-free variant of pnet_find_base_ndev(): descends ndev->lower_level levels of lower devices and returns the device reached. The caller must already hold RTNL (checked by ASSERT_RTNL()). */ +static struct net_device *__pnet_find_base_ndev(struct net_device *ndev) { int i, nest_lvl; - rtnl_lock(); + ASSERT_RTNL(); nest_lvl = ndev->lower_level; for (i = 0; i < nest_lvl; i++) { struct list_head *lower = &ndev->adj_list.lower; @@ -787,6 +910,18 @@ static struct net_device *pnet_find_base_ndev(struct net_device *ndev) lower = lower->next; ndev = netdev_lower_get_next(ndev, &lower); } + return ndev; +} + +/* Determine one base device for stacked net devices. + * If the lower device level contains more than one device + * (for instance with bonding slaves), just the first device + * is used to reach a base device. + * Takes and releases rtnl_lock around the lookup. + */ +static struct net_device *pnet_find_base_ndev(struct net_device *ndev) +{ + rtnl_lock(); + ndev = __pnet_find_base_ndev(ndev); rtnl_unlock(); return ndev; } @@ -928,8 +1063,11 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, mutex_lock(&smcd_dev_list.mutex); list_for_each_entry(ismdev, &smcd_dev_list.list, list) { if (smc_pnet_match(ismdev->pnetid, ndev_pnetid) && - !ismdev->going_away) { - ini->ism_dev = ismdev; + !ismdev->going_away && + (!ini->ism_peer_gid[0] || + !smc_ism_cantalk(ini->ism_peer_gid[0], ini->vlan_id, + ismdev))) { + ini->ism_dev[0] = ismdev; break; } } @@ -963,7 +1101,7 @@ void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini) { struct dst_entry *dst = sk_dst_get(sk); - ini->ism_dev = NULL; + ini->ism_dev[0] = NULL; if (!dst) goto out; if (!dst->dev) diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h index 811a65986691..14039272f7e4 100644 --- a/net/smc/smc_pnet.h +++ b/net/smc/smc_pnet.h @@ -12,6 +12,8 @@ #ifndef _SMC_PNET_H #define _SMC_PNET_H +#include <net/smc.h> + #if IS_ENABLED(CONFIG_HAVE_PNETID) #include <asm/pnet.h> #endif @@ -31,6 +33,17 @@ struct smc_pnettable { struct list_head pnetlist; }; +struct smc_pnetids_ndev { /* list of pnetids for net devices in UP state */ + struct list_head list; +
rwlock_t lock; +}; + +/* one pnetid on the smc_pnetids_ndev list */ +struct smc_pnetids_ndev_entry { + struct list_head list; /* linkage into smc_pnetids_ndev.list */ + u8 pnetid[SMC_MAX_PNETID_LEN]; + refcount_t refcnt; /* entry is unlinked and freed when this drops to zero */ +}; + static inline int smc_pnetid_by_dev_port(struct device *dev, unsigned short port, u8 *pnetid) { @@ -52,4 +65,6 @@ int smc_pnetid_by_table_smcd(struct smcd_dev *smcd); void smc_pnet_find_alt_roce(struct smc_link_group *lgr, struct smc_init_info *ini, struct smc_ib_device *known_dev); +bool smc_pnet_is_ndev_pnetid(struct net *net, u8 *pnetid); +bool smc_pnet_is_pnetid_set(u8 *pnetid); #endif diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 54ba0443847e..4532c16bf85e 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -228,8 +228,8 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len) /* for a corked socket defer the RDMA writes if there * is still sufficient sndbuf_space available */ - schedule_delayed_work(&conn->tx_work, - SMC_TX_CORK_DELAY); + queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work, + SMC_TX_CORK_DELAY); else smc_tx_sndbuf_nonempty(conn); } /* while (msg_data_left(msg)) */ @@ -499,7 +499,7 @@ static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn) if (conn->killed) return -EPIPE; rc = 0; - mod_delayed_work(system_wq, &conn->tx_work, + mod_delayed_work(conn->lgr->tx_wq, &conn->tx_work, SMC_TX_WORK_DELAY); } return rc; @@ -623,8 +623,8 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force) return; if ((smc_cdc_get_slot_and_msg_send(conn) < 0) && !conn->killed) { - schedule_delayed_work(&conn->tx_work, - SMC_TX_WORK_DELAY); + queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work, + SMC_TX_WORK_DELAY); return; } } |