summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--net/smc/af_smc.c66
-rw-r--r--net/smc/smc_clc.c8
-rw-r--r--net/smc/smc_clc.h2
-rw-r--r--net/smc/smc_core.c213
-rw-r--r--net/smc/smc_core.h10
-rw-r--r--net/smc/smc_ib.c15
-rw-r--r--net/smc/smc_llc.c33
-rw-r--r--net/smc/smc_rx.c90
-rw-r--r--net/smc/smc_tx.c9
9 files changed, 328 insertions, 118 deletions
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 9497a3b9ad09..6e70d9c10b78 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -487,6 +487,29 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}
+/* register the new vzalloced sndbuf on all links */
+static int smcr_lgr_reg_sndbufs(struct smc_link *link,
+ struct smc_buf_desc *snd_desc)
+{
+ struct smc_link_group *lgr = link->lgr;
+ int i, rc = 0;
+
+ if (!snd_desc->is_vm)
+ return -EINVAL;
+
+ /* protect against parallel smcr_link_reg_buf() */
+ mutex_lock(&lgr->llc_conf_mutex);
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (!smc_link_active(&lgr->lnk[i]))
+ continue;
+ rc = smcr_link_reg_buf(&lgr->lnk[i], snd_desc);
+ if (rc)
+ break;
+ }
+ mutex_unlock(&lgr->llc_conf_mutex);
+ return rc;
+}
+
/* register the new rmb on all links */
static int smcr_lgr_reg_rmbs(struct smc_link *link,
struct smc_buf_desc *rmb_desc)
@@ -498,13 +521,13 @@ static int smcr_lgr_reg_rmbs(struct smc_link *link,
if (rc)
return rc;
/* protect against parallel smc_llc_cli_rkey_exchange() and
- * parallel smcr_link_reg_rmb()
+ * parallel smcr_link_reg_buf()
*/
mutex_lock(&lgr->llc_conf_mutex);
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (!smc_link_active(&lgr->lnk[i]))
continue;
- rc = smcr_link_reg_rmb(&lgr->lnk[i], rmb_desc);
+ rc = smcr_link_reg_buf(&lgr->lnk[i], rmb_desc);
if (rc)
goto out;
}
@@ -550,8 +573,15 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc)
smc_wr_remember_qp_attr(link);
- if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
- return SMC_CLC_DECL_ERR_REGRMB;
+ /* reg the sndbuf if it was vzalloced */
+ if (smc->conn.sndbuf_desc->is_vm) {
+ if (smcr_link_reg_buf(link, smc->conn.sndbuf_desc))
+ return SMC_CLC_DECL_ERR_REGBUF;
+ }
+
+ /* reg the rmb */
+ if (smcr_link_reg_buf(link, smc->conn.rmb_desc))
+ return SMC_CLC_DECL_ERR_REGBUF;
/* confirm_rkey is implicit on 1st contact */
smc->conn.rmb_desc->is_conf_rkey = true;
@@ -1221,8 +1251,15 @@ static int smc_connect_rdma(struct smc_sock *smc,
goto connect_abort;
}
} else {
+ /* reg sendbufs if they were vzalloced */
+ if (smc->conn.sndbuf_desc->is_vm) {
+ if (smcr_lgr_reg_sndbufs(link, smc->conn.sndbuf_desc)) {
+ reason_code = SMC_CLC_DECL_ERR_REGBUF;
+ goto connect_abort;
+ }
+ }
if (smcr_lgr_reg_rmbs(link, smc->conn.rmb_desc)) {
- reason_code = SMC_CLC_DECL_ERR_REGRMB;
+ reason_code = SMC_CLC_DECL_ERR_REGBUF;
goto connect_abort;
}
}
@@ -1749,8 +1786,15 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc)
struct smc_llc_qentry *qentry;
int rc;
- if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
- return SMC_CLC_DECL_ERR_REGRMB;
+ /* reg the sndbuf if it was vzalloced*/
+ if (smc->conn.sndbuf_desc->is_vm) {
+ if (smcr_link_reg_buf(link, smc->conn.sndbuf_desc))
+ return SMC_CLC_DECL_ERR_REGBUF;
+ }
+
+ /* reg the rmb */
+ if (smcr_link_reg_buf(link, smc->conn.rmb_desc))
+ return SMC_CLC_DECL_ERR_REGBUF;
/* send CONFIRM LINK request to client over the RoCE fabric */
rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ);
@@ -2109,8 +2153,14 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, bool local_first)
struct smc_connection *conn = &new_smc->conn;
if (!local_first) {
+ /* reg sendbufs if they were vzalloced */
+ if (conn->sndbuf_desc->is_vm) {
+ if (smcr_lgr_reg_sndbufs(conn->lnk,
+ conn->sndbuf_desc))
+ return SMC_CLC_DECL_ERR_REGBUF;
+ }
if (smcr_lgr_reg_rmbs(conn->lnk, conn->rmb_desc))
- return SMC_CLC_DECL_ERR_REGRMB;
+ return SMC_CLC_DECL_ERR_REGBUF;
}
return 0;
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index f9f3f59c79de..1472f31480d8 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -1034,7 +1034,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
ETH_ALEN);
hton24(clc->r0.qpn, link->roce_qp->qp_num);
clc->r0.rmb_rkey =
- htonl(conn->rmb_desc->mr_rx[link->link_idx]->rkey);
+ htonl(conn->rmb_desc->mr[link->link_idx]->rkey);
clc->r0.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */
clc->r0.rmbe_alert_token = htonl(conn->alert_token_local);
switch (clc->hdr.type) {
@@ -1046,8 +1046,10 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
break;
}
clc->r0.rmbe_size = conn->rmbe_size_short;
- clc->r0.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
- (conn->rmb_desc->sgt[link->link_idx].sgl));
+ clc->r0.rmb_dma_addr = conn->rmb_desc->is_vm ?
+ cpu_to_be64((uintptr_t)conn->rmb_desc->cpu_addr) :
+ cpu_to_be64((u64)sg_dma_address
+ (conn->rmb_desc->sgt[link->link_idx].sgl));
hton24(clc->r0.psn, link->psn_initial);
if (version == SMC_V1) {
clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 83f02f131fc0..5fee545c9a10 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -62,7 +62,7 @@
#define SMC_CLC_DECL_INTERR 0x09990000 /* internal error */
#define SMC_CLC_DECL_ERR_RTOK 0x09990001 /* rtoken handling failed */
#define SMC_CLC_DECL_ERR_RDYLNK 0x09990002 /* ib ready link failed */
-#define SMC_CLC_DECL_ERR_REGRMB 0x09990003 /* reg rmb failed */
+#define SMC_CLC_DECL_ERR_REGBUF 0x09990003 /* reg rdma bufs failed */
#define SMC_FIRST_CONTACT_MASK 0b10 /* first contact bit within typev2 */
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 86afbbce4fca..f26770c29d78 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1087,34 +1087,37 @@ err_out:
return NULL;
}
-static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
+static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
struct smc_link_group *lgr)
{
+ struct mutex *lock; /* lock buffer list */
int rc;
- if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) {
+ if (is_rmb && buf_desc->is_conf_rkey && !list_empty(&lgr->list)) {
/* unregister rmb with peer */
rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
if (!rc) {
/* protect against smc_llc_cli_rkey_exchange() */
mutex_lock(&lgr->llc_conf_mutex);
- smc_llc_do_delete_rkey(lgr, rmb_desc);
- rmb_desc->is_conf_rkey = false;
+ smc_llc_do_delete_rkey(lgr, buf_desc);
+ buf_desc->is_conf_rkey = false;
mutex_unlock(&lgr->llc_conf_mutex);
smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
}
}
- if (rmb_desc->is_reg_err) {
+ if (buf_desc->is_reg_err) {
/* buf registration failed, reuse not possible */
- mutex_lock(&lgr->rmbs_lock);
- list_del(&rmb_desc->list);
- mutex_unlock(&lgr->rmbs_lock);
+ lock = is_rmb ? &lgr->rmbs_lock :
+ &lgr->sndbufs_lock;
+ mutex_lock(lock);
+ list_del(&buf_desc->list);
+ mutex_unlock(lock);
- smc_buf_free(lgr, true, rmb_desc);
+ smc_buf_free(lgr, is_rmb, buf_desc);
} else {
- rmb_desc->used = 0;
- memset(rmb_desc->cpu_addr, 0, rmb_desc->len);
+ buf_desc->used = 0;
+ memset(buf_desc->cpu_addr, 0, buf_desc->len);
}
}
@@ -1122,15 +1125,23 @@ static void smc_buf_unuse(struct smc_connection *conn,
struct smc_link_group *lgr)
{
if (conn->sndbuf_desc) {
- conn->sndbuf_desc->used = 0;
- memset(conn->sndbuf_desc->cpu_addr, 0, conn->sndbuf_desc->len);
+ if (!lgr->is_smcd && conn->sndbuf_desc->is_vm) {
+ smcr_buf_unuse(conn->sndbuf_desc, false, lgr);
+ } else {
+ conn->sndbuf_desc->used = 0;
+ memset(conn->sndbuf_desc->cpu_addr, 0,
+ conn->sndbuf_desc->len);
+ }
}
- if (conn->rmb_desc && lgr->is_smcd) {
- conn->rmb_desc->used = 0;
- memset(conn->rmb_desc->cpu_addr, 0, conn->rmb_desc->len +
- sizeof(struct smcd_cdc_msg));
- } else if (conn->rmb_desc) {
- smcr_buf_unuse(conn->rmb_desc, lgr);
+ if (conn->rmb_desc) {
+ if (!lgr->is_smcd) {
+ smcr_buf_unuse(conn->rmb_desc, true, lgr);
+ } else {
+ conn->rmb_desc->used = 0;
+ memset(conn->rmb_desc->cpu_addr, 0,
+ conn->rmb_desc->len +
+ sizeof(struct smcd_cdc_msg));
+ }
}
}
@@ -1178,20 +1189,21 @@ lgr_put:
static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb,
struct smc_link *lnk)
{
- if (is_rmb)
+ if (is_rmb || buf_desc->is_vm)
buf_desc->is_reg_mr[lnk->link_idx] = false;
if (!buf_desc->is_map_ib[lnk->link_idx])
return;
- if (is_rmb) {
- if (buf_desc->mr_rx[lnk->link_idx]) {
- smc_ib_put_memory_region(
- buf_desc->mr_rx[lnk->link_idx]);
- buf_desc->mr_rx[lnk->link_idx] = NULL;
- }
+
+ if ((is_rmb || buf_desc->is_vm) &&
+ buf_desc->mr[lnk->link_idx]) {
+ smc_ib_put_memory_region(buf_desc->mr[lnk->link_idx]);
+ buf_desc->mr[lnk->link_idx] = NULL;
+ }
+ if (is_rmb)
smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
- } else {
+ else
smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
- }
+
sg_free_table(&buf_desc->sgt[lnk->link_idx]);
buf_desc->is_map_ib[lnk->link_idx] = false;
}
@@ -1280,8 +1292,10 @@ static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]);
- if (buf_desc->pages)
+ if (!buf_desc->is_vm && buf_desc->pages)
__free_pages(buf_desc->pages, buf_desc->order);
+ else if (buf_desc->is_vm && buf_desc->cpu_addr)
+ vfree(buf_desc->cpu_addr);
kfree(buf_desc);
}
@@ -1993,26 +2007,50 @@ static inline int smc_rmb_wnd_update_limit(int rmbe_size)
return max_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}
-/* map an rmb buf to a link */
+/* map an buf to a link */
static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
struct smc_link *lnk)
{
- int rc;
+ int rc, i, nents, offset, buf_size, size, access_flags;
+ struct scatterlist *sg;
+ void *buf;
if (buf_desc->is_map_ib[lnk->link_idx])
return 0;
- rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL);
+ if (buf_desc->is_vm) {
+ buf = buf_desc->cpu_addr;
+ buf_size = buf_desc->len;
+ offset = offset_in_page(buf_desc->cpu_addr);
+ nents = PAGE_ALIGN(buf_size + offset) / PAGE_SIZE;
+ } else {
+ nents = 1;
+ }
+
+ rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], nents, GFP_KERNEL);
if (rc)
return rc;
- sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
- buf_desc->cpu_addr, buf_desc->len);
+
+ if (buf_desc->is_vm) {
+ /* virtually contiguous buffer */
+ for_each_sg(buf_desc->sgt[lnk->link_idx].sgl, sg, nents, i) {
+ size = min_t(int, PAGE_SIZE - offset, buf_size);
+ sg_set_page(sg, vmalloc_to_page(buf), size, offset);
+ buf += size / sizeof(*buf);
+ buf_size -= size;
+ offset = 0;
+ }
+ } else {
+ /* physically contiguous buffer */
+ sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
+ buf_desc->cpu_addr, buf_desc->len);
+ }
/* map sg table to DMA address */
rc = smc_ib_buf_map_sg(lnk, buf_desc,
is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
/* SMC protocol depends on mapping to one DMA address only */
- if (rc != 1) {
+ if (rc != nents) {
rc = -EAGAIN;
goto free_table;
}
@@ -2020,15 +2058,18 @@ static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
buf_desc->is_dma_need_sync |=
smc_ib_is_sg_need_sync(lnk, buf_desc) << lnk->link_idx;
- /* create a new memory region for the RMB */
- if (is_rmb) {
- rc = smc_ib_get_memory_region(lnk->roce_pd,
- IB_ACCESS_REMOTE_WRITE |
- IB_ACCESS_LOCAL_WRITE,
+ if (is_rmb || buf_desc->is_vm) {
+ /* create a new memory region for the RMB or vzalloced sndbuf */
+ access_flags = is_rmb ?
+ IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
+ IB_ACCESS_LOCAL_WRITE;
+
+ rc = smc_ib_get_memory_region(lnk->roce_pd, access_flags,
buf_desc, lnk->link_idx);
if (rc)
goto buf_unmap;
- smc_ib_sync_sg_for_device(lnk, buf_desc, DMA_FROM_DEVICE);
+ smc_ib_sync_sg_for_device(lnk, buf_desc,
+ is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
}
buf_desc->is_map_ib[lnk->link_idx] = true;
return 0;
@@ -2041,20 +2082,23 @@ free_table:
return rc;
}
-/* register a new rmb on IB device,
+/* register a new buf on IB device, rmb or vzalloced sndbuf
* must be called under lgr->llc_conf_mutex lock
*/
-int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc)
+int smcr_link_reg_buf(struct smc_link *link, struct smc_buf_desc *buf_desc)
{
if (list_empty(&link->lgr->list))
return -ENOLINK;
- if (!rmb_desc->is_reg_mr[link->link_idx]) {
- /* register memory region for new rmb */
- if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) {
- rmb_desc->is_reg_err = true;
+ if (!buf_desc->is_reg_mr[link->link_idx]) {
+ /* register memory region for new buf */
+ if (buf_desc->is_vm)
+ buf_desc->mr[link->link_idx]->iova =
+ (uintptr_t)buf_desc->cpu_addr;
+ if (smc_wr_reg_send(link, buf_desc->mr[link->link_idx])) {
+ buf_desc->is_reg_err = true;
return -EFAULT;
}
- rmb_desc->is_reg_mr[link->link_idx] = true;
+ buf_desc->is_reg_mr[link->link_idx] = true;
}
return 0;
}
@@ -2106,18 +2150,38 @@ int smcr_buf_reg_lgr(struct smc_link *lnk)
struct smc_buf_desc *buf_desc, *bf;
int i, rc = 0;
+ /* reg all RMBs for a new link */
mutex_lock(&lgr->rmbs_lock);
for (i = 0; i < SMC_RMBE_SIZES; i++) {
list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
if (!buf_desc->used)
continue;
- rc = smcr_link_reg_rmb(lnk, buf_desc);
- if (rc)
- goto out;
+ rc = smcr_link_reg_buf(lnk, buf_desc);
+ if (rc) {
+ mutex_unlock(&lgr->rmbs_lock);
+ return rc;
+ }
}
}
-out:
mutex_unlock(&lgr->rmbs_lock);
+
+ if (lgr->buf_type == SMCR_PHYS_CONT_BUFS)
+ return rc;
+
+ /* reg all vzalloced sndbufs for a new link */
+ mutex_lock(&lgr->sndbufs_lock);
+ for (i = 0; i < SMC_RMBE_SIZES; i++) {
+ list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i], list) {
+ if (!buf_desc->used || !buf_desc->is_vm)
+ continue;
+ rc = smcr_link_reg_buf(lnk, buf_desc);
+ if (rc) {
+ mutex_unlock(&lgr->sndbufs_lock);
+ return rc;
+ }
+ }
+ }
+ mutex_unlock(&lgr->sndbufs_lock);
return rc;
}
@@ -2131,18 +2195,39 @@ static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
if (!buf_desc)
return ERR_PTR(-ENOMEM);
- buf_desc->order = get_order(bufsize);
- buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
- __GFP_NOMEMALLOC | __GFP_COMP |
- __GFP_NORETRY | __GFP_ZERO,
- buf_desc->order);
- if (!buf_desc->pages) {
- kfree(buf_desc);
- return ERR_PTR(-EAGAIN);
- }
- buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);
- buf_desc->len = bufsize;
+ switch (lgr->buf_type) {
+ case SMCR_PHYS_CONT_BUFS:
+ case SMCR_MIXED_BUFS:
+ buf_desc->order = get_order(bufsize);
+ buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
+ __GFP_NOMEMALLOC | __GFP_COMP |
+ __GFP_NORETRY | __GFP_ZERO,
+ buf_desc->order);
+ if (buf_desc->pages) {
+ buf_desc->cpu_addr =
+ (void *)page_address(buf_desc->pages);
+ buf_desc->len = bufsize;
+ buf_desc->is_vm = false;
+ break;
+ }
+ if (lgr->buf_type == SMCR_PHYS_CONT_BUFS)
+ goto out;
+ fallthrough; // try virtually continguous buf
+ case SMCR_VIRT_CONT_BUFS:
+ buf_desc->order = get_order(bufsize);
+ buf_desc->cpu_addr = vzalloc(PAGE_SIZE << buf_desc->order);
+ if (!buf_desc->cpu_addr)
+ goto out;
+ buf_desc->pages = NULL;
+ buf_desc->len = bufsize;
+ buf_desc->is_vm = true;
+ break;
+ }
return buf_desc;
+
+out:
+ kfree(buf_desc);
+ return ERR_PTR(-EAGAIN);
}
/* map buf_desc on all usable links,
@@ -2273,7 +2358,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
if (!is_smcd) {
if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
- smcr_buf_unuse(buf_desc, lgr);
+ smcr_buf_unuse(buf_desc, is_rmb, lgr);
return -ENOMEM;
}
}
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 0261124a7a70..fe8b524ad846 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -168,9 +168,11 @@ struct smc_buf_desc {
struct { /* SMC-R */
struct sg_table sgt[SMC_LINKS_PER_LGR_MAX];
/* virtual buffer */
- struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX];
- /* for rmb only: memory region
+ struct ib_mr *mr[SMC_LINKS_PER_LGR_MAX];
+ /* memory region: for rmb and
+ * vzalloced sndbuf
* incl. rkey provided to peer
+ * and lkey provided to local
*/
u32 order; /* allocation order */
@@ -183,6 +185,8 @@ struct smc_buf_desc {
u8 is_dma_need_sync;
u8 is_reg_err;
/* buffer registration err */
+ u8 is_vm;
+ /* virtually contiguous */
};
struct { /* SMC-D */
unsigned short sba_idx;
@@ -543,7 +547,7 @@ int smcr_buf_reg_lgr(struct smc_link *lnk);
void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type);
void smcr_lgr_set_type_asym(struct smc_link_group *lgr,
enum smc_lgr_type new_type, int asym_lnk_idx);
-int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc);
+int smcr_link_reg_buf(struct smc_link *link, struct smc_buf_desc *rmb_desc);
struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
struct smc_link *from_lnk, bool is_dev_err);
void smcr_link_down_cond(struct smc_link *lnk);
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 60e5095890b1..854772dd52fd 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -698,7 +698,7 @@ static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot, u8 link_idx)
int sg_num;
/* map the largest prefix of a dma mapped SG list */
- sg_num = ib_map_mr_sg(buf_slot->mr_rx[link_idx],
+ sg_num = ib_map_mr_sg(buf_slot->mr[link_idx],
buf_slot->sgt[link_idx].sgl,
buf_slot->sgt[link_idx].orig_nents,
&offset, PAGE_SIZE);
@@ -710,20 +710,21 @@ static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot, u8 link_idx)
int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
struct smc_buf_desc *buf_slot, u8 link_idx)
{
- if (buf_slot->mr_rx[link_idx])
+ if (buf_slot->mr[link_idx])
return 0; /* already done */
- buf_slot->mr_rx[link_idx] =
+ buf_slot->mr[link_idx] =
ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 1 << buf_slot->order);
- if (IS_ERR(buf_slot->mr_rx[link_idx])) {
+ if (IS_ERR(buf_slot->mr[link_idx])) {
int rc;
- rc = PTR_ERR(buf_slot->mr_rx[link_idx]);
- buf_slot->mr_rx[link_idx] = NULL;
+ rc = PTR_ERR(buf_slot->mr[link_idx]);
+ buf_slot->mr[link_idx] = NULL;
return rc;
}
- if (smc_ib_map_mr_sg(buf_slot, link_idx) != 1)
+ if (smc_ib_map_mr_sg(buf_slot, link_idx) !=
+ buf_slot->sgt[link_idx].orig_nents)
return -EINVAL;
return 0;
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index c4d057b2941d..1d83fa9ae56f 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -505,19 +505,22 @@ static int smc_llc_send_confirm_rkey(struct smc_link *send_link,
if (smc_link_active(link) && link != send_link) {
rkeyllc->rtoken[rtok_ix].link_id = link->link_id;
rkeyllc->rtoken[rtok_ix].rmb_key =
- htonl(rmb_desc->mr_rx[link->link_idx]->rkey);
- rkeyllc->rtoken[rtok_ix].rmb_vaddr = cpu_to_be64(
- (u64)sg_dma_address(
- rmb_desc->sgt[link->link_idx].sgl));
+ htonl(rmb_desc->mr[link->link_idx]->rkey);
+ rkeyllc->rtoken[rtok_ix].rmb_vaddr = rmb_desc->is_vm ?
+ cpu_to_be64((uintptr_t)rmb_desc->cpu_addr) :
+ cpu_to_be64((u64)sg_dma_address
+ (rmb_desc->sgt[link->link_idx].sgl));
rtok_ix++;
}
}
/* rkey of send_link is in rtoken[0] */
rkeyllc->rtoken[0].num_rkeys = rtok_ix - 1;
rkeyllc->rtoken[0].rmb_key =
- htonl(rmb_desc->mr_rx[send_link->link_idx]->rkey);
- rkeyllc->rtoken[0].rmb_vaddr = cpu_to_be64(
- (u64)sg_dma_address(rmb_desc->sgt[send_link->link_idx].sgl));
+ htonl(rmb_desc->mr[send_link->link_idx]->rkey);
+ rkeyllc->rtoken[0].rmb_vaddr = rmb_desc->is_vm ?
+ cpu_to_be64((uintptr_t)rmb_desc->cpu_addr) :
+ cpu_to_be64((u64)sg_dma_address
+ (rmb_desc->sgt[send_link->link_idx].sgl));
/* send llc message */
rc = smc_wr_tx_send(send_link, pend);
put_out:
@@ -544,7 +547,7 @@ static int smc_llc_send_delete_rkey(struct smc_link *link,
rkeyllc->hd.common.llc_type = SMC_LLC_DELETE_RKEY;
smc_llc_init_msg_hdr(&rkeyllc->hd, link->lgr, sizeof(*rkeyllc));
rkeyllc->num_rkeys = 1;
- rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[link->link_idx]->rkey);
+ rkeyllc->rkey[0] = htonl(rmb_desc->mr[link->link_idx]->rkey);
/* send llc message */
rc = smc_wr_tx_send(link, pend);
put_out:
@@ -614,9 +617,10 @@ static int smc_llc_fill_ext_v2(struct smc_llc_msg_add_link_v2_ext *ext,
if (!buf_pos)
break;
rmb = buf_pos;
- ext->rt[i].rmb_key = htonl(rmb->mr_rx[prim_lnk_idx]->rkey);
- ext->rt[i].rmb_key_new = htonl(rmb->mr_rx[lnk_idx]->rkey);
- ext->rt[i].rmb_vaddr_new =
+ ext->rt[i].rmb_key = htonl(rmb->mr[prim_lnk_idx]->rkey);
+ ext->rt[i].rmb_key_new = htonl(rmb->mr[lnk_idx]->rkey);
+ ext->rt[i].rmb_vaddr_new = rmb->is_vm ?
+ cpu_to_be64((uintptr_t)rmb->cpu_addr) :
cpu_to_be64((u64)sg_dma_address(rmb->sgt[lnk_idx].sgl));
buf_pos = smc_llc_get_next_rmb(lgr, &buf_lst, buf_pos);
while (buf_pos && !(buf_pos)->used)
@@ -852,9 +856,10 @@ static int smc_llc_add_link_cont(struct smc_link *link,
}
rmb = *buf_pos;
- addc_llc->rt[i].rmb_key = htonl(rmb->mr_rx[prim_lnk_idx]->rkey);
- addc_llc->rt[i].rmb_key_new = htonl(rmb->mr_rx[lnk_idx]->rkey);
- addc_llc->rt[i].rmb_vaddr_new =
+ addc_llc->rt[i].rmb_key = htonl(rmb->mr[prim_lnk_idx]->rkey);
+ addc_llc->rt[i].rmb_key_new = htonl(rmb->mr[lnk_idx]->rkey);
+ addc_llc->rt[i].rmb_vaddr_new = rmb->is_vm ?
+ cpu_to_be64((uintptr_t)rmb->cpu_addr) :
cpu_to_be64((u64)sg_dma_address(rmb->sgt[lnk_idx].sgl));
(*num_rkeys_todo)--;
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index 00ad004835e6..17c5aee7ee4f 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -145,35 +145,93 @@ static void smc_rx_spd_release(struct splice_pipe_desc *spd,
static int smc_rx_splice(struct pipe_inode_info *pipe, char *src, size_t len,
struct smc_sock *smc)
{
+ struct smc_link_group *lgr = smc->conn.lgr;
+ int offset = offset_in_page(src);
+ struct partial_page *partial;
struct splice_pipe_desc spd;
- struct partial_page partial;
- struct smc_spd_priv *priv;
- int bytes;
+ struct smc_spd_priv **priv;
+ struct page **pages;
+ int bytes, nr_pages;
+ int i;
- priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ nr_pages = !lgr->is_smcd && smc->conn.rmb_desc->is_vm ?
+ PAGE_ALIGN(len + offset) / PAGE_SIZE : 1;
+
+ pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL);
+ if (!pages)
+ goto out;
+ partial = kcalloc(nr_pages, sizeof(*partial), GFP_KERNEL);
+ if (!partial)
+ goto out_page;
+ priv = kcalloc(nr_pages, sizeof(*priv), GFP_KERNEL);
if (!priv)
- return -ENOMEM;
- priv->len = len;
- priv->smc = smc;
- partial.offset = src - (char *)smc->conn.rmb_desc->cpu_addr;
- partial.len = len;
- partial.private = (unsigned long)priv;
-
- spd.nr_pages_max = 1;
- spd.nr_pages = 1;
- spd.pages = &smc->conn.rmb_desc->pages;
- spd.partial = &partial;
+ goto out_part;
+ for (i = 0; i < nr_pages; i++) {
+ priv[i] = kzalloc(sizeof(**priv), GFP_KERNEL);
+ if (!priv[i])
+ goto out_priv;
+ }
+
+ if (lgr->is_smcd ||
+ (!lgr->is_smcd && !smc->conn.rmb_desc->is_vm)) {
+ /* smcd or smcr that uses physically contiguous RMBs */
+ priv[0]->len = len;
+ priv[0]->smc = smc;
+ partial[0].offset = src - (char *)smc->conn.rmb_desc->cpu_addr;
+ partial[0].len = len;
+ partial[0].private = (unsigned long)priv[0];
+ pages[0] = smc->conn.rmb_desc->pages;
+ } else {
+ int size, left = len;
+ void *buf = src;
+ /* smcr that uses virtually contiguous RMBs*/
+ for (i = 0; i < nr_pages; i++) {
+ size = min_t(int, PAGE_SIZE - offset, left);
+ priv[i]->len = size;
+ priv[i]->smc = smc;
+ pages[i] = vmalloc_to_page(buf);
+ partial[i].offset = offset;
+ partial[i].len = size;
+ partial[i].private = (unsigned long)priv[i];
+ buf += size / sizeof(*buf);
+ left -= size;
+ offset = 0;
+ }
+ }
+ spd.nr_pages_max = nr_pages;
+ spd.nr_pages = nr_pages;
+ spd.pages = pages;
+ spd.partial = partial;
spd.ops = &smc_pipe_ops;
spd.spd_release = smc_rx_spd_release;
bytes = splice_to_pipe(pipe, &spd);
if (bytes > 0) {
sock_hold(&smc->sk);
- get_page(smc->conn.rmb_desc->pages);
+ if (!lgr->is_smcd && smc->conn.rmb_desc->is_vm) {
+ for (i = 0; i < PAGE_ALIGN(bytes + offset) / PAGE_SIZE; i++)
+ get_page(pages[i]);
+ } else {
+ get_page(smc->conn.rmb_desc->pages);
+ }
atomic_add(bytes, &smc->conn.splice_pending);
}
+ kfree(priv);
+ kfree(partial);
+ kfree(pages);
return bytes;
+
+out_priv:
+ for (i = (i - 1); i >= 0; i--)
+ kfree(priv[i]);
+ kfree(priv);
+out_part:
+ kfree(partial);
+out_page:
+ kfree(pages);
+out:
+ return -ENOMEM;
}
static int smc_rx_data_available_and_no_splice_pend(struct smc_connection *conn)
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index ca0d5f57908c..4e8377657a62 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -383,6 +383,7 @@ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
dma_addr_t dma_addr =
sg_dma_address(conn->sndbuf_desc->sgt[link->link_idx].sgl);
+ u64 virt_addr = (uintptr_t)conn->sndbuf_desc->cpu_addr;
int src_len_sum = src_len, dst_len_sum = dst_len;
int sent_count = src_off;
int srcchunk, dstchunk;
@@ -395,7 +396,7 @@ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
u64 base_addr = dma_addr;
if (dst_len < link->qp_attr.cap.max_inline_data) {
- base_addr = (uintptr_t)conn->sndbuf_desc->cpu_addr;
+ base_addr = virt_addr;
wr->wr.send_flags |= IB_SEND_INLINE;
} else {
wr->wr.send_flags &= ~IB_SEND_INLINE;
@@ -403,8 +404,12 @@ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
num_sges = 0;
for (srcchunk = 0; srcchunk < 2; srcchunk++) {
- sge[srcchunk].addr = base_addr + src_off;
+ sge[srcchunk].addr = conn->sndbuf_desc->is_vm ?
+ (virt_addr + src_off) : (base_addr + src_off);
sge[srcchunk].length = src_len;
+ if (conn->sndbuf_desc->is_vm)
+ sge[srcchunk].lkey =
+ conn->sndbuf_desc->mr[link->link_idx]->lkey;
num_sges++;
src_off += src_len;