diff options
author | Roland Dreier <rolandd@cisco.com> | 2010-03-02 08:51:56 +0100 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2010-03-02 08:51:56 +0100 |
commit | 5c2187f0a184d6c5ec87aab403b79a8bb24a7988 (patch) | |
tree | 18d9e853fa3980f8f96e7fbc374bc64ea4c08d7f /drivers/infiniband | |
parent | Merge branch 'ipoib' into for-next (diff) | |
parent | IB/iser: Remove redundant locking from iser scsi command response flow (diff) | |
download | linux-5c2187f0a184d6c5ec87aab403b79a8bb24a7988.tar.xz linux-5c2187f0a184d6c5ec87aab403b79a8bb24a7988.zip |
Merge branch 'iser' into for-next
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- | drivers/infiniband/ulp/iser/iscsi_iser.c | 47 | ||||
-rw-r--r-- | drivers/infiniband/ulp/iser/iscsi_iser.h | 97 | ||||
-rw-r--r-- | drivers/infiniband/ulp/iser/iser_initiator.c | 506 | ||||
-rw-r--r-- | drivers/infiniband/ulp/iser/iser_memory.c | 64 | ||||
-rw-r--r-- | drivers/infiniband/ulp/iser/iser_verbs.c | 281 |
5 files changed, 391 insertions, 604 deletions
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 5f7a6fca0a4d..71237f8f78f7 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -128,6 +128,28 @@ static int iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode) return 0; } +int iser_initialize_task_headers(struct iscsi_task *task, + struct iser_tx_desc *tx_desc) +{ + struct iscsi_iser_conn *iser_conn = task->conn->dd_data; + struct iser_device *device = iser_conn->ib_conn->device; + struct iscsi_iser_task *iser_task = task->dd_data; + u64 dma_addr; + + dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc, + ISER_HEADERS_LEN, DMA_TO_DEVICE); + if (ib_dma_mapping_error(device->ib_device, dma_addr)) + return -ENOMEM; + + tx_desc->dma_addr = dma_addr; + tx_desc->tx_sg[0].addr = tx_desc->dma_addr; + tx_desc->tx_sg[0].length = ISER_HEADERS_LEN; + tx_desc->tx_sg[0].lkey = device->mr->lkey; + + iser_task->headers_initialized = 1; + iser_task->iser_conn = iser_conn; + return 0; +} /** * iscsi_iser_task_init - Initialize task * @task: iscsi task @@ -137,17 +159,17 @@ static int iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode) static int iscsi_iser_task_init(struct iscsi_task *task) { - struct iscsi_iser_conn *iser_conn = task->conn->dd_data; struct iscsi_iser_task *iser_task = task->dd_data; + if (!iser_task->headers_initialized) + if (iser_initialize_task_headers(task, &iser_task->desc)) + return -ENOMEM; + /* mgmt task */ - if (!task->sc) { - iser_task->desc.data = task->data; + if (!task->sc) return 0; - } iser_task->command_sent = 0; - iser_task->iser_conn = iser_conn; iser_task_rdma_init(iser_task); return 0; } @@ -168,7 +190,7 @@ iscsi_iser_mtask_xmit(struct iscsi_conn *conn, struct iscsi_task *task) { int error = 0; - iser_dbg("task deq [cid %d itt 0x%x]\n", conn->id, task->itt); + iser_dbg("mtask xmit [cid %d itt 0x%x]\n", conn->id, task->itt); error = iser_send_control(conn, task); @@ -178,9 +200,6 @@ iscsi_iser_mtask_xmit(struct iscsi_conn *conn, struct iscsi_task *task) * - if yes, the task is recycled at iscsi_complete_pdu * - if no, the task is recycled at iser_snd_completion */ - if (error && error != -ENOBUFS) - iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); - return error; } @@ -232,7 +251,7 @@ iscsi_iser_task_xmit(struct iscsi_task *task) task->imm_count, task->unsol_r2t.data_length); } - iser_dbg("task deq [cid %d itt 0x%x]\n", + iser_dbg("ctask xmit [cid %d itt 0x%x]\n", conn->id, task->itt); /* Send the cmd PDU */ @@ -248,8 +267,6 @@ iscsi_iser_task_xmit(struct iscsi_task *task) error = iscsi_iser_task_xmit_unsol_data(conn, task); iscsi_iser_task_xmit_exit: - if (error && error != -ENOBUFS) - iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); return error; } @@ -283,7 +300,7 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx) * due to issues with the login code re iser sematics * this not set in iscsi_conn_setup - FIXME */ - conn->max_recv_dlength = 128; + conn->max_recv_dlength = ISER_RECV_DATA_SEG_LEN; iser_conn = conn->dd_data; conn->dd_data = iser_conn; @@ -401,7 +418,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, struct Scsi_Host *shost; struct iser_conn *ib_conn; - shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 1); + shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0); if (!shost) return NULL; shost->transportt = iscsi_iser_scsi_transport; @@ -675,7 +692,7 @@ static int __init iser_init(void) memset(&ig, 0, sizeof(struct iser_global)); ig.desc_cache = kmem_cache_create("iser_descriptors", - sizeof (struct iser_desc), + sizeof(struct iser_tx_desc), 0, SLAB_HWCACHE_ALIGN, NULL); if (ig.desc_cache == NULL) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 9d529cae1f0d..036934cdcb92 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -102,9 +102,9 @@ #define ISER_MAX_TX_MISC_PDUS 6 /* NOOP_OUT(2), TEXT(1), * * SCSI_TMFUNC(2), LOGOUT(1) */ -#define ISER_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX + \ - ISER_MAX_RX_MISC_PDUS + \ - ISER_MAX_TX_MISC_PDUS) +#define ISER_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX) + +#define ISER_MIN_POSTED_RX (ISCSI_DEF_XMIT_CMDS_MAX >> 2) /* the max TX (send) WR supported by the iSER QP is defined by * * max_send_wr = T * (1 + D) + C ; D is how many inflight dataouts we expect * @@ -132,6 +132,12 @@ struct iser_hdr { __be64 read_va; } __attribute__((packed)); +/* Constant PDU lengths calculations */ +#define ISER_HEADERS_LEN (sizeof(struct iser_hdr) + sizeof(struct iscsi_hdr)) + +#define ISER_RECV_DATA_SEG_LEN 128 +#define ISER_RX_PAYLOAD_SIZE (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN) +#define ISER_RX_LOGIN_SIZE (ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN) /* Length of an object name string */ #define ISER_OBJECT_NAME_SIZE 64 @@ -187,51 +193,43 @@ struct iser_regd_buf { struct iser_mem_reg reg; /* memory registration info */ void *virt_addr; struct iser_device *device; /* device->device for dma_unmap */ - u64 dma_addr; /* if non zero, addr for dma_unmap */ enum dma_data_direction direction; /* direction for dma_unmap */ unsigned int data_size; - atomic_t ref_count; /* refcount, freed when dec to 0 */ -}; - -#define MAX_REGD_BUF_VECTOR_LEN 2 - -struct iser_dto { - struct iscsi_iser_task *task; - struct iser_conn *ib_conn; - int notify_enable; - - /* vector of registered buffers */ - unsigned int regd_vector_len; - struct iser_regd_buf *regd[MAX_REGD_BUF_VECTOR_LEN]; - - /* offset into the registered buffer may be specified */ - unsigned int offset[MAX_REGD_BUF_VECTOR_LEN]; - - /* a smaller size may be specified, if 0, then full size is used */ - unsigned int used_sz[MAX_REGD_BUF_VECTOR_LEN]; }; enum iser_desc_type { - ISCSI_RX, ISCSI_TX_CONTROL , ISCSI_TX_SCSI_COMMAND, ISCSI_TX_DATAOUT }; -struct iser_desc { +struct iser_tx_desc { struct iser_hdr iser_header; struct iscsi_hdr iscsi_header; - struct iser_regd_buf hdr_regd_buf; - void *data; /* used by RX & TX_CONTROL */ - struct iser_regd_buf data_regd_buf; /* used by RX & TX_CONTROL */ enum iser_desc_type type; - struct iser_dto dto; + u64 dma_addr; + /* sg[0] points to iser/iscsi headers, sg[1] optionally points to either + of immediate data, unsolicited data-out or control (login,text) */ + struct ib_sge tx_sg[2]; + int num_sge; }; +#define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \ + sizeof(u64) + sizeof(struct ib_sge))) +struct iser_rx_desc { + struct iser_hdr iser_header; + struct iscsi_hdr iscsi_header; + char data[ISER_RECV_DATA_SEG_LEN]; + u64 dma_addr; + struct ib_sge rx_sg; + char pad[ISER_RX_PAD_SIZE]; +} __attribute__((packed)); + struct iser_device { struct ib_device *ib_device; struct ib_pd *pd; - struct ib_cq *cq; + struct ib_cq *rx_cq; + struct ib_cq *tx_cq; struct ib_mr *mr; struct tasklet_struct cq_tasklet; struct list_head ig_list; /* entry in ig devices list */ @@ -250,15 +248,18 @@ struct iser_conn { struct ib_fmr_pool *fmr_pool; /* pool of IB FMRs */ int disc_evt_flag; /* disconn event delivered */ wait_queue_head_t wait; /* waitq for conn/disconn */ - atomic_t post_recv_buf_count; /* posted rx count */ + int post_recv_buf_count; /* posted rx count */ atomic_t post_send_buf_count; /* posted tx count */ - atomic_t unexpected_pdu_count;/* count of received * - * unexpected pdus * - * not yet retired */ char name[ISER_OBJECT_NAME_SIZE]; struct iser_page_vec *page_vec; /* represents SG to fmr maps* * maps serialized as tx is*/ struct list_head conn_list; /* entry in ig conn list */ + + char *login_buf; + u64 login_dma; + unsigned int rx_desc_head; + struct iser_rx_desc *rx_descs; + struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX]; }; struct iscsi_iser_conn { @@ -267,7 +268,7 @@ struct iscsi_iser_conn { }; struct iscsi_iser_task { - struct iser_desc desc; + struct iser_tx_desc desc; struct iscsi_iser_conn *iser_conn; enum iser_task_status status; int command_sent; /* set if command sent */ @@ -275,6 +276,7 @@ struct iscsi_iser_task { struct iser_regd_buf rdma_regd[ISER_DIRS_NUM];/* regd rdma buf */ struct iser_data_buf data[ISER_DIRS_NUM]; /* orig. data des*/ struct iser_data_buf data_copy[ISER_DIRS_NUM];/* contig. copy */ + int headers_initialized; }; struct iser_page_vec { @@ -322,22 +324,17 @@ void iser_conn_put(struct iser_conn *ib_conn); void iser_conn_terminate(struct iser_conn *ib_conn); -void iser_rcv_completion(struct iser_desc *desc, - unsigned long dto_xfer_len); +void iser_rcv_completion(struct iser_rx_desc *desc, + unsigned long dto_xfer_len, + struct iser_conn *ib_conn); -void iser_snd_completion(struct iser_desc *desc); +void iser_snd_completion(struct iser_tx_desc *desc, struct iser_conn *ib_conn); void iser_task_rdma_init(struct iscsi_iser_task *task); void iser_task_rdma_finalize(struct iscsi_iser_task *task); -void iser_dto_buffs_release(struct iser_dto *dto); - -int iser_regd_buff_release(struct iser_regd_buf *regd_buf); - -void iser_reg_single(struct iser_device *device, - struct iser_regd_buf *regd_buf, - enum dma_data_direction direction); +void iser_free_rx_descriptors(struct iser_conn *ib_conn); void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task, enum iser_data_dir cmd_dir); @@ -356,11 +353,9 @@ int iser_reg_page_vec(struct iser_conn *ib_conn, void iser_unreg_mem(struct iser_mem_reg *mem_reg); -int iser_post_recv(struct iser_desc *rx_desc); -int iser_post_send(struct iser_desc *tx_desc); - -int iser_conn_state_comp(struct iser_conn *ib_conn, - enum iser_ib_conn_state comp); +int iser_post_recvl(struct iser_conn *ib_conn); +int iser_post_recvm(struct iser_conn *ib_conn, int count); +int iser_post_send(struct iser_conn *ib_conn, struct iser_tx_desc *tx_desc); int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, struct iser_data_buf *data, @@ -368,4 +363,6 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, enum dma_data_direction dma_dir); void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task); +int iser_initialize_task_headers(struct iscsi_task *task, + struct iser_tx_desc *tx_desc); #endif diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 9de640200ad3..0b9ef0716588 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -39,29 +39,6 @@ #include "iscsi_iser.h" -/* Constant PDU lengths calculations */ -#define ISER_TOTAL_HEADERS_LEN (sizeof (struct iser_hdr) + \ - sizeof (struct iscsi_hdr)) - -/* iser_dto_add_regd_buff - increments the reference count for * - * the registered buffer & adds it to the DTO object */ -static void iser_dto_add_regd_buff(struct iser_dto *dto, - struct iser_regd_buf *regd_buf, - unsigned long use_offset, - unsigned long use_size) -{ - int add_idx; - - atomic_inc(®d_buf->ref_count); - - add_idx = dto->regd_vector_len; - dto->regd[add_idx] = regd_buf; - dto->used_sz[add_idx] = use_size; - dto->offset[add_idx] = use_offset; - - dto->regd_vector_len++; -} - /* Register user buffer memory and initialize passive rdma * dto descriptor. Total data size is stored in * iser_task->data[ISER_DIR_IN].data_len @@ -122,9 +99,9 @@ iser_prepare_write_cmd(struct iscsi_task *task, struct iscsi_iser_task *iser_task = task->dd_data; struct iser_regd_buf *regd_buf; int err; - struct iser_dto *send_dto = &iser_task->desc.dto; struct iser_hdr *hdr = &iser_task->desc.iser_header; struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT]; + struct ib_sge *tx_dsg = &iser_task->desc.tx_sg[1]; err = iser_dma_map_task_data(iser_task, buf_out, @@ -163,135 +140,100 @@ iser_prepare_write_cmd(struct iscsi_task *task, if (imm_sz > 0) { iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n", task->itt, imm_sz); - iser_dto_add_regd_buff(send_dto, - regd_buf, - 0, - imm_sz); + tx_dsg->addr = regd_buf->reg.va; + tx_dsg->length = imm_sz; + tx_dsg->lkey = regd_buf->reg.lkey; + iser_task->desc.num_sge = 2; } return 0; } -/** - * iser_post_receive_control - allocates, initializes and posts receive DTO. - */ -static int iser_post_receive_control(struct iscsi_conn *conn) +/* creates a new tx descriptor and adds header regd buffer */ +static void iser_create_send_desc(struct iser_conn *ib_conn, + struct iser_tx_desc *tx_desc) { - struct iscsi_iser_conn *iser_conn = conn->dd_data; - struct iser_desc *rx_desc; - struct iser_regd_buf *regd_hdr; - struct iser_regd_buf *regd_data; - struct iser_dto *recv_dto = NULL; - struct iser_device *device = iser_conn->ib_conn->device; - int rx_data_size, err; - int posts, outstanding_unexp_pdus; - - /* for the login sequence we must support rx of upto 8K; login is done - * after conn create/bind (connect) and conn stop/bind (reconnect), - * what's common for both schemes is that the connection is not started - */ - if (conn->c_stage != ISCSI_CONN_STARTED) - rx_data_size = ISCSI_DEF_MAX_RECV_SEG_LEN; - else /* FIXME till user space sets conn->max_recv_dlength correctly */ - rx_data_size = 128; - - outstanding_unexp_pdus = - atomic_xchg(&iser_conn->ib_conn->unexpected_pdu_count, 0); - - /* - * in addition to the response buffer, replace those consumed by - * unexpected pdus. - */ - for (posts = 0; posts < 1 + outstanding_unexp_pdus; posts++) { - rx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO); - if (rx_desc == NULL) { - iser_err("Failed to alloc desc for post recv %d\n", - posts); - err = -ENOMEM; - goto post_rx_cache_alloc_failure; - } - rx_desc->type = ISCSI_RX; - rx_desc->data = kmalloc(rx_data_size, GFP_NOIO); - if (rx_desc->data == NULL) { - iser_err("Failed to alloc data buf for post recv %d\n", - posts); - err = -ENOMEM; - goto post_rx_kmalloc_failure; - } - - recv_dto = &rx_desc->dto; - recv_dto->ib_conn = iser_conn->ib_conn; - recv_dto->regd_vector_len = 0; + struct iser_device *device = ib_conn->device; - regd_hdr = &rx_desc->hdr_regd_buf; - memset(regd_hdr, 0, sizeof(struct iser_regd_buf)); - regd_hdr->device = device; - regd_hdr->virt_addr = rx_desc; /* == &rx_desc->iser_header */ - regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN; + ib_dma_sync_single_for_cpu(device->ib_device, + tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); - iser_reg_single(device, regd_hdr, DMA_FROM_DEVICE); - - iser_dto_add_regd_buff(recv_dto, regd_hdr, 0, 0); + memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr)); + tx_desc->iser_header.flags = ISER_VER; - regd_data = &rx_desc->data_regd_buf; - memset(regd_data, 0, sizeof(struct iser_regd_buf)); - regd_data->device = device; - regd_data->virt_addr = rx_desc->data; - regd_data->data_size = rx_data_size; + tx_desc->num_sge = 1; - iser_reg_single(device, regd_data, DMA_FROM_DEVICE); + if (tx_desc->tx_sg[0].lkey != device->mr->lkey) { + tx_desc->tx_sg[0].lkey = device->mr->lkey; + iser_dbg("sdesc %p lkey mismatch, fixing\n", tx_desc); + } +} - iser_dto_add_regd_buff(recv_dto, regd_data, 0, 0); - err = iser_post_recv(rx_desc); - if (err) { - iser_err("Failed iser_post_recv for post %d\n", posts); - goto post_rx_post_recv_failure; - } +int iser_alloc_rx_descriptors(struct iser_conn *ib_conn) +{ + int i, j; + u64 dma_addr; + struct iser_rx_desc *rx_desc; + struct ib_sge *rx_sg; + struct iser_device *device = ib_conn->device; + + ib_conn->rx_descs = kmalloc(ISER_QP_MAX_RECV_DTOS * + sizeof(struct iser_rx_desc), GFP_KERNEL); + if (!ib_conn->rx_descs) + goto rx_desc_alloc_fail; + + rx_desc = ib_conn->rx_descs; + + for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++) { + dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc, + ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); + if (ib_dma_mapping_error(device->ib_device, dma_addr)) + goto rx_desc_dma_map_failed; + + rx_desc->dma_addr = dma_addr; + + rx_sg = &rx_desc->rx_sg; + rx_sg->addr = rx_desc->dma_addr; + rx_sg->length = ISER_RX_PAYLOAD_SIZE; + rx_sg->lkey = device->mr->lkey; } - /* all posts successful */ - return 0; -post_rx_post_recv_failure: - iser_dto_buffs_release(recv_dto); - kfree(rx_desc->data); -post_rx_kmalloc_failure: - kmem_cache_free(ig.desc_cache, rx_desc); -post_rx_cache_alloc_failure: - if (posts > 0) { - /* - * response buffer posted, but did not replace all unexpected - * pdu recv bufs. Ignore error, retry occurs next send - */ - outstanding_unexp_pdus -= (posts - 1); - err = 0; - } - atomic_add(outstanding_unexp_pdus, - &iser_conn->ib_conn->unexpected_pdu_count); + ib_conn->rx_desc_head = 0; + return 0; - return err; +rx_desc_dma_map_failed: + rx_desc = ib_conn->rx_descs; + for (j = 0; j < i; j++, rx_desc++) + ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr, + ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); + kfree(ib_conn->rx_descs); + ib_conn->rx_descs = NULL; +rx_desc_alloc_fail: + iser_err("failed allocating rx descriptors / data buffers\n"); + return -ENOMEM; } -/* creates a new tx descriptor and adds header regd buffer */ -static void iser_create_send_desc(struct iscsi_iser_conn *iser_conn, - struct iser_desc *tx_desc) +void iser_free_rx_descriptors(struct iser_conn *ib_conn) { - struct iser_regd_buf *regd_hdr = &tx_desc->hdr_regd_buf; - struct iser_dto *send_dto = &tx_desc->dto; + int i; + struct iser_rx_desc *rx_desc; + struct iser_device *device = ib_conn->device; - memset(regd_hdr, 0, sizeof(struct iser_regd_buf)); - regd_hdr->device = iser_conn->ib_conn->device; - regd_hdr->virt_addr = tx_desc; /* == &tx_desc->iser_header */ - regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN; + if (ib_conn->login_buf) { + ib_dma_unmap_single(device->ib_device, ib_conn->login_dma, + ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); + kfree(ib_conn->login_buf); + } - send_dto->ib_conn = iser_conn->ib_conn; - send_dto->notify_enable = 1; - send_dto->regd_vector_len = 0; + if (!ib_conn->rx_descs) + return; - memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr)); - tx_desc->iser_header.flags = ISER_VER; - - iser_dto_add_regd_buff(send_dto, regd_hdr, 0, 0); + rx_desc = ib_conn->rx_descs; + for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++) + ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr, + ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); + kfree(ib_conn->rx_descs); } /** @@ -301,46 +243,23 @@ int iser_conn_set_full_featured_mode(struct iscsi_conn *conn) { struct iscsi_iser_conn *iser_conn = conn->dd_data; - int i; - /* - * FIXME this value should be declared to the target during login with - * the MaxOutstandingUnexpectedPDUs key when supported - */ - int initial_post_recv_bufs_num = ISER_MAX_RX_MISC_PDUS; - - iser_dbg("Initially post: %d\n", initial_post_recv_bufs_num); + iser_dbg("Initially post: %d\n", ISER_MIN_POSTED_RX); /* Check that there is no posted recv or send buffers left - */ /* they must be consumed during the login phase */ - BUG_ON(atomic_read(&iser_conn->ib_conn->post_recv_buf_count) != 0); + BUG_ON(iser_conn->ib_conn->post_recv_buf_count != 0); BUG_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0); - /* Initial post receive buffers */ - for (i = 0; i < initial_post_recv_bufs_num; i++) { - if (iser_post_receive_control(conn) != 0) { - iser_err("Failed to post recv bufs at:%d conn:0x%p\n", - i, conn); - return -ENOMEM; - } - } - iser_dbg("Posted %d post recv bufs, conn:0x%p\n", i, conn); - return 0; -} + if (iser_alloc_rx_descriptors(iser_conn->ib_conn)) + return -ENOMEM; -static int -iser_check_xmit(struct iscsi_conn *conn, void *task) -{ - struct iscsi_iser_conn *iser_conn = conn->dd_data; + /* Initial post receive buffers */ + if (iser_post_recvm(iser_conn->ib_conn, ISER_MIN_POSTED_RX)) + return -ENOMEM; - if (atomic_read(&iser_conn->ib_conn->post_send_buf_count) == - ISER_QP_MAX_REQ_DTOS) { - iser_dbg("%ld can't xmit task %p\n",jiffies,task); - return -ENOBUFS; - } return 0; } - /** * iser_send_command - send command PDU */ @@ -349,27 +268,18 @@ int iser_send_command(struct iscsi_conn *conn, { struct iscsi_iser_conn *iser_conn = conn->dd_data; struct iscsi_iser_task *iser_task = task->dd_data; - struct iser_dto *send_dto = NULL; unsigned long edtl; - int err = 0; + int err; struct iser_data_buf *data_buf; struct iscsi_cmd *hdr = (struct iscsi_cmd *)task->hdr; struct scsi_cmnd *sc = task->sc; - - if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) { - iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn); - return -EPERM; - } - if (iser_check_xmit(conn, task)) - return -ENOBUFS; + struct iser_tx_desc *tx_desc = &iser_task->desc; edtl = ntohl(hdr->data_length); /* build the tx desc regd header and add it to the tx desc dto */ - iser_task->desc.type = ISCSI_TX_SCSI_COMMAND; - send_dto = &iser_task->desc.dto; - send_dto->task = iser_task; - iser_create_send_desc(iser_conn, &iser_task->desc); + tx_desc->type = ISCSI_TX_SCSI_COMMAND; + iser_create_send_desc(iser_conn->ib_conn, tx_desc); if (hdr->flags & ISCSI_FLAG_CMD_READ) data_buf = &iser_task->data[ISER_DIR_IN]; @@ -398,23 +308,13 @@ int iser_send_command(struct iscsi_conn *conn, goto send_command_error; } - iser_reg_single(iser_conn->ib_conn->device, - send_dto->regd[0], DMA_TO_DEVICE); - - if (iser_post_receive_control(conn) != 0) { - iser_err("post_recv failed!\n"); - err = -ENOMEM; - goto send_command_error; - } - iser_task->status = ISER_TASK_STATUS_STARTED; - err = iser_post_send(&iser_task->desc); + err = iser_post_send(iser_conn->ib_conn, tx_desc); if (!err) return 0; send_command_error: - iser_dto_buffs_release(send_dto); iser_err("conn %p failed task->itt %d err %d\n",conn, task->itt, err); return err; } @@ -428,20 +328,13 @@ int iser_send_data_out(struct iscsi_conn *conn, { struct iscsi_iser_conn *iser_conn = conn->dd_data; struct iscsi_iser_task *iser_task = task->dd_data; - struct iser_desc *tx_desc = NULL; - struct iser_dto *send_dto = NULL; + struct iser_tx_desc *tx_desc = NULL; + struct iser_regd_buf *regd_buf; unsigned long buf_offset; unsigned long data_seg_len; uint32_t itt; int err = 0; - - if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) { - iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn); - return -EPERM; - } - - if (iser_check_xmit(conn, task)) - return -ENOBUFS; + struct ib_sge *tx_dsg; itt = (__force uint32_t)hdr->itt; data_seg_len = ntoh24(hdr->dlength); @@ -450,28 +343,25 @@ int iser_send_data_out(struct iscsi_conn *conn, iser_dbg("%s itt %d dseg_len %d offset %d\n", __func__,(int)itt,(int)data_seg_len,(int)buf_offset); - tx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO); + tx_desc = kmem_cache_zalloc(ig.desc_cache, GFP_ATOMIC); if (tx_desc == NULL) { iser_err("Failed to alloc desc for post dataout\n"); return -ENOMEM; } tx_desc->type = ISCSI_TX_DATAOUT; + tx_desc->iser_header.flags = ISER_VER; memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr)); - /* build the tx desc regd header and add it to the tx desc dto */ - send_dto = &tx_desc->dto; - send_dto->task = iser_task; - iser_create_send_desc(iser_conn, tx_desc); - - iser_reg_single(iser_conn->ib_conn->device, - send_dto->regd[0], DMA_TO_DEVICE); + /* build the tx desc */ + iser_initialize_task_headers(task, tx_desc); - /* all data was registered for RDMA, we can use the lkey */ - iser_dto_add_regd_buff(send_dto, - &iser_task->rdma_regd[ISER_DIR_OUT], - buf_offset, - data_seg_len); + regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT]; + tx_dsg = &tx_desc->tx_sg[1]; + tx_dsg->addr = regd_buf->reg.va + buf_offset; + tx_dsg->length = data_seg_len; + tx_dsg->lkey = regd_buf->reg.lkey; + tx_desc->num_sge = 2; if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) { iser_err("Offset:%ld & DSL:%ld in Data-Out " @@ -485,12 +375,11 @@ int iser_send_data_out(struct iscsi_conn *conn, itt, buf_offset, data_seg_len); - err = iser_post_send(tx_desc); + err = iser_post_send(iser_conn->ib_conn, tx_desc); if (!err) return 0; send_data_out_error: - iser_dto_buffs_release(send_dto); kmem_cache_free(ig.desc_cache, tx_desc); iser_err("conn %p failed err %d\n",conn, err); return err; @@ -501,64 +390,44 @@ int iser_send_control(struct iscsi_conn *conn, { struct iscsi_iser_conn *iser_conn = conn->dd_data; struct iscsi_iser_task *iser_task = task->dd_data; - struct iser_desc *mdesc = &iser_task->desc; - struct iser_dto *send_dto = NULL; + struct iser_tx_desc *mdesc = &iser_task->desc; unsigned long data_seg_len; int err = 0; - struct iser_regd_buf *regd_buf; struct iser_device *device; - unsigned char opcode; - - if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) { - iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn); - return -EPERM; - } - - if (iser_check_xmit(conn, task)) - return -ENOBUFS; /* build the tx desc regd header and add it to the tx desc dto */ mdesc->type = ISCSI_TX_CONTROL; - send_dto = &mdesc->dto; - send_dto->task = NULL; - iser_create_send_desc(iser_conn, mdesc); + iser_create_send_desc(iser_conn->ib_conn, mdesc); device = iser_conn->ib_conn->device; - iser_reg_single(device, send_dto->regd[0], DMA_TO_DEVICE); - data_seg_len = ntoh24(task->hdr->dlength); if (data_seg_len > 0) { - regd_buf = &mdesc->data_regd_buf; - memset(regd_buf, 0, sizeof(struct iser_regd_buf)); - regd_buf->device = device; - regd_buf->virt_addr = task->data; - regd_buf->data_size = task->data_count; - iser_reg_single(device, regd_buf, - DMA_TO_DEVICE); - iser_dto_add_regd_buff(send_dto, regd_buf, - 0, - data_seg_len); + struct ib_sge *tx_dsg = &mdesc->tx_sg[1]; + if (task != conn->login_task) { + iser_err("data present on non login task!!!\n"); + goto send_control_error; + } + memcpy(iser_conn->ib_conn->login_buf, task->data, + task->data_count); + tx_dsg->addr = iser_conn->ib_conn->login_dma; + tx_dsg->length = data_seg_len; + tx_dsg->lkey = device->mr->lkey; + mdesc->num_sge = 2; } - opcode = task->hdr->opcode & ISCSI_OPCODE_MASK; - - /* post recv buffer for response if one is expected */ - if (!(opcode == ISCSI_OP_NOOP_OUT && task->hdr->itt == RESERVED_ITT)) { - if (iser_post_receive_control(conn) != 0) { - iser_err("post_rcv_buff failed!\n"); - err = -ENOMEM; + if (task == conn->login_task) { + err = iser_post_recvl(iser_conn->ib_conn); + if (err) goto send_control_error; - } } - err = iser_post_send(mdesc); + err = iser_post_send(iser_conn->ib_conn, mdesc); if (!err) return 0; send_control_error: - iser_dto_buffs_release(send_dto); iser_err("conn %p failed err %d\n",conn, err); return err; } @@ -566,104 +435,71 @@ send_control_error: /** * iser_rcv_dto_completion - recv DTO completion */ -void iser_rcv_completion(struct iser_desc *rx_desc, - unsigned long dto_xfer_len) +void iser_rcv_completion(struct iser_rx_desc *rx_desc, + unsigned long rx_xfer_len, + struct iser_conn *ib_conn) { - struct iser_dto *dto = &rx_desc->dto; - struct iscsi_iser_conn *conn = dto->ib_conn->iser_conn; - struct iscsi_task *task; - struct iscsi_iser_task *iser_task; + struct iscsi_iser_conn *conn = ib_conn->iser_conn; struct iscsi_hdr *hdr; - char *rx_data = NULL; - int rx_data_len = 0; - unsigned char opcode; - - hdr = &rx_desc->iscsi_header; + u64 rx_dma; + int rx_buflen, outstanding, count, err; + + /* differentiate between login to all other PDUs */ + if ((char *)rx_desc == ib_conn->login_buf) { + rx_dma = ib_conn->login_dma; + rx_buflen = ISER_RX_LOGIN_SIZE; + } else { + rx_dma = rx_desc->dma_addr; + rx_buflen = ISER_RX_PAYLOAD_SIZE; + } - iser_dbg("op 0x%x itt 0x%x\n", hdr->opcode,hdr->itt); + ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma, + rx_buflen, DMA_FROM_DEVICE); - if (dto_xfer_len > ISER_TOTAL_HEADERS_LEN) { /* we have data */ - rx_data_len = dto_xfer_len - ISER_TOTAL_HEADERS_LEN; - rx_data = dto->regd[1]->virt_addr; - rx_data += dto->offset[1]; - } + hdr = &rx_desc->iscsi_header; - opcode = hdr->opcode & ISCSI_OPCODE_MASK; - - if (opcode == ISCSI_OP_SCSI_CMD_RSP) { - spin_lock(&conn->iscsi_conn->session->lock); - task = iscsi_itt_to_ctask(conn->iscsi_conn, hdr->itt); - if (task) - __iscsi_get_task(task); - spin_unlock(&conn->iscsi_conn->session->lock); - - if (!task) - iser_err("itt can't be matched to task!!! " - "conn %p opcode %d itt %d\n", - conn->iscsi_conn, opcode, hdr->itt); - else { - iser_task = task->dd_data; - iser_dbg("itt %d task %p\n",hdr->itt, task); - iser_task->status = ISER_TASK_STATUS_COMPLETED; - iser_task_rdma_finalize(iser_task); - iscsi_put_task(task); - } - } - iser_dto_buffs_release(dto); + iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode, + hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN)); - iscsi_iser_recv(conn->iscsi_conn, hdr, rx_data, rx_data_len); + iscsi_iser_recv(conn->iscsi_conn, hdr, + rx_desc->data, rx_xfer_len - ISER_HEADERS_LEN); - kfree(rx_desc->data); - kmem_cache_free(ig.desc_cache, rx_desc); + ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma, + rx_buflen, DMA_FROM_DEVICE); /* decrementing conn->post_recv_buf_count only --after-- freeing the * * task eliminates the need to worry on tasks which are completed in * * parallel to the execution of iser_conn_term. So the code that waits * * for the posted rx bufs refcount to become zero handles everything */ - atomic_dec(&conn->ib_conn->post_recv_buf_count); + conn->ib_conn->post_recv_buf_count--; - /* - * if an unexpected PDU was received then the recv wr consumed must - * be replaced, this is done in the next send of a control-type PDU - */ - if (opcode == ISCSI_OP_NOOP_IN && hdr->itt == RESERVED_ITT) { - /* nop-in with itt = 0xffffffff */ - atomic_inc(&conn->ib_conn->unexpected_pdu_count); - } - else if (opcode == ISCSI_OP_ASYNC_EVENT) { - /* asyncronous message */ - atomic_inc(&conn->ib_conn->unexpected_pdu_count); + if (rx_dma == ib_conn->login_dma) + return; + + outstanding = ib_conn->post_recv_buf_count; + if (outstanding + ISER_MIN_POSTED_RX <= ISER_QP_MAX_RECV_DTOS) { + count = min(ISER_QP_MAX_RECV_DTOS - outstanding, + ISER_MIN_POSTED_RX); + err = iser_post_recvm(ib_conn, count); + if (err) + iser_err("posting %d rx bufs err %d\n", count, err); } - /* a reject PDU consumes the recv buf posted for the response */ } -void iser_snd_completion(struct iser_desc *tx_desc) +void iser_snd_completion(struct iser_tx_desc *tx_desc, + struct iser_conn *ib_conn) { - struct iser_dto *dto = &tx_desc->dto; - struct iser_conn *ib_conn = dto->ib_conn; - struct iscsi_iser_conn *iser_conn = ib_conn->iser_conn; - struct iscsi_conn *conn = iser_conn->iscsi_conn; struct iscsi_task *task; - int resume_tx = 0; - - iser_dbg("Initiator, Data sent dto=0x%p\n", dto); - - iser_dto_buffs_release(dto); + struct iser_device *device = ib_conn->device; - if (tx_desc->type == ISCSI_TX_DATAOUT) + if (tx_desc->type == ISCSI_TX_DATAOUT) { + ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr, + ISER_HEADERS_LEN, DMA_TO_DEVICE); kmem_cache_free(ig.desc_cache, tx_desc); - - if (atomic_read(&iser_conn->ib_conn->post_send_buf_count) == - ISER_QP_MAX_REQ_DTOS) - resume_tx = 1; + } atomic_dec(&ib_conn->post_send_buf_count); - if (resume_tx) { - iser_dbg("%ld resuming tx\n",jiffies); - iscsi_conn_queue_work(conn); - } - if (tx_desc->type == ISCSI_TX_CONTROL) { /* this arithmetic is legal by libiscsi dd_data allocation */ task = (void *) ((long)(void *)tx_desc - @@ -692,7 +528,6 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task) void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) { - int deferred; int is_rdma_aligned = 1; struct iser_regd_buf *regd; @@ -710,32 +545,17 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) if (iser_task->dir[ISER_DIR_IN]) { regd = &iser_task->rdma_regd[ISER_DIR_IN]; - deferred = iser_regd_buff_release(regd); - if (deferred) { - iser_err("%d references remain for BUF-IN rdma reg\n", - atomic_read(®d->ref_count)); - } + if (regd->reg.is_fmr) + iser_unreg_mem(®d->reg); } if (iser_task->dir[ISER_DIR_OUT]) { regd = &iser_task->rdma_regd[ISER_DIR_OUT]; - deferred = iser_regd_buff_release(regd); - if (deferred) { - iser_err("%d references remain for BUF-OUT rdma reg\n", - atomic_read(®d->ref_count)); - } + if (regd->reg.is_fmr) + iser_unreg_mem(®d->reg); } /* if the data was unaligned, it was already unmapped and then copied */ if (is_rdma_aligned) iser_dma_unmap_task_data(iser_task); } - -void iser_dto_buffs_release(struct iser_dto *dto) -{ - int i; - - for (i = 0; i < dto->regd_vector_len; i++) - iser_regd_buff_release(dto->regd[i]); -} - diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 274c883ef3ea..fb88d6896b67 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -41,62 +41,6 @@ #define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */ /** - * Decrements the reference count for the - * registered buffer & releases it - * - * returns 0 if released, 1 if deferred - */ -int iser_regd_buff_release(struct iser_regd_buf *regd_buf) -{ - struct ib_device *dev; - - if ((atomic_read(®d_buf->ref_count) == 0) || - atomic_dec_and_test(®d_buf->ref_count)) { - /* if we used the dma mr, unreg is just NOP */ - if (regd_buf->reg.is_fmr) - iser_unreg_mem(®d_buf->reg); - - if (regd_buf->dma_addr) { - dev = regd_buf->device->ib_device; - ib_dma_unmap_single(dev, - regd_buf->dma_addr, - regd_buf->data_size, - regd_buf->direction); - } - /* else this regd buf is associated with task which we */ - /* dma_unmap_single/sg later */ - return 0; - } else { - iser_dbg("Release deferred, regd.buff: 0x%p\n", regd_buf); - return 1; - } -} - -/** - * iser_reg_single - fills registered buffer descriptor with - * registration information - */ -void iser_reg_single(struct iser_device *device, - struct iser_regd_buf *regd_buf, - enum dma_data_direction direction) -{ - u64 dma_addr; - - dma_addr = ib_dma_map_single(device->ib_device, - regd_buf->virt_addr, - regd_buf->data_size, direction); - BUG_ON(ib_dma_mapping_error(device->ib_device, dma_addr)); - - regd_buf->reg.lkey = device->mr->lkey; - regd_buf->reg.len = regd_buf->data_size; - regd_buf->reg.va = dma_addr; - regd_buf->reg.is_fmr = 0; - - regd_buf->dma_addr = dma_addr; - regd_buf->direction = direction; -} - -/** * iser_start_rdma_unaligned_sg */ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, @@ -109,10 +53,10 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, unsigned long cmd_data_len = data->data_len; if (cmd_data_len > ISER_KMALLOC_THRESHOLD) - mem = (void *)__get_free_pages(GFP_NOIO, + mem = (void *)__get_free_pages(GFP_ATOMIC, ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT); else - mem = kmalloc(cmd_data_len, GFP_NOIO); + mem = kmalloc(cmd_data_len, GFP_ATOMIC); if (mem == NULL) { iser_err("Failed to allocate mem size %d %d for copying sglist\n", @@ -474,9 +418,5 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task, return err; } } - - /* take a reference on this regd buf such that it will not be released * - * (eg in send dto completion) before we get the scsi response */ - atomic_inc(®d_buf->ref_count); return 0; } diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 8579f32ce38e..308d17bb5146 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -37,9 +37,8 @@ #include "iscsi_iser.h" #define ISCSI_ISER_MAX_CONN 8 -#define ISER_MAX_CQ_LEN ((ISER_QP_MAX_RECV_DTOS + \ - ISER_QP_MAX_REQ_DTOS) * \ - ISCSI_ISER_MAX_CONN) +#define ISER_MAX_RX_CQ_LEN (ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN) +#define ISER_MAX_TX_CQ_LEN (ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN) static void iser_cq_tasklet_fn(unsigned long data); static void iser_cq_callback(struct ib_cq *cq, void *cq_context); @@ -67,15 +66,23 @@ static int iser_create_device_ib_res(struct iser_device *device) if (IS_ERR(device->pd)) goto pd_err; - device->cq = ib_create_cq(device->ib_device, + device->rx_cq = ib_create_cq(device->ib_device, iser_cq_callback, iser_cq_event_callback, (void *)device, - ISER_MAX_CQ_LEN, 0); - if (IS_ERR(device->cq)) - goto cq_err; + ISER_MAX_RX_CQ_LEN, 0); + if (IS_ERR(device->rx_cq)) + goto rx_cq_err; - if (ib_req_notify_cq(device->cq, IB_CQ_NEXT_COMP)) + device->tx_cq = ib_create_cq(device->ib_device, + NULL, iser_cq_event_callback, + (void *)device, + ISER_MAX_TX_CQ_LEN, 0); + + if (IS_ERR(device->tx_cq)) + goto tx_cq_err; + + if (ib_req_notify_cq(device->rx_cq, IB_CQ_NEXT_COMP)) goto cq_arm_err; tasklet_init(&device->cq_tasklet, @@ -93,8 +100,10 @@ static int iser_create_device_ib_res(struct iser_device *device) dma_mr_err: tasklet_kill(&device->cq_tasklet); cq_arm_err: - ib_destroy_cq(device->cq); -cq_err: + ib_destroy_cq(device->tx_cq); +tx_cq_err: + ib_destroy_cq(device->rx_cq); +rx_cq_err: ib_dealloc_pd(device->pd); pd_err: iser_err("failed to allocate an IB resource\n"); @@ -112,11 +121,13 @@ static void iser_free_device_ib_res(struct iser_device *device) tasklet_kill(&device->cq_tasklet); (void)ib_dereg_mr(device->mr); - (void)ib_destroy_cq(device->cq); + (void)ib_destroy_cq(device->tx_cq); + (void)ib_destroy_cq(device->rx_cq); (void)ib_dealloc_pd(device->pd); device->mr = NULL; - device->cq = NULL; + device->tx_cq = NULL; + device->rx_cq = NULL; device->pd = NULL; } @@ -129,13 +140,23 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn) { struct iser_device *device; struct ib_qp_init_attr init_attr; - int ret; + int ret = -ENOMEM; struct ib_fmr_pool_param params; BUG_ON(ib_conn->device == NULL); device = ib_conn->device; + ib_conn->login_buf = kmalloc(ISER_RX_LOGIN_SIZE, GFP_KERNEL); + if (!ib_conn->login_buf) { + goto alloc_err; + ret = -ENOMEM; + } + + ib_conn->login_dma = ib_dma_map_single(ib_conn->device->ib_device, + (void *)ib_conn->login_buf, ISER_RX_LOGIN_SIZE, + DMA_FROM_DEVICE); + ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) + (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)), GFP_KERNEL); @@ -169,12 +190,12 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn) init_attr.event_handler = iser_qp_event_callback; init_attr.qp_context = (void *)ib_conn; - init_attr.send_cq = device->cq; - init_attr.recv_cq = device->cq; + init_attr.send_cq = device->tx_cq; + init_attr.recv_cq = device->rx_cq; init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS; init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS; - init_attr.cap.max_send_sge = MAX_REGD_BUF_VECTOR_LEN; - init_attr.cap.max_recv_sge = 2; + init_attr.cap.max_send_sge = 2; + init_attr.cap.max_recv_sge = 1; init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; init_attr.qp_type = IB_QPT_RC; @@ -192,6 +213,7 @@ qp_err: (void)ib_destroy_fmr_pool(ib_conn->fmr_pool); fmr_pool_err: kfree(ib_conn->page_vec); + kfree(ib_conn->login_buf); alloc_err: iser_err("unable to alloc mem or create resource, err %d\n", ret); return ret; @@ -278,17 +300,6 @@ static void iser_device_try_release(struct iser_device *device) mutex_unlock(&ig.device_list_mutex); } -int iser_conn_state_comp(struct iser_conn *ib_conn, - enum iser_ib_conn_state comp) -{ - int ret; - - spin_lock_bh(&ib_conn->lock); - ret = (ib_conn->state == comp); - spin_unlock_bh(&ib_conn->lock); - return ret; -} - static int iser_conn_state_comp_exch(struct iser_conn *ib_conn, enum iser_ib_conn_state comp, enum iser_ib_conn_state exch) @@ -314,7 +325,7 @@ static void iser_conn_release(struct iser_conn *ib_conn) mutex_lock(&ig.connlist_mutex); list_del(&ib_conn->conn_list); mutex_unlock(&ig.connlist_mutex); - + iser_free_rx_descriptors(ib_conn); iser_free_ib_conn_res(ib_conn); ib_conn->device = NULL; /* on EVENT_ADDR_ERROR there's no device yet for this conn */ @@ -442,7 +453,7 @@ static void iser_disconnected_handler(struct rdma_cm_id *cma_id) ISCSI_ERR_CONN_FAILED); /* Complete the termination process if no posts are pending */ - if ((atomic_read(&ib_conn->post_recv_buf_count) == 0) && + if (ib_conn->post_recv_buf_count == 0 && (atomic_read(&ib_conn->post_send_buf_count) == 0)) { ib_conn->state = ISER_CONN_DOWN; wake_up_interruptible(&ib_conn->wait); @@ -489,9 +500,8 @@ void iser_conn_init(struct iser_conn *ib_conn) { ib_conn->state = ISER_CONN_INIT; init_waitqueue_head(&ib_conn->wait); - atomic_set(&ib_conn->post_recv_buf_count, 0); + ib_conn->post_recv_buf_count = 0; atomic_set(&ib_conn->post_send_buf_count, 0); - atomic_set(&ib_conn->unexpected_pdu_count, 0); atomic_set(&ib_conn->refcount, 1); INIT_LIST_HEAD(&ib_conn->conn_list); spin_lock_init(&ib_conn->lock); @@ -626,136 +636,97 @@ void iser_unreg_mem(struct iser_mem_reg *reg) reg->mem_h = NULL; } -/** - * iser_dto_to_iov - builds IOV from a dto descriptor - */ -static void iser_dto_to_iov(struct iser_dto *dto, struct ib_sge *iov, int iov_len) +int iser_post_recvl(struct iser_conn *ib_conn) { - int i; - struct ib_sge *sge; - struct iser_regd_buf *regd_buf; - - if (dto->regd_vector_len > iov_len) { - iser_err("iov size %d too small for posting dto of len %d\n", - iov_len, dto->regd_vector_len); - BUG(); - } + struct ib_recv_wr rx_wr, *rx_wr_failed; + struct ib_sge sge; + int ib_ret; - for (i = 0; i < dto->regd_vector_len; i++) { - sge = &iov[i]; - regd_buf = dto->regd[i]; - - sge->addr = regd_buf->reg.va; - sge->length = regd_buf->reg.len; - sge->lkey = regd_buf->reg.lkey; - - if (dto->used_sz[i] > 0) /* Adjust size */ - sge->length = dto->used_sz[i]; - - /* offset and length should not exceed the regd buf length */ - if (sge->length + dto->offset[i] > regd_buf->reg.len) { - iser_err("Used len:%ld + offset:%d, exceed reg.buf.len:" - "%ld in dto:0x%p [%d], va:0x%08lX\n", - (unsigned long)sge->length, dto->offset[i], - (unsigned long)regd_buf->reg.len, dto, i, - (unsigned long)sge->addr); - BUG(); - } + sge.addr = ib_conn->login_dma; + sge.length = ISER_RX_LOGIN_SIZE; + sge.lkey = ib_conn->device->mr->lkey; - sge->addr += dto->offset[i]; /* Adjust offset */ + rx_wr.wr_id = (unsigned long)ib_conn->login_buf; + rx_wr.sg_list = &sge; + rx_wr.num_sge = 1; + rx_wr.next = NULL; + + ib_conn->post_recv_buf_count++; + ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed); + if (ib_ret) { + iser_err("ib_post_recv failed ret=%d\n", ib_ret); + ib_conn->post_recv_buf_count--; } + return ib_ret; } -/** - * iser_post_recv - Posts a receive buffer. - * - * returns 0 on success, -1 on failure - */ -int iser_post_recv(struct iser_desc *rx_desc) +int iser_post_recvm(struct iser_conn *ib_conn, int count) { - int ib_ret, ret_val = 0; - struct ib_recv_wr recv_wr, *recv_wr_failed; - struct ib_sge iov[2]; - struct iser_conn *ib_conn; - struct iser_dto *recv_dto = &rx_desc->dto; - - /* Retrieve conn */ - ib_conn = recv_dto->ib_conn; - - iser_dto_to_iov(recv_dto, iov, 2); + struct ib_recv_wr *rx_wr, *rx_wr_failed; + int i, ib_ret; + unsigned int my_rx_head = ib_conn->rx_desc_head; + struct iser_rx_desc *rx_desc; + + for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) { + rx_desc = &ib_conn->rx_descs[my_rx_head]; + rx_wr->wr_id = (unsigned long)rx_desc; + rx_wr->sg_list = &rx_desc->rx_sg; + rx_wr->num_sge = 1; + rx_wr->next = rx_wr + 1; + my_rx_head = (my_rx_head + 1) & (ISER_QP_MAX_RECV_DTOS - 1); + } - recv_wr.next = NULL; - recv_wr.sg_list = iov; - recv_wr.num_sge = recv_dto->regd_vector_len; - recv_wr.wr_id = (unsigned long)rx_desc; + rx_wr--; + rx_wr->next = NULL; /* mark end of work requests list */ - atomic_inc(&ib_conn->post_recv_buf_count); - ib_ret = ib_post_recv(ib_conn->qp, &recv_wr, &recv_wr_failed); + ib_conn->post_recv_buf_count += count; + ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed); if (ib_ret) { iser_err("ib_post_recv failed ret=%d\n", ib_ret); - atomic_dec(&ib_conn->post_recv_buf_count); - ret_val = -1; - } - - return ret_val; + ib_conn->post_recv_buf_count -= count; + } else + ib_conn->rx_desc_head = my_rx_head; + return ib_ret; } + /** * iser_start_send - Initiate a Send DTO operation * * returns 0 on success, -1 on failure */ -int iser_post_send(struct iser_desc *tx_desc) +int iser_post_send(struct iser_conn *ib_conn, struct iser_tx_desc *tx_desc) { - int ib_ret, ret_val = 0; + int ib_ret; struct ib_send_wr send_wr, *send_wr_failed; - struct ib_sge iov[MAX_REGD_BUF_VECTOR_LEN]; - struct iser_conn *ib_conn; - struct iser_dto *dto = &tx_desc->dto; - ib_conn = dto->ib_conn; - - iser_dto_to_iov(dto, iov, MAX_REGD_BUF_VECTOR_LEN); + ib_dma_sync_single_for_device(ib_conn->device->ib_device, + tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); send_wr.next = NULL; send_wr.wr_id = (unsigned long)tx_desc; - send_wr.sg_list = iov; - send_wr.num_sge = dto->regd_vector_len; + send_wr.sg_list = tx_desc->tx_sg; + send_wr.num_sge = tx_desc->num_sge; send_wr.opcode = IB_WR_SEND; - send_wr.send_flags = dto->notify_enable ? IB_SEND_SIGNALED : 0; + send_wr.send_flags = IB_SEND_SIGNALED; atomic_inc(&ib_conn->post_send_buf_count); ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed); if (ib_ret) { - iser_err("Failed to start SEND DTO, dto: 0x%p, IOV len: %d\n", - dto, dto->regd_vector_len); iser_err("ib_post_send failed, ret:%d\n", ib_ret); atomic_dec(&ib_conn->post_send_buf_count); - ret_val = -1; } - - return ret_val; + return ib_ret; } -static void iser_handle_comp_error(struct iser_desc *desc) +static void iser_handle_comp_error(struct iser_tx_desc *desc, + struct iser_conn *ib_conn) { - struct iser_dto *dto = &desc->dto; - struct iser_conn *ib_conn = dto->ib_conn; - - iser_dto_buffs_release(dto); - - if (desc->type == ISCSI_RX) { - kfree(desc->data); + if (desc && desc->type == ISCSI_TX_DATAOUT) kmem_cache_free(ig.desc_cache, desc); - atomic_dec(&ib_conn->post_recv_buf_count); - } else { /* type is TX control/command/dataout */ - if (desc->type == ISCSI_TX_DATAOUT) - kmem_cache_free(ig.desc_cache, desc); - atomic_dec(&ib_conn->post_send_buf_count); - } - if (atomic_read(&ib_conn->post_recv_buf_count) == 0 && + if (ib_conn->post_recv_buf_count == 0 && atomic_read(&ib_conn->post_send_buf_count) == 0) { /* getting here when the state is UP means that the conn is * * being terminated asynchronously from the iSCSI layer's * @@ -774,32 +745,74 @@ static void iser_handle_comp_error(struct iser_desc *desc) } } +static int iser_drain_tx_cq(struct iser_device *device) +{ + struct ib_cq *cq = device->tx_cq; + struct ib_wc wc; + struct iser_tx_desc *tx_desc; + struct iser_conn *ib_conn; + int completed_tx = 0; + + while (ib_poll_cq(cq, 1, &wc) == 1) { + tx_desc = (struct iser_tx_desc *) (unsigned long) wc.wr_id; + ib_conn = wc.qp->qp_context; + if (wc.status == IB_WC_SUCCESS) { + if (wc.opcode == IB_WC_SEND) + iser_snd_completion(tx_desc, ib_conn); + else + iser_err("expected opcode %d got %d\n", + IB_WC_SEND, wc.opcode); + } else { + iser_err("tx id %llx status %d vend_err %x\n", + wc.wr_id, wc.status, wc.vendor_err); + atomic_dec(&ib_conn->post_send_buf_count); + iser_handle_comp_error(tx_desc, ib_conn); + } + completed_tx++; + } + return completed_tx; +} + + static void iser_cq_tasklet_fn(unsigned long data) { struct iser_device *device = (struct iser_device *)data; - struct ib_cq *cq = device->cq; + struct ib_cq *cq = device->rx_cq; struct ib_wc wc; - struct iser_desc *desc; + struct iser_rx_desc *desc; unsigned long xfer_len; + struct iser_conn *ib_conn; + int completed_tx, completed_rx; + completed_tx = completed_rx = 0; while (ib_poll_cq(cq, 1, &wc) == 1) { - desc = (struct iser_desc *) (unsigned long) wc.wr_id; + desc = (struct iser_rx_desc *) (unsigned long) wc.wr_id; BUG_ON(desc == NULL); - + ib_conn = wc.qp->qp_context; if (wc.status == IB_WC_SUCCESS) { - if (desc->type == ISCSI_RX) { + if (wc.opcode == IB_WC_RECV) { xfer_len = (unsigned long)wc.byte_len; - iser_rcv_completion(desc, xfer_len); - } else /* type == ISCSI_TX_CONTROL/SCSI_CMD/DOUT */ - iser_snd_completion(desc); + iser_rcv_completion(desc, xfer_len, ib_conn); + } else + iser_err("expected opcode %d got %d\n", + IB_WC_RECV, wc.opcode); } else { - iser_err("comp w. error op %d status %d\n",desc->type,wc.status); - iser_handle_comp_error(desc); + if (wc.status != IB_WC_WR_FLUSH_ERR) + iser_err("rx id %llx status %d vend_err %x\n", + wc.wr_id, wc.status, wc.vendor_err); + ib_conn->post_recv_buf_count--; + iser_handle_comp_error(NULL, ib_conn); } + completed_rx++; + if (!(completed_rx & 63)) + completed_tx += iser_drain_tx_cq(device); } /* #warning "it is assumed here that arming CQ only once its empty" * * " would not cause interrupts to be missed" */ ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); + + completed_tx += iser_drain_tx_cq(device); + iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx); } static void iser_cq_callback(struct ib_cq *cq, void *cq_context) |