Diffstat (limited to 'drivers/infiniband/hw/hfi1/user_sdma.c')
-rw-r--r-- | drivers/infiniband/hw/hfi1/user_sdma.c | 626
1 file changed, 228 insertions(+), 398 deletions(-)
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index d55339f5d73b..c0c0e0445cbf 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -64,224 +64,20 @@ #include "hfi.h" #include "sdma.h" +#include "mmu_rb.h" #include "user_sdma.h" #include "verbs.h" /* for the headers */ #include "common.h" /* for struct hfi1_tid_info */ #include "trace.h" -#include "mmu_rb.h" static uint hfi1_sdma_comp_ring_size = 128; module_param_named(sdma_comp_size, hfi1_sdma_comp_ring_size, uint, S_IRUGO); MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 128"); -/* The maximum number of Data io vectors per message/request */ -#define MAX_VECTORS_PER_REQ 8 -/* - * Maximum number of packet to send from each message/request - * before moving to the next one. - */ -#define MAX_PKTS_PER_QUEUE 16 - -#define num_pages(x) (1 + ((((x) - 1) & PAGE_MASK) >> PAGE_SHIFT)) - -#define req_opcode(x) \ - (((x) >> HFI1_SDMA_REQ_OPCODE_SHIFT) & HFI1_SDMA_REQ_OPCODE_MASK) -#define req_version(x) \ - (((x) >> HFI1_SDMA_REQ_VERSION_SHIFT) & HFI1_SDMA_REQ_OPCODE_MASK) -#define req_iovcnt(x) \ - (((x) >> HFI1_SDMA_REQ_IOVCNT_SHIFT) & HFI1_SDMA_REQ_IOVCNT_MASK) - -/* Number of BTH.PSN bits used for sequence number in expected rcvs */ -#define BTH_SEQ_MASK 0x7ffull - -/* - * Define fields in the KDETH header so we can update the header - * template. - */ -#define KDETH_OFFSET_SHIFT 0 -#define KDETH_OFFSET_MASK 0x7fff -#define KDETH_OM_SHIFT 15 -#define KDETH_OM_MASK 0x1 -#define KDETH_TID_SHIFT 16 -#define KDETH_TID_MASK 0x3ff -#define KDETH_TIDCTRL_SHIFT 26 -#define KDETH_TIDCTRL_MASK 0x3 -#define KDETH_INTR_SHIFT 28 -#define KDETH_INTR_MASK 0x1 -#define KDETH_SH_SHIFT 29 -#define KDETH_SH_MASK 0x1 -#define KDETH_HCRC_UPPER_SHIFT 16 -#define KDETH_HCRC_UPPER_MASK 0xff -#define KDETH_HCRC_LOWER_SHIFT 24 -#define KDETH_HCRC_LOWER_MASK 0xff - -#define AHG_KDETH_INTR_SHIFT 12 -#define AHG_KDETH_SH_SHIFT 13 - -#define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4) -#define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff) - -#define KDETH_GET(val, field) \ - (((le32_to_cpu((val))) >> KDETH_##field##_SHIFT) & KDETH_##field##_MASK) -#define KDETH_SET(dw, field, val) do { \ - u32 dwval = le32_to_cpu(dw); \ - dwval &= ~(KDETH_##field##_MASK << KDETH_##field##_SHIFT); \ - dwval |= (((val) & KDETH_##field##_MASK) << \ - KDETH_##field##_SHIFT); \ - dw = cpu_to_le32(dwval); \ - } while (0) - -#define AHG_HEADER_SET(arr, idx, dw, bit, width, value) \ - do { \ - if ((idx) < ARRAY_SIZE((arr))) \ - (arr)[(idx++)] = sdma_build_ahg_descriptor( \ - (__force u16)(value), (dw), (bit), \ - (width)); \ - else \ - return -ERANGE; \ - } while (0) - -/* KDETH OM multipliers and switch over point */ -#define KDETH_OM_SMALL 4 -#define KDETH_OM_SMALL_SHIFT 2 -#define KDETH_OM_LARGE 64 -#define KDETH_OM_LARGE_SHIFT 6 -#define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1)) - -/* Tx request flag bits */ -#define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */ -#define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */ - -/* SDMA request flag bits */ -#define SDMA_REQ_FOR_THREAD 1 -#define SDMA_REQ_SEND_DONE 2 -#define SDMA_REQ_HAS_ERROR 3 -#define SDMA_REQ_DONE_ERROR 4 - -#define SDMA_PKT_Q_INACTIVE BIT(0) -#define SDMA_PKT_Q_ACTIVE BIT(1) -#define SDMA_PKT_Q_DEFERRED BIT(2) - -/* - * Maximum retry attempts to submit a TX request - * before putting the process to sleep. 
- */ -#define MAX_DEFER_RETRY_COUNT 1 - static unsigned initial_pkt_count = 8; -#define SDMA_IOWAIT_TIMEOUT 1000 /* in milliseconds */ - -struct sdma_mmu_node; - -struct user_sdma_iovec { - struct list_head list; - struct iovec iov; - /* number of pages in this vector */ - unsigned npages; - /* array of pinned pages for this vector */ - struct page **pages; - /* - * offset into the virtual address space of the vector at - * which we last left off. - */ - u64 offset; - struct sdma_mmu_node *node; -}; - -struct sdma_mmu_node { - struct mmu_rb_node rb; - struct hfi1_user_sdma_pkt_q *pq; - atomic_t refcount; - struct page **pages; - unsigned npages; -}; - -/* evict operation argument */ -struct evict_data { - u32 cleared; /* count evicted so far */ - u32 target; /* target count to evict */ -}; - -struct user_sdma_request { - struct sdma_req_info info; - struct hfi1_user_sdma_pkt_q *pq; - struct hfi1_user_sdma_comp_q *cq; - /* This is the original header from user space */ - struct hfi1_pkt_header hdr; - /* - * Pointer to the SDMA engine for this request. - * Since different request could be on different VLs, - * each request will need it's own engine pointer. - */ - struct sdma_engine *sde; - s8 ahg_idx; - u32 ahg[9]; - /* - * KDETH.Offset (Eager) field - * We need to remember the initial value so the headers - * can be updated properly. - */ - u32 koffset; - /* - * KDETH.OFFSET (TID) field - * The offset can cover multiple packets, depending on the - * size of the TID entry. - */ - u32 tidoffset; - /* - * We copy the iovs for this request (based on - * info.iovcnt). These are only the data vectors - */ - unsigned data_iovs; - /* total length of the data in the request */ - u32 data_len; - /* progress index moving along the iovs array */ - unsigned iov_idx; - struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ]; - /* number of elements copied to the tids array */ - u16 n_tids; - /* TID array values copied from the tid_iov vector */ - u32 *tids; - u16 tididx; - u32 sent; - u64 seqnum; - u64 seqcomp; - u64 seqsubmitted; - struct list_head txps; - unsigned long flags; - /* status of the last txreq completed */ - int status; -}; - -/* - * A single txreq could span up to 3 physical pages when the MTU - * is sufficiently large (> 4K). Each of the IOV pointers also - * needs it's own set of flags so the vector has been handled - * independently of each other. - */ -struct user_sdma_txreq { - /* Packet header for the txreq */ - struct hfi1_pkt_header hdr; - struct sdma_txreq txreq; - struct list_head list; - struct user_sdma_request *req; - u16 flags; - unsigned busycount; - u64 seqnum; -}; - -#define SDMA_DBG(req, fmt, ...) \ - hfi1_cdbg(SDMA, "[%u:%u:%u:%u] " fmt, (req)->pq->dd->unit, \ - (req)->pq->ctxt, (req)->pq->subctxt, (req)->info.comp_idx, \ - ##__VA_ARGS__) -#define SDMA_Q_DBG(pq, fmt, ...) 
\ - hfi1_cdbg(SDMA, "[%u:%u:%u] " fmt, (pq)->dd->unit, (pq)->ctxt, \ - (pq)->subctxt, ##__VA_ARGS__) - static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts); -static int num_user_pages(const struct iovec *iov); static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status); static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq); static void user_sdma_free_request(struct user_sdma_request *req, bool unpin); @@ -307,7 +103,8 @@ static int defer_packet_queue( struct sdma_engine *sde, struct iowait *wait, struct sdma_txreq *txreq, - unsigned int seq); + uint seq, + bool pkts_sent); static void activate_packet_queue(struct iowait *wait, int reason); static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr, unsigned long len); @@ -329,7 +126,8 @@ static int defer_packet_queue( struct sdma_engine *sde, struct iowait *wait, struct sdma_txreq *txreq, - unsigned seq) + uint seq, + bool pkts_sent) { struct hfi1_user_sdma_pkt_q *pq = container_of(wait, struct hfi1_user_sdma_pkt_q, busy); @@ -349,7 +147,7 @@ static int defer_packet_queue( xchg(&pq->state, SDMA_PKT_Q_DEFERRED); write_seqlock(&dev->iowait_lock); if (list_empty(&pq->busy.list)) - list_add_tail(&pq->busy.list, &sde->dmawait); + iowait_queue(pkts_sent, &pq->busy, &sde->dmawait); write_sequnlock(&dev->iowait_lock); return -EBUSY; eagain: @@ -364,13 +162,6 @@ static void activate_packet_queue(struct iowait *wait, int reason) wake_up(&wait->wait_dma); }; -static void sdma_kmem_cache_ctor(void *obj) -{ - struct user_sdma_txreq *tx = obj; - - memset(tx, 0, sizeof(*tx)); -} - int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct hfi1_filedata *fd) { @@ -379,7 +170,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct hfi1_devdata *dd; struct hfi1_user_sdma_comp_q *cq; struct hfi1_user_sdma_pkt_q *pq; - unsigned long flags; if (!uctxt || !fd) return -EBADF; @@ -393,7 +183,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, if (!pq) return -ENOMEM; - INIT_LIST_HEAD(&pq->list); pq->dd = dd; pq->ctxt = uctxt->ctxt; pq->subctxt = fd->subctxt; @@ -426,7 +215,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, sizeof(struct user_sdma_txreq), L1_CACHE_BYTES, SLAB_HWCACHE_ALIGN, - sdma_kmem_cache_ctor); + NULL); if (!pq->txreq_cache) { dd_dev_err(dd, "[%u] Failed to allocate TxReq cache\n", uctxt->ctxt); @@ -454,10 +243,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, fd->pq = pq; fd->cq = cq; - spin_lock_irqsave(&uctxt->sdma_qlock, flags); - list_add(&pq->list, &uctxt->sdma_queues); - spin_unlock_irqrestore(&uctxt->sdma_qlock, flags); - return 0; pq_mmu_fail: @@ -476,22 +261,17 @@ pq_reqs_nomem: return ret; } -int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd) +int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, + struct hfi1_ctxtdata *uctxt) { - struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_user_sdma_pkt_q *pq; - unsigned long flags; - hfi1_cdbg(SDMA, "[%u:%u:%u] Freeing user SDMA queues", uctxt->dd->unit, - uctxt->ctxt, fd->subctxt); + trace_hfi1_sdma_user_free_queues(uctxt->dd, uctxt->ctxt, fd->subctxt); + pq = fd->pq; if (pq) { if (pq->handler) hfi1_mmu_rb_unregister(pq->handler); - spin_lock_irqsave(&uctxt->sdma_qlock, flags); - if (!list_empty(&pq->list)) - list_del_init(&pq->list); - spin_unlock_irqrestore(&uctxt->sdma_qlock, flags); iowait_sdma_drain(&pq->busy); /* Wait until all requests have been freed. 
*/ wait_event_interruptible( @@ -546,6 +326,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, struct sdma_req_info info; struct user_sdma_request *req; u8 opcode, sc, vl; + u16 pkey; + u32 slid; int req_queued = 0; u16 dlid; u32 selector; @@ -567,7 +349,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, trace_hfi1_sdma_user_reqinfo(dd, uctxt->ctxt, fd->subctxt, (u16 *)&info); - if (info.comp_idx >= hfi1_sdma_comp_ring_size) { hfi1_cdbg(SDMA, "[%u:%u:%u:%u] Invalid comp index", @@ -604,15 +385,23 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, /* * All safety checks have been done and this request has been claimed. */ - hfi1_cdbg(SDMA, "[%u:%u:%u] Using req/comp entry %u\n", dd->unit, - uctxt->ctxt, fd->subctxt, info.comp_idx); + trace_hfi1_sdma_user_process_request(dd, uctxt->ctxt, fd->subctxt, + info.comp_idx); req = pq->reqs + info.comp_idx; - memset(req, 0, sizeof(*req)); req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */ + req->data_len = 0; req->pq = pq; req->cq = cq; req->status = -1; req->ahg_idx = -1; + req->iov_idx = 0; + req->sent = 0; + req->seqnum = 0; + req->seqcomp = 0; + req->seqsubmitted = 0; + req->tids = NULL; + req->done = 0; + req->has_error = 0; INIT_LIST_HEAD(&req->txps); memcpy(&req->info, &info, sizeof(info)); @@ -671,8 +460,9 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, } /* Checking P_KEY for requests from user-space */ - if (egress_pkey_check(dd->pport, req->hdr.lrh, req->hdr.bth, sc, - PKEY_CHECK_INVALID)) { + pkey = (u16)be32_to_cpu(req->hdr.bth[0]); + slid = be16_to_cpu(req->hdr.lrh[3]); + if (egress_pkey_check(dd->pport, slid, pkey, sc, PKEY_CHECK_INVALID)) { ret = -EINVAL; goto free_req; } @@ -696,24 +486,27 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, req->tidoffset = KDETH_GET(req->hdr.kdeth.ver_tid_offset, OFFSET) * (KDETH_GET(req->hdr.kdeth.ver_tid_offset, OM) ? KDETH_OM_LARGE : KDETH_OM_SMALL); - SDMA_DBG(req, "Initial TID offset %u", req->tidoffset); + trace_hfi1_sdma_user_initial_tidoffset(dd, uctxt->ctxt, fd->subctxt, + info.comp_idx, req->tidoffset); idx++; /* Save all the IO vector structures */ for (i = 0; i < req->data_iovs; i++) { + req->iovs[i].offset = 0; INIT_LIST_HEAD(&req->iovs[i].list); memcpy(&req->iovs[i].iov, iovec + idx++, sizeof(req->iovs[i].iov)); ret = pin_vector_pages(req, &req->iovs[i]); if (ret) { + req->data_iovs = i; req->status = ret; goto free_req; } req->data_len += req->iovs[i].iov.iov_len; } - SDMA_DBG(req, "total data length %u", req->data_len); - + trace_hfi1_sdma_user_data_length(dd, uctxt->ctxt, fd->subctxt, + info.comp_idx, req->data_len); if (pcount > req->info.npkts) pcount = req->info.npkts; /* @@ -749,6 +542,7 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, } req->tids = tmp; req->n_tids = ntids; + req->tididx = 0; idx++; } @@ -791,12 +585,12 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, * request have been submitted to the SDMA engine. However, it * will not wait for send completions. 
*/ - while (!test_bit(SDMA_REQ_SEND_DONE, &req->flags)) { + while (req->seqsubmitted != req->info.npkts) { ret = user_sdma_send_pkts(req, pcount); if (ret < 0) { if (ret != -EBUSY) { req->status = ret; - set_bit(SDMA_REQ_DONE_ERROR, &req->flags); + WRITE_ONCE(req->has_error, 1); if (ACCESS_ONCE(req->seqcomp) == req->seqsubmitted - 1) goto free_req; @@ -867,7 +661,11 @@ static inline u32 compute_data_length(struct user_sdma_request *req, } else { len = min(req->data_len - req->sent, (u32)req->info.fragsize); } - SDMA_DBG(req, "Data Length = %u", len); + trace_hfi1_sdma_user_compute_length(req->pq->dd, + req->pq->ctxt, + req->pq->subctxt, + req->info.comp_idx, + len); return len; } @@ -884,6 +682,84 @@ static inline u32 get_lrh_len(struct hfi1_pkt_header hdr, u32 len) return ((sizeof(hdr) - sizeof(hdr.pbc)) + 4 + len); } +static int user_sdma_txadd_ahg(struct user_sdma_request *req, + struct user_sdma_txreq *tx, + u32 datalen) +{ + int ret; + u16 pbclen = le16_to_cpu(req->hdr.pbc[0]); + u32 lrhlen = get_lrh_len(req->hdr, pad_len(datalen)); + struct hfi1_user_sdma_pkt_q *pq = req->pq; + + /* + * Copy the request header into the tx header + * because the HW needs a cacheline-aligned + * address. + * This copy can be optimized out if the hdr + * member of user_sdma_request were also + * cacheline aligned. + */ + memcpy(&tx->hdr, &req->hdr, sizeof(tx->hdr)); + if (PBC2LRH(pbclen) != lrhlen) { + pbclen = (pbclen & 0xf000) | LRH2PBC(lrhlen); + tx->hdr.pbc[0] = cpu_to_le16(pbclen); + } + ret = check_header_template(req, &tx->hdr, lrhlen, datalen); + if (ret) + return ret; + ret = sdma_txinit_ahg(&tx->txreq, SDMA_TXREQ_F_AHG_COPY, + sizeof(tx->hdr) + datalen, req->ahg_idx, + 0, NULL, 0, user_sdma_txreq_cb); + if (ret) + return ret; + ret = sdma_txadd_kvaddr(pq->dd, &tx->txreq, &tx->hdr, sizeof(tx->hdr)); + if (ret) + sdma_txclean(pq->dd, &tx->txreq); + return ret; +} + +static int user_sdma_txadd(struct user_sdma_request *req, + struct user_sdma_txreq *tx, + struct user_sdma_iovec *iovec, u32 datalen, + u32 *queued_ptr, u32 *data_sent_ptr, + u64 *iov_offset_ptr) +{ + int ret; + unsigned int pageidx, len; + unsigned long base, offset; + u64 iov_offset = *iov_offset_ptr; + u32 queued = *queued_ptr, data_sent = *data_sent_ptr; + struct hfi1_user_sdma_pkt_q *pq = req->pq; + + base = (unsigned long)iovec->iov.iov_base; + offset = offset_in_page(base + iovec->offset + iov_offset); + pageidx = (((iovec->offset + iov_offset + base) - (base & PAGE_MASK)) >> + PAGE_SHIFT); + len = offset + req->info.fragsize > PAGE_SIZE ? + PAGE_SIZE - offset : req->info.fragsize; + len = min((datalen - queued), len); + ret = sdma_txadd_page(pq->dd, &tx->txreq, iovec->pages[pageidx], + offset, len); + if (ret) { + SDMA_DBG(req, "SDMA txreq add page failed %d\n", ret); + return ret; + } + iov_offset += len; + queued += len; + data_sent += len; + if (unlikely(queued < datalen && pageidx == iovec->npages && + req->iov_idx < req->data_iovs - 1)) { + iovec->offset += iov_offset; + iovec = &req->iovs[++req->iov_idx]; + iov_offset = 0; + } + + *queued_ptr = queued; + *data_sent_ptr = data_sent; + *iov_offset_ptr = iov_offset; + return ret; +} + static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) { int ret = 0, count; @@ -898,10 +774,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) pq = req->pq; /* If tx completion has reported an error, we are done. 
*/ - if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) { - set_bit(SDMA_REQ_DONE_ERROR, &req->flags); + if (READ_ONCE(req->has_error)) return -EFAULT; - } /* * Check if we might have sent the entire request already @@ -924,10 +798,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) * with errors. If so, we are not going to process any * more packets from this request. */ - if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) { - set_bit(SDMA_REQ_DONE_ERROR, &req->flags); + if (READ_ONCE(req->has_error)) return -EFAULT; - } tx = kmem_cache_alloc(pq->txreq_cache, GFP_KERNEL); if (!tx) @@ -984,39 +856,9 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) if (req->ahg_idx >= 0) { if (!req->seqnum) { - u16 pbclen = le16_to_cpu(req->hdr.pbc[0]); - u32 lrhlen = get_lrh_len(req->hdr, - pad_len(datalen)); - /* - * Copy the request header into the tx header - * because the HW needs a cacheline-aligned - * address. - * This copy can be optimized out if the hdr - * member of user_sdma_request were also - * cacheline aligned. - */ - memcpy(&tx->hdr, &req->hdr, sizeof(tx->hdr)); - if (PBC2LRH(pbclen) != lrhlen) { - pbclen = (pbclen & 0xf000) | - LRH2PBC(lrhlen); - tx->hdr.pbc[0] = cpu_to_le16(pbclen); - } - ret = check_header_template(req, &tx->hdr, - lrhlen, datalen); + ret = user_sdma_txadd_ahg(req, tx, datalen); if (ret) goto free_tx; - ret = sdma_txinit_ahg(&tx->txreq, - SDMA_TXREQ_F_AHG_COPY, - sizeof(tx->hdr) + datalen, - req->ahg_idx, 0, NULL, 0, - user_sdma_txreq_cb); - if (ret) - goto free_tx; - ret = sdma_txadd_kvaddr(pq->dd, &tx->txreq, - &tx->hdr, - sizeof(tx->hdr)); - if (ret) - goto free_txreq; } else { int changes; @@ -1024,11 +866,6 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) datalen); if (changes < 0) goto free_tx; - sdma_txinit_ahg(&tx->txreq, - SDMA_TXREQ_F_USE_AHG, - datalen, req->ahg_idx, changes, - req->ahg, sizeof(req->hdr), - user_sdma_txreq_cb); } } else { ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) + @@ -1052,35 +889,10 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) */ while (queued < datalen && (req->sent + data_sent) < req->data_len) { - unsigned long base, offset; - unsigned pageidx, len; - - base = (unsigned long)iovec->iov.iov_base; - offset = offset_in_page(base + iovec->offset + - iov_offset); - pageidx = (((iovec->offset + iov_offset + - base) - (base & PAGE_MASK)) >> PAGE_SHIFT); - len = offset + req->info.fragsize > PAGE_SIZE ? - PAGE_SIZE - offset : req->info.fragsize; - len = min((datalen - queued), len); - ret = sdma_txadd_page(pq->dd, &tx->txreq, - iovec->pages[pageidx], - offset, len); - if (ret) { - SDMA_DBG(req, "SDMA txreq add page failed %d\n", - ret); + ret = user_sdma_txadd(req, tx, iovec, datalen, + &queued, &data_sent, &iov_offset); + if (ret) goto free_txreq; - } - iov_offset += len; - queued += len; - data_sent += len; - if (unlikely(queued < datalen && - pageidx == iovec->npages && - req->iov_idx < req->data_iovs - 1)) { - iovec->offset += iov_offset; - iovec = &req->iovs[++req->iov_idx]; - iov_offset = 0; - } } /* * The txreq was submitted successfully so we can update @@ -1105,7 +917,7 @@ dosend: ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count); req->seqsubmitted += count; if (req->seqsubmitted == req->info.npkts) { - set_bit(SDMA_REQ_SEND_DONE, &req->flags); + WRITE_ONCE(req->done, 1); /* * The txreq has already been submitted to the HW queue * so we can free the AHG entry now. 
Corruption will not @@ -1124,19 +936,6 @@ free_tx: return ret; } -/* - * How many pages in this iovec element? - */ -static inline int num_user_pages(const struct iovec *iov) -{ - const unsigned long addr = (unsigned long)iov->iov_base; - const unsigned long len = iov->iov_len; - const unsigned long spage = addr & PAGE_MASK; - const unsigned long epage = (addr + len - 1) & PAGE_MASK; - - return 1 + ((epage - spage) >> PAGE_SHIFT); -} - static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages) { struct evict_data evict_data; @@ -1147,22 +946,82 @@ static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages) return evict_data.cleared; } +static int pin_sdma_pages(struct user_sdma_request *req, + struct user_sdma_iovec *iovec, + struct sdma_mmu_node *node, + int npages) +{ + int pinned, cleared; + struct page **pages; + struct hfi1_user_sdma_pkt_q *pq = req->pq; + + pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL); + if (!pages) { + SDMA_DBG(req, "Failed page array alloc"); + return -ENOMEM; + } + memcpy(pages, node->pages, node->npages * sizeof(*pages)); + + npages -= node->npages; +retry: + if (!hfi1_can_pin_pages(pq->dd, pq->mm, + atomic_read(&pq->n_locked), npages)) { + cleared = sdma_cache_evict(pq, npages); + if (cleared >= npages) + goto retry; + } + pinned = hfi1_acquire_user_pages(pq->mm, + ((unsigned long)iovec->iov.iov_base + + (node->npages * PAGE_SIZE)), npages, 0, + pages + node->npages); + if (pinned < 0) { + kfree(pages); + return pinned; + } + if (pinned != npages) { + unpin_vector_pages(pq->mm, pages, node->npages, pinned); + return -EFAULT; + } + kfree(node->pages); + node->rb.len = iovec->iov.iov_len; + node->pages = pages; + atomic_add(pinned, &pq->n_locked); + return pinned; +} + +static void unpin_sdma_pages(struct sdma_mmu_node *node) +{ + if (node->npages) { + unpin_vector_pages(node->pq->mm, node->pages, 0, node->npages); + atomic_sub(node->npages, &node->pq->n_locked); + } +} + static int pin_vector_pages(struct user_sdma_request *req, struct user_sdma_iovec *iovec) { - int ret = 0, pinned, npages, cleared; - struct page **pages; + int ret = 0, pinned, npages; struct hfi1_user_sdma_pkt_q *pq = req->pq; struct sdma_mmu_node *node = NULL; struct mmu_rb_node *rb_node; - - rb_node = hfi1_mmu_rb_extract(pq->handler, - (unsigned long)iovec->iov.iov_base, - iovec->iov.iov_len); - if (rb_node) + struct iovec *iov; + bool extracted; + + extracted = + hfi1_mmu_rb_remove_unless_exact(pq->handler, + (unsigned long) + iovec->iov.iov_base, + iovec->iov.iov_len, &rb_node); + if (rb_node) { node = container_of(rb_node, struct sdma_mmu_node, rb); - else - rb_node = NULL; + if (!extracted) { + atomic_inc(&node->refcount); + iovec->pages = node->pages; + iovec->npages = node->npages; + iovec->node = node; + return 0; + } + } if (!node) { node = kzalloc(sizeof(*node), GFP_KERNEL); @@ -1174,46 +1033,16 @@ static int pin_vector_pages(struct user_sdma_request *req, atomic_set(&node->refcount, 0); } - npages = num_user_pages(&iovec->iov); + iov = &iovec->iov; + npages = num_user_pages((unsigned long)iov->iov_base, iov->iov_len); if (node->npages < npages) { - pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL); - if (!pages) { - SDMA_DBG(req, "Failed page array alloc"); - ret = -ENOMEM; - goto bail; - } - memcpy(pages, node->pages, node->npages * sizeof(*pages)); - - npages -= node->npages; - -retry: - if (!hfi1_can_pin_pages(pq->dd, pq->mm, - atomic_read(&pq->n_locked), npages)) { - cleared = sdma_cache_evict(pq, npages); - if (cleared >= npages) - goto 
retry; - } - pinned = hfi1_acquire_user_pages(pq->mm, - ((unsigned long)iovec->iov.iov_base + - (node->npages * PAGE_SIZE)), npages, 0, - pages + node->npages); + pinned = pin_sdma_pages(req, iovec, node, npages); if (pinned < 0) { - kfree(pages); ret = pinned; goto bail; } - if (pinned != npages) { - unpin_vector_pages(pq->mm, pages, node->npages, - pinned); - ret = -EFAULT; - goto bail; - } - kfree(node->pages); - node->rb.len = iovec->iov.iov_len; - node->pages = pages; node->npages += pinned; npages = node->npages; - atomic_add(pinned, &pq->n_locked); } iovec->pages = node->pages; iovec->npages = npages; @@ -1221,14 +1050,12 @@ retry: ret = hfi1_mmu_rb_insert(req->pq->handler, &node->rb); if (ret) { - atomic_sub(node->npages, &pq->n_locked); iovec->node = NULL; goto bail; } return 0; bail: - if (rb_node) - unpin_vector_pages(pq->mm, node->pages, 0, node->npages); + unpin_sdma_pages(node); kfree(node); return ret; } @@ -1408,9 +1235,10 @@ static int set_txreq_header(struct user_sdma_request *req, * Set the KDETH.OFFSET and KDETH.OM based on size of * transfer. */ - SDMA_DBG(req, "TID offset %ubytes %uunits om%u", - req->tidoffset, req->tidoffset >> omfactor, - omfactor != KDETH_OM_SMALL_SHIFT); + trace_hfi1_sdma_user_tid_info( + pq->dd, pq->ctxt, pq->subctxt, req->info.comp_idx, + req->tidoffset, req->tidoffset >> omfactor, + omfactor != KDETH_OM_SMALL_SHIFT); KDETH_SET(hdr->kdeth.ver_tid_offset, OFFSET, req->tidoffset >> omfactor); KDETH_SET(hdr->kdeth.ver_tid_offset, OM, @@ -1423,21 +1251,22 @@ done: } static int set_txreq_header_ahg(struct user_sdma_request *req, - struct user_sdma_txreq *tx, u32 len) + struct user_sdma_txreq *tx, u32 datalen) { + u32 ahg[AHG_KDETH_ARRAY_SIZE]; int diff = 0; u8 omfactor; /* KDETH.OM */ struct hfi1_user_sdma_pkt_q *pq = req->pq; struct hfi1_pkt_header *hdr = &req->hdr; u16 pbclen = le16_to_cpu(hdr->pbc[0]); - u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(len)); + u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen)); if (PBC2LRH(pbclen) != lrhlen) { /* PBC.PbcLengthDWs */ - AHG_HEADER_SET(req->ahg, diff, 0, 0, 12, + AHG_HEADER_SET(ahg, diff, 0, 0, 12, cpu_to_le16(LRH2PBC(lrhlen))); /* LRH.PktLen (we need the full 16 bits due to byte swap) */ - AHG_HEADER_SET(req->ahg, diff, 3, 0, 16, + AHG_HEADER_SET(ahg, diff, 3, 0, 16, cpu_to_be16(lrhlen >> 2)); } @@ -1449,13 +1278,12 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, (HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffff : 0xffffff); if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK)) val32 |= 1UL << 31; - AHG_HEADER_SET(req->ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16)); - AHG_HEADER_SET(req->ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff)); + AHG_HEADER_SET(ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16)); + AHG_HEADER_SET(ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff)); /* KDETH.Offset */ - AHG_HEADER_SET(req->ahg, diff, 15, 0, 16, + AHG_HEADER_SET(ahg, diff, 15, 0, 16, cpu_to_le16(req->koffset & 0xffff)); - AHG_HEADER_SET(req->ahg, diff, 15, 16, 16, - cpu_to_le16(req->koffset >> 16)); + AHG_HEADER_SET(ahg, diff, 15, 16, 16, cpu_to_le16(req->koffset >> 16)); if (req_opcode(req->info.ctrl) == EXPECTED) { __le16 val; @@ -1473,9 +1301,8 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, * we have to check again. 
*/ if (++req->tididx > req->n_tids - 1 || - !req->tids[req->tididx]) { + !req->tids[req->tididx]) return -EINVAL; - } tidval = req->tids[req->tididx]; } omfactor = ((EXP_TID_GET(tidval, LEN) * @@ -1483,7 +1310,7 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE_SHIFT : KDETH_OM_SMALL_SHIFT; /* KDETH.OM and KDETH.OFFSET (TID) */ - AHG_HEADER_SET(req->ahg, diff, 7, 0, 16, + AHG_HEADER_SET(ahg, diff, 7, 0, 16, ((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 | ((req->tidoffset >> omfactor) & 0x7fff))); @@ -1503,12 +1330,20 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, AHG_KDETH_INTR_SHIFT)); } - AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val); + AHG_HEADER_SET(ahg, diff, 7, 16, 14, val); } + if (diff < 0) + return diff; trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt, req->info.comp_idx, req->sde->this_idx, - req->ahg_idx, req->ahg, diff, tidval); + req->ahg_idx, ahg, diff, tidval); + sdma_txinit_ahg(&tx->txreq, + SDMA_TXREQ_F_USE_AHG, + datalen, req->ahg_idx, diff, + ahg, sizeof(req->hdr), + user_sdma_txreq_cb); + return diff; } @@ -1537,7 +1372,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) if (status != SDMA_TXREQ_S_OK) { SDMA_DBG(req, "SDMA completion with error %d", status); - set_bit(SDMA_REQ_HAS_ERROR, &req->flags); + WRITE_ONCE(req->has_error, 1); } req->seqcomp = tx->seqnum; @@ -1556,8 +1391,8 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) if (status != SDMA_TXREQ_S_OK) req->status = status; if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) && - (test_bit(SDMA_REQ_SEND_DONE, &req->flags) || - test_bit(SDMA_REQ_DONE_ERROR, &req->flags))) { + (READ_ONCE(req->done) || + READ_ONCE(req->has_error))) { user_sdma_free_request(req, false); pq_update(pq); set_comp_state(pq, cq, idx, ERROR, req->status); @@ -1611,8 +1446,6 @@ static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq, u16 idx, enum hfi1_sdma_comp_state state, int ret) { - hfi1_cdbg(SDMA, "[%u:%u:%u:%u] Setting completion status %u %d", - pq->dd->unit, pq->ctxt, pq->subctxt, idx, state, ret); if (state == ERROR) cq->comps[idx].errcode = -ret; smp_wmb(); /* make sure errcode is visible first */ @@ -1667,10 +1500,7 @@ static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode) struct sdma_mmu_node *node = container_of(mnode, struct sdma_mmu_node, rb); - atomic_sub(node->npages, &node->pq->n_locked); - - unpin_vector_pages(node->pq->mm, node->pages, 0, node->npages); - + unpin_sdma_pages(node); kfree(node); } |
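Note: the central change in the completion path above is the replacement of the shared req->flags bitmask (manipulated with test_bit()/set_bit() for SDMA_REQ_SEND_DONE, SDMA_REQ_HAS_ERROR and SDMA_REQ_DONE_ERROR) by two plain fields, req->done and req->has_error, each with a single writer and read via READ_ONCE()/WRITE_ONCE(), presumably to avoid atomic read-modify-write operations on a word that never had more than one writer per bit. The following is a minimal, self-contained userspace sketch of that single-writer flag pattern; the struct and function names here are illustrative only and do not come from the driver.

```c
#include <stdio.h>

/*
 * Simplified stand-ins for the kernel's READ_ONCE()/WRITE_ONCE():
 * force a single, non-torn access through a volatile cast so the
 * compiler cannot cache, split, or re-fuse the load/store.
 */
#define WRITE_ONCE(x, val) (*(volatile __typeof__(x) *)&(x) = (val))
#define READ_ONCE(x)       (*(volatile __typeof__(x) *)&(x))

/*
 * Hypothetical request: the submission side sets ->done once all
 * packets have been queued, the completion callback sets ->has_error
 * on a bad status; each field has exactly one writer.
 */
struct demo_request {
	unsigned int done;
	unsigned int has_error;
};

static void submit_side(struct demo_request *req, int all_pkts_queued)
{
	if (all_pkts_queued)
		WRITE_ONCE(req->done, 1);       /* plays the role of the old SDMA_REQ_SEND_DONE bit */
}

static void completion_side(struct demo_request *req, int status_ok)
{
	if (!status_ok)
		WRITE_ONCE(req->has_error, 1);  /* plays the role of SDMA_REQ_HAS_ERROR */

	/* Free the request once it is either fully submitted or failed. */
	if (READ_ONCE(req->done) || READ_ONCE(req->has_error))
		printf("request can be freed\n");
}

int main(void)
{
	struct demo_request req = { 0 };

	submit_side(&req, 1);
	completion_side(&req, 1);
	return 0;
}
```

Because each flag has a single writer, no atomicity is needed; READ_ONCE()/WRITE_ONCE() only prevent torn or compiler-reordered accesses, and in the patch the ordering between submitter and completion callback continues to rest on the existing seqsubmitted/seqcomp accounting.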