/* * Copyright(c) 2015 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * BSD LICENSE * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * - Neither the name of Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ #include #include #include #include #include #include "qp.h" #include "vt.h" /* * Note that it is OK to post send work requests in the SQE and ERR * states; rvt_do_send() will process them and generate error * completions as per IB 1.2 C10-96. */ const int ib_rvt_state_ops[IB_QPS_ERR + 1] = { [IB_QPS_RESET] = 0, [IB_QPS_INIT] = RVT_POST_RECV_OK, [IB_QPS_RTR] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK, [IB_QPS_RTS] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK | RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK | RVT_PROCESS_NEXT_SEND_OK, [IB_QPS_SQD] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK | RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK, [IB_QPS_SQE] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK | RVT_POST_SEND_OK | RVT_FLUSH_SEND, [IB_QPS_ERR] = RVT_POST_RECV_OK | RVT_FLUSH_RECV | RVT_POST_SEND_OK | RVT_FLUSH_SEND, }; EXPORT_SYMBOL(ib_rvt_state_ops); static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, gfp_t gfp) { unsigned long page = get_zeroed_page(gfp); /* * Free the page if someone raced with us installing it. */ spin_lock(&qpt->lock); if (map->page) free_page(page); else map->page = (void *)page; spin_unlock(&qpt->lock); } /** * init_qpn_table - initialize the QP number table for a device * @qpt: the QPN table */ static int init_qpn_table(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt) { u32 offset, i; struct rvt_qpn_map *map; int ret = 0; if (!(rdi->dparms.qpn_res_end >= rdi->dparms.qpn_res_start)) return -EINVAL; spin_lock_init(&qpt->lock); qpt->last = rdi->dparms.qpn_start; qpt->incr = rdi->dparms.qpn_inc << rdi->dparms.qos_shift; /* * Drivers may want some QPs beyond what we need for verbs let them use * our qpn table. No need for two. Lets go ahead and mark the bitmaps * for those. The reserved range must be *after* the range which verbs * will pick from. */ /* Figure out number of bit maps needed before reserved range */ qpt->nmaps = rdi->dparms.qpn_res_start / RVT_BITS_PER_PAGE; /* This should always be zero */ offset = rdi->dparms.qpn_res_start & RVT_BITS_PER_PAGE_MASK; /* Starting with the first reserved bit map */ map = &qpt->map[qpt->nmaps]; rvt_pr_info(rdi, "Reserving QPNs from 0x%x to 0x%x for non-verbs use\n", rdi->dparms.qpn_res_start, rdi->dparms.qpn_res_end); for (i = rdi->dparms.qpn_res_start; i <= rdi->dparms.qpn_res_end; i++) { if (!map->page) { get_map_page(qpt, map, GFP_KERNEL); if (!map->page) { ret = -ENOMEM; break; } } set_bit(offset, map->page); offset++; if (offset == RVT_BITS_PER_PAGE) { /* next page */ qpt->nmaps++; map++; offset = 0; } } return ret; } /** * free_qpn_table - free the QP number table for a device * @qpt: the QPN table */ static void free_qpn_table(struct rvt_qpn_table *qpt) { int i; for (i = 0; i < ARRAY_SIZE(qpt->map); i++) free_page((unsigned long)qpt->map[i].page); } int rvt_driver_qp_init(struct rvt_dev_info *rdi) { int i; int ret = -ENOMEM; if (rdi->flags & RVT_FLAG_QP_INIT_DRIVER) { rvt_pr_info(rdi, "Driver is doing QP init.\n"); return 0; } if (!rdi->dparms.qp_table_size) return -EINVAL; /* * If driver is not doing any QP allocation then make sure it is * providing the necessary QP functions. */ if (!rdi->driver_f.free_all_qps || !rdi->driver_f.qp_priv_alloc || !rdi->driver_f.qp_priv_free || !rdi->driver_f.notify_qp_reset) return -EINVAL; /* allocate parent object */ rdi->qp_dev = kzalloc(sizeof(*rdi->qp_dev), GFP_KERNEL); if (!rdi->qp_dev) return -ENOMEM; /* allocate hash table */ rdi->qp_dev->qp_table_size = rdi->dparms.qp_table_size; rdi->qp_dev->qp_table_bits = ilog2(rdi->dparms.qp_table_size); rdi->qp_dev->qp_table = kmalloc(rdi->qp_dev->qp_table_size * sizeof(*rdi->qp_dev->qp_table), GFP_KERNEL); if (!rdi->qp_dev->qp_table) goto no_qp_table; for (i = 0; i < rdi->qp_dev->qp_table_size; i++) RCU_INIT_POINTER(rdi->qp_dev->qp_table[i], NULL); spin_lock_init(&rdi->qp_dev->qpt_lock); /* initialize qpn map */ if (init_qpn_table(rdi, &rdi->qp_dev->qpn_table)) goto fail_table; spin_lock_init(&rdi->n_qps_lock); return 0; fail_table: kfree(rdi->qp_dev->qp_table); free_qpn_table(&rdi->qp_dev->qpn_table); no_qp_table: kfree(rdi->qp_dev); return ret; } /** * free_all_qps - check for QPs still in use * @qpt: the QP table to empty * * There should not be any QPs still in use. * Free memory for table. */ static unsigned rvt_free_all_qps(struct rvt_dev_info *rdi) { unsigned long flags; struct rvt_qp *qp; unsigned n, qp_inuse = 0; spinlock_t *ql; /* work around too long line below */ if (rdi->driver_f.free_all_qps) qp_inuse = rdi->driver_f.free_all_qps(rdi); if (!rdi->qp_dev) return qp_inuse; ql = &rdi->qp_dev->qpt_lock; spin_lock_irqsave(ql, flags); for (n = 0; n < rdi->qp_dev->qp_table_size; n++) { qp = rcu_dereference_protected(rdi->qp_dev->qp_table[n], lockdep_is_held(ql)); RCU_INIT_POINTER(rdi->qp_dev->qp_table[n], NULL); for (; qp; qp = rcu_dereference_protected(qp->next, lockdep_is_held(ql))) qp_inuse++; } spin_unlock_irqrestore(ql, flags); synchronize_rcu(); return qp_inuse; } void rvt_qp_exit(struct rvt_dev_info *rdi) { u32 qps_inuse = rvt_free_all_qps(rdi); if (qps_inuse) rvt_pr_err(rdi, "QP memory leak! %u still in use\n", qps_inuse); if (!rdi->qp_dev) return; if (rdi->flags & RVT_FLAG_QP_INIT_DRIVER) return; /* driver did the qp init so nothing else to do */ kfree(rdi->qp_dev->qp_table); free_qpn_table(&rdi->qp_dev->qpn_table); kfree(rdi->qp_dev); } static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, unsigned off) { return (map - qpt->map) * RVT_BITS_PER_PAGE + off; } /* * Allocate the next available QPN or * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI. */ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, enum ib_qp_type type, u8 port, gfp_t gfp) { u32 i, offset, max_scan, qpn; struct rvt_qpn_map *map; u32 ret; if (rdi->driver_f.alloc_qpn) return rdi->driver_f.alloc_qpn(rdi, qpt, type, port, GFP_KERNEL); if (type == IB_QPT_SMI || type == IB_QPT_GSI) { unsigned n; ret = type == IB_QPT_GSI; n = 1 << (ret + 2 * (port - 1)); spin_lock(&qpt->lock); if (qpt->flags & n) ret = -EINVAL; else qpt->flags |= n; spin_unlock(&qpt->lock); goto bail; } qpn = qpt->last + qpt->incr; if (qpn >= RVT_QPN_MAX) qpn = qpt->incr | ((qpt->last & 1) ^ 1); /* offset carries bit 0 */ offset = qpn & RVT_BITS_PER_PAGE_MASK; map = &qpt->map[qpn / RVT_BITS_PER_PAGE]; max_scan = qpt->nmaps - !offset; for (i = 0;;) { if (unlikely(!map->page)) { get_map_page(qpt, map, gfp); if (unlikely(!map->page)) break; } do { if (!test_and_set_bit(offset, map->page)) { qpt->last = qpn; ret = qpn; goto bail; } offset += qpt->incr; /* * This qpn might be bogus if offset >= BITS_PER_PAGE. * That is OK. It gets re-assigned below */ qpn = mk_qpn(qpt, map, offset); } while (offset < RVT_BITS_PER_PAGE && qpn < RVT_QPN_MAX); /* * In order to keep the number of pages allocated to a * minimum, we scan the all existing pages before increasing * the size of the bitmap table. */ if (++i > max_scan) { if (qpt->nmaps == RVT_QPNMAP_ENTRIES) break; map = &qpt->map[qpt->nmaps++]; /* start at incr with current bit 0 */ offset = qpt->incr | (offset & 1); } else if (map < &qpt->map[qpt->nmaps]) { ++map; /* start at incr with current bit 0 */ offset = qpt->incr | (offset & 1); } else { map = &qpt->map[0]; /* wrap to first map page, invert bit 0 */ offset = qpt->incr | ((offset & 1) ^ 1); } /* there can be no bits at shift and below */ WARN_ON(offset & (rdi->dparms.qos_shift - 1)); qpn = mk_qpn(qpt, map, offset); } ret = -ENOMEM; bail: return ret; } static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn) { struct rvt_qpn_map *map; map = qpt->map + qpn / RVT_BITS_PER_PAGE; if (map->page) clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); } /** * reset_qp - initialize the QP state to the reset state * @qp: the QP to reset * @type: the QP type */ void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, enum ib_qp_type type) { qp->remote_qpn = 0; qp->qkey = 0; qp->qp_access_flags = 0; /* * Let driver do anything it needs to for a new/reset qp */ rdi->driver_f.notify_qp_reset(qp); qp->s_flags &= RVT_S_SIGNAL_REQ_WR; qp->s_hdrwords = 0; qp->s_wqe = NULL; qp->s_draining = 0; qp->s_next_psn = 0; qp->s_last_psn = 0; qp->s_sending_psn = 0; qp->s_sending_hpsn = 0; qp->s_psn = 0; qp->r_psn = 0; qp->r_msn = 0; if (type == IB_QPT_RC) { qp->s_state = IB_OPCODE_RC_SEND_LAST; qp->r_state = IB_OPCODE_RC_SEND_LAST; } else { qp->s_state = IB_OPCODE_UC_SEND_LAST; qp->r_state = IB_OPCODE_UC_SEND_LAST; } qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; qp->r_nak_state = 0; qp->r_aflags = 0; qp->r_flags = 0; qp->s_head = 0; qp->s_tail = 0; qp->s_cur = 0; qp->s_acked = 0; qp->s_last = 0; qp->s_ssn = 1; qp->s_lsn = 0; qp->s_mig_state = IB_MIG_MIGRATED; memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue)); qp->r_head_ack_queue = 0; qp->s_tail_ack_queue = 0; qp->s_num_rd_atomic = 0; if (qp->r_rq.wq) { qp->r_rq.wq->head = 0; qp->r_rq.wq->tail = 0; } qp->r_sge.num_sge = 0; } EXPORT_SYMBOL(rvt_reset_qp); /** * rvt_create_qp - create a queue pair for a device * @ibpd: the protection domain who's device we create the queue pair for * @init_attr: the attributes of the queue pair * @udata: user data for libibverbs.so * * Queue pair creation is mostly an rvt issue. However, drivers have their own * unique idea of what queue pair numbers mean. For instance there is a reserved * range for PSM. * * Returns the queue pair on success, otherwise returns an errno. * * Called by the ib_create_qp() core verbs function. */ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { struct rvt_qp *qp; int err; struct rvt_swqe *swq = NULL; size_t sz; size_t sg_list_sz; struct ib_qp *ret = ERR_PTR(-ENOMEM); struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device); void *priv = NULL; gfp_t gfp; if (!rdi) return ERR_PTR(-EINVAL); if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge || init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr || init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO)) return ERR_PTR(-EINVAL); /* GFP_NOIO is applicable to RC QP's only */ if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO && init_attr->qp_type != IB_QPT_RC) return ERR_PTR(-EINVAL); gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ? GFP_NOIO : GFP_KERNEL; /* Check receive queue parameters if no SRQ is specified. */ if (!init_attr->srq) { if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge || init_attr->cap.max_recv_wr > rdi->dparms.props.max_qp_wr) return ERR_PTR(-EINVAL); if (init_attr->cap.max_send_sge + init_attr->cap.max_send_wr + init_attr->cap.max_recv_sge + init_attr->cap.max_recv_wr == 0) return ERR_PTR(-EINVAL); } switch (init_attr->qp_type) { case IB_QPT_SMI: case IB_QPT_GSI: if (init_attr->port_num == 0 || init_attr->port_num > ibpd->device->phys_port_cnt) return ERR_PTR(-EINVAL); case IB_QPT_UC: case IB_QPT_RC: case IB_QPT_UD: sz = sizeof(struct rvt_sge) * init_attr->cap.max_send_sge + sizeof(struct rvt_swqe); if (gfp == GFP_NOIO) swq = __vmalloc( (init_attr->cap.max_send_wr + 1) * sz, gfp, PAGE_KERNEL); else swq = vmalloc( (init_attr->cap.max_send_wr + 1) * sz); if (!swq) return ERR_PTR(-ENOMEM); sz = sizeof(*qp); sg_list_sz = 0; if (init_attr->srq) { struct rvt_srq *srq = ibsrq_to_rvtsrq(init_attr->srq); if (srq->rq.max_sge > 1) sg_list_sz = sizeof(*qp->r_sg_list) * (srq->rq.max_sge - 1); } else if (init_attr->cap.max_recv_sge > 1) sg_list_sz = sizeof(*qp->r_sg_list) * (init_attr->cap.max_recv_sge - 1); qp = kzalloc(sz + sg_list_sz, gfp); if (!qp) goto bail_swq; RCU_INIT_POINTER(qp->next, NULL); /* * Driver needs to set up it's private QP structure and do any * initialization that is needed. */ priv = rdi->driver_f.qp_priv_alloc(rdi, qp, gfp); if (!priv) goto bail_qp; qp->priv = priv; qp->timeout_jiffies = usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / 1000UL); if (init_attr->srq) { sz = 0; } else { qp->r_rq.size = init_attr->cap.max_recv_wr + 1; qp->r_rq.max_sge = init_attr->cap.max_recv_sge; sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + sizeof(struct rvt_rwqe); if (udata) qp->r_rq.wq = vmalloc_user( sizeof(struct rvt_rwq) + qp->r_rq.size * sz); else if (gfp == GFP_NOIO) qp->r_rq.wq = __vmalloc( sizeof(struct rvt_rwq) + qp->r_rq.size * sz, gfp, PAGE_KERNEL); else qp->r_rq.wq = vmalloc( sizeof(struct rvt_rwq) + qp->r_rq.size * sz); if (!qp->r_rq.wq) goto bail_driver_priv; } /* * ib_create_qp() will initialize qp->ibqp * except for qp->ibqp.qp_num. */ spin_lock_init(&qp->r_lock); spin_lock_init(&qp->s_lock); spin_lock_init(&qp->r_rq.lock); atomic_set(&qp->refcount, 0); init_waitqueue_head(&qp->wait); init_timer(&qp->s_timer); qp->s_timer.data = (unsigned long)qp; INIT_LIST_HEAD(&qp->rspwait); qp->state = IB_QPS_RESET; qp->s_wq = swq; qp->s_size = init_attr->cap.max_send_wr + 1; qp->s_max_sge = init_attr->cap.max_send_sge; if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) qp->s_flags = RVT_S_SIGNAL_REQ_WR; err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table, init_attr->qp_type, init_attr->port_num, gfp); if (err < 0) { ret = ERR_PTR(err); goto bail_rq_wq; } qp->ibqp.qp_num = err; qp->port_num = init_attr->port_num; rvt_reset_qp(rdi, qp, init_attr->qp_type); break; default: /* Don't support raw QPs */ return ERR_PTR(-EINVAL); } init_attr->cap.max_inline_data = 0; /* * Return the address of the RWQ as the offset to mmap. * See rvt_mmap() for details. */ if (udata && udata->outlen >= sizeof(__u64)) { if (!qp->r_rq.wq) { __u64 offset = 0; err = ib_copy_to_udata(udata, &offset, sizeof(offset)); if (err) { ret = ERR_PTR(err); goto bail_qpn; } } else { u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz; qp->ip = rvt_create_mmap_info(rdi, s, ibpd->uobject->context, qp->r_rq.wq); if (!qp->ip) { ret = ERR_PTR(-ENOMEM); goto bail_qpn; } err = ib_copy_to_udata(udata, &qp->ip->offset, sizeof(qp->ip->offset)); if (err) { ret = ERR_PTR(err); goto bail_ip; } } } spin_lock(&rdi->n_qps_lock); if (rdi->n_qps_allocated == rdi->dparms.props.max_qp) { spin_unlock(&rdi->n_qps_lock); ret = ERR_PTR(-ENOMEM); goto bail_ip; } rdi->n_qps_allocated++; spin_unlock(&rdi->n_qps_lock); if (qp->ip) { spin_lock_irq(&rdi->pending_lock); list_add(&qp->ip->pending_mmaps, &rdi->pending_mmaps); spin_unlock_irq(&rdi->pending_lock); } ret = &qp->ibqp; /* * We have our QP and its good, now keep track of what types of opcodes * can be processed on this QP. We do this by keeping track of what the * 3 high order bits of the opcode are. */ switch (init_attr->qp_type) { case IB_QPT_SMI: case IB_QPT_GSI: case IB_QPT_UD: qp->allowed_ops = IB_OPCODE_UD_SEND_ONLY & RVT_OPCODE_QP_MASK; break; case IB_QPT_RC: qp->allowed_ops = IB_OPCODE_RC_SEND_ONLY & RVT_OPCODE_QP_MASK; break; case IB_QPT_UC: qp->allowed_ops = IB_OPCODE_UC_SEND_ONLY & RVT_OPCODE_QP_MASK; break; default: ret = ERR_PTR(-EINVAL); goto bail_ip; } return ret; bail_ip: kref_put(&qp->ip->ref, rvt_release_mmap_info); bail_qpn: free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num); bail_rq_wq: vfree(qp->r_rq.wq); bail_driver_priv: rdi->driver_f.qp_priv_free(rdi, qp); bail_qp: kfree(qp); bail_swq: vfree(swq); return ret; } /** * qib_modify_qp - modify the attributes of a queue pair * @ibqp: the queue pair who's attributes we're modifying * @attr: the new attributes * @attr_mask: the mask of attributes to modify * @udata: user data for libibverbs.so * * Returns 0 on success, otherwise returns an errno. */ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { /* * VT-DRIVER-API: qp_mtu() * OPA devices have a per VL MTU the driver has a mapping of IB SL to SC * to VL and the mapping table of MTUs per VL. This is not something * that IB has and should not live in the rvt. */ return -EOPNOTSUPP; } /** * rvt_destroy_qp - destroy a queue pair * @ibqp: the queue pair to destroy * * Returns 0 on success. * * Note that this can be called while the QP is actively sending or * receiving! */ int rvt_destroy_qp(struct ib_qp *ibqp) { /* * VT-DRIVER-API: qp_flush() * Driver provies a mechanism to flush and wait for that flush to * finish. */ return -EOPNOTSUPP; } int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr) { return -EOPNOTSUPP; } /** * rvt_post_receive - post a receive on a QP * @ibqp: the QP to post the receive on * @wr: the WR to post * @bad_wr: the first bad WR is put here * * This may be called from interrupt context. */ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { /* * When a packet arrives the driver needs to call up to rvt to process * the packet. The UD, RC, UC processing will be done in rvt, however * the driver should be able to override this if it so choses. Perhaps a * set of function pointers set up at registration time. */ return -EOPNOTSUPP; } /** * rvt_post_one_wr - post one RC, UC, or UD send work request * @qp: the QP to post on * @wr: the work request to send */ static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr) { struct rvt_swqe *wqe; u32 next; int i; int j; int acc; struct rvt_lkey_table *rkt; struct rvt_pd *pd; struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); /* IB spec says that num_sge == 0 is OK. */ if (unlikely(wr->num_sge > qp->s_max_sge)) return -EINVAL; /* * Don't allow RDMA reads or atomic operations on UC or * undefined operations. * Make sure buffer is large enough to hold the result for atomics. */ if (qp->ibqp.qp_type == IB_QPT_UC) { if ((unsigned)wr->opcode >= IB_WR_RDMA_READ) return -EINVAL; } else if (qp->ibqp.qp_type != IB_QPT_RC) { /* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */ if (wr->opcode != IB_WR_SEND && wr->opcode != IB_WR_SEND_WITH_IMM) return -EINVAL; /* Check UD destination address PD */ if (qp->ibqp.pd != ud_wr(wr)->ah->pd) return -EINVAL; } else if ((unsigned)wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) { return -EINVAL; } else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP && (wr->num_sge == 0 || wr->sg_list[0].length < sizeof(u64) || wr->sg_list[0].addr & (sizeof(u64) - 1))) { return -EINVAL; } else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) { return -EINVAL; } next = qp->s_head + 1; if (next >= qp->s_size) next = 0; if (next == qp->s_last) return -ENOMEM; rkt = &rdi->lkey_table; pd = ibpd_to_rvtpd(qp->ibqp.pd); wqe = rvt_get_swqe_ptr(qp, qp->s_head); if (qp->ibqp.qp_type != IB_QPT_UC && qp->ibqp.qp_type != IB_QPT_RC) memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr)); else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM || wr->opcode == IB_WR_RDMA_WRITE || wr->opcode == IB_WR_RDMA_READ) memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr)); else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr)); else memcpy(&wqe->wr, wr, sizeof(wqe->wr)); wqe->length = 0; j = 0; if (wr->num_sge) { acc = wr->opcode >= IB_WR_RDMA_READ ? IB_ACCESS_LOCAL_WRITE : 0; for (i = 0; i < wr->num_sge; i++) { u32 length = wr->sg_list[i].length; int ok; if (length == 0) continue; ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], &wr->sg_list[i], acc); if (!ok) goto bail_inval_free; wqe->length += length; j++; } wqe->wr.num_sge = j; } if (qp->ibqp.qp_type == IB_QPT_UC || qp->ibqp.qp_type == IB_QPT_RC) { if (wqe->length > 0x80000000U) goto bail_inval_free; } else { atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount); } wqe->ssn = qp->s_ssn++; qp->s_head = next; return 0; bail_inval_free: /* release mr holds */ while (j) { struct rvt_sge *sge = &wqe->sg_list[--j]; rvt_put_mr(sge->mr); } return -EINVAL; } /** * rvt_post_send - post a send on a QP * @ibqp: the QP to post the send on * @wr: the list of work requests to post * @bad_wr: the first bad WR is put here * * This may be called from interrupt context. */ int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); unsigned long flags = 0; int call_send; unsigned nreq = 0; int err = 0; spin_lock_irqsave(&qp->s_lock, flags); /* * Ensure QP state is such that we can send. If not bail out early, * there is no need to do this every time we post a send. */ if (unlikely(!(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))) { spin_unlock_irqrestore(&qp->s_lock, flags); return -EINVAL; } /* * If the send queue is empty, and we only have a single WR then just go * ahead and kick the send engine into gear. Otherwise we will always * just schedule the send to happen later. */ call_send = qp->s_head == ACCESS_ONCE(qp->s_last) && !wr->next; for (; wr; wr = wr->next) { err = rvt_post_one_wr(qp, wr); if (unlikely(err)) { *bad_wr = wr; goto bail; } nreq++; } bail: if (nreq && !call_send) rdi->driver_f.schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); if (nreq && call_send) rdi->driver_f.do_send(qp); return err; } /** * rvt_post_srq_receive - post a receive on a shared receive queue * @ibsrq: the SRQ to post the receive on * @wr: the list of work requests to post * @bad_wr: A pointer to the first WR to cause a problem is put here * * This may be called from interrupt context. */ int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { return -EOPNOTSUPP; }