author     Linus Torvalds <torvalds@linux-foundation.org>  2012-05-22 02:54:55 +0200
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-05-22 02:54:55 +0200
commit     c23ddf7857bdb2e8001b0a058603497c765a580d
tree       f1d826612114a17d6ab543b7095adf04b5ba614a  /drivers/infiniband/hw/cxgb4
parent     Merge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
parent     Merge branches 'core', 'cxgb4', 'ipath', 'iser', 'lockdep', 'mlx4', 'nes', 'o...

Merge tag 'rdma-for-3.5' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband
Pull InfiniBand/RDMA changes from Roland Dreier:
- Add ocrdma hardware driver for Emulex IB-over-Ethernet adapters
- Add generic and mlx4 support for "raw" QPs: allow suitably privileged
applications to send and receive arbitrary packets directly to/from
the hardware
- Add "doorbell drop" handling to the cxgb4 driver
- A fairly large batch of qib hardware driver changes
- A few fixes for lockdep-detected issues
- A few other miscellaneous fixes and cleanups
Fix up trivial conflict in drivers/net/ethernet/emulex/benet/be.h.
* tag 'rdma-for-3.5' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (53 commits)
RDMA/cxgb4: Include vmalloc.h for vmalloc and vfree
IB/mlx4: Fix mlx4_ib_add() error flow
IB/core: Fix IB_SA_COMP_MASK macro
IB/iser: Fix error flow in iser ep connection establishment
IB/mlx4: Increase the number of vectors (EQs) available for ULPs
RDMA/cxgb4: Add query_qp support
RDMA/cxgb4: Remove kfifo usage
RDMA/cxgb4: Use vmalloc() for debugfs QP dump
RDMA/cxgb4: DB Drop Recovery for RDMA and LLD queues
RDMA/cxgb4: Disable interrupts in c4iw_ev_dispatch()
RDMA/cxgb4: Add DB Overflow Avoidance
RDMA/cxgb4: Add debugfs RDMA memory stats
cxgb4: DB Drop Recovery for RDMA and LLD queues
cxgb4: Common platform specific changes for DB Drop Recovery
cxgb4: Detect DB FULL events and notify RDMA ULD
RDMA/cxgb4: Drop peer_abort when no endpoint found
RDMA/cxgb4: Always wake up waiters in c4iw_peer_abort_intr()
mlx4_core: Change bitmap allocator to work in round-robin fashion
RDMA/nes: Don't call event handler if pointer is NULL
RDMA/nes: Fix for the ORD value of the connecting peer
...
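
The doorbell ("DB") entries above refer to a three-state scheme in iw_cxgb4: a DB_FULL notification from cxgb4 disables user doorbells and moves the device into flow control, DB_EMPTY resumes normal operation, and DB_DROP forces a recovery pass that resyncs queue indices before doorbells are re-enabled. The following is a minimal standalone C sketch of that state machine; the types and helper names are illustrative, and the direct return to NORMAL after recovery is a simplification of what device.c in the diff below actually does (the driver may stay in flow control if the QP count is high).

```c
/* Simplified, standalone sketch of the cxgb4 doorbell flow-control states. */
#include <stdio.h>

enum db_state { NORMAL, FLOW_CONTROL, RECOVERY };
enum db_event { DB_FULL, DB_EMPTY, DB_DROP };

struct dev_ctx {
	enum db_state db_state;
	unsigned long long db_full, db_empty, db_drop, transitions;
};

/* Stand-ins for the driver's per-QP doorbell enable/disable and pidx resync. */
static void disable_user_dbs(struct dev_ctx *dev) { (void)dev; /* idr_for_each(..., disable_qp_db, ...) */ }
static void enable_user_dbs(struct dev_ctx *dev)  { (void)dev; /* idr_for_each(..., enable_qp_db, ...)  */ }
static void resync_queue_indices(struct dev_ctx *dev) { (void)dev; /* cxgb4_sync_txq_pidx() per QP */ }

static void db_control(struct dev_ctx *dev, enum db_event ev)
{
	switch (ev) {
	case DB_FULL:		/* hardware doorbell FIFO nearly full: stop user rings */
		if (dev->db_state == NORMAL) {
			dev->db_state = FLOW_CONTROL;
			dev->transitions++;
			disable_user_dbs(dev);
		}
		dev->db_full++;
		break;
	case DB_EMPTY:		/* FIFO drained: resume user rings */
		if (dev->db_state == FLOW_CONTROL) {
			dev->db_state = NORMAL;
			dev->transitions++;
			enable_user_dbs(dev);
		}
		dev->db_empty++;
		break;
	case DB_DROP:		/* doorbells lost: recover queue state, then re-enable */
		dev->db_state = RECOVERY;
		dev->transitions++;
		disable_user_dbs(dev);
		resync_queue_indices(dev);
		dev->db_state = NORMAL;	/* simplification: driver may re-enter flow control */
		dev->transitions++;
		enable_user_dbs(dev);
		dev->db_drop++;
		break;
	}
}

int main(void)
{
	struct dev_ctx dev = { .db_state = NORMAL };

	db_control(&dev, DB_FULL);
	db_control(&dev, DB_EMPTY);
	db_control(&dev, DB_DROP);
	printf("full=%llu empty=%llu drop=%llu transitions=%llu\n",
	       dev.db_full, dev.db_empty, dev.db_drop, dev.transitions);
	return 0;
}
```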
Diffstat (limited to 'drivers/infiniband/hw/cxgb4')
-rw-r--r--  drivers/infiniband/hw/cxgb4/Makefile    |   2
-rw-r--r--  drivers/infiniband/hw/cxgb4/cm.c        |  36
-rw-r--r--  drivers/infiniband/hw/cxgb4/device.c    | 340
-rw-r--r--  drivers/infiniband/hw/cxgb4/ev.c        |   8
-rw-r--r--  drivers/infiniband/hw/cxgb4/id_table.c  | 112
-rw-r--r--  drivers/infiniband/hw/cxgb4/iw_cxgb4.h  | 134
-rw-r--r--  drivers/infiniband/hw/cxgb4/mem.c       |  21
-rw-r--r--  drivers/infiniband/hw/cxgb4/provider.c  |  19
-rw-r--r--  drivers/infiniband/hw/cxgb4/qp.c        | 105
-rw-r--r--  drivers/infiniband/hw/cxgb4/resource.c  | 180
-rw-r--r--  drivers/infiniband/hw/cxgb4/t4.h        |  24
-rw-r--r--  drivers/infiniband/hw/cxgb4/user.h      |   2
12 files changed, 821 insertions, 162 deletions
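
The new id_table.c in the diff below replaces the kfifo-based resource FIFOs with a bitmap allocator that keeps a search hint and can randomize it so recently freed ids (e.g. STag indices) are not immediately reused. A rough userspace approximation of that allocation strategy follows; the names are illustrative and a plain byte array stands in for the kernel's bitmap helpers.

```c
/* Userspace sketch of a hint-based bitmap id allocator with optional random skip. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define ID_TABLE_F_RANDOM 1
#define RANDOM_SKIP 16

struct id_table {
	uint32_t start;		/* lowest id handed out */
	uint32_t last;		/* search hint */
	uint32_t max;		/* number of ids */
	uint32_t flags;
	unsigned char *bits;	/* one byte per id, for simplicity */
};

static int id_table_init(struct id_table *t, uint32_t start, uint32_t num,
			 uint32_t reserved, uint32_t flags)
{
	uint32_t i;

	t->start = start;
	t->max = num;
	t->flags = flags;
	t->last = (flags & ID_TABLE_F_RANDOM) ? (rand() % RANDOM_SKIP) % num : 0;
	t->bits = calloc(num, 1);
	if (!t->bits)
		return -1;
	for (i = 0; i < reserved && i < num; i++)	/* reserved low ids are never handed out */
		t->bits[i] = 1;
	return 0;
}

static uint32_t id_alloc(struct id_table *t)
{
	uint32_t obj = t->last, scanned;

	for (scanned = 0; scanned < t->max; scanned++, obj = (obj + 1) % t->max) {
		if (!t->bits[obj]) {
			t->bits[obj] = 1;
			/* advance the hint; randomize it to discourage quick reuse */
			t->last = (t->flags & ID_TABLE_F_RANDOM) ?
				(t->last + rand() % RANDOM_SKIP) % t->max :
				(obj + 1) % t->max;
			return obj + t->start;
		}
	}
	return (uint32_t)-1;	/* table full */
}

static void id_free(struct id_table *t, uint32_t id)
{
	t->bits[id - t->start] = 0;
}

int main(void)
{
	struct id_table stags;
	uint32_t a, b;

	if (id_table_init(&stags, 0, 64, 1, ID_TABLE_F_RANDOM))
		return 1;
	a = id_alloc(&stags);
	b = id_alloc(&stags);
	printf("allocated ids %u and %u\n", a, b);
	id_free(&stags, a);
	id_free(&stags, b);
	free(stags.bits);
	return 0;
}
```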
diff --git a/drivers/infiniband/hw/cxgb4/Makefile b/drivers/infiniband/hw/cxgb4/Makefile index 46b878ca2c3b..e11cf7299945 100644 --- a/drivers/infiniband/hw/cxgb4/Makefile +++ b/drivers/infiniband/hw/cxgb4/Makefile @@ -2,4 +2,4 @@ ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4 obj-$(CONFIG_INFINIBAND_CXGB4) += iw_cxgb4.o -iw_cxgb4-y := device.o cm.o provider.o mem.o cq.o qp.o resource.o ev.o +iw_cxgb4-y := device.o cm.o provider.o mem.o cq.o qp.o resource.o ev.o id_table.o diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 92b4c2b0308b..55ab284e22f2 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1362,7 +1362,10 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) ep = lookup_tid(t, tid); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - BUG_ON(!ep); + if (!ep) { + printk(KERN_WARNING MOD "Abort rpl to freed endpoint\n"); + return 0; + } mutex_lock(&ep->com.mutex); switch (ep->com.state) { case ABORTING: @@ -1410,6 +1413,24 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } + /* + * Log interesting failures. + */ + switch (status) { + case CPL_ERR_CONN_RESET: + case CPL_ERR_CONN_TIMEDOUT: + break; + default: + printk(KERN_INFO MOD "Active open failure - " + "atid %u status %u errno %d %pI4:%u->%pI4:%u\n", + atid, status, status2errno(status), + &ep->com.local_addr.sin_addr.s_addr, + ntohs(ep->com.local_addr.sin_port), + &ep->com.remote_addr.sin_addr.s_addr, + ntohs(ep->com.remote_addr.sin_port)); + break; + } + connect_reply_upcall(ep, status2errno(status)); state_set(&ep->com, DEAD); @@ -1593,7 +1614,7 @@ static int import_ep(struct c4iw_ep *ep, __be32 peer_ip, struct dst_entry *dst, n, n->dev, 0); if (!ep->l2t) goto out; - ep->mtu = dst_mtu(ep->dst); + ep->mtu = dst_mtu(dst); ep->tx_chan = cxgb4_port_chan(n->dev); ep->smac_idx = (cxgb4_port_viid(n->dev) & 0x7F) << 1; step = cdev->rdev.lldi.ntxq / @@ -2656,6 +2677,12 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) unsigned int tid = GET_TID(req); ep = lookup_tid(t, tid); + if (!ep) { + printk(KERN_WARNING MOD + "Abort on non-existent endpoint, tid %d\n", tid); + kfree_skb(skb); + return 0; + } if (is_neg_adv_abort(req->status)) { PDBG("%s neg_adv_abort ep %p tid %u\n", __func__, ep, ep->hwtid); @@ -2667,11 +2694,8 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) /* * Wake up any threads in rdma_init() or rdma_fini(). 
- * However, this is not needed if com state is just - * MPA_REQ_SENT */ - if (ep->com.state != MPA_REQ_SENT) - c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); + c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); sched(dev, skb); return 0; } diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 6d0df6ec161b..cb4ecd783700 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -32,6 +32,7 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/debugfs.h> +#include <linux/vmalloc.h> #include <rdma/ib_verbs.h> @@ -44,6 +45,12 @@ MODULE_DESCRIPTION("Chelsio T4 RDMA Driver"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRV_VERSION); +struct uld_ctx { + struct list_head entry; + struct cxgb4_lld_info lldi; + struct c4iw_dev *dev; +}; + static LIST_HEAD(uld_ctx_list); static DEFINE_MUTEX(dev_mutex); @@ -115,7 +122,7 @@ static int qp_release(struct inode *inode, struct file *file) printk(KERN_INFO "%s null qpd?\n", __func__); return 0; } - kfree(qpd->buf); + vfree(qpd->buf); kfree(qpd); return 0; } @@ -139,7 +146,7 @@ static int qp_open(struct inode *inode, struct file *file) spin_unlock_irq(&qpd->devp->lock); qpd->bufsize = count * 128; - qpd->buf = kmalloc(qpd->bufsize, GFP_KERNEL); + qpd->buf = vmalloc(qpd->bufsize); if (!qpd->buf) { ret = -ENOMEM; goto err1; @@ -240,6 +247,81 @@ static const struct file_operations stag_debugfs_fops = { .llseek = default_llseek, }; +static char *db_state_str[] = {"NORMAL", "FLOW_CONTROL", "RECOVERY"}; + +static int stats_show(struct seq_file *seq, void *v) +{ + struct c4iw_dev *dev = seq->private; + + seq_printf(seq, " Object: %10s %10s %10s %10s\n", "Total", "Current", + "Max", "Fail"); + seq_printf(seq, " PDID: %10llu %10llu %10llu %10llu\n", + dev->rdev.stats.pd.total, dev->rdev.stats.pd.cur, + dev->rdev.stats.pd.max, dev->rdev.stats.pd.fail); + seq_printf(seq, " QID: %10llu %10llu %10llu %10llu\n", + dev->rdev.stats.qid.total, dev->rdev.stats.qid.cur, + dev->rdev.stats.qid.max, dev->rdev.stats.qid.fail); + seq_printf(seq, " TPTMEM: %10llu %10llu %10llu %10llu\n", + dev->rdev.stats.stag.total, dev->rdev.stats.stag.cur, + dev->rdev.stats.stag.max, dev->rdev.stats.stag.fail); + seq_printf(seq, " PBLMEM: %10llu %10llu %10llu %10llu\n", + dev->rdev.stats.pbl.total, dev->rdev.stats.pbl.cur, + dev->rdev.stats.pbl.max, dev->rdev.stats.pbl.fail); + seq_printf(seq, " RQTMEM: %10llu %10llu %10llu %10llu\n", + dev->rdev.stats.rqt.total, dev->rdev.stats.rqt.cur, + dev->rdev.stats.rqt.max, dev->rdev.stats.rqt.fail); + seq_printf(seq, " OCQPMEM: %10llu %10llu %10llu %10llu\n", + dev->rdev.stats.ocqp.total, dev->rdev.stats.ocqp.cur, + dev->rdev.stats.ocqp.max, dev->rdev.stats.ocqp.fail); + seq_printf(seq, " DB FULL: %10llu\n", dev->rdev.stats.db_full); + seq_printf(seq, " DB EMPTY: %10llu\n", dev->rdev.stats.db_empty); + seq_printf(seq, " DB DROP: %10llu\n", dev->rdev.stats.db_drop); + seq_printf(seq, " DB State: %s Transitions %llu\n", + db_state_str[dev->db_state], + dev->rdev.stats.db_state_transitions); + return 0; +} + +static int stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, stats_show, inode->i_private); +} + +static ssize_t stats_clear(struct file *file, const char __user *buf, + size_t count, loff_t *pos) +{ + struct c4iw_dev *dev = ((struct seq_file *)file->private_data)->private; + + mutex_lock(&dev->rdev.stats.lock); + dev->rdev.stats.pd.max = 0; + dev->rdev.stats.pd.fail = 0; + dev->rdev.stats.qid.max = 0; + dev->rdev.stats.qid.fail 
= 0; + dev->rdev.stats.stag.max = 0; + dev->rdev.stats.stag.fail = 0; + dev->rdev.stats.pbl.max = 0; + dev->rdev.stats.pbl.fail = 0; + dev->rdev.stats.rqt.max = 0; + dev->rdev.stats.rqt.fail = 0; + dev->rdev.stats.ocqp.max = 0; + dev->rdev.stats.ocqp.fail = 0; + dev->rdev.stats.db_full = 0; + dev->rdev.stats.db_empty = 0; + dev->rdev.stats.db_drop = 0; + dev->rdev.stats.db_state_transitions = 0; + mutex_unlock(&dev->rdev.stats.lock); + return count; +} + +static const struct file_operations stats_debugfs_fops = { + .owner = THIS_MODULE, + .open = stats_open, + .release = single_release, + .read = seq_read, + .llseek = seq_lseek, + .write = stats_clear, +}; + static int setup_debugfs(struct c4iw_dev *devp) { struct dentry *de; @@ -256,6 +338,12 @@ static int setup_debugfs(struct c4iw_dev *devp) (void *)devp, &stag_debugfs_fops); if (de && de->d_inode) de->d_inode->i_size = 4096; + + de = debugfs_create_file("stats", S_IWUSR, devp->debugfs_root, + (void *)devp, &stats_debugfs_fops); + if (de && de->d_inode) + de->d_inode->i_size = 4096; + return 0; } @@ -269,9 +357,13 @@ void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev, list_for_each_safe(pos, nxt, &uctx->qpids) { entry = list_entry(pos, struct c4iw_qid_list, entry); list_del_init(&entry->entry); - if (!(entry->qid & rdev->qpmask)) - c4iw_put_resource(&rdev->resource.qid_fifo, entry->qid, - &rdev->resource.qid_fifo_lock); + if (!(entry->qid & rdev->qpmask)) { + c4iw_put_resource(&rdev->resource.qid_table, + entry->qid); + mutex_lock(&rdev->stats.lock); + rdev->stats.qid.cur -= rdev->qpmask + 1; + mutex_unlock(&rdev->stats.lock); + } kfree(entry); } @@ -332,6 +424,13 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) goto err1; } + rdev->stats.pd.total = T4_MAX_NUM_PD; + rdev->stats.stag.total = rdev->lldi.vr->stag.size; + rdev->stats.pbl.total = rdev->lldi.vr->pbl.size; + rdev->stats.rqt.total = rdev->lldi.vr->rq.size; + rdev->stats.ocqp.total = rdev->lldi.vr->ocq.size; + rdev->stats.qid.total = rdev->lldi.vr->qp.size; + err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD); if (err) { printk(KERN_ERR MOD "error %d initializing resources\n", err); @@ -370,12 +469,6 @@ static void c4iw_rdev_close(struct c4iw_rdev *rdev) c4iw_destroy_resource(&rdev->resource); } -struct uld_ctx { - struct list_head entry; - struct cxgb4_lld_info lldi; - struct c4iw_dev *dev; -}; - static void c4iw_dealloc(struct uld_ctx *ctx) { c4iw_rdev_close(&ctx->dev->rdev); @@ -440,6 +533,8 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) idr_init(&devp->qpidr); idr_init(&devp->mmidr); spin_lock_init(&devp->lock); + mutex_init(&devp->rdev.stats.lock); + mutex_init(&devp->db_mutex); if (c4iw_debugfs_root) { devp->debugfs_root = debugfs_create_dir( @@ -585,11 +680,234 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state) return 0; } +static int disable_qp_db(int id, void *p, void *data) +{ + struct c4iw_qp *qp = p; + + t4_disable_wq_db(&qp->wq); + return 0; +} + +static void stop_queues(struct uld_ctx *ctx) +{ + spin_lock_irq(&ctx->dev->lock); + if (ctx->dev->db_state == NORMAL) { + ctx->dev->rdev.stats.db_state_transitions++; + ctx->dev->db_state = FLOW_CONTROL; + idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL); + } + spin_unlock_irq(&ctx->dev->lock); +} + +static int enable_qp_db(int id, void *p, void *data) +{ + struct c4iw_qp *qp = p; + + t4_enable_wq_db(&qp->wq); + return 0; +} + +static void resume_queues(struct uld_ctx *ctx) +{ + spin_lock_irq(&ctx->dev->lock); + if (ctx->dev->qpcnt <= 
db_fc_threshold && + ctx->dev->db_state == FLOW_CONTROL) { + ctx->dev->db_state = NORMAL; + ctx->dev->rdev.stats.db_state_transitions++; + idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL); + } + spin_unlock_irq(&ctx->dev->lock); +} + +struct qp_list { + unsigned idx; + struct c4iw_qp **qps; +}; + +static int add_and_ref_qp(int id, void *p, void *data) +{ + struct qp_list *qp_listp = data; + struct c4iw_qp *qp = p; + + c4iw_qp_add_ref(&qp->ibqp); + qp_listp->qps[qp_listp->idx++] = qp; + return 0; +} + +static int count_qps(int id, void *p, void *data) +{ + unsigned *countp = data; + (*countp)++; + return 0; +} + +static void deref_qps(struct qp_list qp_list) +{ + int idx; + + for (idx = 0; idx < qp_list.idx; idx++) + c4iw_qp_rem_ref(&qp_list.qps[idx]->ibqp); +} + +static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list) +{ + int idx; + int ret; + + for (idx = 0; idx < qp_list->idx; idx++) { + struct c4iw_qp *qp = qp_list->qps[idx]; + + ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0], + qp->wq.sq.qid, + t4_sq_host_wq_pidx(&qp->wq), + t4_sq_wq_size(&qp->wq)); + if (ret) { + printk(KERN_ERR MOD "%s: Fatal error - " + "DB overflow recovery failed - " + "error syncing SQ qid %u\n", + pci_name(ctx->lldi.pdev), qp->wq.sq.qid); + return; + } + + ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0], + qp->wq.rq.qid, + t4_rq_host_wq_pidx(&qp->wq), + t4_rq_wq_size(&qp->wq)); + + if (ret) { + printk(KERN_ERR MOD "%s: Fatal error - " + "DB overflow recovery failed - " + "error syncing RQ qid %u\n", + pci_name(ctx->lldi.pdev), qp->wq.rq.qid); + return; + } + + /* Wait for the dbfifo to drain */ + while (cxgb4_dbfifo_count(qp->rhp->rdev.lldi.ports[0], 1) > 0) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(usecs_to_jiffies(10)); + } + } +} + +static void recover_queues(struct uld_ctx *ctx) +{ + int count = 0; + struct qp_list qp_list; + int ret; + + /* lock out kernel db ringers */ + mutex_lock(&ctx->dev->db_mutex); + + /* put all queues in to recovery mode */ + spin_lock_irq(&ctx->dev->lock); + ctx->dev->db_state = RECOVERY; + ctx->dev->rdev.stats.db_state_transitions++; + idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL); + spin_unlock_irq(&ctx->dev->lock); + + /* slow everybody down */ + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(usecs_to_jiffies(1000)); + + /* Wait for the dbfifo to completely drain. */ + while (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) > 0) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(usecs_to_jiffies(10)); + } + + /* flush the SGE contexts */ + ret = cxgb4_flush_eq_cache(ctx->dev->rdev.lldi.ports[0]); + if (ret) { + printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n", + pci_name(ctx->lldi.pdev)); + goto out; + } + + /* Count active queues so we can build a list of queues to recover */ + spin_lock_irq(&ctx->dev->lock); + idr_for_each(&ctx->dev->qpidr, count_qps, &count); + + qp_list.qps = kzalloc(count * sizeof *qp_list.qps, GFP_ATOMIC); + if (!qp_list.qps) { + printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n", + pci_name(ctx->lldi.pdev)); + spin_unlock_irq(&ctx->dev->lock); + goto out; + } + qp_list.idx = 0; + + /* add and ref each qp so it doesn't get freed */ + idr_for_each(&ctx->dev->qpidr, add_and_ref_qp, &qp_list); + + spin_unlock_irq(&ctx->dev->lock); + + /* now traverse the list in a safe context to recover the db state*/ + recover_lost_dbs(ctx, &qp_list); + + /* we're almost done! 
deref the qps and clean up */ + deref_qps(qp_list); + kfree(qp_list.qps); + + /* Wait for the dbfifo to completely drain again */ + while (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) > 0) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(usecs_to_jiffies(10)); + } + + /* resume the queues */ + spin_lock_irq(&ctx->dev->lock); + if (ctx->dev->qpcnt > db_fc_threshold) + ctx->dev->db_state = FLOW_CONTROL; + else { + ctx->dev->db_state = NORMAL; + idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL); + } + ctx->dev->rdev.stats.db_state_transitions++; + spin_unlock_irq(&ctx->dev->lock); + +out: + /* start up kernel db ringers again */ + mutex_unlock(&ctx->dev->db_mutex); +} + +static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...) +{ + struct uld_ctx *ctx = handle; + + switch (control) { + case CXGB4_CONTROL_DB_FULL: + stop_queues(ctx); + mutex_lock(&ctx->dev->rdev.stats.lock); + ctx->dev->rdev.stats.db_full++; + mutex_unlock(&ctx->dev->rdev.stats.lock); + break; + case CXGB4_CONTROL_DB_EMPTY: + resume_queues(ctx); + mutex_lock(&ctx->dev->rdev.stats.lock); + ctx->dev->rdev.stats.db_empty++; + mutex_unlock(&ctx->dev->rdev.stats.lock); + break; + case CXGB4_CONTROL_DB_DROP: + recover_queues(ctx); + mutex_lock(&ctx->dev->rdev.stats.lock); + ctx->dev->rdev.stats.db_drop++; + mutex_unlock(&ctx->dev->rdev.stats.lock); + break; + default: + printk(KERN_WARNING MOD "%s: unknown control cmd %u\n", + pci_name(ctx->lldi.pdev), control); + break; + } + return 0; +} + static struct cxgb4_uld_info c4iw_uld_info = { .name = DRV_NAME, .add = c4iw_uld_add, .rx_handler = c4iw_uld_rx_handler, .state_change = c4iw_uld_state_change, + .control = c4iw_uld_control, }; static int __init c4iw_init_module(void) diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c index 397cb36cf103..cf2f6b47617a 100644 --- a/drivers/infiniband/hw/cxgb4/ev.c +++ b/drivers/infiniband/hw/cxgb4/ev.c @@ -84,7 +84,7 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe) struct c4iw_qp *qhp; u32 cqid; - spin_lock(&dev->lock); + spin_lock_irq(&dev->lock); qhp = get_qhp(dev, CQE_QPID(err_cqe)); if (!qhp) { printk(KERN_ERR MOD "BAD AE qpid 0x%x opcode %d " @@ -93,7 +93,7 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe) CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe), CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe), CQE_WRID_LOW(err_cqe)); - spin_unlock(&dev->lock); + spin_unlock_irq(&dev->lock); goto out; } @@ -109,13 +109,13 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe) CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe), CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe), CQE_WRID_LOW(err_cqe)); - spin_unlock(&dev->lock); + spin_unlock_irq(&dev->lock); goto out; } c4iw_qp_add_ref(&qhp->ibqp); atomic_inc(&chp->refcnt); - spin_unlock(&dev->lock); + spin_unlock_irq(&dev->lock); /* Bad incoming write */ if (RQ_TYPE(err_cqe) && diff --git a/drivers/infiniband/hw/cxgb4/id_table.c b/drivers/infiniband/hw/cxgb4/id_table.c new file mode 100644 index 000000000000..f95e5df30db2 --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/id_table.c @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2011 Chelsio Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/kernel.h> +#include <linux/random.h> +#include "iw_cxgb4.h" + +#define RANDOM_SKIP 16 + +/* + * Trivial bitmap-based allocator. If the random flag is set, the + * allocator is designed to: + * - pseudo-randomize the id returned such that it is not trivially predictable. + * - avoid reuse of recently used id (at the expense of predictability) + */ +u32 c4iw_id_alloc(struct c4iw_id_table *alloc) +{ + unsigned long flags; + u32 obj; + + spin_lock_irqsave(&alloc->lock, flags); + + obj = find_next_zero_bit(alloc->table, alloc->max, alloc->last); + if (obj >= alloc->max) + obj = find_first_zero_bit(alloc->table, alloc->max); + + if (obj < alloc->max) { + if (alloc->flags & C4IW_ID_TABLE_F_RANDOM) + alloc->last += random32() % RANDOM_SKIP; + else + alloc->last = obj + 1; + if (alloc->last >= alloc->max) + alloc->last = 0; + set_bit(obj, alloc->table); + obj += alloc->start; + } else + obj = -1; + + spin_unlock_irqrestore(&alloc->lock, flags); + return obj; +} + +void c4iw_id_free(struct c4iw_id_table *alloc, u32 obj) +{ + unsigned long flags; + + obj -= alloc->start; + BUG_ON((int)obj < 0); + + spin_lock_irqsave(&alloc->lock, flags); + clear_bit(obj, alloc->table); + spin_unlock_irqrestore(&alloc->lock, flags); +} + +int c4iw_id_table_alloc(struct c4iw_id_table *alloc, u32 start, u32 num, + u32 reserved, u32 flags) +{ + int i; + + alloc->start = start; + alloc->flags = flags; + if (flags & C4IW_ID_TABLE_F_RANDOM) + alloc->last = random32() % RANDOM_SKIP; + else + alloc->last = 0; + alloc->max = num; + spin_lock_init(&alloc->lock); + alloc->table = kmalloc(BITS_TO_LONGS(num) * sizeof(long), + GFP_KERNEL); + if (!alloc->table) + return -ENOMEM; + + bitmap_zero(alloc->table, num); + if (!(alloc->flags & C4IW_ID_TABLE_F_EMPTY)) + for (i = 0; i < reserved; ++i) + set_bit(i, alloc->table); + + return 0; +} + +void c4iw_id_table_free(struct c4iw_id_table *alloc) +{ + kfree(alloc->table); +} diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 1357c5bf209b..9beb3a9f0336 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -45,7 +45,6 @@ #include <linux/kref.h> #include <linux/timer.h> #include <linux/io.h> -#include <linux/kfifo.h> #include <asm/byteorder.h> @@ -79,13 
+78,22 @@ static inline void *cplhdr(struct sk_buff *skb) return skb->data; } +#define C4IW_ID_TABLE_F_RANDOM 1 /* Pseudo-randomize the id's returned */ +#define C4IW_ID_TABLE_F_EMPTY 2 /* Table is initially empty */ + +struct c4iw_id_table { + u32 flags; + u32 start; /* logical minimal id */ + u32 last; /* hint for find */ + u32 max; + spinlock_t lock; + unsigned long *table; +}; + struct c4iw_resource { - struct kfifo tpt_fifo; - spinlock_t tpt_fifo_lock; - struct kfifo qid_fifo; - spinlock_t qid_fifo_lock; - struct kfifo pdid_fifo; - spinlock_t pdid_fifo_lock; + struct c4iw_id_table tpt_table; + struct c4iw_id_table qid_table; + struct c4iw_id_table pdid_table; }; struct c4iw_qid_list { @@ -103,6 +111,27 @@ enum c4iw_rdev_flags { T4_FATAL_ERROR = (1<<0), }; +struct c4iw_stat { + u64 total; + u64 cur; + u64 max; + u64 fail; +}; + +struct c4iw_stats { + struct mutex lock; + struct c4iw_stat qid; + struct c4iw_stat pd; + struct c4iw_stat stag; + struct c4iw_stat pbl; + struct c4iw_stat rqt; + struct c4iw_stat ocqp; + u64 db_full; + u64 db_empty; + u64 db_drop; + u64 db_state_transitions; +}; + struct c4iw_rdev { struct c4iw_resource resource; unsigned long qpshift; @@ -117,6 +146,7 @@ struct c4iw_rdev { struct cxgb4_lld_info lldi; unsigned long oc_mw_pa; void __iomem *oc_mw_kva; + struct c4iw_stats stats; }; static inline int c4iw_fatal_error(struct c4iw_rdev *rdev) @@ -175,6 +205,12 @@ static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev, return wr_waitp->ret; } +enum db_state { + NORMAL = 0, + FLOW_CONTROL = 1, + RECOVERY = 2 +}; + struct c4iw_dev { struct ib_device ibdev; struct c4iw_rdev rdev; @@ -183,7 +219,10 @@ struct c4iw_dev { struct idr qpidr; struct idr mmidr; spinlock_t lock; + struct mutex db_mutex; struct dentry *debugfs_root; + enum db_state db_state; + int qpcnt; }; static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev) @@ -211,29 +250,57 @@ static inline struct c4iw_mr *get_mhp(struct c4iw_dev *rhp, u32 mmid) return idr_find(&rhp->mmidr, mmid); } -static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr, - void *handle, u32 id) +static inline int _insert_handle(struct c4iw_dev *rhp, struct idr *idr, + void *handle, u32 id, int lock) { int ret; int newid; do { - if (!idr_pre_get(idr, GFP_KERNEL)) + if (!idr_pre_get(idr, lock ? 
GFP_KERNEL : GFP_ATOMIC)) return -ENOMEM; - spin_lock_irq(&rhp->lock); + if (lock) + spin_lock_irq(&rhp->lock); ret = idr_get_new_above(idr, handle, id, &newid); - BUG_ON(newid != id); - spin_unlock_irq(&rhp->lock); + BUG_ON(!ret && newid != id); + if (lock) + spin_unlock_irq(&rhp->lock); } while (ret == -EAGAIN); return ret; } -static inline void remove_handle(struct c4iw_dev *rhp, struct idr *idr, u32 id) +static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr, + void *handle, u32 id) +{ + return _insert_handle(rhp, idr, handle, id, 1); +} + +static inline int insert_handle_nolock(struct c4iw_dev *rhp, struct idr *idr, + void *handle, u32 id) +{ + return _insert_handle(rhp, idr, handle, id, 0); +} + +static inline void _remove_handle(struct c4iw_dev *rhp, struct idr *idr, + u32 id, int lock) { - spin_lock_irq(&rhp->lock); + if (lock) + spin_lock_irq(&rhp->lock); idr_remove(idr, id); - spin_unlock_irq(&rhp->lock); + if (lock) + spin_unlock_irq(&rhp->lock); +} + +static inline void remove_handle(struct c4iw_dev *rhp, struct idr *idr, u32 id) +{ + _remove_handle(rhp, idr, id, 1); +} + +static inline void remove_handle_nolock(struct c4iw_dev *rhp, + struct idr *idr, u32 id) +{ + _remove_handle(rhp, idr, id, 0); } struct c4iw_pd { @@ -353,6 +420,8 @@ struct c4iw_qp_attributes { struct c4iw_ep *llp_stream_handle; u8 layer_etype; u8 ecode; + u16 sq_db_inc; + u16 rq_db_inc; }; struct c4iw_qp { @@ -427,6 +496,8 @@ static inline void insert_mmap(struct c4iw_ucontext *ucontext, enum c4iw_qp_attr_mask { C4IW_QP_ATTR_NEXT_STATE = 1 << 0, + C4IW_QP_ATTR_SQ_DB = 1<<1, + C4IW_QP_ATTR_RQ_DB = 1<<2, C4IW_QP_ATTR_ENABLE_RDMA_READ = 1 << 7, C4IW_QP_ATTR_ENABLE_RDMA_WRITE = 1 << 8, C4IW_QP_ATTR_ENABLE_RDMA_BIND = 1 << 9, @@ -480,6 +551,23 @@ static inline int c4iw_convert_state(enum ib_qp_state ib_state) } } +static inline int to_ib_qp_state(int c4iw_qp_state) +{ + switch (c4iw_qp_state) { + case C4IW_QP_STATE_IDLE: + return IB_QPS_INIT; + case C4IW_QP_STATE_RTS: + return IB_QPS_RTS; + case C4IW_QP_STATE_CLOSING: + return IB_QPS_SQD; + case C4IW_QP_STATE_TERMINATE: + return IB_QPS_SQE; + case C4IW_QP_STATE_ERROR: + return IB_QPS_ERR; + } + return IB_QPS_ERR; +} + static inline u32 c4iw_ib_to_tpt_access(int a) { return (a & IB_ACCESS_REMOTE_WRITE ? 
FW_RI_MEM_ACCESS_REM_WRITE : 0) | @@ -693,14 +781,20 @@ static inline int compute_wscale(int win) return wscale; } +u32 c4iw_id_alloc(struct c4iw_id_table *alloc); +void c4iw_id_free(struct c4iw_id_table *alloc, u32 obj); +int c4iw_id_table_alloc(struct c4iw_id_table *alloc, u32 start, u32 num, + u32 reserved, u32 flags); +void c4iw_id_table_free(struct c4iw_id_table *alloc); + typedef int (*c4iw_handler_func)(struct c4iw_dev *dev, struct sk_buff *skb); int c4iw_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, struct l2t_entry *l2t); void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qpid, struct c4iw_dev_ucontext *uctx); -u32 c4iw_get_resource(struct kfifo *fifo, spinlock_t *lock); -void c4iw_put_resource(struct kfifo *fifo, u32 entry, spinlock_t *lock); +u32 c4iw_get_resource(struct c4iw_id_table *id_table); +void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry); int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid); int c4iw_init_ctrl_qp(struct c4iw_rdev *rdev); int c4iw_pblpool_create(struct c4iw_rdev *rdev); @@ -769,6 +863,8 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_udata *udata); int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); +int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_qp_init_attr *init_attr); struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn); u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size); void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size); @@ -797,5 +893,7 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe); extern struct cxgb4_client t4c_client; extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS]; extern int c4iw_max_read_depth; +extern int db_fc_threshold; + #endif diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 40c835309e49..57e07c61ace2 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -131,10 +131,14 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, stag_idx = (*stag) >> 8; if ((!reset_tpt_entry) && (*stag == T4_STAG_UNSET)) { - stag_idx = c4iw_get_resource(&rdev->resource.tpt_fifo, - &rdev->resource.tpt_fifo_lock); + stag_idx = c4iw_get_resource(&rdev->resource.tpt_table); if (!stag_idx) return -ENOMEM; + mutex_lock(&rdev->stats.lock); + rdev->stats.stag.cur += 32; + if (rdev->stats.stag.cur > rdev->stats.stag.max) + rdev->stats.stag.max = rdev->stats.stag.cur; + mutex_unlock(&rdev->stats.lock); *stag = (stag_idx << 8) | (atomic_inc_return(&key) & 0xff); } PDBG("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n", @@ -165,9 +169,12 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, (rdev->lldi.vr->stag.start >> 5), sizeof(tpt), &tpt); - if (reset_tpt_entry) - c4iw_put_resource(&rdev->resource.tpt_fifo, stag_idx, - &rdev->resource.tpt_fifo_lock); + if (reset_tpt_entry) { + c4iw_put_resource(&rdev->resource.tpt_table, stag_idx); + mutex_lock(&rdev->stats.lock); + rdev->stats.stag.cur -= 32; + mutex_unlock(&rdev->stats.lock); + } return err; } @@ -686,8 +693,8 @@ int c4iw_dealloc_mw(struct ib_mw *mw) mhp = to_c4iw_mw(mw); rhp = mhp->rhp; mmid = (mw->rkey) >> 8; - deallocate_window(&rhp->rdev, mhp->attr.stag); remove_handle(rhp, &rhp->mmidr, mmid); + deallocate_window(&rhp->rdev, mhp->attr.stag); kfree(mhp); PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp); return 0; @@ -789,12 +796,12 @@ int 
c4iw_dereg_mr(struct ib_mr *ib_mr) mhp = to_c4iw_mr(ib_mr); rhp = mhp->rhp; mmid = mhp->attr.stag >> 8; + remove_handle(rhp, &rhp->mmidr, mmid); dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, mhp->attr.pbl_addr); if (mhp->attr.pbl_size) c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, mhp->attr.pbl_size << 3); - remove_handle(rhp, &rhp->mmidr, mmid); if (mhp->kva) kfree((void *) (unsigned long) mhp->kva); if (mhp->umem) diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index be1c18f44400..e084fdc6da7f 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -188,8 +188,10 @@ static int c4iw_deallocate_pd(struct ib_pd *pd) php = to_c4iw_pd(pd); rhp = php->rhp; PDBG("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid); - c4iw_put_resource(&rhp->rdev.resource.pdid_fifo, php->pdid, - &rhp->rdev.resource.pdid_fifo_lock); + c4iw_put_resource(&rhp->rdev.resource.pdid_table, php->pdid); + mutex_lock(&rhp->rdev.stats.lock); + rhp->rdev.stats.pd.cur--; + mutex_unlock(&rhp->rdev.stats.lock); kfree(php); return 0; } @@ -204,14 +206,12 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev, PDBG("%s ibdev %p\n", __func__, ibdev); rhp = (struct c4iw_dev *) ibdev; - pdid = c4iw_get_resource(&rhp->rdev.resource.pdid_fifo, - &rhp->rdev.resource.pdid_fifo_lock); + pdid = c4iw_get_resource(&rhp->rdev.resource.pdid_table); if (!pdid) return ERR_PTR(-EINVAL); php = kzalloc(sizeof(*php), GFP_KERNEL); if (!php) { - c4iw_put_resource(&rhp->rdev.resource.pdid_fifo, pdid, - &rhp->rdev.resource.pdid_fifo_lock); + c4iw_put_resource(&rhp->rdev.resource.pdid_table, pdid); return ERR_PTR(-ENOMEM); } php->pdid = pdid; @@ -222,6 +222,11 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev, return ERR_PTR(-EFAULT); } } + mutex_lock(&rhp->rdev.stats.lock); + rhp->rdev.stats.pd.cur++; + if (rhp->rdev.stats.pd.cur > rhp->rdev.stats.pd.max) + rhp->rdev.stats.pd.max = rhp->rdev.stats.pd.cur; + mutex_unlock(&rhp->rdev.stats.lock); PDBG("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php); return &php->ibpd; } @@ -438,6 +443,7 @@ int c4iw_register_device(struct c4iw_dev *dev) (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | (1ull << IB_USER_VERBS_CMD_CREATE_QP) | (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | (1ull << IB_USER_VERBS_CMD_POLL_CQ) | (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | (1ull << IB_USER_VERBS_CMD_POST_SEND) | @@ -460,6 +466,7 @@ int c4iw_register_device(struct c4iw_dev *dev) dev->ibdev.destroy_ah = c4iw_ah_destroy; dev->ibdev.create_qp = c4iw_create_qp; dev->ibdev.modify_qp = c4iw_ib_modify_qp; + dev->ibdev.query_qp = c4iw_ib_query_qp; dev->ibdev.destroy_qp = c4iw_destroy_qp; dev->ibdev.create_cq = c4iw_create_cq; dev->ibdev.destroy_cq = c4iw_destroy_cq; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 5f940aeaab1e..45aedf1d9338 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -34,10 +34,19 @@ #include "iw_cxgb4.h" +static int db_delay_usecs = 1; +module_param(db_delay_usecs, int, 0644); +MODULE_PARM_DESC(db_delay_usecs, "Usecs to delay awaiting db fifo to drain"); + static int ocqp_support = 1; module_param(ocqp_support, int, 0644); MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=1)"); +int db_fc_threshold = 2000; +module_param(db_fc_threshold, int, 0644); +MODULE_PARM_DESC(db_fc_threshold, "QP count/threshold that triggers automatic " + "db flow control mode (default = 2000)"); 
+ static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state) { unsigned long flag; @@ -1128,6 +1137,35 @@ out: return ret; } +/* + * Called by the library when the qp has user dbs disabled due to + * a DB_FULL condition. This function will single-thread all user + * DB rings to avoid overflowing the hw db-fifo. + */ +static int ring_kernel_db(struct c4iw_qp *qhp, u32 qid, u16 inc) +{ + int delay = db_delay_usecs; + + mutex_lock(&qhp->rhp->db_mutex); + do { + + /* + * The interrupt threshold is dbfifo_int_thresh << 6. So + * make sure we don't cross that and generate an interrupt. + */ + if (cxgb4_dbfifo_count(qhp->rhp->rdev.lldi.ports[0], 1) < + (qhp->rhp->rdev.lldi.dbfifo_int_thresh << 5)) { + writel(V_QID(qid) | V_PIDX(inc), qhp->wq.db); + break; + } + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(usecs_to_jiffies(delay)); + delay = min(delay << 1, 2000); + } while (1); + mutex_unlock(&qhp->rhp->db_mutex); + return 0; +} + int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, enum c4iw_qp_attr_mask mask, struct c4iw_qp_attributes *attrs, @@ -1176,6 +1214,15 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, qhp->attr = newattr; } + if (mask & C4IW_QP_ATTR_SQ_DB) { + ret = ring_kernel_db(qhp, qhp->wq.sq.qid, attrs->sq_db_inc); + goto out; + } + if (mask & C4IW_QP_ATTR_RQ_DB) { + ret = ring_kernel_db(qhp, qhp->wq.rq.qid, attrs->rq_db_inc); + goto out; + } + if (!(mask & C4IW_QP_ATTR_NEXT_STATE)) goto out; if (qhp->attr.state == attrs->next_state) @@ -1352,6 +1399,14 @@ out: return ret; } +static int enable_qp_db(int id, void *p, void *data) +{ + struct c4iw_qp *qp = p; + + t4_enable_wq_db(&qp->wq); + return 0; +} + int c4iw_destroy_qp(struct ib_qp *ib_qp) { struct c4iw_dev *rhp; @@ -1369,7 +1424,16 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp) c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); wait_event(qhp->wait, !qhp->ep); - remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); + spin_lock_irq(&rhp->lock); + remove_handle_nolock(rhp, &rhp->qpidr, qhp->wq.sq.qid); + rhp->qpcnt--; + BUG_ON(rhp->qpcnt < 0); + if (rhp->qpcnt <= db_fc_threshold && rhp->db_state == FLOW_CONTROL) { + rhp->rdev.stats.db_state_transitions++; + rhp->db_state = NORMAL; + idr_for_each(&rhp->qpidr, enable_qp_db, NULL); + } + spin_unlock_irq(&rhp->lock); atomic_dec(&qhp->refcnt); wait_event(qhp->wait, !atomic_read(&qhp->refcnt)); @@ -1383,6 +1447,14 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp) return 0; } +static int disable_qp_db(int id, void *p, void *data) +{ + struct c4iw_qp *qp = p; + + t4_disable_wq_db(&qp->wq); + return 0; +} + struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, struct ib_udata *udata) { @@ -1469,7 +1541,16 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, init_waitqueue_head(&qhp->wait); atomic_set(&qhp->refcnt, 1); - ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid); + spin_lock_irq(&rhp->lock); + if (rhp->db_state != NORMAL) + t4_disable_wq_db(&qhp->wq); + if (++rhp->qpcnt > db_fc_threshold && rhp->db_state == NORMAL) { + rhp->rdev.stats.db_state_transitions++; + rhp->db_state = FLOW_CONTROL; + idr_for_each(&rhp->qpidr, disable_qp_db, NULL); + } + ret = insert_handle_nolock(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid); + spin_unlock_irq(&rhp->lock); if (ret) goto err2; @@ -1613,6 +1694,15 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, C4IW_QP_ATTR_ENABLE_RDMA_WRITE | C4IW_QP_ATTR_ENABLE_RDMA_BIND) : 0; + /* + * Use SQ_PSN and RQ_PSN to pass in IDX_INC values 
for + * ringing the queue db when we're in DB_FULL mode. + */ + attrs.sq_db_inc = attr->sq_psn; + attrs.rq_db_inc = attr->rq_psn; + mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0; + mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0; + return c4iw_modify_qp(rhp, qhp, mask, &attrs, 0); } @@ -1621,3 +1711,14 @@ struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn) PDBG("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn); return (struct ib_qp *)get_qhp(to_c4iw_dev(dev), qpn); } + +int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_qp_init_attr *init_attr) +{ + struct c4iw_qp *qhp = to_c4iw_qp(ibqp); + + memset(attr, 0, sizeof *attr); + memset(init_attr, 0, sizeof *init_attr); + attr->qp_state = to_ib_qp_state(qhp->attr.state); + return 0; +} diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c index 407ff3924150..cdef4d7fb6d8 100644 --- a/drivers/infiniband/hw/cxgb4/resource.c +++ b/drivers/infiniband/hw/cxgb4/resource.c @@ -30,96 +30,25 @@ * SOFTWARE. */ /* Crude resource management */ -#include <linux/kernel.h> -#include <linux/random.h> -#include <linux/slab.h> -#include <linux/kfifo.h> #include <linux/spinlock.h> -#include <linux/errno.h> #include <linux/genalloc.h> #include <linux/ratelimit.h> #include "iw_cxgb4.h" -#define RANDOM_SIZE 16 - -static int __c4iw_init_resource_fifo(struct kfifo *fifo, - spinlock_t *fifo_lock, - u32 nr, u32 skip_low, - u32 skip_high, - int random) -{ - u32 i, j, entry = 0, idx; - u32 random_bytes; - u32 rarray[16]; - spin_lock_init(fifo_lock); - - if (kfifo_alloc(fifo, nr * sizeof(u32), GFP_KERNEL)) - return -ENOMEM; - - for (i = 0; i < skip_low + skip_high; i++) - kfifo_in(fifo, (unsigned char *) &entry, sizeof(u32)); - if (random) { - j = 0; - random_bytes = random32(); - for (i = 0; i < RANDOM_SIZE; i++) - rarray[i] = i + skip_low; - for (i = skip_low + RANDOM_SIZE; i < nr - skip_high; i++) { - if (j >= RANDOM_SIZE) { - j = 0; - random_bytes = random32(); - } - idx = (random_bytes >> (j * 2)) & 0xF; - kfifo_in(fifo, - (unsigned char *) &rarray[idx], - sizeof(u32)); - rarray[idx] = i; - j++; - } - for (i = 0; i < RANDOM_SIZE; i++) - kfifo_in(fifo, - (unsigned char *) &rarray[i], - sizeof(u32)); - } else - for (i = skip_low; i < nr - skip_high; i++) - kfifo_in(fifo, (unsigned char *) &i, sizeof(u32)); - - for (i = 0; i < skip_low + skip_high; i++) - if (kfifo_out_locked(fifo, (unsigned char *) &entry, - sizeof(u32), fifo_lock)) - break; - return 0; -} - -static int c4iw_init_resource_fifo(struct kfifo *fifo, spinlock_t * fifo_lock, - u32 nr, u32 skip_low, u32 skip_high) -{ - return __c4iw_init_resource_fifo(fifo, fifo_lock, nr, skip_low, - skip_high, 0); -} - -static int c4iw_init_resource_fifo_random(struct kfifo *fifo, - spinlock_t *fifo_lock, - u32 nr, u32 skip_low, u32 skip_high) -{ - return __c4iw_init_resource_fifo(fifo, fifo_lock, nr, skip_low, - skip_high, 1); -} - -static int c4iw_init_qid_fifo(struct c4iw_rdev *rdev) +static int c4iw_init_qid_table(struct c4iw_rdev *rdev) { u32 i; - spin_lock_init(&rdev->resource.qid_fifo_lock); - - if (kfifo_alloc(&rdev->resource.qid_fifo, rdev->lldi.vr->qp.size * - sizeof(u32), GFP_KERNEL)) + if (c4iw_id_table_alloc(&rdev->resource.qid_table, + rdev->lldi.vr->qp.start, + rdev->lldi.vr->qp.size, + rdev->lldi.vr->qp.size, 0)) return -ENOMEM; for (i = rdev->lldi.vr->qp.start; - i < rdev->lldi.vr->qp.start + rdev->lldi.vr->qp.size; i++) + i < rdev->lldi.vr->qp.start + rdev->lldi.vr->qp.size; i++) if (!(i & 
rdev->qpmask)) - kfifo_in(&rdev->resource.qid_fifo, - (unsigned char *) &i, sizeof(u32)); + c4iw_id_free(&rdev->resource.qid_table, i); return 0; } @@ -127,44 +56,42 @@ static int c4iw_init_qid_fifo(struct c4iw_rdev *rdev) int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid) { int err = 0; - err = c4iw_init_resource_fifo_random(&rdev->resource.tpt_fifo, - &rdev->resource.tpt_fifo_lock, - nr_tpt, 1, 0); + err = c4iw_id_table_alloc(&rdev->resource.tpt_table, 0, nr_tpt, 1, + C4IW_ID_TABLE_F_RANDOM); if (err) goto tpt_err; - err = c4iw_init_qid_fifo(rdev); + err = c4iw_init_qid_table(rdev); if (err) goto qid_err; - err = c4iw_init_resource_fifo(&rdev->resource.pdid_fifo, - &rdev->resource.pdid_fifo_lock, - nr_pdid, 1, 0); + err = c4iw_id_table_alloc(&rdev->resource.pdid_table, 0, + nr_pdid, 1, 0); if (err) goto pdid_err; return 0; -pdid_err: - kfifo_free(&rdev->resource.qid_fifo); -qid_err: - kfifo_free(&rdev->resource.tpt_fifo); -tpt_err: + pdid_err: + c4iw_id_table_free(&rdev->resource.qid_table); + qid_err: + c4iw_id_table_free(&rdev->resource.tpt_table); + tpt_err: return -ENOMEM; } /* * returns 0 if no resource available */ -u32 c4iw_get_resource(struct kfifo *fifo, spinlock_t *lock) +u32 c4iw_get_resource(struct c4iw_id_table *id_table) { u32 entry; - if (kfifo_out_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock)) - return entry; - else + entry = c4iw_id_alloc(id_table); + if (entry == (u32)(-1)) return 0; + return entry; } -void c4iw_put_resource(struct kfifo *fifo, u32 entry, spinlock_t *lock) +void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry) { PDBG("%s entry 0x%x\n", __func__, entry); - kfifo_in_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock); + c4iw_id_free(id_table, entry); } u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx) @@ -181,10 +108,12 @@ u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx) qid = entry->qid; kfree(entry); } else { - qid = c4iw_get_resource(&rdev->resource.qid_fifo, - &rdev->resource.qid_fifo_lock); + qid = c4iw_get_resource(&rdev->resource.qid_table); if (!qid) goto out; + mutex_lock(&rdev->stats.lock); + rdev->stats.qid.cur += rdev->qpmask + 1; + mutex_unlock(&rdev->stats.lock); for (i = qid+1; i & rdev->qpmask; i++) { entry = kmalloc(sizeof *entry, GFP_KERNEL); if (!entry) @@ -213,6 +142,10 @@ u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx) out: mutex_unlock(&uctx->lock); PDBG("%s qid 0x%x\n", __func__, qid); + mutex_lock(&rdev->stats.lock); + if (rdev->stats.qid.cur > rdev->stats.qid.max) + rdev->stats.qid.max = rdev->stats.qid.cur; + mutex_unlock(&rdev->stats.lock); return qid; } @@ -245,10 +178,12 @@ u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx) qid = entry->qid; kfree(entry); } else { - qid = c4iw_get_resource(&rdev->resource.qid_fifo, - &rdev->resource.qid_fifo_lock); + qid = c4iw_get_resource(&rdev->resource.qid_table); if (!qid) goto out; + mutex_lock(&rdev->stats.lock); + rdev->stats.qid.cur += rdev->qpmask + 1; + mutex_unlock(&rdev->stats.lock); for (i = qid+1; i & rdev->qpmask; i++) { entry = kmalloc(sizeof *entry, GFP_KERNEL); if (!entry) @@ -277,6 +212,10 @@ u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx) out: mutex_unlock(&uctx->lock); PDBG("%s qid 0x%x\n", __func__, qid); + mutex_lock(&rdev->stats.lock); + if (rdev->stats.qid.cur > rdev->stats.qid.max) + rdev->stats.qid.max = rdev->stats.qid.cur; + mutex_unlock(&rdev->stats.lock); return qid; } @@ -297,9 +236,9 @@ 
void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qid, void c4iw_destroy_resource(struct c4iw_resource *rscp) { - kfifo_free(&rscp->tpt_fifo); - kfifo_free(&rscp->qid_fifo); - kfifo_free(&rscp->pdid_fifo); + c4iw_id_table_free(&rscp->tpt_table); + c4iw_id_table_free(&rscp->qid_table); + c4iw_id_table_free(&rscp->pdid_table); } /* @@ -312,15 +251,23 @@ u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size) { unsigned long addr = gen_pool_alloc(rdev->pbl_pool, size); PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size); - if (!addr) - printk_ratelimited(KERN_WARNING MOD "%s: Out of PBL memory\n", - pci_name(rdev->lldi.pdev)); + mutex_lock(&rdev->stats.lock); + if (addr) { + rdev->stats.pbl.cur += roundup(size, 1 << MIN_PBL_SHIFT); + if (rdev->stats.pbl.cur > rdev->stats.pbl.max) + rdev->stats.pbl.max = rdev->stats.pbl.cur; + } else + rdev->stats.pbl.fail++; + mutex_unlock(&rdev->stats.lock); return (u32)addr; } void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size) { PDBG("%s addr 0x%x size %d\n", __func__, addr, size); + mutex_lock(&rdev->stats.lock); + rdev->stats.pbl.cur -= roundup(size, 1 << MIN_PBL_SHIFT); + mutex_unlock(&rdev->stats.lock); gen_pool_free(rdev->pbl_pool, (unsigned long)addr, size); } @@ -377,12 +324,23 @@ u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size) if (!addr) printk_ratelimited(KERN_WARNING MOD "%s: Out of RQT memory\n", pci_name(rdev->lldi.pdev)); + mutex_lock(&rdev->stats.lock); + if (addr) { + rdev->stats.rqt.cur += roundup(size << 6, 1 << MIN_RQT_SHIFT); + if (rdev->stats.rqt.cur > rdev->stats.rqt.max) + rdev->stats.rqt.max = rdev->stats.rqt.cur; + } else + rdev->stats.rqt.fail++; + mutex_unlock(&rdev->stats.lock); return (u32)addr; } void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size) { PDBG("%s addr 0x%x size %d\n", __func__, addr, size << 6); + mutex_lock(&rdev->stats.lock); + rdev->stats.rqt.cur -= roundup(size << 6, 1 << MIN_RQT_SHIFT); + mutex_unlock(&rdev->stats.lock); gen_pool_free(rdev->rqt_pool, (unsigned long)addr, size << 6); } @@ -433,12 +391,22 @@ u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size) { unsigned long addr = gen_pool_alloc(rdev->ocqp_pool, size); PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size); + if (addr) { + mutex_lock(&rdev->stats.lock); + rdev->stats.ocqp.cur += roundup(size, 1 << MIN_OCQP_SHIFT); + if (rdev->stats.ocqp.cur > rdev->stats.ocqp.max) + rdev->stats.ocqp.max = rdev->stats.ocqp.cur; + mutex_unlock(&rdev->stats.lock); + } return (u32)addr; } void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size) { PDBG("%s addr 0x%x size %d\n", __func__, addr, size); + mutex_lock(&rdev->stats.lock); + rdev->stats.ocqp.cur -= roundup(size, 1 << MIN_OCQP_SHIFT); + mutex_unlock(&rdev->stats.lock); gen_pool_free(rdev->ocqp_pool, (unsigned long)addr, size); } diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index c0221eec8817..16f26ab29302 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -62,6 +62,10 @@ struct t4_status_page { __be16 pidx; u8 qp_err; /* flit 1 - sw owns */ u8 db_off; + u8 pad; + u16 host_wq_pidx; + u16 host_cidx; + u16 host_pidx; }; #define T4_EQ_ENTRY_SIZE 64 @@ -375,6 +379,16 @@ static inline void t4_rq_consume(struct t4_wq *wq) wq->rq.cidx = 0; } +static inline u16 t4_rq_host_wq_pidx(struct t4_wq *wq) +{ + return wq->rq.queue[wq->rq.size].status.host_wq_pidx; +} + +static inline u16 t4_rq_wq_size(struct t4_wq *wq) +{ + return wq->rq.size * T4_RQ_NUM_SLOTS; +} + static inline int 
t4_sq_onchip(struct t4_sq *sq) { return sq->flags & T4_SQ_ONCHIP; @@ -412,6 +426,16 @@ static inline void t4_sq_consume(struct t4_wq *wq) wq->sq.cidx = 0; } +static inline u16 t4_sq_host_wq_pidx(struct t4_wq *wq) +{ + return wq->sq.queue[wq->sq.size].status.host_wq_pidx; +} + +static inline u16 t4_sq_wq_size(struct t4_wq *wq) +{ + return wq->sq.size * T4_SQ_NUM_SLOTS; +} + static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc) { wmb(); diff --git a/drivers/infiniband/hw/cxgb4/user.h b/drivers/infiniband/hw/cxgb4/user.h index e6669d54770e..32b754c35ab7 100644 --- a/drivers/infiniband/hw/cxgb4/user.h +++ b/drivers/infiniband/hw/cxgb4/user.h @@ -32,7 +32,7 @@ #ifndef __C4IW_USER_H__ #define __C4IW_USER_H__ -#define C4IW_UVERBS_ABI_VERSION 1 +#define C4IW_UVERBS_ABI_VERSION 2 /* * Make sure that all structs defined in this file remain laid out so |
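
For reference, the ring_kernel_db() path added in qp.c above single-threads kernel-mediated doorbell rings while user doorbells are disabled: it polls the hardware doorbell FIFO and only writes the QID/PIDX doorbell once the fill level drops below the interrupt threshold, backing off with a doubling delay capped at 2000 us. Below is a standalone sketch of that loop; dbfifo_count() and write_db() are placeholders for cxgb4_dbfifo_count() and the doorbell writel(), not real driver calls.

```c
/* Standalone sketch of a backed-off, kernel-mediated doorbell ring. */
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

static int dbfifo_count(void)	/* placeholder for cxgb4_dbfifo_count() */
{
	static int level = 100;	/* pretend the FIFO starts nearly full and drains */

	if (level > 0)
		level -= 40;
	return level > 0 ? level : 0;
}

static void write_db(uint32_t qid, uint16_t inc)	/* placeholder for writel(V_QID(qid) | V_PIDX(inc)) */
{
	printf("ring qid %u, pidx inc %u\n", qid, inc);
}

static void ring_kernel_db(uint32_t qid, uint16_t inc, int fifo_threshold)
{
	int delay_us = 1;

	/* in the driver this loop runs under a per-device mutex (db_mutex) */
	for (;;) {
		if (dbfifo_count() < fifo_threshold) {
			write_db(qid, inc);
			break;
		}
		usleep(delay_us);
		delay_us = delay_us * 2 > 2000 ? 2000 : delay_us * 2;	/* capped exponential backoff */
	}
}

int main(void)
{
	ring_kernel_db(42, 3, 32);
	return 0;
}
```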