summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/core/addr.c4
-rw-r--r--drivers/infiniband/core/multicast.c11
-rw-r--r--drivers/infiniband/core/verbs.c3
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c7
-rw-r--r--drivers/infiniband/hw/cxgb4/device.c2
-rw-r--r--drivers/infiniband/hw/cxgb4/mem.c28
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c2
-rw-r--r--drivers/infiniband/hw/mlx4/mr.c1
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.c5
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h19
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c18
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c27
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c49
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c239
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c22
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c104
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.h30
-rw-r--r--drivers/infiniband/ulp/iser/iser_initiator.c6
-rw-r--r--drivers/infiniband/ulp/iser/iser_memory.c102
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c91
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c2
22 files changed, 483 insertions, 291 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 8172d37f9add..f80da50d84a5 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -176,8 +176,8 @@ static void set_timeout(unsigned long time)
unsigned long delay;
delay = time - jiffies;
- if ((long)delay <= 0)
- delay = 1;
+ if ((long)delay < 0)
+ delay = 0;
mod_delayed_work(addr_wq, &work, delay);
}
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index d2360a8ef0b2..fa17b552ff78 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -525,17 +525,22 @@ static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
if (status)
process_join_error(group, status);
else {
+ int mgids_changed, is_mgid0;
ib_find_pkey(group->port->dev->device, group->port->port_num,
be16_to_cpu(rec->pkey), &pkey_index);
spin_lock_irq(&group->port->lock);
- group->rec = *rec;
if (group->state == MCAST_BUSY &&
group->pkey_index == MCAST_INVALID_PKEY_INDEX)
group->pkey_index = pkey_index;
- if (!memcmp(&mgid0, &group->rec.mgid, sizeof mgid0)) {
+ mgids_changed = memcmp(&rec->mgid, &group->rec.mgid,
+ sizeof(group->rec.mgid));
+ group->rec = *rec;
+ if (mgids_changed) {
rb_erase(&group->node, &group->port->table);
- mcast_insert(group->port, group, 1);
+ is_mgid0 = !memcmp(&mgid0, &group->rec.mgid,
+ sizeof(mgid0));
+ mcast_insert(group->port, group, is_mgid0);
}
spin_unlock_irq(&group->port->lock);
}
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index c2b89cc5dbca..f93eb8da7b5a 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -879,7 +879,8 @@ int ib_resolve_eth_l2_attrs(struct ib_qp *qp,
if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) {
rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw, qp_attr->ah_attr.dmac);
rdma_get_ll_mac((struct in6_addr *)sgid.raw, qp_attr->smac);
- qp_attr->vlan_id = rdma_get_vlan_id(&sgid);
+ if (!(*qp_attr_mask & IB_QP_VID))
+ qp_attr->vlan_id = rdma_get_vlan_id(&sgid);
} else {
ret = rdma_addr_find_dmac_by_grh(&sgid, &qp_attr->ah_attr.grh.dgid,
qp_attr->ah_attr.dmac, &qp_attr->vlan_id);
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 4b8c6116c058..9edc200b311d 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -1640,7 +1640,8 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
__state_set(&ep->com, MPA_REQ_RCVD);
/* drive upcall */
- mutex_lock(&ep->parent_ep->com.mutex);
+ mutex_lock_nested(&ep->parent_ep->com.mutex,
+ SINGLE_DEPTH_NESTING);
if (ep->parent_ep->com.state != DEAD) {
if (connect_request_upcall(ep))
abort_connection(ep, skb, GFP_KERNEL);
@@ -3126,6 +3127,8 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
err = c4iw_wait_for_reply(&ep->com.dev->rdev,
&ep->com.wr_wait,
0, 0, __func__);
+ else if (err > 0)
+ err = net_xmit_errno(err);
if (err)
pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n",
err, ep->stid,
@@ -3159,6 +3162,8 @@ static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
err = c4iw_wait_for_reply(&ep->com.dev->rdev,
&ep->com.wr_wait,
0, 0, __func__);
+ else if (err > 0)
+ err = net_xmit_errno(err);
}
if (err)
pr_err("cxgb4_create_server/filter failed err %d stid %d laddr %pI4 lport %d\n"
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 72f1f052e88c..eb5df4e62703 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -670,7 +670,7 @@ static int ep_open(struct inode *inode, struct file *file)
idr_for_each(&epd->devp->stid_idr, count_idrs, &count);
spin_unlock_irq(&epd->devp->lock);
- epd->bufsize = count * 160;
+ epd->bufsize = count * 240;
epd->buf = vmalloc(epd->bufsize);
if (!epd->buf) {
ret = -ENOMEM;
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 0744455cd88b..cb43c2299ac0 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -50,6 +50,13 @@ static int inline_threshold = C4IW_INLINE_THRESHOLD;
module_param(inline_threshold, int, 0644);
MODULE_PARM_DESC(inline_threshold, "inline vs dsgl threshold (default=128)");
+static int mr_exceeds_hw_limits(struct c4iw_dev *dev, u64 length)
+{
+ return (is_t4(dev->rdev.lldi.adapter_type) ||
+ is_t5(dev->rdev.lldi.adapter_type)) &&
+ length >= 8*1024*1024*1024ULL;
+}
+
static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr,
u32 len, dma_addr_t data, int wait)
{
@@ -369,9 +376,11 @@ static int register_mem(struct c4iw_dev *rhp, struct c4iw_pd *php,
int ret;
ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, mhp->attr.pdid,
- FW_RI_STAG_NSMR, mhp->attr.perms,
+ FW_RI_STAG_NSMR, mhp->attr.len ?
+ mhp->attr.perms : 0,
mhp->attr.mw_bind_enable, mhp->attr.zbva,
- mhp->attr.va_fbo, mhp->attr.len, shift - 12,
+ mhp->attr.va_fbo, mhp->attr.len ?
+ mhp->attr.len : -1, shift - 12,
mhp->attr.pbl_size, mhp->attr.pbl_addr);
if (ret)
return ret;
@@ -536,6 +545,11 @@ int c4iw_reregister_phys_mem(struct ib_mr *mr, int mr_rereg_mask,
return ret;
}
+ if (mr_exceeds_hw_limits(rhp, total_size)) {
+ kfree(page_list);
+ return -EINVAL;
+ }
+
ret = reregister_mem(rhp, php, &mh, shift, npages);
kfree(page_list);
if (ret)
@@ -596,6 +610,12 @@ struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd,
if (ret)
goto err;
+ if (mr_exceeds_hw_limits(rhp, total_size)) {
+ kfree(page_list);
+ ret = -EINVAL;
+ goto err;
+ }
+
ret = alloc_pbl(mhp, npages);
if (ret) {
kfree(page_list);
@@ -699,6 +719,10 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
php = to_c4iw_pd(pd);
rhp = php->rhp;
+
+ if (mr_exceeds_hw_limits(rhp, length))
+ return ERR_PTR(-EINVAL);
+
mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
if (!mhp)
return ERR_PTR(-ENOMEM);
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 2ed3ece2b2ee..bb85d479e66e 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -1538,9 +1538,9 @@ err:
set_state(qhp, C4IW_QP_STATE_ERROR);
free = 1;
abort = 1;
- wake_up(&qhp->wait);
BUG_ON(!ep);
flush_qp(qhp);
+ wake_up(&qhp->wait);
out:
mutex_unlock(&qhp->mutex);
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 8f9325cfc85d..c36ccbd9a644 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -223,7 +223,6 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
if (flags & IB_MR_REREG_TRANS) {
int shift;
- int err;
int n;
mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index ac02ce4e8040..f3cc8c9e65ae 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -96,7 +96,6 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
union ib_gid sgid;
- u8 zmac[ETH_ALEN];
if (!(attr->ah_flags & IB_AH_GRH))
return ERR_PTR(-EINVAL);
@@ -118,9 +117,7 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
goto av_conf_err;
}
- memset(&zmac, 0, ETH_ALEN);
- if (pd->uctx &&
- memcmp(attr->dmac, &zmac, ETH_ALEN)) {
+ if (pd->uctx) {
status = rdma_addr_find_dmac_by_grh(&sgid, &attr->grh.dgid,
attr->dmac, &attr->vlan_id);
if (status) {
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index b48fd01f34d2..fb8d8c4dfbb9 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -1410,6 +1410,8 @@ int ocrdma_query_qp(struct ib_qp *ibqp,
mutex_unlock(&dev->dev_lock);
if (status)
goto mbx_err;
+ if (qp->qp_type == IB_QPT_UD)
+ qp_attr->qkey = params.qkey;
qp_attr->qp_state = get_ibqp_state(IB_QPS_INIT);
qp_attr->cur_qp_state = get_ibqp_state(IB_QPS_INIT);
qp_attr->path_mtu =
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index d7562beb5423..8ba80a6d3a46 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -98,9 +98,15 @@ enum {
IPOIB_MCAST_FLAG_FOUND = 0, /* used in set_multicast_list */
IPOIB_MCAST_FLAG_SENDONLY = 1,
- IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */
+ /*
+ * For IPOIB_MCAST_FLAG_BUSY
+ * When set, in flight join and mcast->mc is unreliable
+ * When clear and mcast->mc IS_ERR_OR_NULL, need to restart or
+ * haven't started yet
+ * When clear and mcast->mc is valid pointer, join was successful
+ */
+ IPOIB_MCAST_FLAG_BUSY = 2,
IPOIB_MCAST_FLAG_ATTACHED = 3,
- IPOIB_MCAST_JOIN_STARTED = 4,
MAX_SEND_CQE = 16,
IPOIB_CM_COPYBREAK = 256,
@@ -317,6 +323,7 @@ struct ipoib_dev_priv {
struct list_head multicast_list;
struct rb_root multicast_tree;
+ struct workqueue_struct *wq;
struct delayed_work mcast_task;
struct work_struct carrier_on_task;
struct work_struct flush_light;
@@ -477,10 +484,10 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work);
void ipoib_pkey_event(struct work_struct *work);
void ipoib_ib_dev_cleanup(struct net_device *dev);
-int ipoib_ib_dev_open(struct net_device *dev, int flush);
+int ipoib_ib_dev_open(struct net_device *dev);
int ipoib_ib_dev_up(struct net_device *dev);
-int ipoib_ib_dev_down(struct net_device *dev, int flush);
-int ipoib_ib_dev_stop(struct net_device *dev, int flush);
+int ipoib_ib_dev_down(struct net_device *dev);
+int ipoib_ib_dev_stop(struct net_device *dev);
void ipoib_pkey_dev_check_presence(struct net_device *dev);
int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
@@ -492,7 +499,7 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
void ipoib_mcast_restart_task(struct work_struct *work);
int ipoib_mcast_start_thread(struct net_device *dev);
-int ipoib_mcast_stop_thread(struct net_device *dev, int flush);
+int ipoib_mcast_stop_thread(struct net_device *dev);
void ipoib_mcast_dev_down(struct net_device *dev);
void ipoib_mcast_dev_flush(struct net_device *dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 933efcea0d03..56959adb6c7d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -474,7 +474,7 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
}
spin_lock_irq(&priv->lock);
- queue_delayed_work(ipoib_workqueue,
+ queue_delayed_work(priv->wq,
&priv->cm.stale_task, IPOIB_CM_RX_DELAY);
/* Add this entry to passive ids list head, but do not re-add it
* if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. */
@@ -576,7 +576,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
spin_lock_irqsave(&priv->lock, flags);
list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
ipoib_cm_start_rx_drain(priv);
- queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
+ queue_work(priv->wq, &priv->cm.rx_reap_task);
spin_unlock_irqrestore(&priv->lock, flags);
} else
ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
@@ -603,7 +603,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
spin_lock_irqsave(&priv->lock, flags);
list_move(&p->list, &priv->cm.rx_reap_list);
spin_unlock_irqrestore(&priv->lock, flags);
- queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
+ queue_work(priv->wq, &priv->cm.rx_reap_task);
}
return;
}
@@ -827,7 +827,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
list_move(&tx->list, &priv->cm.reap_list);
- queue_work(ipoib_workqueue, &priv->cm.reap_task);
+ queue_work(priv->wq, &priv->cm.reap_task);
}
clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags);
@@ -1255,7 +1255,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
list_move(&tx->list, &priv->cm.reap_list);
- queue_work(ipoib_workqueue, &priv->cm.reap_task);
+ queue_work(priv->wq, &priv->cm.reap_task);
}
spin_unlock_irqrestore(&priv->lock, flags);
@@ -1284,7 +1284,7 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path
tx->dev = dev;
list_add(&tx->list, &priv->cm.start_list);
set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags);
- queue_work(ipoib_workqueue, &priv->cm.start_task);
+ queue_work(priv->wq, &priv->cm.start_task);
return tx;
}
@@ -1295,7 +1295,7 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
spin_lock_irqsave(&priv->lock, flags);
list_move(&tx->list, &priv->cm.reap_list);
- queue_work(ipoib_workqueue, &priv->cm.reap_task);
+ queue_work(priv->wq, &priv->cm.reap_task);
ipoib_dbg(priv, "Reap connection for gid %pI6\n",
tx->neigh->daddr + 4);
tx->neigh = NULL;
@@ -1417,7 +1417,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
skb_queue_tail(&priv->cm.skb_queue, skb);
if (e)
- queue_work(ipoib_workqueue, &priv->cm.skb_task);
+ queue_work(priv->wq, &priv->cm.skb_task);
}
static void ipoib_cm_rx_reap(struct work_struct *work)
@@ -1450,7 +1450,7 @@ static void ipoib_cm_stale_task(struct work_struct *work)
}
if (!list_empty(&priv->cm.passive_ids))
- queue_delayed_work(ipoib_workqueue,
+ queue_delayed_work(priv->wq,
&priv->cm.stale_task, IPOIB_CM_RX_DELAY);
spin_unlock_irq(&priv->lock);
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 72626c348174..fe65abb5150c 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -655,7 +655,7 @@ void ipoib_reap_ah(struct work_struct *work)
__ipoib_reap_ah(dev);
if (!test_bit(IPOIB_STOP_REAPER, &priv->flags))
- queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
+ queue_delayed_work(priv->wq, &priv->ah_reap_task,
round_jiffies_relative(HZ));
}
@@ -664,7 +664,7 @@ static void ipoib_ib_tx_timer_func(unsigned long ctx)
drain_tx_cq((struct net_device *)ctx);
}
-int ipoib_ib_dev_open(struct net_device *dev, int flush)
+int ipoib_ib_dev_open(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret;
@@ -696,7 +696,7 @@ int ipoib_ib_dev_open(struct net_device *dev, int flush)
}
clear_bit(IPOIB_STOP_REAPER, &priv->flags);
- queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
+ queue_delayed_work(priv->wq, &priv->ah_reap_task,
round_jiffies_relative(HZ));
if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
@@ -706,7 +706,7 @@ int ipoib_ib_dev_open(struct net_device *dev, int flush)
dev_stop:
if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
napi_enable(&priv->napi);
- ipoib_ib_dev_stop(dev, flush);
+ ipoib_ib_dev_stop(dev);
return -1;
}
@@ -738,7 +738,7 @@ int ipoib_ib_dev_up(struct net_device *dev)
return ipoib_mcast_start_thread(dev);
}
-int ipoib_ib_dev_down(struct net_device *dev, int flush)
+int ipoib_ib_dev_down(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -747,7 +747,7 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush)
clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
netif_carrier_off(dev);
- ipoib_mcast_stop_thread(dev, flush);
+ ipoib_mcast_stop_thread(dev);
ipoib_mcast_dev_flush(dev);
ipoib_flush_paths(dev);
@@ -807,7 +807,7 @@ void ipoib_drain_cq(struct net_device *dev)
local_bh_enable();
}
-int ipoib_ib_dev_stop(struct net_device *dev, int flush)
+int ipoib_ib_dev_stop(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_attr qp_attr;
@@ -880,8 +880,7 @@ timeout:
/* Wait for all AHs to be reaped */
set_bit(IPOIB_STOP_REAPER, &priv->flags);
cancel_delayed_work(&priv->ah_reap_task);
- if (flush)
- flush_workqueue(ipoib_workqueue);
+ flush_workqueue(priv->wq);
begin = jiffies;
@@ -918,7 +917,7 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
(unsigned long) dev);
if (dev->flags & IFF_UP) {
- if (ipoib_ib_dev_open(dev, 1)) {
+ if (ipoib_ib_dev_open(dev)) {
ipoib_transport_dev_cleanup(dev);
return -ENODEV;
}
@@ -1040,12 +1039,12 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
}
if (level >= IPOIB_FLUSH_NORMAL)
- ipoib_ib_dev_down(dev, 0);
+ ipoib_ib_dev_down(dev);
if (level == IPOIB_FLUSH_HEAVY) {
if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
- ipoib_ib_dev_stop(dev, 0);
- if (ipoib_ib_dev_open(dev, 0) != 0)
+ ipoib_ib_dev_stop(dev);
+ if (ipoib_ib_dev_open(dev) != 0)
return;
if (netif_queue_stopped(dev))
netif_start_queue(dev);
@@ -1097,7 +1096,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
*/
ipoib_flush_paths(dev);
- ipoib_mcast_stop_thread(dev, 1);
+ ipoib_mcast_stop_thread(dev);
ipoib_mcast_dev_flush(dev);
ipoib_transport_dev_cleanup(dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 58b5aa3b6f2d..6bad17d4d588 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -108,7 +108,7 @@ int ipoib_open(struct net_device *dev)
set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
- if (ipoib_ib_dev_open(dev, 1)) {
+ if (ipoib_ib_dev_open(dev)) {
if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
return 0;
goto err_disable;
@@ -139,7 +139,7 @@ int ipoib_open(struct net_device *dev)
return 0;
err_stop:
- ipoib_ib_dev_stop(dev, 1);
+ ipoib_ib_dev_stop(dev);
err_disable:
clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
@@ -157,8 +157,8 @@ static int ipoib_stop(struct net_device *dev)
netif_stop_queue(dev);
- ipoib_ib_dev_down(dev, 1);
- ipoib_ib_dev_stop(dev, 0);
+ ipoib_ib_dev_down(dev);
+ ipoib_ib_dev_stop(dev);
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
struct ipoib_dev_priv *cpriv;
@@ -839,7 +839,7 @@ static void ipoib_set_mcast_list(struct net_device *dev)
return;
}
- queue_work(ipoib_workqueue, &priv->restart_task);
+ queue_work(priv->wq, &priv->restart_task);
}
static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr)
@@ -954,7 +954,7 @@ static void ipoib_reap_neigh(struct work_struct *work)
__ipoib_reap_neigh(priv);
if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
- queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
+ queue_delayed_work(priv->wq, &priv->neigh_reap_task,
arp_tbl.gc_interval);
}
@@ -1133,7 +1133,7 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
/* start garbage collection */
clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
- queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
+ queue_delayed_work(priv->wq, &priv->neigh_reap_task,
arp_tbl.gc_interval);
return 0;
@@ -1262,15 +1262,13 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- if (ipoib_neigh_hash_init(priv) < 0)
- goto out;
/* Allocate RX/TX "rings" to hold queued skbs */
priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
GFP_KERNEL);
if (!priv->rx_ring) {
printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
ca->name, ipoib_recvq_size);
- goto out_neigh_hash_cleanup;
+ goto out;
}
priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
@@ -1285,16 +1283,24 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
if (ipoib_ib_dev_init(dev, ca, port))
goto out_tx_ring_cleanup;
+ /*
+ * Must be after ipoib_ib_dev_init so we can allocate a per
+ * device wq there and use it here
+ */
+ if (ipoib_neigh_hash_init(priv) < 0)
+ goto out_dev_uninit;
+
return 0;
+out_dev_uninit:
+ ipoib_ib_dev_cleanup(dev);
+
out_tx_ring_cleanup:
vfree(priv->tx_ring);
out_rx_ring_cleanup:
kfree(priv->rx_ring);
-out_neigh_hash_cleanup:
- ipoib_neigh_hash_uninit(dev);
out:
return -ENOMEM;
}
@@ -1317,6 +1323,12 @@ void ipoib_dev_cleanup(struct net_device *dev)
}
unregister_netdevice_many(&head);
+ /*
+ * Must be before ipoib_ib_dev_cleanup or we delete an in use
+ * work queue
+ */
+ ipoib_neigh_hash_uninit(dev);
+
ipoib_ib_dev_cleanup(dev);
kfree(priv->rx_ring);
@@ -1324,8 +1336,6 @@ void ipoib_dev_cleanup(struct net_device *dev)
priv->rx_ring = NULL;
priv->tx_ring = NULL;
-
- ipoib_neigh_hash_uninit(dev);
}
static const struct header_ops ipoib_header_ops = {
@@ -1636,7 +1646,7 @@ register_failed:
/* Stop GC if started before flush */
set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
cancel_delayed_work(&priv->neigh_reap_task);
- flush_workqueue(ipoib_workqueue);
+ flush_workqueue(priv->wq);
event_failed:
ipoib_dev_cleanup(priv->dev);
@@ -1707,7 +1717,7 @@ static void ipoib_remove_one(struct ib_device *device)
/* Stop GC */
set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
cancel_delayed_work(&priv->neigh_reap_task);
- flush_workqueue(ipoib_workqueue);
+ flush_workqueue(priv->wq);
unregister_netdev(priv->dev);
free_netdev(priv->dev);
@@ -1748,8 +1758,13 @@ static int __init ipoib_init_module(void)
* unregister_netdev() and linkwatch_event take the rtnl lock,
* so flush_scheduled_work() can deadlock during device
* removal.
+ *
+ * In addition, bringing one device up and another down at the
+ * same time can deadlock a single workqueue, so we have this
+ * global fallback workqueue, but we also attempt to open a
+ * per device workqueue each time we bring an interface up
*/
- ipoib_workqueue = create_singlethread_workqueue("ipoib");
+ ipoib_workqueue = create_singlethread_workqueue("ipoib_flush");
if (!ipoib_workqueue) {
ret = -ENOMEM;
goto err_fs;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index ffb83b5f7e80..bc50dd0d0e4d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -190,12 +190,6 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
spin_unlock_irq(&priv->lock);
priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
set_qkey = 1;
-
- if (!ipoib_cm_admin_enabled(dev)) {
- rtnl_lock();
- dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
- rtnl_unlock();
- }
}
if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
@@ -277,16 +271,27 @@ ipoib_mcast_sendonly_join_complete(int status,
struct ipoib_mcast *mcast = multicast->context;
struct net_device *dev = mcast->dev;
+ /*
+ * We have to take the mutex to force mcast_sendonly_join to
+ * return from ib_sa_multicast_join and set mcast->mc to a
+ * valid value. Otherwise we were racing with ourselves in
+ * that we might fail here, but get a valid return from
+ * ib_sa_multicast_join after we had cleared mcast->mc here,
+ * resulting in mis-matched joins and leaves and a deadlock
+ */
+ mutex_lock(&mcast_mutex);
+
/* We trap for port events ourselves. */
if (status == -ENETRESET)
- return 0;
+ goto out;
if (!status)
status = ipoib_mcast_join_finish(mcast, &multicast->rec);
if (status) {
if (mcast->logcount++ < 20)
- ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for %pI6, status %d\n",
+ ipoib_dbg_mcast(netdev_priv(dev), "sendonly multicast "
+ "join failed for %pI6, status %d\n",
mcast->mcmember.mgid.raw, status);
/* Flush out any queued packets */
@@ -296,11 +301,15 @@ ipoib_mcast_sendonly_join_complete(int status,
dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
}
netif_tx_unlock_bh(dev);
-
- /* Clear the busy flag so we try again */
- status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
- &mcast->flags);
}
+out:
+ clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+ if (status)
+ mcast->mc = NULL;
+ complete(&mcast->done);
+ if (status == -ENETRESET)
+ status = 0;
+ mutex_unlock(&mcast_mutex);
return status;
}
@@ -318,12 +327,14 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
int ret = 0;
if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
- ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n");
+ ipoib_dbg_mcast(priv, "device shutting down, no sendonly "
+ "multicast joins\n");
return -ENODEV;
}
- if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
- ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n");
+ if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
+ ipoib_dbg_mcast(priv, "multicast entry busy, skipping "
+ "sendonly join\n");
return -EBUSY;
}
@@ -331,6 +342,9 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
rec.port_gid = priv->local_gid;
rec.pkey = cpu_to_be16(priv->pkey);
+ mutex_lock(&mcast_mutex);
+ init_completion(&mcast->done);
+ set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
priv->port, &rec,
IB_SA_MCMEMBER_REC_MGID |
@@ -343,12 +357,14 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
if (IS_ERR(mcast->mc)) {
ret = PTR_ERR(mcast->mc);
clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
- ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",
- ret);
+ complete(&mcast->done);
+ ipoib_warn(priv, "ib_sa_join_multicast for sendonly join "
+ "failed (ret = %d)\n", ret);
} else {
- ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting join\n",
- mcast->mcmember.mgid.raw);
+ ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting "
+ "sendonly join\n", mcast->mcmember.mgid.raw);
}
+ mutex_unlock(&mcast_mutex);
return ret;
}
@@ -359,18 +375,29 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work)
carrier_on_task);
struct ib_port_attr attr;
- /*
- * Take rtnl_lock to avoid racing with ipoib_stop() and
- * turning the carrier back on while a device is being
- * removed.
- */
if (ib_query_port(priv->ca, priv->port, &attr) ||
attr.state != IB_PORT_ACTIVE) {
ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
return;
}
- rtnl_lock();
+ /*
+ * Take rtnl_lock to avoid racing with ipoib_stop() and
+ * turning the carrier back on while a device is being
+ * removed. However, ipoib_stop() will attempt to flush
+ * the workqueue while holding the rtnl lock, so loop
+ * on trylock until either we get the lock or we see
+ * FLAG_ADMIN_UP go away as that signals that we are bailing
+ * and can safely ignore the carrier on work.
+ */
+ while (!rtnl_trylock()) {
+ if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
+ return;
+ else
+ msleep(20);
+ }
+ if (!ipoib_cm_admin_enabled(priv->dev))
+ dev_set_mtu(priv->dev, min(priv->mcast_mtu, priv->admin_mtu));
netif_carrier_on(priv->dev);
rtnl_unlock();
}
@@ -385,60 +412,63 @@ static int ipoib_mcast_join_complete(int status,
ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n",
mcast->mcmember.mgid.raw, status);
+ /*
+ * We have to take the mutex to force mcast_join to
+ * return from ib_sa_multicast_join and set mcast->mc to a
+ * valid value. Otherwise we were racing with ourselves in
+ * that we might fail here, but get a valid return from
+ * ib_sa_multicast_join after we had cleared mcast->mc here,
+ * resulting in mis-matched joins and leaves and a deadlock
+ */
+ mutex_lock(&mcast_mutex);
+
/* We trap for port events ourselves. */
- if (status == -ENETRESET) {
- status = 0;
+ if (status == -ENETRESET)
goto out;
- }
if (!status)
status = ipoib_mcast_join_finish(mcast, &multicast->rec);
if (!status) {
mcast->backoff = 1;
- mutex_lock(&mcast_mutex);
if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
- queue_delayed_work(ipoib_workqueue,
- &priv->mcast_task, 0);
- mutex_unlock(&mcast_mutex);
+ queue_delayed_work(priv->wq, &priv->mcast_task, 0);
/*
- * Defer carrier on work to ipoib_workqueue to avoid a
+ * Defer carrier on work to priv->wq to avoid a
* deadlock on rtnl_lock here.
*/
if (mcast == priv->broadcast)
- queue_work(ipoib_workqueue, &priv->carrier_on_task);
-
- status = 0;
- goto out;
- }
-
- if (mcast->logcount++ < 20) {
- if (status == -ETIMEDOUT || status == -EAGAIN) {
- ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n",
- mcast->mcmember.mgid.raw, status);
- } else {
- ipoib_warn(priv, "multicast join failed for %pI6, status %d\n",
- mcast->mcmember.mgid.raw, status);
+ queue_work(priv->wq, &priv->carrier_on_task);
+ } else {
+ if (mcast->logcount++ < 20) {
+ if (status == -ETIMEDOUT || status == -EAGAIN) {
+ ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n",
+ mcast->mcmember.mgid.raw, status);
+ } else {
+ ipoib_warn(priv, "multicast join failed for %pI6, status %d\n",
+ mcast->mcmember.mgid.raw, status);
+ }
}
- }
-
- mcast->backoff *= 2;
- if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
- mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
- /* Clear the busy flag so we try again */
- status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-
- mutex_lock(&mcast_mutex);
+ mcast->backoff *= 2;
+ if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
+ mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
+ }
+out:
spin_lock_irq(&priv->lock);
- if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
- queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
+ clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+ if (status)
+ mcast->mc = NULL;
+ complete(&mcast->done);
+ if (status == -ENETRESET)
+ status = 0;
+ if (status && test_bit(IPOIB_MCAST_RUN, &priv->flags))
+ queue_delayed_work(priv->wq, &priv->mcast_task,
mcast->backoff * HZ);
spin_unlock_irq(&priv->lock);
mutex_unlock(&mcast_mutex);
-out:
- complete(&mcast->done);
+
return status;
}
@@ -487,10 +517,9 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
rec.hop_limit = priv->broadcast->mcmember.hop_limit;
}
- set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+ mutex_lock(&mcast_mutex);
init_completion(&mcast->done);
- set_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags);
-
+ set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
&rec, comp_mask, GFP_KERNEL,
ipoib_mcast_join_complete, mcast);
@@ -504,13 +533,11 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
- mutex_lock(&mcast_mutex);
if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
- queue_delayed_work(ipoib_workqueue,
- &priv->mcast_task,
+ queue_delayed_work(priv->wq, &priv->mcast_task,
mcast->backoff * HZ);
- mutex_unlock(&mcast_mutex);
}
+ mutex_unlock(&mcast_mutex);
}
void ipoib_mcast_join_task(struct work_struct *work)
@@ -547,8 +574,8 @@ void ipoib_mcast_join_task(struct work_struct *work)
ipoib_warn(priv, "failed to allocate broadcast group\n");
mutex_lock(&mcast_mutex);
if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
- queue_delayed_work(ipoib_workqueue,
- &priv->mcast_task, HZ);
+ queue_delayed_work(priv->wq, &priv->mcast_task,
+ HZ);
mutex_unlock(&mcast_mutex);
return;
}
@@ -563,7 +590,8 @@ void ipoib_mcast_join_task(struct work_struct *work)
}
if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
- if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
+ if (IS_ERR_OR_NULL(priv->broadcast->mc) &&
+ !test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
ipoib_mcast_join(dev, priv->broadcast, 0);
return;
}
@@ -571,23 +599,33 @@ void ipoib_mcast_join_task(struct work_struct *work)
while (1) {
struct ipoib_mcast *mcast = NULL;
+ /*
+ * Need the mutex so our flags are consistent, need the
+ * priv->lock so we don't race with list removals in either
+ * mcast_dev_flush or mcast_restart_task
+ */
+ mutex_lock(&mcast_mutex);
spin_lock_irq(&priv->lock);
list_for_each_entry(mcast, &priv->multicast_list, list) {
- if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)
- && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)
- && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
+ if (IS_ERR_OR_NULL(mcast->mc) &&
+ !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) &&
+ !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
/* Found the next unjoined group */
break;
}
}
spin_unlock_irq(&priv->lock);
+ mutex_unlock(&mcast_mutex);
if (&mcast->list == &priv->multicast_list) {
/* All done */
break;
}
- ipoib_mcast_join(dev, mcast, 1);
+ if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
+ ipoib_mcast_sendonly_join(mcast);
+ else
+ ipoib_mcast_join(dev, mcast, 1);
return;
}
@@ -604,13 +642,13 @@ int ipoib_mcast_start_thread(struct net_device *dev)
mutex_lock(&mcast_mutex);
if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
- queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0);
+ queue_delayed_work(priv->wq, &priv->mcast_task, 0);
mutex_unlock(&mcast_mutex);
return 0;
}
-int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
+int ipoib_mcast_stop_thread(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -621,8 +659,7 @@ int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
cancel_delayed_work(&priv->mcast_task);
mutex_unlock(&mcast_mutex);
- if (flush)
- flush_workqueue(ipoib_workqueue);
+ flush_workqueue(priv->wq);
return 0;
}
@@ -633,6 +670,9 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
int ret = 0;
if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
+ ipoib_warn(priv, "ipoib_mcast_leave on an in-flight join\n");
+
+ if (!IS_ERR_OR_NULL(mcast->mc))
ib_sa_free_multicast(mcast->mc);
if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
@@ -685,6 +725,8 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid));
__ipoib_mcast_add(dev, mcast);
list_add_tail(&mcast->list, &priv->multicast_list);
+ if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
+ queue_delayed_work(priv->wq, &priv->mcast_task, 0);
}
if (!mcast->ah) {
@@ -698,8 +740,6 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
ipoib_dbg_mcast(priv, "no address vector, "
"but multicast join already started\n");
- else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
- ipoib_mcast_sendonly_join(mcast);
/*
* If lookup completes between here and out:, don't
@@ -759,9 +799,12 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
spin_unlock_irqrestore(&priv->lock, flags);
- /* seperate between the wait to the leave*/
+ /*
+ * make sure the in-flight joins have finished before we attempt
+ * to leave
+ */
list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
- if (test_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags))
+ if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
wait_for_completion(&mcast->done);
list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
@@ -794,8 +837,6 @@ void ipoib_mcast_restart_task(struct work_struct *work)
ipoib_dbg_mcast(priv, "restarting multicast task\n");
- ipoib_mcast_stop_thread(dev, 0);
-
local_irq_save(flags);
netif_addr_lock(dev);
spin_lock(&priv->lock);
@@ -880,14 +921,38 @@ void ipoib_mcast_restart_task(struct work_struct *work)
netif_addr_unlock(dev);
local_irq_restore(flags);
- /* We have to cancel outside of the spinlock */
+ /*
+ * make sure the in-flight joins have finished before we attempt
+ * to leave
+ */
+ list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
+ if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
+ wait_for_completion(&mcast->done);
+
+ /*
+ * We have to cancel outside of the spinlock, but we have to
+ * take the rtnl lock or else we race with the removal of
+ * entries from the remove list in mcast_dev_flush as part
+ * of ipoib_stop(). We detect the drop of the ADMIN_UP flag
+ * to signal that we have hit this particular race, and we
+ * return since we know we don't need to do anything else
+ * anyway.
+ */
+ while (!rtnl_trylock()) {
+ if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
+ return;
+ else
+ msleep(20);
+ }
list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
ipoib_mcast_leave(mcast->dev, mcast);
ipoib_mcast_free(mcast);
}
-
- if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
- ipoib_mcast_start_thread(dev);
+ /*
+ * Restart our join task if needed
+ */
+ ipoib_mcast_start_thread(dev);
+ rtnl_unlock();
}
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index c56d5d44c53b..b72a753eb41d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -145,10 +145,20 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
int ret, size;
int i;
+ /*
+ * the various IPoIB tasks assume they will never race against
+ * themselves, so always use a single thread workqueue
+ */
+ priv->wq = create_singlethread_workqueue("ipoib_wq");
+ if (!priv->wq) {
+ printk(KERN_WARNING "ipoib: failed to allocate device WQ\n");
+ return -ENODEV;
+ }
+
priv->pd = ib_alloc_pd(priv->ca);
if (IS_ERR(priv->pd)) {
printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name);
- return -ENODEV;
+ goto out_free_wq;
}
priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE);
@@ -242,6 +252,10 @@ out_free_mr:
out_free_pd:
ib_dealloc_pd(priv->pd);
+
+out_free_wq:
+ destroy_workqueue(priv->wq);
+ priv->wq = NULL;
return -ENODEV;
}
@@ -270,6 +284,12 @@ void ipoib_transport_dev_cleanup(struct net_device *dev)
if (ib_dealloc_pd(priv->pd))
ipoib_warn(priv, "ib_dealloc_pd failed\n");
+
+ if (priv->wq) {
+ flush_workqueue(priv->wq);
+ destroy_workqueue(priv->wq);
+ priv->wq = NULL;
+ }
}
void ipoib_event(struct ib_event_handler *handler,
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 20ca6a619476..6a594aac2290 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -97,7 +97,7 @@ module_param_named(pi_enable, iser_pi_enable, bool, 0644);
MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)");
module_param_named(pi_guard, iser_pi_guard, int, 0644);
-MODULE_PARM_DESC(pi_guard, "T10-PI guard_type, 0:CRC|1:IP_CSUM (default:IP_CSUM)");
+MODULE_PARM_DESC(pi_guard, "T10-PI guard_type [deprecated]");
static struct workqueue_struct *release_wq;
struct iser_global ig;
@@ -164,18 +164,42 @@ iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode)
return 0;
}
-int iser_initialize_task_headers(struct iscsi_task *task,
- struct iser_tx_desc *tx_desc)
+/**
+ * iser_initialize_task_headers() - Initialize task headers
+ * @task: iscsi task
+ * @tx_desc: iser tx descriptor
+ *
+ * Notes:
+ * This routine may race with iser teardown flow for scsi
+ * error handling TMFs. So for TMF we should acquire the
+ * state mutex to avoid dereferencing the IB device which
+ * may have already been terminated.
+ */
+int
+iser_initialize_task_headers(struct iscsi_task *task,
+ struct iser_tx_desc *tx_desc)
{
- struct iser_conn *iser_conn = task->conn->dd_data;
+ struct iser_conn *iser_conn = task->conn->dd_data;
struct iser_device *device = iser_conn->ib_conn.device;
struct iscsi_iser_task *iser_task = task->dd_data;
u64 dma_addr;
+ const bool mgmt_task = !task->sc && !in_interrupt();
+ int ret = 0;
+
+ if (unlikely(mgmt_task))
+ mutex_lock(&iser_conn->state_mutex);
+
+ if (unlikely(iser_conn->state != ISER_CONN_UP)) {
+ ret = -ENODEV;
+ goto out;
+ }
dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc,
ISER_HEADERS_LEN, DMA_TO_DEVICE);
- if (ib_dma_mapping_error(device->ib_device, dma_addr))
- return -ENOMEM;
+ if (ib_dma_mapping_error(device->ib_device, dma_addr)) {
+ ret = -ENOMEM;
+ goto out;
+ }
tx_desc->dma_addr = dma_addr;
tx_desc->tx_sg[0].addr = tx_desc->dma_addr;
@@ -183,7 +207,11 @@ int iser_initialize_task_headers(struct iscsi_task *task,
tx_desc->tx_sg[0].lkey = device->mr->lkey;
iser_task->iser_conn = iser_conn;
- return 0;
+out:
+ if (unlikely(mgmt_task))
+ mutex_unlock(&iser_conn->state_mutex);
+
+ return ret;
}
/**
@@ -199,9 +227,14 @@ static int
iscsi_iser_task_init(struct iscsi_task *task)
{
struct iscsi_iser_task *iser_task = task->dd_data;
+ int ret;
- if (iser_initialize_task_headers(task, &iser_task->desc))
- return -ENOMEM;
+ ret = iser_initialize_task_headers(task, &iser_task->desc);
+ if (ret) {
+ iser_err("Failed to init task %p, err = %d\n",
+ iser_task, ret);
+ return ret;
+ }
/* mgmt task */
if (!task->sc)
@@ -508,8 +541,8 @@ iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
*/
if (iser_conn) {
mutex_lock(&iser_conn->state_mutex);
- iscsi_conn_stop(cls_conn, flag);
iser_conn_terminate(iser_conn);
+ iscsi_conn_stop(cls_conn, flag);
/* unbind */
iser_conn->iscsi_conn = NULL;
@@ -541,12 +574,13 @@ iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session)
static inline unsigned int
iser_dif_prot_caps(int prot_caps)
{
- return ((prot_caps & IB_PROT_T10DIF_TYPE_1) ? SHOST_DIF_TYPE1_PROTECTION |
- SHOST_DIX_TYPE1_PROTECTION : 0) |
- ((prot_caps & IB_PROT_T10DIF_TYPE_2) ? SHOST_DIF_TYPE2_PROTECTION |
- SHOST_DIX_TYPE2_PROTECTION : 0) |
- ((prot_caps & IB_PROT_T10DIF_TYPE_3) ? SHOST_DIF_TYPE3_PROTECTION |
- SHOST_DIX_TYPE3_PROTECTION : 0);
+ return ((prot_caps & IB_PROT_T10DIF_TYPE_1) ?
+ SHOST_DIF_TYPE1_PROTECTION | SHOST_DIX_TYPE0_PROTECTION |
+ SHOST_DIX_TYPE1_PROTECTION : 0) |
+ ((prot_caps & IB_PROT_T10DIF_TYPE_2) ?
+ SHOST_DIF_TYPE2_PROTECTION | SHOST_DIX_TYPE2_PROTECTION : 0) |
+ ((prot_caps & IB_PROT_T10DIF_TYPE_3) ?
+ SHOST_DIF_TYPE3_PROTECTION | SHOST_DIX_TYPE3_PROTECTION : 0);
}
/**
@@ -569,6 +603,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
struct Scsi_Host *shost;
struct iser_conn *iser_conn = NULL;
struct ib_conn *ib_conn;
+ u16 max_cmds;
shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0);
if (!shost)
@@ -586,26 +621,41 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
*/
if (ep) {
iser_conn = ep->dd_data;
+ max_cmds = iser_conn->max_cmds;
+
+ mutex_lock(&iser_conn->state_mutex);
+ if (iser_conn->state != ISER_CONN_UP) {
+ iser_err("iser conn %p already started teardown\n",
+ iser_conn);
+ mutex_unlock(&iser_conn->state_mutex);
+ goto free_host;
+ }
+
ib_conn = &iser_conn->ib_conn;
if (ib_conn->pi_support) {
u32 sig_caps = ib_conn->device->dev_attr.sig_prot_cap;
scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps));
- if (iser_pi_guard)
- scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP);
- else
- scsi_host_set_guard(shost, SHOST_DIX_GUARD_CRC);
+ scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP |
+ SHOST_DIX_GUARD_CRC);
}
- }
- if (iscsi_host_add(shost, ep ?
- ib_conn->device->ib_device->dma_device : NULL))
- goto free_host;
+ if (iscsi_host_add(shost,
+ ib_conn->device->ib_device->dma_device)) {
+ mutex_unlock(&iser_conn->state_mutex);
+ goto free_host;
+ }
+ mutex_unlock(&iser_conn->state_mutex);
+ } else {
+ max_cmds = ISER_DEF_XMIT_CMDS_MAX;
+ if (iscsi_host_add(shost, NULL))
+ goto free_host;
+ }
- if (cmds_max > ISER_DEF_XMIT_CMDS_MAX) {
+ if (cmds_max > max_cmds) {
iser_info("cmds_max changed from %u to %u\n",
- cmds_max, ISER_DEF_XMIT_CMDS_MAX);
- cmds_max = ISER_DEF_XMIT_CMDS_MAX;
+ cmds_max, max_cmds);
+ cmds_max = max_cmds;
}
cls_session = iscsi_session_setup(&iscsi_iser_transport, shost,
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index cd4174ca9a76..5ce26817e7e1 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -69,34 +69,31 @@
#define DRV_NAME "iser"
#define PFX DRV_NAME ": "
-#define DRV_VER "1.4.8"
+#define DRV_VER "1.5"
#define iser_dbg(fmt, arg...) \
do { \
- if (iser_debug_level > 2) \
+ if (unlikely(iser_debug_level > 2)) \
printk(KERN_DEBUG PFX "%s: " fmt,\
__func__ , ## arg); \
} while (0)
#define iser_warn(fmt, arg...) \
do { \
- if (iser_debug_level > 0) \
+ if (unlikely(iser_debug_level > 0)) \
pr_warn(PFX "%s: " fmt, \
__func__ , ## arg); \
} while (0)
#define iser_info(fmt, arg...) \
do { \
- if (iser_debug_level > 1) \
+ if (unlikely(iser_debug_level > 1)) \
pr_info(PFX "%s: " fmt, \
__func__ , ## arg); \
} while (0)
-#define iser_err(fmt, arg...) \
- do { \
- printk(KERN_ERR PFX "%s: " fmt, \
- __func__ , ## arg); \
- } while (0)
+#define iser_err(fmt, arg...) \
+ pr_err(PFX "%s: " fmt, __func__ , ## arg)
#define SHIFT_4K 12
#define SIZE_4K (1ULL << SHIFT_4K)
@@ -144,6 +141,11 @@
ISER_MAX_TX_MISC_PDUS + \
ISER_MAX_RX_MISC_PDUS)
+#define ISER_GET_MAX_XMIT_CMDS(send_wr) ((send_wr \
+ - ISER_MAX_TX_MISC_PDUS \
+ - ISER_MAX_RX_MISC_PDUS) / \
+ (1 + ISER_INFLIGHT_DATAOUTS))
+
#define ISER_WC_BATCH_COUNT 16
#define ISER_SIGNAL_CMD_COUNT 32
@@ -247,7 +249,6 @@ struct iscsi_endpoint;
* @va: MR start address (buffer va)
* @len: MR length
* @mem_h: pointer to registration context (FMR/Fastreg)
- * @is_mr: indicates weather we registered the buffer
*/
struct iser_mem_reg {
u32 lkey;
@@ -255,7 +256,6 @@ struct iser_mem_reg {
u64 va;
u64 len;
void *mem_h;
- int is_mr;
};
/**
@@ -323,8 +323,6 @@ struct iser_rx_desc {
char pad[ISER_RX_PAD_SIZE];
} __attribute__((packed));
-#define ISER_MAX_CQ 4
-
struct iser_conn;
struct ib_conn;
struct iscsi_iser_task;
@@ -375,7 +373,7 @@ struct iser_device {
struct list_head ig_list;
int refcount;
int comps_used;
- struct iser_comp comps[ISER_MAX_CQ];
+ struct iser_comp *comps;
int (*iser_alloc_rdma_reg_res)(struct ib_conn *ib_conn,
unsigned cmds_max);
void (*iser_free_rdma_reg_res)(struct ib_conn *ib_conn);
@@ -432,6 +430,7 @@ struct fast_reg_descriptor {
* @cma_id: rdma_cm connection maneger handle
* @qp: Connection Queue-pair
* @post_recv_buf_count: post receive counter
+ * @sig_count: send work request signal count
* @rx_wr: receive work request for batch posts
* @device: reference to iser device
* @comp: iser completion context
@@ -452,6 +451,7 @@ struct ib_conn {
struct rdma_cm_id *cma_id;
struct ib_qp *qp;
int post_recv_buf_count;
+ u8 sig_count;
struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX];
struct iser_device *device;
struct iser_comp *comp;
@@ -482,6 +482,7 @@ struct ib_conn {
* to max number of post recvs
* @qp_max_recv_dtos_mask: (qp_max_recv_dtos - 1)
* @min_posted_rx: (qp_max_recv_dtos >> 2)
+ * @max_cmds: maximum cmds allowed for this connection
* @name: connection peer portal
* @release_work: deffered work for release job
* @state_mutex: protects iser onnection state
@@ -507,6 +508,7 @@ struct iser_conn {
unsigned qp_max_recv_dtos;
unsigned qp_max_recv_dtos_mask;
unsigned min_posted_rx;
+ u16 max_cmds;
char name[ISER_OBJECT_NAME_SIZE];
struct work_struct release_work;
struct mutex state_mutex;
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 5a489ea63732..3821633f1065 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -369,7 +369,7 @@ static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
return 0;
}
-static inline bool iser_signal_comp(int sig_count)
+static inline bool iser_signal_comp(u8 sig_count)
{
return ((sig_count % ISER_SIGNAL_CMD_COUNT) == 0);
}
@@ -388,7 +388,7 @@ int iser_send_command(struct iscsi_conn *conn,
struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr;
struct scsi_cmnd *sc = task->sc;
struct iser_tx_desc *tx_desc = &iser_task->desc;
- static unsigned sig_count;
+ u8 sig_count = ++iser_conn->ib_conn.sig_count;
edtl = ntohl(hdr->data_length);
@@ -435,7 +435,7 @@ int iser_send_command(struct iscsi_conn *conn,
iser_task->status = ISER_TASK_STATUS_STARTED;
err = iser_post_send(&iser_conn->ib_conn, tx_desc,
- iser_signal_comp(++sig_count));
+ iser_signal_comp(sig_count));
if (!err)
return 0;
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 6c5ce357fba6..abce9339333f 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -73,7 +73,6 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
if (cmd_dir == ISER_DIR_OUT) {
/* copy the unaligned sg the buffer which is used for RDMA */
- int i;
char *p, *from;
sgl = (struct scatterlist *)data->buf;
@@ -409,7 +408,6 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
regd_buf->reg.rkey = device->mr->rkey;
regd_buf->reg.len = ib_sg_dma_len(ibdev, &sg[0]);
regd_buf->reg.va = ib_sg_dma_address(ibdev, &sg[0]);
- regd_buf->reg.is_mr = 0;
iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X "
"va: 0x%08lX sz: %ld]\n",
@@ -440,13 +438,13 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
return 0;
}
-static inline void
+static void
iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs,
struct ib_sig_domain *domain)
{
domain->sig_type = IB_SIG_TYPE_T10_DIF;
- domain->sig.dif.pi_interval = sc->device->sector_size;
- domain->sig.dif.ref_tag = scsi_get_lba(sc) & 0xffffffff;
+ domain->sig.dif.pi_interval = scsi_prot_interval(sc);
+ domain->sig.dif.ref_tag = scsi_prot_ref_tag(sc);
/*
* At the moment we hard code those, but in the future
* we will take them from sc.
@@ -454,8 +452,7 @@ iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs,
domain->sig.dif.apptag_check_mask = 0xffff;
domain->sig.dif.app_escape = true;
domain->sig.dif.ref_escape = true;
- if (scsi_get_prot_type(sc) == SCSI_PROT_DIF_TYPE1 ||
- scsi_get_prot_type(sc) == SCSI_PROT_DIF_TYPE2)
+ if (sc->prot_flags & SCSI_PROT_REF_INCREMENT)
domain->sig.dif.ref_remap = true;
};
@@ -473,26 +470,16 @@ iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs)
case SCSI_PROT_WRITE_STRIP:
sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
- /*
- * At the moment we use this modparam to tell what is
- * the memory bg_type, in the future we will take it
- * from sc.
- */
- sig_attrs->mem.sig.dif.bg_type = iser_pi_guard ? IB_T10DIF_CSUM :
- IB_T10DIF_CRC;
+ sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
+ IB_T10DIF_CSUM : IB_T10DIF_CRC;
break;
case SCSI_PROT_READ_PASS:
case SCSI_PROT_WRITE_PASS:
iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
- /*
- * At the moment we use this modparam to tell what is
- * the memory bg_type, in the future we will take it
- * from sc.
- */
- sig_attrs->mem.sig.dif.bg_type = iser_pi_guard ? IB_T10DIF_CSUM :
- IB_T10DIF_CRC;
+ sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
+ IB_T10DIF_CSUM : IB_T10DIF_CRC;
break;
default:
iser_err("Unsupported PI operation %d\n",
@@ -503,26 +490,28 @@ iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs)
return 0;
}
-static int
+static inline void
iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
{
- switch (scsi_get_prot_type(sc)) {
- case SCSI_PROT_DIF_TYPE0:
- break;
- case SCSI_PROT_DIF_TYPE1:
- case SCSI_PROT_DIF_TYPE2:
- *mask = ISER_CHECK_GUARD | ISER_CHECK_REFTAG;
- break;
- case SCSI_PROT_DIF_TYPE3:
- *mask = ISER_CHECK_GUARD;
- break;
- default:
- iser_err("Unsupported protection type %d\n",
- scsi_get_prot_type(sc));
- return -EINVAL;
- }
+ *mask = 0;
+ if (sc->prot_flags & SCSI_PROT_REF_CHECK)
+ *mask |= ISER_CHECK_REFTAG;
+ if (sc->prot_flags & SCSI_PROT_GUARD_CHECK)
+ *mask |= ISER_CHECK_GUARD;
+}
- return 0;
+static void
+iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
+{
+ u32 rkey;
+
+ memset(inv_wr, 0, sizeof(*inv_wr));
+ inv_wr->opcode = IB_WR_LOCAL_INV;
+ inv_wr->wr_id = ISER_FASTREG_LI_WRID;
+ inv_wr->ex.invalidate_rkey = mr->rkey;
+
+ rkey = ib_inc_rkey(mr->rkey);
+ ib_update_fast_reg_key(mr, rkey);
}
static int
@@ -536,26 +525,17 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
struct ib_send_wr *bad_wr, *wr = NULL;
struct ib_sig_attrs sig_attrs;
int ret;
- u32 key;
memset(&sig_attrs, 0, sizeof(sig_attrs));
ret = iser_set_sig_attrs(iser_task->sc, &sig_attrs);
if (ret)
goto err;
- ret = iser_set_prot_checks(iser_task->sc, &sig_attrs.check_mask);
- if (ret)
- goto err;
+ iser_set_prot_checks(iser_task->sc, &sig_attrs.check_mask);
if (!(desc->reg_indicators & ISER_SIG_KEY_VALID)) {
- memset(&inv_wr, 0, sizeof(inv_wr));
- inv_wr.opcode = IB_WR_LOCAL_INV;
- inv_wr.wr_id = ISER_FASTREG_LI_WRID;
- inv_wr.ex.invalidate_rkey = pi_ctx->sig_mr->rkey;
+ iser_inv_rkey(&inv_wr, pi_ctx->sig_mr);
wr = &inv_wr;
- /* Bump the key */
- key = (u8)(pi_ctx->sig_mr->rkey & 0x000000FF);
- ib_update_fast_reg_key(pi_ctx->sig_mr, ++key);
}
memset(&sig_wr, 0, sizeof(sig_wr));
@@ -585,12 +565,7 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
sig_sge->lkey = pi_ctx->sig_mr->lkey;
sig_sge->addr = 0;
- sig_sge->length = data_sge->length + prot_sge->length;
- if (scsi_get_prot_op(iser_task->sc) == SCSI_PROT_WRITE_INSERT ||
- scsi_get_prot_op(iser_task->sc) == SCSI_PROT_READ_STRIP) {
- sig_sge->length += (data_sge->length /
- iser_task->sc->device->sector_size) * 8;
- }
+ sig_sge->length = scsi_transfer_length(iser_task->sc);
iser_dbg("sig_sge: addr: 0x%llx length: %u lkey: 0x%x\n",
sig_sge->addr, sig_sge->length,
@@ -613,7 +588,6 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
struct ib_fast_reg_page_list *frpl;
struct ib_send_wr fastreg_wr, inv_wr;
struct ib_send_wr *bad_wr, *wr = NULL;
- u8 key;
int ret, offset, size, plen;
/* if there a single dma entry, dma mr suffices */
@@ -645,14 +619,8 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
}
if (!(desc->reg_indicators & ind)) {
- memset(&inv_wr, 0, sizeof(inv_wr));
- inv_wr.wr_id = ISER_FASTREG_LI_WRID;
- inv_wr.opcode = IB_WR_LOCAL_INV;
- inv_wr.ex.invalidate_rkey = mr->rkey;
+ iser_inv_rkey(&inv_wr, mr);
wr = &inv_wr;
- /* Bump the key */
- key = (u8)(mr->rkey & 0x000000FF);
- ib_update_fast_reg_key(mr, ++key);
}
/* Prepare FASTREG WR */
@@ -770,15 +738,11 @@ int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task,
regd_buf->reg.rkey = desc->pi_ctx->sig_mr->rkey;
regd_buf->reg.va = sig_sge.addr;
regd_buf->reg.len = sig_sge.length;
- regd_buf->reg.is_mr = 1;
} else {
- if (desc) {
+ if (desc)
regd_buf->reg.rkey = desc->data_mr->rkey;
- regd_buf->reg.is_mr = 1;
- } else {
+ else
regd_buf->reg.rkey = device->mr->rkey;
- regd_buf->reg.is_mr = 0;
- }
regd_buf->reg.lkey = data_sge.lkey;
regd_buf->reg.va = data_sge.addr;
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 67225bb82bb5..695a2704bd43 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -76,7 +76,7 @@ static void iser_event_handler(struct ib_event_handler *handler,
static int iser_create_device_ib_res(struct iser_device *device)
{
struct ib_device_attr *dev_attr = &device->dev_attr;
- int ret, i;
+ int ret, i, max_cqe;
ret = ib_query_device(device->ib_device, dev_attr);
if (ret) {
@@ -104,11 +104,19 @@ static int iser_create_device_ib_res(struct iser_device *device)
return -1;
}
- device->comps_used = min(ISER_MAX_CQ,
+ device->comps_used = min_t(int, num_online_cpus(),
device->ib_device->num_comp_vectors);
- iser_info("using %d CQs, device %s supports %d vectors\n",
+
+ device->comps = kcalloc(device->comps_used, sizeof(*device->comps),
+ GFP_KERNEL);
+ if (!device->comps)
+ goto comps_err;
+
+ max_cqe = min(ISER_MAX_CQ_LEN, dev_attr->max_cqe);
+
+ iser_info("using %d CQs, device %s supports %d vectors max_cqe %d\n",
device->comps_used, device->ib_device->name,
- device->ib_device->num_comp_vectors);
+ device->ib_device->num_comp_vectors, max_cqe);
device->pd = ib_alloc_pd(device->ib_device);
if (IS_ERR(device->pd))
@@ -122,7 +130,7 @@ static int iser_create_device_ib_res(struct iser_device *device)
iser_cq_callback,
iser_cq_event_callback,
(void *)comp,
- ISER_MAX_CQ_LEN, i);
+ max_cqe, i);
if (IS_ERR(comp->cq)) {
comp->cq = NULL;
goto cq_err;
@@ -162,6 +170,8 @@ cq_err:
}
ib_dealloc_pd(device->pd);
pd_err:
+ kfree(device->comps);
+comps_err:
iser_err("failed to allocate an IB resource\n");
return -1;
}
@@ -187,6 +197,9 @@ static void iser_free_device_ib_res(struct iser_device *device)
(void)ib_dereg_mr(device->mr);
(void)ib_dealloc_pd(device->pd);
+ kfree(device->comps);
+ device->comps = NULL;
+
device->mr = NULL;
device->pd = NULL;
}
@@ -425,7 +438,10 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn)
*/
static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
{
+ struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
+ ib_conn);
struct iser_device *device;
+ struct ib_device_attr *dev_attr;
struct ib_qp_init_attr init_attr;
int ret = -ENOMEM;
int index, min_index = 0;
@@ -433,6 +449,7 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
BUG_ON(ib_conn->device == NULL);
device = ib_conn->device;
+ dev_attr = &device->dev_attr;
memset(&init_attr, 0, sizeof init_attr);
@@ -460,8 +477,20 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
if (ib_conn->pi_support) {
init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS + 1;
init_attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN;
+ iser_conn->max_cmds =
+ ISER_GET_MAX_XMIT_CMDS(ISER_QP_SIG_MAX_REQ_DTOS);
} else {
- init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS + 1;
+ if (dev_attr->max_qp_wr > ISER_QP_MAX_REQ_DTOS) {
+ init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS + 1;
+ iser_conn->max_cmds =
+ ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS);
+ } else {
+ init_attr.cap.max_send_wr = dev_attr->max_qp_wr;
+ iser_conn->max_cmds =
+ ISER_GET_MAX_XMIT_CMDS(dev_attr->max_qp_wr);
+ iser_dbg("device %s supports max_send_wr %d\n",
+ device->ib_device->name, dev_attr->max_qp_wr);
+ }
}
ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
@@ -475,7 +504,11 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
return ret;
out_err:
+ mutex_lock(&ig.connlist_mutex);
+ ib_conn->comp->active_qps--;
+ mutex_unlock(&ig.connlist_mutex);
iser_err("unable to alloc mem or create resource, err %d\n", ret);
+
return ret;
}
@@ -610,9 +643,11 @@ void iser_conn_release(struct iser_conn *iser_conn)
mutex_unlock(&ig.connlist_mutex);
mutex_lock(&iser_conn->state_mutex);
- if (iser_conn->state != ISER_CONN_DOWN)
+ if (iser_conn->state != ISER_CONN_DOWN) {
iser_warn("iser conn %p state %d, expected state down.\n",
iser_conn, iser_conn->state);
+ iser_conn->state = ISER_CONN_DOWN;
+ }
/*
* In case we never got to bind stage, we still need to
* release IB resources (which is safe to call more than once).
@@ -662,8 +697,10 @@ int iser_conn_terminate(struct iser_conn *iser_conn)
/* post an indication that all flush errors were consumed */
err = ib_post_send(ib_conn->qp, &ib_conn->beacon, &bad_wr);
- if (err)
+ if (err) {
iser_err("conn %p failed to post beacon", ib_conn);
+ return 1;
+ }
wait_for_completion(&ib_conn->flush_comp);
}
@@ -846,20 +883,21 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
break;
case RDMA_CM_EVENT_DISCONNECTED:
case RDMA_CM_EVENT_ADDR_CHANGE:
- iser_disconnected_handler(cma_id);
+ case RDMA_CM_EVENT_TIMEWAIT_EXIT:
+ iser_cleanup_handler(cma_id, false);
break;
case RDMA_CM_EVENT_DEVICE_REMOVAL:
/*
* we *must* destroy the device as we cannot rely
* on iscsid to be around to initiate error handling.
- * also implicitly destroy the cma_id.
+ * also if we are not in state DOWN implicitly destroy
+ * the cma_id.
*/
iser_cleanup_handler(cma_id, true);
- iser_conn->ib_conn.cma_id = NULL;
- ret = 1;
- break;
- case RDMA_CM_EVENT_TIMEWAIT_EXIT:
- iser_cleanup_handler(cma_id, false);
+ if (iser_conn->state != ISER_CONN_DOWN) {
+ iser_conn->ib_conn.cma_id = NULL;
+ ret = 1;
+ }
break;
default:
iser_err("Unexpected RDMA CM event (%d)\n", event->event);
@@ -981,7 +1019,6 @@ int iser_reg_page_vec(struct ib_conn *ib_conn,
mem_reg->rkey = mem->fmr->rkey;
mem_reg->len = page_vec->length * SIZE_4K;
mem_reg->va = io_addr;
- mem_reg->is_mr = 1;
mem_reg->mem_h = (void *)mem;
mem_reg->va += page_vec->offset;
@@ -1008,7 +1045,7 @@ void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
int ret;
- if (!reg->is_mr)
+ if (!reg->mem_h)
return;
iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n",reg->mem_h);
@@ -1028,11 +1065,10 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
struct ib_conn *ib_conn = &iser_conn->ib_conn;
struct fast_reg_descriptor *desc = reg->mem_h;
- if (!reg->is_mr)
+ if (!desc)
return;
reg->mem_h = NULL;
- reg->is_mr = 0;
spin_lock_bh(&ib_conn->lock);
list_add_tail(&desc->list, &ib_conn->fastreg.pool);
spin_unlock_bh(&ib_conn->lock);
@@ -1049,7 +1085,7 @@ int iser_post_recvl(struct iser_conn *iser_conn)
sge.length = ISER_RX_LOGIN_SIZE;
sge.lkey = ib_conn->device->mr->lkey;
- rx_wr.wr_id = (unsigned long)iser_conn->login_resp_buf;
+ rx_wr.wr_id = (uintptr_t)iser_conn->login_resp_buf;
rx_wr.sg_list = &sge;
rx_wr.num_sge = 1;
rx_wr.next = NULL;
@@ -1073,7 +1109,7 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count)
for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
rx_desc = &iser_conn->rx_descs[my_rx_head];
- rx_wr->wr_id = (unsigned long)rx_desc;
+ rx_wr->wr_id = (uintptr_t)rx_desc;
rx_wr->sg_list = &rx_desc->rx_sg;
rx_wr->num_sge = 1;
rx_wr->next = rx_wr + 1;
@@ -1110,7 +1146,7 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
DMA_TO_DEVICE);
send_wr.next = NULL;
- send_wr.wr_id = (unsigned long)tx_desc;
+ send_wr.wr_id = (uintptr_t)tx_desc;
send_wr.sg_list = tx_desc->tx_sg;
send_wr.num_sge = tx_desc->num_sge;
send_wr.opcode = IB_WR_SEND;
@@ -1160,6 +1196,7 @@ static void
iser_handle_comp_error(struct ib_conn *ib_conn,
struct ib_wc *wc)
{
+ void *wr_id = (void *)(uintptr_t)wc->wr_id;
struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
ib_conn);
@@ -1168,8 +1205,8 @@ iser_handle_comp_error(struct ib_conn *ib_conn,
iscsi_conn_failure(iser_conn->iscsi_conn,
ISCSI_ERR_CONN_FAILED);
- if (is_iser_tx_desc(iser_conn, (void *)wc->wr_id)) {
- struct iser_tx_desc *desc = (struct iser_tx_desc *)wc->wr_id;
+ if (is_iser_tx_desc(iser_conn, wr_id)) {
+ struct iser_tx_desc *desc = wr_id;
if (desc->type == ISCSI_TX_DATAOUT)
kmem_cache_free(ig.desc_cache, desc);
@@ -1193,14 +1230,14 @@ static void iser_handle_wc(struct ib_wc *wc)
struct iser_rx_desc *rx_desc;
ib_conn = wc->qp->qp_context;
- if (wc->status == IB_WC_SUCCESS) {
+ if (likely(wc->status == IB_WC_SUCCESS)) {
if (wc->opcode == IB_WC_RECV) {
- rx_desc = (struct iser_rx_desc *)wc->wr_id;
+ rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id;
iser_rcv_completion(rx_desc, wc->byte_len,
ib_conn);
} else
if (wc->opcode == IB_WC_SEND) {
- tx_desc = (struct iser_tx_desc *)wc->wr_id;
+ tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
iser_snd_completion(tx_desc, ib_conn);
} else {
iser_err("Unknown wc opcode %d\n", wc->opcode);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 5461924c9f10..db3c8c851af1 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -2929,7 +2929,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
return -ENOMEM;
sep_opt = options;
- while ((p = strsep(&sep_opt, ",")) != NULL) {
+ while ((p = strsep(&sep_opt, ",\n")) != NULL) {
if (!*p)
continue;