IB/mlx4: Reset flow support for IB kernel ULPs

The driver exposes interfaces that directly relate to HW state. Upon fatal error, consumers of these interfaces (ULPs) that rely on completion of all their posted work-request could hang, thereby introducing dependencies in shutdown order. To prevent this from happening, we manage the relevant resources (CQs, QPs) that are used by the device. Upon a fatal error, we now generate simulated completions for outstanding WQEs that were not completed at the time the HW was reset. It includes invoking the completion event handler for all involved CQs so that the ULPs will poll those CQs. When polled we return simulated CQEs with IB_WC_WR_FLUSH_ERR return code enabling ULPs to clean up their resources and not wait forever for completions upon receiving remove_one. The above change requires an extra check in the data path to make sure that when device is in error state, the simulated CQEs will be returned and no further WQEs will be posted. Signed-off-by: Yishai Hadas <yishaih@mellanox.com> Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
author: Yishai Hadas <yishaih@mellanox.com> 2015-02-08 10:49:34 +0100
committer: David S. Miller <davem@davemloft.net> 2015-02-09 23:03:53 +0100
commit: 35f05dabf95ac3ebc4c15bafd6833f7a3046e66f (patch)
tree: a39bb7c432f4e36467e61b316050e41ecd408b1f /drivers/infiniband/hw/mlx4/main.c
parent: IB/mlx4: Always use the correct port for mirrored multicast attachments (diff)
download: linux-35f05dabf95ac3ebc4c15bafd6833f7a3046e66f.tar.xz
linux-35f05dabf95ac3ebc4c15bafd6833f7a3046e66f.zip
1 files changed, 64 insertions, 0 deletions
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 3140da518a07..eb8e215f1613 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -2308,6 +2308,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 
 	spin_lock_init(&ibdev->sm_lock);
 	mutex_init(&ibdev->cap_mask_mutex);
+	INIT_LIST_HEAD(&ibdev->qp_list);
+	spin_lock_init(&ibdev->reset_flow_resource_lock);
 
 	if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED &&
 	    ib_num_ports) {
@@ -2622,6 +2624,67 @@ out:
 	return;
 }
 
+static void mlx4_ib_handle_catas_error(struct mlx4_ib_dev *ibdev)
+{
+	struct mlx4_ib_qp *mqp;
+	unsigned long flags_qp;
+	unsigned long flags_cq;
+	struct mlx4_ib_cq *send_mcq, *recv_mcq;
+	struct list_head    cq_notify_list;
+	struct mlx4_cq *mcq;
+	unsigned long flags;
+
+	pr_warn("mlx4_ib_handle_catas_error was started\n");
+	INIT_LIST_HEAD(&cq_notify_list);
+
+	/* Go over qp list reside on that ibdev, sync with create/destroy qp.*/
+	spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
+
+	list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
+		spin_lock_irqsave(&mqp->sq.lock, flags_qp);
+		if (mqp->sq.tail != mqp->sq.head) {
+			send_mcq = to_mcq(mqp->ibqp.send_cq);
+			spin_lock_irqsave(&send_mcq->lock, flags_cq);
+			if (send_mcq->mcq.comp &&
+			    mqp->ibqp.send_cq->comp_handler) {
+				if (!send_mcq->mcq.reset_notify_added) {
+					send_mcq->mcq.reset_notify_added = 1;
+					list_add_tail(&send_mcq->mcq.reset_notify,
+						      &cq_notify_list);
+				}
+			}
+			spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
+		}
+		spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
+		/* Now, handle the QP's receive queue */
+		spin_lock_irqsave(&mqp->rq.lock, flags_qp);
+		/* no handling is needed for SRQ */
+		if (!mqp->ibqp.srq) {
+			if (mqp->rq.tail != mqp->rq.head) {
+				recv_mcq = to_mcq(mqp->ibqp.recv_cq);
+				spin_lock_irqsave(&recv_mcq->lock, flags_cq);
+				if (recv_mcq->mcq.comp &&
+				    mqp->ibqp.recv_cq->comp_handler) {
+					if (!recv_mcq->mcq.reset_notify_added) {
+						recv_mcq->mcq.reset_notify_added = 1;
+						list_add_tail(&recv_mcq->mcq.reset_notify,
+							      &cq_notify_list);
+					}
+				}
+				spin_unlock_irqrestore(&recv_mcq->lock,
+						       flags_cq);
+			}
+		}
+		spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
+	}
+
+	list_for_each_entry(mcq, &cq_notify_list, reset_notify) {
+		mcq->comp(mcq);
+	}
+	spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
+	pr_warn("mlx4_ib_handle_catas_error ended\n");
+}
+
 static void handle_bonded_port_state_event(struct work_struct *work)
 {
 	struct ib_event_work *ew =
@@ -2701,6 +2764,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
 	case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
 		ibdev->ib_active = false;
 		ibev.event = IB_EVENT_DEVICE_FATAL;
+		mlx4_ib_handle_catas_error(ibdev);
 		break;
 
 	case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
author	Yishai Hadas <yishaih@mellanox.com>	2015-02-08 10:49:34 +0100
committer	David S. Miller <davem@davemloft.net>	2015-02-09 23:03:53 +0100
commit	35f05dabf95ac3ebc4c15bafd6833f7a3046e66f (patch)
tree	a39bb7c432f4e36467e61b316050e41ecd408b1f /drivers/infiniband/hw/mlx4/main.c
parent	IB/mlx4: Always use the correct port for mirrored multicast attachments (diff)
download	linux-35f05dabf95ac3ebc4c15bafd6833f7a3046e66f.tar.xz linux-35f05dabf95ac3ebc4c15bafd6833f7a3046e66f.zip