diff options
author | Patrisious Haddad <phaddad@nvidia.com> | 2023-01-04 10:43:36 +0100 |
---|---|---|
committer | Leon Romanovsky <leon@kernel.org> | 2023-01-15 11:23:15 +0100 |
commit | 8067fd8b26bf5d5f9917e934d878e4f3794d082e (patch) | |
tree | ae2db4b173bf5a8be6666e244f45398d9af6200c /drivers/infiniband/hw | |
parent | RDMA/mlx: Calling qp event handler in workqueue context (diff) | |
download | linux-8067fd8b26bf5d5f9917e934d878e4f3794d082e.tar.xz linux-8067fd8b26bf5d5f9917e934d878e4f3794d082e.zip |
RDMA/mlx5: Print error syndrome in case of fatal QP errors
Print syndromes in case of fatal QP events. This is helpful for upper
level debugging, as there maybe no CQEs.
Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Link: https://lore.kernel.org/r/edc794f622a33e4ee12d7f5d218d1a59aa7c6af5.1672821186.git.leonro@nvidia.com
Reviewed-by: Saeed Mahameed <saeed@kernel.org>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r-- | drivers/infiniband/hw/mlx5/qp.c | 45 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/qp.h | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/qpc.c | 4 |
3 files changed, 48 insertions, 3 deletions
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 9b85e1be0aed..7cc3b973dec7 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -310,6 +310,44 @@ int mlx5_ib_read_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, void *buffer, return mlx5_ib_read_user_wqe_srq(srq, wqe_index, buffer, buflen, bc); } +static void mlx5_ib_qp_err_syndrome(struct ib_qp *ibqp) +{ + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + int outlen = MLX5_ST_SZ_BYTES(query_qp_out); + struct mlx5_ib_qp *qp = to_mqp(ibqp); + void *pas_ext_union, *err_syn; + u32 *outb; + int err; + + if (!MLX5_CAP_GEN(dev->mdev, qpc_extension) || + !MLX5_CAP_GEN(dev->mdev, qp_error_syndrome)) + return; + + outb = kzalloc(outlen, GFP_KERNEL); + if (!outb) + return; + + err = mlx5_core_qp_query(dev, &qp->trans_qp.base.mqp, outb, outlen, + true); + if (err) + goto out; + + pas_ext_union = + MLX5_ADDR_OF(query_qp_out, outb, qp_pas_or_qpc_ext_and_pas); + err_syn = MLX5_ADDR_OF(qpc_extension_and_pas_list_in, pas_ext_union, + qpc_data_extension.error_syndrome); + + pr_err("%s/%d: QP %d error: %s (0x%x 0x%x 0x%x)\n", + ibqp->device->name, ibqp->port, ibqp->qp_num, + ib_wc_status_msg( + MLX5_GET(cqe_error_syndrome, err_syn, syndrome)), + MLX5_GET(cqe_error_syndrome, err_syn, vendor_error_syndrome), + MLX5_GET(cqe_error_syndrome, err_syn, hw_syndrome_type), + MLX5_GET(cqe_error_syndrome, err_syn, hw_error_syndrome)); +out: + kfree(outb); +} + static void mlx5_ib_handle_qp_event(struct work_struct *_work) { struct mlx5_ib_qp_event_work *qpe_work = @@ -350,6 +388,10 @@ static void mlx5_ib_handle_qp_event(struct work_struct *_work) goto out; } + if ((event.event == IB_EVENT_QP_FATAL) || + (event.event == IB_EVENT_QP_ACCESS_ERR)) + mlx5_ib_qp_err_syndrome(ibqp); + ibqp->event_handler(&event, ibqp->qp_context); out: @@ -4862,7 +4904,8 @@ static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (!outb) return -ENOMEM; - err = mlx5_core_qp_query(dev, &qp->trans_qp.base.mqp, outb, outlen); + err = mlx5_core_qp_query(dev, &qp->trans_qp.base.mqp, outb, outlen, + false); if (err) goto out; diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h index fb2f4e030bb8..77f9b4a54816 100644 --- a/drivers/infiniband/hw/mlx5/qp.h +++ b/drivers/infiniband/hw/mlx5/qp.h @@ -20,7 +20,7 @@ int mlx5_core_qp_modify(struct mlx5_ib_dev *dev, u16 opcode, u32 opt_param_mask, int mlx5_core_destroy_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp); int mlx5_core_destroy_dct(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct); int mlx5_core_qp_query(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp, - u32 *out, int outlen); + u32 *out, int outlen, bool qpc_ext); int mlx5_core_dct_query(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct, u32 *out, int outlen); diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c index 604af1fd6397..bae0334d6e7f 100644 --- a/drivers/infiniband/hw/mlx5/qpc.c +++ b/drivers/infiniband/hw/mlx5/qpc.c @@ -505,12 +505,14 @@ void mlx5_cleanup_qp_table(struct mlx5_ib_dev *dev) } int mlx5_core_qp_query(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp, - u32 *out, int outlen) + u32 *out, int outlen, bool qpc_ext) { u32 in[MLX5_ST_SZ_DW(query_qp_in)] = {}; MLX5_SET(query_qp_in, in, opcode, MLX5_CMD_OP_QUERY_QP); MLX5_SET(query_qp_in, in, qpn, qp->qpn); + MLX5_SET(query_qp_in, in, qpc_ext, qpc_ext); + return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, outlen); } |