Diffstat (limited to 'drivers/infiniband/hw/mlx5')
 -rw-r--r--  drivers/infiniband/hw/mlx5/cmd.c     | 129
 -rw-r--r--  drivers/infiniband/hw/mlx5/cmd.h     |  14
 -rw-r--r--  drivers/infiniband/hw/mlx5/cq.c      |  34
 -rw-r--r--  drivers/infiniband/hw/mlx5/devx.c    | 369
 -rw-r--r--  drivers/infiniband/hw/mlx5/flow.c    | 393
 -rw-r--r--  drivers/infiniband/hw/mlx5/ib_rep.c  |   3
 -rw-r--r--  drivers/infiniband/hw/mlx5/main.c    | 546
 -rw-r--r--  drivers/infiniband/hw/mlx5/mem.c     |   9
 -rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h |  99
 -rw-r--r--  drivers/infiniband/hw/mlx5/mr.c      |  26
 -rw-r--r--  drivers/infiniband/hw/mlx5/odp.c     | 123
 -rw-r--r--  drivers/infiniband/hw/mlx5/qp.c      | 491
 -rw-r--r--  drivers/infiniband/hw/mlx5/srq.c     |   1
 13 files changed, 1646 insertions(+), 591 deletions(-)
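The devx.c changes below stop treating the 32-bit firmware obj_id as globally unique and instead key each DEVX object by a 64-bit value that also encodes the opcode of the command that created it. The following is a minimal standalone sketch of that encoding, not part of the patch: only the shift-and-or logic mirrors the new get_enc_obj_id() helper, while the EX_CMD_OP_* values and the main() driver are illustrative placeholders rather than the real MLX5_CMD_OP_* constants.

/*
 * Standalone illustration (not from the patch) of the obj_id encoding used
 * by devx.c below: the creator opcode occupies the upper 32 bits, so two
 * firmware objects of different types that happen to share the same 32-bit
 * id still map to distinct 64-bit DEVX ids.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_CMD_OP_CREATE_CQ 0x400	/* placeholder opcode */
#define EX_CMD_OP_CREATE_QP 0x500	/* placeholder opcode */

static uint64_t get_enc_obj_id(uint16_t opcode, uint32_t obj_id)
{
	return ((uint64_t)opcode << 32) | obj_id;
}

int main(void)
{
	uint32_t fw_id = 7;	/* same 32-bit id returned for two object types */
	uint64_t cq_id = get_enc_obj_id(EX_CMD_OP_CREATE_CQ, fw_id);
	uint64_t qp_id = get_enc_obj_id(EX_CMD_OP_CREATE_QP, fw_id);

	/* Distinct 64-bit ids: a QP command cannot match the CQ object. */
	printf("cq_id=0x%llx qp_id=0x%llx same=%d\n",
	       (unsigned long long)cq_id, (unsigned long long)qp_id,
	       cq_id == qp_id);
	return 0;
}

With this encoding, a modify or query command is only accepted against a DEVX object whose creator opcode matches, even when two objects of different types share the same 32-bit firmware id.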
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index c84fef9a8a08..ca060a2e2b36 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -197,3 +197,132 @@ int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out)
 	return mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPCNT,
 				    0, 0);
 }
+
+void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid)
+{
+	u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {};
+	u32 out[MLX5_ST_SZ_DW(destroy_tir_out)] = {};
+
+	MLX5_SET(destroy_tir_in, in, opcode, MLX5_CMD_OP_DESTROY_TIR);
+	MLX5_SET(destroy_tir_in, in, tirn, tirn);
+	MLX5_SET(destroy_tir_in, in, uid, uid);
+	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid)
+{
+	u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {0};
+	u32 out[MLX5_ST_SZ_DW(destroy_tis_out)] = {0};
+
+	MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS);
+	MLX5_SET(destroy_tis_in, in, tisn, tisn);
+	MLX5_SET(destroy_tis_in, in, uid, uid);
+	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+void mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid)
+{
+	u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {};
+	u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {};
+
+	MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
+	MLX5_SET(destroy_rqt_in, in, rqtn, rqtn);
+	MLX5_SET(destroy_rqt_in, in, uid, uid);
+	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn,
+				    u16 uid)
+{
+	u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)] = {0};
+	u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {0};
+	int err;
+
+	MLX5_SET(alloc_transport_domain_in, in, opcode,
+		 MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN);
+
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (!err)
+		*tdn = MLX5_GET(alloc_transport_domain_out, out,
+				transport_domain);
+
+	return err;
+}
+
+void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn,
+				       u16 uid)
+{
+	u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)] = {0};
+	u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)] = {0};
+
+	MLX5_SET(dealloc_transport_domain_in, in, opcode,
+		 MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
+	MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn);
+	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid)
+{
+	u32 out[MLX5_ST_SZ_DW(dealloc_pd_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {};
+
+	MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD);
+	MLX5_SET(dealloc_pd_in, in, pd, pdn);
+	MLX5_SET(dealloc_pd_in, in, uid, uid);
+	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_cmd_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
+			u32 qpn, u16 uid)
+{
+	u32 out[MLX5_ST_SZ_DW(attach_to_mcg_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(attach_to_mcg_in)] = {};
+	void *gid;
+
+	MLX5_SET(attach_to_mcg_in, in, opcode, MLX5_CMD_OP_ATTACH_TO_MCG);
+	MLX5_SET(attach_to_mcg_in, in, qpn, qpn);
+	MLX5_SET(attach_to_mcg_in, in, uid, uid);
+	gid = MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid);
+	memcpy(gid, mgid, sizeof(*mgid));
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
+			u32 qpn, u16 uid)
+{
+	u32 out[MLX5_ST_SZ_DW(detach_from_mcg_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(detach_from_mcg_in)] = {};
+	void *gid;
+
+ MLX5_SET(detach_from_mcg_in, in, opcode, MLX5_CMD_OP_DETACH_FROM_MCG); + MLX5_SET(detach_from_mcg_in, in, qpn, qpn); + MLX5_SET(detach_from_mcg_in, in, uid, uid); + gid = MLX5_ADDR_OF(detach_from_mcg_in, in, multicast_gid); + memcpy(gid, mgid, sizeof(*mgid)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid) +{ + u32 out[MLX5_ST_SZ_DW(alloc_xrcd_out)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_xrcd_in)] = {}; + int err; + + MLX5_SET(alloc_xrcd_in, in, opcode, MLX5_CMD_OP_ALLOC_XRCD); + MLX5_SET(alloc_xrcd_in, in, uid, uid); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (!err) + *xrcdn = MLX5_GET(alloc_xrcd_out, out, xrcd); + return err; +} + +int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid) +{ + u32 out[MLX5_ST_SZ_DW(dealloc_xrcd_out)] = {}; + u32 in[MLX5_ST_SZ_DW(dealloc_xrcd_in)] = {}; + + MLX5_SET(dealloc_xrcd_in, in, opcode, MLX5_CMD_OP_DEALLOC_XRCD); + MLX5_SET(dealloc_xrcd_in, in, xrcd, xrcdn); + MLX5_SET(dealloc_xrcd_in, in, uid, uid); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index 88cbb1c41703..c03c56455534 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -47,4 +47,18 @@ int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev, int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr, u64 length, u32 alignment); int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length); +void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid); +void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid); +void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid); +void mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid); +int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn, + u16 uid); +void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn, + u16 uid); +int mlx5_cmd_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, + u32 qpn, u16 uid); +int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, + u32 qpn, u16 uid); +int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid); +int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid); #endif /* MLX5_IB_CMD_H */ diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 088205d7f1a1..7d769b5538b4 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -393,7 +393,7 @@ static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf) { - mlx5_frag_buf_free(dev->mdev, &buf->fbc.frag_buf); + mlx5_frag_buf_free(dev->mdev, &buf->frag_buf); } static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe, @@ -728,16 +728,11 @@ static int alloc_cq_frag_buf(struct mlx5_ib_dev *dev, int nent, int cqe_size) { - struct mlx5_frag_buf_ctrl *c = &buf->fbc; - struct mlx5_frag_buf *frag_buf = &c->frag_buf; - u32 cqc_buff[MLX5_ST_SZ_DW(cqc)] = {0}; + struct mlx5_frag_buf *frag_buf = &buf->frag_buf; + u8 log_wq_stride = 6 + (cqe_size == 128 ? 1 : 0); + u8 log_wq_sz = ilog2(cqe_size); int err; - MLX5_SET(cqc, cqc_buff, log_cq_size, ilog2(cqe_size)); - MLX5_SET(cqc, cqc_buff, cqe_sz, (cqe_size == 128) ? 
1 : 0); - - mlx5_core_init_cq_frag_buf(&buf->fbc, cqc_buff); - err = mlx5_frag_buf_alloc_node(dev->mdev, nent * cqe_size, frag_buf, @@ -745,6 +740,8 @@ static int alloc_cq_frag_buf(struct mlx5_ib_dev *dev, if (err) return err; + mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc); + buf->cqe_size = cqe_size; buf->nent = nent; @@ -877,6 +874,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, cq->private_flags |= MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD; } + MLX5_SET(create_cq_in, *cqb, uid, to_mucontext(context)->devx_uid); return 0; err_cqb: @@ -934,7 +932,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, *inlen = MLX5_ST_SZ_BYTES(create_cq_in) + MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * - cq->buf.fbc.frag_buf.npages; + cq->buf.frag_buf.npages; *cqb = kvzalloc(*inlen, GFP_KERNEL); if (!*cqb) { err = -ENOMEM; @@ -942,11 +940,11 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, } pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas); - mlx5_fill_page_frag_array(&cq->buf.fbc.frag_buf, pas); + mlx5_fill_page_frag_array(&cq->buf.frag_buf, pas); cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context); MLX5_SET(cqc, cqc, log_page_size, - cq->buf.fbc.frag_buf.page_shift - + cq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); *index = dev->mdev->priv.uar->index; @@ -1365,11 +1363,10 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) cqe_size = 64; err = resize_kernel(dev, cq, entries, cqe_size); if (!err) { - struct mlx5_frag_buf_ctrl *c; + struct mlx5_frag_buf *frag_buf = &cq->resize_buf->frag_buf; - c = &cq->resize_buf->fbc; - npas = c->frag_buf.npages; - page_shift = c->frag_buf.page_shift; + npas = frag_buf->npages; + page_shift = frag_buf->page_shift; } } @@ -1390,8 +1387,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift, pas, 0); else - mlx5_fill_page_frag_array(&cq->resize_buf->fbc.frag_buf, - pas); + mlx5_fill_page_frag_array(&cq->resize_buf->frag_buf, pas); MLX5_SET(modify_cq_in, in, modify_field_select_resize_field_select.resize_field_select.resize_field_select, @@ -1459,7 +1455,7 @@ ex: return err; } -int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq) +int mlx5_ib_get_cqe_size(struct ib_cq *ibcq) { struct mlx5_ib_cq *cq; diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index ac116d63e466..61aab7c0c513 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -19,7 +19,7 @@ #define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in) struct devx_obj { struct mlx5_core_dev *mdev; - u32 obj_id; + u64 obj_id; u32 dinlen; /* destroy inbox length */ u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW]; }; @@ -45,13 +45,14 @@ static struct mlx5_ib_ucontext *devx_ufile2uctx(struct ib_uverbs_file *file) return to_mucontext(ib_uverbs_get_ucontext(file)); } -int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context) +int mlx5_ib_devx_create(struct mlx5_ib_dev *dev) { u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0}; u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; u64 general_obj_types; void *hdr; int err; + u16 uid; hdr = MLX5_ADDR_OF(create_uctx_in, in, hdr); @@ -60,9 +61,6 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *contex !(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UMEM)) return -EINVAL; - if (!capable(CAP_NET_RAW)) - return -EPERM; - 
MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_UCTX); @@ -70,19 +68,18 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *contex if (err) return err; - context->devx_uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); - return 0; + uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + return uid; } -void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, - struct mlx5_ib_ucontext *context) +void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) { u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {0}; u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_UCTX); - MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, context->devx_uid); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, uid); mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); } @@ -109,150 +106,218 @@ bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type) } } +/* + * As the obj_id in the firmware is not globally unique the object type + * must be considered upon checking for a valid object id. + * For that the opcode of the creator command is encoded as part of the obj_id. + */ +static u64 get_enc_obj_id(u16 opcode, u32 obj_id) +{ + return ((u64)opcode << 32) | obj_id; +} + static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in) { u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); - u32 obj_id; + u64 obj_id; switch (opcode) { case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT: case MLX5_CMD_OP_QUERY_GENERAL_OBJECT: - obj_id = MLX5_GET(general_obj_in_cmd_hdr, in, obj_id); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_GENERAL_OBJECT, + MLX5_GET(general_obj_in_cmd_hdr, in, + obj_id)); break; case MLX5_CMD_OP_QUERY_MKEY: - obj_id = MLX5_GET(query_mkey_in, in, mkey_index); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_MKEY, + MLX5_GET(query_mkey_in, in, + mkey_index)); break; case MLX5_CMD_OP_QUERY_CQ: - obj_id = MLX5_GET(query_cq_in, in, cqn); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ, + MLX5_GET(query_cq_in, in, cqn)); break; case MLX5_CMD_OP_MODIFY_CQ: - obj_id = MLX5_GET(modify_cq_in, in, cqn); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ, + MLX5_GET(modify_cq_in, in, cqn)); break; case MLX5_CMD_OP_QUERY_SQ: - obj_id = MLX5_GET(query_sq_in, in, sqn); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ, + MLX5_GET(query_sq_in, in, sqn)); break; case MLX5_CMD_OP_MODIFY_SQ: - obj_id = MLX5_GET(modify_sq_in, in, sqn); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ, + MLX5_GET(modify_sq_in, in, sqn)); break; case MLX5_CMD_OP_QUERY_RQ: - obj_id = MLX5_GET(query_rq_in, in, rqn); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ, + MLX5_GET(query_rq_in, in, rqn)); break; case MLX5_CMD_OP_MODIFY_RQ: - obj_id = MLX5_GET(modify_rq_in, in, rqn); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ, + MLX5_GET(modify_rq_in, in, rqn)); break; case MLX5_CMD_OP_QUERY_RMP: - obj_id = MLX5_GET(query_rmp_in, in, rmpn); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RMP, + MLX5_GET(query_rmp_in, in, rmpn)); break; case MLX5_CMD_OP_MODIFY_RMP: - obj_id = MLX5_GET(modify_rmp_in, in, rmpn); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RMP, + MLX5_GET(modify_rmp_in, in, rmpn)); break; case MLX5_CMD_OP_QUERY_RQT: - obj_id = MLX5_GET(query_rqt_in, in, rqtn); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT, + MLX5_GET(query_rqt_in, in, rqtn)); break; case 
MLX5_CMD_OP_MODIFY_RQT: - obj_id = MLX5_GET(modify_rqt_in, in, rqtn); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT, + MLX5_GET(modify_rqt_in, in, rqtn)); break; case MLX5_CMD_OP_QUERY_TIR: - obj_id = MLX5_GET(query_tir_in, in, tirn); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR, + MLX5_GET(query_tir_in, in, tirn)); break; case MLX5_CMD_OP_MODIFY_TIR: - obj_id = MLX5_GET(modify_tir_in, in, tirn); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR, + MLX5_GET(modify_tir_in, in, tirn)); break; case MLX5_CMD_OP_QUERY_TIS: - obj_id = MLX5_GET(query_tis_in, in, tisn); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS, + MLX5_GET(query_tis_in, in, tisn)); break; case MLX5_CMD_OP_MODIFY_TIS: - obj_id = MLX5_GET(modify_tis_in, in, tisn); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS, + MLX5_GET(modify_tis_in, in, tisn)); break; case MLX5_CMD_OP_QUERY_FLOW_TABLE: - obj_id = MLX5_GET(query_flow_table_in, in, table_id); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_TABLE, + MLX5_GET(query_flow_table_in, in, + table_id)); break; case MLX5_CMD_OP_MODIFY_FLOW_TABLE: - obj_id = MLX5_GET(modify_flow_table_in, in, table_id); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_TABLE, + MLX5_GET(modify_flow_table_in, in, + table_id)); break; case MLX5_CMD_OP_QUERY_FLOW_GROUP: - obj_id = MLX5_GET(query_flow_group_in, in, group_id); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_GROUP, + MLX5_GET(query_flow_group_in, in, + group_id)); break; case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: - obj_id = MLX5_GET(query_fte_in, in, flow_index); + obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY, + MLX5_GET(query_fte_in, in, + flow_index)); break; case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: - obj_id = MLX5_GET(set_fte_in, in, flow_index); + obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY, + MLX5_GET(set_fte_in, in, flow_index)); break; case MLX5_CMD_OP_QUERY_Q_COUNTER: - obj_id = MLX5_GET(query_q_counter_in, in, counter_set_id); + obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_Q_COUNTER, + MLX5_GET(query_q_counter_in, in, + counter_set_id)); break; case MLX5_CMD_OP_QUERY_FLOW_COUNTER: - obj_id = MLX5_GET(query_flow_counter_in, in, flow_counter_id); + obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_FLOW_COUNTER, + MLX5_GET(query_flow_counter_in, in, + flow_counter_id)); break; case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT: - obj_id = MLX5_GET(general_obj_in_cmd_hdr, in, obj_id); + obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT, + MLX5_GET(general_obj_in_cmd_hdr, in, + obj_id)); break; case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: - obj_id = MLX5_GET(query_scheduling_element_in, in, - scheduling_element_id); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT, + MLX5_GET(query_scheduling_element_in, + in, scheduling_element_id)); break; case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT: - obj_id = MLX5_GET(modify_scheduling_element_in, in, - scheduling_element_id); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT, + MLX5_GET(modify_scheduling_element_in, + in, scheduling_element_id)); break; case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: - obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port); + obj_id = get_enc_obj_id(MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT, + MLX5_GET(add_vxlan_udp_dport_in, in, + vxlan_udp_port)); break; case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY: - obj_id = MLX5_GET(query_l2_table_entry_in, in, table_index); + obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_L2_TABLE_ENTRY, + MLX5_GET(query_l2_table_entry_in, in, + table_index)); break; case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: - obj_id 
= MLX5_GET(set_l2_table_entry_in, in, table_index); + obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_L2_TABLE_ENTRY, + MLX5_GET(set_l2_table_entry_in, in, + table_index)); break; case MLX5_CMD_OP_QUERY_QP: - obj_id = MLX5_GET(query_qp_in, in, qpn); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, + MLX5_GET(query_qp_in, in, qpn)); break; case MLX5_CMD_OP_RST2INIT_QP: - obj_id = MLX5_GET(rst2init_qp_in, in, qpn); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, + MLX5_GET(rst2init_qp_in, in, qpn)); break; case MLX5_CMD_OP_INIT2RTR_QP: - obj_id = MLX5_GET(init2rtr_qp_in, in, qpn); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, + MLX5_GET(init2rtr_qp_in, in, qpn)); break; case MLX5_CMD_OP_RTR2RTS_QP: - obj_id = MLX5_GET(rtr2rts_qp_in, in, qpn); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, + MLX5_GET(rtr2rts_qp_in, in, qpn)); break; case MLX5_CMD_OP_RTS2RTS_QP: - obj_id = MLX5_GET(rts2rts_qp_in, in, qpn); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, + MLX5_GET(rts2rts_qp_in, in, qpn)); break; case MLX5_CMD_OP_SQERR2RTS_QP: - obj_id = MLX5_GET(sqerr2rts_qp_in, in, qpn); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, + MLX5_GET(sqerr2rts_qp_in, in, qpn)); break; case MLX5_CMD_OP_2ERR_QP: - obj_id = MLX5_GET(qp_2err_in, in, qpn); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, + MLX5_GET(qp_2err_in, in, qpn)); break; case MLX5_CMD_OP_2RST_QP: - obj_id = MLX5_GET(qp_2rst_in, in, qpn); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, + MLX5_GET(qp_2rst_in, in, qpn)); break; case MLX5_CMD_OP_QUERY_DCT: - obj_id = MLX5_GET(query_dct_in, in, dctn); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT, + MLX5_GET(query_dct_in, in, dctn)); break; case MLX5_CMD_OP_QUERY_XRQ: - obj_id = MLX5_GET(query_xrq_in, in, xrqn); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ, + MLX5_GET(query_xrq_in, in, xrqn)); break; case MLX5_CMD_OP_QUERY_XRC_SRQ: - obj_id = MLX5_GET(query_xrc_srq_in, in, xrc_srqn); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRC_SRQ, + MLX5_GET(query_xrc_srq_in, in, + xrc_srqn)); break; case MLX5_CMD_OP_ARM_XRC_SRQ: - obj_id = MLX5_GET(arm_xrc_srq_in, in, xrc_srqn); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRC_SRQ, + MLX5_GET(arm_xrc_srq_in, in, xrc_srqn)); break; case MLX5_CMD_OP_QUERY_SRQ: - obj_id = MLX5_GET(query_srq_in, in, srqn); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SRQ, + MLX5_GET(query_srq_in, in, srqn)); break; case MLX5_CMD_OP_ARM_RQ: - obj_id = MLX5_GET(arm_rq_in, in, srq_number); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ, + MLX5_GET(arm_rq_in, in, srq_number)); break; case MLX5_CMD_OP_DRAIN_DCT: case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION: - obj_id = MLX5_GET(drain_dct_in, in, dctn); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT, + MLX5_GET(drain_dct_in, in, dctn)); break; case MLX5_CMD_OP_ARM_XRQ: - obj_id = MLX5_GET(arm_xrq_in, in, xrqn); + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ, + MLX5_GET(arm_xrq_in, in, xrqn)); break; default: return false; @@ -264,11 +329,102 @@ static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in) return false; } -static bool devx_is_obj_create_cmd(const void *in) +static void devx_set_umem_valid(const void *in) { u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); switch (opcode) { + case MLX5_CMD_OP_CREATE_MKEY: + MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1); + break; + case MLX5_CMD_OP_CREATE_CQ: + { + void *cqc; + + MLX5_SET(create_cq_in, in, cq_umem_valid, 1); + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); + MLX5_SET(cqc, cqc, dbr_umem_valid, 1); + break; + } + case MLX5_CMD_OP_CREATE_QP: 
+ { + void *qpc; + + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + MLX5_SET(qpc, qpc, dbr_umem_valid, 1); + MLX5_SET(create_qp_in, in, wq_umem_valid, 1); + break; + } + + case MLX5_CMD_OP_CREATE_RQ: + { + void *rqc, *wq; + + rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); + wq = MLX5_ADDR_OF(rqc, rqc, wq); + MLX5_SET(wq, wq, dbr_umem_valid, 1); + MLX5_SET(wq, wq, wq_umem_valid, 1); + break; + } + + case MLX5_CMD_OP_CREATE_SQ: + { + void *sqc, *wq; + + sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); + wq = MLX5_ADDR_OF(sqc, sqc, wq); + MLX5_SET(wq, wq, dbr_umem_valid, 1); + MLX5_SET(wq, wq, wq_umem_valid, 1); + break; + } + + case MLX5_CMD_OP_MODIFY_CQ: + MLX5_SET(modify_cq_in, in, cq_umem_valid, 1); + break; + + case MLX5_CMD_OP_CREATE_RMP: + { + void *rmpc, *wq; + + rmpc = MLX5_ADDR_OF(create_rmp_in, in, ctx); + wq = MLX5_ADDR_OF(rmpc, rmpc, wq); + MLX5_SET(wq, wq, dbr_umem_valid, 1); + MLX5_SET(wq, wq, wq_umem_valid, 1); + break; + } + + case MLX5_CMD_OP_CREATE_XRQ: + { + void *xrqc, *wq; + + xrqc = MLX5_ADDR_OF(create_xrq_in, in, xrq_context); + wq = MLX5_ADDR_OF(xrqc, xrqc, wq); + MLX5_SET(wq, wq, dbr_umem_valid, 1); + MLX5_SET(wq, wq, wq_umem_valid, 1); + break; + } + + case MLX5_CMD_OP_CREATE_XRC_SRQ: + { + void *xrc_srqc; + + MLX5_SET(create_xrc_srq_in, in, xrc_srq_umem_valid, 1); + xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, in, + xrc_srq_context_entry); + MLX5_SET(xrc_srqc, xrc_srqc, dbr_umem_valid, 1); + break; + } + + default: + return; + } +} + +static bool devx_is_obj_create_cmd(const void *in, u16 *opcode) +{ + *opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + + switch (*opcode) { case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: case MLX5_CMD_OP_CREATE_MKEY: case MLX5_CMD_OP_CREATE_CQ: @@ -284,7 +440,7 @@ static bool devx_is_obj_create_cmd(const void *in) case MLX5_CMD_OP_CREATE_FLOW_TABLE: case MLX5_CMD_OP_CREATE_FLOW_GROUP: case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: - case MLX5_CMD_OP_ALLOC_ENCAP_HEADER: + case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT: case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: @@ -385,12 +541,49 @@ static bool devx_is_obj_query_cmd(const void *in) } } +static bool devx_is_whitelist_cmd(void *in) +{ + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + + switch (opcode) { + case MLX5_CMD_OP_QUERY_HCA_CAP: + case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: + return true; + default: + return false; + } +} + +static int devx_get_uid(struct mlx5_ib_ucontext *c, void *cmd_in) +{ + if (devx_is_whitelist_cmd(cmd_in)) { + struct mlx5_ib_dev *dev; + + if (c->devx_uid) + return c->devx_uid; + + dev = to_mdev(c->ibucontext.device); + if (dev->devx_whitelist_uid) + return dev->devx_whitelist_uid; + + return -EOPNOTSUPP; + } + + if (!c->devx_uid) + return -EINVAL; + + if (!capable(CAP_NET_RAW)) + return -EPERM; + + return c->devx_uid; +} static bool devx_is_general_cmd(void *in) { u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); switch (opcode) { case MLX5_CMD_OP_QUERY_HCA_CAP: + case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: case MLX5_CMD_OP_QUERY_VPORT_STATE: case MLX5_CMD_OP_QUERY_ADAPTER: case MLX5_CMD_OP_QUERY_ISSI: @@ -498,14 +691,16 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)( MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT); void *cmd_out; int err; + int uid; c = devx_ufile2uctx(file); if (IS_ERR(c)) return PTR_ERR(c); dev = to_mdev(c->ibucontext.device); - if (!c->devx_uid) - return -EPERM; + uid = devx_get_uid(c, cmd_in); + if (uid < 0) + return uid; /* Only white list of some general HCA 
commands are allowed for this method. */ if (!devx_is_general_cmd(cmd_in)) @@ -515,7 +710,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)( if (IS_ERR(cmd_out)) return PTR_ERR(cmd_out); - MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); + MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); err = mlx5_cmd_exec(dev->mdev, cmd_in, uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN), cmd_out, cmd_out_len); @@ -627,9 +822,9 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_FLOW_COUNTER); break; - case MLX5_CMD_OP_ALLOC_ENCAP_HEADER: + case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT: MLX5_SET(general_obj_in_cmd_hdr, din, opcode, - MLX5_CMD_OP_DEALLOC_ENCAP_HEADER); + MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT); break; case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: MLX5_SET(general_obj_in_cmd_hdr, din, opcode, @@ -723,13 +918,18 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE); struct mlx5_ib_ucontext *c = to_mucontext(uobj->context); struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device); + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; struct devx_obj *obj; int err; + int uid; + u32 obj_id; + u16 opcode; - if (!c->devx_uid) - return -EPERM; + uid = devx_get_uid(c, cmd_in); + if (uid < 0) + return uid; - if (!devx_is_obj_create_cmd(cmd_in)) + if (!devx_is_obj_create_cmd(cmd_in, &opcode)) return -EINVAL; cmd_out = uverbs_zalloc(attrs, cmd_out_len); @@ -740,7 +940,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( if (!obj) return -ENOMEM; - MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); + MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); + devx_set_umem_valid(cmd_in); + err = mlx5_cmd_exec(dev->mdev, cmd_in, uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN), cmd_out, cmd_out_len); @@ -749,15 +951,19 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( uobj->object = obj; obj->mdev = dev->mdev; - devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen, &obj->obj_id); + devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen, + &obj_id); WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32)); err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len); if (err) - goto obj_free; + goto obj_destroy; + obj->obj_id = get_enc_obj_id(opcode, obj_id); return 0; +obj_destroy: + mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); obj_free: kfree(obj); return err; @@ -775,9 +981,11 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( struct devx_obj *obj = uobj->object; void *cmd_out; int err; + int uid; - if (!c->devx_uid) - return -EPERM; + uid = devx_get_uid(c, cmd_in); + if (uid < 0) + return uid; if (!devx_is_obj_modify_cmd(cmd_in)) return -EINVAL; @@ -789,7 +997,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( if (IS_ERR(cmd_out)) return PTR_ERR(cmd_out); - MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); + MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); + devx_set_umem_valid(cmd_in); + err = mlx5_cmd_exec(obj->mdev, cmd_in, uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN), cmd_out, cmd_out_len); @@ -812,9 +1022,11 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( struct devx_obj *obj = uobj->object; void *cmd_out; int err; + int uid; - if (!c->devx_uid) - return -EPERM; + uid = devx_get_uid(c, cmd_in); + if (uid < 0) + return 
uid; if (!devx_is_obj_query_cmd(cmd_in)) return -EINVAL; @@ -826,7 +1038,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( if (IS_ERR(cmd_out)) return PTR_ERR(cmd_out); - MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); + MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); err = mlx5_cmd_exec(obj->mdev, cmd_in, uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN), cmd_out, cmd_out_len); @@ -925,6 +1137,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( int err; if (!c->devx_uid) + return -EINVAL; + + if (!capable(CAP_NET_RAW)) return -EPERM; obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL); diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index 1a29f47f836e..f86cdcafdafc 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -7,7 +7,9 @@ #include <rdma/ib_verbs.h> #include <rdma/uverbs_types.h> #include <rdma/uverbs_ioctl.h> +#include <rdma/uverbs_std_types.h> #include <rdma/mlx5_user_ioctl_cmds.h> +#include <rdma/mlx5_user_ioctl_verbs.h> #include <rdma/ib_umem.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/fs.h> @@ -16,6 +18,24 @@ #define UVERBS_MODULE_NAME mlx5_ib #include <rdma/uverbs_named_ioctl.h> +static int +mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type, + enum mlx5_flow_namespace_type *namespace) +{ + switch (table_type) { + case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX: + *namespace = MLX5_FLOW_NAMESPACE_BYPASS; + break; + case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX: + *namespace = MLX5_FLOW_NAMESPACE_EGRESS; + break; + default: + return -EINVAL; + } + + return 0; +} + static const struct uverbs_attr_spec mlx5_ib_flow_type[] = { [MLX5_IB_FLOW_TYPE_NORMAL] = { .type = UVERBS_ATTR_TYPE_PTR_IN, @@ -38,11 +58,15 @@ static const struct uverbs_attr_spec mlx5_ib_flow_type[] = { }, }; +#define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2 static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { + struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG}; struct mlx5_ib_flow_handler *flow_handler; struct mlx5_ib_flow_matcher *fs_matcher; + struct ib_uobject **arr_flow_actions; + struct ib_uflow_resources *uflow_res; void *devx_obj; int dest_id, dest_type; void *cmd_in; @@ -52,6 +76,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE); struct mlx5_ib_dev *dev = to_mdev(uobj->context->device); + int len, ret, i; if (!capable(CAP_NET_RAW)) return -EPERM; @@ -61,7 +86,14 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( dest_qp = uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_QP); - if ((dest_devx && dest_qp) || (!dest_devx && !dest_qp)) + fs_matcher = uverbs_attr_get_obj(attrs, + MLX5_IB_ATTR_CREATE_FLOW_MATCHER); + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS && + ((dest_devx && dest_qp) || (!dest_devx && !dest_qp))) + return -EINVAL; + + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS && + (dest_devx || dest_qp)) return -EINVAL; if (dest_devx) { @@ -75,7 +107,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( */ if (!mlx5_ib_devx_is_flow_dest(devx_obj, &dest_id, &dest_type)) return -EINVAL; - } else { + } else if (dest_qp) { struct mlx5_ib_qp *mqp; qp = uverbs_attr_get_obj(attrs, @@ -92,6 +124,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( else dest_id = mqp->raw_packet_qp.rq.tirn; dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR; + } 
else { + dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT; } if (dev->rep) @@ -101,16 +135,48 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); inlen = uverbs_attr_get_len(attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); - fs_matcher = uverbs_attr_get_obj(attrs, - MLX5_IB_ATTR_CREATE_FLOW_MATCHER); - flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, cmd_in, inlen, + + uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS); + if (!uflow_res) + return -ENOMEM; + + len = uverbs_attr_get_uobjs_arr(attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions); + for (i = 0; i < len; i++) { + struct mlx5_ib_flow_action *maction = + to_mflow_act(arr_flow_actions[i]->object); + + ret = parse_flow_flow_action(maction, false, &flow_act); + if (ret) + goto err_out; + flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE, + arr_flow_actions[i]->object); + } + + ret = uverbs_copy_from(&flow_act.flow_tag, attrs, + MLX5_IB_ATTR_CREATE_FLOW_TAG); + if (!ret) { + if (flow_act.flow_tag >= BIT(24)) { + ret = -EINVAL; + goto err_out; + } + flow_act.flags |= FLOW_ACT_HAS_TAG; + } + + flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, &flow_act, + cmd_in, inlen, dest_id, dest_type); - if (IS_ERR(flow_handler)) - return PTR_ERR(flow_handler); + if (IS_ERR(flow_handler)) { + ret = PTR_ERR(flow_handler); + goto err_out; + } - ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev); + ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res); return 0; +err_out: + ib_uverbs_flow_resources_free(uflow_res); + return ret; } static int flow_matcher_cleanup(struct ib_uobject *uobject, @@ -134,12 +200,14 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE); struct mlx5_ib_dev *dev = to_mdev(uobj->context->device); struct mlx5_ib_flow_matcher *obj; + u32 flags; int err; obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL); if (!obj) return -ENOMEM; + obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS; obj->mask_len = uverbs_attr_get_len( attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK); err = uverbs_copy_from(&obj->matcher_mask, @@ -165,6 +233,19 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( if (err) goto end; + err = uverbs_get_flags32(&flags, attrs, + MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, + IB_FLOW_ATTR_FLAGS_EGRESS); + if (err) + goto end; + + if (flags) { + err = mlx5_ib_ft_type_to_namespace( + MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX, &obj->ns_type); + if (err) + goto end; + } + uobj->object = obj; obj->mdev = dev->mdev; atomic_set(&obj->usecnt, 0); @@ -175,6 +256,248 @@ end: return err; } +void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction) +{ + switch (maction->flow_action_raw.sub_type) { + case MLX5_IB_FLOW_ACTION_MODIFY_HEADER: + mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev, + maction->flow_action_raw.action_id); + break; + case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT: + mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev, + maction->flow_action_raw.action_id); + break; + case MLX5_IB_FLOW_ACTION_DECAP: + break; + default: + break; + } +} + +static struct ib_flow_action * +mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev, + enum mlx5_ib_uapi_flow_table_type ft_type, + u8 num_actions, void *in) +{ + enum mlx5_flow_namespace_type namespace; + struct mlx5_ib_flow_action *maction; + int ret; + + ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace); + if (ret) + return ERR_PTR(-EINVAL); + + 
maction = kzalloc(sizeof(*maction), GFP_KERNEL); + if (!maction) + return ERR_PTR(-ENOMEM); + + ret = mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in, + &maction->flow_action_raw.action_id); + + if (ret) { + kfree(maction); + return ERR_PTR(ret); + } + maction->flow_action_raw.sub_type = + MLX5_IB_FLOW_ACTION_MODIFY_HEADER; + maction->flow_action_raw.dev = dev; + + return &maction->ib_action; +} + +static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev) +{ + return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + max_modify_header_actions) || + MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, max_modify_header_actions); +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)( + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE); + struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); + enum mlx5_ib_uapi_flow_table_type ft_type; + struct ib_flow_action *action; + size_t num_actions; + void *in; + int len; + int ret; + + if (!mlx5_ib_modify_header_supported(mdev)) + return -EOPNOTSUPP; + + in = uverbs_attr_get_alloced_ptr(attrs, + MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM); + len = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM); + + if (len % MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)) + return -EINVAL; + + ret = uverbs_get_const(&ft_type, attrs, + MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE); + if (ret) + return ret; + + num_actions = len / MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto), + action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in); + if (IS_ERR(action)) + return PTR_ERR(action); + + uverbs_flow_action_fill_action(action, uobj, uobj->context->device, + IB_FLOW_ACTION_UNSPECIFIED); + + return 0; +} + +static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev, + u8 packet_reformat_type, + u8 ft_type) +{ + switch (packet_reformat_type) { + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL: + if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX) + return MLX5_CAP_FLOWTABLE(ibdev->mdev, + encap_general_header); + break; + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL: + if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX) + return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev, + reformat_l2_to_l3_tunnel); + break; + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2: + if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX) + return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, + reformat_l3_tunnel_to_l2); + break; + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2: + if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX) + return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap); + break; + default: + break; + } + + return false; +} + +static int mlx5_ib_dv_to_prm_packet_reforamt_type(u8 dv_prt, u8 *prm_prt) +{ + switch (dv_prt) { + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL: + *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL; + break; + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2: + *prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2; + break; + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL: + *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int mlx5_ib_flow_action_create_packet_reformat_ctx( + struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_action *maction, + u8 ft_type, u8 dv_prt, + 
void *in, size_t len) +{ + enum mlx5_flow_namespace_type namespace; + u8 prm_prt; + int ret; + + ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace); + if (ret) + return ret; + + ret = mlx5_ib_dv_to_prm_packet_reforamt_type(dv_prt, &prm_prt); + if (ret) + return ret; + + ret = mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len, + in, namespace, + &maction->flow_action_raw.action_id); + if (ret) + return ret; + + maction->flow_action_raw.sub_type = + MLX5_IB_FLOW_ACTION_PACKET_REFORMAT; + maction->flow_action_raw.dev = dev; + + return 0; +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)( + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE); + struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); + enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt; + enum mlx5_ib_uapi_flow_table_type ft_type; + struct mlx5_ib_flow_action *maction; + int ret; + + ret = uverbs_get_const(&ft_type, attrs, + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE); + if (ret) + return ret; + + ret = uverbs_get_const(&dv_prt, attrs, + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE); + if (ret) + return ret; + + if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type)) + return -EOPNOTSUPP; + + maction = kzalloc(sizeof(*maction), GFP_KERNEL); + if (!maction) + return -ENOMEM; + + if (dv_prt == + MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) { + maction->flow_action_raw.sub_type = + MLX5_IB_FLOW_ACTION_DECAP; + maction->flow_action_raw.dev = mdev; + } else { + void *in; + int len; + + in = uverbs_attr_get_alloced_ptr(attrs, + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF); + if (IS_ERR(in)) { + ret = PTR_ERR(in); + goto free_maction; + } + + len = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF); + + ret = mlx5_ib_flow_action_create_packet_reformat_ctx(mdev, + maction, ft_type, dv_prt, in, len); + if (ret) + goto free_maction; + } + + uverbs_flow_action_fill_action(&maction->ib_action, uobj, + uobj->context->device, + IB_FLOW_ACTION_UNSPECIFIED); + return 0; + +free_maction: + kfree(maction); + return ret; +} + DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_CREATE_FLOW, UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE, @@ -195,7 +518,15 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ACCESS_READ), UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX, MLX5_IB_OBJECT_DEVX_OBJ, - UVERBS_ACCESS_READ)); + UVERBS_ACCESS_READ), + UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_READ, 1, + MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS, + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG, + UVERBS_ATTR_TYPE(u32), + UA_OPTIONAL)); DECLARE_UVERBS_NAMED_METHOD_DESTROY( MLX5_IB_METHOD_DESTROY_FLOW, @@ -210,6 +541,44 @@ ADD_UVERBS_METHODS(mlx5_ib_fs, &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW)); DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM, + UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES( + set_action_in_add_action_in_auto)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE, + enum mlx5_ib_uapi_flow_table_type, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + 
MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF, + UVERBS_ATTR_MIN_SIZE(1), + UA_ALLOC_AND_COPY, + UA_OPTIONAL), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE, + enum mlx5_ib_uapi_flow_action_packet_reformat_type, + UA_MANDATORY), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE, + enum mlx5_ib_uapi_flow_table_type, + UA_MANDATORY)); + +ADD_UVERBS_METHODS( + mlx5_ib_flow_actions, + UVERBS_OBJECT_FLOW_ACTION, + &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER), + &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)); + +DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_FLOW_MATCHER_CREATE, UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE, MLX5_IB_OBJECT_FLOW_MATCHER, @@ -224,7 +593,10 @@ DECLARE_UVERBS_NAMED_METHOD( UA_MANDATORY), UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA, UVERBS_ATTR_TYPE(u8), - UA_MANDATORY)); + UA_MANDATORY), + UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, + enum ib_flow_flags, + UA_OPTIONAL)); DECLARE_UVERBS_NAMED_METHOD_DESTROY( MLX5_IB_METHOD_FLOW_MATCHER_DESTROY, @@ -247,6 +619,7 @@ int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root) root[i++] = &flow_objects; root[i++] = &mlx5_ib_fs; + root[i++] = &mlx5_ib_flow_actions; return i; } diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 35a0e04c38f2..584ff2ea7810 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -39,9 +39,6 @@ static const struct mlx5_ib_profile rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR, mlx5_ib_stage_post_ib_reg_umr_init, NULL), - STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR, - mlx5_ib_stage_class_attr_init, - NULL), }; static int diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index c414f3809e5c..e9c428071df3 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1571,14 +1571,57 @@ static void deallocate_uars(struct mlx5_ib_dev *dev, mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]); } -static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn) +int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp) +{ + int err = 0; + + mutex_lock(&dev->lb.mutex); + if (td) + dev->lb.user_td++; + if (qp) + dev->lb.qps++; + + if (dev->lb.user_td == 2 || + dev->lb.qps == 1) { + if (!dev->lb.enabled) { + err = mlx5_nic_vport_update_local_lb(dev->mdev, true); + dev->lb.enabled = true; + } + } + + mutex_unlock(&dev->lb.mutex); + + return err; +} + +void mlx5_ib_disable_lb(struct mlx5_ib_dev *dev, bool td, bool qp) +{ + mutex_lock(&dev->lb.mutex); + if (td) + dev->lb.user_td--; + if (qp) + dev->lb.qps--; + + if (dev->lb.user_td == 1 && + dev->lb.qps == 0) { + if (dev->lb.enabled) { + mlx5_nic_vport_update_local_lb(dev->mdev, false); + dev->lb.enabled = false; + } + } + + mutex_unlock(&dev->lb.mutex); +} + +static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn, + u16 uid) { int err; if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) return 0; - err = mlx5_core_alloc_transport_domain(dev->mdev, tdn); + err = mlx5_cmd_alloc_transport_domain(dev->mdev, tdn, uid); if (err) return err; @@ -1587,35 +1630,23 @@ static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn) 
!MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc))) return err; - mutex_lock(&dev->lb_mutex); - dev->user_td++; - - if (dev->user_td == 2) - err = mlx5_nic_vport_update_local_lb(dev->mdev, true); - - mutex_unlock(&dev->lb_mutex); - return err; + return mlx5_ib_enable_lb(dev, true, false); } -static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn) +static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn, + u16 uid) { if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) return; - mlx5_core_dealloc_transport_domain(dev->mdev, tdn); + mlx5_cmd_dealloc_transport_domain(dev->mdev, tdn, uid); if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) || (!MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) && !MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc))) return; - mutex_lock(&dev->lb_mutex); - dev->user_td--; - - if (dev->user_td < 2) - mlx5_nic_vport_update_local_lb(dev->mdev, false); - - mutex_unlock(&dev->lb_mutex); + mlx5_ib_disable_lb(dev, true, false); } static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, @@ -1727,30 +1758,24 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range; #endif - err = mlx5_ib_alloc_transport_domain(dev, &context->tdn); - if (err) - goto out_uars; - if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) { - /* Block DEVX on Infiniband as of SELinux */ - if (mlx5_ib_port_link_layer(ibdev, 1) != IB_LINK_LAYER_ETHERNET) { - err = -EPERM; - goto out_td; - } - - err = mlx5_ib_devx_create(dev, context); - if (err) - goto out_td; + err = mlx5_ib_devx_create(dev); + if (err < 0) + goto out_uars; + context->devx_uid = err; } + err = mlx5_ib_alloc_transport_domain(dev, &context->tdn, + context->devx_uid); + if (err) + goto out_devx; + if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) { err = mlx5_cmd_dump_fill_mkey(dev->mdev, &dump_fill_mkey); if (err) goto out_mdev; } - INIT_LIST_HEAD(&context->vma_private_list); - mutex_init(&context->vma_private_list_mutex); INIT_LIST_HEAD(&context->db_page_list); mutex_init(&context->db_page_mutex); @@ -1826,13 +1851,21 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, context->lib_caps = req.lib_caps; print_lib_caps(dev, context->lib_caps); + if (mlx5_lag_is_active(dev->mdev)) { + u8 port = mlx5_core_native_port_num(dev->mdev); + + atomic_set(&context->tx_port_affinity, + atomic_add_return( + 1, &dev->roce[port].tx_port_affinity)); + } + return &context->ibucontext; out_mdev: + mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid); +out_devx: if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) - mlx5_ib_devx_destroy(dev, context); -out_td: - mlx5_ib_dealloc_transport_domain(dev, context->tdn); + mlx5_ib_devx_destroy(dev, context->devx_uid); out_uars: deallocate_uars(dev, context); @@ -1855,11 +1888,18 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); struct mlx5_bfreg_info *bfregi; - if (context->devx_uid) - mlx5_ib_devx_destroy(dev, context); +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + /* All umem's must be destroyed before destroying the ucontext. 
*/ + mutex_lock(&ibcontext->per_mm_list_lock); + WARN_ON(!list_empty(&ibcontext->per_mm_list)); + mutex_unlock(&ibcontext->per_mm_list_lock); +#endif bfregi = &context->bfregi; - mlx5_ib_dealloc_transport_domain(dev, context->tdn); + mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid); + + if (context->devx_uid) + mlx5_ib_devx_destroy(dev, context->devx_uid); deallocate_uars(dev, context); kfree(bfregi->sys_pages); @@ -1900,94 +1940,9 @@ static int get_extended_index(unsigned long offset) return get_arg(offset) | ((offset >> 16) & 0xff) << 8; } -static void mlx5_ib_vma_open(struct vm_area_struct *area) -{ - /* vma_open is called when a new VMA is created on top of our VMA. This - * is done through either mremap flow or split_vma (usually due to - * mlock, madvise, munmap, etc.) We do not support a clone of the VMA, - * as this VMA is strongly hardware related. Therefore we set the - * vm_ops of the newly created/cloned VMA to NULL, to prevent it from - * calling us again and trying to do incorrect actions. We assume that - * the original VMA size is exactly a single page, and therefore all - * "splitting" operation will not happen to it. - */ - area->vm_ops = NULL; -} - -static void mlx5_ib_vma_close(struct vm_area_struct *area) -{ - struct mlx5_ib_vma_private_data *mlx5_ib_vma_priv_data; - - /* It's guaranteed that all VMAs opened on a FD are closed before the - * file itself is closed, therefore no sync is needed with the regular - * closing flow. (e.g. mlx5 ib_dealloc_ucontext) - * However need a sync with accessing the vma as part of - * mlx5_ib_disassociate_ucontext. - * The close operation is usually called under mm->mmap_sem except when - * process is exiting. - * The exiting case is handled explicitly as part of - * mlx5_ib_disassociate_ucontext. - */ - mlx5_ib_vma_priv_data = (struct mlx5_ib_vma_private_data *)area->vm_private_data; - - /* setting the vma context pointer to null in the mlx5_ib driver's - * private data, to protect a race condition in - * mlx5_ib_disassociate_ucontext(). - */ - mlx5_ib_vma_priv_data->vma = NULL; - mutex_lock(mlx5_ib_vma_priv_data->vma_private_list_mutex); - list_del(&mlx5_ib_vma_priv_data->list); - mutex_unlock(mlx5_ib_vma_priv_data->vma_private_list_mutex); - kfree(mlx5_ib_vma_priv_data); -} - -static const struct vm_operations_struct mlx5_ib_vm_ops = { - .open = mlx5_ib_vma_open, - .close = mlx5_ib_vma_close -}; - -static int mlx5_ib_set_vma_data(struct vm_area_struct *vma, - struct mlx5_ib_ucontext *ctx) -{ - struct mlx5_ib_vma_private_data *vma_prv; - struct list_head *vma_head = &ctx->vma_private_list; - - vma_prv = kzalloc(sizeof(*vma_prv), GFP_KERNEL); - if (!vma_prv) - return -ENOMEM; - - vma_prv->vma = vma; - vma_prv->vma_private_list_mutex = &ctx->vma_private_list_mutex; - vma->vm_private_data = vma_prv; - vma->vm_ops = &mlx5_ib_vm_ops; - - mutex_lock(&ctx->vma_private_list_mutex); - list_add(&vma_prv->list, vma_head); - mutex_unlock(&ctx->vma_private_list_mutex); - - return 0; -} static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) { - struct vm_area_struct *vma; - struct mlx5_ib_vma_private_data *vma_private, *n; - struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); - - mutex_lock(&context->vma_private_list_mutex); - list_for_each_entry_safe(vma_private, n, &context->vma_private_list, - list) { - vma = vma_private->vma; - zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE); - /* context going to be destroyed, should - * not access ops any more. 
- */ - vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE); - vma->vm_ops = NULL; - list_del(&vma_private->list); - kfree(vma_private); - } - mutex_unlock(&context->vma_private_list_mutex); } static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd) @@ -2010,9 +1965,6 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev, struct vm_area_struct *vma, struct mlx5_ib_ucontext *context) { - phys_addr_t pfn; - int err; - if (vma->vm_end - vma->vm_start != PAGE_SIZE) return -EINVAL; @@ -2025,13 +1977,8 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev, if (!dev->mdev->clock_info_page) return -EOPNOTSUPP; - pfn = page_to_pfn(dev->mdev->clock_info_page); - err = remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE, - vma->vm_page_prot); - if (err) - return err; - - return mlx5_ib_set_vma_data(vma, context); + return rdma_user_mmap_page(&context->ibucontext, vma, + dev->mdev->clock_info_page, PAGE_SIZE); } static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, @@ -2121,21 +2068,15 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, pfn = uar_index2pfn(dev, uar_index); mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn); - vma->vm_page_prot = prot; - err = io_remap_pfn_range(vma, vma->vm_start, pfn, - PAGE_SIZE, vma->vm_page_prot); + err = rdma_user_mmap_io(&context->ibucontext, vma, pfn, PAGE_SIZE, + prot); if (err) { mlx5_ib_err(dev, - "io_remap_pfn_range failed with error=%d, mmap_cmd=%s\n", + "rdma_user_mmap_io failed with error=%d, mmap_cmd=%s\n", err, mmap_cmd2str(cmd)); - err = -EAGAIN; goto err; } - err = mlx5_ib_set_vma_data(vma, context); - if (err) - goto err; - if (dyn_uar) bfregi->sys_pages[idx] = uar_index; return 0; @@ -2160,7 +2101,6 @@ static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) size_t map_size = vma->vm_end - vma->vm_start; u32 npages = map_size >> PAGE_SHIFT; phys_addr_t pfn; - pgprot_t prot; if (find_next_zero_bit(mctx->dm_pages, page_idx + npages, page_idx) != page_idx + npages) @@ -2170,14 +2110,8 @@ static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) MLX5_CAP64_DEV_MEM(dev->mdev, memic_bar_start_addr)) >> PAGE_SHIFT) + page_idx; - prot = pgprot_writecombine(vma->vm_page_prot); - vma->vm_page_prot = prot; - - if (io_remap_pfn_range(vma, vma->vm_start, pfn, map_size, - vma->vm_page_prot)) - return -EAGAIN; - - return mlx5_ib_set_vma_data(vma, mctx); + return rdma_user_mmap_io(context, vma, pfn, map_size, + pgprot_writecombine(vma->vm_page_prot)); } static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) @@ -2318,21 +2252,30 @@ static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev, struct mlx5_ib_alloc_pd_resp resp; struct mlx5_ib_pd *pd; int err; + u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {}; + u16 uid = 0; pd = kmalloc(sizeof(*pd), GFP_KERNEL); if (!pd) return ERR_PTR(-ENOMEM); - err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn); + uid = context ? 
to_mucontext(context)->devx_uid : 0; + MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD); + MLX5_SET(alloc_pd_in, in, uid, uid); + err = mlx5_cmd_exec(to_mdev(ibdev)->mdev, in, sizeof(in), + out, sizeof(out)); if (err) { kfree(pd); return ERR_PTR(err); } + pd->pdn = MLX5_GET(alloc_pd_out, out, pd); + pd->uid = uid; if (context) { resp.pdn = pd->pdn; if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { - mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn); + mlx5_cmd_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn, uid); kfree(pd); return ERR_PTR(-EFAULT); } @@ -2346,7 +2289,7 @@ static int mlx5_ib_dealloc_pd(struct ib_pd *pd) struct mlx5_ib_dev *mdev = to_mdev(pd->device); struct mlx5_ib_pd *mpd = to_mpd(pd); - mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn); + mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid); kfree(mpd); return 0; @@ -2452,20 +2395,50 @@ static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask) offsetof(typeof(filter), field) -\ sizeof(filter.field)) -static int parse_flow_flow_action(const union ib_flow_spec *ib_spec, - const struct ib_flow_attr *flow_attr, - struct mlx5_flow_act *action) +int parse_flow_flow_action(struct mlx5_ib_flow_action *maction, + bool is_egress, + struct mlx5_flow_act *action) { - struct mlx5_ib_flow_action *maction = to_mflow_act(ib_spec->action.act); switch (maction->ib_action.type) { case IB_FLOW_ACTION_ESP: + if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | + MLX5_FLOW_CONTEXT_ACTION_DECRYPT)) + return -EINVAL; /* Currently only AES_GCM keymat is supported by the driver */ action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx; - action->action |= flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS ? + action->action |= is_egress ? MLX5_FLOW_CONTEXT_ACTION_ENCRYPT : MLX5_FLOW_CONTEXT_ACTION_DECRYPT; return 0; + case IB_FLOW_ACTION_UNSPECIFIED: + if (maction->flow_action_raw.sub_type == + MLX5_IB_FLOW_ACTION_MODIFY_HEADER) { + if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + return -EINVAL; + action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + action->modify_id = maction->flow_action_raw.action_id; + return 0; + } + if (maction->flow_action_raw.sub_type == + MLX5_IB_FLOW_ACTION_DECAP) { + if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) + return -EINVAL; + action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; + return 0; + } + if (maction->flow_action_raw.sub_type == + MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) { + if (action->action & + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) + return -EINVAL; + action->action |= + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + action->reformat_id = + maction->flow_action_raw.action_id; + return 0; + } + /* fall through */ default: return -EOPNOTSUPP; } @@ -2793,7 +2766,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, return -EINVAL; action->flow_tag = ib_spec->flow_tag.tag_id; - action->has_flow_tag = true; + action->flags |= FLOW_ACT_HAS_TAG; break; case IB_FLOW_SPEC_ACTION_DROP: if (FIELDS_NOT_SUPPORTED(ib_spec->drop, @@ -2802,7 +2775,8 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP; break; case IB_FLOW_SPEC_ACTION_HANDLE: - ret = parse_flow_flow_action(ib_spec, flow_attr, action); + ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act), + flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action); if (ret) return ret; break; @@ -2883,10 +2857,10 @@ is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev, * rules would be supported, always return VALID_SPEC_NA. */ if (!is_crypto) - return egress ? 
VALID_SPEC_INVALID : VALID_SPEC_NA; + return VALID_SPEC_NA; return is_crypto && is_ipsec && - (!egress || (!is_drop && !flow_act->has_flow_tag)) ? + (!egress || (!is_drop && !(flow_act->flags & FLOW_ACT_HAS_TAG))) ? VALID_SPEC_VALID : VALID_SPEC_INVALID; } @@ -3026,14 +3000,15 @@ enum flow_table_type { static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns, struct mlx5_ib_flow_prio *prio, int priority, - int num_entries, int num_groups) + int num_entries, int num_groups, + u32 flags) { struct mlx5_flow_table *ft; ft = mlx5_create_auto_grouped_flow_table(ns, priority, num_entries, num_groups, - 0, 0); + 0, flags); if (IS_ERR(ft)) return ERR_CAST(ft); @@ -3053,26 +3028,43 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, int max_table_size; int num_entries; int num_groups; + u32 flags = 0; int priority; max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size)); if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { - if (ft_type == MLX5_IB_FT_TX) - priority = 0; - else if (flow_is_multicast_only(flow_attr) && - !dont_trap) + enum mlx5_flow_namespace_type fn_type; + + if (flow_is_multicast_only(flow_attr) && + !dont_trap) priority = MLX5_IB_FLOW_MCAST_PRIO; else priority = ib_prio_to_core_prio(flow_attr->priority, dont_trap); - ns = mlx5_get_flow_namespace(dev->mdev, - ft_type == MLX5_IB_FT_TX ? - MLX5_FLOW_NAMESPACE_EGRESS : - MLX5_FLOW_NAMESPACE_BYPASS); + if (ft_type == MLX5_IB_FT_RX) { + fn_type = MLX5_FLOW_NAMESPACE_BYPASS; + prio = &dev->flow_db->prios[priority]; + if (!dev->rep && + MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap)) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; + if (!dev->rep && + MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + reformat_l3_tunnel_to_l2)) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + } else { + max_table_size = + BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, + log_max_ft_size)); + fn_type = MLX5_FLOW_NAMESPACE_EGRESS; + prio = &dev->flow_db->egress_prios[priority]; + if (!dev->rep && + MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat)) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + } + ns = mlx5_get_flow_namespace(dev->mdev, fn_type); num_entries = MLX5_FS_MAX_ENTRIES; num_groups = MLX5_FS_MAX_TYPES; - prio = &dev->flow_db->prios[priority]; } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { ns = mlx5_get_flow_namespace(dev->mdev, @@ -3104,7 +3096,8 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, ft = prio->flow_table; if (!ft) - return _get_prio(ns, prio, priority, num_entries, num_groups); + return _get_prio(ns, prio, priority, num_entries, num_groups, + flags); return prio; } @@ -3271,6 +3264,9 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, if (!is_valid_attr(dev->mdev, flow_attr)) return ERR_PTR(-EINVAL); + if (dev->rep && is_egress) + return ERR_PTR(-EINVAL); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); handler = kzalloc(sizeof(*handler), GFP_KERNEL); if (!handler || !spec) { @@ -3320,15 +3316,18 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, } if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + struct mlx5_ib_mcounters *mcounters; + err = flow_counters_set_data(flow_act.counters, ucmd); if (err) goto free; + mcounters = to_mcounters(flow_act.counters); handler->ibcounters = flow_act.counters; dest_arr[dest_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest_arr[dest_num].counter = - to_mcounters(flow_act.counters)->hw_cntrs_hndl; + 
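/*
 * Sketch, not part of the patch: the counter destination change in this hunk
 * switches from the driver-side counter handle to the firmware counter id
 * returned by mlx5_fc_id().  A counter destination is therefore filled in
 * roughly like this (the local names here are made up for illustration):
 */
static void demo_counter_dest(struct mlx5_flow_destination *dst,
			      struct mlx5_fc *counter)
{
	dst->type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
	dst->counter_id = mlx5_fc_id(counter);	/* u32 id the firmware resolves */
}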
dest_arr[dest_num].counter_id = + mlx5_fc_id(mcounters->hw_cntrs_hndl); dest_num++; } @@ -3346,7 +3345,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; } - if (flow_act.has_flow_tag && + if ((flow_act.flags & FLOW_ACT_HAS_TAG) && (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) { mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n", @@ -3658,34 +3657,54 @@ free_ucmd: return ERR_PTR(err); } -static struct mlx5_ib_flow_prio *_get_flow_table(struct mlx5_ib_dev *dev, - int priority, bool mcast) +static struct mlx5_ib_flow_prio * +_get_flow_table(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_matcher *fs_matcher, + bool mcast) { - int max_table_size; struct mlx5_flow_namespace *ns = NULL; struct mlx5_ib_flow_prio *prio; + int max_table_size; + u32 flags = 0; + int priority; + + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) { + max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + log_max_ft_size)); + if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap)) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; + if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + reformat_l3_tunnel_to_l2)) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + } else { /* Can only be MLX5_FLOW_NAMESPACE_EGRESS */ + max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, + log_max_ft_size)); + if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat)) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + } - max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - log_max_ft_size)); if (max_table_size < MLX5_FS_MAX_ENTRIES) return ERR_PTR(-ENOMEM); if (mcast) priority = MLX5_IB_FLOW_MCAST_PRIO; else - priority = ib_prio_to_core_prio(priority, false); + priority = ib_prio_to_core_prio(fs_matcher->priority, false); - ns = mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS); + ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type); if (!ns) return ERR_PTR(-ENOTSUPP); - prio = &dev->flow_db->prios[priority]; + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) + prio = &dev->flow_db->prios[priority]; + else + prio = &dev->flow_db->egress_prios[priority]; if (prio->flow_table) return prio; return _get_prio(ns, prio, priority, MLX5_FS_MAX_ENTRIES, - MLX5_FS_MAX_TYPES); + MLX5_FS_MAX_TYPES, flags); } static struct mlx5_ib_flow_handler * @@ -3693,10 +3712,10 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev, struct mlx5_ib_flow_prio *ft_prio, struct mlx5_flow_destination *dst, struct mlx5_ib_flow_matcher *fs_matcher, + struct mlx5_flow_act *flow_act, void *cmd_in, int inlen) { struct mlx5_ib_flow_handler *handler; - struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG}; struct mlx5_flow_spec *spec; struct mlx5_flow_table *ft = ft_prio->flow_table; int err = 0; @@ -3715,9 +3734,8 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev, fs_matcher->mask_len); spec->match_criteria_enable = fs_matcher->match_criteria_enable; - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; handler->rule = mlx5_add_flow_rules(ft, spec, - &flow_act, dst, 1); + flow_act, dst, 1); if (IS_ERR(handler->rule)) { err = PTR_ERR(handler->rule); @@ -3779,12 +3797,12 @@ static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher, struct mlx5_ib_flow_handler * mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, + struct mlx5_flow_act *flow_act, void *cmd_in, int inlen, int dest_id, int dest_type) { struct mlx5_flow_destination *dst; struct 
mlx5_ib_flow_prio *ft_prio; - int priority = fs_matcher->priority; struct mlx5_ib_flow_handler *handler; bool mcast; int err; @@ -3802,7 +3820,7 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, mcast = raw_fs_is_multicast(fs_matcher, cmd_in); mutex_lock(&dev->flow_db->lock); - ft_prio = _get_flow_table(dev, priority, mcast); + ft_prio = _get_flow_table(dev, fs_matcher, mcast); if (IS_ERR(ft_prio)) { err = PTR_ERR(ft_prio); goto unlock; @@ -3811,13 +3829,18 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) { dst->type = dest_type; dst->tir_num = dest_id; - } else { + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) { dst->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM; dst->ft_num = dest_id; + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + } else { + dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT; + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; } - handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, cmd_in, - inlen); + handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, flow_act, + cmd_in, inlen); if (IS_ERR(handler)) { err = PTR_ERR(handler); @@ -3995,6 +4018,9 @@ static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action) */ mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx); break; + case IB_FLOW_ACTION_UNSPECIFIED: + mlx5_ib_destroy_flow_action_raw(maction); + break; default: WARN_ON(true); break; @@ -4009,13 +4035,17 @@ static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_ib_qp *mqp = to_mqp(ibqp); int err; + u16 uid; + + uid = ibqp->pd ? + to_mpd(ibqp->pd)->uid : 0; if (mqp->flags & MLX5_IB_QP_UNDERLAY) { mlx5_ib_dbg(dev, "Attaching a multi cast group to underlay QP is not supported\n"); return -EOPNOTSUPP; } - err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num); + err = mlx5_cmd_attach_mcg(dev->mdev, gid, ibqp->qp_num, uid); if (err) mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n", ibqp->qp_num, gid->raw); @@ -4027,8 +4057,11 @@ static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { struct mlx5_ib_dev *dev = to_mdev(ibqp->device); int err; + u16 uid; - err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num); + uid = ibqp->pd ? 
+ to_mpd(ibqp->pd)->uid : 0; + err = mlx5_cmd_detach_mcg(dev->mdev, gid, ibqp->qp_num, uid); if (err) mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n", ibqp->qp_num, gid->raw); @@ -4049,16 +4082,17 @@ static int init_node_data(struct mlx5_ib_dev *dev) return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid); } -static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr, - char *buf) +static ssize_t fw_pages_show(struct device *device, + struct device_attribute *attr, char *buf) { struct mlx5_ib_dev *dev = container_of(device, struct mlx5_ib_dev, ib_dev.dev); return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages); } +static DEVICE_ATTR_RO(fw_pages); -static ssize_t show_reg_pages(struct device *device, +static ssize_t reg_pages_show(struct device *device, struct device_attribute *attr, char *buf) { struct mlx5_ib_dev *dev = @@ -4066,44 +4100,47 @@ static ssize_t show_reg_pages(struct device *device, return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages)); } +static DEVICE_ATTR_RO(reg_pages); -static ssize_t show_hca(struct device *device, struct device_attribute *attr, - char *buf) +static ssize_t hca_type_show(struct device *device, + struct device_attribute *attr, char *buf) { struct mlx5_ib_dev *dev = container_of(device, struct mlx5_ib_dev, ib_dev.dev); return sprintf(buf, "MT%d\n", dev->mdev->pdev->device); } +static DEVICE_ATTR_RO(hca_type); -static ssize_t show_rev(struct device *device, struct device_attribute *attr, - char *buf) +static ssize_t hw_rev_show(struct device *device, + struct device_attribute *attr, char *buf) { struct mlx5_ib_dev *dev = container_of(device, struct mlx5_ib_dev, ib_dev.dev); return sprintf(buf, "%x\n", dev->mdev->rev_id); } +static DEVICE_ATTR_RO(hw_rev); -static ssize_t show_board(struct device *device, struct device_attribute *attr, - char *buf) +static ssize_t board_id_show(struct device *device, + struct device_attribute *attr, char *buf) { struct mlx5_ib_dev *dev = container_of(device, struct mlx5_ib_dev, ib_dev.dev); return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN, dev->mdev->board_id); } +static DEVICE_ATTR_RO(board_id); -static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); -static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); -static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); -static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL); -static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL); +static struct attribute *mlx5_class_attributes[] = { + &dev_attr_hw_rev.attr, + &dev_attr_hca_type.attr, + &dev_attr_board_id.attr, + &dev_attr_fw_pages.attr, + &dev_attr_reg_pages.attr, + NULL, +}; -static struct device_attribute *mlx5_class_attributes[] = { - &dev_attr_hw_rev, - &dev_attr_hca_type, - &dev_attr_board_id, - &dev_attr_fw_pages, - &dev_attr_reg_pages, +static const struct attribute_group mlx5_attr_group = { + .attrs = mlx5_class_attributes, }; static void pkey_change_handler(struct work_struct *work) @@ -5163,22 +5200,14 @@ done: return num_counters; } -static struct net_device* -mlx5_ib_alloc_rdma_netdev(struct ib_device *hca, - u8 port_num, - enum rdma_netdev_t type, - const char *name, - unsigned char name_assign_type, - void (*setup)(struct net_device *)) +static int mlx5_ib_rn_get_params(struct ib_device *device, u8 port_num, + enum rdma_netdev_t type, + struct rdma_netdev_alloc_params *params) { - struct net_device *netdev; - if (type != RDMA_NETDEV_IPOIB) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; - netdev = mlx5_rdma_netdev_alloc(to_mdev(hca)->mdev, hca, - name, 
setup); - return netdev; + return mlx5_rdma_rn_get_params(to_mdev(device)->mdev, device, params); } static void delay_drop_debugfs_cleanup(struct mlx5_ib_dev *dev) @@ -5636,7 +5665,6 @@ void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; - const char *name; int err; int i; @@ -5669,12 +5697,6 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) if (mlx5_use_mad_ifc(dev)) get_ext_port_caps(dev); - if (!mlx5_lag_is_active(mdev)) - name = "mlx5_%d"; - else - name = "mlx5_bond_%d"; - - strlcpy(dev->ib_dev.name, name, IB_DEVICE_NAME_MAX); dev->ib_dev.owner = THIS_MODULE; dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; @@ -5824,8 +5846,9 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; dev->ib_dev.get_dev_fw_str = get_dev_fw_str; dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity; - if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads)) - dev->ib_dev.alloc_rdma_netdev = mlx5_ib_alloc_rdma_netdev; + if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) && + IS_ENABLED(CONFIG_MLX5_CORE_IPOIB)) + dev->ib_dev.rdma_netdev_get_params = mlx5_ib_rn_get_params; if (mlx5_core_is_pf(mdev)) { dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config; @@ -5880,7 +5903,7 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) if ((MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && (MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) || MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc))) - mutex_init(&dev->lb_mutex); + mutex_init(&dev->lb.mutex); return 0; } @@ -6087,7 +6110,14 @@ static int mlx5_ib_stage_populate_specs(struct mlx5_ib_dev *dev) int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) { - return ib_register_device(&dev->ib_dev, NULL); + const char *name; + + rdma_set_device_sysfs_group(&dev->ib_dev, &mlx5_attr_group); + if (!mlx5_lag_is_active(dev->mdev)) + name = "mlx5_%d"; + else + name = "mlx5_bond_%d"; + return ib_register_device(&dev->ib_dev, name, NULL); } void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) @@ -6117,21 +6147,6 @@ static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev) cancel_delay_drop(dev); } -int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev) -{ - int err; - int i; - - for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) { - err = device_create_file(&dev->ib_dev.dev, - mlx5_class_attributes[i]); - if (err) - return err; - } - - return 0; -} - static int mlx5_ib_stage_rep_reg_init(struct mlx5_ib_dev *dev) { mlx5_ib_register_vport_reps(dev); @@ -6155,6 +6170,8 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev, profile->stage[stage].cleanup(dev); } + if (dev->devx_whitelist_uid) + mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid); ib_dealloc_device((struct ib_device *)dev); } @@ -6163,8 +6180,7 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev, { int err; int i; - - printk_once(KERN_INFO "%s", mlx5_version); + int uid; for (i = 0; i < MLX5_IB_STAGE_MAX; i++) { if (profile->stage[i].init) { @@ -6174,6 +6190,10 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev, } } + uid = mlx5_ib_devx_create(dev); + if (uid > 0) + dev->devx_whitelist_uid = uid; + dev->profile = profile; dev->ib_active = true; @@ -6234,9 +6254,6 @@ static const struct mlx5_ib_profile pf_profile = { STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP, mlx5_ib_stage_delay_drop_init, mlx5_ib_stage_delay_drop_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR, - 
mlx5_ib_stage_class_attr_init, - NULL), }; static const struct mlx5_ib_profile nic_rep_profile = { @@ -6279,9 +6296,6 @@ static const struct mlx5_ib_profile nic_rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR, mlx5_ib_stage_post_ib_reg_umr_init, NULL), - STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR, - mlx5_ib_stage_class_attr_init, - NULL), STAGE_CREATE(MLX5_IB_STAGE_REP_REG, mlx5_ib_stage_rep_reg_init, mlx5_ib_stage_rep_reg_cleanup), diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index f3dbd75a0a96..549234988bb4 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -57,7 +57,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int entry; unsigned long page_shift = umem->page_shift; - if (umem->odp_data) { + if (umem->is_odp) { *ncont = ib_umem_page_count(umem); *count = *ncont << (page_shift - PAGE_SHIFT); *shift = page_shift; @@ -152,14 +152,13 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, struct scatterlist *sg; int entry; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - const bool odp = umem->odp_data != NULL; - - if (odp) { + if (umem->is_odp) { WARN_ON(shift != 0); WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)); for (i = 0; i < num_pages; ++i) { - dma_addr_t pa = umem->odp_data->dma_list[offset + i]; + dma_addr_t pa = + to_ib_umem_odp(umem)->dma_list[offset + i]; pas[i] = cpu_to_be64(umem_dma_to_mtt(pa)); } diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 320d4dfe8c2f..b651a7a6fde9 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -39,8 +39,10 @@ #include <rdma/ib_smi.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/cq.h> +#include <linux/mlx5/fs.h> #include <linux/mlx5/qp.h> #include <linux/mlx5/srq.h> +#include <linux/mlx5/fs.h> #include <linux/types.h> #include <linux/mlx5/transobj.h> #include <rdma/ib_user_verbs.h> @@ -48,17 +50,17 @@ #include <rdma/uverbs_ioctl.h> #include <rdma/mlx5_user_ioctl_cmds.h> -#define mlx5_ib_dbg(dev, format, arg...) \ -pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ - __LINE__, current->pid, ##arg) +#define mlx5_ib_dbg(_dev, format, arg...) \ + dev_dbg(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \ + __LINE__, current->pid, ##arg) -#define mlx5_ib_err(dev, format, arg...) \ -pr_err("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ - __LINE__, current->pid, ##arg) +#define mlx5_ib_err(_dev, format, arg...) \ + dev_err(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \ + __LINE__, current->pid, ##arg) -#define mlx5_ib_warn(dev, format, arg...) \ -pr_warn("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ - __LINE__, current->pid, ##arg) +#define mlx5_ib_warn(_dev, format, arg...) 
\ + dev_warn(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \ + __LINE__, current->pid, ##arg) #define field_avail(type, fld, sz) (offsetof(type, fld) + \ sizeof(((type *)0)->fld) <= (sz)) @@ -114,13 +116,6 @@ enum { MLX5_MEMIC_BASE_SIZE = 1 << MLX5_MEMIC_BASE_ALIGN, }; -struct mlx5_ib_vma_private_data { - struct list_head list; - struct vm_area_struct *vma; - /* protect vma_private_list add/del */ - struct mutex *vma_private_list_mutex; -}; - struct mlx5_ib_ucontext { struct ib_ucontext ibucontext; struct list_head db_page_list; @@ -132,13 +127,12 @@ struct mlx5_ib_ucontext { u8 cqe_version; /* Transport Domain number */ u32 tdn; - struct list_head vma_private_list; - /* protect vma_private_list add/del */ - struct mutex vma_private_list_mutex; u64 lib_caps; DECLARE_BITMAP(dm_pages, MLX5_MAX_MEMIC_PAGES); u16 devx_uid; + /* For RoCE LAG TX affinity */ + atomic_t tx_port_affinity; }; static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext) @@ -149,6 +143,13 @@ static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibuconte struct mlx5_ib_pd { struct ib_pd ibpd; u32 pdn; + u16 uid; +}; + +enum { + MLX5_IB_FLOW_ACTION_MODIFY_HEADER, + MLX5_IB_FLOW_ACTION_PACKET_REFORMAT, + MLX5_IB_FLOW_ACTION_DECAP, }; #define MLX5_IB_FLOW_MCAST_PRIO (MLX5_BY_PASS_NUM_PRIOS - 1) @@ -180,6 +181,7 @@ struct mlx5_ib_flow_matcher { struct mlx5_ib_match_params matcher_mask; int mask_len; enum mlx5_ib_flow_type flow_type; + enum mlx5_flow_namespace_type ns_type; u16 priority; struct mlx5_core_dev *mdev; atomic_t usecnt; @@ -188,6 +190,7 @@ struct mlx5_ib_flow_matcher { struct mlx5_ib_flow_db { struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT]; + struct mlx5_ib_flow_prio egress_prios[MLX5_IB_NUM_FLOW_FT]; struct mlx5_ib_flow_prio sniffer[MLX5_IB_NUM_SNIFFER_FTS]; struct mlx5_ib_flow_prio egress[MLX5_IB_NUM_EGRESS_FTS]; struct mlx5_flow_table *lag_demux_ft; @@ -322,6 +325,7 @@ enum { struct mlx5_ib_rwq_ind_table { struct ib_rwq_ind_table ib_rwq_ind_tbl; u32 rqtn; + u16 uid; }; struct mlx5_ib_ubuffer { @@ -428,13 +432,14 @@ struct mlx5_ib_qp { struct list_head cq_send_list; struct mlx5_rate_limit rl; u32 underlay_qpn; - bool tunnel_offload_en; + u32 flags_en; /* storage for qp sub type when core qp type is IB_QPT_DRIVER */ enum ib_qp_type qp_sub_type; }; struct mlx5_ib_cq_buf { struct mlx5_frag_buf_ctrl fbc; + struct mlx5_frag_buf frag_buf; struct ib_umem *umem; int cqe_size; int nent; @@ -535,6 +540,7 @@ struct mlx5_ib_srq { struct mlx5_ib_xrcd { struct ib_xrcd ibxrcd; u32 xrcdn; + u16 uid; }; enum mlx5_ib_mtt_access_flags { @@ -699,7 +705,7 @@ struct mlx5_roce { rwlock_t netdev_lock; struct net_device *netdev; struct notifier_block nb; - atomic_t next_port; + atomic_t tx_port_affinity; enum ib_port_state last_port_state; struct mlx5_ib_dev *dev; u8 native_port_num; @@ -814,6 +820,11 @@ struct mlx5_ib_flow_action { u64 ib_flags; struct mlx5_accel_esp_xfrm *ctx; } esp_aes_gcm; + struct { + struct mlx5_ib_dev *dev; + u32 sub_type; + u32 action_id; + } flow_action_raw; }; }; @@ -858,9 +869,20 @@ to_mcounters(struct ib_counters *ibcntrs) return container_of(ibcntrs, struct mlx5_ib_mcounters, ibcntrs); } +int parse_flow_flow_action(struct mlx5_ib_flow_action *maction, + bool is_egress, + struct mlx5_flow_act *action); +struct mlx5_ib_lb_state { + /* protect the user_td */ + struct mutex mutex; + u32 user_td; + int qps; + bool enabled; +}; + struct mlx5_ib_dev { struct ib_device ib_dev; - const struct uverbs_object_tree_def *driver_trees[6]; + const struct 
uverbs_object_tree_def *driver_trees[7]; struct mlx5_core_dev *mdev; struct mlx5_roce roce[MLX5_MAX_PORTS]; int num_ports; @@ -899,13 +921,12 @@ struct mlx5_ib_dev { const struct mlx5_ib_profile *profile; struct mlx5_eswitch_rep *rep; - /* protect the user_td */ - struct mutex lb_mutex; - u32 user_td; + struct mlx5_ib_lb_state lb; u8 umr_fence; struct list_head ib_dev_list; u64 sys_image_guid; struct mlx5_memic memic; + u16 devx_whitelist_uid; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) @@ -1016,6 +1037,8 @@ int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr); int mlx5_ib_destroy_srq(struct ib_srq *srq); int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); +int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp); +void mlx5_ib_disable_lb(struct mlx5_ib_dev *dev, bool td, bool qp); struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); @@ -1105,7 +1128,7 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, int page_shift, __be64 *pas, int access_flags); void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num); -int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq); +int mlx5_ib_get_cqe_size(struct ib_cq *ibcq); int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); @@ -1140,7 +1163,7 @@ void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context, int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev); int __init mlx5_ib_odp_init(void); void mlx5_ib_odp_cleanup(void); -void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start, +void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, unsigned long end); void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent); void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, @@ -1179,7 +1202,6 @@ void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev); int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev); void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev); int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev); -int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev); void __mlx5_ib_remove(struct mlx5_ib_dev *dev, const struct mlx5_ib_profile *profile, int stage); @@ -1228,22 +1250,20 @@ void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev, u8 port_num); #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) -int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, - struct mlx5_ib_ucontext *context); -void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, - struct mlx5_ib_ucontext *context); +int mlx5_ib_devx_create(struct mlx5_ib_dev *dev); +void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid); const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void); struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add( struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, - void *cmd_in, int inlen, int dest_id, int dest_type); + struct mlx5_flow_act *flow_act, void *cmd_in, int inlen, + int dest_id, int dest_type); bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type); int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root); +void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction); #else static inline int -mlx5_ib_devx_create(struct mlx5_ib_dev *dev, - struct mlx5_ib_ucontext *context) { 
return -EOPNOTSUPP; }; -static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, - struct mlx5_ib_ucontext *context) {} +mlx5_ib_devx_create(struct mlx5_ib_dev *dev) { return -EOPNOTSUPP; }; +static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {} static inline const struct uverbs_object_tree_def * mlx5_ib_get_devx_tree(void) { return NULL; } static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, @@ -1256,6 +1276,11 @@ mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root) { return 0; } +static inline void +mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction) +{ + return; +}; #endif static inline void init_query_mad(struct ib_smp *mad) { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 9fb1d9cb9401..9b195d65a13e 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -98,7 +98,7 @@ static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length) #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING static void update_odp_mr(struct mlx5_ib_mr *mr) { - if (mr->umem->odp_data) { + if (mr->umem->is_odp) { /* * This barrier prevents the compiler from moving the * setting of umem->odp_data->private to point to our @@ -107,7 +107,7 @@ static void update_odp_mr(struct mlx5_ib_mr *mr) * handle invalidations. */ smp_wmb(); - mr->umem->odp_data->private = mr; + to_ib_umem_odp(mr->umem)->private = mr; /* * Make sure we will see the new * umem->odp_data->private value in the invalidation @@ -544,6 +544,9 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) int shrink = 0; int c; + if (!mr->allocated_from_cache) + return; + c = order2idx(dev, mr->order); if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) { mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c); @@ -688,7 +691,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) init_completion(&ent->compl); INIT_WORK(&ent->work, cache_work_func); INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); - queue_work(cache->wq, &ent->work); if (i > MR_CACHE_LAST_STD_ENTRY) { mlx5_odp_init_mr_cache_entry(ent); @@ -708,6 +710,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) ent->limit = dev->mdev->profile->mr_cache[i].limit; else ent->limit = 0; + queue_work(cache->wq, &ent->work); } err = mlx5_mr_cache_debugfs_init(dev); @@ -1624,14 +1627,16 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) struct ib_umem *umem = mr->umem; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (umem && umem->odp_data) { + if (umem && umem->is_odp) { + struct ib_umem_odp *umem_odp = to_ib_umem_odp(umem); + /* Prevent new page faults from succeeding */ mr->live = 0; /* Wait for all running page-fault handlers to finish. */ synchronize_srcu(&dev->mr_srcu); /* Destroy all page mappings */ - if (umem->odp_data->page_list) - mlx5_ib_invalidate_range(umem, ib_umem_start(umem), + if (umem_odp->page_list) + mlx5_ib_invalidate_range(umem_odp, ib_umem_start(umem), ib_umem_end(umem)); else mlx5_ib_free_implicit_mr(mr); @@ -1647,18 +1652,19 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) umem = NULL; } #endif - clean_mr(dev, mr); + /* + * We should unregister the DMA address from the HCA before + * remove the DMA mapping. 
+ */ + mlx5_mr_cache_free(dev, mr); if (umem) { ib_umem_release(umem); atomic_sub(npages, &dev->mdev->priv.reg_pages); } - if (!mr->allocated_from_cache) kfree(mr); - else - mlx5_mr_cache_free(dev, mr); } int mlx5_ib_dereg_mr(struct ib_mr *ibmr) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index d216e0d2921d..b04eb6775326 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -61,13 +61,21 @@ static int check_parent(struct ib_umem_odp *odp, return mr && mr->parent == parent && !odp->dying; } +struct ib_ucontext_per_mm *mr_to_per_mm(struct mlx5_ib_mr *mr) +{ + if (WARN_ON(!mr || !mr->umem || !mr->umem->is_odp)) + return NULL; + + return to_ib_umem_odp(mr->umem)->per_mm; +} + static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp) { struct mlx5_ib_mr *mr = odp->private, *parent = mr->parent; - struct ib_ucontext *ctx = odp->umem->context; + struct ib_ucontext_per_mm *per_mm = odp->per_mm; struct rb_node *rb; - down_read(&ctx->umem_rwsem); + down_read(&per_mm->umem_rwsem); while (1) { rb = rb_next(&odp->interval_tree.rb); if (!rb) @@ -79,19 +87,19 @@ static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp) not_found: odp = NULL; end: - up_read(&ctx->umem_rwsem); + up_read(&per_mm->umem_rwsem); return odp; } -static struct ib_umem_odp *odp_lookup(struct ib_ucontext *ctx, - u64 start, u64 length, +static struct ib_umem_odp *odp_lookup(u64 start, u64 length, struct mlx5_ib_mr *parent) { + struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(parent); struct ib_umem_odp *odp; struct rb_node *rb; - down_read(&ctx->umem_rwsem); - odp = rbt_ib_umem_lookup(&ctx->umem_tree, start, length); + down_read(&per_mm->umem_rwsem); + odp = rbt_ib_umem_lookup(&per_mm->umem_tree, start, length); if (!odp) goto end; @@ -102,13 +110,13 @@ static struct ib_umem_odp *odp_lookup(struct ib_ucontext *ctx, if (!rb) goto not_found; odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb); - if (ib_umem_start(odp->umem) > start + length) + if (ib_umem_start(&odp->umem) > start + length) goto not_found; } not_found: odp = NULL; end: - up_read(&ctx->umem_rwsem); + up_read(&per_mm->umem_rwsem); return odp; } @@ -116,7 +124,6 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, size_t nentries, struct mlx5_ib_mr *mr, int flags) { struct ib_pd *pd = mr->ibmr.pd; - struct ib_ucontext *ctx = pd->uobject->context; struct mlx5_ib_dev *dev = to_mdev(pd->device); struct ib_umem_odp *odp; unsigned long va; @@ -131,13 +138,13 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, return; } - odp = odp_lookup(ctx, offset * MLX5_IMR_MTT_SIZE, - nentries * MLX5_IMR_MTT_SIZE, mr); + odp = odp_lookup(offset * MLX5_IMR_MTT_SIZE, + nentries * MLX5_IMR_MTT_SIZE, mr); for (i = 0; i < nentries; i++, pklm++) { pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE); va = (offset + i) * MLX5_IMR_MTT_SIZE; - if (odp && odp->umem->address == va) { + if (odp && odp->umem.address == va) { struct mlx5_ib_mr *mtt = odp->private; pklm->key = cpu_to_be32(mtt->ibmr.lkey); @@ -153,13 +160,13 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, static void mr_leaf_free_action(struct work_struct *work) { struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work); - int idx = ib_umem_start(odp->umem) >> MLX5_IMR_MTT_SHIFT; + int idx = ib_umem_start(&odp->umem) >> MLX5_IMR_MTT_SHIFT; struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent; mr->parent = NULL; synchronize_srcu(&mr->dev->mr_srcu); - ib_umem_release(odp->umem); + 
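/*
 * Sketch, not part of the patch (layout assumed from the conversions in these
 * ODP hunks): struct ib_umem_odp now embeds its struct ib_umem instead of
 * being reached through umem->odp_data, so the two views are converted with
 * container_of() one way and a plain member access the other (demo_* is a
 * made-up name):
 */
static struct ib_umem_odp *demo_odp_roundtrip(struct ib_umem *umem)
{
	struct ib_umem_odp *umem_odp;

	if (!umem->is_odp)		/* replaces the old umem->odp_data test */
		return NULL;

	umem_odp = to_ib_umem_odp(umem);	/* container_of(umem, struct ib_umem_odp, umem) */
	WARN_ON(&umem_odp->umem != umem);	/* same object, two views */
	return umem_odp;
}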
ib_umem_release(&odp->umem); if (imr->live) mlx5_ib_update_xlt(imr, idx, 1, 0, MLX5_IB_UPD_XLT_INDIRECT | @@ -170,22 +177,24 @@ static void mr_leaf_free_action(struct work_struct *work) wake_up(&imr->q_leaf_free); } -void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start, +void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, unsigned long end) { struct mlx5_ib_mr *mr; const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT / sizeof(struct mlx5_mtt)) - 1; u64 idx = 0, blk_start_idx = 0; + struct ib_umem *umem; int in_block = 0; u64 addr; - if (!umem || !umem->odp_data) { + if (!umem_odp) { pr_err("invalidation called on NULL umem or non-ODP umem\n"); return; } + umem = &umem_odp->umem; - mr = umem->odp_data->private; + mr = umem_odp->private; if (!mr || !mr->ibmr.pd) return; @@ -208,7 +217,7 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start, * estimate the cost of another UMR vs. the cost of bigger * UMR. */ - if (umem->odp_data->dma_list[idx] & + if (umem_odp->dma_list[idx] & (ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) { if (!in_block) { blk_start_idx = idx; @@ -237,13 +246,13 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start, * needed. */ - ib_umem_odp_unmap_dma_pages(umem, start, end); + ib_umem_odp_unmap_dma_pages(umem_odp, start, end); if (unlikely(!umem->npages && mr->parent && - !umem->odp_data->dying)) { - WRITE_ONCE(umem->odp_data->dying, 1); + !umem_odp->dying)) { + WRITE_ONCE(umem_odp->dying, 1); atomic_inc(&mr->parent->num_leaf_free); - schedule_work(&umem->odp_data->work); + schedule_work(&umem_odp->work); } } @@ -366,16 +375,15 @@ fail: static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt) { - struct ib_ucontext *ctx = mr->ibmr.pd->uobject->context; struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.pd->device); struct ib_umem_odp *odp, *result = NULL; + struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem); u64 addr = io_virt & MLX5_IMR_MTT_MASK; int nentries = 0, start_idx = 0, ret; struct mlx5_ib_mr *mtt; - struct ib_umem *umem; - mutex_lock(&mr->umem->odp_data->umem_mutex); - odp = odp_lookup(ctx, addr, 1, mr); + mutex_lock(&odp_mr->umem_mutex); + odp = odp_lookup(addr, 1, mr); mlx5_ib_dbg(dev, "io_virt:%llx bcnt:%zx addr:%llx odp:%p\n", io_virt, bcnt, addr, odp); @@ -385,22 +393,23 @@ next_mr: if (nentries) nentries++; } else { - umem = ib_alloc_odp_umem(ctx, addr, MLX5_IMR_MTT_SIZE); - if (IS_ERR(umem)) { - mutex_unlock(&mr->umem->odp_data->umem_mutex); - return ERR_CAST(umem); + odp = ib_alloc_odp_umem(odp_mr->per_mm, addr, + MLX5_IMR_MTT_SIZE); + if (IS_ERR(odp)) { + mutex_unlock(&odp_mr->umem_mutex); + return ERR_CAST(odp); } - mtt = implicit_mr_alloc(mr->ibmr.pd, umem, 0, mr->access_flags); + mtt = implicit_mr_alloc(mr->ibmr.pd, &odp->umem, 0, + mr->access_flags); if (IS_ERR(mtt)) { - mutex_unlock(&mr->umem->odp_data->umem_mutex); - ib_umem_release(umem); + mutex_unlock(&odp_mr->umem_mutex); + ib_umem_release(&odp->umem); return ERR_CAST(mtt); } - odp = umem->odp_data; odp->private = mtt; - mtt->umem = umem; + mtt->umem = &odp->umem; mtt->mmkey.iova = addr; mtt->parent = mr; INIT_WORK(&odp->work, mr_leaf_free_action); @@ -417,7 +426,7 @@ next_mr: addr += MLX5_IMR_MTT_SIZE; if (unlikely(addr < io_virt + bcnt)) { odp = odp_next(odp); - if (odp && odp->umem->address != addr) + if (odp && odp->umem.address != addr) odp = NULL; goto next_mr; } @@ -432,7 +441,7 @@ next_mr: } } - mutex_unlock(&mr->umem->odp_data->umem_mutex); + 
mutex_unlock(&odp_mr->umem_mutex); return result; } @@ -460,36 +469,36 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, return imr; } -static int mr_leaf_free(struct ib_umem *umem, u64 start, - u64 end, void *cookie) +static int mr_leaf_free(struct ib_umem_odp *umem_odp, u64 start, u64 end, + void *cookie) { - struct mlx5_ib_mr *mr = umem->odp_data->private, *imr = cookie; + struct mlx5_ib_mr *mr = umem_odp->private, *imr = cookie; + struct ib_umem *umem = &umem_odp->umem; if (mr->parent != imr) return 0; - ib_umem_odp_unmap_dma_pages(umem, - ib_umem_start(umem), + ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem), ib_umem_end(umem)); - if (umem->odp_data->dying) + if (umem_odp->dying) return 0; - WRITE_ONCE(umem->odp_data->dying, 1); + WRITE_ONCE(umem_odp->dying, 1); atomic_inc(&imr->num_leaf_free); - schedule_work(&umem->odp_data->work); + schedule_work(&umem_odp->work); return 0; } void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) { - struct ib_ucontext *ctx = imr->ibmr.pd->uobject->context; + struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(imr); - down_read(&ctx->umem_rwsem); - rbt_ib_umem_for_each_in_range(&ctx->umem_tree, 0, ULLONG_MAX, + down_read(&per_mm->umem_rwsem); + rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, 0, ULLONG_MAX, mr_leaf_free, true, imr); - up_read(&ctx->umem_rwsem); + up_read(&per_mm->umem_rwsem); wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free)); } @@ -497,6 +506,7 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, u32 *bytes_mapped) { + struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem); u64 access_mask = ODP_READ_ALLOWED_BIT; int npages = 0, page_shift, np; u64 start_idx, page_mask; @@ -505,7 +515,7 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, size_t size; int ret; - if (!mr->umem->odp_data->page_list) { + if (!odp_mr->page_list) { odp = implicit_mr_get_data(mr, io_virt, bcnt); if (IS_ERR(odp)) @@ -513,11 +523,11 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, mr = odp->private; } else { - odp = mr->umem->odp_data; + odp = odp_mr; } next_mr: - size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt); + size = min_t(size_t, bcnt, ib_umem_end(&odp->umem) - io_virt); page_shift = mr->umem->page_shift; page_mask = ~(BIT(page_shift) - 1); @@ -533,7 +543,7 @@ next_mr: */ smp_rmb(); - ret = ib_umem_odp_map_dma_pages(mr->umem, io_virt, size, + ret = ib_umem_odp_map_dma_pages(to_ib_umem_odp(mr->umem), io_virt, size, access_mask, current_seq); if (ret < 0) @@ -542,7 +552,8 @@ next_mr: np = ret; mutex_lock(&odp->umem_mutex); - if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) { + if (!ib_umem_mmu_notifier_retry(to_ib_umem_odp(mr->umem), + current_seq)) { /* * No need to check whether the MTTs really belong to * this MR, since ib_umem_odp_map_dma_pages already @@ -575,7 +586,7 @@ next_mr: io_virt += size; next = odp_next(odp); - if (unlikely(!next || next->umem->address != io_virt)) { + if (unlikely(!next || next->umem.address != io_virt)) { mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. 
got %p\n", io_virt, next); return -EAGAIN; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 6cba2a02d11b..6841c0f9237f 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -37,6 +37,7 @@ #include <linux/mlx5/fs.h> #include "mlx5_ib.h" #include "ib_rep.h" +#include "cmd.h" /* not supported currently */ static int wq_signature; @@ -850,6 +851,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, goto err_umem; } + MLX5_SET(create_qp_in, *in, uid, to_mpd(pd)->uid); pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas); if (ubuffer->umem) mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0); @@ -1051,7 +1053,8 @@ static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr) static int is_connected(enum ib_qp_type qp_type) { - if (qp_type == IB_QPT_RC || qp_type == IB_QPT_UC) + if (qp_type == IB_QPT_RC || qp_type == IB_QPT_UC || + qp_type == MLX5_IB_QPT_DCI) return 1; return 0; @@ -1059,11 +1062,13 @@ static int is_connected(enum ib_qp_type qp_type) static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, - struct mlx5_ib_sq *sq, u32 tdn) + struct mlx5_ib_sq *sq, u32 tdn, + struct ib_pd *pd) { u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0}; void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + MLX5_SET(create_tis_in, in, uid, to_mpd(pd)->uid); MLX5_SET(tisc, tisc, transport_domain, tdn); if (qp->flags & MLX5_IB_QP_UNDERLAY) MLX5_SET(tisc, tisc, underlay_qpn, qp->underlay_qpn); @@ -1072,9 +1077,9 @@ static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev, } static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev, - struct mlx5_ib_sq *sq) + struct mlx5_ib_sq *sq, struct ib_pd *pd) { - mlx5_core_destroy_tis(dev->mdev, sq->tisn); + mlx5_cmd_destroy_tis(dev->mdev, sq->tisn, to_mpd(pd)->uid); } static void destroy_flow_rule_vport_sq(struct mlx5_ib_dev *dev, @@ -1114,6 +1119,7 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, goto err_umem; } + MLX5_SET(create_sq_in, in, uid, to_mpd(pd)->uid); sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); MLX5_SET(sqc, sqc, flush_in_error_en, 1); if (MLX5_CAP_ETH(dev->mdev, multi_pkt_send_wqe)) @@ -1188,7 +1194,7 @@ static size_t get_rq_pas_size(void *qpc) static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, void *qpin, - size_t qpinlen) + size_t qpinlen, struct ib_pd *pd) { struct mlx5_ib_qp *mqp = rq->base.container_mibqp; __be64 *pas; @@ -1209,6 +1215,7 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, if (!in) return -ENOMEM; + MLX5_SET(create_rq_in, in, uid, to_mpd(pd)->uid); rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); if (!(rq->flags & MLX5_IB_RQ_CVLAN_STRIPPING)) MLX5_SET(rqc, rqc, vsd, 1); @@ -1256,10 +1263,23 @@ static bool tunnel_offload_supported(struct mlx5_core_dev *dev) MLX5_CAP_ETH(dev, tunnel_stateless_geneve_rx)); } +static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev, + struct mlx5_ib_rq *rq, + u32 qp_flags_en, + struct ib_pd *pd) +{ + if (qp_flags_en & (MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC | + MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC)) + mlx5_ib_disable_lb(dev, false, true); + mlx5_cmd_destroy_tir(dev->mdev, rq->tirn, to_mpd(pd)->uid); +} + static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, u32 tdn, - bool tunnel_offload_en) + u32 *qp_flags_en, + struct ib_pd *pd) { + u8 lb_flag = 0; u32 *in; void *tirc; int inlen; @@ -1270,33 +1290,45 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev, if (!in) return -ENOMEM; + 
MLX5_SET(create_tir_in, in, uid, to_mpd(pd)->uid); tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT); MLX5_SET(tirc, tirc, inline_rqn, rq->base.mqp.qpn); MLX5_SET(tirc, tirc, transport_domain, tdn); - if (tunnel_offload_en) + if (*qp_flags_en & MLX5_QP_FLAG_TUNNEL_OFFLOADS) MLX5_SET(tirc, tirc, tunneled_offload_en, 1); - if (dev->rep) - MLX5_SET(tirc, tirc, self_lb_block, - MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_); + if (*qp_flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC) + lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST; + + if (*qp_flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC) + lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST; + + if (dev->rep) { + lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST; + *qp_flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC; + } + + MLX5_SET(tirc, tirc, self_lb_block, lb_flag); err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn); + if (!err && MLX5_GET(tirc, tirc, self_lb_block)) { + err = mlx5_ib_enable_lb(dev, false, true); + + if (err) + destroy_raw_packet_qp_tir(dev, rq, 0, pd); + } kvfree(in); return err; } -static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev, - struct mlx5_ib_rq *rq) -{ - mlx5_core_destroy_tir(dev->mdev, rq->tirn); -} - static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, u32 *in, size_t inlen, - struct ib_pd *pd) + struct ib_pd *pd, + struct ib_udata *udata, + struct mlx5_ib_create_qp_resp *resp) { struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; struct mlx5_ib_sq *sq = &raw_packet_qp->sq; @@ -1306,9 +1338,10 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext); int err; u32 tdn = mucontext->tdn; + u16 uid = to_mpd(pd)->uid; if (qp->sq.wqe_cnt) { - err = create_raw_packet_qp_tis(dev, qp, sq, tdn); + err = create_raw_packet_qp_tis(dev, qp, sq, tdn, pd); if (err) return err; @@ -1316,6 +1349,13 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (err) goto err_destroy_tis; + if (uid) { + resp->tisn = sq->tisn; + resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TISN; + resp->sqn = sq->base.mqp.qpn; + resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_SQN; + } + sq->base.container_mibqp = qp; sq->base.mqp.event = mlx5_ib_qp_event; } @@ -1327,22 +1367,32 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING; if (qp->flags & MLX5_IB_QP_PCI_WRITE_END_PADDING) rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING; - err = create_raw_packet_qp_rq(dev, rq, in, inlen); + err = create_raw_packet_qp_rq(dev, rq, in, inlen, pd); if (err) goto err_destroy_sq; - - err = create_raw_packet_qp_tir(dev, rq, tdn, - qp->tunnel_offload_en); + err = create_raw_packet_qp_tir(dev, rq, tdn, &qp->flags_en, pd); if (err) goto err_destroy_rq; + + if (uid) { + resp->rqn = rq->base.mqp.qpn; + resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_RQN; + resp->tirn = rq->tirn; + resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN; + } } qp->trans_qp.base.mqp.qpn = qp->sq.wqe_cnt ? 
sq->base.mqp.qpn : rq->base.mqp.qpn; + err = ib_copy_to_udata(udata, resp, min(udata->outlen, sizeof(*resp))); + if (err) + goto err_destroy_tir; return 0; +err_destroy_tir: + destroy_raw_packet_qp_tir(dev, rq, qp->flags_en, pd); err_destroy_rq: destroy_raw_packet_qp_rq(dev, rq); err_destroy_sq: @@ -1350,7 +1400,7 @@ err_destroy_sq: return err; destroy_raw_packet_qp_sq(dev, sq); err_destroy_tis: - destroy_raw_packet_qp_tis(dev, sq); + destroy_raw_packet_qp_tis(dev, sq, pd); return err; } @@ -1363,13 +1413,13 @@ static void destroy_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq = &raw_packet_qp->rq; if (qp->rq.wqe_cnt) { - destroy_raw_packet_qp_tir(dev, rq); + destroy_raw_packet_qp_tir(dev, rq, qp->flags_en, qp->ibqp.pd); destroy_raw_packet_qp_rq(dev, rq); } if (qp->sq.wqe_cnt) { destroy_raw_packet_qp_sq(dev, sq); - destroy_raw_packet_qp_tis(dev, sq); + destroy_raw_packet_qp_tis(dev, sq, qp->ibqp.pd); } } @@ -1387,7 +1437,11 @@ static void raw_packet_qp_copy_info(struct mlx5_ib_qp *qp, static void destroy_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) { - mlx5_core_destroy_tir(dev->mdev, qp->rss_qp.tirn); + if (qp->flags_en & (MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC | + MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC)) + mlx5_ib_disable_lb(dev, false, true); + mlx5_cmd_destroy_tir(dev->mdev, qp->rss_qp.tirn, + to_mpd(qp->ibqp.pd)->uid); } static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, @@ -1410,6 +1464,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, u32 tdn = mucontext->tdn; struct mlx5_ib_create_qp_rss ucmd = {}; size_t required_cmd_sz; + u8 lb_flag = 0; if (init_attr->qp_type != IB_QPT_RAW_PACKET) return -EOPNOTSUPP; @@ -1444,7 +1499,9 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return -EOPNOTSUPP; } - if (ucmd.flags & ~MLX5_QP_FLAG_TUNNEL_OFFLOADS) { + if (ucmd.flags & ~(MLX5_QP_FLAG_TUNNEL_OFFLOADS | + MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC | + MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC)) { mlx5_ib_dbg(dev, "invalid flags\n"); return -EOPNOTSUPP; } @@ -1461,6 +1518,16 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return -EOPNOTSUPP; } + if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC || dev->rep) { + lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST; + qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC; + } + + if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC) { + lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST; + qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC; + } + err = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))); if (err) { mlx5_ib_dbg(dev, "copy failed\n"); @@ -1472,6 +1539,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (!in) return -ENOMEM; + MLX5_SET(create_tir_in, in, uid, to_mpd(pd)->uid); tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); @@ -1484,6 +1552,8 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS) MLX5_SET(tirc, tirc, tunneled_offload_en, 1); + MLX5_SET(tirc, tirc, self_lb_block, lb_flag); + if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_INNER) hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner); else @@ -1580,26 +1650,141 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields); create_tir: - if (dev->rep) - 
MLX5_SET(tirc, tirc, self_lb_block, - MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_); - err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn); + if (!err && MLX5_GET(tirc, tirc, self_lb_block)) { + err = mlx5_ib_enable_lb(dev, false, true); + + if (err) + mlx5_cmd_destroy_tir(dev->mdev, qp->rss_qp.tirn, + to_mpd(pd)->uid); + } + if (err) goto err; + if (mucontext->devx_uid) { + resp.comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN; + resp.tirn = qp->rss_qp.tirn; + } + + err = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))); + if (err) + goto err_copy; + kvfree(in); /* qpn is reserved for that QP */ qp->trans_qp.base.mqp.qpn = 0; qp->flags |= MLX5_IB_QP_RSS; return 0; +err_copy: + mlx5_cmd_destroy_tir(dev->mdev, qp->rss_qp.tirn, mucontext->devx_uid); err: kvfree(in); return err; } +static void configure_responder_scat_cqe(struct ib_qp_init_attr *init_attr, + void *qpc) +{ + int rcqe_sz; + + if (init_attr->qp_type == MLX5_IB_QPT_DCI) + return; + + rcqe_sz = mlx5_ib_get_cqe_size(init_attr->recv_cq); + + if (rcqe_sz == 128) { + MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE); + return; + } + + if (init_attr->qp_type != MLX5_IB_QPT_DCT) + MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA32_CQE); +} + +static void configure_requester_scat_cqe(struct mlx5_ib_dev *dev, + struct ib_qp_init_attr *init_attr, + struct mlx5_ib_create_qp *ucmd, + void *qpc) +{ + enum ib_qp_type qpt = init_attr->qp_type; + int scqe_sz; + bool allow_scat_cqe = 0; + + if (qpt == IB_QPT_UC || qpt == IB_QPT_UD) + return; + + if (ucmd) + allow_scat_cqe = ucmd->flags & MLX5_QP_FLAG_ALLOW_SCATTER_CQE; + + if (!allow_scat_cqe && init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) + return; + + scqe_sz = mlx5_ib_get_cqe_size(init_attr->send_cq); + if (scqe_sz == 128) { + MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA64_CQE); + return; + } + + if (init_attr->qp_type != MLX5_IB_QPT_DCI || + MLX5_CAP_GEN(dev->mdev, dc_req_scat_data_cqe)) + MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA32_CQE); +} + +static int atomic_size_to_mode(int size_mask) +{ + /* driver does not support atomic_size > 256B + * and does not know how to translate bigger sizes + */ + int supported_size_mask = size_mask & 0x1ff; + int log_max_size; + + if (!supported_size_mask) + return -EOPNOTSUPP; + + log_max_size = __fls(supported_size_mask); + + if (log_max_size > 3) + return log_max_size; + + return MLX5_ATOMIC_MODE_8B; +} + +static int get_atomic_mode(struct mlx5_ib_dev *dev, + enum ib_qp_type qp_type) +{ + u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations); + u8 atomic = MLX5_CAP_GEN(dev->mdev, atomic); + int atomic_mode = -EOPNOTSUPP; + int atomic_size_mask; + + if (!atomic) + return -EOPNOTSUPP; + + if (qp_type == MLX5_IB_QPT_DCT) + atomic_size_mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc); + else + atomic_size_mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp); + + if ((atomic_operations & MLX5_ATOMIC_OPS_EXTENDED_CMP_SWAP) || + (atomic_operations & MLX5_ATOMIC_OPS_EXTENDED_FETCH_ADD)) + atomic_mode = atomic_size_to_mode(atomic_size_mask); + + if (atomic_mode <= 0 && + (atomic_operations & MLX5_ATOMIC_OPS_CMP_SWAP && + atomic_operations & MLX5_ATOMIC_OPS_FETCH_ADD)) + atomic_mode = MLX5_ATOMIC_MODE_IB_COMP; + + return atomic_mode; +} + +static inline bool check_flags_mask(uint64_t input, uint64_t supported) +{ + return (input & ~supported) == 0; +} + static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, struct mlx5_ib_qp *qp) @@ -1697,20 +1882,47 @@ 
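/*
 * Worked example, not part of the patch, for atomic_size_to_mode() added
 * above: the firmware capability is a bitmask where bit n means 2^n-byte
 * atomics are supported, and anything above 256B (bit 8) is masked off
 * first.  For a hypothetical mask with 8B and 64B set:
 */
static int demo_atomic_size_mask(void)
{
	int size_mask = 0x48;			/* bits 3 and 6: 8B and 64B */
	int supported = size_mask & 0x1ff;	/* cap at 256B, as above */

	/* __fls(0x48) == 6, which is > 3, so the mode is the log2 size (6);
	 * a mask of only 0x8 would fall back to MLX5_ATOMIC_MODE_8B.
	 */
	return __fls(supported);
}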
static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, return -EFAULT; } + if (!check_flags_mask(ucmd.flags, + MLX5_QP_FLAG_SIGNATURE | + MLX5_QP_FLAG_SCATTER_CQE | + MLX5_QP_FLAG_TUNNEL_OFFLOADS | + MLX5_QP_FLAG_BFREG_INDEX | + MLX5_QP_FLAG_TYPE_DCT | + MLX5_QP_FLAG_TYPE_DCI | + MLX5_QP_FLAG_ALLOW_SCATTER_CQE)) + return -EINVAL; + err = get_qp_user_index(to_mucontext(pd->uobject->context), &ucmd, udata->inlen, &uidx); if (err) return err; qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE); - qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE); + if (MLX5_CAP_GEN(dev->mdev, sctr_data_cqe)) + qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE); if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS) { if (init_attr->qp_type != IB_QPT_RAW_PACKET || !tunnel_offload_supported(mdev)) { mlx5_ib_dbg(dev, "Tunnel offload isn't supported\n"); return -EOPNOTSUPP; } - qp->tunnel_offload_en = true; + qp->flags_en |= MLX5_QP_FLAG_TUNNEL_OFFLOADS; + } + + if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC) { + if (init_attr->qp_type != IB_QPT_RAW_PACKET) { + mlx5_ib_dbg(dev, "Self-LB UC isn't supported\n"); + return -EOPNOTSUPP; + } + qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC; + } + + if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC) { + if (init_attr->qp_type != IB_QPT_RAW_PACKET) { + mlx5_ib_dbg(dev, "Self-LB UM isn't supported\n"); + return -EOPNOTSUPP; + } + qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC; } if (init_attr->create_flags & IB_QP_CREATE_SOURCE_QPN) { @@ -1811,23 +2023,10 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, MLX5_SET(qpc, qpc, cd_slave_receive, 1); if (qp->scat_cqe && is_connected(init_attr->qp_type)) { - int rcqe_sz; - int scqe_sz; - - rcqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->recv_cq); - scqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->send_cq); - - if (rcqe_sz == 128) - MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE); - else - MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA32_CQE); - - if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) { - if (scqe_sz == 128) - MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA64_CQE); - else - MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA32_CQE); - } + configure_responder_scat_cqe(init_attr, qpc); + configure_requester_scat_cqe(dev, init_attr, + (pd && pd->uobject) ? 
&ucmd : NULL, + qpc); } if (qp->rq.wqe_cnt) { @@ -1911,7 +2110,8 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->flags & MLX5_IB_QP_UNDERLAY) { qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr; raw_packet_qp_copy_info(qp, &qp->raw_packet_qp); - err = create_raw_packet_qp(dev, qp, in, inlen, pd); + err = create_raw_packet_qp(dev, qp, in, inlen, pd, udata, + &resp); } else { err = mlx5_core_create_qp(dev->mdev, &base->mqp, in, inlen); } @@ -2192,6 +2392,7 @@ static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd, goto err_free; } + MLX5_SET(create_dct_in, qp->dct.in, uid, to_mpd(pd)->uid); dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry); qp->qp_sub_type = MLX5_IB_QPT_DCT; MLX5_SET(dctc, dctc, pd, to_mpd(pd)->pdn); @@ -2200,6 +2401,9 @@ static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd, MLX5_SET64(dctc, dctc, dc_access_key, ucmd->access_key); MLX5_SET(dctc, dctc, user_index, uidx); + if (ucmd->flags & MLX5_QP_FLAG_SCATTER_CQE) + configure_responder_scat_cqe(attr, dctc); + qp->state = IB_QPS_RESET; return &qp->ibqp; @@ -2405,13 +2609,15 @@ int mlx5_ib_destroy_qp(struct ib_qp *qp) return 0; } -static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_attr *attr, - int attr_mask) +static int to_mlx5_access_flags(struct mlx5_ib_qp *qp, + const struct ib_qp_attr *attr, + int attr_mask, __be32 *hw_access_flags) { - u32 hw_access_flags = 0; u8 dest_rd_atomic; u32 access_flags; + struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device); + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) dest_rd_atomic = attr->max_dest_rd_atomic; else @@ -2426,13 +2632,25 @@ static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_att access_flags &= IB_ACCESS_REMOTE_WRITE; if (access_flags & IB_ACCESS_REMOTE_READ) - hw_access_flags |= MLX5_QP_BIT_RRE; - if (access_flags & IB_ACCESS_REMOTE_ATOMIC) - hw_access_flags |= (MLX5_QP_BIT_RAE | MLX5_ATOMIC_MODE_CX); + *hw_access_flags |= MLX5_QP_BIT_RRE; + if ((access_flags & IB_ACCESS_REMOTE_ATOMIC) && + qp->ibqp.qp_type == IB_QPT_RC) { + int atomic_mode; + + atomic_mode = get_atomic_mode(dev, qp->ibqp.qp_type); + if (atomic_mode < 0) + return -EOPNOTSUPP; + + *hw_access_flags |= MLX5_QP_BIT_RAE; + *hw_access_flags |= atomic_mode << MLX5_ATOMIC_MODE_OFFSET; + } + if (access_flags & IB_ACCESS_REMOTE_WRITE) - hw_access_flags |= MLX5_QP_BIT_RWE; + *hw_access_flags |= MLX5_QP_BIT_RWE; + + *hw_access_flags = cpu_to_be32(*hw_access_flags); - return cpu_to_be32(hw_access_flags); + return 0; } enum { @@ -2458,7 +2676,8 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) } static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev, - struct mlx5_ib_sq *sq, u8 sl) + struct mlx5_ib_sq *sq, u8 sl, + struct ib_pd *pd) { void *in; void *tisc; @@ -2471,6 +2690,7 @@ static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev, return -ENOMEM; MLX5_SET(modify_tis_in, in, bitmask.prio, 1); + MLX5_SET(modify_tis_in, in, uid, to_mpd(pd)->uid); tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx); MLX5_SET(tisc, tisc, prio, ((sl & 0x7) << 1)); @@ -2483,7 +2703,8 @@ static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev, } static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev, - struct mlx5_ib_sq *sq, u8 tx_affinity) + struct mlx5_ib_sq *sq, u8 tx_affinity, + struct ib_pd *pd) { void *in; void *tisc; @@ -2496,6 +2717,7 @@ static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev, return -ENOMEM; MLX5_SET(modify_tis_in, in, bitmask.lag_tx_port_affinity, 1); + 
MLX5_SET(modify_tis_in, in, uid, to_mpd(pd)->uid); tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx); MLX5_SET(tisc, tisc, lag_tx_port_affinity, tx_affinity); @@ -2580,7 +2802,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt) return modify_raw_packet_eth_prio(dev->mdev, &qp->raw_packet_qp.sq, - sl & 0xf); + sl & 0xf, qp->ibqp.pd); return 0; } @@ -2728,9 +2950,9 @@ static int ib_mask_to_mlx5_opt(int ib_mask) return result; } -static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev, - struct mlx5_ib_rq *rq, int new_state, - const struct mlx5_modify_raw_qp_param *raw_qp_param) +static int modify_raw_packet_qp_rq( + struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, int new_state, + const struct mlx5_modify_raw_qp_param *raw_qp_param, struct ib_pd *pd) { void *in; void *rqc; @@ -2743,6 +2965,7 @@ static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev, return -ENOMEM; MLX5_SET(modify_rq_in, in, rq_state, rq->state); + MLX5_SET(modify_rq_in, in, uid, to_mpd(pd)->uid); rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); MLX5_SET(rqc, rqc, state, new_state); @@ -2753,8 +2976,9 @@ static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev, MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID); MLX5_SET(rqc, rqc, counter_set_id, raw_qp_param->rq_q_ctr_id); } else - pr_info_once("%s: RAW PACKET QP counters are not supported on current FW\n", - dev->ib_dev.name); + dev_info_once( + &dev->ib_dev.dev, + "RAW PACKET QP counters are not supported on current FW\n"); } err = mlx5_core_modify_rq(dev->mdev, rq->base.mqp.qpn, in, inlen); @@ -2768,10 +2992,9 @@ out: return err; } -static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev, - struct mlx5_ib_sq *sq, - int new_state, - const struct mlx5_modify_raw_qp_param *raw_qp_param) +static int modify_raw_packet_qp_sq( + struct mlx5_core_dev *dev, struct mlx5_ib_sq *sq, int new_state, + const struct mlx5_modify_raw_qp_param *raw_qp_param, struct ib_pd *pd) { struct mlx5_ib_qp *ibqp = sq->base.container_mibqp; struct mlx5_rate_limit old_rl = ibqp->rl; @@ -2788,6 +3011,7 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev, if (!in) return -ENOMEM; + MLX5_SET(modify_sq_in, in, uid, to_mpd(pd)->uid); MLX5_SET(modify_sq_in, in, sq_state, sq->state); sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); @@ -2890,7 +3114,8 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, } if (modify_rq) { - err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param); + err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param, + qp->ibqp.pd); if (err) return err; } @@ -2898,17 +3123,50 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (modify_sq) { if (tx_affinity) { err = modify_raw_packet_tx_affinity(dev->mdev, sq, - tx_affinity); + tx_affinity, + qp->ibqp.pd); if (err) return err; } - return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state, raw_qp_param); + return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state, + raw_qp_param, qp->ibqp.pd); } return 0; } +static unsigned int get_tx_affinity(struct mlx5_ib_dev *dev, + struct mlx5_ib_pd *pd, + struct mlx5_ib_qp_base *qp_base, + u8 port_num) +{ + struct mlx5_ib_ucontext *ucontext = NULL; + unsigned int tx_port_affinity; + + if (pd && pd->ibpd.uobject && pd->ibpd.uobject->context) + ucontext = to_mucontext(pd->ibpd.uobject->context); + + if (ucontext) { + tx_port_affinity = (unsigned int)atomic_add_return( + 1, &ucontext->tx_port_affinity) % + MLX5_MAX_PORTS + + 1; + 
mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x ucontext %p\n", + tx_port_affinity, qp_base->mqp.qpn, ucontext); + } else { + tx_port_affinity = + (unsigned int)atomic_add_return( + 1, &dev->roce[port_num].tx_port_affinity) % + MLX5_MAX_PORTS + + 1; + mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x\n", + tx_port_affinity, qp_base->mqp.qpn); + } + + return tx_port_affinity; +} + static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state, @@ -2974,6 +3232,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (!context) return -ENOMEM; + pd = get_pd(qp); context->flags = cpu_to_be32(mlx5_st << 16); if (!(attr_mask & IB_QP_PATH_MIG_STATE)) { @@ -3002,9 +3261,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, (ibqp->qp_type == IB_QPT_XRC_TGT)) { if (mlx5_lag_is_active(dev->mdev)) { u8 p = mlx5_core_native_port_num(dev->mdev); - tx_affinity = (unsigned int)atomic_add_return(1, - &dev->roce[p].next_port) % - MLX5_MAX_PORTS + 1; + tx_affinity = get_tx_affinity(dev, pd, base, p); context->flags |= cpu_to_be32(tx_affinity << 24); } } @@ -3062,7 +3319,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, goto out; } - pd = get_pd(qp); get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq, &send_cq, &recv_cq); @@ -3092,8 +3348,15 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21); } - if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) - context->params2 |= to_mlx5_access_flags(qp, attr, attr_mask); + if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) { + __be32 access_flags = 0; + + err = to_mlx5_access_flags(qp, attr, attr_mask, &access_flags); + if (err) + goto out; + + context->params2 |= access_flags; + } if (attr_mask & IB_QP_MIN_RNR_TIMER) context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24); @@ -3243,7 +3506,9 @@ static bool modify_dci_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state new int req = IB_QP_STATE; int opt = 0; - if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { + if (new_state == IB_QPS_RESET) { + return is_valid_mask(attr_mask, req, opt); + } else if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { req |= IB_QP_PKEY_INDEX | IB_QP_PORT; return is_valid_mask(attr_mask, req, opt); } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { @@ -3307,10 +3572,14 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE) MLX5_SET(dctc, dctc, rwe, 1); if (attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) { - if (!mlx5_ib_dc_atomic_is_supported(dev)) + int atomic_mode; + + atomic_mode = get_atomic_mode(dev, MLX5_IB_QPT_DCT); + if (atomic_mode < 0) return -EOPNOTSUPP; + + MLX5_SET(dctc, dctc, atomic_mode, atomic_mode); MLX5_SET(dctc, dctc, rae, 1); - MLX5_SET(dctc, dctc, atomic_mode, MLX5_ATOMIC_MODE_DCT_CX); } MLX5_SET(dctc, dctc, pkey_index, attr->pkey_index); MLX5_SET(dctc, dctc, port, attr->port_num); @@ -3367,7 +3636,6 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, size_t required_cmd_sz; int err = -EINVAL; int port; - enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED; if (ibqp->rwq_ind_tbl) return -ENOSYS; @@ -3413,7 +3681,6 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (!(cur_state == new_state && cur_state == IB_QPS_RESET)) { port = attr_mask & IB_QP_PORT ? 
attr->port_num : qp->port; - ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port); } if (qp->flags & MLX5_IB_QP_UNDERLAY) { @@ -3424,7 +3691,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, } } else if (qp_type != MLX5_IB_QPT_REG_UMR && qp_type != MLX5_IB_QPT_DCI && - !ib_modify_qp_is_ok(cur_state, new_state, qp_type, attr_mask, ll)) { + !ib_modify_qp_is_ok(cur_state, new_state, qp_type, + attr_mask)) { mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n", cur_state, new_state, ibqp->qp_type, attr_mask); goto out; @@ -4371,6 +4639,12 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, u8 next_fence = 0; u8 fence; + if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && + !drain)) { + *bad_wr = wr; + return -EIO; + } + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr); @@ -4380,13 +4654,6 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, spin_lock_irqsave(&qp->sq.lock, flags); - if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) { - err = -EIO; - *bad_wr = wr; - nreq = 0; - goto out; - } - for (nreq = 0; wr; nreq++, wr = wr->next) { if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) { mlx5_ib_warn(dev, "\n"); @@ -4700,18 +4967,17 @@ static int _mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, int ind; int i; + if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && + !drain)) { + *bad_wr = wr; + return -EIO; + } + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr); spin_lock_irqsave(&qp->rq.lock, flags); - if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) { - err = -EIO; - *bad_wr = wr; - nreq = 0; - goto out; - } - ind = qp->rq.head & (qp->rq.wqe_cnt - 1); for (nreq = 0; wr; nreq++, wr = wr->next) { @@ -5175,6 +5441,7 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_ib_xrcd *xrcd; int err; + u16 uid; if (!MLX5_CAP_GEN(dev->mdev, xrc)) return ERR_PTR(-ENOSYS); @@ -5183,12 +5450,14 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, if (!xrcd) return ERR_PTR(-ENOMEM); - err = mlx5_core_xrcd_alloc(dev->mdev, &xrcd->xrcdn); + uid = context ? 
to_mucontext(context)->devx_uid : 0; + err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, uid); if (err) { kfree(xrcd); return ERR_PTR(-ENOMEM); } + xrcd->uid = uid; return &xrcd->ibxrcd; } @@ -5196,9 +5465,10 @@ int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd) { struct mlx5_ib_dev *dev = to_mdev(xrcd->device); u32 xrcdn = to_mxrcd(xrcd)->xrcdn; + u16 uid = to_mxrcd(xrcd)->uid; int err; - err = mlx5_core_xrcd_dealloc(dev->mdev, xrcdn); + err = mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, uid); if (err) mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn); @@ -5268,6 +5538,7 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, if (!in) return -ENOMEM; + MLX5_SET(create_rq_in, in, uid, to_mpd(pd)->uid); rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE); @@ -5443,8 +5714,7 @@ static int prepare_user_rq(struct ib_pd *pd, err = create_user_rq(dev, pd, rwq, &ucmd); if (err) { mlx5_ib_dbg(dev, "err %d\n", err); - if (err) - return err; + return err; } rwq->user_index = ucmd.user_index; @@ -5573,6 +5843,9 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, for (i = 0; i < sz; i++) MLX5_SET(rqtc, rqtc, rq_num[i], init_attr->ind_tbl[i]->wq_num); + rwq_ind_tbl->uid = to_mpd(init_attr->ind_tbl[0]->pd)->uid; + MLX5_SET(create_rqt_in, in, uid, rwq_ind_tbl->uid); + err = mlx5_core_create_rqt(dev->mdev, in, inlen, &rwq_ind_tbl->rqtn); kvfree(in); @@ -5591,7 +5864,7 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, return &rwq_ind_tbl->ib_rwq_ind_tbl; err_copy: - mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn); + mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid); err: kfree(rwq_ind_tbl); return ERR_PTR(err); @@ -5602,7 +5875,7 @@ int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl) struct mlx5_ib_rwq_ind_table *rwq_ind_tbl = to_mrwq_ind_table(ib_rwq_ind_tbl); struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_tbl->device); - mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn); + mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid); kfree(rwq_ind_tbl); return 0; @@ -5653,6 +5926,7 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, if (wq_state == IB_WQS_ERR) wq_state = MLX5_RQC_STATE_ERR; MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state); + MLX5_SET(modify_rq_in, in, uid, to_mpd(wq->pd)->uid); MLX5_SET(rqc, rqc, state, wq_state); if (wq_attr_mask & IB_WQ_FLAGS) { @@ -5684,8 +5958,9 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, MLX5_SET(rqc, rqc, counter_set_id, dev->port->cnts.set_id); } else - pr_info_once("%s: Receive WQ counters are not supported on current FW\n", - dev->ib_dev.name); + dev_info_once( + &dev->ib_dev.dev, + "Receive WQ counters are not supported on current FW\n"); } err = mlx5_core_modify_rq(dev->mdev, rwq->core_qp.qpn, in, inlen); diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index d359fecf7a5b..d012e7dbcc38 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -144,6 +144,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; in->page_offset = offset; + in->uid = to_mpd(pd)->uid; if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && in->type != IB_SRQT_BASIC) in->user_index = uidx; |
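A rough illustration of the round-robin spreading introduced by get_tx_affinity() in the qp.c hunks above: each new QP bumps an atomic counter (per ucontext when one exists, otherwise per port) and folds it into a 1-based port number via "% MLX5_MAX_PORTS + 1". The standalone sketch below models only that arithmetic; pick_tx_port(), the use of C11 atomic_fetch_add() in place of the kernel's atomic_add_return(), and MLX5_MAX_PORTS == 2 are assumptions made for the example, not part of the patch.

/*
 * Standalone sketch of the tx-affinity round-robin (not kernel code).
 * atomic_add_return() returns the new counter value; atomic_fetch_add()
 * returns the old one, hence the "+ 1" below.
 */
#include <stdatomic.h>
#include <stdio.h>

#define MLX5_MAX_PORTS 2		/* assumed value, for illustration only */

static atomic_uint tx_port_affinity;	/* models the per-ucontext counter */

static unsigned int pick_tx_port(void)	/* hypothetical helper name */
{
	unsigned int n = atomic_fetch_add(&tx_port_affinity, 1) + 1;

	return n % MLX5_MAX_PORTS + 1;	/* cycles 2, 1, 2, 1, ... */
}

int main(void)
{
	for (int i = 0; i < 6; i++)
		printf("QP %d -> tx port %u\n", i, pick_tx_port());
	return 0;
}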