diff options
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- | drivers/infiniband/hw/mlx5/devx.c | 37 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/mlx5_ib.h | 36 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/mr.c | 5 |
3 files changed, 61 insertions, 17 deletions
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index a41e8d582f5b..2211a0be16f3 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -2160,26 +2160,17 @@ err: static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, struct uverbs_attr_bundle *attrs, - struct devx_umem *obj) + struct devx_umem *obj, u32 access_flags) { u64 addr; size_t size; - u32 access; int err; if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) || uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN)) return -EFAULT; - err = uverbs_get_flags32(&access, attrs, - MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS, - IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_REMOTE_READ); - if (err) - return err; - - err = ib_check_mr_access(&dev->ib_dev, access); + err = ib_check_mr_access(&dev->ib_dev, access_flags); if (err) return err; @@ -2193,12 +2184,12 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, return -EFAULT; umem_dmabuf = ib_umem_dmabuf_get_pinned( - &dev->ib_dev, addr, size, dmabuf_fd, access); + &dev->ib_dev, addr, size, dmabuf_fd, access_flags); if (IS_ERR(umem_dmabuf)) return PTR_ERR(umem_dmabuf); obj->umem = &umem_dmabuf->umem; } else { - obj->umem = ib_umem_get(&dev->ib_dev, addr, size, access); + obj->umem = ib_umem_get(&dev->ib_dev, addr, size, access_flags); if (IS_ERR(obj->umem)) return PTR_ERR(obj->umem); } @@ -2240,7 +2231,8 @@ static unsigned int devx_umem_find_best_pgsize(struct ib_umem *umem, static int devx_umem_reg_cmd_alloc(struct mlx5_ib_dev *dev, struct uverbs_attr_bundle *attrs, struct devx_umem *obj, - struct devx_umem_reg_cmd *cmd) + struct devx_umem_reg_cmd *cmd, + int access) { unsigned long pgsz_bitmap; unsigned int page_size; @@ -2289,6 +2281,9 @@ static int devx_umem_reg_cmd_alloc(struct mlx5_ib_dev *dev, MLX5_SET(umem, umem, page_offset, ib_umem_dma_offset(obj->umem, page_size)); + if (mlx5_umem_needs_ats(dev, obj->umem, access)) + MLX5_SET(umem, umem, ats, 1); + mlx5_ib_populate_pas(obj->umem, page_size, mtt, (obj->umem->writable ? MLX5_IB_MTT_WRITE : 0) | MLX5_IB_MTT_READ); @@ -2306,20 +2301,30 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context( &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext); struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device); + int access_flags; int err; if (!c->devx_uid) return -EINVAL; + err = uverbs_get_flags32(&access_flags, attrs, + MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS, + IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ | + IB_ACCESS_RELAXED_ORDERING); + if (err) + return err; + obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL); if (!obj) return -ENOMEM; - err = devx_umem_get(dev, &c->ibucontext, attrs, obj); + err = devx_umem_get(dev, &c->ibucontext, attrs, obj, access_flags); if (err) goto err_obj_free; - err = devx_umem_reg_cmd_alloc(dev, attrs, obj, &cmd); + err = devx_umem_reg_cmd_alloc(dev, attrs, obj, &cmd, access_flags); if (err) goto err_umem_release; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 2e2ad3918385..7e2c4a378220 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1550,4 +1550,40 @@ static inline bool rt_supported(int ts_cap) return ts_cap == MLX5_TIMESTAMP_FORMAT_CAP_REAL_TIME || ts_cap == MLX5_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME; } + +/* + * PCI Peer to Peer is a trainwreck. If no switch is present then things + * sometimes work, depending on the pci_distance_p2p logic for excluding broken + * root complexes. However if a switch is present in the path, then things get + * really ugly depending on how the switch is setup. This table assumes that the + * root complex is strict and is validating that all req/reps are matches + * perfectly - so any scenario where it sees only half the transaction is a + * failure. + * + * CR/RR/DT ATS RO P2P + * 00X X X OK + * 010 X X fails (request is routed to root but root never sees comp) + * 011 0 X fails (request is routed to root but root never sees comp) + * 011 1 X OK + * 10X X 1 OK + * 101 X 0 fails (completion is routed to root but root didn't see req) + * 110 X 0 SLOW + * 111 0 0 SLOW + * 111 1 0 fails (completion is routed to root but root didn't see req) + * 111 1 1 OK + * + * Unfortunately we cannot reliably know if a switch is present or what the + * CR/RR/DT ACS settings are, as in a VM that is all hidden. Assume that + * CR/RR/DT is 111 if the ATS cap is enabled and follow the last three rows. + * + * For now assume if the umem is a dma_buf then it is P2P. + */ +static inline bool mlx5_umem_needs_ats(struct mlx5_ib_dev *dev, + struct ib_umem *umem, int access_flags) +{ + if (!MLX5_CAP_GEN(dev->mdev, ats) || !umem->is_dmabuf) + return false; + return access_flags & IB_ACCESS_RELAXED_ORDERING; +} + #endif /* MLX5_IB_H */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 4fcb653b35bb..410cc5fd2523 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -935,7 +935,8 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, * cache then synchronously create an uncached one. */ if (!ent || ent->limit == 0 || - !mlx5r_umr_can_reconfig(dev, 0, access_flags)) { + !mlx5r_umr_can_reconfig(dev, 0, access_flags) || + mlx5_umem_needs_ats(dev, umem, access_flags)) { mutex_lock(&dev->slow_path_mutex); mr = reg_create(pd, umem, iova, access_flags, page_size, false); mutex_unlock(&dev->slow_path_mutex); @@ -1016,6 +1017,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, MLX5_SET(mkc, mkc, translations_octword_size, get_octo_len(iova, umem->length, mr->page_shift)); MLX5_SET(mkc, mkc, log_page_size, mr->page_shift); + if (mlx5_umem_needs_ats(dev, umem, access_flags)) + MLX5_SET(mkc, mkc, ma_translation_mode, 1); if (populate) { MLX5_SET(create_mkey_in, in, translations_octword_actual_size, get_octo_len(iova, umem->length, mr->page_shift)); |