author    Linus Torvalds <torvalds@linux-foundation.org>	2018-10-26 16:38:19 +0200
committer Linus Torvalds <torvalds@linux-foundation.org>	2018-10-26 16:38:19 +0200
commit    da19a102ce87bf3e0a7fe277a659d1fc35330d6d (patch)
tree      a6c1d40ef544e812b31f4b5f497c20d449d45ec3 /include/rdma
parent    Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc (diff)
parent    IB/mlx5: Add support for extended atomic operations (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
 "This has been a smaller cycle with many of the commits being smallish
  code fixes and improvements across the drivers.

   - Driver updates for bnxt_re, cxgb4, hfi1, hns, mlx5, nes, qedr, and rxe

   - Memory window support in hns

   - mlx5 user API 'flow mutate/steering' allows accessing the full packet
     mangling and matching machinery from user space

   - Support inter-working with verbs API calls in the 'devx' mlx5 user
     API, and provide options to use devx with less privilege

   - Modernize the use of sysfs and the device interface to use attribute
     groups and cdev properly for uverbs, and clean up some of the core
     code's device list management

   - More progress on net namespaces for RDMA devices

   - Consolidate driver BAR mmapping support into core code helpers and
     rework how RDMA holds pointers to mm_struct for get_user_pages cases

   - First pass to use 'dev_name' instead of ib_device->name

   - Device renaming for RDMA devices"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (242 commits)
  IB/mlx5: Add support for extended atomic operations
  RDMA/core: Fix comment for hw stats init for port == 0
  RDMA/core: Refactor ib_register_device() function
  RDMA/core: Fix unwinding flow in case of error to register device
  ib_srp: Remove WARN_ON in srp_terminate_io()
  IB/mlx5: Allow scatter to CQE without global signaled WRs
  IB/mlx5: Verify that driver supports user flags
  IB/mlx5: Support scatter to CQE for DC transport type
  RDMA/drivers: Use core provided API for registering device attributes
  RDMA/core: Allow existing drivers to set one sysfs group per device
  IB/rxe: Remove unnecessary enum values
  RDMA/umad: Use kernel API to allocate umad indexes
  RDMA/uverbs: Use kernel API to allocate uverbs indexes
  RDMA/core: Increase total number of RDMA ports across all devices
  IB/mlx4: Add port and TID to MAD debug print
  IB/mlx4: Enable debug print of SMPs
  RDMA/core: Rename ports_parent to ports_kobj
  RDMA/core: Do not expose unsupported counters
  IB/mlx4: Refer to the device kobject instead of ports_parent
  RDMA/nldev: Allow IB device rename through RDMA netlink
  ...
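As a hedged illustration of the 'dev_name' and device-naming items above (the mydrv_* names are invented, not taken from this series), driver registration under the reworked ib_register_device() signature looks roughly like this; the name template is now passed at registration time and dev_name() replaces direct reads of ib_device->name:

/* sketch only: mydrv_dev and its ib_dev member are hypothetical */
static int mydrv_register(struct mydrv_dev *mdev)
{
	int ret;

	ret = ib_register_device(&mdev->ib_dev, "mydrv%d", NULL);
	if (ret)
		return ret;

	/* prefer dev_name() over ib_device->name for logging */
	pr_info("registered RDMA device %s\n", dev_name(&mdev->ib_dev.dev));
	return 0;
}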
Diffstat (limited to 'include/rdma')
-rw-r--r--  include/rdma/ib_addr.h             11
-rw-r--r--  include/rdma/ib_cm.h                2
-rw-r--r--  include/rdma/ib_sa.h               38
-rw-r--r--  include/rdma/ib_umem.h              9
-rw-r--r--  include/rdma/ib_umem_odp.h         75
-rw-r--r--  include/rdma/ib_verbs.h           149
-rw-r--r--  include/rdma/rdma_cm.h             11
-rw-r--r--  include/rdma/rdma_netlink.h         4
-rw-r--r--  include/rdma/rdma_vt.h             51
-rw-r--r--  include/rdma/rdmavt_qp.h            7
-rw-r--r--  include/rdma/restrack.h            12
-rw-r--r--  include/rdma/uverbs_ioctl.h       111
-rw-r--r--  include/rdma/uverbs_std_types.h    51
13 files changed, 385 insertions(+), 146 deletions(-)
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index 77c7908b7d73..2734c895c1bf 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -46,7 +46,6 @@
#include <net/ip.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_pack.h>
-#include <net/ipv6.h>
#include <net/net_namespace.h>
/**
@@ -95,20 +94,18 @@ int rdma_translate_ip(const struct sockaddr *addr,
* @timeout_ms: Amount of time to wait for the address resolution to complete.
* @callback: Call invoked once address resolution has completed, timed out,
* or been canceled. A status of 0 indicates success.
+ * @resolve_by_gid_attr: Resolve the ip based on the GID attribute from
+ * rdma_dev_addr.
* @context: User-specified context associated with the call.
*/
int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
- struct rdma_dev_addr *addr, int timeout_ms,
+ struct rdma_dev_addr *addr, unsigned long timeout_ms,
void (*callback)(int status, struct sockaddr *src_addr,
struct rdma_dev_addr *addr, void *context),
- void *context);
+ bool resolve_by_gid_attr, void *context);
void rdma_addr_cancel(struct rdma_dev_addr *addr);
-void rdma_copy_addr(struct rdma_dev_addr *dev_addr,
- const struct net_device *dev,
- const unsigned char *dst_dev_addr);
-
int rdma_addr_size(const struct sockaddr *addr);
int rdma_addr_size_in6(struct sockaddr_in6 *addr);
int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr);
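The hunk above gives rdma_resolve_ip() an unsigned long timeout and a new resolve_by_gid_attr flag. A hedged caller sketch, with the callback, completion and address variables invented for illustration:

static void my_addr_done(int status, struct sockaddr *src_addr,
			 struct rdma_dev_addr *addr, void *context)
{
	if (status)
		pr_warn("address resolution failed: %d\n", status);
	complete(context);		/* wake whoever is waiting on the result */
}

/* resolve dst_addr using the GID attribute already stored in dev_addr */
ret = rdma_resolve_ip(src_addr, dst_addr, &dev_addr, 2000 /* ms */,
		      my_addr_done, true /* resolve_by_gid_attr */, &done);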
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index c10f4b5ea8ab..49f4f75499b3 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -583,7 +583,7 @@ struct ib_cm_sidr_req_param {
struct sa_path_rec *path;
const struct ib_gid_attr *sgid_attr;
__be64 service_id;
- int timeout_ms;
+ unsigned long timeout_ms;
const void *private_data;
u8 private_data_len;
u8 max_cm_retries;
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index b6ddf2a1b9d8..19520979b84c 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -449,28 +449,23 @@ struct ib_sa_query;
void ib_sa_cancel_query(int id, struct ib_sa_query *query);
-int ib_sa_path_rec_get(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
- struct sa_path_rec *rec,
- ib_sa_comp_mask comp_mask,
- int timeout_ms, gfp_t gfp_mask,
- void (*callback)(int status,
- struct sa_path_rec *resp,
+int ib_sa_path_rec_get(struct ib_sa_client *client, struct ib_device *device,
+ u8 port_num, struct sa_path_rec *rec,
+ ib_sa_comp_mask comp_mask, unsigned long timeout_ms,
+ gfp_t gfp_mask,
+ void (*callback)(int status, struct sa_path_rec *resp,
void *context),
- void *context,
- struct ib_sa_query **query);
+ void *context, struct ib_sa_query **query);
int ib_sa_service_rec_query(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
- u8 method,
- struct ib_sa_service_rec *rec,
- ib_sa_comp_mask comp_mask,
- int timeout_ms, gfp_t gfp_mask,
- void (*callback)(int status,
- struct ib_sa_service_rec *resp,
- void *context),
- void *context,
- struct ib_sa_query **sa_query);
+ struct ib_device *device, u8 port_num, u8 method,
+ struct ib_sa_service_rec *rec,
+ ib_sa_comp_mask comp_mask, unsigned long timeout_ms,
+ gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct ib_sa_service_rec *resp,
+ void *context),
+ void *context, struct ib_sa_query **sa_query);
struct ib_sa_multicast {
struct ib_sa_mcmember_rec rec;
@@ -573,12 +568,11 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
struct ib_sa_guidinfo_rec *rec,
ib_sa_comp_mask comp_mask, u8 method,
- int timeout_ms, gfp_t gfp_mask,
+ unsigned long timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_sa_guidinfo_rec *resp,
void *context),
- void *context,
- struct ib_sa_query **sa_query);
+ void *context, struct ib_sa_query **sa_query);
bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client,
struct ib_device *device,
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index a1fd63871d17..5d3755ec5afa 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -42,15 +42,14 @@ struct ib_umem_odp;
struct ib_umem {
struct ib_ucontext *context;
+ struct mm_struct *owning_mm;
size_t length;
unsigned long address;
int page_shift;
- int writable;
- int hugetlb;
+ u32 writable : 1;
+ u32 hugetlb : 1;
+ u32 is_odp : 1;
struct work_struct work;
- struct mm_struct *mm;
- unsigned long diff;
- struct ib_umem_odp *odp_data;
struct sg_table sg_head;
int nmap;
int npages;
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index 381cdf5a9bd1..0b1446fe2fab 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -43,6 +43,9 @@ struct umem_odp_node {
};
struct ib_umem_odp {
+ struct ib_umem umem;
+ struct ib_ucontext_per_mm *per_mm;
+
/*
* An array of the pages included in the on-demand paging umem.
* Indices of pages that are currently not mapped into the device will
@@ -64,16 +67,9 @@ struct ib_umem_odp {
struct mutex umem_mutex;
void *private; /* for the HW driver to use. */
- /* When false, use the notifier counter in the ucontext struct. */
- bool mn_counters_active;
int notifiers_seq;
int notifiers_count;
- /* A linked list of umems that don't have private mmu notifier
- * counters yet. */
- struct list_head no_private_counters;
- struct ib_umem *umem;
-
/* Tree tracking */
struct umem_odp_node interval_tree;
@@ -82,15 +78,34 @@ struct ib_umem_odp {
struct work_struct work;
};
+static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem)
+{
+ return container_of(umem, struct ib_umem_odp, umem);
+}
+
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem,
- int access);
-struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
- unsigned long addr,
- size_t size);
+struct ib_ucontext_per_mm {
+ struct ib_ucontext *context;
+ struct mm_struct *mm;
+ struct pid *tgid;
+ bool active;
+
+ struct rb_root_cached umem_tree;
+ /* Protects umem_tree */
+ struct rw_semaphore umem_rwsem;
-void ib_umem_odp_release(struct ib_umem *umem);
+ struct mmu_notifier mn;
+ unsigned int odp_mrs_count;
+
+ struct list_head ucontext_list;
+ struct rcu_head rcu;
+};
+
+int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access);
+struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm,
+ unsigned long addr, size_t size);
+void ib_umem_odp_release(struct ib_umem_odp *umem_odp);
/*
* The lower 2 bits of the DMA address signal the R/W permissions for
@@ -105,13 +120,14 @@ void ib_umem_odp_release(struct ib_umem *umem);
#define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT))
-int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bcnt,
- u64 access_mask, unsigned long current_seq);
+int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
+ u64 bcnt, u64 access_mask,
+ unsigned long current_seq);
-void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 start_offset,
+void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
u64 bound);
-typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end,
+typedef int (*umem_call_back)(struct ib_umem_odp *item, u64 start, u64 end,
void *cookie);
/*
* Call the callback on each ib_umem in the range. Returns the logical or of
@@ -129,46 +145,37 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root,
u64 addr, u64 length);
-static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item,
+static inline int ib_umem_mmu_notifier_retry(struct ib_umem_odp *umem_odp,
unsigned long mmu_seq)
{
/*
* This code is strongly based on the KVM code from
* mmu_notifier_retry. Should be called with
- * the relevant locks taken (item->odp_data->umem_mutex
+ * the relevant locks taken (umem_odp->umem_mutex
* and the ucontext umem_mutex semaphore locked for read).
*/
- /* Do not allow page faults while the new ib_umem hasn't seen a state
- * with zero notifiers yet, and doesn't have its own valid set of
- * private counters. */
- if (!item->odp_data->mn_counters_active)
- return 1;
-
- if (unlikely(item->odp_data->notifiers_count))
+ if (unlikely(umem_odp->notifiers_count))
return 1;
- if (item->odp_data->notifiers_seq != mmu_seq)
+ if (umem_odp->notifiers_seq != mmu_seq)
return 1;
return 0;
}
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
-static inline int ib_umem_odp_get(struct ib_ucontext *context,
- struct ib_umem *umem,
- int access)
+static inline int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
{
return -EINVAL;
}
-static inline struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
- unsigned long addr,
- size_t size)
+static inline struct ib_umem_odp *
+ib_alloc_odp_umem(struct ib_ucontext *context, unsigned long addr, size_t size)
{
return ERR_PTR(-EINVAL);
}
-static inline void ib_umem_odp_release(struct ib_umem *umem) {}
+static inline void ib_umem_odp_release(struct ib_umem_odp *umem_odp) {}
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
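With ib_umem now embedded inside ib_umem_odp, drivers convert with to_ib_umem_odp() instead of chasing the old odp_data pointer. A hedged page-fault-path sketch (mr, io_virt, bcnt and my_hw_update_translation() are hypothetical stand-ins for driver state):

struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
unsigned long current_seq = READ_ONCE(odp->notifiers_seq);
int npages;

npages = ib_umem_odp_map_dma_pages(odp, io_virt, bcnt,
				   ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT,
				   current_seq);
if (npages < 0)
	return npages;

mutex_lock(&odp->umem_mutex);
if (!ib_umem_mmu_notifier_retry(odp, current_seq))
	my_hw_update_translation(mr, odp, io_virt, npages);
mutex_unlock(&odp->umem_mutex);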
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 0ed5d913a492..9c0c2132a2d6 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -69,8 +69,11 @@
#define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN
+struct ib_umem_odp;
+
extern struct workqueue_struct *ib_wq;
extern struct workqueue_struct *ib_comp_wq;
+extern struct workqueue_struct *ib_comp_unbound_wq;
union ib_gid {
u8 raw[16];
@@ -1137,7 +1140,9 @@ enum ib_qp_create_flags {
*/
struct ib_qp_init_attr {
+ /* Consumer's event_handler callback must not block */
void (*event_handler)(struct ib_event *, void *);
+
void *qp_context;
struct ib_cq *send_cq;
struct ib_cq *recv_cq;
@@ -1146,7 +1151,7 @@ struct ib_qp_init_attr {
struct ib_qp_cap cap;
enum ib_sig_type sq_sig_type;
enum ib_qp_type qp_type;
- enum ib_qp_create_flags create_flags;
+ u32 create_flags;
/*
* Only needed for special QP types, or when using the RW API.
@@ -1278,21 +1283,27 @@ struct ib_qp_attr {
};
enum ib_wr_opcode {
- IB_WR_RDMA_WRITE,
- IB_WR_RDMA_WRITE_WITH_IMM,
- IB_WR_SEND,
- IB_WR_SEND_WITH_IMM,
- IB_WR_RDMA_READ,
- IB_WR_ATOMIC_CMP_AND_SWP,
- IB_WR_ATOMIC_FETCH_AND_ADD,
- IB_WR_LSO,
- IB_WR_SEND_WITH_INV,
- IB_WR_RDMA_READ_WITH_INV,
- IB_WR_LOCAL_INV,
- IB_WR_REG_MR,
- IB_WR_MASKED_ATOMIC_CMP_AND_SWP,
- IB_WR_MASKED_ATOMIC_FETCH_AND_ADD,
+ /* These are shared with userspace */
+ IB_WR_RDMA_WRITE = IB_UVERBS_WR_RDMA_WRITE,
+ IB_WR_RDMA_WRITE_WITH_IMM = IB_UVERBS_WR_RDMA_WRITE_WITH_IMM,
+ IB_WR_SEND = IB_UVERBS_WR_SEND,
+ IB_WR_SEND_WITH_IMM = IB_UVERBS_WR_SEND_WITH_IMM,
+ IB_WR_RDMA_READ = IB_UVERBS_WR_RDMA_READ,
+ IB_WR_ATOMIC_CMP_AND_SWP = IB_UVERBS_WR_ATOMIC_CMP_AND_SWP,
+ IB_WR_ATOMIC_FETCH_AND_ADD = IB_UVERBS_WR_ATOMIC_FETCH_AND_ADD,
+ IB_WR_LSO = IB_UVERBS_WR_TSO,
+ IB_WR_SEND_WITH_INV = IB_UVERBS_WR_SEND_WITH_INV,
+ IB_WR_RDMA_READ_WITH_INV = IB_UVERBS_WR_RDMA_READ_WITH_INV,
+ IB_WR_LOCAL_INV = IB_UVERBS_WR_LOCAL_INV,
+ IB_WR_MASKED_ATOMIC_CMP_AND_SWP =
+ IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP,
+ IB_WR_MASKED_ATOMIC_FETCH_AND_ADD =
+ IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD,
+
+ /* These are kernel only and can not be issued by userspace */
+ IB_WR_REG_MR = 0x20,
IB_WR_REG_SIG_MR,
+
/* reserve values for low level drivers' internal use.
* These values will not be used at all in the ib core layer.
*/
@@ -1485,26 +1496,15 @@ struct ib_ucontext {
* it is set when we are closing the file descriptor and indicates
* that mm_sem may be locked.
*/
- int closing;
+ bool closing;
bool cleanup_retryable;
- struct pid *tgid;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- struct rb_root_cached umem_tree;
- /*
- * Protects .umem_rbroot and tree, as well as odp_mrs_count and
- * mmu notifiers registration.
- */
- struct rw_semaphore umem_rwsem;
- void (*invalidate_range)(struct ib_umem *umem,
+ void (*invalidate_range)(struct ib_umem_odp *umem_odp,
unsigned long start, unsigned long end);
-
- struct mmu_notifier mn;
- atomic_t notifier_count;
- /* A list of umems that don't have private mmu notifier counters yet. */
- struct list_head no_private_counters;
- int odp_mrs_count;
+ struct mutex per_mm_list_lock;
+ struct list_head per_mm_list;
#endif
struct ib_rdmacg_object cg_obj;
@@ -1570,9 +1570,10 @@ struct ib_ah {
typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
enum ib_poll_context {
- IB_POLL_DIRECT, /* caller context, no hw completions */
- IB_POLL_SOFTIRQ, /* poll from softirq context */
- IB_POLL_WORKQUEUE, /* poll from workqueue */
+ IB_POLL_DIRECT, /* caller context, no hw completions */
+ IB_POLL_SOFTIRQ, /* poll from softirq context */
+ IB_POLL_WORKQUEUE, /* poll from workqueue */
+ IB_POLL_UNBOUND_WORKQUEUE, /* poll from unbound workqueue */
};
struct ib_cq {
@@ -1589,6 +1590,7 @@ struct ib_cq {
struct irq_poll iop;
struct work_struct work;
};
+ struct workqueue_struct *comp_wq;
/*
* Implementation details of the RDMA core, don't use in drivers:
*/
@@ -2263,10 +2265,11 @@ struct ib_device {
struct list_head event_handler_list;
spinlock_t event_handler_lock;
- spinlock_t client_data_lock;
+ rwlock_t client_data_lock;
struct list_head core_list;
/* Access to the client_data_list is protected by the client_data_lock
- * spinlock and the lists_rwsem read-write semaphore */
+ * rwlock and the lists_rwsem read-write semaphore
+ */
struct list_head client_data_list;
struct ib_cache cache;
@@ -2550,7 +2553,13 @@ struct ib_device {
struct module *owner;
struct device dev;
- struct kobject *ports_parent;
+ /* First group for device attributes,
+ * Second group for driver provided attributes (optional).
+ * It is NULL terminated array.
+ */
+ const struct attribute_group *groups[3];
+
+ struct kobject *ports_kobj;
struct list_head port_list;
enum {
@@ -2633,9 +2642,9 @@ void ib_dealloc_device(struct ib_device *device);
void ib_get_device_fw_str(struct ib_device *device, char *str);
-int ib_register_device(struct ib_device *device,
- int (*port_callback)(struct ib_device *,
- u8, struct kobject *));
+int ib_register_device(struct ib_device *device, const char *name,
+ int (*port_callback)(struct ib_device *, u8,
+ struct kobject *));
void ib_unregister_device(struct ib_device *device);
int ib_register_client (struct ib_client *client);
@@ -2645,6 +2654,28 @@ void *ib_get_client_data(struct ib_device *device, struct ib_client *client);
void ib_set_client_data(struct ib_device *device, struct ib_client *client,
void *data);
+#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
+int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
+ unsigned long pfn, unsigned long size, pgprot_t prot);
+int rdma_user_mmap_page(struct ib_ucontext *ucontext,
+ struct vm_area_struct *vma, struct page *page,
+ unsigned long size);
+#else
+static inline int rdma_user_mmap_io(struct ib_ucontext *ucontext,
+ struct vm_area_struct *vma,
+ unsigned long pfn, unsigned long size,
+ pgprot_t prot)
+{
+ return -EINVAL;
+}
+static inline int rdma_user_mmap_page(struct ib_ucontext *ucontext,
+ struct vm_area_struct *vma, struct page *page,
+ unsigned long size)
+{
+ return -EINVAL;
+}
+#endif
+
static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
{
return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0;
@@ -2728,7 +2759,6 @@ static inline int ib_destroy_usecnt(atomic_t *usecnt,
* @next_state: Next QP state
* @type: QP type
* @mask: Mask of supplied QP attributes
- * @ll : link layer of port
*
* This function is a helper function that a low-level driver's
* modify_qp method can use to validate the consumer's input. It
@@ -2737,8 +2767,7 @@ static inline int ib_destroy_usecnt(atomic_t *usecnt,
* and that the attribute mask supplied is allowed for the transition.
*/
bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
- enum ib_qp_type type, enum ib_qp_attr_mask mask,
- enum rdma_link_layer ll);
+ enum ib_qp_type type, enum ib_qp_attr_mask mask);
void ib_register_event_handler(struct ib_event_handler *event_handler);
void ib_unregister_event_handler(struct ib_event_handler *event_handler);
@@ -4167,20 +4196,6 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector)
}
-static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow,
- struct ib_qp *qp, struct ib_device *device)
-{
- uobj->object = ibflow;
- ibflow->uobject = uobj;
-
- if (qp) {
- atomic_inc(&qp->usecnt);
- ibflow->qp = qp;
- }
-
- ibflow->device = device;
-}
-
/**
* rdma_roce_rescan_device - Rescan all of the network devices in the system
* and add their gids, as needed, to the relevant RoCE devices.
@@ -4205,4 +4220,26 @@ int rdma_init_netdev(struct ib_device *device, u8 port_num,
void (*setup)(struct net_device *),
struct net_device *netdev);
+/**
+ * rdma_set_device_sysfs_group - Set device attributes group to have
+ * driver specific sysfs entries at
+ * for infiniband class.
+ *
+ * @device: device pointer for which attributes to be created
+ * @group: Pointer to group which should be added when device
+ * is registered with sysfs.
+ * rdma_set_device_sysfs_group() allows existing drivers to expose one
+ * group per device to have sysfs attributes.
+ *
+ * NOTE: New drivers should not make use of this API; instead new device
+ * parameter should be exposed via netlink command. This API and mechanism
+ * exist only for existing drivers.
+ */
+static inline void
+rdma_set_device_sysfs_group(struct ib_device *dev,
+ const struct attribute_group *group)
+{
+ dev->groups[1] = group;
+}
+
#endif /* IB_VERBS_H */
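Two of the new ib_verbs.h entry points above lend themselves to short sketches; both are illustrative (mydrv_* names and to_mdev() are invented) rather than code from this series. The first shows a driver exposing its sysfs attributes through the single allowed group and allocating a CQ polled from the unbound workqueue; the second replaces an open-coded io_remap_pfn_range() in the driver's mmap handler with rdma_user_mmap_io():

/* set before ib_register_device() so the group is created with the device */
static const struct attribute_group mydrv_attr_group = {
	.attrs = mydrv_class_attributes,	/* hypothetical attribute array */
};
rdma_set_device_sysfs_group(&mdev->ib_dev, &mydrv_attr_group);

/* CQ completion polling can now run on the unbound workqueue */
cq = ib_alloc_cq(&mdev->ib_dev, priv, nr_cqe, 0, IB_POLL_UNBOUND_WORKQUEUE);

static int mydrv_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
	struct mydrv_dev *mdev = to_mdev(context->device);	/* hypothetical container_of helper */
	unsigned long pfn = mdev->db_bar_pa >> PAGE_SHIFT;	/* hypothetical BAR address */

	return rdma_user_mmap_io(context, vma, pfn, PAGE_SIZE,
				 pgprot_noncached(vma->vm_page_prot));
}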
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 5d71a7f51a9f..60987a5903b7 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -152,7 +152,11 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
* @ps: RDMA port space.
* @qp_type: type of queue pair associated with the id.
*
- * The id holds a reference on the network namespace until it is destroyed.
+ * Returns a new rdma_cm_id. The id holds a reference on the network
+ * namespace until it is destroyed.
+ *
+ * The event handler callback serializes on the id's mutex and is
+ * allowed to sleep.
*/
#define rdma_create_id(net, event_handler, context, ps, qp_type) \
__rdma_create_id((net), (event_handler), (context), (ps), (qp_type), \
@@ -192,7 +196,8 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr);
* @timeout_ms: Time to wait for resolution to complete.
*/
int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
- const struct sockaddr *dst_addr, int timeout_ms);
+ const struct sockaddr *dst_addr,
+ unsigned long timeout_ms);
/**
* rdma_resolve_route - Resolve the RDMA address bound to the RDMA identifier
@@ -202,7 +207,7 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
* Users must have first called rdma_resolve_addr to resolve a dst_addr
* into an RDMA address before calling this routine.
*/
-int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms);
+int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms);
/**
* rdma_create_qp - Allocate a QP and associate it with the specified RDMA
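The rdma_cm timeouts above are now plain unsigned long milliseconds; a hedged consumer sketch (the 2000 ms value is arbitrary, dst_addr and cm_id come from the surrounding code):

ret = rdma_resolve_addr(cm_id, NULL, (struct sockaddr *)&dst_addr, 2000);
if (ret)
	return ret;
/* wait for RDMA_CM_EVENT_ADDR_RESOLVED in the event handler, then: */
ret = rdma_resolve_route(cm_id, 2000);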
diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
index c369703fcd69..70218e6b5187 100644
--- a/include/rdma/rdma_netlink.h
+++ b/include/rdma/rdma_netlink.h
@@ -96,7 +96,7 @@ int rdma_nl_multicast(struct sk_buff *skb, unsigned int group, gfp_t flags);
/**
* Check if there are any listeners to the netlink group
* @group: the netlink group ID
- * Returns 0 on success or a negative for no listeners.
+ * Returns true on success or false if no listeners.
*/
-int rdma_nl_chk_listeners(unsigned int group);
+bool rdma_nl_chk_listeners(unsigned int group);
#endif /* _RDMA_NETLINK_H */
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index e79229a0cf01..3584d0816fcd 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -149,6 +149,10 @@ struct rvt_ibport {
#define RVT_CQN_MAX 16 /* maximum length of cq name */
+#define RVT_SGE_COPY_MEMCPY 0
+#define RVT_SGE_COPY_CACHELESS 1
+#define RVT_SGE_COPY_ADAPTIVE 2
+
/*
* Things that are driver specific, module parameters in hfi1 and qib
*/
@@ -161,6 +165,9 @@ struct rvt_driver_params {
*/
unsigned int lkey_table_size;
unsigned int qp_table_size;
+ unsigned int sge_copy_mode;
+ unsigned int wss_threshold;
+ unsigned int wss_clean_period;
int qpn_start;
int qpn_inc;
int qpn_res_start;
@@ -193,6 +200,19 @@ struct rvt_ah {
u8 log_pmtu;
};
+/* memory working set size */
+struct rvt_wss {
+ unsigned long *entries;
+ atomic_t total_count;
+ atomic_t clean_counter;
+ atomic_t clean_entry;
+
+ int threshold;
+ int num_entries;
+ long pages_mask;
+ unsigned int clean_period;
+};
+
struct rvt_dev_info;
struct rvt_swqe;
struct rvt_driver_provided {
@@ -211,11 +231,18 @@ struct rvt_driver_provided {
* version requires the s_lock not to be held. The other assumes the
* s_lock is held.
*/
- void (*schedule_send)(struct rvt_qp *qp);
- void (*schedule_send_no_lock)(struct rvt_qp *qp);
+ bool (*schedule_send)(struct rvt_qp *qp);
+ bool (*schedule_send_no_lock)(struct rvt_qp *qp);
- /* Driver specific work request checking */
- int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe);
+ /*
+ * Driver specific work request setup and checking.
+ * This function is allowed to perform any setup, checks, or
+ * adjustments required to the SWQE in order to be usable by
+ * underlying protocols. This includes private data structure
+ * allocations.
+ */
+ int (*setup_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe,
+ bool *call_send);
/*
* Sometimes rdmavt needs to kick the driver's send progress. That is
@@ -371,6 +398,9 @@ struct rvt_dev_info {
/* post send table */
const struct rvt_operation_params *post_parms;
+ /* opcode translation table */
+ const enum ib_wc_opcode *wc_opcode;
+
/* Driver specific helper functions */
struct rvt_driver_provided driver_f;
@@ -411,6 +441,8 @@ struct rvt_dev_info {
u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
spinlock_t n_mcast_grps_lock;
+ /* Memory Working Set Size */
+ struct rvt_wss *wss;
};
/**
@@ -423,7 +455,14 @@ static inline void rvt_set_ibdev_name(struct rvt_dev_info *rdi,
const char *fmt, const char *name,
const int unit)
{
- snprintf(rdi->ibdev.name, sizeof(rdi->ibdev.name), fmt, name, unit);
+ /*
+ * FIXME: rvt and its users want to touch the ibdev before
+ * registration and have things like the name work. We don't have the
+ * infrastructure in the core to support this directly today, hack it
+ * to work by setting the name manually here.
+ */
+ dev_set_name(&rdi->ibdev.dev, fmt, name, unit);
+ strlcpy(rdi->ibdev.name, dev_name(&rdi->ibdev.dev), IB_DEVICE_NAME_MAX);
}
/**
@@ -434,7 +473,7 @@ static inline void rvt_set_ibdev_name(struct rvt_dev_info *rdi,
*/
static inline const char *rvt_get_ibdev_name(const struct rvt_dev_info *rdi)
{
- return rdi->ibdev.name;
+ return dev_name(&rdi->ibdev.dev);
}
static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd)
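For the rvt_driver_provided changes above (schedule_send now reports whether work was actually queued, and check_send_wqe has become setup_wqe), a hedged hook-up sketch for a dependent driver; all mydrv_* names are invented:

static bool mydrv_schedule_send(struct rvt_qp *qp)
{
	/* return true only when a send-engine run was really scheduled */
	return mydrv_queue_send_engine(qp);
}

static int mydrv_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe,
			   bool *call_send)
{
	/* validate/adjust the SWQE, possibly allocating private per-WQE state */
	*call_send = true;	/* let rdmavt call the send path directly */
	return 0;
}

rdi->driver_f.schedule_send = mydrv_schedule_send;
rdi->driver_f.setup_wqe = mydrv_setup_wqe;
rdi->dparms.sge_copy_mode = RVT_SGE_COPY_ADAPTIVE;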
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index 927f6d5b6d0f..cbafb1878669 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -678,6 +678,13 @@ void rvt_del_timers_sync(struct rvt_qp *qp);
void rvt_stop_rc_timers(struct rvt_qp *qp);
void rvt_add_retry_timer(struct rvt_qp *qp);
+void rvt_copy_sge(struct rvt_qp *qp, struct rvt_sge_state *ss,
+ void *data, u32 length,
+ bool release, bool copy_last);
+void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
+ enum ib_wc_status status);
+void rvt_ruc_loopback(struct rvt_qp *qp);
+
/**
* struct rvt_qp_iter - the iterator for QPs
* @qp - the current QP
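The helpers exported above consolidate logic that hfi1 and qib previously duplicated; a hedged sketch of the call sites, with data, len and wqe standing in for the driver's packet state:

/* receive side: copy payload into the QP's scatter/gather state */
rvt_copy_sge(qp, &qp->r_sge, data, len, true /* release */, false /* copy_last */);

/* send side: retire a finished WQE with a completion status */
rvt_send_complete(qp, wqe, IB_WC_SUCCESS);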
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
index 9654d33edd98..2638fa7cd702 100644
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -173,16 +173,10 @@ int rdma_restrack_put(struct rdma_restrack_entry *res);
/**
* rdma_restrack_set_task() - set the task for this resource
* @res: resource entry
- * @task: task struct
+ * @caller: kernel name, the current task will be used if the caller is NULL.
*/
-static inline void rdma_restrack_set_task(struct rdma_restrack_entry *res,
- struct task_struct *task)
-{
- if (res->task)
- put_task_struct(res->task);
- get_task_struct(task);
- res->task = task;
-}
+void rdma_restrack_set_task(struct rdma_restrack_entry *res,
+ const char *caller);
/*
* Helper functions for rdma drivers when filling out
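rdma_restrack_set_task() is now out of line and takes a kernel caller string; a hedged sketch of tagging a kernel-created resource (res and caller come from the surrounding code):

res->type = RDMA_RESTRACK_QP;
rdma_restrack_set_task(res, caller);	/* caller == NULL records the current task */
rdma_restrack_add(res);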
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 9e997c3c2f04..84d3d15f1f38 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -52,6 +52,7 @@ enum uverbs_attr_type {
UVERBS_ATTR_TYPE_IDR,
UVERBS_ATTR_TYPE_FD,
UVERBS_ATTR_TYPE_ENUM_IN,
+ UVERBS_ATTR_TYPE_IDRS_ARRAY,
};
enum uverbs_obj_access {
@@ -101,7 +102,7 @@ struct uverbs_attr_spec {
} enum_def;
} u;
- /* This weird split of the enum lets us remove some padding */
+ /* This weird split lets us remove some padding */
union {
struct {
/*
@@ -111,6 +112,17 @@ struct uverbs_attr_spec {
*/
const struct uverbs_attr_spec *ids;
} enum_def;
+
+ struct {
+ /*
+ * higher bits mean the namespace and lower bits mean
+ * the type id within the namespace.
+ */
+ u16 obj_type;
+ u16 min_len;
+ u16 max_len;
+ u8 access;
+ } objs_arr;
} u2;
};
@@ -251,6 +263,11 @@ static inline __attribute_const__ u32 uapi_bkey_attr(u32 attr_key)
return attr_key - 1;
}
+static inline __attribute_const__ u32 uapi_bkey_to_key_attr(u32 attr_bkey)
+{
+ return attr_bkey + 1;
+}
+
/*
* =======================================
* Verbs definitions
@@ -323,6 +340,27 @@ struct uverbs_object_tree_def {
#define UA_MANDATORY .mandatory = 1
#define UA_OPTIONAL .mandatory = 0
+/*
+ * min_len must be bigger than 0 and _max_len must be smaller than 4095. Only
+ * READ\WRITE accesses are supported.
+ */
+#define UVERBS_ATTR_IDRS_ARR(_attr_id, _idr_type, _access, _min_len, _max_len, \
+ ...) \
+ (&(const struct uverbs_attr_def){ \
+ .id = (_attr_id) + \
+ BUILD_BUG_ON_ZERO((_min_len) == 0 || \
+ (_max_len) > \
+ PAGE_SIZE / sizeof(void *) || \
+ (_min_len) > (_max_len) || \
+ (_access) == UVERBS_ACCESS_NEW || \
+ (_access) == UVERBS_ACCESS_DESTROY), \
+ .attr = { .type = UVERBS_ATTR_TYPE_IDRS_ARRAY, \
+ .u2.objs_arr.obj_type = _idr_type, \
+ .u2.objs_arr.access = _access, \
+ .u2.objs_arr.min_len = _min_len, \
+ .u2.objs_arr.max_len = _max_len, \
+ __VA_ARGS__ } })
+
#define UVERBS_ATTR_IDR(_attr_id, _idr_type, _access, ...) \
(&(const struct uverbs_attr_def){ \
.id = _attr_id, \
@@ -365,6 +403,15 @@ struct uverbs_object_tree_def {
__VA_ARGS__ }, \
})
+/* An input value that is a member in the enum _enum_type. */
+#define UVERBS_ATTR_CONST_IN(_attr_id, _enum_type, ...) \
+ UVERBS_ATTR_PTR_IN( \
+ _attr_id, \
+ UVERBS_ATTR_SIZE( \
+ sizeof(u64) + BUILD_BUG_ON_ZERO(!sizeof(_enum_type)), \
+ sizeof(u64)), \
+ __VA_ARGS__)
+
/*
* An input value that is a bitwise combination of values of _enum_type.
* This permits the flag value to be passed as either a u32 or u64, it must
@@ -431,10 +478,16 @@ struct uverbs_obj_attr {
const struct uverbs_api_attr *attr_elm;
};
+struct uverbs_objs_arr_attr {
+ struct ib_uobject **uobjects;
+ u16 len;
+};
+
struct uverbs_attr {
union {
struct uverbs_ptr_attr ptr_attr;
struct uverbs_obj_attr obj_attr;
+ struct uverbs_objs_arr_attr objs_arr_attr;
};
};
@@ -507,6 +560,31 @@ uverbs_attr_get_len(const struct uverbs_attr_bundle *attrs_bundle, u16 idx)
return attr->ptr_attr.len;
}
+/**
+ * uverbs_attr_get_uobjs_arr() - Provides array's properties for attribute for
+ * UVERBS_ATTR_TYPE_IDRS_ARRAY.
+ * @arr: Returned pointer to array of pointers for uobjects or NULL if
+ * the attribute isn't provided.
+ *
+ * Return: The array length or 0 if no attribute was provided.
+ */
+static inline int uverbs_attr_get_uobjs_arr(
+ const struct uverbs_attr_bundle *attrs_bundle, u16 attr_idx,
+ struct ib_uobject ***arr)
+{
+ const struct uverbs_attr *attr =
+ uverbs_attr_get(attrs_bundle, attr_idx);
+
+ if (IS_ERR(attr)) {
+ *arr = NULL;
+ return 0;
+ }
+
+ *arr = attr->objs_arr_attr.uobjects;
+
+ return attr->objs_arr_attr.len;
+}
+
static inline bool uverbs_attr_ptr_is_inline(const struct uverbs_attr *attr)
{
return attr->ptr_attr.len <= sizeof(attr->ptr_attr.data);
@@ -603,6 +681,9 @@ static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle,
{
return _uverbs_alloc(bundle, size, GFP_KERNEL | __GFP_ZERO);
}
+int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx, s64 lower_bound, u64 upper_bound,
+ s64 *def_val);
#else
static inline int
uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
@@ -631,6 +712,34 @@ static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle,
{
return ERR_PTR(-EINVAL);
}
+static inline int
+_uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx, s64 lower_bound, u64 upper_bound,
+ s64 *def_val)
+{
+ return -EINVAL;
+}
#endif
+#define uverbs_get_const(_to, _attrs_bundle, _idx) \
+ ({ \
+ s64 _val; \
+ int _ret = _uverbs_get_const(&_val, _attrs_bundle, _idx, \
+ type_min(typeof(*_to)), \
+ type_max(typeof(*_to)), NULL); \
+ (*_to) = _val; \
+ _ret; \
+ })
+
+#define uverbs_get_const_default(_to, _attrs_bundle, _idx, _default) \
+ ({ \
+ s64 _val; \
+ s64 _def_val = _default; \
+ int _ret = \
+ _uverbs_get_const(&_val, _attrs_bundle, _idx, \
+ type_min(typeof(*_to)), \
+ type_max(typeof(*_to)), &_def_val); \
+ (*_to) = _val; \
+ _ret; \
+ })
#endif
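The uverbs_ioctl.h additions above combine as follows in a method definition and its handler; the MYDRV_* attribute IDs and enum mydrv_frob_mode are placeholders, while the macro and helper shapes follow the definitions in this diff:

/* attribute spec fragments for a driver method */
UVERBS_ATTR_CONST_IN(MYDRV_ATTR_FROB_MODE, enum mydrv_frob_mode, UA_MANDATORY),
UVERBS_ATTR_IDRS_ARR(MYDRV_ATTR_FROB_QPS, UVERBS_OBJECT_QP,
		     UVERBS_ACCESS_READ, 1, 16, UA_MANDATORY),

/* inside the method handler, given the attribute bundle 'attrs' */
struct ib_uobject **qps;
int mode, nqps, ret;

ret = uverbs_get_const(&mode, attrs, MYDRV_ATTR_FROB_MODE);
if (ret)
	return ret;

nqps = uverbs_attr_get_uobjs_arr(attrs, MYDRV_ATTR_FROB_QPS, &qps);
/* qps[0..nqps-1] hold referenced uobjects for the duration of the call */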
diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h
index 3b00231cc084..3db2802fbc68 100644
--- a/include/rdma/uverbs_std_types.h
+++ b/include/rdma/uverbs_std_types.h
@@ -140,5 +140,56 @@ __uobj_alloc(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile,
#define uobj_alloc(_type, _ufile, _ib_dev) \
__uobj_alloc(uobj_get_type(_ufile, _type), _ufile, _ib_dev)
+static inline void uverbs_flow_action_fill_action(struct ib_flow_action *action,
+ struct ib_uobject *uobj,
+ struct ib_device *ib_dev,
+ enum ib_flow_action_type type)
+{
+ atomic_set(&action->usecnt, 0);
+ action->device = ib_dev;
+ action->type = type;
+ action->uobject = uobj;
+ uobj->object = action;
+}
+
+struct ib_uflow_resources {
+ size_t max;
+ size_t num;
+ size_t collection_num;
+ size_t counters_num;
+ struct ib_counters **counters;
+ struct ib_flow_action **collection;
+};
+
+struct ib_uflow_object {
+ struct ib_uobject uobject;
+ struct ib_uflow_resources *resources;
+};
+
+struct ib_uflow_resources *flow_resources_alloc(size_t num_specs);
+void flow_resources_add(struct ib_uflow_resources *uflow_res,
+ enum ib_flow_spec_type type,
+ void *ibobj);
+void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res);
+
+static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow,
+ struct ib_qp *qp, struct ib_device *device,
+ struct ib_uflow_resources *uflow_res)
+{
+ struct ib_uflow_object *uflow;
+
+ uobj->object = ibflow;
+ ibflow->uobject = uobj;
+
+ if (qp) {
+ atomic_inc(&qp->usecnt);
+ ibflow->qp = qp;
+ }
+
+ ibflow->device = device;
+ uflow = container_of(uobj, typeof(*uflow), uobject);
+ uflow->resources = uflow_res;
+}
+
#endif
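ib_set_flow() (moved here from ib_verbs.h) now also records the per-flow resource tracking object; a hedged create-flow sketch, with num_specs, ibcounters, flow, qp and uobj supplied by the surrounding handler:

struct ib_uflow_resources *uflow_res;

uflow_res = flow_resources_alloc(num_specs);
if (!uflow_res)
	return -ENOMEM;

/* while parsing each spec, remember any referenced counters/actions */
flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_COUNT, ibcounters);

/* bind the flow, its QP and the collected resources to the uobject;
 * ib_uverbs_flow_resources_free() releases them when the uobject goes away */
ib_set_flow(uobj, flow, qp, qp->device, uflow_res);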