summaryrefslogtreecommitdiffstats
path: root/include/rdma/ib_verbs.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-04-07 02:35:43 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2018-04-07 02:35:43 +0200
commit19fd08b85bc7e0502b55cd726f466df82ee7e777 (patch)
treeb042de4b9a8a9478c528ea950b14d34487375695 /include/rdma/ib_verbs.h
parentMerge tag 'mailbox-v4.17' of git://git.linaro.org/landing-teams/working/fujit... (diff)
parentIB/rxe: Fix for oops in rxe_register_device on ppc64le arch (diff)
downloadlinux-19fd08b85bc7e0502b55cd726f466df82ee7e777.tar.xz
linux-19fd08b85bc7e0502b55cd726f466df82ee7e777.zip
Merge tag 'for-linus-unmerged' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe: "Doug and I are at a conference next week so if another PR is sent I expect it to only be bug fixes. Parav noted yesterday that there are some fringe case behavior changes in his work that he would like to fix, and I see that Intel has a number of rc looking patches for HFI1 they posted yesterday. Parav is again the biggest contributor by patch count with his ongoing work to enable container support in the RDMA stack, followed by Leon doing syzkaller inspired cleanups, though most of the actual fixing went to RC. There is one uncomfortable series here fixing the user ABI to actually work as intended in 32 bit mode. There are lots of notes in the commit messages, but the basic summary is we don't think there is an actual 32 bit kernel user of drivers/infiniband for several good reasons. However we are seeing people want to use a 32 bit user space with 64 bit kernel, which didn't completely work today. So in fixing it we required a 32 bit rxe user to upgrade their userspace. rxe users are still already quite rare and we think a 32 bit one is non-existing. - Fix RDMA uapi headers to actually compile in userspace and be more complete - Three shared with netdev pull requests from Mellanox: * 7 patches, mostly to net with 1 IB related one at the back). This series addresses an IRQ performance issue (patch 1), cleanups related to the fix for the IRQ performance problem (patches 2-6), and then extends the fragmented completion queue support that already exists in the net side of the driver to the ib side of the driver (patch 7). * Mostly IB, with 5 patches to net that are needed to support the remaining 10 patches to the IB subsystem. This series extends the current 'representor' framework when the mlx5 driver is in switchdev mode from being a netdev only construct to being a netdev/IB dev construct. The IB dev is limited to raw Eth queue pairs only, but by having an IB dev of this type attached to the representor for a switchdev port, it enables DPDK to work on the switchdev device. * All net related, but needed as infrastructure for the rdma driver - Updates for the hns, i40iw, bnxt_re, cxgb3, cxgb4, hns drivers - SRP performance updates - IB uverbs write path cleanup patch series from Leon - Add RDMA_CM support to ib_srpt. This is disabled by default. Users need to set the port for ib_srpt to listen on in configfs in order for it to be enabled (/sys/kernel/config/target/srpt/discovery_auth/rdma_cm_port) - TSO and Scatter FCS support in mlx4 - Refactor of modify_qp routine to resolve problems seen while working on new code that is forthcoming - More refactoring and updates of RDMA CM for containers support from Parav - mlx5 'fine grained packet pacing', 'ipsec offload' and 'device memory' user API features - Infrastructure updates for the new IOCTL interface, based on increased usage - ABI compatibility bug fixes to fully support 32 bit userspace on 64 bit kernel as was originally intended. See the commit messages for extensive details - Syzkaller bugs and code cleanups motivated by them" * tag 'for-linus-unmerged' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (199 commits) IB/rxe: Fix for oops in rxe_register_device on ppc64le arch IB/mlx5: Device memory mr registration support net/mlx5: Mkey creation command adjustments IB/mlx5: Device memory support in mlx5_ib net/mlx5: Query device memory capabilities IB/uverbs: Add device memory registration ioctl support IB/uverbs: Add alloc/free dm uverbs ioctl support IB/uverbs: Add device memory capabilities reporting IB/uverbs: Expose device memory capabilities to user RDMA/qedr: Fix wmb usage in qedr IB/rxe: Removed GID add/del dummy routines RDMA/qedr: Zero stack memory before copying to user space IB/mlx5: Add ability to hash by IPSEC_SPI when creating a TIR IB/mlx5: Add information for querying IPsec capabilities IB/mlx5: Add IPsec support for egress and ingress {net,IB}/mlx5: Add ipsec helper IB/mlx5: Add modify_flow_action_esp verb IB/mlx5: Add implementation for create and destroy action_xfrm IB/uverbs: Introduce ESP steering match filter IB/uverbs: Add modify ESP flow_action ...
Diffstat (limited to 'include/rdma/ib_verbs.h')
-rw-r--r--include/rdma/ib_verbs.h212
1 files changed, 171 insertions, 41 deletions
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 6eb174753acf..9fc8a825aa28 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -64,6 +64,8 @@
#include <linux/cgroup_rdma.h>
#include <uapi/rdma/ib_user_verbs.h>
#include <rdma/restrack.h>
+#include <uapi/rdma/rdma_user_ioctl.h>
+#include <uapi/rdma/ib_user_ioctl_verbs.h>
#define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN
@@ -90,8 +92,11 @@ enum ib_gid_type {
#define ROCE_V2_UDP_DPORT 4791
struct ib_gid_attr {
- enum ib_gid_type gid_type;
struct net_device *ndev;
+ struct ib_device *device;
+ enum ib_gid_type gid_type;
+ u16 index;
+ u8 port_num;
};
enum rdma_node_type {
@@ -316,6 +321,18 @@ struct ib_cq_caps {
u16 max_cq_moderation_period;
};
+struct ib_dm_mr_attr {
+ u64 length;
+ u64 offset;
+ u32 access_flags;
+};
+
+struct ib_dm_alloc_attr {
+ u64 length;
+ u32 alignment;
+ u32 flags;
+};
+
struct ib_device_attr {
u64 fw_ver;
__be64 sys_image_guid;
@@ -367,6 +384,7 @@ struct ib_device_attr {
u32 raw_packet_caps; /* Use ib_raw_packet_caps enum */
struct ib_tm_caps tm_caps;
struct ib_cq_caps cq_caps;
+ u64 max_dm_size;
};
enum ib_mtu {
@@ -469,6 +487,9 @@ enum ib_port_speed {
/**
* struct rdma_hw_stats
+ * @lock - Mutex to protect parallel write access to lifespan and values
+ * of counters, which are 64bits and not guaranteeed to be written
+ * atomicaly on 32bits systems.
* @timestamp - Used by the core code to track when the last update was
* @lifespan - Used by the core code to determine how old the counters
* should be before being updated again. Stored in jiffies, defaults
@@ -484,6 +505,7 @@ enum ib_port_speed {
* filled in by the drivers get_stats routine
*/
struct rdma_hw_stats {
+ struct mutex lock; /* Protect lifespan and values[] */
unsigned long timestamp;
unsigned long lifespan;
const char * const *names;
@@ -1755,6 +1777,14 @@ struct ib_qp {
struct rdma_restrack_entry res;
};
+struct ib_dm {
+ struct ib_device *device;
+ u32 length;
+ u32 flags;
+ struct ib_uobject *uobject;
+ atomic_t usecnt;
+};
+
struct ib_mr {
struct ib_device *device;
struct ib_pd *pd;
@@ -1768,6 +1798,13 @@ struct ib_mr {
struct ib_uobject *uobject; /* user */
struct list_head qp_entry; /* FR */
};
+
+ struct ib_dm *dm;
+
+ /*
+ * Implementation details of the RDMA core, don't use in drivers:
+ */
+ struct rdma_restrack_entry res;
};
struct ib_mw {
@@ -1810,6 +1847,7 @@ enum ib_flow_spec_type {
/* L3 header*/
IB_FLOW_SPEC_IPV4 = 0x30,
IB_FLOW_SPEC_IPV6 = 0x31,
+ IB_FLOW_SPEC_ESP = 0x34,
/* L4 headers*/
IB_FLOW_SPEC_TCP = 0x40,
IB_FLOW_SPEC_UDP = 0x41,
@@ -1818,6 +1856,7 @@ enum ib_flow_spec_type {
/* Actions */
IB_FLOW_SPEC_ACTION_TAG = 0x1000,
IB_FLOW_SPEC_ACTION_DROP = 0x1001,
+ IB_FLOW_SPEC_ACTION_HANDLE = 0x1002,
};
#define IB_FLOW_SPEC_LAYER_MASK 0xF0
#define IB_FLOW_SPEC_SUPPORT_LAYERS 8
@@ -1835,7 +1874,8 @@ enum ib_flow_domain {
enum ib_flow_flags {
IB_FLOW_ATTR_FLAGS_DONT_TRAP = 1UL << 1, /* Continue match, no steal */
- IB_FLOW_ATTR_FLAGS_RESERVED = 1UL << 2 /* Must be last */
+ IB_FLOW_ATTR_FLAGS_EGRESS = 1UL << 2, /* Egress flow */
+ IB_FLOW_ATTR_FLAGS_RESERVED = 1UL << 3 /* Must be last */
};
struct ib_flow_eth_filter {
@@ -1940,6 +1980,20 @@ struct ib_flow_spec_tunnel {
struct ib_flow_tunnel_filter mask;
};
+struct ib_flow_esp_filter {
+ __be32 spi;
+ __be32 seq;
+ /* Must be last */
+ u8 real_sz[0];
+};
+
+struct ib_flow_spec_esp {
+ u32 type;
+ u16 size;
+ struct ib_flow_esp_filter val;
+ struct ib_flow_esp_filter mask;
+};
+
struct ib_flow_spec_action_tag {
enum ib_flow_spec_type type;
u16 size;
@@ -1951,6 +2005,12 @@ struct ib_flow_spec_action_drop {
u16 size;
};
+struct ib_flow_spec_action_handle {
+ enum ib_flow_spec_type type;
+ u16 size;
+ struct ib_flow_action *act;
+};
+
union ib_flow_spec {
struct {
u32 type;
@@ -1962,8 +2022,10 @@ union ib_flow_spec {
struct ib_flow_spec_tcp_udp tcp_udp;
struct ib_flow_spec_ipv6 ipv6;
struct ib_flow_spec_tunnel tunnel;
+ struct ib_flow_spec_esp esp;
struct ib_flow_spec_action_tag flow_tag;
struct ib_flow_spec_action_drop drop;
+ struct ib_flow_spec_action_handle action;
};
struct ib_flow_attr {
@@ -1984,6 +2046,64 @@ struct ib_flow {
struct ib_uobject *uobject;
};
+enum ib_flow_action_type {
+ IB_FLOW_ACTION_UNSPECIFIED,
+ IB_FLOW_ACTION_ESP = 1,
+};
+
+struct ib_flow_action_attrs_esp_keymats {
+ enum ib_uverbs_flow_action_esp_keymat protocol;
+ union {
+ struct ib_uverbs_flow_action_esp_keymat_aes_gcm aes_gcm;
+ } keymat;
+};
+
+struct ib_flow_action_attrs_esp_replays {
+ enum ib_uverbs_flow_action_esp_replay protocol;
+ union {
+ struct ib_uverbs_flow_action_esp_replay_bmp bmp;
+ } replay;
+};
+
+enum ib_flow_action_attrs_esp_flags {
+ /* All user-space flags at the top: Use enum ib_uverbs_flow_action_esp_flags
+ * This is done in order to share the same flags between user-space and
+ * kernel and spare an unnecessary translation.
+ */
+
+ /* Kernel flags */
+ IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED = 1ULL << 32,
+ IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS = 1ULL << 33,
+};
+
+struct ib_flow_spec_list {
+ struct ib_flow_spec_list *next;
+ union ib_flow_spec spec;
+};
+
+struct ib_flow_action_attrs_esp {
+ struct ib_flow_action_attrs_esp_keymats *keymat;
+ struct ib_flow_action_attrs_esp_replays *replay;
+ struct ib_flow_spec_list *encap;
+ /* Used only if IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED is enabled.
+ * Value of 0 is a valid value.
+ */
+ u32 esn;
+ u32 spi;
+ u32 seq;
+ u32 tfc_pad;
+ /* Use enum ib_flow_action_attrs_esp_flags */
+ u64 flags;
+ u64 hard_limit_pkts;
+};
+
+struct ib_flow_action {
+ struct ib_device *device;
+ struct ib_uobject *uobject;
+ enum ib_flow_action_type type;
+ atomic_t usecnt;
+};
+
struct ib_mad_hdr;
struct ib_grh;
@@ -2060,6 +2180,8 @@ struct ib_port_pkey_list {
struct list_head pkey_list;
};
+struct uverbs_attr_bundle;
+
struct ib_device {
/* Do not access @dma_device directly from ULP nor from HW drivers. */
struct device *dma_device;
@@ -2127,37 +2249,36 @@ struct ib_device {
*/
struct net_device *(*get_netdev)(struct ib_device *device,
u8 port_num);
+ /* query_gid should be return GID value for @device, when @port_num
+ * link layer is either IB or iWarp. It is no-op if @port_num port
+ * is RoCE link layer.
+ */
int (*query_gid)(struct ib_device *device,
u8 port_num, int index,
union ib_gid *gid);
- /* When calling add_gid, the HW vendor's driver should
- * add the gid of device @device at gid index @index of
- * port @port_num to be @gid. Meta-info of that gid (for example,
- * the network device related to this gid is available
- * at @attr. @context allows the HW vendor driver to store extra
- * information together with a GID entry. The HW vendor may allocate
- * memory to contain this information and store it in @context when a
- * new GID entry is written to. Params are consistent until the next
- * call of add_gid or delete_gid. The function should return 0 on
+ /* When calling add_gid, the HW vendor's driver should add the gid
+ * of device of port at gid index available at @attr. Meta-info of
+ * that gid (for example, the network device related to this gid) is
+ * available at @attr. @context allows the HW vendor driver to store
+ * extra information together with a GID entry. The HW vendor driver may
+ * allocate memory to contain this information and store it in @context
+ * when a new GID entry is written to. Params are consistent until the
+ * next call of add_gid or delete_gid. The function should return 0 on
* success or error otherwise. The function could be called
- * concurrently for different ports. This function is only called
- * when roce_gid_table is used.
+ * concurrently for different ports. This function is only called when
+ * roce_gid_table is used.
*/
- int (*add_gid)(struct ib_device *device,
- u8 port_num,
- unsigned int index,
- const union ib_gid *gid,
+ int (*add_gid)(const union ib_gid *gid,
const struct ib_gid_attr *attr,
void **context);
/* When calling del_gid, the HW vendor's driver should delete the
- * gid of device @device at gid index @index of port @port_num.
+ * gid of device @device at gid index gid_index of port port_num
+ * available in @attr.
* Upon the deletion of a GID entry, the HW vendor must free any
* allocated memory. The caller will clear @context afterwards.
* This function is only called when roce_gid_table is used.
*/
- int (*del_gid)(struct ib_device *device,
- u8 port_num,
- unsigned int index,
+ int (*del_gid)(const struct ib_gid_attr *attr,
void **context);
int (*query_pkey)(struct ib_device *device,
u8 port_num, u16 index, u16 *pkey);
@@ -2315,6 +2436,21 @@ struct ib_device {
struct ib_rwq_ind_table_init_attr *init_attr,
struct ib_udata *udata);
int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table);
+ struct ib_flow_action * (*create_flow_action_esp)(struct ib_device *device,
+ const struct ib_flow_action_attrs_esp *attr,
+ struct uverbs_attr_bundle *attrs);
+ int (*destroy_flow_action)(struct ib_flow_action *action);
+ int (*modify_flow_action_esp)(struct ib_flow_action *action,
+ const struct ib_flow_action_attrs_esp *attr,
+ struct uverbs_attr_bundle *attrs);
+ struct ib_dm * (*alloc_dm)(struct ib_device *device,
+ struct ib_ucontext *context,
+ struct ib_dm_alloc_attr *attr,
+ struct uverbs_attr_bundle *attrs);
+ int (*dealloc_dm)(struct ib_dm *dm);
+ struct ib_mr * (*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm,
+ struct ib_dm_mr_attr *attr,
+ struct uverbs_attr_bundle *attrs);
/**
* rdma netdev operation
*
@@ -2376,6 +2512,7 @@ struct ib_device {
int comp_vector);
struct uverbs_root_spec *specs_root;
+ enum rdma_driver_id driver_id;
};
struct ib_client {
@@ -2435,11 +2572,9 @@ static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len
return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0;
}
-static inline bool ib_is_udata_cleared(struct ib_udata *udata,
- size_t offset,
- size_t len)
+static inline bool ib_is_buffer_cleared(const void __user *p,
+ size_t len)
{
- const void __user *p = udata->inbuf + offset;
bool ret;
u8 *buf;
@@ -2455,6 +2590,13 @@ static inline bool ib_is_udata_cleared(struct ib_udata *udata,
return ret;
}
+static inline bool ib_is_udata_cleared(struct ib_udata *udata,
+ size_t offset,
+ size_t len)
+{
+ return ib_is_buffer_cleared(udata->inbuf + offset, len);
+}
+
/**
* ib_modify_qp_is_ok - Check that the supplied attribute mask
* contains all required attributes and no attributes not allowed for
@@ -2471,9 +2613,9 @@ static inline bool ib_is_udata_cleared(struct ib_udata *udata,
* transition from cur_state to next_state is allowed by the IB spec,
* and that the attribute mask supplied is allowed for the transition.
*/
-int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
- enum ib_qp_type type, enum ib_qp_attr_mask mask,
- enum rdma_link_layer ll);
+bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
+ enum ib_qp_type type, enum ib_qp_attr_mask mask,
+ enum rdma_link_layer ll);
void ib_register_event_handler(struct ib_event_handler *event_handler);
void ib_unregister_event_handler(struct ib_event_handler *event_handler);
@@ -2848,7 +2990,7 @@ int ib_modify_port(struct ib_device *device,
struct ib_port_modify *port_modify);
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
- struct net_device *ndev, u8 *port_num, u16 *index);
+ u8 *port_num, u16 *index);
int ib_find_pkey(struct ib_device *device,
u8 port_num, u16 pkey, u16 *index);
@@ -3217,18 +3359,6 @@ static inline int ib_poll_cq(struct ib_cq *cq, int num_entries,
}
/**
- * ib_peek_cq - Returns the number of unreaped completions currently
- * on the specified CQ.
- * @cq: The CQ to peek.
- * @wc_cnt: A minimum number of unreaped completions to check for.
- *
- * If the number of unreaped completions is greater than or equal to wc_cnt,
- * this function returns wc_cnt, otherwise, it returns the actual number of
- * unreaped completions.
- */
-int ib_peek_cq(struct ib_cq *cq, int wc_cnt);
-
-/**
* ib_req_notify_cq - Request completion notification on a CQ.
* @cq: The CQ to generate an event for.
* @flags: