summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/mlx5/main.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/mlx5/main.c')
-rw-r--r--drivers/infiniband/hw/mlx5/main.c458
1 files changed, 407 insertions, 51 deletions
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index f7fcde1ff0aa..ab3c562d5ba7 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -30,6 +30,7 @@
* SOFTWARE.
*/
+#include <linux/debugfs.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/init.h>
@@ -58,6 +59,7 @@
#include <linux/mlx5/vport.h>
#include "mlx5_ib.h"
#include "cmd.h"
+#include <linux/mlx5/vport.h>
#define DRIVER_NAME "mlx5_ib"
#define DRIVER_VERSION "5.0-0"
@@ -65,7 +67,6 @@
MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
MODULE_LICENSE("Dual BSD/GPL");
-MODULE_VERSION(DRIVER_VERSION);
static char mlx5_version[] =
DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
@@ -97,6 +98,20 @@ mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
return mlx5_port_type_cap_to_rdma_ll(port_type_cap);
}
+static int get_port_state(struct ib_device *ibdev,
+ u8 port_num,
+ enum ib_port_state *state)
+{
+ struct ib_port_attr attr;
+ int ret;
+
+ memset(&attr, 0, sizeof(attr));
+ ret = mlx5_ib_query_port(ibdev, port_num, &attr);
+ if (!ret)
+ *state = attr.state;
+ return ret;
+}
+
static int mlx5_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
@@ -114,6 +129,7 @@ static int mlx5_netdev_event(struct notifier_block *this,
write_unlock(&ibdev->roce.netdev_lock);
break;
+ case NETDEV_CHANGE:
case NETDEV_UP:
case NETDEV_DOWN: {
struct net_device *lag_ndev = mlx5_lag_get_roce_netdev(ibdev->mdev);
@@ -127,10 +143,23 @@ static int mlx5_netdev_event(struct notifier_block *this,
if ((upper == ndev || (!upper && ndev == ibdev->roce.netdev))
&& ibdev->ib_active) {
struct ib_event ibev = { };
+ enum ib_port_state port_state;
+ if (get_port_state(&ibdev->ib_dev, 1, &port_state))
+ return NOTIFY_DONE;
+
+ if (ibdev->roce.last_port_state == port_state)
+ return NOTIFY_DONE;
+
+ ibdev->roce.last_port_state = port_state;
ibev.device = &ibdev->ib_dev;
- ibev.event = (event == NETDEV_UP) ?
- IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
+ if (port_state == IB_PORT_DOWN)
+ ibev.event = IB_EVENT_PORT_ERR;
+ else if (port_state == IB_PORT_ACTIVE)
+ ibev.event = IB_EVENT_PORT_ACTIVE;
+ else
+ return NOTIFY_DONE;
+
ibev.element.port_num = 1;
ib_dispatch_event(&ibev);
}
@@ -668,6 +697,14 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_UD_TSO;
}
+ if (MLX5_CAP_GEN(dev->mdev, rq_delay_drop) &&
+ MLX5_CAP_GEN(dev->mdev, general_notification_event))
+ props->raw_packet_caps |= IB_RAW_PACKET_CAP_DELAY_DROP;
+
+ if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) &&
+ MLX5_CAP_IPOIB_ENHANCED(mdev, csum_cap))
+ props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
+
if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
MLX5_CAP_ETH(dev->mdev, scatter_fcs)) {
/* Legacy bit to support old userspace libraries */
@@ -740,6 +777,16 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
1 << MLX5_CAP_GEN(dev->mdev, log_max_rq);
}
+ if (MLX5_CAP_GEN(mdev, tag_matching)) {
+ props->xrq_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE;
+ props->xrq_caps.max_num_tags =
+ (1 << MLX5_CAP_GEN(mdev, log_tag_matching_list_sz)) - 1;
+ props->xrq_caps.flags = IB_TM_CAP_RC;
+ props->xrq_caps.max_ops =
+ 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
+ props->xrq_caps.max_sge = MLX5_TM_MAX_SGE;
+ }
+
if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) {
resp.cqe_comp_caps.max_num =
MLX5_CAP_GEN(dev->mdev, cqe_compression) ?
@@ -765,8 +812,14 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes,
uhw->outlen)) {
- resp.mlx5_ib_support_multi_pkt_send_wqes =
- MLX5_CAP_ETH(mdev, multi_pkt_send_wqe);
+ if (MLX5_CAP_ETH(mdev, multi_pkt_send_wqe))
+ resp.mlx5_ib_support_multi_pkt_send_wqes =
+ MLX5_IB_ALLOW_MPW;
+
+ if (MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe))
+ resp.mlx5_ib_support_multi_pkt_send_wqes |=
+ MLX5_IB_SUPPORT_EMPW;
+
resp.response_length +=
sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes);
}
@@ -774,6 +827,27 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (field_avail(typeof(resp), reserved, uhw->outlen))
resp.response_length += sizeof(resp.reserved);
+ if (field_avail(typeof(resp), sw_parsing_caps,
+ uhw->outlen)) {
+ resp.response_length += sizeof(resp.sw_parsing_caps);
+ if (MLX5_CAP_ETH(mdev, swp)) {
+ resp.sw_parsing_caps.sw_parsing_offloads |=
+ MLX5_IB_SW_PARSING;
+
+ if (MLX5_CAP_ETH(mdev, swp_csum))
+ resp.sw_parsing_caps.sw_parsing_offloads |=
+ MLX5_IB_SW_PARSING_CSUM;
+
+ if (MLX5_CAP_ETH(mdev, swp_lso))
+ resp.sw_parsing_caps.sw_parsing_offloads |=
+ MLX5_IB_SW_PARSING_LSO;
+
+ if (resp.sw_parsing_caps.sw_parsing_offloads)
+ resp.sw_parsing_caps.supported_qpts =
+ BIT(IB_QPT_RAW_PACKET);
+ }
+ }
+
if (uhw->outlen) {
err = ib_copy_to_udata(uhw, &resp, resp.response_length);
@@ -1144,7 +1218,7 @@ static int calc_total_bfregs(struct mlx5_ib_dev *dev, bool lib_uar_4k,
if (req->num_low_latency_bfregs > req->total_num_bfregs - 1)
return -EINVAL;
- mlx5_ib_dbg(dev, "uar_4k: fw support %s, lib support %s, user requested %d bfregs, alloated %d, using %d sys pages\n",
+ mlx5_ib_dbg(dev, "uar_4k: fw support %s, lib support %s, user requested %d bfregs, allocated %d, using %d sys pages\n",
MLX5_CAP_GEN(dev->mdev, uar_4k) ? "yes" : "no",
lib_uar_4k ? "yes" : "no", ref_bfregs,
req->total_num_bfregs, *num_sys_pages);
@@ -1193,6 +1267,45 @@ static int deallocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *con
return 0;
}
+static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn)
+{
+ int err;
+
+ err = mlx5_core_alloc_transport_domain(dev->mdev, tdn);
+ if (err)
+ return err;
+
+ if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
+ !MLX5_CAP_GEN(dev->mdev, disable_local_lb))
+ return err;
+
+ mutex_lock(&dev->lb_mutex);
+ dev->user_td++;
+
+ if (dev->user_td == 2)
+ err = mlx5_nic_vport_update_local_lb(dev->mdev, true);
+
+ mutex_unlock(&dev->lb_mutex);
+ return err;
+}
+
+static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn)
+{
+ mlx5_core_dealloc_transport_domain(dev->mdev, tdn);
+
+ if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
+ !MLX5_CAP_GEN(dev->mdev, disable_local_lb))
+ return;
+
+ mutex_lock(&dev->lb_mutex);
+ dev->user_td--;
+
+ if (dev->user_td < 2)
+ mlx5_nic_vport_update_local_lb(dev->mdev, false);
+
+ mutex_unlock(&dev->lb_mutex);
+}
+
static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
{
@@ -1203,7 +1316,6 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
struct mlx5_bfreg_info *bfregi;
int ver;
int err;
- size_t reqlen;
size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
max_cqe_version);
bool lib_uar_4k;
@@ -1211,18 +1323,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
if (!dev->ib_active)
return ERR_PTR(-EAGAIN);
- if (udata->inlen < sizeof(struct ib_uverbs_cmd_hdr))
- return ERR_PTR(-EINVAL);
-
- reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
- if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
+ if (udata->inlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
ver = 0;
- else if (reqlen >= min_req_v2)
+ else if (udata->inlen >= min_req_v2)
ver = 2;
else
return ERR_PTR(-EINVAL);
- err = ib_copy_from_udata(&req, udata, min(reqlen, sizeof(req)));
+ err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
if (err)
return ERR_PTR(err);
@@ -1301,8 +1409,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
mutex_init(&context->upd_xlt_page_mutex);
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
- err = mlx5_core_alloc_transport_domain(dev->mdev,
- &context->tdn);
+ err = mlx5_ib_alloc_transport_domain(dev, &context->tdn);
if (err)
goto out_page;
}
@@ -1368,7 +1475,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
out_td:
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
- mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
+ mlx5_ib_dealloc_transport_domain(dev, context->tdn);
out_page:
free_page(context->upd_xlt_page);
@@ -1396,7 +1503,7 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
bfregi = &context->bfregi;
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
- mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
+ mlx5_ib_dealloc_transport_domain(dev, context->tdn);
free_page(context->upd_xlt_page);
deallocate_uars(dev, context);
@@ -2034,23 +2141,34 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
* it won't fall into the multicast flow steering table and this rule
* could steal other multicast packets.
*/
-static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr)
+static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr)
{
- struct ib_flow_spec_eth *eth_spec;
+ union ib_flow_spec *flow_spec;
if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
- ib_attr->size < sizeof(struct ib_flow_attr) +
- sizeof(struct ib_flow_spec_eth) ||
ib_attr->num_of_specs < 1)
return false;
- eth_spec = (struct ib_flow_spec_eth *)(ib_attr + 1);
- if (eth_spec->type != IB_FLOW_SPEC_ETH ||
- eth_spec->size != sizeof(*eth_spec))
+ flow_spec = (union ib_flow_spec *)(ib_attr + 1);
+ if (flow_spec->type == IB_FLOW_SPEC_IPV4) {
+ struct ib_flow_spec_ipv4 *ipv4_spec;
+
+ ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec;
+ if (ipv4_is_multicast(ipv4_spec->val.dst_ip))
+ return true;
+
return false;
+ }
+
+ if (flow_spec->type == IB_FLOW_SPEC_ETH) {
+ struct ib_flow_spec_eth *eth_spec;
- return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
- is_multicast_ether_addr(eth_spec->val.dst_mac);
+ eth_spec = (struct ib_flow_spec_eth *)flow_spec;
+ return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
+ is_multicast_ether_addr(eth_spec->val.dst_mac);
+ }
+
+ return false;
}
static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
@@ -2235,10 +2353,31 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
return err ? ERR_PTR(err) : prio;
}
-static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *ft_prio,
- const struct ib_flow_attr *flow_attr,
- struct mlx5_flow_destination *dst)
+static void set_underlay_qp(struct mlx5_ib_dev *dev,
+ struct mlx5_flow_spec *spec,
+ u32 underlay_qpn)
+{
+ void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
+ spec->match_criteria,
+ misc_parameters);
+ void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+
+ if (underlay_qpn &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ ft_field_support.bth_dst_qp)) {
+ MLX5_SET(fte_match_set_misc,
+ misc_params_v, bth_dst_qp, underlay_qpn);
+ MLX5_SET(fte_match_set_misc,
+ misc_params_c, bth_dst_qp, 0xffffff);
+ }
+}
+
+static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ const struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_destination *dst,
+ u32 underlay_qpn)
{
struct mlx5_flow_table *ft = ft_prio->flow_table;
struct mlx5_ib_flow_handler *handler;
@@ -2274,6 +2413,9 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
ib_flow += ((union ib_flow_spec *)ib_flow)->size;
}
+ if (!flow_is_multicast_only(flow_attr))
+ set_underlay_qp(dev, spec, underlay_qpn);
+
spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
if (is_drop) {
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
@@ -2313,6 +2455,14 @@ free:
return err ? ERR_PTR(err) : handler;
}
+static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ const struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_destination *dst)
+{
+ return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0);
+}
+
static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_prio *ft_prio,
struct ib_flow_attr *flow_attr,
@@ -2449,6 +2599,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
struct mlx5_ib_flow_prio *ft_prio;
int err;
+ int underlay_qpn;
if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO)
return ERR_PTR(-ENOMEM);
@@ -2489,8 +2640,10 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
handler = create_dont_trap_rule(dev, ft_prio,
flow_attr, dst);
} else {
- handler = create_flow_rule(dev, ft_prio, flow_attr,
- dst);
+ underlay_qpn = (mqp->flags & MLX5_IB_QP_UNDERLAY) ?
+ mqp->underlay_qpn : 0;
+ handler = _create_flow_rule(dev, ft_prio, flow_attr,
+ dst, underlay_qpn);
}
} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
@@ -2528,8 +2681,14 @@ unlock:
static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx5_ib_qp *mqp = to_mqp(ibqp);
int err;
+ if (mqp->flags & MLX5_IB_QP_UNDERLAY) {
+ mlx5_ib_dbg(dev, "Attaching a multi cast group to underlay QP is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num);
if (err)
mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
@@ -2691,6 +2850,26 @@ static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
}
+static void delay_drop_handler(struct work_struct *work)
+{
+ int err;
+ struct mlx5_ib_delay_drop *delay_drop =
+ container_of(work, struct mlx5_ib_delay_drop,
+ delay_drop_work);
+
+ atomic_inc(&delay_drop->events_cnt);
+
+ mutex_lock(&delay_drop->lock);
+ err = mlx5_core_set_delay_drop(delay_drop->dev->mdev,
+ delay_drop->timeout);
+ if (err) {
+ mlx5_ib_warn(delay_drop->dev, "Failed to set delay drop, timeout=%u\n",
+ delay_drop->timeout);
+ delay_drop->activate = false;
+ }
+ mutex_unlock(&delay_drop->lock);
+}
+
static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
enum mlx5_dev_event event, unsigned long param)
{
@@ -2743,8 +2922,11 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
ibev.event = IB_EVENT_CLIENT_REREGISTER;
port = (u8)param;
break;
+ case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT:
+ schedule_work(&ibdev->delay_drop.delay_drop_work);
+ goto out;
default:
- return;
+ goto out;
}
ibev.device = &ibdev->ib_dev;
@@ -2752,7 +2934,7 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
if (port < 1 || port > ibdev->num_ports) {
mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
- return;
+ goto out;
}
if (ibdev->ib_active)
@@ -2760,6 +2942,9 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
if (fatal)
ibdev->ib_active = false;
+
+out:
+ return;
}
static int set_has_smi_cap(struct mlx5_ib_dev *dev)
@@ -3042,7 +3227,7 @@ static int create_dev_resources(struct mlx5_ib_resources *devr)
attr.attr.max_sge = 1;
attr.attr.max_wr = 1;
attr.srq_type = IB_SRQT_XRC;
- attr.ext.xrc.cq = devr->c0;
+ attr.ext.cq = devr->c0;
attr.ext.xrc.xrcd = devr->x0;
devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
@@ -3057,9 +3242,9 @@ static int create_dev_resources(struct mlx5_ib_resources *devr)
devr->s0->srq_context = NULL;
devr->s0->srq_type = IB_SRQT_XRC;
devr->s0->ext.xrc.xrcd = devr->x0;
- devr->s0->ext.xrc.cq = devr->c0;
+ devr->s0->ext.cq = devr->c0;
atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
- atomic_inc(&devr->s0->ext.xrc.cq->usecnt);
+ atomic_inc(&devr->s0->ext.cq->usecnt);
atomic_inc(&devr->p0->usecnt);
atomic_set(&devr->s0->usecnt, 0);
@@ -3078,9 +3263,9 @@ static int create_dev_resources(struct mlx5_ib_resources *devr)
devr->s1->event_handler = NULL;
devr->s1->srq_context = NULL;
devr->s1->srq_type = IB_SRQT_BASIC;
- devr->s1->ext.xrc.cq = devr->c0;
+ devr->s1->ext.cq = devr->c0;
atomic_inc(&devr->p0->usecnt);
- atomic_set(&devr->s0->usecnt, 0);
+ atomic_set(&devr->s1->usecnt, 0);
for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) {
INIT_WORK(&devr->ports[port].pkey_change_work,
@@ -3173,13 +3358,13 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
-static void get_dev_fw_str(struct ib_device *ibdev, char *str,
- size_t str_len)
+static void get_dev_fw_str(struct ib_device *ibdev, char *str)
{
struct mlx5_ib_dev *dev =
container_of(ibdev, struct mlx5_ib_dev, ib_dev);
- snprintf(str, str_len, "%d.%d.%04d", fw_rev_maj(dev->mdev),
- fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
+ snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%04d",
+ fw_rev_maj(dev->mdev), fw_rev_min(dev->mdev),
+ fw_rev_sub(dev->mdev));
}
static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
@@ -3319,6 +3504,17 @@ static const struct mlx5_ib_counter cong_cnts[] = {
INIT_CONG_COUNTER(np_cnp_sent),
};
+static const struct mlx5_ib_counter extended_err_cnts[] = {
+ INIT_Q_COUNTER(resp_local_length_error),
+ INIT_Q_COUNTER(resp_cqe_error),
+ INIT_Q_COUNTER(req_cqe_error),
+ INIT_Q_COUNTER(req_remote_invalid_request),
+ INIT_Q_COUNTER(req_remote_access_errors),
+ INIT_Q_COUNTER(resp_remote_access_errors),
+ INIT_Q_COUNTER(resp_cqe_flush_error),
+ INIT_Q_COUNTER(req_cqe_flush_error),
+};
+
static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
{
unsigned int i;
@@ -3343,6 +3539,10 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
num_counters += ARRAY_SIZE(retrans_q_cnts);
+
+ if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
+ num_counters += ARRAY_SIZE(extended_err_cnts);
+
cnts->num_q_counters = num_counters;
if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
@@ -3392,6 +3592,13 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
}
}
+ if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
+ for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
+ names[j] = extended_err_cnts[i].name;
+ offsets[j] = extended_err_cnts[i].offset;
+ }
+ }
+
if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
names[j] = cong_cnts[i].name;
@@ -3562,6 +3769,136 @@ mlx5_ib_alloc_rdma_netdev(struct ib_device *hca,
return netdev;
}
+static void delay_drop_debugfs_cleanup(struct mlx5_ib_dev *dev)
+{
+ if (!dev->delay_drop.dbg)
+ return;
+ debugfs_remove_recursive(dev->delay_drop.dbg->dir_debugfs);
+ kfree(dev->delay_drop.dbg);
+ dev->delay_drop.dbg = NULL;
+}
+
+static void cancel_delay_drop(struct mlx5_ib_dev *dev)
+{
+ if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
+ return;
+
+ cancel_work_sync(&dev->delay_drop.delay_drop_work);
+ delay_drop_debugfs_cleanup(dev);
+}
+
+static ssize_t delay_drop_timeout_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct mlx5_ib_delay_drop *delay_drop = filp->private_data;
+ char lbuf[20];
+ int len;
+
+ len = snprintf(lbuf, sizeof(lbuf), "%u\n", delay_drop->timeout);
+ return simple_read_from_buffer(buf, count, pos, lbuf, len);
+}
+
+static ssize_t delay_drop_timeout_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct mlx5_ib_delay_drop *delay_drop = filp->private_data;
+ u32 timeout;
+ u32 var;
+
+ if (kstrtouint_from_user(buf, count, 0, &var))
+ return -EFAULT;
+
+ timeout = min_t(u32, roundup(var, 100), MLX5_MAX_DELAY_DROP_TIMEOUT_MS *
+ 1000);
+ if (timeout != var)
+ mlx5_ib_dbg(delay_drop->dev, "Round delay drop timeout to %u usec\n",
+ timeout);
+
+ delay_drop->timeout = timeout;
+
+ return count;
+}
+
+static const struct file_operations fops_delay_drop_timeout = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .write = delay_drop_timeout_write,
+ .read = delay_drop_timeout_read,
+};
+
+static int delay_drop_debugfs_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_ib_dbg_delay_drop *dbg;
+
+ if (!mlx5_debugfs_root)
+ return 0;
+
+ dbg = kzalloc(sizeof(*dbg), GFP_KERNEL);
+ if (!dbg)
+ return -ENOMEM;
+
+ dbg->dir_debugfs =
+ debugfs_create_dir("delay_drop",
+ dev->mdev->priv.dbg_root);
+ if (!dbg->dir_debugfs)
+ return -ENOMEM;
+
+ dbg->events_cnt_debugfs =
+ debugfs_create_atomic_t("num_timeout_events", 0400,
+ dbg->dir_debugfs,
+ &dev->delay_drop.events_cnt);
+ if (!dbg->events_cnt_debugfs)
+ goto out_debugfs;
+
+ dbg->rqs_cnt_debugfs =
+ debugfs_create_atomic_t("num_rqs", 0400,
+ dbg->dir_debugfs,
+ &dev->delay_drop.rqs_cnt);
+ if (!dbg->rqs_cnt_debugfs)
+ goto out_debugfs;
+
+ dbg->timeout_debugfs =
+ debugfs_create_file("timeout", 0600,
+ dbg->dir_debugfs,
+ &dev->delay_drop,
+ &fops_delay_drop_timeout);
+ if (!dbg->timeout_debugfs)
+ goto out_debugfs;
+
+ dev->delay_drop.dbg = dbg;
+
+ return 0;
+
+out_debugfs:
+ delay_drop_debugfs_cleanup(dev);
+ return -ENOMEM;
+}
+
+static void init_delay_drop(struct mlx5_ib_dev *dev)
+{
+ if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
+ return;
+
+ mutex_init(&dev->delay_drop.lock);
+ dev->delay_drop.dev = dev;
+ dev->delay_drop.activate = false;
+ dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000;
+ INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler);
+ atomic_set(&dev->delay_drop.rqs_cnt, 0);
+ atomic_set(&dev->delay_drop.events_cnt, 0);
+
+ if (delay_drop_debugfs_init(dev))
+ mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n");
+}
+
+static const struct cpumask *
+mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+
+ return mlx5_get_vector_affinity(dev->mdev, comp_vector);
+}
+
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
struct mlx5_ib_dev *dev;
@@ -3692,6 +4029,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
dev->ib_dev.get_port_immutable = mlx5_port_immutable;
dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
+ dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity;
if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads))
dev->ib_dev.alloc_rdma_netdev = mlx5_ib_alloc_rdma_netdev;
@@ -3729,18 +4067,20 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
}
+ dev->ib_dev.create_flow = mlx5_ib_create_flow;
+ dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
+ dev->ib_dev.uverbs_ex_cmd_mask |=
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
+
if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
IB_LINK_LAYER_ETHERNET) {
- dev->ib_dev.create_flow = mlx5_ib_create_flow;
- dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
dev->ib_dev.create_wq = mlx5_ib_create_wq;
dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
dev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW) |
(1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
@@ -3760,6 +4100,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
err = mlx5_enable_eth(dev);
if (err)
goto err_free_port;
+ dev->roce.last_port_state = IB_PORT_DOWN;
}
err = create_dev_resources(&dev->devr);
@@ -3776,9 +4117,13 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
goto err_odp;
}
+ err = mlx5_ib_init_cong_debugfs(dev);
+ if (err)
+ goto err_cnt;
+
dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
if (!dev->mdev->priv.uar)
- goto err_cnt;
+ goto err_cong;
err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false);
if (err)
@@ -3796,18 +4141,25 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
if (err)
goto err_dev;
+ init_delay_drop(dev);
+
for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
err = device_create_file(&dev->ib_dev.dev,
mlx5_class_attributes[i]);
if (err)
- goto err_umrc;
+ goto err_delay_drop;
}
+ if ((MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
+ MLX5_CAP_GEN(mdev, disable_local_lb))
+ mutex_init(&dev->lb_mutex);
+
dev->ib_active = true;
return dev;
-err_umrc:
+err_delay_drop:
+ cancel_delay_drop(dev);
destroy_umrc_res(dev);
err_dev:
@@ -3823,6 +4175,8 @@ err_uar_page:
mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
err_cnt:
+ mlx5_ib_cleanup_cong_debugfs(dev);
+err_cong:
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
mlx5_ib_dealloc_counters(dev);
@@ -3852,11 +4206,13 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
struct mlx5_ib_dev *dev = context;
enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);
+ cancel_delay_drop(dev);
mlx5_remove_netdev_notifier(dev);
ib_unregister_device(&dev->ib_dev);
mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
mlx5_free_bfreg(dev->mdev, &dev->bfreg);
mlx5_put_uars_page(dev->mdev, mdev->priv.uar);
+ mlx5_ib_cleanup_cong_debugfs(dev);
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
mlx5_ib_dealloc_counters(dev);
destroy_umrc_res(dev);