46 files changed, 3640 insertions, 15 deletions
diff --git a/Documentation/netlink/specs/net_shaper.yaml b/Documentation/netlink/specs/net_shaper.yaml new file mode 100644 index 000000000000..8ebad0d02904 --- /dev/null +++ b/Documentation/netlink/specs/net_shaper.yaml @@ -0,0 +1,362 @@ +# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) +name: net-shaper + +doc: | + Networking HW rate limiting configuration. + + This API allows configuring HW shapers available on the network + devices at different levels (queues, network device) and allows + arbitrary manipulation of the scheduling tree of the involved + shapers. + + Each @shaper is identified within the given device, by a @handle, + comprising both a @scope and an @id. + + Depending on the @scope value, the shapers are attached to specific + HW objects (queues, devices) or, for @node scope, represent a + scheduling group, that can be placed in an arbitrary location of + the scheduling tree. + + Shapers can be created with two different operations: the @set + operation, to create and update a single "attached" shaper, and + the @group operation, to create and update a scheduling + group. Only the @group operation can create @node scope shapers. + + Existing shapers can be deleted/reset via the @delete operation. + + The user can query the running configuration via the @get operation. + + Different devices can provide different feature sets, e.g. with no + support for complex scheduling hierarchy, or for some shaping + parameters. The user can introspect the HW capabilities via the + @cap-get operation. + +definitions: + - + type: enum + name: scope + doc: Defines the shaper @id interpretation. + render-max: true + entries: + - name: unspec + doc: The scope is not specified. + - + name: netdev + doc: The main shaper for the given network device. + - + name: queue + doc: | + The shaper is attached to the given device queue, + the @id represents the queue number. + - + name: node + doc: | + The shaper allows grouping of queues or other + node shapers; can be nested in either @netdev + shapers or other @node shapers, allowing placement + in any location of the scheduling tree, except + leaves and root. + - + type: enum + name: metric + doc: Different metric supported by the shaper. + entries: + - + name: bps + doc: Shaper operates on a bits per second basis. + - + name: pps + doc: Shaper operates on a packets per second basis. + +attribute-sets: + - + name: net-shaper + attributes: + - + name: handle + type: nest + nested-attributes: handle + doc: Unique identifier for the given shaper inside the owning device. + - + name: metric + type: u32 + enum: metric + doc: Metric used by the given shaper for bw-min, bw-max and burst. + - + name: bw-min + type: uint + doc: Guaranteed bandwidth for the given shaper. + - + name: bw-max + type: uint + doc: Maximum bandwidth for the given shaper or 0 when unlimited. + - + name: burst + type: uint + doc: | + Maximum burst-size for shaping. Should not be interpreted + as a quantum. + - + name: priority + type: u32 + doc: | + Scheduling priority for the given shaper. The priority + scheduling is applied to sibling shapers. + - + name: weight + type: u32 + doc: | + Relative weight for round robin scheduling of the + given shaper. + The scheduling is applied to all sibling shapers + with the same priority. + - + name: ifindex + type: u32 + doc: Interface index owning the specified shaper. + - + name: parent + type: nest + nested-attributes: handle + doc: | + Identifier for the parent of the affected shaper. 
+ Only needed for @group operation. + - + name: leaves + type: nest + multi-attr: true + nested-attributes: leaf-info + doc: | + Describes a set of leaves shapers for a @group operation. + - + name: handle + attributes: + - + name: scope + type: u32 + enum: scope + doc: Defines the shaper @id interpretation. + - + name: id + type: u32 + doc: | + Numeric identifier of a shaper. The id semantic depends on + the scope. For @queue scope it's the queue id and for @node + scope it's the node identifier. + - + name: leaf-info + subset-of: net-shaper + attributes: + - + name: handle + - + name: priority + - + name: weight + - + name: caps + attributes: + - + name: ifindex + type: u32 + doc: Interface index queried for shapers capabilities. + - + name: scope + type: u32 + enum: scope + doc: The scope to which the queried capabilities apply. + - + name: support-metric-bps + type: flag + doc: The device accepts 'bps' metric for bw-min, bw-max and burst. + - + name: support-metric-pps + type: flag + doc: The device accepts 'pps' metric for bw-min, bw-max and burst. + - + name: support-nesting + type: flag + doc: | + The device supports nesting shaper belonging to this scope + below 'node' scoped shapers. Only 'queue' and 'node' + scope can have flag 'support-nesting'. + - + name: support-bw-min + type: flag + doc: The device supports a minimum guaranteed B/W. + - + name: support-bw-max + type: flag + doc: The device supports maximum B/W shaping. + - + name: support-burst + type: flag + doc: The device supports a maximum burst size. + - + name: support-priority + type: flag + doc: The device supports priority scheduling. + - + name: support-weight + type: flag + doc: The device supports weighted round robin scheduling. + +operations: + list: + - + name: get + doc: | + Get information about a shaper for a given device. + attribute-set: net-shaper + + do: + pre: net-shaper-nl-pre-doit + post: net-shaper-nl-post-doit + request: + attributes: &ns-binding + - ifindex + - handle + reply: + attributes: &ns-attrs + - ifindex + - parent + - handle + - metric + - bw-min + - bw-max + - burst + - priority + - weight + + dump: + pre: net-shaper-nl-pre-dumpit + post: net-shaper-nl-post-dumpit + request: + attributes: + - ifindex + reply: + attributes: *ns-attrs + - + name: set + doc: | + Create or update the specified shaper. + The set operation can't be used to create a @node scope shaper, + use the @group operation instead. + attribute-set: net-shaper + flags: [ admin-perm ] + + do: + pre: net-shaper-nl-pre-doit + post: net-shaper-nl-post-doit + request: + attributes: + - ifindex + - handle + - metric + - bw-min + - bw-max + - burst + - priority + - weight + + - + name: delete + doc: | + Clear (remove) the specified shaper. When deleting + a @node shaper, reattach all the node's leaves to the + deleted node's parent. + If, after the removal, the parent shaper has no more + leaves and the parent shaper scope is @node, the parent + node is deleted, recursively. + When deleting a @queue shaper or a @netdev shaper, + the shaper disappears from the hierarchy, but the + queue/device can still send traffic: it has an implicit + node with infinite bandwidth. The queue's implicit node + feeds an implicit RR node at the root of the hierarchy. 
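An illustrative aside (editor's addition, not part of the patch): the @scope/@id model described above maps directly onto the C types this series adds in include/net/net_shaper.h and include/uapi/linux/net_shaper.h. A minimal sketch of how a queue-scope and a node-scope handle would be expressed, assuming only those new definitions; example_handles() is a hypothetical helper, not code from the series:

#include <net/net_shaper.h>

static void example_handles(void)
{
	/* shaper attached to TX queue 3 of the owning device */
	struct net_shaper_handle queue3 = {
		.scope = NET_SHAPER_SCOPE_QUEUE,
		.id = 3,
	};

	/* detached scheduling group; @node shapers like this one can only
	 * be created through the @group operation described above
	 */
	struct net_shaper_handle group = {
		.scope = NET_SHAPER_SCOPE_NODE,
		.id = 0,
	};

	(void)queue3;
	(void)group;
}

Per the spec, a @set request carries such a handle plus the shaping attributes (metric, bw-min, bw-max, burst, priority, weight), while @group additionally carries the @leaves and optional @parent handles.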
+ attribute-set: net-shaper + flags: [ admin-perm ] + + do: + pre: net-shaper-nl-pre-doit + post: net-shaper-nl-post-doit + request: + attributes: *ns-binding + + - + name: group + doc: | + Create or update a scheduling group, attaching the specified + @leaves shapers under the specified node identified by @handle. + The @leaves shapers scope must be @queue and the node shaper + scope must be either @node or @netdev. + When the node shaper has @node scope, if the @handle @id is not + specified, a new shaper of such scope is created, otherwise the + specified node must already exist. + When updating an existing node shaper, the specified @leaves are + added to the existing node; such node will also retain any preexisting + leave. + The @parent handle for a new node shaper defaults to the parent + of all the leaves, provided all the leaves share the same parent. + Otherwise @parent handle must be specified. + The user can optionally provide shaping attributes for the node + shaper. + The operation is atomic, on failure no change is applied to + the device shaping configuration, otherwise the @node shaper + full identifier, comprising @binding and @handle, is provided + as the reply. + attribute-set: net-shaper + flags: [ admin-perm ] + + do: + pre: net-shaper-nl-pre-doit + post: net-shaper-nl-post-doit + request: + attributes: + - ifindex + - parent + - handle + - metric + - bw-min + - bw-max + - burst + - priority + - weight + - leaves + reply: + attributes: *ns-binding + + - + name: cap-get + doc: | + Get the shaper capabilities supported by the given device + for the specified scope. + attribute-set: caps + + do: + pre: net-shaper-nl-cap-pre-doit + post: net-shaper-nl-cap-post-doit + request: + attributes: + - ifindex + - scope + reply: + attributes: &cap-attrs + - ifindex + - scope + - support-metric-bps + - support-metric-pps + - support-nesting + - support-bw-min + - support-bw-max + - support-burst + - support-priority + - support-weight + + dump: + pre: net-shaper-nl-cap-pre-dumpit + post: net-shaper-nl-cap-post-dumpit + request: + attributes: + - ifindex + reply: + attributes: *cap-attrs diff --git a/Documentation/networking/kapi.rst b/Documentation/networking/kapi.rst index ea55f462cefa..98682b9a13ee 100644 --- a/Documentation/networking/kapi.rst +++ b/Documentation/networking/kapi.rst @@ -104,6 +104,9 @@ Driver Support .. kernel-doc:: include/linux/netdevice.h :internal: +.. 
kernel-doc:: include/net/net_shaper.h + :internal: + PHY Support ----------- diff --git a/MAINTAINERS b/MAINTAINERS index 1389704c7d8d..2927b44dda25 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16116,6 +16116,7 @@ F: include/linux/platform_data/wiznet.h F: include/uapi/linux/cn_proc.h F: include/uapi/linux/ethtool_netlink.h F: include/uapi/linux/if_* +F: include/uapi/linux/net_shaper.h F: include/uapi/linux/netdev* F: tools/testing/selftests/drivers/net/ X: Documentation/devicetree/bindings/net/bluetooth/ diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 9920b3a68ed1..1fd5acdc73c6 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -641,6 +641,7 @@ config NETDEVSIM depends on PTP_1588_CLOCK_MOCK || PTP_1588_CLOCK_MOCK=n select NET_DEVLINK select PAGE_POOL + select NET_SHAPER help This driver is a developer testing tool and software model that can be used to test various control path networking APIs, especially diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index 0375c7448a57..20bc40eec487 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -258,6 +258,7 @@ config I40E_DCB config IAVF tristate select LIBIE + select NET_SHAPER config I40EVF tristate "Intel(R) Ethernet Adaptive Virtual Function support" diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 95b1d8f62304..532a0a595fe8 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -34,6 +34,7 @@ #include <net/tc_act/tc_gact.h> #include <net/tc_act/tc_mirred.h> #include <net/tc_act/tc_skbedit.h> +#include <net/net_shaper.h> #include "iavf_type.h" #include <linux/avf/virtchnl.h> @@ -250,6 +251,9 @@ struct iavf_cloud_filter { #define IAVF_RESET_WAIT_DETECTED_COUNT 500 #define IAVF_RESET_WAIT_COMPLETE_COUNT 2000 +#define IAVF_MAX_QOS_TC_NUM 8 +#define IAVF_DEFAULT_QUANTA_SIZE 1024 + /* board specific private data structure */ struct iavf_adapter { struct workqueue_struct *wq; @@ -336,6 +340,9 @@ struct iavf_adapter { #define IAVF_FLAG_AQ_DISABLE_CTAG_VLAN_INSERTION BIT_ULL(36) #define IAVF_FLAG_AQ_ENABLE_STAG_VLAN_INSERTION BIT_ULL(37) #define IAVF_FLAG_AQ_DISABLE_STAG_VLAN_INSERTION BIT_ULL(38) +#define IAVF_FLAG_AQ_CONFIGURE_QUEUES_BW BIT_ULL(39) +#define IAVF_FLAG_AQ_CFG_QUEUES_QUANTA_SIZE BIT_ULL(40) +#define IAVF_FLAG_AQ_GET_QOS_CAPS BIT_ULL(41) /* flags for processing extended capability messages during * __IAVF_INIT_EXTENDED_CAPS. Each capability exchange requires @@ -408,6 +415,8 @@ struct iavf_adapter { VIRTCHNL_VF_OFFLOAD_FDIR_PF) #define ADV_RSS_SUPPORT(_a) ((_a)->vf_res->vf_cap_flags & \ VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF) +#define QOS_ALLOWED(_a) ((_a)->vf_res->vf_cap_flags & \ + VIRTCHNL_VF_OFFLOAD_QOS) struct virtchnl_vf_resource *vf_res; /* incl. 
all VSIs */ struct virtchnl_vsi_resource *vsi_res; /* our LAN VSI */ struct virtchnl_version_info pf_version; @@ -416,6 +425,7 @@ struct iavf_adapter { struct virtchnl_vlan_caps vlan_v2_caps; u16 msg_enable; struct iavf_eth_stats current_stats; + struct virtchnl_qos_cap_list *qos_caps; struct iavf_vsi vsi; u32 aq_wait_count; /* RSS stuff */ @@ -571,6 +581,9 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, enum virtchnl_ops v_opcode, enum iavf_status v_retval, u8 *msg, u16 msglen); int iavf_config_rss(struct iavf_adapter *adapter); +void iavf_cfg_queues_bw(struct iavf_adapter *adapter); +void iavf_cfg_queues_quanta_size(struct iavf_adapter *adapter); +void iavf_get_qos_caps(struct iavf_adapter *adapter); void iavf_enable_channels(struct iavf_adapter *adapter); void iavf_disable_channels(struct iavf_adapter *adapter); void iavf_add_cloud_filter(struct iavf_adapter *adapter); diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index f782402cd789..12ef160425aa 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1972,8 +1972,11 @@ static void iavf_finish_config(struct work_struct *work) adapter = container_of(work, struct iavf_adapter, finish_config); - /* Always take RTNL first to prevent circular lock dependency */ + /* Always take RTNL first to prevent circular lock dependency; + * The dev->lock is needed to update the queue number + */ rtnl_lock(); + mutex_lock(&adapter->netdev->lock); mutex_lock(&adapter->crit_lock); if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES) && @@ -2017,6 +2020,7 @@ static void iavf_finish_config(struct work_struct *work) out: mutex_unlock(&adapter->crit_lock); + mutex_unlock(&adapter->netdev->lock); rtnl_unlock(); } @@ -2085,6 +2089,21 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter) return 0; } + if (adapter->aq_required & IAVF_FLAG_AQ_CONFIGURE_QUEUES_BW) { + iavf_cfg_queues_bw(adapter); + return 0; + } + + if (adapter->aq_required & IAVF_FLAG_AQ_GET_QOS_CAPS) { + iavf_get_qos_caps(adapter); + return 0; + } + + if (adapter->aq_required & IAVF_FLAG_AQ_CFG_QUEUES_QUANTA_SIZE) { + iavf_cfg_queues_quanta_size(adapter); + return 0; + } + if (adapter->aq_required & IAVF_FLAG_AQ_CONFIGURE_QUEUES) { iavf_configure_queues(adapter); return 0; @@ -2670,6 +2689,9 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter) /* request initial VLAN offload settings */ iavf_set_vlan_offload_features(adapter, 0, netdev->features); + if (QOS_ALLOWED(adapter)) + adapter->aq_required |= IAVF_FLAG_AQ_GET_QOS_CAPS; + iavf_schedule_finish_config(adapter); return; @@ -2919,6 +2941,30 @@ static void iavf_disable_vf(struct iavf_adapter *adapter) } /** + * iavf_reconfig_qs_bw - Call-back task to handle hardware reset + * @adapter: board private structure + * + * After a reset, the shaper parameters of queues need to be replayed again. + * Since the net_shaper object inside TX rings persists across reset, + * set the update flag for all queues so that the virtchnl message is triggered + * for all queues. 
+ **/ +static void iavf_reconfig_qs_bw(struct iavf_adapter *adapter) +{ + int i, num = 0; + + for (i = 0; i < adapter->num_active_queues; i++) + if (adapter->tx_rings[i].q_shaper.bw_min || + adapter->tx_rings[i].q_shaper.bw_max) { + adapter->tx_rings[i].q_shaper_update = true; + num++; + } + + if (num) + adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_QUEUES_BW; +} + +/** * iavf_reset_task - Call-back task to handle hardware reset * @work: pointer to work_struct * @@ -2944,10 +2990,12 @@ static void iavf_reset_task(struct work_struct *work) /* When device is being removed it doesn't make sense to run the reset * task, just return in such a case. */ + mutex_lock(&netdev->lock); if (!mutex_trylock(&adapter->crit_lock)) { if (adapter->state != __IAVF_REMOVE) queue_work(adapter->wq, &adapter->reset_task); + mutex_unlock(&netdev->lock); return; } @@ -2995,6 +3043,7 @@ static void iavf_reset_task(struct work_struct *work) reg_val); iavf_disable_vf(adapter); mutex_unlock(&adapter->crit_lock); + mutex_unlock(&netdev->lock); return; /* Do not attempt to reinit. It's dead, Jim. */ } @@ -3124,6 +3173,8 @@ continue_reset: iavf_up_complete(adapter); iavf_irq_enable(adapter, true); + + iavf_reconfig_qs_bw(adapter); } else { iavf_change_state(adapter, __IAVF_DOWN); wake_up(&adapter->down_waitqueue); @@ -3133,6 +3184,7 @@ continue_reset: wake_up(&adapter->reset_waitqueue); mutex_unlock(&adapter->crit_lock); + mutex_unlock(&netdev->lock); return; reset_err: @@ -3143,6 +3195,7 @@ reset_err: iavf_disable_vf(adapter); mutex_unlock(&adapter->crit_lock); + mutex_unlock(&netdev->lock); dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n"); } @@ -3614,8 +3667,10 @@ exit: if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) return 0; + mutex_lock(&netdev->lock); netif_set_real_num_rx_queues(netdev, total_qps); netif_set_real_num_tx_queues(netdev, total_qps); + mutex_unlock(&netdev->lock); return ret; } @@ -4893,6 +4948,98 @@ static netdev_features_t iavf_fix_features(struct net_device *netdev, return iavf_fix_strip_features(adapter, features); } +static int +iavf_verify_shaper(struct net_shaper_binding *binding, + const struct net_shaper *shaper, + struct netlink_ext_ack *extack) +{ + struct iavf_adapter *adapter = netdev_priv(binding->netdev); + u64 vf_max; + + if (shaper->handle.scope == NET_SHAPER_SCOPE_QUEUE) { + vf_max = adapter->qos_caps->cap[0].shaper.peak; + if (vf_max && shaper->bw_max > vf_max) { + NL_SET_ERR_MSG_FMT(extack, "Max rate (%llu) of queue %d can't exceed max TX rate of VF (%llu kbps)", + shaper->bw_max, shaper->handle.id, + vf_max); + return -EINVAL; + } + } + return 0; +} + +static int +iavf_shaper_set(struct net_shaper_binding *binding, + const struct net_shaper *shaper, + struct netlink_ext_ack *extack) +{ + struct iavf_adapter *adapter = netdev_priv(binding->netdev); + const struct net_shaper_handle *handle = &shaper->handle; + struct iavf_ring *tx_ring; + int ret = 0; + + mutex_lock(&adapter->crit_lock); + if (handle->id >= adapter->num_active_queues) + goto unlock; + + ret = iavf_verify_shaper(binding, shaper, extack); + if (ret) + goto unlock; + + tx_ring = &adapter->tx_rings[handle->id]; + + tx_ring->q_shaper.bw_min = div_u64(shaper->bw_min, 1000); + tx_ring->q_shaper.bw_max = div_u64(shaper->bw_max, 1000); + tx_ring->q_shaper_update = true; + + adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_QUEUES_BW; + +unlock: + mutex_unlock(&adapter->crit_lock); + return ret; +} + +static int iavf_shaper_del(struct net_shaper_binding *binding, + const struct 
net_shaper_handle *handle, + struct netlink_ext_ack *extack) +{ + struct iavf_adapter *adapter = netdev_priv(binding->netdev); + struct iavf_ring *tx_ring; + + mutex_lock(&adapter->crit_lock); + if (handle->id >= adapter->num_active_queues) + goto unlock; + + tx_ring = &adapter->tx_rings[handle->id]; + tx_ring->q_shaper.bw_min = 0; + tx_ring->q_shaper.bw_max = 0; + tx_ring->q_shaper_update = true; + + adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_QUEUES_BW; + +unlock: + mutex_unlock(&adapter->crit_lock); + return 0; +} + +static void iavf_shaper_cap(struct net_shaper_binding *binding, + enum net_shaper_scope scope, + unsigned long *flags) +{ + if (scope != NET_SHAPER_SCOPE_QUEUE) + return; + + *flags = BIT(NET_SHAPER_A_CAPS_SUPPORT_BW_MIN) | + BIT(NET_SHAPER_A_CAPS_SUPPORT_BW_MAX) | + BIT(NET_SHAPER_A_CAPS_SUPPORT_METRIC_BPS); +} + +static const struct net_shaper_ops iavf_shaper_ops = { + .set = iavf_shaper_set, + .delete = iavf_shaper_del, + .capabilities = iavf_shaper_cap, +}; + static const struct net_device_ops iavf_netdev_ops = { .ndo_open = iavf_open, .ndo_stop = iavf_close, @@ -4908,6 +5055,7 @@ static const struct net_device_ops iavf_netdev_ops = { .ndo_fix_features = iavf_fix_features, .ndo_set_features = iavf_set_features, .ndo_setup_tc = iavf_setup_tc, + .net_shaper_ops = &iavf_shaper_ops, }; /** @@ -5054,7 +5202,7 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct net_device *netdev; struct iavf_adapter *adapter = NULL; struct iavf_hw *hw = NULL; - int err; + int err, len; err = pci_enable_device(pdev); if (err) @@ -5122,6 +5270,13 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->bus.func = PCI_FUNC(pdev->devfn); hw->bus.bus_id = pdev->bus->number; + len = struct_size(adapter->qos_caps, cap, IAVF_MAX_QOS_TC_NUM); + adapter->qos_caps = kzalloc(len, GFP_KERNEL); + if (!adapter->qos_caps) { + err = -ENOMEM; + goto err_alloc_qos_cap; + } + /* set up the locks for the AQ, do this only once in probe * and destroy them only once in remove */ @@ -5160,6 +5315,8 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Initialization goes on in the work. Do not add more of it below. 
*/ return 0; +err_alloc_qos_cap: + iounmap(hw->hw_addr); err_ioremap: destroy_workqueue(adapter->wq); err_alloc_wq: diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.h b/drivers/net/ethernet/intel/iavf/iavf_txrx.h index d7b5587aeb8e..f97c702c0802 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.h +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.h @@ -296,6 +296,8 @@ struct iavf_ring { */ u32 rx_buf_len; + struct net_shaper q_shaper; + bool q_shaper_update; } ____cacheline_internodealigned_in_smp; #define IAVF_ITR_ADAPTIVE_MIN_INC 0x0002 diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 7e810b65380c..15d388b431c5 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -151,7 +151,8 @@ int iavf_send_vf_config_msg(struct iavf_adapter *adapter) VIRTCHNL_VF_OFFLOAD_USO | VIRTCHNL_VF_OFFLOAD_FDIR_PF | VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF | - VIRTCHNL_VF_CAP_ADV_LINK_SPEED; + VIRTCHNL_VF_CAP_ADV_LINK_SPEED | + VIRTCHNL_VF_OFFLOAD_QOS; adapter->current_op = VIRTCHNL_OP_GET_VF_RESOURCES; adapter->aq_required &= ~IAVF_FLAG_AQ_GET_CONFIG; @@ -1508,6 +1509,130 @@ iavf_set_adapter_link_speed_from_vpe(struct iavf_adapter *adapter, } /** + * iavf_get_qos_caps - get qos caps support + * @adapter: iavf adapter struct instance + * + * This function requests PF for Supported QoS Caps. + */ +void iavf_get_qos_caps(struct iavf_adapter *adapter) +{ + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, + "Cannot get qos caps, command %d pending\n", + adapter->current_op); + return; + } + + adapter->current_op = VIRTCHNL_OP_GET_QOS_CAPS; + adapter->aq_required &= ~IAVF_FLAG_AQ_GET_QOS_CAPS; + iavf_send_pf_msg(adapter, VIRTCHNL_OP_GET_QOS_CAPS, NULL, 0); +} + +/** + * iavf_set_quanta_size - set quanta size of queue chunk + * @adapter: iavf adapter struct instance + * @quanta_size: quanta size in bytes + * @queue_index: starting index of queue chunk + * @num_queues: number of queues in the queue chunk + * + * This function requests PF to set quanta size of queue chunk + * starting at queue_index. + */ +static void +iavf_set_quanta_size(struct iavf_adapter *adapter, u16 quanta_size, + u16 queue_index, u16 num_queues) +{ + struct virtchnl_quanta_cfg quanta_cfg; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, + "Cannot set queue quanta size, command %d pending\n", + adapter->current_op); + return; + } + + adapter->current_op = VIRTCHNL_OP_CONFIG_QUANTA; + quanta_cfg.quanta_size = quanta_size; + quanta_cfg.queue_select.type = VIRTCHNL_QUEUE_TYPE_TX; + quanta_cfg.queue_select.start_queue_id = queue_index; + quanta_cfg.queue_select.num_queues = num_queues; + adapter->aq_required &= ~IAVF_FLAG_AQ_CFG_QUEUES_QUANTA_SIZE; + iavf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_QUANTA, + (u8 *)&quanta_cfg, sizeof(quanta_cfg)); +} + +/** + * iavf_cfg_queues_quanta_size - configure quanta size of queues + * @adapter: adapter structure + * + * Request that the PF configure quanta size of allocated queues. 
+ **/ +void iavf_cfg_queues_quanta_size(struct iavf_adapter *adapter) +{ + int quanta_size = IAVF_DEFAULT_QUANTA_SIZE; + + /* Set Queue Quanta Size to default */ + iavf_set_quanta_size(adapter, quanta_size, 0, + adapter->num_active_queues); +} + +/** + * iavf_cfg_queues_bw - configure bandwidth of allocated queues + * @adapter: iavf adapter structure instance + * + * This function requests PF to configure queue bandwidth of allocated queues + */ +void iavf_cfg_queues_bw(struct iavf_adapter *adapter) +{ + struct virtchnl_queues_bw_cfg *qs_bw_cfg; + struct net_shaper *q_shaper; + int qs_to_update = 0; + int i, inx = 0; + size_t len; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, + "Cannot set tc queue bw, command %d pending\n", + adapter->current_op); + return; + } + + for (i = 0; i < adapter->num_active_queues; i++) { + if (adapter->tx_rings[i].q_shaper_update) + qs_to_update++; + } + len = struct_size(qs_bw_cfg, cfg, qs_to_update); + qs_bw_cfg = kzalloc(len, GFP_KERNEL); + if (!qs_bw_cfg) + return; + + qs_bw_cfg->vsi_id = adapter->vsi.id; + qs_bw_cfg->num_queues = qs_to_update; + + for (i = 0; i < adapter->num_active_queues; i++) { + struct iavf_ring *tx_ring = &adapter->tx_rings[i]; + + q_shaper = &tx_ring->q_shaper; + if (tx_ring->q_shaper_update) { + qs_bw_cfg->cfg[inx].queue_id = i; + qs_bw_cfg->cfg[inx].shaper.peak = q_shaper->bw_max; + qs_bw_cfg->cfg[inx].shaper.committed = q_shaper->bw_min; + qs_bw_cfg->cfg[inx].tc = 0; + inx++; + } + } + + adapter->current_op = VIRTCHNL_OP_CONFIG_QUEUE_BW; + adapter->aq_required &= ~IAVF_FLAG_AQ_CONFIGURE_QUEUES_BW; + iavf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_QUEUE_BW, + (u8 *)qs_bw_cfg, len); + kfree(qs_bw_cfg); +} + +/** * iavf_enable_channels * @adapter: adapter structure * @@ -2227,6 +2352,18 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, VIRTCHNL_RSS_ALG_TOEPLITZ_SYMMETRIC; break; + case VIRTCHNL_OP_GET_QOS_CAPS: + dev_warn(&adapter->pdev->dev, "Failed to Get Qos CAPs, error %s\n", + iavf_stat_str(&adapter->hw, v_retval)); + break; + case VIRTCHNL_OP_CONFIG_QUANTA: + dev_warn(&adapter->pdev->dev, "Failed to Config Quanta, error %s\n", + iavf_stat_str(&adapter->hw, v_retval)); + break; + case VIRTCHNL_OP_CONFIG_QUEUE_BW: + dev_warn(&adapter->pdev->dev, "Failed to Config Queue BW, error %s\n", + iavf_stat_str(&adapter->hw, v_retval)); + break; default: dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n", v_retval, iavf_stat_str(&adapter->hw, v_retval), @@ -2569,6 +2706,24 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, if (!v_retval) iavf_netdev_features_vlan_strip_set(netdev, false); break; + case VIRTCHNL_OP_GET_QOS_CAPS: { + u16 len = struct_size(adapter->qos_caps, cap, + IAVF_MAX_QOS_TC_NUM); + + memcpy(adapter->qos_caps, msg, min(msglen, len)); + + adapter->aq_required |= IAVF_FLAG_AQ_CFG_QUEUES_QUANTA_SIZE; + } + break; + case VIRTCHNL_OP_CONFIG_QUANTA: + break; + case VIRTCHNL_OP_CONFIG_QUEUE_BW: { + int i; + /* shaper configuration is successful for all queues */ + for (i = 0; i < adapter->num_active_queues; i++) + adapter->tx_rings[i].q_shaper_update = false; + } + break; default: if (adapter->current_op && (v_opcode != adapter->current_op)) dev_warn(&adapter->pdev->dev, "Expected response %d from PF, received %d\n", diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 0e699a0432c5..680a81961ba1 100644 --- a/drivers/net/ethernet/intel/ice/ice.h 
+++ b/drivers/net/ethernet/intel/ice/ice.h @@ -667,6 +667,8 @@ struct ice_pf { struct ice_agg_node vf_agg_node[ICE_MAX_VF_AGG_NODES]; struct ice_dplls dplls; struct device *hwmon_dev; + + u8 num_quanta_prof_used; }; extern struct workqueue_struct *ice_lag_wq; diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 98c3764fed39..3a8e156d7d86 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -347,6 +347,8 @@ ice_setup_tx_ctx(struct ice_tx_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf break; } + tlan_ctx->quanta_prof_idx = ring->quanta_prof_id; + tlan_ctx->tso_ena = ICE_TX_LEGACY; tlan_ctx->tso_qnum = pf_q; diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 009716a12a26..b22e71dc59d4 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -2437,6 +2437,25 @@ ice_parse_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p, } /** + * ice_func_id_to_logical_id - map from function id to logical pf id + * @active_function_bitmap: active function bitmap + * @pf_id: function number of device + * + * Return: logical PF ID. + */ +static int ice_func_id_to_logical_id(u32 active_function_bitmap, u8 pf_id) +{ + u8 logical_id = 0; + u8 i; + + for (i = 0; i < pf_id; i++) + if (active_function_bitmap & BIT(i)) + logical_id++; + + return logical_id; +} + +/** * ice_parse_valid_functions_cap - Parse ICE_AQC_CAPS_VALID_FUNCTIONS caps * @hw: pointer to the HW struct * @dev_p: pointer to device capabilities structure @@ -2453,6 +2472,8 @@ ice_parse_valid_functions_cap(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p, dev_p->num_funcs = hweight32(number); ice_debug(hw, ICE_DBG_INIT, "dev caps: num_funcs = %d\n", dev_p->num_funcs); + + hw->logical_pf_id = ice_func_id_to_logical_id(number, hw->pf_id); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index 8d31bfe28cc8..dc88aea9f473 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -6,6 +6,14 @@ #ifndef _ICE_HW_AUTOGEN_H_ #define _ICE_HW_AUTOGEN_H_ +#define GLCOMM_QUANTA_PROF(_i) (0x002D2D68 + ((_i) * 4)) +#define GLCOMM_QUANTA_PROF_MAX_INDEX 15 +#define GLCOMM_QUANTA_PROF_QUANTA_SIZE_S 0 +#define GLCOMM_QUANTA_PROF_QUANTA_SIZE_M ICE_M(0x3FFF, 0) +#define GLCOMM_QUANTA_PROF_MAX_CMD_S 16 +#define GLCOMM_QUANTA_PROF_MAX_CMD_M ICE_M(0xFF, 16) +#define GLCOMM_QUANTA_PROF_MAX_DESC_S 24 +#define GLCOMM_QUANTA_PROF_MAX_DESC_M ICE_M(0x3F, 24) #define QTX_COMM_DBELL(_DBQM) (0x002C0000 + ((_DBQM) * 4)) #define QTX_COMM_HEAD(_DBQM) (0x000E0000 + ((_DBQM) * 4)) #define QTX_COMM_HEAD_HEAD_S 0 diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 67153f5b6891..cb347c852ba9 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -407,6 +407,7 @@ struct ice_tx_ring { #define ICE_TX_FLAGS_RING_VLAN_L2TAG2 BIT(2) u8 flags; u8 dcb_tc; /* Traffic class of ring */ + u16 quanta_prof_id; } ____cacheline_internodealigned_in_smp; static inline bool ice_ring_uses_build_skb(struct ice_rx_ring *ring) diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 45768796691f..adb168860711 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ 
-905,6 +905,7 @@ struct ice_hw { u8 revision_id; u8 pf_id; /* device profile info */ + u8 logical_pf_id; u16 max_burst_size; /* driver sets this value */ diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h index be4266899690..4261fe1c2bcd 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h @@ -59,6 +59,13 @@ struct ice_fdir_prof_info { u64 fdir_active_cnt; }; +struct ice_vf_qs_bw { + u32 committed; + u32 peak; + u16 queue_id; + u8 tc; +}; + /* VF operations */ struct ice_vf_ops { enum ice_disq_rst_src reset_type; @@ -140,6 +147,7 @@ struct ice_vf { struct devlink_port devlink_port; u16 num_msix; /* num of MSI-X configured on this VF */ + struct ice_vf_qs_bw qs_bw[ICE_MAX_RSS_QS_PER_VF]; }; /* Flags for controlling behavior of ice_reset_vf */ diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index a230859584c2..aa2080747714 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -495,6 +495,9 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_USO) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_USO; + if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_QOS) + vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_QOS; + vfres->num_vsis = 1; /* Tx and Rx queue are equal for VF */ vfres->num_queue_pairs = vsi->num_txq; @@ -1035,6 +1038,191 @@ error_param: } /** + * ice_vc_get_qos_caps - Get current QoS caps from PF + * @vf: pointer to the VF info + * + * Get VF's QoS capabilities, such as TC number, arbiter and + * bandwidth from PF. + * + * Return: 0 on success or negative error value. + */ +static int ice_vc_get_qos_caps(struct ice_vf *vf) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct virtchnl_qos_cap_list *cap_list = NULL; + u8 tc_prio[ICE_MAX_TRAFFIC_CLASS] = { 0 }; + struct virtchnl_qos_cap_elem *cfg = NULL; + struct ice_vsi_ctx *vsi_ctx; + struct ice_pf *pf = vf->pf; + struct ice_port_info *pi; + struct ice_vsi *vsi; + u8 numtc, tc; + u16 len = 0; + int ret, i; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + pi = pf->hw.port_info; + numtc = vsi->tc_cfg.numtc; + + vsi_ctx = ice_get_vsi_ctx(pi->hw, vf->lan_vsi_idx); + if (!vsi_ctx) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + len = struct_size(cap_list, cap, numtc); + cap_list = kzalloc(len, GFP_KERNEL); + if (!cap_list) { + v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; + len = 0; + goto err; + } + + cap_list->vsi_id = vsi->vsi_num; + cap_list->num_elem = numtc; + + /* Store the UP2TC configuration from DCB to a user priority bitmap + * of each TC. Each element of prio_of_tc represents one TC. Each + * bitmap indicates the user priorities belong to this TC. 
+ */ + for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) { + tc = pi->qos_cfg.local_dcbx_cfg.etscfg.prio_table[i]; + tc_prio[tc] |= BIT(i); + } + + for (i = 0; i < numtc; i++) { + cfg = &cap_list->cap[i]; + cfg->tc_num = i; + cfg->tc_prio = tc_prio[i]; + cfg->arbiter = pi->qos_cfg.local_dcbx_cfg.etscfg.tsatable[i]; + cfg->weight = VIRTCHNL_STRICT_WEIGHT; + cfg->type = VIRTCHNL_BW_SHAPER; + cfg->shaper.committed = vsi_ctx->sched.bw_t_info[i].cir_bw.bw; + cfg->shaper.peak = vsi_ctx->sched.bw_t_info[i].eir_bw.bw; + } + +err: + ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_QOS_CAPS, v_ret, + (u8 *)cap_list, len); + kfree(cap_list); + return ret; +} + +/** + * ice_vf_cfg_qs_bw - Configure per queue bandwidth + * @vf: pointer to the VF info + * @num_queues: number of queues to be configured + * + * Configure per queue bandwidth. + * + * Return: 0 on success or negative error value. + */ +static int ice_vf_cfg_qs_bw(struct ice_vf *vf, u16 num_queues) +{ + struct ice_hw *hw = &vf->pf->hw; + struct ice_vsi *vsi; + int ret; + u16 i; + + vsi = ice_get_vf_vsi(vf); + if (!vsi) + return -EINVAL; + + for (i = 0; i < num_queues; i++) { + u32 p_rate, min_rate; + u8 tc; + + p_rate = vf->qs_bw[i].peak; + min_rate = vf->qs_bw[i].committed; + tc = vf->qs_bw[i].tc; + if (p_rate) + ret = ice_cfg_q_bw_lmt(hw->port_info, vsi->idx, tc, + vf->qs_bw[i].queue_id, + ICE_MAX_BW, p_rate); + else + ret = ice_cfg_q_bw_dflt_lmt(hw->port_info, vsi->idx, tc, + vf->qs_bw[i].queue_id, + ICE_MAX_BW); + if (ret) + return ret; + + if (min_rate) + ret = ice_cfg_q_bw_lmt(hw->port_info, vsi->idx, tc, + vf->qs_bw[i].queue_id, + ICE_MIN_BW, min_rate); + else + ret = ice_cfg_q_bw_dflt_lmt(hw->port_info, vsi->idx, tc, + vf->qs_bw[i].queue_id, + ICE_MIN_BW); + + if (ret) + return ret; + } + + return 0; +} + +/** + * ice_vf_cfg_q_quanta_profile - Configure quanta profile + * @vf: pointer to the VF info + * @quanta_prof_idx: pointer to the quanta profile index + * @quanta_size: quanta size to be set + * + * This function chooses available quanta profile and configures the register. + * The quanta profile is evenly divided by the number of device ports, and then + * available to the specific PF and VFs. The first profile for each PF is a + * reserved default profile. Only quanta size of the rest unused profile can be + * modified. + * + * Return: 0 on success or negative error value. 
+ */ +static int ice_vf_cfg_q_quanta_profile(struct ice_vf *vf, u16 quanta_size, + u16 *quanta_prof_idx) +{ + const u16 n_desc = calc_quanta_desc(quanta_size); + struct ice_hw *hw = &vf->pf->hw; + const u16 n_cmd = 2 * n_desc; + struct ice_pf *pf = vf->pf; + u16 per_pf, begin_id; + u8 n_used; + u32 reg; + + begin_id = (GLCOMM_QUANTA_PROF_MAX_INDEX + 1) / hw->dev_caps.num_funcs * + hw->logical_pf_id; + + if (quanta_size == ICE_DFLT_QUANTA) { + *quanta_prof_idx = begin_id; + } else { + per_pf = (GLCOMM_QUANTA_PROF_MAX_INDEX + 1) / + hw->dev_caps.num_funcs; + n_used = pf->num_quanta_prof_used; + if (n_used < per_pf) { + *quanta_prof_idx = begin_id + 1 + n_used; + pf->num_quanta_prof_used++; + } else { + return -EINVAL; + } + } + + reg = FIELD_PREP(GLCOMM_QUANTA_PROF_QUANTA_SIZE_M, quanta_size) | + FIELD_PREP(GLCOMM_QUANTA_PROF_MAX_CMD_M, n_cmd) | + FIELD_PREP(GLCOMM_QUANTA_PROF_MAX_DESC_M, n_desc); + wr32(hw, GLCOMM_QUANTA_PROF(*quanta_prof_idx), reg); + + return 0; +} + +/** * ice_vc_cfg_promiscuous_mode_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer @@ -1636,6 +1824,141 @@ error_param: } /** + * ice_vc_cfg_q_bw - Configure per queue bandwidth + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer which holds the command descriptor + * + * Configure VF queues bandwidth. + * + * Return: 0 on success or negative error value. + */ +static int ice_vc_cfg_q_bw(struct ice_vf *vf, u8 *msg) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct virtchnl_queues_bw_cfg *qbw = + (struct virtchnl_queues_bw_cfg *)msg; + struct ice_vsi *vsi; + u16 i; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) || + !ice_vc_isvalid_vsi_id(vf, qbw->vsi_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + if (qbw->num_queues > ICE_MAX_RSS_QS_PER_VF || + qbw->num_queues > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) { + dev_err(ice_pf_to_dev(vf->pf), "VF-%d trying to configure more than allocated number of queues: %d\n", + vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + for (i = 0; i < qbw->num_queues; i++) { + if (qbw->cfg[i].shaper.peak != 0 && vf->max_tx_rate != 0 && + qbw->cfg[i].shaper.peak > vf->max_tx_rate) + dev_warn(ice_pf_to_dev(vf->pf), "The maximum queue %d rate limit configuration may not take effect because the maximum TX rate for VF-%d is %d\n", + qbw->cfg[i].queue_id, vf->vf_id, + vf->max_tx_rate); + if (qbw->cfg[i].shaper.committed != 0 && vf->min_tx_rate != 0 && + qbw->cfg[i].shaper.committed < vf->min_tx_rate) + dev_warn(ice_pf_to_dev(vf->pf), "The minimum queue %d rate limit configuration may not take effect because the minimum TX rate for VF-%d is %d\n", + qbw->cfg[i].queue_id, vf->vf_id, + vf->max_tx_rate); + } + + for (i = 0; i < qbw->num_queues; i++) { + vf->qs_bw[i].queue_id = qbw->cfg[i].queue_id; + vf->qs_bw[i].peak = qbw->cfg[i].shaper.peak; + vf->qs_bw[i].committed = qbw->cfg[i].shaper.committed; + vf->qs_bw[i].tc = qbw->cfg[i].tc; + } + + if (ice_vf_cfg_qs_bw(vf, qbw->num_queues)) + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + +err: + /* send the response to the VF */ + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_QUEUE_BW, + v_ret, NULL, 0); +} + +/** + * ice_vc_cfg_q_quanta - Configure per queue quanta + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer which holds the command descriptor + * + * Configure VF queues quanta. 
+ * + * Return: 0 on success or negative error value. + */ +static int ice_vc_cfg_q_quanta(struct ice_vf *vf, u8 *msg) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + u16 quanta_prof_id, quanta_size, start_qid, end_qid, i; + struct virtchnl_quanta_cfg *qquanta = + (struct virtchnl_quanta_cfg *)msg; + struct ice_vsi *vsi; + int ret; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + end_qid = qquanta->queue_select.start_queue_id + + qquanta->queue_select.num_queues; + if (end_qid > ICE_MAX_RSS_QS_PER_VF || + end_qid > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) { + dev_err(ice_pf_to_dev(vf->pf), "VF-%d trying to configure more than allocated number of queues: %d\n", + vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + quanta_size = qquanta->quanta_size; + if (quanta_size > ICE_MAX_QUANTA_SIZE || + quanta_size < ICE_MIN_QUANTA_SIZE) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + if (quanta_size % 64) { + dev_err(ice_pf_to_dev(vf->pf), "quanta size should be the product of 64\n"); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + ret = ice_vf_cfg_q_quanta_profile(vf, quanta_size, + &quanta_prof_id); + if (ret) { + v_ret = VIRTCHNL_STATUS_ERR_NOT_SUPPORTED; + goto err; + } + + start_qid = qquanta->queue_select.start_queue_id; + for (i = start_qid; i < end_qid; i++) + vsi->tx_rings[i]->quanta_prof_id = quanta_prof_id; + +err: + /* send the response to the VF */ + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_QUANTA, + v_ret, NULL, 0); +} + +/** * ice_vc_cfg_qs_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer @@ -3819,6 +4142,9 @@ static const struct ice_virtchnl_ops ice_virtchnl_dflt_ops = { .dis_vlan_stripping_v2_msg = ice_vc_dis_vlan_stripping_v2_msg, .ena_vlan_insertion_v2_msg = ice_vc_ena_vlan_insertion_v2_msg, .dis_vlan_insertion_v2_msg = ice_vc_dis_vlan_insertion_v2_msg, + .get_qos_caps = ice_vc_get_qos_caps, + .cfg_q_bw = ice_vc_cfg_q_bw, + .cfg_q_quanta = ice_vc_cfg_q_quanta, }; /** @@ -4177,6 +4503,15 @@ error_handler: case VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2: err = ops->dis_vlan_insertion_v2_msg(vf, msg); break; + case VIRTCHNL_OP_GET_QOS_CAPS: + err = ops->get_qos_caps(vf); + break; + case VIRTCHNL_OP_CONFIG_QUEUE_BW: + err = ops->cfg_q_bw(vf, msg); + break; + case VIRTCHNL_OP_CONFIG_QUANTA: + err = ops->cfg_q_quanta(vf, msg); + break; case VIRTCHNL_OP_UNKNOWN: default: dev_err(dev, "Unsupported opcode %d from VF %d\n", v_opcode, diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.h b/drivers/net/ethernet/intel/ice/ice_virtchnl.h index 3a4115869153..0c629aef9baf 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.h @@ -13,6 +13,13 @@ /* Restrict number of MAC Addr and VLAN that non-trusted VF can programmed */ #define ICE_MAX_VLAN_PER_VF 8 +#define ICE_DFLT_QUANTA 1024 +#define ICE_MAX_QUANTA_SIZE 4096 +#define ICE_MIN_QUANTA_SIZE 256 + +#define calc_quanta_desc(x) \ + max_t(u16, 12, min_t(u16, 63, (((x) + 66) / 132) * 2 + 4)) + /* MAC filters: 1 is reserved for the VF's default/perm_addr/LAA MAC, 1 for * broadcast, and 16 for additional unicast/multicast filters */ @@ -61,6 +68,10 @@ struct ice_virtchnl_ops { int (*dis_vlan_stripping_v2_msg)(struct ice_vf *vf, u8 *msg); int (*ena_vlan_insertion_v2_msg)(struct ice_vf *vf, u8 *msg); int 
(*dis_vlan_insertion_v2_msg)(struct ice_vf *vf, u8 *msg); + int (*get_qos_caps)(struct ice_vf *vf); + int (*cfg_q_tc_map)(struct ice_vf *vf, u8 *msg); + int (*cfg_q_bw)(struct ice_vf *vf, u8 *msg); + int (*cfg_q_quanta)(struct ice_vf *vf, u8 *msg); }; #ifdef CONFIG_PCI_IOV diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c index d796dbd2a440..c105a82ee136 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c @@ -84,6 +84,11 @@ static const u32 fdir_pf_allowlist_opcodes[] = { VIRTCHNL_OP_ADD_FDIR_FILTER, VIRTCHNL_OP_DEL_FDIR_FILTER, }; +static const u32 tc_allowlist_opcodes[] = { + VIRTCHNL_OP_GET_QOS_CAPS, VIRTCHNL_OP_CONFIG_QUEUE_BW, + VIRTCHNL_OP_CONFIG_QUANTA, +}; + struct allowlist_opcode_info { const u32 *opcodes; size_t size; @@ -104,6 +109,7 @@ static const struct allowlist_opcode_info allowlist_opcodes[] = { ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF, adv_rss_pf_allowlist_opcodes), ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_FDIR_PF, fdir_pf_allowlist_opcodes), ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_VLAN_V2, vlan_v2_allowlist_opcodes), + ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_QOS, tc_allowlist_opcodes), }; /** diff --git a/drivers/net/netdevsim/ethtool.c b/drivers/net/netdevsim/ethtool.c index 1436905bc106..5fe1eaef99b5 100644 --- a/drivers/net/netdevsim/ethtool.c +++ b/drivers/net/netdevsim/ethtool.c @@ -103,8 +103,10 @@ nsim_set_channels(struct net_device *dev, struct ethtool_channels *ch) struct netdevsim *ns = netdev_priv(dev); int err; + mutex_lock(&dev->lock); err = netif_set_real_num_queues(dev, ch->combined_count, ch->combined_count); + mutex_unlock(&dev->lock); if (err) return err; diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 017a6102be0a..cad85bb0cf54 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -22,6 +22,7 @@ #include <net/netdev_queues.h> #include <net/page_pool/helpers.h> #include <net/netlink.h> +#include <net/net_shaper.h> #include <net/pkt_cls.h> #include <net/rtnetlink.h> #include <net/udp_tunnel.h> @@ -475,6 +476,43 @@ static int nsim_stop(struct net_device *dev) return 0; } +static int nsim_shaper_set(struct net_shaper_binding *binding, + const struct net_shaper *shaper, + struct netlink_ext_ack *extack) +{ + return 0; +} + +static int nsim_shaper_del(struct net_shaper_binding *binding, + const struct net_shaper_handle *handle, + struct netlink_ext_ack *extack) +{ + return 0; +} + +static int nsim_shaper_group(struct net_shaper_binding *binding, + int leaves_count, + const struct net_shaper *leaves, + const struct net_shaper *root, + struct netlink_ext_ack *extack) +{ + return 0; +} + +static void nsim_shaper_cap(struct net_shaper_binding *binding, + enum net_shaper_scope scope, + unsigned long *flags) +{ + *flags = ULONG_MAX; +} + +static const struct net_shaper_ops nsim_shaper_ops = { + .set = nsim_shaper_set, + .delete = nsim_shaper_del, + .group = nsim_shaper_group, + .capabilities = nsim_shaper_cap, +}; + static const struct net_device_ops nsim_netdev_ops = { .ndo_start_xmit = nsim_start_xmit, .ndo_set_rx_mode = nsim_set_rx_mode, @@ -496,6 +534,7 @@ static const struct net_device_ops nsim_netdev_ops = { .ndo_bpf = nsim_bpf, .ndo_open = nsim_open, .ndo_stop = nsim_stop, + .net_shaper_ops = &nsim_shaper_ops, }; static const struct net_device_ops nsim_vf_netdev_ops = { diff --git a/drivers/net/vxlan/vxlan_mdb.c b/drivers/net/vxlan/vxlan_mdb.c index 
60eb95a06d55..ebed05a2804c 100644 --- a/drivers/net/vxlan/vxlan_mdb.c +++ b/drivers/net/vxlan/vxlan_mdb.c @@ -284,7 +284,7 @@ int vxlan_mdb_dump(struct net_device *dev, struct sk_buff *skb, ASSERT_RTNL(); - NL_ASSERT_DUMP_CTX_FITS(struct vxlan_mdb_dump_ctx); + NL_ASSERT_CTX_FITS(struct vxlan_mdb_dump_ctx); nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWMDB, sizeof(*bpm), diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index f41395264dca..223e433c39fe 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -89,6 +89,9 @@ enum virtchnl_rx_hsplit { VIRTCHNL_RX_HSPLIT_SPLIT_SCTP = 8, }; +enum virtchnl_bw_limit_type { + VIRTCHNL_BW_SHAPER = 0, +}; /* END GENERIC DEFINES */ /* Opcodes for VF-PF communication. These are placed in the v_opcode field @@ -151,6 +154,11 @@ enum virtchnl_ops { VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2 = 55, VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2 = 56, VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2 = 57, + /* opcode 57 - 65 are reserved */ + VIRTCHNL_OP_GET_QOS_CAPS = 66, + /* opcode 68 through 111 are reserved */ + VIRTCHNL_OP_CONFIG_QUEUE_BW = 112, + VIRTCHNL_OP_CONFIG_QUANTA = 113, VIRTCHNL_OP_MAX, }; @@ -261,6 +269,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); #define VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC BIT(26) #define VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF BIT(27) #define VIRTCHNL_VF_OFFLOAD_FDIR_PF BIT(28) +#define VIRTCHNL_VF_OFFLOAD_QOS BIT(29) #define VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \ VIRTCHNL_VF_OFFLOAD_VLAN | \ @@ -1416,6 +1425,85 @@ struct virtchnl_fdir_del { VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del); +struct virtchnl_shaper_bw { + /* Unit is Kbps */ + u32 committed; + u32 peak; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_shaper_bw); + +/* VIRTCHNL_OP_GET_QOS_CAPS + * VF sends this message to get its QoS Caps, such as + * TC number, Arbiter and Bandwidth. 
+ */ +struct virtchnl_qos_cap_elem { + u8 tc_num; + u8 tc_prio; +#define VIRTCHNL_ABITER_STRICT 0 +#define VIRTCHNL_ABITER_ETS 2 + u8 arbiter; +#define VIRTCHNL_STRICT_WEIGHT 1 + u8 weight; + enum virtchnl_bw_limit_type type; + union { + struct virtchnl_shaper_bw shaper; + u8 pad2[32]; + }; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(40, virtchnl_qos_cap_elem); + +struct virtchnl_qos_cap_list { + u16 vsi_id; + u16 num_elem; + struct virtchnl_qos_cap_elem cap[]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_qos_cap_list); +#define virtchnl_qos_cap_list_LEGACY_SIZEOF 44 + +/* VIRTCHNL_OP_CONFIG_QUEUE_BW */ +struct virtchnl_queue_bw { + u16 queue_id; + u8 tc; + u8 pad; + struct virtchnl_shaper_bw shaper; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_queue_bw); + +struct virtchnl_queues_bw_cfg { + u16 vsi_id; + u16 num_queues; + struct virtchnl_queue_bw cfg[]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_queues_bw_cfg); +#define virtchnl_queues_bw_cfg_LEGACY_SIZEOF 16 + +enum virtchnl_queue_type { + VIRTCHNL_QUEUE_TYPE_TX = 0, + VIRTCHNL_QUEUE_TYPE_RX = 1, +}; + +/* structure to specify a chunk of contiguous queues */ +struct virtchnl_queue_chunk { + /* see enum virtchnl_queue_type */ + s32 type; + u16 start_queue_id; + u16 num_queues; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_queue_chunk); + +struct virtchnl_quanta_cfg { + u16 quanta_size; + struct virtchnl_queue_chunk queue_select; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_quanta_cfg); + #define __vss_byone(p, member, count, old) \ (struct_size(p, member, count) + (old - 1 - struct_size(p, member, 0))) @@ -1438,6 +1526,8 @@ VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del); __vss(virtchnl_vlan_filter_list_v2, __vss_byelem, p, m, c), \ __vss(virtchnl_tc_info, __vss_byelem, p, m, c), \ __vss(virtchnl_rdma_qvlist_info, __vss_byelem, p, m, c), \ + __vss(virtchnl_qos_cap_list, __vss_byelem, p, m, c), \ + __vss(virtchnl_queues_bw_cfg, __vss_byelem, p, m, c), \ __vss(virtchnl_rss_key, __vss_byone, p, m, c), \ __vss(virtchnl_rss_lut, __vss_byone, p, m, c)) @@ -1637,6 +1727,35 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, case VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2: valid_len = sizeof(struct virtchnl_vlan_setting); break; + case VIRTCHNL_OP_GET_QOS_CAPS: + break; + case VIRTCHNL_OP_CONFIG_QUEUE_BW: + valid_len = virtchnl_queues_bw_cfg_LEGACY_SIZEOF; + if (msglen >= valid_len) { + struct virtchnl_queues_bw_cfg *q_bw = + (struct virtchnl_queues_bw_cfg *)msg; + + valid_len = virtchnl_struct_size(q_bw, cfg, + q_bw->num_queues); + if (q_bw->num_queues == 0) { + err_msg_format = true; + break; + } + } + break; + case VIRTCHNL_OP_CONFIG_QUANTA: + valid_len = sizeof(struct virtchnl_quanta_cfg); + if (msglen >= valid_len) { + struct virtchnl_quanta_cfg *q_quanta = + (struct virtchnl_quanta_cfg *)msg; + + if (q_quanta->quanta_size == 0 || + q_quanta->queue_select.num_queues == 0) { + err_msg_format = true; + break; + } + } + break; /* These are always errors coming from the VF. 
*/ case VIRTCHNL_OP_EVENT: case VIRTCHNL_OP_UNKNOWN: diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3baf8e539b6f..e6b93d01e631 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1603,6 +1603,14 @@ struct net_device_ops { int (*ndo_hwtstamp_set)(struct net_device *dev, struct kernel_hwtstamp_config *kernel_config, struct netlink_ext_ack *extack); + +#if IS_ENABLED(CONFIG_NET_SHAPER) + /** + * @net_shaper_ops: Device shaping offload operations + * see include/net/net_shapers.h + */ + const struct net_shaper_ops *net_shaper_ops; +#endif }; /** @@ -2406,6 +2414,19 @@ struct net_device { u64 max_pacing_offload_horizon; + /** + * @lock: protects @net_shaper_hierarchy, feel free to use for other + * netdev-scope protection. Ordering: take after rtnl_lock. + */ + struct mutex lock; + +#if IS_ENABLED(CONFIG_NET_SHAPER) + /** + * @net_shaper_hierarchy: data tracking the current shaper status + * see include/net/net_shapers.h + */ + struct net_shaper_hierarchy *net_shaper_hierarchy; +#endif u8 priv[] ____cacheline_aligned __counted_by(priv_len); } ____cacheline_aligned; diff --git a/include/linux/netlink.h b/include/linux/netlink.h index b332c2048c75..a3ca198a3a9e 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -34,6 +34,7 @@ struct netlink_skb_parms { #define NETLINK_CB(skb) (*(struct netlink_skb_parms*)&((skb)->cb)) #define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds) +#define NETLINK_CTX_SIZE 48 void netlink_table_grab(void); @@ -293,7 +294,7 @@ struct netlink_callback { int flags; bool strict_check; union { - u8 ctx[48]; + u8 ctx[NETLINK_CTX_SIZE]; /* args is deprecated. Cast a struct over ctx instead * for proper type safety. @@ -302,7 +303,7 @@ struct netlink_callback { }; }; -#define NL_ASSERT_DUMP_CTX_FITS(type_name) \ +#define NL_ASSERT_CTX_FITS(type_name) \ BUILD_BUG_ON(sizeof(type_name) > \ sizeof_field(struct netlink_callback, ctx)) diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 9ab49bfeae78..9d3726e8f90e 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -124,7 +124,8 @@ struct genl_family { * @genlhdr: generic netlink message header * @attrs: netlink attributes * @_net: network namespace - * @user_ptr: user pointers + * @ctx: storage space for the use by the family + * @user_ptr: user pointers (deprecated, use ctx instead) * @extack: extended ACK report struct */ struct genl_info { @@ -135,7 +136,10 @@ struct genl_info { struct genlmsghdr * genlhdr; struct nlattr ** attrs; possible_net_t _net; - void * user_ptr[2]; + union { + u8 ctx[NETLINK_CTX_SIZE]; + void * user_ptr[2]; + }; struct netlink_ext_ack *extack; }; diff --git a/include/net/net_shaper.h b/include/net/net_shaper.h new file mode 100644 index 000000000000..5c3f49b52fe9 --- /dev/null +++ b/include/net/net_shaper.h @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _NET_SHAPER_H_ +#define _NET_SHAPER_H_ + +#include <linux/types.h> + +#include <uapi/linux/net_shaper.h> + +struct net_device; +struct devlink; +struct netlink_ext_ack; + +enum net_shaper_binding_type { + NET_SHAPER_BINDING_TYPE_NETDEV, + /* NET_SHAPER_BINDING_TYPE_DEVLINK_PORT */ +}; + +struct net_shaper_binding { + enum net_shaper_binding_type type; + union { + struct net_device *netdev; + struct devlink *devlink; + }; +}; + +struct net_shaper_handle { + enum net_shaper_scope scope; + u32 id; +}; + +/** + * struct net_shaper - represents a shaping node on the NIC H/W + * zeroed field are considered not set. 
+ * @parent: Unique identifier for the shaper parent, usually implied + * @handle: Unique identifier for this shaper + * @metric: Specify if the rate limits refers to PPS or BPS + * @bw_min: Minimum guaranteed rate for this shaper + * @bw_max: Maximum peak rate allowed for this shaper + * @burst: Maximum burst for the peek rate of this shaper + * @priority: Scheduling priority for this shaper + * @weight: Scheduling weight for this shaper + */ +struct net_shaper { + struct net_shaper_handle parent; + struct net_shaper_handle handle; + enum net_shaper_metric metric; + u64 bw_min; + u64 bw_max; + u64 burst; + u32 priority; + u32 weight; + + /* private: */ + u32 leaves; /* accounted only for NODE scope */ + struct rcu_head rcu; +}; + +/** + * struct net_shaper_ops - Operations on device H/W shapers + * + * The operations applies to either net_device and devlink objects. + * The initial shaping configuration at device initialization is empty: + * does not constraint the rate in any way. + * The network core keeps track of the applied user-configuration in + * the net_device or devlink structure. + * The operations are serialized via a per device lock. + * + * Device not supporting any kind of nesting should not provide the + * group operation. + * + * Each shaper is uniquely identified within the device with a 'handle' + * comprising the shaper scope and a scope-specific id. + */ +struct net_shaper_ops { + /** + * @group: create the specified shapers scheduling group + * + * Nest the @leaves shapers identified under the * @node shaper. + * All the shapers belong to the device specified by @binding. + * The @leaves arrays size is specified by @leaves_count. + * Create either the @leaves and the @node shaper; or if they already + * exists, links them together in the desired way. + * @leaves scope must be NET_SHAPER_SCOPE_QUEUE. + */ + int (*group)(struct net_shaper_binding *binding, int leaves_count, + const struct net_shaper *leaves, + const struct net_shaper *node, + struct netlink_ext_ack *extack); + + /** + * @set: Updates the specified shaper + * + * Updates or creates the @shaper on the device specified by @binding. + */ + int (*set)(struct net_shaper_binding *binding, + const struct net_shaper *shaper, + struct netlink_ext_ack *extack); + + /** + * @delete: Removes the specified shaper + * + * Removes the shaper configuration as identified by the given @handle + * on the device specified by @binding, restoring the default behavior. + */ + int (*delete)(struct net_shaper_binding *binding, + const struct net_shaper_handle *handle, + struct netlink_ext_ack *extack); + + /** + * @capabilities: get the shaper features supported by the device + * + * Fills the bitmask @cap with the supported capabilities for the + * specified @scope and device specified by @binding. 
+ */ + void (*capabilities)(struct net_shaper_binding *binding, + enum net_shaper_scope scope, unsigned long *cap); +}; + +#endif diff --git a/include/uapi/linux/net_shaper.h b/include/uapi/linux/net_shaper.h new file mode 100644 index 000000000000..d8834b59f7d7 --- /dev/null +++ b/include/uapi/linux/net_shaper.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/net_shaper.yaml */ +/* YNL-GEN uapi header */ + +#ifndef _UAPI_LINUX_NET_SHAPER_H +#define _UAPI_LINUX_NET_SHAPER_H + +#define NET_SHAPER_FAMILY_NAME "net-shaper" +#define NET_SHAPER_FAMILY_VERSION 1 + +/** + * enum net_shaper_scope - Defines the shaper @id interpretation. + * @NET_SHAPER_SCOPE_UNSPEC: The scope is not specified. + * @NET_SHAPER_SCOPE_NETDEV: The main shaper for the given network device. + * @NET_SHAPER_SCOPE_QUEUE: The shaper is attached to the given device queue, + * the @id represents the queue number. + * @NET_SHAPER_SCOPE_NODE: The shaper allows grouping of queues or other node + * shapers; can be nested in either @netdev shapers or other @node shapers, + * allowing placement in any location of the scheduling tree, except leaves + * and root. + */ +enum net_shaper_scope { + NET_SHAPER_SCOPE_UNSPEC, + NET_SHAPER_SCOPE_NETDEV, + NET_SHAPER_SCOPE_QUEUE, + NET_SHAPER_SCOPE_NODE, + + /* private: */ + __NET_SHAPER_SCOPE_MAX, + NET_SHAPER_SCOPE_MAX = (__NET_SHAPER_SCOPE_MAX - 1) +}; + +/** + * enum net_shaper_metric - Different metric supported by the shaper. + * @NET_SHAPER_METRIC_BPS: Shaper operates on a bits per second basis. + * @NET_SHAPER_METRIC_PPS: Shaper operates on a packets per second basis. + */ +enum net_shaper_metric { + NET_SHAPER_METRIC_BPS, + NET_SHAPER_METRIC_PPS, +}; + +enum { + NET_SHAPER_A_HANDLE = 1, + NET_SHAPER_A_METRIC, + NET_SHAPER_A_BW_MIN, + NET_SHAPER_A_BW_MAX, + NET_SHAPER_A_BURST, + NET_SHAPER_A_PRIORITY, + NET_SHAPER_A_WEIGHT, + NET_SHAPER_A_IFINDEX, + NET_SHAPER_A_PARENT, + NET_SHAPER_A_LEAVES, + + __NET_SHAPER_A_MAX, + NET_SHAPER_A_MAX = (__NET_SHAPER_A_MAX - 1) +}; + +enum { + NET_SHAPER_A_HANDLE_SCOPE = 1, + NET_SHAPER_A_HANDLE_ID, + + __NET_SHAPER_A_HANDLE_MAX, + NET_SHAPER_A_HANDLE_MAX = (__NET_SHAPER_A_HANDLE_MAX - 1) +}; + +enum { + NET_SHAPER_A_CAPS_IFINDEX = 1, + NET_SHAPER_A_CAPS_SCOPE, + NET_SHAPER_A_CAPS_SUPPORT_METRIC_BPS, + NET_SHAPER_A_CAPS_SUPPORT_METRIC_PPS, + NET_SHAPER_A_CAPS_SUPPORT_NESTING, + NET_SHAPER_A_CAPS_SUPPORT_BW_MIN, + NET_SHAPER_A_CAPS_SUPPORT_BW_MAX, + NET_SHAPER_A_CAPS_SUPPORT_BURST, + NET_SHAPER_A_CAPS_SUPPORT_PRIORITY, + NET_SHAPER_A_CAPS_SUPPORT_WEIGHT, + + __NET_SHAPER_A_CAPS_MAX, + NET_SHAPER_A_CAPS_MAX = (__NET_SHAPER_A_CAPS_MAX - 1) +}; + +enum { + NET_SHAPER_CMD_GET = 1, + NET_SHAPER_CMD_SET, + NET_SHAPER_CMD_DELETE, + NET_SHAPER_CMD_GROUP, + NET_SHAPER_CMD_CAP_GET, + + __NET_SHAPER_CMD_MAX, + NET_SHAPER_CMD_MAX = (__NET_SHAPER_CMD_MAX - 1) +}; + +#endif /* _UAPI_LINUX_NET_SHAPER_H */ diff --git a/net/Kconfig b/net/Kconfig index a629f92dc86b..c3fca69a7c83 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -72,6 +72,9 @@ config NET_DEVMEM depends on GENERIC_ALLOCATOR depends on PAGE_POOL +config NET_SHAPER + bool + menu "Networking options" source "net/packet/Kconfig" diff --git a/net/Makefile b/net/Makefile index 65bb8c72a35e..60ed5190eda8 100644 --- a/net/Makefile +++ b/net/Makefile @@ -79,3 +79,4 @@ obj-$(CONFIG_XDP_SOCKETS) += xdp/ obj-$(CONFIG_MPTCP) += mptcp/ obj-$(CONFIG_MCTP) += mctp/ 
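The net/Kconfig and net/Makefile hunks here only build the new core. On the driver side, the contract defined in include/net/net_shaper.h and the capability bits from the uapi header come together roughly as in the sketch below. All foo_* identifiers, including the foo_hw_set_queue_rate() helper, are hypothetical and only illustrate the expected wiring; this is a sketch under those assumptions, not code from this patch.

/* Hypothetical driver glue, for illustration only. */
#include <linux/bits.h>
#include <linux/netdevice.h>
#include <net/net_shaper.h>

/* Hypothetical H/W helper: rate-limit one TX queue, 0 == unlimited. */
int foo_hw_set_queue_rate(struct net_device *dev, u32 queue, u64 bw_max);

static void foo_shaper_capabilities(struct net_shaper_binding *binding,
                                    enum net_shaper_scope scope,
                                    unsigned long *cap)
{
        /* Advertise bps-based max-rate shaping, on queue shapers only. */
        if (scope == NET_SHAPER_SCOPE_QUEUE)
                *cap = BIT(NET_SHAPER_A_CAPS_SUPPORT_METRIC_BPS) |
                       BIT(NET_SHAPER_A_CAPS_SUPPORT_BW_MAX);
}

static int foo_shaper_set(struct net_shaper_binding *binding,
                          const struct net_shaper *shaper,
                          struct netlink_ext_ack *extack)
{
        /* Only queue-scope shapers can reach here, since that is all
         * the capabilities callback advertises.
         */
        return foo_hw_set_queue_rate(binding->netdev, shaper->handle.id,
                                     shaper->bw_max);
}

static int foo_shaper_delete(struct net_shaper_binding *binding,
                             const struct net_shaper_handle *handle,
                             struct netlink_ext_ack *extack)
{
        /* Restore the default, unshaped behavior. */
        return foo_hw_set_queue_rate(binding->netdev, handle->id, 0);
}

/* No .group callback: the core rejects grouping with -EOPNOTSUPP. */
static const struct net_shaper_ops foo_shaper_ops = {
        .set            = foo_shaper_set,
        .delete         = foo_shaper_delete,
        .capabilities   = foo_shaper_capabilities,
};

static const struct net_device_ops foo_netdev_ops = {
        /* ... regular ndo_* callbacks ... */
#if IS_ENABLED(CONFIG_NET_SHAPER)
        .net_shaper_ops = &foo_shaper_ops,
#endif
};

A device wired up like this accepts queue-scope set requests with metric bps and bw-max, which is the combination exercised by the shaper.py selftest added later in this patch.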
obj-$(CONFIG_NET_HANDSHAKE) += handshake/ +obj-$(CONFIG_NET_SHAPER) += shaper/ diff --git a/net/core/dev.c b/net/core/dev.c index ea5fbcd133ae..b590eefce3b4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2949,6 +2949,8 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) if (dev->num_tc) netif_setup_tc(dev, txq); + net_shaper_set_real_num_tx_queues(dev, txq); + dev_qdisc_change_real_num_tx(dev, txq); dev->real_num_tx_queues = txq; @@ -11147,6 +11149,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, hash_init(dev->qdisc_hash); #endif + mutex_init(&dev->lock); + dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; setup(dev); @@ -11217,6 +11221,8 @@ void free_netdev(struct net_device *dev) return; } + mutex_destroy(&dev->lock); + kfree(dev->ethtool); netif_free_tx_queues(dev); netif_free_rx_queues(dev); @@ -11426,6 +11432,8 @@ void unregister_netdevice_many_notify(struct list_head *head, mutex_destroy(&dev->ethtool->rss_lock); + net_shaper_flush_netdev(dev); + if (skb) rtmsg_ifinfo_send(skb, dev, GFP_KERNEL, portid, nlh); diff --git a/net/core/dev.h b/net/core/dev.h index 5654325c5b71..d3ea92949ff3 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -35,6 +35,16 @@ void dev_addr_flush(struct net_device *dev); int dev_addr_init(struct net_device *dev); void dev_addr_check(struct net_device *dev); +#if IS_ENABLED(CONFIG_NET_SHAPER) +void net_shaper_flush_netdev(struct net_device *dev); +void net_shaper_set_real_num_tx_queues(struct net_device *dev, + unsigned int txq); +#else +static inline void net_shaper_flush_netdev(struct net_device *dev) {} +static inline void net_shaper_set_real_num_tx_queues(struct net_device *dev, + unsigned int txq) {} +#endif + /* sysctls not referred to from outside net/core/ */ extern int netdev_unregister_timeout_secs; extern int weight_p; diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 1cb954f2d39e..358cba248796 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -24,7 +24,7 @@ struct netdev_nl_dump_ctx { static struct netdev_nl_dump_ctx *netdev_dump_ctx(struct netlink_callback *cb) { - NL_ASSERT_DUMP_CTX_FITS(struct netdev_nl_dump_ctx); + NL_ASSERT_CTX_FITS(struct netdev_nl_dump_ctx); return (struct netdev_nl_dump_ctx *)cb->ctx; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 6d68247aea70..a9b81b7d9746 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -6243,7 +6243,7 @@ static int rtnl_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb) int idx, s_idx; int err; - NL_ASSERT_DUMP_CTX_FITS(struct rtnl_mdb_dump_ctx); + NL_ASSERT_CTX_FITS(struct rtnl_mdb_dump_ctx); if (cb->strict_check) { err = rtnl_mdb_valid_dump_req(cb->nlh, cb->extack); diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h index c7a8e13f917c..a9f064ab9ed9 100644 --- a/net/devlink/devl_internal.h +++ b/net/devlink/devl_internal.h @@ -166,7 +166,7 @@ int devlink_nl_dumpit(struct sk_buff *msg, struct netlink_callback *cb, static inline struct devlink_nl_dump_state * devlink_dump_state(struct netlink_callback *cb) { - NL_ASSERT_DUMP_CTX_FITS(struct devlink_nl_dump_state); + NL_ASSERT_CTX_FITS(struct devlink_nl_dump_state); return (struct devlink_nl_dump_state *)cb->ctx; } diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index e07386275e14..7cb106b590ab 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -224,7 +224,7 @@ struct rss_nl_dump_ctx { static struct rss_nl_dump_ctx *rss_dump_ctx(struct netlink_callback *cb) { - 
NL_ASSERT_DUMP_CTX_FITS(struct rss_nl_dump_ctx); + NL_ASSERT_CTX_FITS(struct rss_nl_dump_ctx); return (struct rss_nl_dump_ctx *)cb->ctx; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 6a1239433830..36168f8b6efa 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -3870,7 +3870,7 @@ static int __init ctnetlink_init(void) { int ret; - NL_ASSERT_DUMP_CTX_FITS(struct ctnetlink_list_dump_ctx); + NL_ASSERT_CTX_FITS(struct ctnetlink_list_dump_ctx); ret = nfnetlink_subsys_register(&ctnl_subsys); if (ret < 0) { diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index feb54c63a116..29387b605f3e 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -997,7 +997,7 @@ static int genl_start(struct netlink_callback *cb) info->info.attrs = attrs; genl_info_net_set(&info->info, sock_net(cb->skb->sk)); info->info.extack = cb->extack; - memset(&info->info.user_ptr, 0, sizeof(info->info.user_ptr)); + memset(&info->info.ctx, 0, sizeof(info->info.ctx)); cb->data = info; if (ops->start) { @@ -1104,7 +1104,7 @@ static int genl_family_rcv_msg_doit(const struct genl_family *family, info.attrs = attrbuf; info.extack = extack; genl_info_net_set(&info, net); - memset(&info.user_ptr, 0, sizeof(info.user_ptr)); + memset(&info.ctx, 0, sizeof(info.ctx)); if (ops->pre_doit) { err = ops->pre_doit(ops, skb, &info); diff --git a/net/shaper/Makefile b/net/shaper/Makefile new file mode 100644 index 000000000000..54af7169a331 --- /dev/null +++ b/net/shaper/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Makefile for the net shaper infrastructure. +# +# Copyright (c) 2024, Red Hat, Inc. +# + +obj-y += shaper.o shaper_nl_gen.o diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c new file mode 100644 index 000000000000..15463062fe7b --- /dev/null +++ b/net/shaper/shaper.c @@ -0,0 +1,1438 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <linux/bits.h> +#include <linux/bitfield.h> +#include <linux/idr.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/netlink.h> +#include <linux/skbuff.h> +#include <linux/xarray.h> +#include <net/devlink.h> +#include <net/net_shaper.h> + +#include "shaper_nl_gen.h" + +#include "../core/dev.h" + +#define NET_SHAPER_SCOPE_SHIFT 26 +#define NET_SHAPER_ID_MASK GENMASK(NET_SHAPER_SCOPE_SHIFT - 1, 0) +#define NET_SHAPER_SCOPE_MASK GENMASK(31, NET_SHAPER_SCOPE_SHIFT) + +#define NET_SHAPER_ID_UNSPEC NET_SHAPER_ID_MASK + +struct net_shaper_hierarchy { + struct xarray shapers; +}; + +struct net_shaper_nl_ctx { + struct net_shaper_binding binding; + netdevice_tracker dev_tracker; + unsigned long start_index; +}; + +static struct net_shaper_binding *net_shaper_binding_from_ctx(void *ctx) +{ + return &((struct net_shaper_nl_ctx *)ctx)->binding; +} + +static void net_shaper_lock(struct net_shaper_binding *binding) +{ + switch (binding->type) { + case NET_SHAPER_BINDING_TYPE_NETDEV: + mutex_lock(&binding->netdev->lock); + break; + } +} + +static void net_shaper_unlock(struct net_shaper_binding *binding) +{ + switch (binding->type) { + case NET_SHAPER_BINDING_TYPE_NETDEV: + mutex_unlock(&binding->netdev->lock); + break; + } +} + +static struct net_shaper_hierarchy * +net_shaper_hierarchy(struct net_shaper_binding *binding) +{ + /* Pairs with WRITE_ONCE() in net_shaper_hierarchy_setup. */ + if (binding->type == NET_SHAPER_BINDING_TYPE_NETDEV) + return READ_ONCE(binding->netdev->net_shaper_hierarchy); + + /* No other type supported yet. 
*/ + return NULL; +} + +static const struct net_shaper_ops * +net_shaper_ops(struct net_shaper_binding *binding) +{ + if (binding->type == NET_SHAPER_BINDING_TYPE_NETDEV) + return binding->netdev->netdev_ops->net_shaper_ops; + + /* No other type supported yet. */ + return NULL; +} + +/* Count the number of [multi] attributes of the given type. */ +static int net_shaper_list_len(struct genl_info *info, int type) +{ + struct nlattr *attr; + int rem, cnt = 0; + + nla_for_each_attr_type(attr, type, genlmsg_data(info->genlhdr), + genlmsg_len(info->genlhdr), rem) + cnt++; + return cnt; +} + +static int net_shaper_handle_size(void) +{ + return nla_total_size(nla_total_size(sizeof(u32)) + + nla_total_size(sizeof(u32))); +} + +static int net_shaper_fill_binding(struct sk_buff *msg, + const struct net_shaper_binding *binding, + u32 type) +{ + /* Should never happen, as currently only NETDEV is supported. */ + if (WARN_ON_ONCE(binding->type != NET_SHAPER_BINDING_TYPE_NETDEV)) + return -EINVAL; + + if (nla_put_u32(msg, type, binding->netdev->ifindex)) + return -EMSGSIZE; + + return 0; +} + +static int net_shaper_fill_handle(struct sk_buff *msg, + const struct net_shaper_handle *handle, + u32 type) +{ + struct nlattr *handle_attr; + + if (handle->scope == NET_SHAPER_SCOPE_UNSPEC) + return 0; + + handle_attr = nla_nest_start(msg, type); + if (!handle_attr) + return -EMSGSIZE; + + if (nla_put_u32(msg, NET_SHAPER_A_HANDLE_SCOPE, handle->scope) || + (handle->scope >= NET_SHAPER_SCOPE_QUEUE && + nla_put_u32(msg, NET_SHAPER_A_HANDLE_ID, handle->id))) + goto handle_nest_cancel; + + nla_nest_end(msg, handle_attr); + return 0; + +handle_nest_cancel: + nla_nest_cancel(msg, handle_attr); + return -EMSGSIZE; +} + +static int +net_shaper_fill_one(struct sk_buff *msg, + const struct net_shaper_binding *binding, + const struct net_shaper *shaper, + const struct genl_info *info) +{ + void *hdr; + + hdr = genlmsg_iput(msg, info); + if (!hdr) + return -EMSGSIZE; + + if (net_shaper_fill_binding(msg, binding, NET_SHAPER_A_IFINDEX) || + net_shaper_fill_handle(msg, &shaper->parent, + NET_SHAPER_A_PARENT) || + net_shaper_fill_handle(msg, &shaper->handle, + NET_SHAPER_A_HANDLE) || + ((shaper->bw_min || shaper->bw_max || shaper->burst) && + nla_put_u32(msg, NET_SHAPER_A_METRIC, shaper->metric)) || + (shaper->bw_min && + nla_put_uint(msg, NET_SHAPER_A_BW_MIN, shaper->bw_min)) || + (shaper->bw_max && + nla_put_uint(msg, NET_SHAPER_A_BW_MAX, shaper->bw_max)) || + (shaper->burst && + nla_put_uint(msg, NET_SHAPER_A_BURST, shaper->burst)) || + (shaper->priority && + nla_put_u32(msg, NET_SHAPER_A_PRIORITY, shaper->priority)) || + (shaper->weight && + nla_put_u32(msg, NET_SHAPER_A_WEIGHT, shaper->weight))) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/* Initialize the context fetching the relevant device and + * acquiring a reference to it. 
+ */ +static int net_shaper_ctx_setup(const struct genl_info *info, int type, + struct net_shaper_nl_ctx *ctx) +{ + struct net *ns = genl_info_net(info); + struct net_device *dev; + int ifindex; + + if (GENL_REQ_ATTR_CHECK(info, type)) + return -EINVAL; + + ifindex = nla_get_u32(info->attrs[type]); + dev = netdev_get_by_index(ns, ifindex, &ctx->dev_tracker, GFP_KERNEL); + if (!dev) { + NL_SET_BAD_ATTR(info->extack, info->attrs[type]); + return -ENOENT; + } + + if (!dev->netdev_ops->net_shaper_ops) { + NL_SET_BAD_ATTR(info->extack, info->attrs[type]); + netdev_put(dev, &ctx->dev_tracker); + return -EOPNOTSUPP; + } + + ctx->binding.type = NET_SHAPER_BINDING_TYPE_NETDEV; + ctx->binding.netdev = dev; + return 0; +} + +static void net_shaper_ctx_cleanup(struct net_shaper_nl_ctx *ctx) +{ + if (ctx->binding.type == NET_SHAPER_BINDING_TYPE_NETDEV) + netdev_put(ctx->binding.netdev, &ctx->dev_tracker); +} + +static u32 net_shaper_handle_to_index(const struct net_shaper_handle *handle) +{ + return FIELD_PREP(NET_SHAPER_SCOPE_MASK, handle->scope) | + FIELD_PREP(NET_SHAPER_ID_MASK, handle->id); +} + +static void net_shaper_index_to_handle(u32 index, + struct net_shaper_handle *handle) +{ + handle->scope = FIELD_GET(NET_SHAPER_SCOPE_MASK, index); + handle->id = FIELD_GET(NET_SHAPER_ID_MASK, index); +} + +static void net_shaper_default_parent(const struct net_shaper_handle *handle, + struct net_shaper_handle *parent) +{ + switch (handle->scope) { + case NET_SHAPER_SCOPE_UNSPEC: + case NET_SHAPER_SCOPE_NETDEV: + case __NET_SHAPER_SCOPE_MAX: + parent->scope = NET_SHAPER_SCOPE_UNSPEC; + break; + + case NET_SHAPER_SCOPE_QUEUE: + case NET_SHAPER_SCOPE_NODE: + parent->scope = NET_SHAPER_SCOPE_NETDEV; + break; + } + parent->id = 0; +} + +/* + * MARK_0 is already in use due to XA_FLAGS_ALLOC, can't reuse such flag as + * it's cleared by xa_store(). + */ +#define NET_SHAPER_NOT_VALID XA_MARK_1 + +static struct net_shaper * +net_shaper_lookup(struct net_shaper_binding *binding, + const struct net_shaper_handle *handle) +{ + struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding); + u32 index = net_shaper_handle_to_index(handle); + + if (!hierarchy || xa_get_mark(&hierarchy->shapers, index, + NET_SHAPER_NOT_VALID)) + return NULL; + + return xa_load(&hierarchy->shapers, index); +} + +/* Allocate on demand the per device shaper's hierarchy container. + * Called under the net shaper lock + */ +static struct net_shaper_hierarchy * +net_shaper_hierarchy_setup(struct net_shaper_binding *binding) +{ + struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding); + + if (hierarchy) + return hierarchy; + + hierarchy = kmalloc(sizeof(*hierarchy), GFP_KERNEL); + if (!hierarchy) + return NULL; + + /* The flag is required for ID allocation */ + xa_init_flags(&hierarchy->shapers, XA_FLAGS_ALLOC); + + switch (binding->type) { + case NET_SHAPER_BINDING_TYPE_NETDEV: + /* Pairs with READ_ONCE in net_shaper_hierarchy. */ + WRITE_ONCE(binding->netdev->net_shaper_hierarchy, hierarchy); + break; + } + return hierarchy; +} + +/* Prepare the hierarchy container to actually insert the given shaper, doing + * in advance the needed allocations. 
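As a worked example of the index packing used by the helpers above: the scope lives in bits 31..26 and the id in the low 26 bits, so a NODE-scope shaper (scope value 3) with id 5 maps to 0x0c000005. A minimal round trip, for illustration only:

struct net_shaper_handle h = { .scope = NET_SHAPER_SCOPE_NODE, .id = 5 };
u32 index = net_shaper_handle_to_index(&h);     /* (3 << 26) | 5 == 0x0c000005 */

net_shaper_index_to_handle(index, &h);          /* back to { NODE, 5 } */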
+ */ +static int net_shaper_pre_insert(struct net_shaper_binding *binding, + struct net_shaper_handle *handle, + struct netlink_ext_ack *extack) +{ + struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding); + struct net_shaper *prev, *cur; + bool id_allocated = false; + int ret, index; + + if (!hierarchy) + return -ENOMEM; + + index = net_shaper_handle_to_index(handle); + cur = xa_load(&hierarchy->shapers, index); + if (cur) + return 0; + + /* Allocated a new id, if needed. */ + if (handle->scope == NET_SHAPER_SCOPE_NODE && + handle->id == NET_SHAPER_ID_UNSPEC) { + u32 min, max; + + handle->id = NET_SHAPER_ID_MASK - 1; + max = net_shaper_handle_to_index(handle); + handle->id = 0; + min = net_shaper_handle_to_index(handle); + + ret = xa_alloc(&hierarchy->shapers, &index, NULL, + XA_LIMIT(min, max), GFP_KERNEL); + if (ret < 0) { + NL_SET_ERR_MSG(extack, "Can't allocate new id for NODE shaper"); + return ret; + } + + net_shaper_index_to_handle(index, handle); + id_allocated = true; + } + + cur = kzalloc(sizeof(*cur), GFP_KERNEL); + if (!cur) { + ret = -ENOMEM; + goto free_id; + } + + /* Mark 'tentative' shaper inside the hierarchy container. + * xa_set_mark is a no-op if the previous store fails. + */ + xa_lock(&hierarchy->shapers); + prev = __xa_store(&hierarchy->shapers, index, cur, GFP_KERNEL); + __xa_set_mark(&hierarchy->shapers, index, NET_SHAPER_NOT_VALID); + xa_unlock(&hierarchy->shapers); + if (xa_err(prev)) { + NL_SET_ERR_MSG(extack, "Can't insert shaper into device store"); + kfree_rcu(cur, rcu); + ret = xa_err(prev); + goto free_id; + } + return 0; + +free_id: + if (id_allocated) + xa_erase(&hierarchy->shapers, index); + return ret; +} + +/* Commit the tentative insert with the actual values. + * Must be called only after a successful net_shaper_pre_insert(). + */ +static void net_shaper_commit(struct net_shaper_binding *binding, + int nr_shapers, const struct net_shaper *shapers) +{ + struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding); + struct net_shaper *cur; + int index; + int i; + + xa_lock(&hierarchy->shapers); + for (i = 0; i < nr_shapers; ++i) { + index = net_shaper_handle_to_index(&shapers[i].handle); + + cur = xa_load(&hierarchy->shapers, index); + if (WARN_ON_ONCE(!cur)) + continue; + + /* Successful update: drop the tentative mark + * and update the hierarchy container. + */ + __xa_clear_mark(&hierarchy->shapers, index, + NET_SHAPER_NOT_VALID); + *cur = shapers[i]; + } + xa_unlock(&hierarchy->shapers); +} + +/* Rollback all the tentative inserts from the hierarchy. 
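Together, net_shaper_pre_insert(), net_shaper_commit() and net_shaper_rollback() form a two-phase update: the xarray slot is reserved and marked tentative before the device is touched, and only a successful driver call makes the entry visible. The set handler later in this file uses them in this order (a simplified view, error handling trimmed to the essentials):

ret = net_shaper_pre_insert(binding, &handle, info->extack);    /* tentative slot */
if (ret)
        goto unlock;

ret = ops->set(binding, &shaper, info->extack);                 /* program the device */
if (ret) {
        net_shaper_rollback(binding);                           /* drop tentative entries */
        goto unlock;
}

net_shaper_commit(binding, 1, &shaper);                         /* clear the mark, store values */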
*/ +static void net_shaper_rollback(struct net_shaper_binding *binding) +{ + struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding); + struct net_shaper *cur; + unsigned long index; + + if (!hierarchy) + return; + + xa_lock(&hierarchy->shapers); + xa_for_each_marked(&hierarchy->shapers, index, cur, + NET_SHAPER_NOT_VALID) { + __xa_erase(&hierarchy->shapers, index); + kfree(cur); + } + xa_unlock(&hierarchy->shapers); +} + +static int net_shaper_parse_handle(const struct nlattr *attr, + const struct genl_info *info, + struct net_shaper_handle *handle) +{ + struct nlattr *tb[NET_SHAPER_A_HANDLE_MAX + 1]; + struct nlattr *id_attr; + u32 id = 0; + int ret; + + ret = nla_parse_nested(tb, NET_SHAPER_A_HANDLE_MAX, attr, + net_shaper_handle_nl_policy, info->extack); + if (ret < 0) + return ret; + + if (NL_REQ_ATTR_CHECK(info->extack, attr, tb, + NET_SHAPER_A_HANDLE_SCOPE)) + return -EINVAL; + + handle->scope = nla_get_u32(tb[NET_SHAPER_A_HANDLE_SCOPE]); + + /* The default id for NODE scope shapers is an invalid one + * to help the 'group' operation discriminate between new + * NODE shaper creation (ID_UNSPEC) and reuse of existing + * shaper (any other value). + */ + id_attr = tb[NET_SHAPER_A_HANDLE_ID]; + if (id_attr) + id = nla_get_u32(id_attr); + else if (handle->scope == NET_SHAPER_SCOPE_NODE) + id = NET_SHAPER_ID_UNSPEC; + + handle->id = id; + return 0; +} + +static int net_shaper_validate_caps(struct net_shaper_binding *binding, + struct nlattr **tb, + const struct genl_info *info, + struct net_shaper *shaper) +{ + const struct net_shaper_ops *ops = net_shaper_ops(binding); + struct nlattr *bad = NULL; + unsigned long caps = 0; + + ops->capabilities(binding, shaper->handle.scope, &caps); + + if (tb[NET_SHAPER_A_PRIORITY] && + !(caps & BIT(NET_SHAPER_A_CAPS_SUPPORT_PRIORITY))) + bad = tb[NET_SHAPER_A_PRIORITY]; + if (tb[NET_SHAPER_A_WEIGHT] && + !(caps & BIT(NET_SHAPER_A_CAPS_SUPPORT_WEIGHT))) + bad = tb[NET_SHAPER_A_WEIGHT]; + if (tb[NET_SHAPER_A_BW_MIN] && + !(caps & BIT(NET_SHAPER_A_CAPS_SUPPORT_BW_MIN))) + bad = tb[NET_SHAPER_A_BW_MIN]; + if (tb[NET_SHAPER_A_BW_MAX] && + !(caps & BIT(NET_SHAPER_A_CAPS_SUPPORT_BW_MAX))) + bad = tb[NET_SHAPER_A_BW_MAX]; + if (tb[NET_SHAPER_A_BURST] && + !(caps & BIT(NET_SHAPER_A_CAPS_SUPPORT_BURST))) + bad = tb[NET_SHAPER_A_BURST]; + + if (!caps) + bad = tb[NET_SHAPER_A_HANDLE]; + + if (bad) { + NL_SET_BAD_ATTR(info->extack, bad); + return -EOPNOTSUPP; + } + + if (shaper->handle.scope == NET_SHAPER_SCOPE_QUEUE && + binding->type == NET_SHAPER_BINDING_TYPE_NETDEV && + shaper->handle.id >= binding->netdev->real_num_tx_queues) { + NL_SET_ERR_MSG_FMT(info->extack, + "Not existing queue id %d max %d", + shaper->handle.id, + binding->netdev->real_num_tx_queues); + return -ENOENT; + } + + /* The metric is really used only if there is *any* rate-related + * setting, either in current attributes set or in pre-existing + * values. + */ + if (shaper->burst || shaper->bw_min || shaper->bw_max) { + u32 metric_cap = NET_SHAPER_A_CAPS_SUPPORT_METRIC_BPS + + shaper->metric; + + /* The metric test can fail even when the user did not + * specify the METRIC attribute. Pointing to rate related + * attribute will be confusing, as the attribute itself + * could be indeed supported, with a different metric. + * Be more specific. 
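Concretely, the addition above works because the uapi header declares the metric values and the corresponding capability bits in the same order, so the metric offset selects the matching support flag. The relationship could be spelled out as follows (illustrative, not part of the patch):

/* NET_SHAPER_METRIC_BPS (0) selects BIT(NET_SHAPER_A_CAPS_SUPPORT_METRIC_BPS),
 * NET_SHAPER_METRIC_PPS (1) selects BIT(NET_SHAPER_A_CAPS_SUPPORT_METRIC_PPS).
 */
BUILD_BUG_ON(NET_SHAPER_A_CAPS_SUPPORT_METRIC_BPS + NET_SHAPER_METRIC_PPS !=
             NET_SHAPER_A_CAPS_SUPPORT_METRIC_PPS);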
+ */ + if (!(caps & BIT(metric_cap))) { + NL_SET_ERR_MSG_FMT(info->extack, "Bad metric %d", + shaper->metric); + return -EOPNOTSUPP; + } + } + return 0; +} + +static int net_shaper_parse_info(struct net_shaper_binding *binding, + struct nlattr **tb, + const struct genl_info *info, + struct net_shaper *shaper, + bool *exists) +{ + struct net_shaper *old; + int ret; + + /* The shaper handle is the only mandatory attribute. */ + if (NL_REQ_ATTR_CHECK(info->extack, NULL, tb, NET_SHAPER_A_HANDLE)) + return -EINVAL; + + ret = net_shaper_parse_handle(tb[NET_SHAPER_A_HANDLE], info, + &shaper->handle); + if (ret) + return ret; + + if (shaper->handle.scope == NET_SHAPER_SCOPE_UNSPEC) { + NL_SET_BAD_ATTR(info->extack, tb[NET_SHAPER_A_HANDLE]); + return -EINVAL; + } + + /* Fetch existing hierarchy, if any, so that user provide info will + * incrementally update the existing shaper configuration. + */ + old = net_shaper_lookup(binding, &shaper->handle); + if (old) + *shaper = *old; + *exists = !!old; + + if (tb[NET_SHAPER_A_METRIC]) + shaper->metric = nla_get_u32(tb[NET_SHAPER_A_METRIC]); + + if (tb[NET_SHAPER_A_BW_MIN]) + shaper->bw_min = nla_get_uint(tb[NET_SHAPER_A_BW_MIN]); + + if (tb[NET_SHAPER_A_BW_MAX]) + shaper->bw_max = nla_get_uint(tb[NET_SHAPER_A_BW_MAX]); + + if (tb[NET_SHAPER_A_BURST]) + shaper->burst = nla_get_uint(tb[NET_SHAPER_A_BURST]); + + if (tb[NET_SHAPER_A_PRIORITY]) + shaper->priority = nla_get_u32(tb[NET_SHAPER_A_PRIORITY]); + + if (tb[NET_SHAPER_A_WEIGHT]) + shaper->weight = nla_get_u32(tb[NET_SHAPER_A_WEIGHT]); + + ret = net_shaper_validate_caps(binding, tb, info, shaper); + if (ret < 0) + return ret; + + return 0; +} + +static int net_shaper_validate_nesting(struct net_shaper_binding *binding, + const struct net_shaper *shaper, + struct netlink_ext_ack *extack) +{ + const struct net_shaper_ops *ops = net_shaper_ops(binding); + unsigned long caps = 0; + + ops->capabilities(binding, shaper->handle.scope, &caps); + if (!(caps & BIT(NET_SHAPER_A_CAPS_SUPPORT_NESTING))) { + NL_SET_ERR_MSG_FMT(extack, + "Nesting not supported for scope %d", + shaper->handle.scope); + return -EOPNOTSUPP; + } + return 0; +} + +/* Fetch the existing leaf and update it with the user-provided + * attributes. + */ +static int net_shaper_parse_leaf(struct net_shaper_binding *binding, + const struct nlattr *attr, + const struct genl_info *info, + const struct net_shaper *node, + struct net_shaper *shaper) +{ + struct nlattr *tb[NET_SHAPER_A_WEIGHT + 1]; + bool exists; + int ret; + + ret = nla_parse_nested(tb, NET_SHAPER_A_WEIGHT, attr, + net_shaper_leaf_info_nl_policy, info->extack); + if (ret < 0) + return ret; + + ret = net_shaper_parse_info(binding, tb, info, shaper, &exists); + if (ret < 0) + return ret; + + if (shaper->handle.scope != NET_SHAPER_SCOPE_QUEUE) { + NL_SET_BAD_ATTR(info->extack, tb[NET_SHAPER_A_HANDLE]); + return -EINVAL; + } + + if (node->handle.scope == NET_SHAPER_SCOPE_NODE) { + ret = net_shaper_validate_nesting(binding, shaper, + info->extack); + if (ret < 0) + return ret; + } + + if (!exists) + net_shaper_default_parent(&shaper->handle, &shaper->parent); + return 0; +} + +/* Alike net_parse_shaper_info(), but additionally allow the user specifying + * the shaper's parent handle. 
+ */ +static int net_shaper_parse_node(struct net_shaper_binding *binding, + struct nlattr **tb, + const struct genl_info *info, + struct net_shaper *shaper) +{ + bool exists; + int ret; + + ret = net_shaper_parse_info(binding, tb, info, shaper, &exists); + if (ret) + return ret; + + if (shaper->handle.scope != NET_SHAPER_SCOPE_NODE && + shaper->handle.scope != NET_SHAPER_SCOPE_NETDEV) { + NL_SET_BAD_ATTR(info->extack, tb[NET_SHAPER_A_HANDLE]); + return -EINVAL; + } + + if (tb[NET_SHAPER_A_PARENT]) { + ret = net_shaper_parse_handle(tb[NET_SHAPER_A_PARENT], info, + &shaper->parent); + if (ret) + return ret; + + if (shaper->parent.scope != NET_SHAPER_SCOPE_NODE && + shaper->parent.scope != NET_SHAPER_SCOPE_NETDEV) { + NL_SET_BAD_ATTR(info->extack, tb[NET_SHAPER_A_PARENT]); + return -EINVAL; + } + } + return 0; +} + +static int net_shaper_generic_pre(struct genl_info *info, int type) +{ + struct net_shaper_nl_ctx *ctx = (struct net_shaper_nl_ctx *)info->ctx; + + BUILD_BUG_ON(sizeof(*ctx) > sizeof(info->ctx)); + + return net_shaper_ctx_setup(info, type, ctx); +} + +int net_shaper_nl_pre_doit(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + return net_shaper_generic_pre(info, NET_SHAPER_A_IFINDEX); +} + +static void net_shaper_generic_post(struct genl_info *info) +{ + net_shaper_ctx_cleanup((struct net_shaper_nl_ctx *)info->ctx); +} + +void net_shaper_nl_post_doit(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + net_shaper_generic_post(info); +} + +int net_shaper_nl_pre_dumpit(struct netlink_callback *cb) +{ + struct net_shaper_nl_ctx *ctx = (struct net_shaper_nl_ctx *)cb->ctx; + const struct genl_info *info = genl_info_dump(cb); + + return net_shaper_ctx_setup(info, NET_SHAPER_A_IFINDEX, ctx); +} + +int net_shaper_nl_post_dumpit(struct netlink_callback *cb) +{ + net_shaper_ctx_cleanup((struct net_shaper_nl_ctx *)cb->ctx); + return 0; +} + +int net_shaper_nl_cap_pre_doit(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + return net_shaper_generic_pre(info, NET_SHAPER_A_CAPS_IFINDEX); +} + +void net_shaper_nl_cap_post_doit(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + net_shaper_generic_post(info); +} + +int net_shaper_nl_cap_pre_dumpit(struct netlink_callback *cb) +{ + struct net_shaper_nl_ctx *ctx = (struct net_shaper_nl_ctx *)cb->ctx; + + return net_shaper_ctx_setup(genl_info_dump(cb), + NET_SHAPER_A_CAPS_IFINDEX, ctx); +} + +int net_shaper_nl_cap_post_dumpit(struct netlink_callback *cb) +{ + struct net_shaper_nl_ctx *ctx = (struct net_shaper_nl_ctx *)cb->ctx; + + net_shaper_ctx_cleanup(ctx); + return 0; +} + +int net_shaper_nl_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct net_shaper_binding *binding; + struct net_shaper_handle handle; + struct net_shaper *shaper; + struct sk_buff *msg; + int ret; + + if (GENL_REQ_ATTR_CHECK(info, NET_SHAPER_A_HANDLE)) + return -EINVAL; + + binding = net_shaper_binding_from_ctx(info->ctx); + ret = net_shaper_parse_handle(info->attrs[NET_SHAPER_A_HANDLE], info, + &handle); + if (ret < 0) + return ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + rcu_read_lock(); + shaper = net_shaper_lookup(binding, &handle); + if (!shaper) { + NL_SET_BAD_ATTR(info->extack, + info->attrs[NET_SHAPER_A_HANDLE]); + rcu_read_unlock(); + ret = -ENOENT; + goto free_msg; + } + + ret = net_shaper_fill_one(msg, binding, shaper, info); + rcu_read_unlock(); + if (ret) + goto 
free_msg; + + ret = genlmsg_reply(msg, info); + if (ret) + goto free_msg; + + return 0; + +free_msg: + nlmsg_free(msg); + return ret; +} + +int net_shaper_nl_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct net_shaper_nl_ctx *ctx = (struct net_shaper_nl_ctx *)cb->ctx; + const struct genl_info *info = genl_info_dump(cb); + struct net_shaper_hierarchy *hierarchy; + struct net_shaper_binding *binding; + struct net_shaper *shaper; + int ret = 0; + + /* Don't error out dumps performed before any set operation. */ + binding = net_shaper_binding_from_ctx(ctx); + hierarchy = net_shaper_hierarchy(binding); + if (!hierarchy) + return 0; + + rcu_read_lock(); + for (; (shaper = xa_find(&hierarchy->shapers, &ctx->start_index, + U32_MAX, XA_PRESENT)); ctx->start_index++) { + ret = net_shaper_fill_one(skb, binding, shaper, info); + if (ret) + break; + } + rcu_read_unlock(); + + return ret; +} + +int net_shaper_nl_set_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct net_shaper_hierarchy *hierarchy; + struct net_shaper_binding *binding; + const struct net_shaper_ops *ops; + struct net_shaper_handle handle; + struct net_shaper shaper = {}; + bool exists; + int ret; + + binding = net_shaper_binding_from_ctx(info->ctx); + + net_shaper_lock(binding); + ret = net_shaper_parse_info(binding, info->attrs, info, &shaper, + &exists); + if (ret) + goto unlock; + + if (!exists) + net_shaper_default_parent(&shaper.handle, &shaper.parent); + + hierarchy = net_shaper_hierarchy_setup(binding); + if (!hierarchy) { + ret = -ENOMEM; + goto unlock; + } + + /* The 'set' operation can't create node-scope shapers. */ + handle = shaper.handle; + if (handle.scope == NET_SHAPER_SCOPE_NODE && + !net_shaper_lookup(binding, &handle)) { + ret = -ENOENT; + goto unlock; + } + + ret = net_shaper_pre_insert(binding, &handle, info->extack); + if (ret) + goto unlock; + + ops = net_shaper_ops(binding); + ret = ops->set(binding, &shaper, info->extack); + if (ret) { + net_shaper_rollback(binding); + goto unlock; + } + + net_shaper_commit(binding, 1, &shaper); + +unlock: + net_shaper_unlock(binding); + return ret; +} + +static int __net_shaper_delete(struct net_shaper_binding *binding, + struct net_shaper *shaper, + struct netlink_ext_ack *extack) +{ + struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding); + struct net_shaper_handle parent_handle, handle = shaper->handle; + const struct net_shaper_ops *ops = net_shaper_ops(binding); + int ret; + +again: + parent_handle = shaper->parent; + + ret = ops->delete(binding, &handle, extack); + if (ret < 0) + return ret; + + xa_erase(&hierarchy->shapers, net_shaper_handle_to_index(&handle)); + kfree_rcu(shaper, rcu); + + /* Eventually delete the parent, if it is left over with no leaves. */ + if (parent_handle.scope == NET_SHAPER_SCOPE_NODE) { + shaper = net_shaper_lookup(binding, &parent_handle); + if (shaper && !--shaper->leaves) { + handle = parent_handle; + goto again; + } + } + return 0; +} + +static int net_shaper_handle_cmp(const struct net_shaper_handle *a, + const struct net_shaper_handle *b) +{ + /* Must avoid holes in struct net_shaper_handle. 
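The memcmp() used below is safe because, on the ABIs the kernel supports, struct net_shaper_handle packs with no padding, which is exactly what the BUILD_BUG_ON() that follows enforces:

/* struct net_shaper_handle layout (8 bytes, no holes):
 *   enum net_shaper_scope scope;   offset 0, size 4
 *   u32 id;                        offset 4, size 4
 */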
*/ + BUILD_BUG_ON(sizeof(*a) != 8); + + return memcmp(a, b, sizeof(*a)); +} + +static int net_shaper_parent_from_leaves(int leaves_count, + const struct net_shaper *leaves, + struct net_shaper *node, + struct netlink_ext_ack *extack) +{ + struct net_shaper_handle parent = leaves[0].parent; + int i; + + for (i = 1; i < leaves_count; ++i) { + if (net_shaper_handle_cmp(&leaves[i].parent, &parent)) { + NL_SET_ERR_MSG_FMT(extack, "All the leaves shapers must have the same old parent"); + return -EINVAL; + } + } + + node->parent = parent; + return 0; +} + +static int __net_shaper_group(struct net_shaper_binding *binding, + bool update_node, int leaves_count, + struct net_shaper *leaves, + struct net_shaper *node, + struct netlink_ext_ack *extack) +{ + const struct net_shaper_ops *ops = net_shaper_ops(binding); + struct net_shaper_handle leaf_handle; + struct net_shaper *parent = NULL; + bool new_node = false; + int i, ret; + + if (node->handle.scope == NET_SHAPER_SCOPE_NODE) { + new_node = node->handle.id == NET_SHAPER_ID_UNSPEC; + + if (!new_node && !net_shaper_lookup(binding, &node->handle)) { + /* The related attribute is not available when + * reaching here from the delete() op. + */ + NL_SET_ERR_MSG_FMT(extack, "Node shaper %d:%d does not exists", + node->handle.scope, node->handle.id); + return -ENOENT; + } + + /* When unspecified, the node parent scope is inherited from + * the leaves. + */ + if (node->parent.scope == NET_SHAPER_SCOPE_UNSPEC) { + ret = net_shaper_parent_from_leaves(leaves_count, + leaves, node, + extack); + if (ret) + return ret; + } + + } else { + net_shaper_default_parent(&node->handle, &node->parent); + } + + if (node->parent.scope == NET_SHAPER_SCOPE_NODE) { + parent = net_shaper_lookup(binding, &node->parent); + if (!parent) { + NL_SET_ERR_MSG_FMT(extack, "Node parent shaper %d:%d does not exists", + node->parent.scope, node->parent.id); + return -ENOENT; + } + + ret = net_shaper_validate_nesting(binding, node, extack); + if (ret < 0) + return ret; + } + + if (update_node) { + /* For newly created node scope shaper, the following will + * update the handle, due to id allocation. + */ + ret = net_shaper_pre_insert(binding, &node->handle, extack); + if (ret) + return ret; + } + + for (i = 0; i < leaves_count; ++i) { + leaf_handle = leaves[i].handle; + + ret = net_shaper_pre_insert(binding, &leaf_handle, extack); + if (ret) + goto rollback; + + if (!net_shaper_handle_cmp(&leaves[i].parent, &node->handle)) + continue; + + /* The leaves shapers will be nested to the node, update the + * linking accordingly. + */ + leaves[i].parent = node->handle; + node->leaves++; + } + + ret = ops->group(binding, leaves_count, leaves, node, extack); + if (ret < 0) + goto rollback; + + /* The node's parent gains a new leaf only when the node itself + * is created by this group operation + */ + if (new_node && parent) + parent->leaves++; + if (update_node) + net_shaper_commit(binding, 1, node); + net_shaper_commit(binding, leaves_count, leaves); + return 0; + +rollback: + net_shaper_rollback(binding); + return ret; +} + +static int net_shaper_pre_del_node(struct net_shaper_binding *binding, + const struct net_shaper *shaper, + struct netlink_ext_ack *extack) +{ + struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding); + struct net_shaper *cur, *leaves, node = {}; + int ret, leaves_count = 0; + unsigned long index; + bool update_node; + + if (!shaper->leaves) + return 0; + + /* Fetch the new node information. 
*/ + node.handle = shaper->parent; + cur = net_shaper_lookup(binding, &node.handle); + if (cur) { + node = *cur; + } else { + /* A scope NODE shaper can be nested only to the NETDEV scope + * shaper without creating the latter, this check may fail only + * if the data is in inconsistent status. + */ + if (WARN_ON_ONCE(node.handle.scope != NET_SHAPER_SCOPE_NETDEV)) + return -EINVAL; + } + + leaves = kcalloc(shaper->leaves, sizeof(struct net_shaper), + GFP_KERNEL); + if (!leaves) + return -ENOMEM; + + /* Build the leaves arrays. */ + xa_for_each(&hierarchy->shapers, index, cur) { + if (net_shaper_handle_cmp(&cur->parent, &shaper->handle)) + continue; + + if (WARN_ON_ONCE(leaves_count == shaper->leaves)) { + ret = -EINVAL; + goto free; + } + + leaves[leaves_count++] = *cur; + } + + /* When re-linking to the netdev shaper, avoid the eventual, implicit, + * creation of the new node, would be surprising since the user is + * doing a delete operation. + */ + update_node = node.handle.scope != NET_SHAPER_SCOPE_NETDEV; + ret = __net_shaper_group(binding, update_node, leaves_count, + leaves, &node, extack); + +free: + kfree(leaves); + return ret; +} + +int net_shaper_nl_delete_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct net_shaper_hierarchy *hierarchy; + struct net_shaper_binding *binding; + struct net_shaper_handle handle; + struct net_shaper *shaper; + int ret; + + if (GENL_REQ_ATTR_CHECK(info, NET_SHAPER_A_HANDLE)) + return -EINVAL; + + binding = net_shaper_binding_from_ctx(info->ctx); + + net_shaper_lock(binding); + ret = net_shaper_parse_handle(info->attrs[NET_SHAPER_A_HANDLE], info, + &handle); + if (ret) + goto unlock; + + hierarchy = net_shaper_hierarchy(binding); + if (!hierarchy) { + ret = -ENOENT; + goto unlock; + } + + shaper = net_shaper_lookup(binding, &handle); + if (!shaper) { + ret = -ENOENT; + goto unlock; + } + + if (handle.scope == NET_SHAPER_SCOPE_NODE) { + ret = net_shaper_pre_del_node(binding, shaper, info->extack); + if (ret) + goto unlock; + } + + ret = __net_shaper_delete(binding, shaper, info->extack); + +unlock: + net_shaper_unlock(binding); + return ret; +} + +static int net_shaper_group_send_reply(struct net_shaper_binding *binding, + const struct net_shaper_handle *handle, + struct genl_info *info, + struct sk_buff *msg) +{ + void *hdr; + + hdr = genlmsg_iput(msg, info); + if (!hdr) + goto free_msg; + + if (net_shaper_fill_binding(msg, binding, NET_SHAPER_A_IFINDEX) || + net_shaper_fill_handle(msg, handle, NET_SHAPER_A_HANDLE)) + goto free_msg; + + genlmsg_end(msg, hdr); + + return genlmsg_reply(msg, info); + +free_msg: + /* Should never happen as msg is pre-allocated with enough space. */ + WARN_ONCE(true, "calculated message payload length (%d)", + net_shaper_handle_size()); + nlmsg_free(msg); + return -EMSGSIZE; +} + +int net_shaper_nl_group_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct net_shaper **old_nodes, *leaves, node = {}; + struct net_shaper_hierarchy *hierarchy; + struct net_shaper_binding *binding; + int i, ret, rem, leaves_count; + int old_nodes_count = 0; + struct sk_buff *msg; + struct nlattr *attr; + + if (GENL_REQ_ATTR_CHECK(info, NET_SHAPER_A_LEAVES)) + return -EINVAL; + + binding = net_shaper_binding_from_ctx(info->ctx); + + /* The group operation is optional. 
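Providing the optional group op is what enables the nesting exercised by this handler. Continuing the hypothetical foo driver sketched earlier, a minimal implementation could look roughly as below; the foo_hw_* helpers are again assumptions, not real APIs:

/* Hypothetical H/W helpers, for illustration only. */
int foo_hw_node_set_rate(struct net_device *dev,
                         const struct net_shaper_handle *node, u64 bw_max);
int foo_hw_queue_attach(struct net_device *dev, u32 queue,
                        const struct net_shaper_handle *node, u32 weight);

static int foo_shaper_group(struct net_shaper_binding *binding,
                            int leaves_count, const struct net_shaper *leaves,
                            const struct net_shaper *node,
                            struct netlink_ext_ack *extack)
{
        struct net_device *dev = binding->netdev;
        int i, ret;

        /* Program the scheduling node first... */
        ret = foo_hw_node_set_rate(dev, &node->handle, node->bw_max);
        if (ret)
                return ret;

        /* ...then attach every leaf; leaves always have QUEUE scope. */
        for (i = 0; i < leaves_count; i++) {
                ret = foo_hw_queue_attach(dev, leaves[i].handle.id,
                                          &node->handle, leaves[i].weight);
                if (ret)
                        return ret;
        }
        return 0;
}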
*/ + if (!net_shaper_ops(binding)->group) + return -EOPNOTSUPP; + + net_shaper_lock(binding); + leaves_count = net_shaper_list_len(info, NET_SHAPER_A_LEAVES); + if (!leaves_count) { + NL_SET_BAD_ATTR(info->extack, + info->attrs[NET_SHAPER_A_LEAVES]); + ret = -EINVAL; + goto unlock; + } + + leaves = kcalloc(leaves_count, sizeof(struct net_shaper) + + sizeof(struct net_shaper *), GFP_KERNEL); + if (!leaves) { + ret = -ENOMEM; + goto unlock; + } + old_nodes = (void *)&leaves[leaves_count]; + + ret = net_shaper_parse_node(binding, info->attrs, info, &node); + if (ret) + goto free_leaves; + + i = 0; + nla_for_each_attr_type(attr, NET_SHAPER_A_LEAVES, + genlmsg_data(info->genlhdr), + genlmsg_len(info->genlhdr), rem) { + if (WARN_ON_ONCE(i >= leaves_count)) + goto free_leaves; + + ret = net_shaper_parse_leaf(binding, attr, info, + &node, &leaves[i]); + if (ret) + goto free_leaves; + i++; + } + + /* Prepare the msg reply in advance, to avoid device operation + * rollback on allocation failure. + */ + msg = genlmsg_new(net_shaper_handle_size(), GFP_KERNEL); + if (!msg) + goto free_leaves; + + hierarchy = net_shaper_hierarchy_setup(binding); + if (!hierarchy) { + ret = -ENOMEM; + goto free_msg; + } + + /* Record the node shapers that this group() operation can make + * childless for later cleanup. + */ + for (i = 0; i < leaves_count; i++) { + if (leaves[i].parent.scope == NET_SHAPER_SCOPE_NODE && + net_shaper_handle_cmp(&leaves[i].parent, &node.handle)) { + struct net_shaper *tmp; + + tmp = net_shaper_lookup(binding, &leaves[i].parent); + if (!tmp) + continue; + + old_nodes[old_nodes_count++] = tmp; + } + } + + ret = __net_shaper_group(binding, true, leaves_count, leaves, &node, + info->extack); + if (ret) + goto free_msg; + + /* Check if we need to delete any node left alone by the new leaves + * linkage. + */ + for (i = 0; i < old_nodes_count; ++i) { + struct net_shaper *tmp = old_nodes[i]; + + if (--tmp->leaves > 0) + continue; + + /* Errors here are not fatal: the grouping operation is + * completed, and user-space can still explicitly clean-up + * left-over nodes. 
+ */ + __net_shaper_delete(binding, tmp, info->extack); + } + + ret = net_shaper_group_send_reply(binding, &node.handle, info, msg); + if (ret) + GENL_SET_ERR_MSG_FMT(info, "Can't send reply"); + +free_leaves: + kfree(leaves); + +unlock: + net_shaper_unlock(binding); + return ret; + +free_msg: + kfree_skb(msg); + goto free_leaves; +} + +static int +net_shaper_cap_fill_one(struct sk_buff *msg, + struct net_shaper_binding *binding, + enum net_shaper_scope scope, unsigned long flags, + const struct genl_info *info) +{ + unsigned long cur; + void *hdr; + + hdr = genlmsg_iput(msg, info); + if (!hdr) + return -EMSGSIZE; + + if (net_shaper_fill_binding(msg, binding, NET_SHAPER_A_CAPS_IFINDEX) || + nla_put_u32(msg, NET_SHAPER_A_CAPS_SCOPE, scope)) + goto nla_put_failure; + + for (cur = NET_SHAPER_A_CAPS_SUPPORT_METRIC_BPS; + cur <= NET_SHAPER_A_CAPS_MAX; ++cur) { + if (flags & BIT(cur) && nla_put_flag(msg, cur)) + goto nla_put_failure; + } + + genlmsg_end(msg, hdr); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +int net_shaper_nl_cap_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct net_shaper_binding *binding; + const struct net_shaper_ops *ops; + enum net_shaper_scope scope; + unsigned long flags = 0; + struct sk_buff *msg; + int ret; + + if (GENL_REQ_ATTR_CHECK(info, NET_SHAPER_A_CAPS_SCOPE)) + return -EINVAL; + + binding = net_shaper_binding_from_ctx(info->ctx); + scope = nla_get_u32(info->attrs[NET_SHAPER_A_CAPS_SCOPE]); + ops = net_shaper_ops(binding); + ops->capabilities(binding, scope, &flags); + if (!flags) + return -EOPNOTSUPP; + + msg = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + ret = net_shaper_cap_fill_one(msg, binding, scope, flags, info); + if (ret) + goto free_msg; + + ret = genlmsg_reply(msg, info); + if (ret) + goto free_msg; + return 0; + +free_msg: + nlmsg_free(msg); + return ret; +} + +int net_shaper_nl_cap_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct genl_info *info = genl_info_dump(cb); + struct net_shaper_binding *binding; + const struct net_shaper_ops *ops; + enum net_shaper_scope scope; + int ret; + + binding = net_shaper_binding_from_ctx(cb->ctx); + ops = net_shaper_ops(binding); + for (scope = 0; scope <= NET_SHAPER_SCOPE_MAX; ++scope) { + unsigned long flags = 0; + + ops->capabilities(binding, scope, &flags); + if (!flags) + continue; + + ret = net_shaper_cap_fill_one(skb, binding, scope, flags, + info); + if (ret) + return ret; + } + + return 0; +} + +static void net_shaper_flush(struct net_shaper_binding *binding) +{ + struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding); + struct net_shaper *cur; + unsigned long index; + + if (!hierarchy) + return; + + net_shaper_lock(binding); + xa_lock(&hierarchy->shapers); + xa_for_each(&hierarchy->shapers, index, cur) { + __xa_erase(&hierarchy->shapers, index); + kfree(cur); + } + xa_unlock(&hierarchy->shapers); + net_shaper_unlock(binding); + + kfree(hierarchy); +} + +void net_shaper_flush_netdev(struct net_device *dev) +{ + struct net_shaper_binding binding = { + .type = NET_SHAPER_BINDING_TYPE_NETDEV, + .netdev = dev, + }; + + net_shaper_flush(&binding); +} + +void net_shaper_set_real_num_tx_queues(struct net_device *dev, + unsigned int txq) +{ + struct net_shaper_hierarchy *hierarchy; + struct net_shaper_binding binding; + int i; + + binding.type = NET_SHAPER_BINDING_TYPE_NETDEV; + binding.netdev = dev; + hierarchy = net_shaper_hierarchy(&binding); + if (!hierarchy) + return; + + 
/* Only drivers implementing shapers support ensure + * the lock is acquired in advance. + */ + lockdep_assert_held(&dev->lock); + + /* Take action only when decreasing the tx queue number. */ + for (i = txq; i < dev->real_num_tx_queues; ++i) { + struct net_shaper_handle handle, parent_handle; + struct net_shaper *shaper; + u32 index; + + handle.scope = NET_SHAPER_SCOPE_QUEUE; + handle.id = i; + shaper = net_shaper_lookup(&binding, &handle); + if (!shaper) + continue; + + /* Don't touch the H/W for the queue shaper, the drivers already + * deleted the queue and related resources. + */ + parent_handle = shaper->parent; + index = net_shaper_handle_to_index(&handle); + xa_erase(&hierarchy->shapers, index); + kfree_rcu(shaper, rcu); + + /* The recursion on parent does the full job. */ + if (parent_handle.scope != NET_SHAPER_SCOPE_NODE) + continue; + + shaper = net_shaper_lookup(&binding, &parent_handle); + if (shaper && !--shaper->leaves) + __net_shaper_delete(&binding, shaper, NULL); + } +} + +static int __init shaper_init(void) +{ + return genl_register_family(&net_shaper_nl_family); +} + +subsys_initcall(shaper_init); diff --git a/net/shaper/shaper_nl_gen.c b/net/shaper/shaper_nl_gen.c new file mode 100644 index 000000000000..204c8ae8c7b1 --- /dev/null +++ b/net/shaper/shaper_nl_gen.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/net_shaper.yaml */ +/* YNL-GEN kernel source */ + +#include <net/netlink.h> +#include <net/genetlink.h> + +#include "shaper_nl_gen.h" + +#include <uapi/linux/net_shaper.h> + +/* Common nested types */ +const struct nla_policy net_shaper_handle_nl_policy[NET_SHAPER_A_HANDLE_ID + 1] = { + [NET_SHAPER_A_HANDLE_SCOPE] = NLA_POLICY_MAX(NLA_U32, 3), + [NET_SHAPER_A_HANDLE_ID] = { .type = NLA_U32, }, +}; + +const struct nla_policy net_shaper_leaf_info_nl_policy[NET_SHAPER_A_WEIGHT + 1] = { + [NET_SHAPER_A_HANDLE] = NLA_POLICY_NESTED(net_shaper_handle_nl_policy), + [NET_SHAPER_A_PRIORITY] = { .type = NLA_U32, }, + [NET_SHAPER_A_WEIGHT] = { .type = NLA_U32, }, +}; + +/* NET_SHAPER_CMD_GET - do */ +static const struct nla_policy net_shaper_get_do_nl_policy[NET_SHAPER_A_IFINDEX + 1] = { + [NET_SHAPER_A_IFINDEX] = { .type = NLA_U32, }, + [NET_SHAPER_A_HANDLE] = NLA_POLICY_NESTED(net_shaper_handle_nl_policy), +}; + +/* NET_SHAPER_CMD_GET - dump */ +static const struct nla_policy net_shaper_get_dump_nl_policy[NET_SHAPER_A_IFINDEX + 1] = { + [NET_SHAPER_A_IFINDEX] = { .type = NLA_U32, }, +}; + +/* NET_SHAPER_CMD_SET - do */ +static const struct nla_policy net_shaper_set_nl_policy[NET_SHAPER_A_IFINDEX + 1] = { + [NET_SHAPER_A_IFINDEX] = { .type = NLA_U32, }, + [NET_SHAPER_A_HANDLE] = NLA_POLICY_NESTED(net_shaper_handle_nl_policy), + [NET_SHAPER_A_METRIC] = NLA_POLICY_MAX(NLA_U32, 1), + [NET_SHAPER_A_BW_MIN] = { .type = NLA_UINT, }, + [NET_SHAPER_A_BW_MAX] = { .type = NLA_UINT, }, + [NET_SHAPER_A_BURST] = { .type = NLA_UINT, }, + [NET_SHAPER_A_PRIORITY] = { .type = NLA_U32, }, + [NET_SHAPER_A_WEIGHT] = { .type = NLA_U32, }, +}; + +/* NET_SHAPER_CMD_DELETE - do */ +static const struct nla_policy net_shaper_delete_nl_policy[NET_SHAPER_A_IFINDEX + 1] = { + [NET_SHAPER_A_IFINDEX] = { .type = NLA_U32, }, + [NET_SHAPER_A_HANDLE] = NLA_POLICY_NESTED(net_shaper_handle_nl_policy), +}; + +/* NET_SHAPER_CMD_GROUP - do */ +static const struct nla_policy net_shaper_group_nl_policy[NET_SHAPER_A_LEAVES + 1] = { + [NET_SHAPER_A_IFINDEX] = { .type = NLA_U32, }, + 
[NET_SHAPER_A_PARENT] = NLA_POLICY_NESTED(net_shaper_handle_nl_policy), + [NET_SHAPER_A_HANDLE] = NLA_POLICY_NESTED(net_shaper_handle_nl_policy), + [NET_SHAPER_A_METRIC] = NLA_POLICY_MAX(NLA_U32, 1), + [NET_SHAPER_A_BW_MIN] = { .type = NLA_UINT, }, + [NET_SHAPER_A_BW_MAX] = { .type = NLA_UINT, }, + [NET_SHAPER_A_BURST] = { .type = NLA_UINT, }, + [NET_SHAPER_A_PRIORITY] = { .type = NLA_U32, }, + [NET_SHAPER_A_WEIGHT] = { .type = NLA_U32, }, + [NET_SHAPER_A_LEAVES] = NLA_POLICY_NESTED(net_shaper_leaf_info_nl_policy), +}; + +/* NET_SHAPER_CMD_CAP_GET - do */ +static const struct nla_policy net_shaper_cap_get_do_nl_policy[NET_SHAPER_A_CAPS_SCOPE + 1] = { + [NET_SHAPER_A_CAPS_IFINDEX] = { .type = NLA_U32, }, + [NET_SHAPER_A_CAPS_SCOPE] = NLA_POLICY_MAX(NLA_U32, 3), +}; + +/* NET_SHAPER_CMD_CAP_GET - dump */ +static const struct nla_policy net_shaper_cap_get_dump_nl_policy[NET_SHAPER_A_CAPS_IFINDEX + 1] = { + [NET_SHAPER_A_CAPS_IFINDEX] = { .type = NLA_U32, }, +}; + +/* Ops table for net_shaper */ +static const struct genl_split_ops net_shaper_nl_ops[] = { + { + .cmd = NET_SHAPER_CMD_GET, + .pre_doit = net_shaper_nl_pre_doit, + .doit = net_shaper_nl_get_doit, + .post_doit = net_shaper_nl_post_doit, + .policy = net_shaper_get_do_nl_policy, + .maxattr = NET_SHAPER_A_IFINDEX, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = NET_SHAPER_CMD_GET, + .start = net_shaper_nl_pre_dumpit, + .dumpit = net_shaper_nl_get_dumpit, + .done = net_shaper_nl_post_dumpit, + .policy = net_shaper_get_dump_nl_policy, + .maxattr = NET_SHAPER_A_IFINDEX, + .flags = GENL_CMD_CAP_DUMP, + }, + { + .cmd = NET_SHAPER_CMD_SET, + .pre_doit = net_shaper_nl_pre_doit, + .doit = net_shaper_nl_set_doit, + .post_doit = net_shaper_nl_post_doit, + .policy = net_shaper_set_nl_policy, + .maxattr = NET_SHAPER_A_IFINDEX, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = NET_SHAPER_CMD_DELETE, + .pre_doit = net_shaper_nl_pre_doit, + .doit = net_shaper_nl_delete_doit, + .post_doit = net_shaper_nl_post_doit, + .policy = net_shaper_delete_nl_policy, + .maxattr = NET_SHAPER_A_IFINDEX, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = NET_SHAPER_CMD_GROUP, + .pre_doit = net_shaper_nl_pre_doit, + .doit = net_shaper_nl_group_doit, + .post_doit = net_shaper_nl_post_doit, + .policy = net_shaper_group_nl_policy, + .maxattr = NET_SHAPER_A_LEAVES, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = NET_SHAPER_CMD_CAP_GET, + .pre_doit = net_shaper_nl_cap_pre_doit, + .doit = net_shaper_nl_cap_get_doit, + .post_doit = net_shaper_nl_cap_post_doit, + .policy = net_shaper_cap_get_do_nl_policy, + .maxattr = NET_SHAPER_A_CAPS_SCOPE, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = NET_SHAPER_CMD_CAP_GET, + .start = net_shaper_nl_cap_pre_dumpit, + .dumpit = net_shaper_nl_cap_get_dumpit, + .done = net_shaper_nl_cap_post_dumpit, + .policy = net_shaper_cap_get_dump_nl_policy, + .maxattr = NET_SHAPER_A_CAPS_IFINDEX, + .flags = GENL_CMD_CAP_DUMP, + }, +}; + +struct genl_family net_shaper_nl_family __ro_after_init = { + .name = NET_SHAPER_FAMILY_NAME, + .version = NET_SHAPER_FAMILY_VERSION, + .netnsok = true, + .parallel_ops = true, + .module = THIS_MODULE, + .split_ops = net_shaper_nl_ops, + .n_split_ops = ARRAY_SIZE(net_shaper_nl_ops), +}; diff --git a/net/shaper/shaper_nl_gen.h b/net/shaper/shaper_nl_gen.h new file mode 100644 index 000000000000..cb7f9026fc23 --- /dev/null +++ b/net/shaper/shaper_nl_gen.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, 
auto-generated from: */ +/* Documentation/netlink/specs/net_shaper.yaml */ +/* YNL-GEN kernel header */ + +#ifndef _LINUX_NET_SHAPER_GEN_H +#define _LINUX_NET_SHAPER_GEN_H + +#include <net/netlink.h> +#include <net/genetlink.h> + +#include <uapi/linux/net_shaper.h> + +/* Common nested types */ +extern const struct nla_policy net_shaper_handle_nl_policy[NET_SHAPER_A_HANDLE_ID + 1]; +extern const struct nla_policy net_shaper_leaf_info_nl_policy[NET_SHAPER_A_WEIGHT + 1]; + +int net_shaper_nl_pre_doit(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info); +int net_shaper_nl_cap_pre_doit(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info); +void +net_shaper_nl_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb, + struct genl_info *info); +void +net_shaper_nl_cap_post_doit(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info); +int net_shaper_nl_pre_dumpit(struct netlink_callback *cb); +int net_shaper_nl_cap_pre_dumpit(struct netlink_callback *cb); +int net_shaper_nl_post_dumpit(struct netlink_callback *cb); +int net_shaper_nl_cap_post_dumpit(struct netlink_callback *cb); + +int net_shaper_nl_get_doit(struct sk_buff *skb, struct genl_info *info); +int net_shaper_nl_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int net_shaper_nl_set_doit(struct sk_buff *skb, struct genl_info *info); +int net_shaper_nl_delete_doit(struct sk_buff *skb, struct genl_info *info); +int net_shaper_nl_group_doit(struct sk_buff *skb, struct genl_info *info); +int net_shaper_nl_cap_get_doit(struct sk_buff *skb, struct genl_info *info); +int net_shaper_nl_cap_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb); + +extern struct genl_family net_shaper_nl_family; + +#endif /* _LINUX_NET_SHAPER_GEN_H */ diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 39fb97a8c1df..25aec5c081df 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -9,6 +9,7 @@ TEST_PROGS := \ ping.py \ queues.py \ stats.py \ + shaper.py # end of TEST_PROGS include ../../lib.mk diff --git a/tools/testing/selftests/drivers/net/shaper.py b/tools/testing/selftests/drivers/net/shaper.py new file mode 100755 index 000000000000..11310f19bfa0 --- /dev/null +++ b/tools/testing/selftests/drivers/net/shaper.py @@ -0,0 +1,461 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_true, KsftSkipEx +from lib.py import EthtoolFamily, NetshaperFamily +from lib.py import NetDrvEnv +from lib.py import NlError +from lib.py import cmd + +def get_shapers(cfg, nl_shaper) -> None: + try: + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + except NlError as e: + if e.error == 95: + raise KsftSkipEx("shapers not supported by the device") + raise + + # Default configuration: no shapers configured. + ksft_eq(len(shapers), 0) + +def get_caps(cfg, nl_shaper) -> None: + try: + caps = nl_shaper.cap_get({'ifindex': cfg.ifindex}, dump=True) + except NlError as e: + if e.error == 95: + raise KsftSkipEx("shapers not supported by the device") + raise + + # Each device implementing shaper support must support some + # features in at least a scope. 
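    # For illustration only (not asserted by this test): each dumped
    # capability entry is a dict carrying the interface, the scope and
    # one 'support-*' key per advertised feature, along the lines of:
    #   {'ifindex': cfg.ifindex, 'scope': 'queue',
    #    'support-metric-bps': ..., 'support-bw-max': ...}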
+ ksft_true(len(caps)> 0) + +def set_qshapers(cfg, nl_shaper) -> None: + try: + caps = nl_shaper.cap_get({'ifindex': cfg.ifindex, + 'scope':'queue'}) + except NlError as e: + if e.error == 95: + raise KsftSkipEx("shapers not supported by the device") + raise + if not 'support-bw-max' in caps or not 'support-metric-bps' in caps: + raise KsftSkipEx("device does not support queue scope shapers with bw_max and metric bps") + + cfg.queues = True; + netnl = EthtoolFamily() + channels = netnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + if channels['combined-count'] == 0: + cfg.rx_type = 'rx' + cfg.nr_queues = channels['rx-count'] + else: + cfg.rx_type = 'combined' + cfg.nr_queues = channels['combined-count'] + if cfg.nr_queues < 3: + raise KsftSkipEx(f"device does not support enough queues min 3 found {cfg.nr_queues}") + + nl_shaper.set({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 1}, + 'metric': 'bps', + 'bw-max': 10000}) + nl_shaper.set({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 2}, + 'metric': 'bps', + 'bw-max': 20000}) + + # Querying a specific shaper not yet configured must fail. + raised = False + try: + shaper_q0 = nl_shaper.get({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 0}}) + except (NlError): + raised = True + ksft_eq(raised, True) + + shaper_q1 = nl_shaper.get({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 1}}) + ksft_eq(shaper_q1, {'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 1}, + 'metric': 'bps', + 'bw-max': 10000}) + + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(shapers, [{'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 1}, + 'metric': 'bps', + 'bw-max': 10000}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 2}, + 'metric': 'bps', + 'bw-max': 20000}]) + +def del_qshapers(cfg, nl_shaper) -> None: + if not cfg.queues: + raise KsftSkipEx("queue shapers not supported by device, skipping delete") + + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 2}}) + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 1}}) + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(len(shapers), 0) + +def set_nshapers(cfg, nl_shaper) -> None: + # Check required features. 
+    try:
+        caps = nl_shaper.cap_get({'ifindex': cfg.ifindex,
+                                  'scope': 'netdev'})
+    except NlError as e:
+        if e.error == 95:
+            raise KsftSkipEx("shapers not supported by the device")
+        raise
+    if 'support-bw-max' not in caps or 'support-metric-bps' not in caps:
+        raise KsftSkipEx("device does not support netdev scope shapers with bw_max and metric bps")
+
+    cfg.netdev = True
+    nl_shaper.set({'ifindex': cfg.ifindex,
+                   'handle': {'scope': 'netdev', 'id': 0},
+                   'bw-max': 100000})
+
+    shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+    ksft_eq(shapers, [{'ifindex': cfg.ifindex,
+                       'handle': {'scope': 'netdev'},
+                       'metric': 'bps',
+                       'bw-max': 100000}])
+
+def del_nshapers(cfg, nl_shaper) -> None:
+    if not cfg.netdev:
+        raise KsftSkipEx("netdev shaper not supported by the device, skipping delete")
+
+    nl_shaper.delete({'ifindex': cfg.ifindex,
+                      'handle': {'scope': 'netdev'}})
+    shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+    ksft_eq(len(shapers), 0)
+
+def basic_groups(cfg, nl_shaper) -> None:
+    if not cfg.netdev:
+        raise KsftSkipEx("netdev shaper not supported by the device")
+    if cfg.nr_queues < 3:
+        raise KsftSkipEx(f"netdev does not have enough queues (min 3, reported {cfg.nr_queues})")
+
+    try:
+        caps = nl_shaper.cap_get({'ifindex': cfg.ifindex,
+                                  'scope': 'queue'})
+    except NlError as e:
+        if e.error == 95:
+            raise KsftSkipEx("shapers not supported by the device")
+        raise
+    if 'support-weight' not in caps:
+        raise KsftSkipEx("device does not support queue scope shapers with weight")
+
+    node_handle = nl_shaper.group({
+                        'ifindex': cfg.ifindex,
+                        'leaves': [{'handle': {'scope': 'queue', 'id': 1},
+                                    'weight': 1},
+                                   {'handle': {'scope': 'queue', 'id': 2},
+                                    'weight': 2}],
+                        'handle': {'scope': 'netdev'},
+                        'metric': 'bps',
+                        'bw-max': 10000})
+    ksft_eq(node_handle, {'ifindex': cfg.ifindex,
+                          'handle': {'scope': 'netdev'}})
+
+    shaper = nl_shaper.get({'ifindex': cfg.ifindex,
+                            'handle': {'scope': 'queue', 'id': 1}})
+    ksft_eq(shaper, {'ifindex': cfg.ifindex,
+                     'parent': {'scope': 'netdev'},
+                     'handle': {'scope': 'queue', 'id': 1},
+                     'weight': 1})
+
+    nl_shaper.delete({'ifindex': cfg.ifindex,
+                      'handle': {'scope': 'queue', 'id': 2}})
+    nl_shaper.delete({'ifindex': cfg.ifindex,
+                      'handle': {'scope': 'queue', 'id': 1}})
+
+    # Deleting all the leaf shapers does not affect the node one
+    # when the latter has 'netdev' scope.
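+    # Only the netdev scope shaper is expected to be left in place.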
+    shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+    ksft_eq(len(shapers), 1)
+
+    nl_shaper.delete({'ifindex': cfg.ifindex,
+                      'handle': {'scope': 'netdev'}})
+
+def qgroups(cfg, nl_shaper) -> None:
+    if cfg.nr_queues < 4:
+        raise KsftSkipEx(f"netdev does not have enough queues (min 4, reported {cfg.nr_queues})")
+    try:
+        caps = nl_shaper.cap_get({'ifindex': cfg.ifindex,
+                                  'scope': 'node'})
+    except NlError as e:
+        if e.error == 95:
+            raise KsftSkipEx("shapers not supported by the device")
+        raise
+    if 'support-bw-max' not in caps or 'support-metric-bps' not in caps:
+        raise KsftSkipEx("device does not support node scope shapers with bw_max and metric bps")
+    try:
+        caps = nl_shaper.cap_get({'ifindex': cfg.ifindex,
+                                  'scope': 'queue'})
+    except NlError as e:
+        if e.error == 95:
+            raise KsftSkipEx("shapers not supported by the device")
+        raise
+    if 'support-nesting' not in caps or 'support-weight' not in caps or 'support-metric-bps' not in caps:
+        raise KsftSkipEx("device does not support nested queue scope shapers with weight")
+
+    cfg.groups = True
+    node_handle = nl_shaper.group({
+                        'ifindex': cfg.ifindex,
+                        'leaves': [{'handle': {'scope': 'queue', 'id': 1},
+                                    'weight': 3},
+                                   {'handle': {'scope': 'queue', 'id': 2},
+                                    'weight': 2}],
+                        'handle': {'scope': 'node'},
+                        'metric': 'bps',
+                        'bw-max': 10000})
+    node_id = node_handle['handle']['id']
+
+    shaper = nl_shaper.get({'ifindex': cfg.ifindex,
+                            'handle': {'scope': 'queue', 'id': 1}})
+    ksft_eq(shaper, {'ifindex': cfg.ifindex,
+                     'parent': {'scope': 'node', 'id': node_id},
+                     'handle': {'scope': 'queue', 'id': 1},
+                     'weight': 3})
+    shaper = nl_shaper.get({'ifindex': cfg.ifindex,
+                            'handle': {'scope': 'node', 'id': node_id}})
+    ksft_eq(shaper, {'ifindex': cfg.ifindex,
+                     'handle': {'scope': 'node', 'id': node_id},
+                     'parent': {'scope': 'netdev'},
+                     'metric': 'bps',
+                     'bw-max': 10000})
+
+    # Grouping to a specified but non-existent node scope shaper must fail.
+    raised = False
+    try:
+        nl_shaper.group({
+                        'ifindex': cfg.ifindex,
+                        'leaves': [{'handle': {'scope': 'queue', 'id': 3},
+                                    'weight': 3}],
+                        'handle': {'scope': 'node', 'id': node_id + 1},
+                        'metric': 'bps',
+                        'bw-max': 10000})
+    except NlError:
+        raised = True
+    ksft_eq(raised, True)
+
+    # Add to an existing node.
+    node_handle = nl_shaper.group({
+                        'ifindex': cfg.ifindex,
+                        'leaves': [{'handle': {'scope': 'queue', 'id': 3},
+                                    'weight': 4}],
+                        'handle': {'scope': 'node', 'id': node_id}})
+    ksft_eq(node_handle, {'ifindex': cfg.ifindex,
+                          'handle': {'scope': 'node', 'id': node_id}})
+
+    shaper = nl_shaper.get({'ifindex': cfg.ifindex,
+                            'handle': {'scope': 'queue', 'id': 3}})
+    ksft_eq(shaper, {'ifindex': cfg.ifindex,
+                     'parent': {'scope': 'node', 'id': node_id},
+                     'handle': {'scope': 'queue', 'id': 3},
+                     'weight': 4})
+
+    nl_shaper.delete({'ifindex': cfg.ifindex,
+                      'handle': {'scope': 'queue', 'id': 2}})
+    nl_shaper.delete({'ifindex': cfg.ifindex,
+                      'handle': {'scope': 'queue', 'id': 1}})
+
+    # Deleting a non-empty node will move the leaves downstream.
+    nl_shaper.delete({'ifindex': cfg.ifindex,
+                      'handle': {'scope': 'node', 'id': node_id}})
+    shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+    ksft_eq(shapers, [{'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'netdev'},
+                       'handle': {'scope': 'queue', 'id': 3},
+                       'weight': 4}])
+
+    # Finish and verify the complete cleanup.
+    nl_shaper.delete({'ifindex': cfg.ifindex,
+                      'handle': {'scope': 'queue', 'id': 3}})
+    shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+    ksft_eq(len(shapers), 0)
+
+def delegation(cfg, nl_shaper) -> None:
+    if not cfg.groups:
+        raise KsftSkipEx("device does not support node scope")
+    try:
+        caps = nl_shaper.cap_get({'ifindex': cfg.ifindex,
+                                  'scope': 'node'})
+    except NlError as e:
+        if e.error == 95:
+            raise KsftSkipEx("node scope shapers not supported by the device")
+        raise
+    if 'support-nesting' not in caps:
+        raise KsftSkipEx("device does not support node scope shaper nesting")
+
+    node_handle = nl_shaper.group({
+                        'ifindex': cfg.ifindex,
+                        'leaves': [{'handle': {'scope': 'queue', 'id': 1},
+                                    'weight': 3},
+                                   {'handle': {'scope': 'queue', 'id': 2},
+                                    'weight': 2},
+                                   {'handle': {'scope': 'queue', 'id': 3},
+                                    'weight': 1}],
+                        'handle': {'scope': 'node'},
+                        'metric': 'bps',
+                        'bw-max': 10000})
+    node_id = node_handle['handle']['id']
+
+    # Create the nested node and validate the hierarchy.
+    nested_node_handle = nl_shaper.group({
+                        'ifindex': cfg.ifindex,
+                        'leaves': [{'handle': {'scope': 'queue', 'id': 1},
+                                    'weight': 3},
+                                   {'handle': {'scope': 'queue', 'id': 2},
+                                    'weight': 2}],
+                        'handle': {'scope': 'node'},
+                        'metric': 'bps',
+                        'bw-max': 5000})
+    nested_node_id = nested_node_handle['handle']['id']
+    ksft_true(nested_node_id != node_id)
+    shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+    ksft_eq(shapers, [{'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'node', 'id': nested_node_id},
+                       'handle': {'scope': 'queue', 'id': 1},
+                       'weight': 3},
+                      {'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'node', 'id': nested_node_id},
+                       'handle': {'scope': 'queue', 'id': 2},
+                       'weight': 2},
+                      {'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'node', 'id': node_id},
+                       'handle': {'scope': 'queue', 'id': 3},
+                       'weight': 1},
+                      {'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'netdev'},
+                       'handle': {'scope': 'node', 'id': node_id},
+                       'metric': 'bps',
+                       'bw-max': 10000},
+                      {'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'node', 'id': node_id},
+                       'handle': {'scope': 'node', 'id': nested_node_id},
+                       'metric': 'bps',
+                       'bw-max': 5000}])
+
+    # Deleting a non-empty node will move the leaves downstream.
+    nl_shaper.delete({'ifindex': cfg.ifindex,
+                      'handle': {'scope': 'node', 'id': nested_node_id}})
+    shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+    ksft_eq(shapers, [{'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'node', 'id': node_id},
+                       'handle': {'scope': 'queue', 'id': 1},
+                       'weight': 3},
+                      {'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'node', 'id': node_id},
+                       'handle': {'scope': 'queue', 'id': 2},
+                       'weight': 2},
+                      {'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'node', 'id': node_id},
+                       'handle': {'scope': 'queue', 'id': 3},
+                       'weight': 1},
+                      {'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'netdev'},
+                       'handle': {'scope': 'node', 'id': node_id},
+                       'metric': 'bps',
+                       'bw-max': 10000}])
+
+    # Final cleanup.
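+    # Deleting the last leaf of the remaining node scope shaper is expected
+    # to implicitly remove the node too: the final dump must be empty.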
+    for i in range(1, 4):
+        nl_shaper.delete({'ifindex': cfg.ifindex,
+                          'handle': {'scope': 'queue', 'id': i}})
+    shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+    ksft_eq(len(shapers), 0)
+
+def queue_update(cfg, nl_shaper) -> None:
+    if cfg.nr_queues < 4:
+        raise KsftSkipEx(f"netdev does not have enough queues (min 4, reported {cfg.nr_queues})")
+    if not cfg.queues:
+        raise KsftSkipEx("device does not support queue scope")
+
+    for i in range(3):
+        nl_shaper.set({'ifindex': cfg.ifindex,
+                       'handle': {'scope': 'queue', 'id': i},
+                       'metric': 'bps',
+                       'bw-max': (i + 1) * 1000})
+    # Delete a channel with no shaper configured on top of the related
+    # queue: no changes expected.
+    cmd(f"ethtool -L {cfg.dev['ifname']} {cfg.rx_type} 3", timeout=10)
+    shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+    ksft_eq(shapers, [{'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'netdev'},
+                       'handle': {'scope': 'queue', 'id': 0},
+                       'metric': 'bps',
+                       'bw-max': 1000},
+                      {'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'netdev'},
+                       'handle': {'scope': 'queue', 'id': 1},
+                       'metric': 'bps',
+                       'bw-max': 2000},
+                      {'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'netdev'},
+                       'handle': {'scope': 'queue', 'id': 2},
+                       'metric': 'bps',
+                       'bw-max': 3000}])
+
+    # Delete a channel with a shaper configured on top of the related
+    # queue: the shaper must be deleted, too.
+    cmd(f"ethtool -L {cfg.dev['ifname']} {cfg.rx_type} 2", timeout=10)
+
+    shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+    ksft_eq(shapers, [{'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'netdev'},
+                       'handle': {'scope': 'queue', 'id': 0},
+                       'metric': 'bps',
+                       'bw-max': 1000},
+                      {'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'netdev'},
+                       'handle': {'scope': 'queue', 'id': 1},
+                       'metric': 'bps',
+                       'bw-max': 2000}])
+
+    # Restore the original number of channels; no changes expected.
+    cmd(f"ethtool -L {cfg.dev['ifname']} {cfg.rx_type} {cfg.nr_queues}", timeout=10)
+    shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+    ksft_eq(shapers, [{'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'netdev'},
+                       'handle': {'scope': 'queue', 'id': 0},
+                       'metric': 'bps',
+                       'bw-max': 1000},
+                      {'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'netdev'},
+                       'handle': {'scope': 'queue', 'id': 1},
+                       'metric': 'bps',
+                       'bw-max': 2000}])
+
+    # Final cleanup.
+    for i in range(0, 2):
+        nl_shaper.delete({'ifindex': cfg.ifindex,
+                          'handle': {'scope': 'queue', 'id': i}})
+
+def main() -> None:
+    with NetDrvEnv(__file__, queue_count=4) as cfg:
+        cfg.queues = False
+        cfg.netdev = False
+        cfg.groups = False
+        cfg.nr_queues = 0
+        ksft_run([get_shapers,
+                  get_caps,
+                  set_qshapers,
+                  del_qshapers,
+                  set_nshapers,
+                  del_nshapers,
+                  basic_groups,
+                  qgroups,
+                  delegation,
+                  queue_update], args=(cfg, NetshaperFamily()))
+    ksft_exit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py
index b6d498d125fe..54d8f5eba810 100644
--- a/tools/testing/selftests/net/lib/py/__init__.py
+++ b/tools/testing/selftests/net/lib/py/__init__.py
@@ -6,3 +6,4 @@ from .netns import NetNS
 from .nsim import *
 from .utils import *
 from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily
+from .ynl import NetshaperFamily
diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py
index 1ace58370c06..a0d689d58c57 100644
--- a/tools/testing/selftests/net/lib/py/ynl.py
+++ b/tools/testing/selftests/net/lib/py/ynl.py
@@ -47,3 +47,8 @@ class NetdevFamily(YnlFamily):
     def __init__(self):
         super().__init__((SPEC_PATH / Path('netdev.yaml')).as_posix(),
                          schema='')
+
+class NetshaperFamily(YnlFamily):
+    def __init__(self):
+        super().__init__((SPEC_PATH / Path('net_shaper.yaml')).as_posix(),
+                         schema='')